1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #include <sys/sysmacros.h> 33 #include <sys/errno.h> 34 #include <sys/dlpi.h> 35 #include <sys/socket.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/vtrace.h> 41 #include <sys/kmem.h> 42 #include <sys/zone.h> 43 #include <sys/ethernet.h> 44 #include <sys/sdt.h> 45 46 #include <net/if.h> 47 #include <net/if_types.h> 48 #include <net/if_dl.h> 49 #include <net/route.h> 50 #include <netinet/in.h> 51 #include <netinet/ip6.h> 52 #include <netinet/icmp6.h> 53 54 #include <inet/common.h> 55 #include <inet/mi.h> 56 #include <inet/mib2.h> 57 #include <inet/nd.h> 58 #include <inet/ip.h> 59 #include <inet/ip_impl.h> 60 #include <inet/ip_if.h> 61 #include <inet/ip_ire.h> 62 #include <inet/ip_rts.h> 63 #include <inet/ip6.h> 64 #include <inet/ip_ndp.h> 65 #include <inet/ipsec_impl.h> 66 #include <inet/ipsec_info.h> 67 #include <inet/sctp_ip.h> 68 69 /* 70 * Function names with nce_ prefix are static while function 71 * names with ndp_ prefix are used by rest of the IP. 72 * 73 * Lock ordering: 74 * 75 * ndp_g_lock -> ill_lock -> nce_lock 76 * 77 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 78 * nce_next. Nce_lock protects the contents of the NCE (particularly 79 * nce_refcnt). 80 */ 81 82 static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, 83 uint32_t ll_addr_len); 84 static void nce_ire_delete(nce_t *nce); 85 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 86 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 87 static nce_t *nce_lookup_addr(ill_t *, const in6_addr_t *, nce_t *); 88 static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); 89 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 90 uchar_t *addr); 91 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 92 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 93 static void nce_report1(nce_t *nce, uchar_t *mp_arg); 94 static mblk_t *nce_udreq_alloc(ill_t *ill); 95 static void nce_update(nce_t *nce, uint16_t new_state, 96 uchar_t *new_ll_addr); 97 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 98 static boolean_t nce_xmit(ill_t *ill, uint32_t operation, 99 ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, 100 const in6_addr_t *target, int flag); 101 extern void th_trace_rrecord(th_trace_t *); 102 static int ndp_lookup_then_add_v6(ill_t *, uchar_t *, 103 const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, 104 uint32_t, uint16_t, uint16_t, nce_t **, mblk_t *, mblk_t *); 105 static int ndp_lookup_then_add_v4(ill_t *, uchar_t *, 106 const in_addr_t *, const in_addr_t *, const in_addr_t *, 107 uint32_t, uint16_t, uint16_t, nce_t **, mblk_t *, mblk_t *); 108 static int ndp_add_v6(ill_t *, uchar_t *, const in6_addr_t *, 109 const in6_addr_t *, const in6_addr_t *, uint32_t, uint16_t, uint16_t, 110 nce_t **); 111 static int ndp_add_v4(ill_t *, uchar_t *, const in_addr_t *, 112 const in_addr_t *, const in_addr_t *, uint32_t, uint16_t, uint16_t, 113 nce_t **, mblk_t *, mblk_t *); 114 115 116 #ifdef NCE_DEBUG 117 void nce_trace_inactive(nce_t *); 118 #endif 119 120 ndp_g_t ndp4, ndp6; 121 122 #define NCE_HASH_PTR_V4(addr) \ 123 (&(ndp4.nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 124 125 #define NCE_HASH_PTR_V6(addr) \ 126 (&(ndp6.nce_hash_tbl[NCE_ADDR_HASH_V6(addr, NCE_TABLE_SIZE)])) 127 128 /* 129 * Compute default flags to use for an advertisement of this nce's address. 130 */ 131 static int 132 nce_advert_flags(const nce_t *nce) 133 { 134 int flag = 0; 135 136 if (nce->nce_flags & NCE_F_ISROUTER) 137 flag |= NDP_ISROUTER; 138 if (!(nce->nce_flags & NCE_F_PROXY)) 139 flag |= NDP_ORIDE; 140 return (flag); 141 } 142 143 int 144 ndp_add(ill_t *ill, uchar_t *hw_addr, const void *addr, 145 const void *mask, const void *extract_mask, 146 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 147 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 148 { 149 int status; 150 151 if (ill->ill_isv6) 152 status = ndp_add_v6(ill, hw_addr, (in6_addr_t *)addr, 153 (in6_addr_t *)mask, (in6_addr_t *)extract_mask, 154 hw_extract_start, flags, state, newnce); 155 else 156 status = ndp_add_v4(ill, hw_addr, (in_addr_t *)addr, 157 (in_addr_t *)mask, (in_addr_t *)extract_mask, 158 hw_extract_start, flags, state, newnce, fp_mp, res_mp); 159 return (status); 160 } 161 162 /* Non-tunable probe interval, based on link capabilities */ 163 #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 164 165 /* 166 * NDP Cache Entry creation routine. 167 * Mapped entries will never do NUD . 168 * This routine must always be called with ndp6.ndp_g_lock held. 169 * Prior to return, nce_refcnt is incremented. 170 */ 171 static int 172 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 173 const in6_addr_t *mask, const in6_addr_t *extract_mask, 174 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 175 nce_t **newnce) 176 { 177 static nce_t nce_nil; 178 nce_t *nce; 179 mblk_t *mp; 180 mblk_t *template; 181 nce_t **ncep; 182 int err; 183 boolean_t dropped = B_FALSE; 184 185 ASSERT(MUTEX_HELD(&ndp6.ndp_g_lock)); 186 ASSERT(ill != NULL && ill->ill_isv6); 187 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 188 ip0dbg(("ndp_add: no addr\n")); 189 return (EINVAL); 190 } 191 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 192 ip0dbg(("ndp_add: flags = %x\n", (int)flags)); 193 return (EINVAL); 194 } 195 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 196 (flags & NCE_F_MAPPING)) { 197 ip0dbg(("ndp_add: extract mask zero for mapping")); 198 return (EINVAL); 199 } 200 /* 201 * Allocate the mblk to hold the nce. 202 * 203 * XXX This can come out of a separate cache - nce_cache. 204 * We don't need the mp anymore as there are no more 205 * "qwriter"s 206 */ 207 mp = allocb(sizeof (nce_t), BPRI_MED); 208 if (mp == NULL) 209 return (ENOMEM); 210 211 nce = (nce_t *)mp->b_rptr; 212 mp->b_wptr = (uchar_t *)&nce[1]; 213 *nce = nce_nil; 214 215 /* 216 * This one holds link layer address 217 */ 218 if (ill->ill_net_type == IRE_IF_RESOLVER) { 219 template = nce_udreq_alloc(ill); 220 } else { 221 if (ill->ill_resolver_mp == NULL) { 222 freeb(mp); 223 return (EINVAL); 224 } 225 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 226 template = copyb(ill->ill_resolver_mp); 227 } 228 if (template == NULL) { 229 freeb(mp); 230 return (ENOMEM); 231 } 232 nce->nce_ill = ill; 233 nce->nce_ipversion = IPV6_VERSION; 234 nce->nce_flags = flags; 235 nce->nce_state = state; 236 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 237 nce->nce_rcnt = ill->ill_xmit_count; 238 nce->nce_addr = *addr; 239 nce->nce_mask = *mask; 240 nce->nce_extract_mask = *extract_mask; 241 nce->nce_ll_extract_start = hw_extract_start; 242 nce->nce_fp_mp = NULL; 243 nce->nce_res_mp = template; 244 if (state == ND_REACHABLE) 245 nce->nce_last = TICK_TO_MSEC(lbolt64); 246 else 247 nce->nce_last = 0; 248 nce->nce_qd_mp = NULL; 249 nce->nce_mp = mp; 250 if (hw_addr != NULL) 251 nce_set_ll(nce, hw_addr); 252 /* This one is for nce getting created */ 253 nce->nce_refcnt = 1; 254 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 255 if (nce->nce_flags & NCE_F_MAPPING) { 256 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 257 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 258 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 259 ncep = &ndp6.nce_mask_entries; 260 } else { 261 ncep = ((nce_t **)NCE_HASH_PTR_V6(*addr)); 262 } 263 264 #ifdef NCE_DEBUG 265 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 266 #endif 267 /* 268 * Atomically ensure that the ill is not CONDEMNED, before 269 * adding the NCE. 270 */ 271 mutex_enter(&ill->ill_lock); 272 if (ill->ill_state_flags & ILL_CONDEMNED) { 273 mutex_exit(&ill->ill_lock); 274 freeb(mp); 275 freeb(template); 276 return (EINVAL); 277 } 278 if ((nce->nce_next = *ncep) != NULL) 279 nce->nce_next->nce_ptpn = &nce->nce_next; 280 *ncep = nce; 281 nce->nce_ptpn = ncep; 282 *newnce = nce; 283 /* This one is for nce being used by an active thread */ 284 NCE_REFHOLD(*newnce); 285 286 /* Bump up the number of nce's referencing this ill */ 287 ill->ill_nce_cnt++; 288 mutex_exit(&ill->ill_lock); 289 290 err = 0; 291 if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { 292 mutex_enter(&nce->nce_lock); 293 mutex_exit(&ndp6.ndp_g_lock); 294 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 295 mutex_exit(&nce->nce_lock); 296 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 297 &ipv6_all_zeros, addr, NDP_PROBE); 298 if (dropped) { 299 mutex_enter(&nce->nce_lock); 300 nce->nce_pcnt++; 301 mutex_exit(&nce->nce_lock); 302 } 303 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 304 mutex_enter(&ndp6.ndp_g_lock); 305 err = EINPROGRESS; 306 } else if (flags & NCE_F_UNSOL_ADV) { 307 /* 308 * We account for the transmit below by assigning one 309 * less than the ndd variable. Subsequent decrements 310 * are done in ndp_timer. 311 */ 312 mutex_enter(&nce->nce_lock); 313 mutex_exit(&ndp6.ndp_g_lock); 314 nce->nce_unsolicit_count = ip_ndp_unsolicit_count - 1; 315 mutex_exit(&nce->nce_lock); 316 dropped = nce_xmit(ill, 317 ND_NEIGHBOR_ADVERT, 318 ill, /* ill to be used for extracting ill_nd_lla */ 319 B_TRUE, /* use ill_nd_lla */ 320 addr, /* Source and target of the advertisement pkt */ 321 &ipv6_all_hosts_mcast, /* Destination of the packet */ 322 nce_advert_flags(nce)); 323 mutex_enter(&nce->nce_lock); 324 if (dropped) 325 nce->nce_unsolicit_count++; 326 if (nce->nce_unsolicit_count != 0) { 327 nce->nce_timeout_id = timeout(ndp_timer, nce, 328 MSEC_TO_TICK(ip_ndp_unsolicit_interval)); 329 } 330 mutex_exit(&nce->nce_lock); 331 mutex_enter(&ndp6.ndp_g_lock); 332 } 333 /* 334 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 335 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 336 * We call nce_fastpath from nce_update if the link layer address of 337 * the peer changes from nce_update 338 */ 339 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 340 nce_fastpath(nce); 341 return (err); 342 } 343 344 int 345 ndp_lookup_then_add(ill_t *ill, uchar_t *hw_addr, const void *addr, 346 const void *mask, const void *extract_mask, 347 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 348 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 349 { 350 int status; 351 352 if (ill->ill_isv6) { 353 status = ndp_lookup_then_add_v6(ill, hw_addr, 354 (in6_addr_t *)addr, (in6_addr_t *)mask, 355 (in6_addr_t *)extract_mask, hw_extract_start, flags, 356 state, newnce, fp_mp, res_mp); 357 } else { 358 status = ndp_lookup_then_add_v4(ill, hw_addr, 359 (in_addr_t *)addr, (in_addr_t *)mask, 360 (in_addr_t *)extract_mask, hw_extract_start, flags, 361 state, newnce, fp_mp, res_mp); 362 } 363 364 return (status); 365 } 366 367 static int 368 ndp_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 369 const in6_addr_t *mask, const in6_addr_t *extract_mask, 370 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 371 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 372 { 373 int err = 0; 374 nce_t *nce; 375 376 ASSERT(ill != NULL && ill->ill_isv6); 377 mutex_enter(&ndp6.ndp_g_lock); 378 nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); /* head of v6 hash table */ 379 nce = nce_lookup_addr(ill, addr, nce); 380 if (nce == NULL) { 381 err = ndp_add(ill, 382 hw_addr, 383 addr, 384 mask, 385 extract_mask, 386 hw_extract_start, 387 flags, 388 state, 389 newnce, 390 fp_mp, 391 res_mp); 392 } else { 393 *newnce = nce; 394 err = EEXIST; 395 } 396 mutex_exit(&ndp6.ndp_g_lock); 397 return (err); 398 } 399 400 /* 401 * Remove all the CONDEMNED nces from the appropriate hash table. 402 * We create a private list of NCEs, these may have ires pointing 403 * to them, so the list will be passed through to clean up dependent 404 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 405 */ 406 static void 407 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 408 { 409 nce_t *nce1; 410 nce_t **ptpn; 411 412 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 413 ASSERT(ndp->ndp_g_walker == 0); 414 for (; nce; nce = nce1) { 415 nce1 = nce->nce_next; 416 mutex_enter(&nce->nce_lock); 417 if (nce->nce_flags & NCE_F_CONDEMNED) { 418 ptpn = nce->nce_ptpn; 419 nce1 = nce->nce_next; 420 if (nce1 != NULL) 421 nce1->nce_ptpn = ptpn; 422 *ptpn = nce1; 423 nce->nce_ptpn = NULL; 424 nce->nce_next = NULL; 425 nce->nce_next = *free_nce_list; 426 *free_nce_list = nce; 427 } 428 mutex_exit(&nce->nce_lock); 429 } 430 } 431 432 /* 433 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 434 * will return this NCE. Also no new IREs will be created that 435 * point to this NCE (See ire_add_v6). Also no new timeouts will 436 * be started (See NDP_RESTART_TIMER). 437 * 2. Cancel any currently running timeouts. 438 * 3. If there is an ndp walker, return. The walker will do the cleanup. 439 * This ensures that walkers see a consistent list of NCEs while walking. 440 * 4. Otherwise remove the NCE from the list of NCEs 441 * 5. Delete all IREs pointing to this NCE. 442 */ 443 void 444 ndp_delete(nce_t *nce) 445 { 446 nce_t **ptpn; 447 nce_t *nce1; 448 int ipversion = nce->nce_ipversion; 449 ndp_g_t *ndp = (ipversion == IPV4_VERSION ? &ndp4 : &ndp6); 450 451 /* Serialize deletes */ 452 mutex_enter(&nce->nce_lock); 453 if (nce->nce_flags & NCE_F_CONDEMNED) { 454 /* Some other thread is doing the delete */ 455 mutex_exit(&nce->nce_lock); 456 return; 457 } 458 /* 459 * Caller has a refhold. Also 1 ref for being in the list. Thus 460 * refcnt has to be >= 2 461 */ 462 ASSERT(nce->nce_refcnt >= 2); 463 nce->nce_flags |= NCE_F_CONDEMNED; 464 mutex_exit(&nce->nce_lock); 465 466 nce_fastpath_list_delete(nce); 467 468 /* 469 * Cancel any running timer. Timeout can't be restarted 470 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 471 * Passing invalid timeout id is fine. 472 */ 473 if (nce->nce_timeout_id != 0) { 474 (void) untimeout(nce->nce_timeout_id); 475 nce->nce_timeout_id = 0; 476 } 477 478 mutex_enter(&ndp->ndp_g_lock); 479 if (nce->nce_ptpn == NULL) { 480 /* 481 * The last ndp walker has already removed this nce from 482 * the list after we marked the nce CONDEMNED and before 483 * we grabbed the global lock. 484 */ 485 mutex_exit(&ndp->ndp_g_lock); 486 return; 487 } 488 if (ndp->ndp_g_walker > 0) { 489 /* 490 * Can't unlink. The walker will clean up 491 */ 492 ndp->ndp_g_walker_cleanup = B_TRUE; 493 mutex_exit(&ndp->ndp_g_lock); 494 return; 495 } 496 497 /* 498 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 499 * the timer since it is marked CONDEMNED. 500 */ 501 ptpn = nce->nce_ptpn; 502 nce1 = nce->nce_next; 503 if (nce1 != NULL) 504 nce1->nce_ptpn = ptpn; 505 *ptpn = nce1; 506 nce->nce_ptpn = NULL; 507 nce->nce_next = NULL; 508 mutex_exit(&ndp->ndp_g_lock); 509 510 nce_ire_delete(nce); 511 } 512 513 void 514 ndp_inactive(nce_t *nce) 515 { 516 mblk_t **mpp; 517 ill_t *ill; 518 519 ASSERT(nce->nce_refcnt == 0); 520 ASSERT(MUTEX_HELD(&nce->nce_lock)); 521 ASSERT(nce->nce_fastpath == NULL); 522 523 /* Free all nce allocated messages */ 524 mpp = &nce->nce_first_mp_to_free; 525 do { 526 while (*mpp != NULL) { 527 mblk_t *mp; 528 529 mp = *mpp; 530 *mpp = mp->b_next; 531 532 inet_freemsg(mp); 533 } 534 } while (mpp++ != &nce->nce_last_mp_to_free); 535 536 #ifdef NCE_DEBUG 537 nce_trace_inactive(nce); 538 #endif 539 540 ill = nce->nce_ill; 541 mutex_enter(&ill->ill_lock); 542 ill->ill_nce_cnt--; 543 /* 544 * If the number of nce's associated with this ill have dropped 545 * to zero, check whether we need to restart any operation that 546 * is waiting for this to happen. 547 */ 548 if (ill->ill_nce_cnt == 0) { 549 /* ipif_ill_refrele_tail drops the ill_lock */ 550 ipif_ill_refrele_tail(ill); 551 } else { 552 mutex_exit(&ill->ill_lock); 553 } 554 mutex_destroy(&nce->nce_lock); 555 if (nce->nce_mp != NULL) 556 inet_freemsg(nce->nce_mp); 557 } 558 559 /* 560 * ndp_walk routine. Delete the nce if it is associated with the ill 561 * that is going away. Always called as a writer. 562 */ 563 void 564 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 565 { 566 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 567 ndp_delete(nce); 568 } 569 } 570 571 /* 572 * Walk a list of to be inactive NCEs and blow away all the ires. 573 */ 574 static void 575 nce_ire_delete_list(nce_t *nce) 576 { 577 nce_t *nce_next; 578 579 ASSERT(nce != NULL); 580 while (nce != NULL) { 581 nce_next = nce->nce_next; 582 nce->nce_next = NULL; 583 584 /* 585 * It is possible for the last ndp walker (this thread) 586 * to come here after ndp_delete has marked the nce CONDEMNED 587 * and before it has removed the nce from the fastpath list 588 * or called untimeout. So we need to do it here. It is safe 589 * for both ndp_delete and this thread to do it twice or 590 * even simultaneously since each of the threads has a 591 * reference on the nce. 592 */ 593 nce_fastpath_list_delete(nce); 594 /* 595 * Cancel any running timer. Timeout can't be restarted 596 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 597 * Passing invalid timeout id is fine. 598 */ 599 if (nce->nce_timeout_id != 0) { 600 (void) untimeout(nce->nce_timeout_id); 601 nce->nce_timeout_id = 0; 602 } 603 /* 604 * We might hit this func thus in the v4 case: 605 * ipif_down->ipif_ndp_down->ndp_walk 606 */ 607 608 if (nce->nce_ipversion == IPV4_VERSION) { 609 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 610 IRE_CACHE, nce_ire_delete1, 611 (char *)nce, nce->nce_ill); 612 } else { 613 ASSERT(nce->nce_ipversion == IPV6_VERSION); 614 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 615 IRE_CACHE, nce_ire_delete1, 616 (char *)nce, nce->nce_ill); 617 } 618 NCE_REFRELE_NOTR(nce); 619 nce = nce_next; 620 } 621 } 622 623 /* 624 * Delete an ire when the nce goes away. 625 */ 626 /* ARGSUSED */ 627 static void 628 nce_ire_delete(nce_t *nce) 629 { 630 if (nce->nce_ipversion == IPV6_VERSION) { 631 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 632 nce_ire_delete1, (char *)nce, nce->nce_ill); 633 NCE_REFRELE_NOTR(nce); 634 } else { 635 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 636 nce_ire_delete1, (char *)nce, nce->nce_ill); 637 NCE_REFRELE_NOTR(nce); 638 } 639 } 640 641 /* 642 * ire_walk routine used to delete every IRE that shares this nce 643 */ 644 static void 645 nce_ire_delete1(ire_t *ire, char *nce_arg) 646 { 647 nce_t *nce = (nce_t *)nce_arg; 648 649 ASSERT(ire->ire_type == IRE_CACHE); 650 651 if (ire->ire_nce == nce) { 652 ASSERT(ire->ire_ipversion == nce->nce_ipversion); 653 ire_delete(ire); 654 } 655 } 656 657 /* 658 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 659 */ 660 boolean_t 661 ndp_restart_dad(nce_t *nce) 662 { 663 boolean_t started; 664 boolean_t dropped; 665 666 if (nce == NULL) 667 return (B_FALSE); 668 mutex_enter(&nce->nce_lock); 669 if (nce->nce_state == ND_PROBE) { 670 mutex_exit(&nce->nce_lock); 671 started = B_TRUE; 672 } else if (nce->nce_state == ND_REACHABLE) { 673 nce->nce_state = ND_PROBE; 674 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; 675 mutex_exit(&nce->nce_lock); 676 dropped = nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, NULL, 677 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, NDP_PROBE); 678 if (dropped) { 679 mutex_enter(&nce->nce_lock); 680 nce->nce_pcnt++; 681 mutex_exit(&nce->nce_lock); 682 } 683 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); 684 started = B_TRUE; 685 } else { 686 mutex_exit(&nce->nce_lock); 687 started = B_FALSE; 688 } 689 return (started); 690 } 691 692 /* 693 * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 694 * If one is found, the refcnt on the nce will be incremented. 695 */ 696 nce_t * 697 ndp_lookup_v6(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) 698 { 699 nce_t *nce; 700 701 ASSERT(ill != NULL && ill->ill_isv6); 702 if (!caller_holds_lock) { 703 mutex_enter(&ndp6.ndp_g_lock); 704 } 705 nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); /* head of v6 hash table */ 706 nce = nce_lookup_addr(ill, addr, nce); 707 if (nce == NULL) 708 nce = nce_lookup_mapping(ill, addr); 709 if (!caller_holds_lock) 710 mutex_exit(&ndp6.ndp_g_lock); 711 return (nce); 712 } 713 /* 714 * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 715 * If one is found, the refcnt on the nce will be incremented. 716 * Since multicast mappings are handled in arp, there are no nce_mcast_entries 717 * so we skip the nce_lookup_mapping call. 718 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 719 */ 720 nce_t * 721 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 722 { 723 nce_t *nce; 724 in6_addr_t addr6; 725 726 if (!caller_holds_lock) { 727 mutex_enter(&ndp4.ndp_g_lock); 728 } 729 nce = *((nce_t **)NCE_HASH_PTR_V4(*addr)); /* head of v6 hash table */ 730 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 731 nce = nce_lookup_addr(ill, &addr6, nce); 732 if (!caller_holds_lock) 733 mutex_exit(&ndp4.ndp_g_lock); 734 return (nce); 735 } 736 737 /* 738 * Cache entry lookup. Try to find an nce matching the parameters passed. 739 * Look only for exact entries (no mappings). If an nce is found, increment 740 * the hold count on that nce. The caller passes in the start of the 741 * appropriate hash table, and must be holding the appropriate global 742 * lock (ndp_g_lock). 743 */ 744 static nce_t * 745 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr, nce_t *nce) 746 { 747 ndp_g_t *ndp = (ill->ill_isv6 ? &ndp6 : &ndp4); 748 749 ASSERT(ill != NULL); 750 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 751 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 752 return (NULL); 753 for (; nce != NULL; nce = nce->nce_next) { 754 if (nce->nce_ill == ill) { 755 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 756 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 757 &ipv6_all_ones)) { 758 mutex_enter(&nce->nce_lock); 759 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 760 NCE_REFHOLD_LOCKED(nce); 761 mutex_exit(&nce->nce_lock); 762 break; 763 } 764 mutex_exit(&nce->nce_lock); 765 } 766 } 767 } 768 return (nce); 769 } 770 771 /* 772 * Cache entry lookup. Try to find an nce matching the parameters passed. 773 * Look only for mappings. 774 */ 775 static nce_t * 776 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 777 { 778 nce_t *nce; 779 780 ASSERT(ill != NULL && ill->ill_isv6); 781 ASSERT(MUTEX_HELD(&ndp6.ndp_g_lock)); 782 if (!IN6_IS_ADDR_MULTICAST(addr)) 783 return (NULL); 784 nce = ndp6.nce_mask_entries; 785 for (; nce != NULL; nce = nce->nce_next) 786 if (nce->nce_ill == ill && 787 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 788 mutex_enter(&nce->nce_lock); 789 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 790 NCE_REFHOLD_LOCKED(nce); 791 mutex_exit(&nce->nce_lock); 792 break; 793 } 794 mutex_exit(&nce->nce_lock); 795 } 796 return (nce); 797 } 798 799 /* 800 * Process passed in parameters either from an incoming packet or via 801 * user ioctl. 802 */ 803 void 804 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 805 { 806 ill_t *ill = nce->nce_ill; 807 uint32_t hw_addr_len = ill->ill_nd_lla_len; 808 mblk_t *mp; 809 boolean_t ll_updated = B_FALSE; 810 boolean_t ll_changed; 811 812 ASSERT(nce->nce_ipversion == IPV6_VERSION); 813 /* 814 * No updates of link layer address or the neighbor state is 815 * allowed, when the cache is in NONUD state. This still 816 * allows for responding to reachability solicitation. 817 */ 818 mutex_enter(&nce->nce_lock); 819 if (nce->nce_state == ND_INCOMPLETE) { 820 if (hw_addr == NULL) { 821 mutex_exit(&nce->nce_lock); 822 return; 823 } 824 nce_set_ll(nce, hw_addr); 825 /* 826 * Update nce state and send the queued packets 827 * back to ip this time ire will be added. 828 */ 829 if (flag & ND_NA_FLAG_SOLICITED) { 830 nce_update(nce, ND_REACHABLE, NULL); 831 } else { 832 nce_update(nce, ND_STALE, NULL); 833 } 834 mutex_exit(&nce->nce_lock); 835 nce_fastpath(nce); 836 mutex_enter(&nce->nce_lock); 837 mp = nce->nce_qd_mp; 838 nce->nce_qd_mp = NULL; 839 mutex_exit(&nce->nce_lock); 840 while (mp != NULL) { 841 mblk_t *nxt_mp, *data_mp; 842 843 nxt_mp = mp->b_next; 844 mp->b_next = NULL; 845 846 if (mp->b_datap->db_type == M_CTL) 847 data_mp = mp->b_cont; 848 else 849 data_mp = mp; 850 if (data_mp->b_prev != NULL) { 851 ill_t *inbound_ill; 852 queue_t *fwdq = NULL; 853 uint_t ifindex; 854 855 ifindex = (uint_t)(uintptr_t)data_mp->b_prev; 856 inbound_ill = ill_lookup_on_ifindex(ifindex, 857 B_TRUE, NULL, NULL, NULL, NULL); 858 if (inbound_ill == NULL) { 859 data_mp->b_prev = NULL; 860 freemsg(mp); 861 return; 862 } else { 863 fwdq = inbound_ill->ill_rq; 864 } 865 data_mp->b_prev = NULL; 866 /* 867 * Send a forwarded packet back into ip_rput_v6 868 * just as in ire_send_v6(). 869 * Extract the queue from b_prev (set in 870 * ip_rput_data_v6). 871 */ 872 if (fwdq != NULL) { 873 /* 874 * Forwarded packets hop count will 875 * get decremented in ip_rput_data_v6 876 */ 877 if (data_mp != mp) 878 freeb(mp); 879 put(fwdq, data_mp); 880 } else { 881 /* 882 * Send locally originated packets back 883 * into * ip_wput_v6. 884 */ 885 put(ill->ill_wq, mp); 886 } 887 ill_refrele(inbound_ill); 888 } else { 889 put(ill->ill_wq, mp); 890 } 891 mp = nxt_mp; 892 } 893 return; 894 } 895 ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); 896 if (!is_adv) { 897 /* If this is a SOLICITATION request only */ 898 if (ll_changed) 899 nce_update(nce, ND_STALE, hw_addr); 900 mutex_exit(&nce->nce_lock); 901 return; 902 } 903 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 904 /* If in any other state than REACHABLE, ignore */ 905 if (nce->nce_state == ND_REACHABLE) { 906 nce_update(nce, ND_STALE, NULL); 907 } 908 mutex_exit(&nce->nce_lock); 909 return; 910 } else { 911 if (ll_changed) { 912 nce_update(nce, ND_UNCHANGED, hw_addr); 913 ll_updated = B_TRUE; 914 } 915 if (flag & ND_NA_FLAG_SOLICITED) { 916 nce_update(nce, ND_REACHABLE, NULL); 917 } else { 918 if (ll_updated) { 919 nce_update(nce, ND_STALE, NULL); 920 } 921 } 922 mutex_exit(&nce->nce_lock); 923 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 924 NCE_F_ISROUTER)) { 925 ire_t *ire; 926 927 /* 928 * Router turned to host. We need to remove the 929 * entry as well as any default route that may be 930 * using this as a next hop. This is required by 931 * section 7.2.5 of RFC 2461. 932 */ 933 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 934 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 935 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 936 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 937 MATCH_IRE_DEFAULT); 938 if (ire != NULL) { 939 ip_rts_rtmsg(RTM_DELETE, ire, 0); 940 ire_delete(ire); 941 ire_refrele(ire); 942 } 943 ndp_delete(nce); 944 } 945 } 946 } 947 948 /* 949 * Pass arg1 to the pfi supplied, along with each nce in existence. 950 * ndp_walk() places a REFHOLD on the nce and drops the lock when 951 * walking the hash list. 952 */ 953 void 954 ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 955 boolean_t trace) 956 { 957 958 nce_t *nce; 959 nce_t *nce1; 960 nce_t **ncep; 961 nce_t *free_nce_list = NULL; 962 963 mutex_enter(&ndp->ndp_g_lock); 964 /* Prevent ndp_delete from unlink and free of NCE */ 965 ndp->ndp_g_walker++; 966 mutex_exit(&ndp->ndp_g_lock); 967 for (ncep = ndp->nce_hash_tbl; 968 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 969 for (nce = *ncep; nce != NULL; nce = nce1) { 970 nce1 = nce->nce_next; 971 if (ill == NULL || nce->nce_ill == ill) { 972 if (trace) { 973 NCE_REFHOLD(nce); 974 (*pfi)(nce, arg1); 975 NCE_REFRELE(nce); 976 } else { 977 NCE_REFHOLD_NOTR(nce); 978 (*pfi)(nce, arg1); 979 NCE_REFRELE_NOTR(nce); 980 } 981 } 982 } 983 } 984 for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { 985 nce1 = nce->nce_next; 986 if (ill == NULL || nce->nce_ill == ill) { 987 if (trace) { 988 NCE_REFHOLD(nce); 989 (*pfi)(nce, arg1); 990 NCE_REFRELE(nce); 991 } else { 992 NCE_REFHOLD_NOTR(nce); 993 (*pfi)(nce, arg1); 994 NCE_REFRELE_NOTR(nce); 995 } 996 } 997 } 998 mutex_enter(&ndp->ndp_g_lock); 999 ndp->ndp_g_walker--; 1000 /* 1001 * While NCE's are removed from global list they are placed 1002 * in a private list, to be passed to nce_ire_delete_list(). 1003 * The reason is, there may be ires pointing to this nce 1004 * which needs to cleaned up. 1005 */ 1006 if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { 1007 /* Time to delete condemned entries */ 1008 for (ncep = ndp->nce_hash_tbl; 1009 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 1010 nce = *ncep; 1011 if (nce != NULL) { 1012 nce_remove(ndp, nce, &free_nce_list); 1013 } 1014 } 1015 nce = ndp->nce_mask_entries; 1016 if (nce != NULL) { 1017 nce_remove(ndp, nce, &free_nce_list); 1018 } 1019 ndp->ndp_g_walker_cleanup = B_FALSE; 1020 } 1021 mutex_exit(&ndp->ndp_g_lock); 1022 1023 if (free_nce_list != NULL) { 1024 nce_ire_delete_list(free_nce_list); 1025 } 1026 } 1027 1028 void 1029 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1) 1030 { 1031 ndp_walk_common(&ndp4, ill, pfi, arg1, B_TRUE); 1032 ndp_walk_common(&ndp6, ill, pfi, arg1, B_TRUE); 1033 } 1034 1035 /* 1036 * Process resolve requests. Handles both mapped entries 1037 * as well as cases that needs to be send out on the wire. 1038 * Lookup a NCE for a given IRE. Regardless of whether one exists 1039 * or one is created, we defer making ire point to nce until the 1040 * ire is actually added at which point the nce_refcnt on the nce is 1041 * incremented. This is done primarily to have symmetry between ire_add() 1042 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1043 */ 1044 int 1045 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 1046 { 1047 nce_t *nce; 1048 int err = 0; 1049 uint32_t ms; 1050 mblk_t *mp_nce = NULL; 1051 1052 ASSERT(ill != NULL); 1053 ASSERT(ill->ill_isv6); 1054 if (IN6_IS_ADDR_MULTICAST(dst)) { 1055 err = nce_set_multicast(ill, dst); 1056 return (err); 1057 } 1058 err = ndp_lookup_then_add(ill, 1059 NULL, /* No hardware address */ 1060 dst, 1061 &ipv6_all_ones, 1062 &ipv6_all_zeros, 1063 0, 1064 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1065 ND_INCOMPLETE, 1066 &nce, 1067 NULL, /* let ndp_add figure out fastpath mp and dlureq_mp for v6 */ 1068 NULL); 1069 1070 switch (err) { 1071 case 0: 1072 /* 1073 * New cache entry was created. Make sure that the state 1074 * is not ND_INCOMPLETE. It can be in some other state 1075 * even before we send out the solicitation as we could 1076 * get un-solicited advertisements. 1077 * 1078 * If this is an XRESOLV interface, simply return 0, 1079 * since we don't want to solicit just yet. 1080 */ 1081 if (ill->ill_flags & ILLF_XRESOLV) { 1082 NCE_REFRELE(nce); 1083 return (0); 1084 } 1085 rw_enter(&ill_g_lock, RW_READER); 1086 mutex_enter(&nce->nce_lock); 1087 if (nce->nce_state != ND_INCOMPLETE) { 1088 mutex_exit(&nce->nce_lock); 1089 rw_exit(&ill_g_lock); 1090 NCE_REFRELE(nce); 1091 return (0); 1092 } 1093 mp_nce = ip_prepend_zoneid(mp, zoneid); 1094 if (mp_nce == NULL) { 1095 /* The caller will free mp */ 1096 mutex_exit(&nce->nce_lock); 1097 rw_exit(&ill_g_lock); 1098 ndp_delete(nce); 1099 NCE_REFRELE(nce); 1100 return (ENOMEM); 1101 } 1102 ms = nce_solicit(nce, mp_nce); 1103 rw_exit(&ill_g_lock); 1104 if (ms == 0) { 1105 /* The caller will free mp */ 1106 if (mp_nce != mp) 1107 freeb(mp_nce); 1108 mutex_exit(&nce->nce_lock); 1109 ndp_delete(nce); 1110 NCE_REFRELE(nce); 1111 return (EBUSY); 1112 } 1113 mutex_exit(&nce->nce_lock); 1114 NDP_RESTART_TIMER(nce, (clock_t)ms); 1115 NCE_REFRELE(nce); 1116 return (EINPROGRESS); 1117 case EEXIST: 1118 /* Resolution in progress just queue the packet */ 1119 mutex_enter(&nce->nce_lock); 1120 if (nce->nce_state == ND_INCOMPLETE) { 1121 mp_nce = ip_prepend_zoneid(mp, zoneid); 1122 if (mp_nce == NULL) { 1123 err = ENOMEM; 1124 } else { 1125 nce_queue_mp(nce, mp_nce); 1126 err = EINPROGRESS; 1127 } 1128 } else { 1129 /* 1130 * Any other state implies we have 1131 * a nce but IRE needs to be added ... 1132 * ire_add_v6() will take care of the 1133 * the case when the nce becomes CONDEMNED 1134 * before the ire is added to the table. 1135 */ 1136 err = 0; 1137 } 1138 mutex_exit(&nce->nce_lock); 1139 NCE_REFRELE(nce); 1140 break; 1141 default: 1142 ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); 1143 break; 1144 } 1145 return (err); 1146 } 1147 1148 /* 1149 * When there is no resolver, the link layer template is passed in 1150 * the IRE. 1151 * Lookup a NCE for a given IRE. Regardless of whether one exists 1152 * or one is created, we defer making ire point to nce until the 1153 * ire is actually added at which point the nce_refcnt on the nce is 1154 * incremented. This is done primarily to have symmetry between ire_add() 1155 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1156 */ 1157 int 1158 ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 1159 { 1160 nce_t *nce; 1161 int err = 0; 1162 1163 ASSERT(ill != NULL); 1164 ASSERT(ill->ill_isv6); 1165 if (IN6_IS_ADDR_MULTICAST(dst)) { 1166 err = nce_set_multicast(ill, dst); 1167 return (err); 1168 } 1169 1170 err = ndp_lookup_then_add(ill, 1171 NULL, /* hardware address */ 1172 dst, 1173 &ipv6_all_ones, 1174 &ipv6_all_zeros, 1175 0, 1176 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1177 ND_REACHABLE, 1178 &nce, 1179 NULL, /* let ndp_add figure out fp_mp/dlureq_mp for v6 */ 1180 NULL); 1181 1182 switch (err) { 1183 case 0: 1184 /* 1185 * Cache entry with a proper resolver cookie was 1186 * created. 1187 */ 1188 NCE_REFRELE(nce); 1189 break; 1190 case EEXIST: 1191 err = 0; 1192 NCE_REFRELE(nce); 1193 break; 1194 default: 1195 ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 1196 break; 1197 } 1198 return (err); 1199 } 1200 1201 /* 1202 * For each interface an entry is added for the unspecified multicast group. 1203 * Here that mapping is used to form the multicast cache entry for a particular 1204 * multicast destination. 1205 */ 1206 static int 1207 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1208 { 1209 nce_t *mnce; /* Multicast mapping entry */ 1210 nce_t *nce; 1211 uchar_t *hw_addr = NULL; 1212 int err = 0; 1213 1214 ASSERT(ill != NULL); 1215 ASSERT(ill->ill_isv6); 1216 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1217 1218 mutex_enter(&ndp6.ndp_g_lock); 1219 nce = *((nce_t **)NCE_HASH_PTR_V6(*dst)); 1220 nce = nce_lookup_addr(ill, dst, nce); 1221 if (nce != NULL) { 1222 mutex_exit(&ndp6.ndp_g_lock); 1223 NCE_REFRELE(nce); 1224 return (0); 1225 } 1226 /* No entry, now lookup for a mapping this should never fail */ 1227 mnce = nce_lookup_mapping(ill, dst); 1228 if (mnce == NULL) { 1229 /* Something broken for the interface. */ 1230 mutex_exit(&ndp6.ndp_g_lock); 1231 return (ESRCH); 1232 } 1233 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1234 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1235 /* 1236 * For IRE_IF_RESOLVER a hardware mapping can be 1237 * generated, for IRE_IF_NORESOLVER, resolution cookie 1238 * in the ill is copied in ndp_add(). 1239 */ 1240 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1241 if (hw_addr == NULL) { 1242 mutex_exit(&ndp6.ndp_g_lock); 1243 NCE_REFRELE(mnce); 1244 return (ENOMEM); 1245 } 1246 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1247 } 1248 NCE_REFRELE(mnce); 1249 /* 1250 * IRE_IF_NORESOLVER type simply copies the resolution 1251 * cookie passed in. So no hw_addr is needed. 1252 */ 1253 err = ndp_add(ill, 1254 hw_addr, 1255 dst, 1256 &ipv6_all_ones, 1257 &ipv6_all_zeros, 1258 0, 1259 NCE_F_NONUD, 1260 ND_REACHABLE, 1261 &nce, 1262 NULL, 1263 NULL); 1264 mutex_exit(&ndp6.ndp_g_lock); 1265 if (hw_addr != NULL) 1266 kmem_free(hw_addr, ill->ill_nd_lla_len); 1267 if (err != 0) { 1268 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1269 return (err); 1270 } 1271 NCE_REFRELE(nce); 1272 return (0); 1273 } 1274 1275 /* 1276 * Return the link layer address, and any flags of a nce. 1277 */ 1278 int 1279 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1280 { 1281 nce_t *nce; 1282 in6_addr_t *addr; 1283 sin6_t *sin6; 1284 dl_unitdata_req_t *dl; 1285 1286 ASSERT(ill != NULL && ill->ill_isv6); 1287 sin6 = (sin6_t *)&lnr->lnr_addr; 1288 addr = &sin6->sin6_addr; 1289 1290 nce = ndp_lookup_v6(ill, addr, B_FALSE); 1291 if (nce == NULL) 1292 return (ESRCH); 1293 /* If in INCOMPLETE state, no link layer address is available yet */ 1294 if (nce->nce_state == ND_INCOMPLETE) 1295 goto done; 1296 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1297 if (ill->ill_flags & ILLF_XRESOLV) 1298 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1299 else 1300 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1301 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1302 sizeof (lnr->lnr_hdw_addr)); 1303 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1304 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1305 if (nce->nce_flags & NCE_F_ISROUTER) 1306 lnr->lnr_flags = NDF_ISROUTER_ON; 1307 if (nce->nce_flags & NCE_F_PROXY) 1308 lnr->lnr_flags |= NDF_PROXY_ON; 1309 if (nce->nce_flags & NCE_F_ANYCAST) 1310 lnr->lnr_flags |= NDF_ANYCAST_ON; 1311 done: 1312 NCE_REFRELE(nce); 1313 return (0); 1314 } 1315 1316 /* 1317 * Send Enable/Disable multicast reqs to driver. 1318 */ 1319 int 1320 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1321 uint32_t hw_addr_offset, mblk_t *mp) 1322 { 1323 nce_t *nce; 1324 uchar_t *hw_addr; 1325 1326 ASSERT(ill != NULL && ill->ill_isv6); 1327 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1328 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1329 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1330 freemsg(mp); 1331 return (EINVAL); 1332 } 1333 mutex_enter(&ndp6.ndp_g_lock); 1334 nce = nce_lookup_mapping(ill, addr); 1335 if (nce == NULL) { 1336 mutex_exit(&ndp6.ndp_g_lock); 1337 freemsg(mp); 1338 return (ESRCH); 1339 } 1340 mutex_exit(&ndp6.ndp_g_lock); 1341 /* 1342 * Update dl_addr_length and dl_addr_offset for primitives that 1343 * have physical addresses as opposed to full saps 1344 */ 1345 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1346 case DL_ENABMULTI_REQ: 1347 /* Track the state if this is the first enabmulti */ 1348 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1349 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1350 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1351 break; 1352 case DL_DISABMULTI_REQ: 1353 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1354 break; 1355 default: 1356 NCE_REFRELE(nce); 1357 ip1dbg(("ndp_mcastreq: default\n")); 1358 return (EINVAL); 1359 } 1360 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1361 NCE_REFRELE(nce); 1362 putnext(ill->ill_wq, mp); 1363 return (0); 1364 } 1365 1366 /* 1367 * Send a neighbor solicitation. 1368 * Returns number of milliseconds after which we should either rexmit or abort. 1369 * Return of zero means we should abort. 1370 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1371 * 1372 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1373 * the packet. 1374 * NOTE: This routine does not consume mp. 1375 */ 1376 uint32_t 1377 nce_solicit(nce_t *nce, mblk_t *mp) 1378 { 1379 ill_t *ill; 1380 ill_t *src_ill; 1381 ip6_t *ip6h; 1382 in6_addr_t src; 1383 in6_addr_t dst; 1384 ipif_t *ipif; 1385 ip6i_t *ip6i; 1386 boolean_t dropped = B_FALSE; 1387 1388 ASSERT(RW_READ_HELD(&ill_g_lock)); 1389 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1390 ill = nce->nce_ill; 1391 ASSERT(ill != NULL); 1392 1393 if (nce->nce_rcnt == 0) { 1394 return (0); 1395 } 1396 1397 if (mp == NULL) { 1398 ASSERT(nce->nce_qd_mp != NULL); 1399 mp = nce->nce_qd_mp; 1400 } else { 1401 nce_queue_mp(nce, mp); 1402 } 1403 1404 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1405 if (mp->b_datap->db_type == M_CTL) 1406 mp = mp->b_cont; 1407 1408 ip6h = (ip6_t *)mp->b_rptr; 1409 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1410 /* 1411 * This message should have been pulled up already in 1412 * ip_wput_v6. We can't do pullups here because the message 1413 * could be from the nce_qd_mp which could have b_next/b_prev 1414 * non-NULL. 1415 */ 1416 ip6i = (ip6i_t *)ip6h; 1417 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 1418 sizeof (ip6i_t) + IPV6_HDR_LEN); 1419 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1420 } 1421 src = ip6h->ip6_src; 1422 /* 1423 * If the src of outgoing packet is one of the assigned interface 1424 * addresses use it, otherwise we will pick the source address below. 1425 */ 1426 src_ill = ill; 1427 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1428 if (ill->ill_group != NULL) 1429 src_ill = ill->ill_group->illgrp_ill; 1430 for (; src_ill != NULL; src_ill = src_ill->ill_group_next) { 1431 for (ipif = src_ill->ill_ipif; ipif != NULL; 1432 ipif = ipif->ipif_next) { 1433 if (IN6_ARE_ADDR_EQUAL(&src, 1434 &ipif->ipif_v6lcl_addr)) { 1435 break; 1436 } 1437 } 1438 if (ipif != NULL) 1439 break; 1440 } 1441 /* 1442 * If no relevant ipif can be found, then it's not one of our 1443 * addresses. Reset to :: and let nce_xmit. If an ipif can be 1444 * found, but it's not yet done with DAD verification, then 1445 * just postpone this transmission until later. 1446 */ 1447 if (src_ill == NULL) 1448 src = ipv6_all_zeros; 1449 else if (!ipif->ipif_addr_ready) 1450 return (ill->ill_reachable_retrans_time); 1451 } 1452 dst = nce->nce_addr; 1453 /* 1454 * If source address is unspecified, nce_xmit will choose 1455 * one for us and initialize the hardware address also 1456 * appropriately. 1457 */ 1458 if (IN6_IS_ADDR_UNSPECIFIED(&src)) 1459 src_ill = NULL; 1460 nce->nce_rcnt--; 1461 mutex_exit(&nce->nce_lock); 1462 rw_exit(&ill_g_lock); 1463 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, 1464 &dst, 0); 1465 rw_enter(&ill_g_lock, RW_READER); 1466 mutex_enter(&nce->nce_lock); 1467 if (dropped) 1468 nce->nce_rcnt++; 1469 return (ill->ill_reachable_retrans_time); 1470 } 1471 1472 /* 1473 * Attempt to recover an address on an interface that's been marked as a 1474 * duplicate. Because NCEs are destroyed when the interface goes down, there's 1475 * no easy way to just probe the address and have the right thing happen if 1476 * it's no longer in use. Instead, we just bring it up normally and allow the 1477 * regular interface start-up logic to probe for a remaining duplicate and take 1478 * us back down if necessary. 1479 * Neither DHCP nor temporary addresses arrive here; they're excluded by 1480 * ip_ndp_excl. 1481 */ 1482 /* ARGSUSED */ 1483 static void 1484 ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1485 { 1486 ill_t *ill = rq->q_ptr; 1487 ipif_t *ipif; 1488 in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; 1489 1490 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1491 /* 1492 * We do not support recovery of proxy ARP'd interfaces, 1493 * because the system lacks a complete proxy ARP mechanism. 1494 */ 1495 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1496 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { 1497 continue; 1498 } 1499 1500 /* 1501 * If we have already recovered or if the interface is going 1502 * away, then ignore. 1503 */ 1504 mutex_enter(&ill->ill_lock); 1505 if (!(ipif->ipif_flags & IPIF_DUPLICATE) || 1506 (ipif->ipif_flags & (IPIF_MOVING | IPIF_CONDEMNED))) { 1507 mutex_exit(&ill->ill_lock); 1508 continue; 1509 } 1510 1511 ipif->ipif_flags &= ~IPIF_DUPLICATE; 1512 ill->ill_ipif_dup_count--; 1513 mutex_exit(&ill->ill_lock); 1514 ipif->ipif_was_dup = B_TRUE; 1515 1516 if (ipif_ndp_up(ipif, addr) != EINPROGRESS) 1517 (void) ipif_up_done_v6(ipif); 1518 } 1519 freeb(mp); 1520 } 1521 1522 /* 1523 * Attempt to recover an IPv6 interface that's been shut down as a duplicate. 1524 * As long as someone else holds the address, the interface will stay down. 1525 * When that conflict goes away, the interface is brought back up. This is 1526 * done so that accidental shutdowns of addresses aren't made permanent. Your 1527 * server will recover from a failure. 1528 * 1529 * For DHCP and temporary addresses, recovery is not done in the kernel. 1530 * Instead, it's handled by user space processes (dhcpagent and in.ndpd). 1531 * 1532 * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 1533 */ 1534 static void 1535 ipif6_dup_recovery(void *arg) 1536 { 1537 ipif_t *ipif = arg; 1538 1539 ipif->ipif_recovery_id = 0; 1540 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) 1541 return; 1542 1543 /* 1544 * No lock, because this is just an optimization. 1545 */ 1546 if (ipif->ipif_state_flags & (IPIF_MOVING | IPIF_CONDEMNED)) 1547 return; 1548 1549 /* If the link is down, we'll retry this later */ 1550 if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) 1551 return; 1552 1553 ndp_do_recovery(ipif); 1554 } 1555 1556 /* 1557 * Perform interface recovery by forcing the duplicate interfaces up and 1558 * allowing the system to determine which ones should stay up. 1559 * 1560 * Called both by recovery timer expiry and link-up notification. 1561 */ 1562 void 1563 ndp_do_recovery(ipif_t *ipif) 1564 { 1565 ill_t *ill = ipif->ipif_ill; 1566 mblk_t *mp; 1567 1568 mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); 1569 if (mp == NULL) { 1570 mutex_enter(&ill->ill_lock); 1571 if (ipif->ipif_recovery_id == 0 && 1572 !(ipif->ipif_state_flags & (IPIF_MOVING | 1573 IPIF_CONDEMNED))) { 1574 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1575 ipif, MSEC_TO_TICK(ip_dup_recovery)); 1576 } 1577 mutex_exit(&ill->ill_lock); 1578 } else { 1579 bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, 1580 sizeof (ipif->ipif_v6lcl_addr)); 1581 ill_refhold(ill); 1582 (void) qwriter_ip(NULL, ill, ill->ill_rq, mp, ip_ndp_recover, 1583 CUR_OP, B_FALSE); 1584 } 1585 } 1586 1587 /* 1588 * Find the solicitation in the given message, and extract printable details 1589 * (MAC and IP addresses) from it. 1590 */ 1591 static nd_neighbor_solicit_t * 1592 ip_ndp_find_solicitation(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, char *hbuf, 1593 size_t hlen, char *sbuf, size_t slen, uchar_t **haddr) 1594 { 1595 nd_neighbor_solicit_t *ns; 1596 ip6_t *ip6h; 1597 uchar_t *addr; 1598 int alen; 1599 1600 alen = 0; 1601 ip6h = (ip6_t *)mp->b_rptr; 1602 if (dl_mp == NULL) { 1603 nd_opt_hdr_t *opt; 1604 int nslen; 1605 1606 /* 1607 * If it's from the fast-path, then it can't be a probe 1608 * message, and thus must include the source linkaddr option. 1609 * Extract that here. 1610 */ 1611 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1612 nslen = mp->b_wptr - (uchar_t *)ns; 1613 if ((nslen -= sizeof (*ns)) > 0) { 1614 opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), nslen, 1615 ND_OPT_SOURCE_LINKADDR); 1616 if (opt != NULL && 1617 opt->nd_opt_len * 8 - sizeof (*opt) >= 1618 ill->ill_nd_lla_len) { 1619 addr = (uchar_t *)(opt + 1); 1620 alen = ill->ill_nd_lla_len; 1621 } 1622 } 1623 /* 1624 * We cheat a bit here for the sake of printing usable log 1625 * messages in the rare case where the reply we got was unicast 1626 * without a source linkaddr option, and the interface is in 1627 * fastpath mode. (Sigh.) 1628 */ 1629 if (alen == 0 && ill->ill_type == IFT_ETHER && 1630 MBLKHEAD(mp) >= sizeof (struct ether_header)) { 1631 struct ether_header *pether; 1632 1633 pether = (struct ether_header *)((char *)ip6h - 1634 sizeof (*pether)); 1635 addr = pether->ether_shost.ether_addr_octet; 1636 alen = ETHERADDRL; 1637 } 1638 } else { 1639 dl_unitdata_ind_t *dlu; 1640 1641 dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; 1642 alen = dlu->dl_src_addr_length; 1643 if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && 1644 dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { 1645 addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; 1646 if (ill->ill_sap_length < 0) { 1647 alen += ill->ill_sap_length; 1648 } else { 1649 addr += ill->ill_sap_length; 1650 alen -= ill->ill_sap_length; 1651 } 1652 } 1653 } 1654 if (alen > 0) { 1655 *haddr = addr; 1656 (void) mac_colon_addr(addr, alen, hbuf, hlen); 1657 } else { 1658 *haddr = NULL; 1659 (void) strcpy(hbuf, "?"); 1660 } 1661 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1662 (void) inet_ntop(AF_INET6, &ns->nd_ns_target, sbuf, slen); 1663 return (ns); 1664 } 1665 1666 /* 1667 * This is for exclusive changes due to NDP duplicate address detection 1668 * failure. 1669 */ 1670 /* ARGSUSED */ 1671 static void 1672 ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1673 { 1674 ill_t *ill = rq->q_ptr; 1675 ipif_t *ipif; 1676 char ibuf[LIFNAMSIZ + 10]; /* 10 digits for logical i/f number */ 1677 char hbuf[MAC_STR_LEN]; 1678 char sbuf[INET6_ADDRSTRLEN]; 1679 nd_neighbor_solicit_t *ns; 1680 mblk_t *dl_mp = NULL; 1681 uchar_t *haddr; 1682 1683 if (DB_TYPE(mp) != M_DATA) { 1684 dl_mp = mp; 1685 mp = mp->b_cont; 1686 } 1687 ns = ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, sizeof (hbuf), sbuf, 1688 sizeof (sbuf), &haddr); 1689 if (haddr != NULL && 1690 bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) { 1691 /* 1692 * Ignore conflicts generated by misbehaving switches that just 1693 * reflect our own messages back to us. 1694 */ 1695 goto ignore_conflict; 1696 } 1697 (void) strlcpy(ibuf, ill->ill_name, sizeof (ibuf)); 1698 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1699 1700 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1701 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 1702 &ns->nd_ns_target)) { 1703 continue; 1704 } 1705 1706 /* If it's already marked, then don't do anything. */ 1707 if (ipif->ipif_flags & IPIF_DUPLICATE) 1708 continue; 1709 1710 /* 1711 * If this is a failure during duplicate recovery, then don't 1712 * complain. It may take a long time to recover. 1713 */ 1714 if (!ipif->ipif_was_dup) { 1715 if (ipif->ipif_id != 0) { 1716 (void) snprintf(ibuf + ill->ill_name_length - 1, 1717 sizeof (ibuf) - ill->ill_name_length + 1, 1718 ":%d", ipif->ipif_id); 1719 } 1720 cmn_err(CE_WARN, "%s has duplicate address %s (in " 1721 "use by %s); disabled", ibuf, sbuf, hbuf); 1722 } 1723 mutex_enter(&ill->ill_lock); 1724 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 1725 ipif->ipif_flags |= IPIF_DUPLICATE; 1726 ill->ill_ipif_dup_count++; 1727 mutex_exit(&ill->ill_lock); 1728 (void) ipif_down(ipif, NULL, NULL); 1729 ipif_down_tail(ipif); 1730 mutex_enter(&ill->ill_lock); 1731 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 1732 ill->ill_net_type == IRE_IF_RESOLVER && 1733 !(ipif->ipif_state_flags & (IPIF_MOVING | 1734 IPIF_CONDEMNED)) && 1735 ip_dup_recovery > 0) { 1736 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1737 ipif, MSEC_TO_TICK(ip_dup_recovery)); 1738 } 1739 mutex_exit(&ill->ill_lock); 1740 } 1741 ignore_conflict: 1742 if (dl_mp != NULL) 1743 freeb(dl_mp); 1744 freemsg(mp); 1745 } 1746 1747 /* 1748 * Handle failure by tearing down the ipifs with the specified address. Note 1749 * that tearing down the ipif also means deleting the nce through ipif_down, so 1750 * it's not possible to do recovery by just restarting the nce timer. Instead, 1751 * we start a timer on the ipif. 1752 */ 1753 static void 1754 ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1755 { 1756 if ((mp = copymsg(mp)) != NULL) { 1757 if (dl_mp == NULL) 1758 dl_mp = mp; 1759 else if ((dl_mp = copyb(dl_mp)) != NULL) 1760 dl_mp->b_cont = mp; 1761 if (dl_mp == NULL) { 1762 freemsg(mp); 1763 } else { 1764 ill_refhold(ill); 1765 (void) qwriter_ip(NULL, ill, ill->ill_rq, dl_mp, 1766 ip_ndp_excl, CUR_OP, B_FALSE); 1767 } 1768 } 1769 ndp_delete(nce); 1770 } 1771 1772 /* 1773 * Handle a discovered conflict: some other system is advertising that it owns 1774 * one of our IP addresses. We need to defend ourselves, or just shut down the 1775 * interface. 1776 */ 1777 static void 1778 ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1779 { 1780 ipif_t *ipif; 1781 uint32_t now; 1782 uint_t maxdefense; 1783 uint_t defs; 1784 1785 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL, 1786 NULL, NULL); 1787 if (ipif == NULL) 1788 return; 1789 /* 1790 * First, figure out if this address is disposable. 1791 */ 1792 if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) 1793 maxdefense = ip_max_temp_defend; 1794 else 1795 maxdefense = ip_max_defend; 1796 1797 /* 1798 * Now figure out how many times we've defended ourselves. Ignore 1799 * defenses that happened long in the past. 1800 */ 1801 now = gethrestime_sec(); 1802 mutex_enter(&nce->nce_lock); 1803 if ((defs = nce->nce_defense_count) > 0 && 1804 now - nce->nce_defense_time > ip_defend_interval) { 1805 nce->nce_defense_count = defs = 0; 1806 } 1807 nce->nce_defense_count++; 1808 nce->nce_defense_time = now; 1809 mutex_exit(&nce->nce_lock); 1810 ipif_refrele(ipif); 1811 1812 /* 1813 * If we've defended ourselves too many times already, then give up and 1814 * tear down the interface(s) using this address. Otherwise, defend by 1815 * sending out an unsolicited Neighbor Advertisement. 1816 */ 1817 if (defs >= maxdefense) { 1818 ip_ndp_failure(ill, mp, dl_mp, nce); 1819 } else { 1820 char hbuf[MAC_STR_LEN]; 1821 char sbuf[INET6_ADDRSTRLEN]; 1822 uchar_t *haddr; 1823 1824 (void) ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, 1825 sizeof (hbuf), sbuf, sizeof (sbuf), &haddr); 1826 cmn_err(CE_WARN, "node %s is using our IP address %s on %s", 1827 hbuf, sbuf, ill->ill_name); 1828 (void) nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, B_FALSE, 1829 &nce->nce_addr, &ipv6_all_hosts_mcast, 1830 nce_advert_flags(nce)); 1831 } 1832 } 1833 1834 static void 1835 ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1836 { 1837 nd_neighbor_solicit_t *ns; 1838 uint32_t hlen = ill->ill_nd_lla_len; 1839 uchar_t *haddr = NULL; 1840 icmp6_t *icmp_nd; 1841 ip6_t *ip6h; 1842 nce_t *our_nce = NULL; 1843 in6_addr_t target; 1844 in6_addr_t src; 1845 int len; 1846 int flag = 0; 1847 nd_opt_hdr_t *opt = NULL; 1848 boolean_t bad_solicit = B_FALSE; 1849 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1850 1851 ip6h = (ip6_t *)mp->b_rptr; 1852 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1853 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1854 src = ip6h->ip6_src; 1855 ns = (nd_neighbor_solicit_t *)icmp_nd; 1856 target = ns->nd_ns_target; 1857 if (IN6_IS_ADDR_MULTICAST(&target)) { 1858 if (ip_debug > 2) { 1859 /* ip1dbg */ 1860 pr_addr_dbg("ndp_input_solicit: Target is" 1861 " multicast! %s\n", AF_INET6, &target); 1862 } 1863 bad_solicit = B_TRUE; 1864 goto done; 1865 } 1866 if (len > sizeof (nd_neighbor_solicit_t)) { 1867 /* Options present */ 1868 opt = (nd_opt_hdr_t *)&ns[1]; 1869 len -= sizeof (nd_neighbor_solicit_t); 1870 if (!ndp_verify_optlen(opt, len)) { 1871 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1872 bad_solicit = B_TRUE; 1873 goto done; 1874 } 1875 } 1876 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1877 /* Check to see if this is a valid DAD solicitation */ 1878 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1879 if (ip_debug > 2) { 1880 /* ip1dbg */ 1881 pr_addr_dbg("ndp_input_solicit: IPv6 " 1882 "Destination is not solicited node " 1883 "multicast %s\n", AF_INET6, 1884 &ip6h->ip6_dst); 1885 } 1886 bad_solicit = B_TRUE; 1887 goto done; 1888 } 1889 } 1890 1891 our_nce = ndp_lookup_v6(ill, &target, B_FALSE); 1892 /* 1893 * If this is a valid Solicitation, a permanent 1894 * entry should exist in the cache 1895 */ 1896 if (our_nce == NULL || 1897 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1898 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1899 "ifname=%s ", ill->ill_name)); 1900 if (ip_debug > 2) { 1901 /* ip1dbg */ 1902 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1903 } 1904 bad_solicit = B_TRUE; 1905 goto done; 1906 } 1907 1908 /* At this point we should have a verified NS per spec */ 1909 if (opt != NULL) { 1910 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1911 if (opt != NULL) { 1912 haddr = (uchar_t *)&opt[1]; 1913 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 1914 hlen == 0) { 1915 ip1dbg(("ndp_input_advert: bad SLLA\n")); 1916 bad_solicit = B_TRUE; 1917 goto done; 1918 } 1919 } 1920 } 1921 1922 /* If sending directly to peer, set the unicast flag */ 1923 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) 1924 flag |= NDP_UNICAST; 1925 1926 /* 1927 * Create/update the entry for the soliciting node. 1928 * or respond to outstanding queries, don't if 1929 * the source is unspecified address. 1930 */ 1931 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1932 int err; 1933 nce_t *nnce; 1934 1935 ASSERT(ill->ill_isv6); 1936 /* 1937 * Regular solicitations *must* include the Source Link-Layer 1938 * Address option. Ignore messages that do not. 1939 */ 1940 if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1941 ip1dbg(("ndp_input_solicit: source link-layer address " 1942 "option missing with a specified source.\n")); 1943 bad_solicit = B_TRUE; 1944 goto done; 1945 } 1946 1947 /* 1948 * This is a regular solicitation. If we're still in the 1949 * process of verifying the address, then don't respond at all 1950 * and don't keep track of the sender. 1951 */ 1952 if (our_nce->nce_state == ND_PROBE) 1953 goto done; 1954 1955 /* 1956 * If the solicitation doesn't have sender hardware address 1957 * (legal for unicast solicitation), then process without 1958 * installing the return NCE. Either we already know it, or 1959 * we'll be forced to look it up when (and if) we reply to the 1960 * packet. 1961 */ 1962 if (haddr == NULL) 1963 goto no_source; 1964 1965 err = ndp_lookup_then_add(ill, 1966 haddr, 1967 &src, /* Soliciting nodes address */ 1968 &ipv6_all_ones, 1969 &ipv6_all_zeros, 1970 0, 1971 0, 1972 ND_STALE, 1973 &nnce, 1974 NULL, 1975 NULL); 1976 switch (err) { 1977 case 0: 1978 /* done with this entry */ 1979 NCE_REFRELE(nnce); 1980 break; 1981 case EEXIST: 1982 /* 1983 * B_FALSE indicates this is not an 1984 * an advertisement. 1985 */ 1986 ndp_process(nnce, haddr, 0, B_FALSE); 1987 NCE_REFRELE(nnce); 1988 break; 1989 default: 1990 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 1991 err)); 1992 goto done; 1993 } 1994 no_source: 1995 flag |= NDP_SOLICITED; 1996 } else { 1997 /* 1998 * No source link layer address option should be present in a 1999 * valid DAD request. 2000 */ 2001 if (haddr != NULL) { 2002 ip1dbg(("ndp_input_solicit: source link-layer address " 2003 "option present with an unspecified source.\n")); 2004 bad_solicit = B_TRUE; 2005 goto done; 2006 } 2007 if (our_nce->nce_state == ND_PROBE) { 2008 /* 2009 * Internally looped-back probes won't have DLPI 2010 * attached to them. External ones (which are sent by 2011 * multicast) always will. Just ignore our own 2012 * transmissions. 2013 */ 2014 if (dl_mp != NULL) { 2015 /* 2016 * If someone else is probing our address, then 2017 * we've crossed wires. Declare failure. 2018 */ 2019 ip_ndp_failure(ill, mp, dl_mp, our_nce); 2020 } 2021 goto done; 2022 } 2023 /* 2024 * This is a DAD probe. Multicast the advertisement to the 2025 * all-nodes address. 2026 */ 2027 src = ipv6_all_hosts_mcast; 2028 } 2029 flag |= nce_advert_flags(our_nce); 2030 /* Response to a solicitation */ 2031 (void) nce_xmit(ill, 2032 ND_NEIGHBOR_ADVERT, 2033 ill, /* ill to be used for extracting ill_nd_lla */ 2034 B_TRUE, /* use ill_nd_lla */ 2035 &target, /* Source and target of the advertisement pkt */ 2036 &src, /* IP Destination (source of original pkt) */ 2037 flag); 2038 done: 2039 if (bad_solicit) 2040 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 2041 if (our_nce != NULL) 2042 NCE_REFRELE(our_nce); 2043 } 2044 2045 void 2046 ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2047 { 2048 nd_neighbor_advert_t *na; 2049 uint32_t hlen = ill->ill_nd_lla_len; 2050 uchar_t *haddr = NULL; 2051 icmp6_t *icmp_nd; 2052 ip6_t *ip6h; 2053 nce_t *dst_nce = NULL; 2054 in6_addr_t target; 2055 nd_opt_hdr_t *opt = NULL; 2056 int len; 2057 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2058 2059 ip6h = (ip6_t *)mp->b_rptr; 2060 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2061 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2062 na = (nd_neighbor_advert_t *)icmp_nd; 2063 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 2064 (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { 2065 ip1dbg(("ndp_input_advert: Target is multicast but the " 2066 "solicited flag is not zero\n")); 2067 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2068 return; 2069 } 2070 target = na->nd_na_target; 2071 if (IN6_IS_ADDR_MULTICAST(&target)) { 2072 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 2073 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2074 return; 2075 } 2076 if (len > sizeof (nd_neighbor_advert_t)) { 2077 opt = (nd_opt_hdr_t *)&na[1]; 2078 if (!ndp_verify_optlen(opt, 2079 len - sizeof (nd_neighbor_advert_t))) { 2080 ip1dbg(("ndp_input_advert: cannot verify SLLA\n")); 2081 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2082 return; 2083 } 2084 /* At this point we have a verified NA per spec */ 2085 len -= sizeof (nd_neighbor_advert_t); 2086 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 2087 if (opt != NULL) { 2088 haddr = (uchar_t *)&opt[1]; 2089 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 2090 hlen == 0) { 2091 ip1dbg(("ndp_input_advert: bad SLLA\n")); 2092 BUMP_MIB(mib, 2093 ipv6IfIcmpInBadNeighborAdvertisements); 2094 return; 2095 } 2096 } 2097 } 2098 2099 /* 2100 * If this interface is part of the group look at all the 2101 * ills in the group. 2102 */ 2103 rw_enter(&ill_g_lock, RW_READER); 2104 if (ill->ill_group != NULL) 2105 ill = ill->ill_group->illgrp_ill; 2106 2107 for (; ill != NULL; ill = ill->ill_group_next) { 2108 mutex_enter(&ill->ill_lock); 2109 if (!ILL_CAN_LOOKUP(ill)) { 2110 mutex_exit(&ill->ill_lock); 2111 continue; 2112 } 2113 ill_refhold_locked(ill); 2114 mutex_exit(&ill->ill_lock); 2115 dst_nce = ndp_lookup_v6(ill, &target, B_FALSE); 2116 /* We have to drop the lock since ndp_process calls put* */ 2117 rw_exit(&ill_g_lock); 2118 if (dst_nce != NULL) { 2119 if ((dst_nce->nce_flags & NCE_F_PERMANENT) && 2120 dst_nce->nce_state == ND_PROBE) { 2121 /* 2122 * Someone else sent an advertisement for an 2123 * address that we're trying to configure. 2124 * Tear it down. Note that dl_mp might be NULL 2125 * if we're getting a unicast reply. This 2126 * isn't typically done (multicast is the norm 2127 * in response to a probe), but ip_ndp_failure 2128 * will handle the dl_mp == NULL case as well. 2129 */ 2130 ip_ndp_failure(ill, mp, dl_mp, dst_nce); 2131 } else if (dst_nce->nce_flags & NCE_F_PERMANENT) { 2132 /* 2133 * Someone just announced one of our local 2134 * addresses. If it wasn't us, then this is a 2135 * conflict. Defend the address or shut it 2136 * down. 2137 */ 2138 if (dl_mp != NULL && 2139 (haddr == NULL || 2140 nce_cmp_ll_addr(dst_nce, haddr, 2141 ill->ill_nd_lla_len))) { 2142 ip_ndp_conflict(ill, mp, dl_mp, 2143 dst_nce); 2144 } 2145 } else { 2146 if (na->nd_na_flags_reserved & 2147 ND_NA_FLAG_ROUTER) { 2148 dst_nce->nce_flags |= NCE_F_ISROUTER; 2149 } 2150 /* B_TRUE indicates this an advertisement */ 2151 ndp_process(dst_nce, haddr, 2152 na->nd_na_flags_reserved, B_TRUE); 2153 } 2154 NCE_REFRELE(dst_nce); 2155 } 2156 rw_enter(&ill_g_lock, RW_READER); 2157 ill_refrele(ill); 2158 } 2159 rw_exit(&ill_g_lock); 2160 } 2161 2162 /* 2163 * Process NDP neighbor solicitation/advertisement messages. 2164 * The checksum has already checked o.k before reaching here. 2165 */ 2166 void 2167 ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2168 { 2169 icmp6_t *icmp_nd; 2170 ip6_t *ip6h; 2171 int len; 2172 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2173 2174 2175 if (!pullupmsg(mp, -1)) { 2176 ip1dbg(("ndp_input: pullupmsg failed\n")); 2177 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2178 goto done; 2179 } 2180 ip6h = (ip6_t *)mp->b_rptr; 2181 if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 2182 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 2183 BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 2184 goto done; 2185 } 2186 /* 2187 * NDP does not accept any extension headers between the 2188 * IP header and the ICMP header since e.g. a routing 2189 * header could be dangerous. 2190 * This assumes that any AH or ESP headers are removed 2191 * by ip prior to passing the packet to ndp_input. 2192 */ 2193 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 2194 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 2195 ip6h->ip6_nxt)); 2196 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2197 goto done; 2198 } 2199 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2200 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 2201 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 2202 if (icmp_nd->icmp6_code != 0) { 2203 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 2204 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2205 goto done; 2206 } 2207 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2208 /* 2209 * Make sure packet length is large enough for either 2210 * a NS or a NA icmp packet. 2211 */ 2212 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 2213 ip1dbg(("ndp_input: packet too short\n")); 2214 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2215 goto done; 2216 } 2217 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 2218 ndp_input_solicit(ill, mp, dl_mp); 2219 } else { 2220 ndp_input_advert(ill, mp, dl_mp); 2221 } 2222 done: 2223 freemsg(mp); 2224 } 2225 2226 /* 2227 * nce_xmit is called to form and transmit a ND solicitation or 2228 * advertisement ICMP packet. 2229 * 2230 * If the source address is unspecified and this isn't a probe (used for 2231 * duplicate address detection), an appropriate source address and link layer 2232 * address will be chosen here. The link layer address option is included if 2233 * the source is specified (i.e., all non-probe packets), and omitted (per the 2234 * specification) otherwise. 2235 * 2236 * It returns B_FALSE only if it does a successful put() to the 2237 * corresponding ill's ill_wq otherwise returns B_TRUE. 2238 */ 2239 static boolean_t 2240 nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, 2241 boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target, 2242 int flag) 2243 { 2244 uint32_t len; 2245 icmp6_t *icmp6; 2246 mblk_t *mp; 2247 ip6_t *ip6h; 2248 nd_opt_hdr_t *opt; 2249 uint_t plen; 2250 ip6i_t *ip6i; 2251 ipif_t *src_ipif = NULL; 2252 uint8_t *hw_addr; 2253 2254 /* 2255 * If we have a unspecified source(sender) address, select a 2256 * proper source address for the solicitation here itself so 2257 * that we can initialize the h/w address correctly. This is 2258 * needed for interface groups as source address can come from 2259 * the whole group and the h/w address initialized from ill will 2260 * be wrong if the source address comes from a different ill. 2261 * 2262 * Note that the NA never comes here with the unspecified source 2263 * address. The following asserts that whenever the source 2264 * address is specified, the haddr also should be specified. 2265 */ 2266 ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); 2267 2268 if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) { 2269 ASSERT(operation != ND_NEIGHBOR_ADVERT); 2270 /* 2271 * Pick a source address for this solicitation, but 2272 * restrict the selection to addresses assigned to the 2273 * output interface (or interface group). We do this 2274 * because the destination will create a neighbor cache 2275 * entry for the source address of this packet, so the 2276 * source address had better be a valid neighbor. 2277 */ 2278 src_ipif = ipif_select_source_v6(ill, target, RESTRICT_TO_ILL, 2279 IPV6_PREFER_SRC_DEFAULT, GLOBAL_ZONEID); 2280 if (src_ipif == NULL) { 2281 char buf[INET6_ADDRSTRLEN]; 2282 2283 ip1dbg(("nce_xmit: No source ipif for dst %s\n", 2284 inet_ntop(AF_INET6, (char *)target, buf, 2285 sizeof (buf)))); 2286 return (B_TRUE); 2287 } 2288 sender = &src_ipif->ipif_v6src_addr; 2289 hwaddr_ill = src_ipif->ipif_ill; 2290 } 2291 2292 /* 2293 * Always make sure that the NS/NA packets don't get load 2294 * spread. This is needed so that the probe packets sent 2295 * by the in.mpathd daemon can really go out on the desired 2296 * interface. Probe packets are made to go out on a desired 2297 * interface by including a ip6i with ATTACH_IF flag. As these 2298 * packets indirectly end up sending/receiving NS/NA packets 2299 * (neighbor doing NUD), we have to make sure that NA 2300 * also go out on the same interface. 2301 */ 2302 plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7) / 8; 2303 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 2304 plen * 8; 2305 mp = allocb(len, BPRI_LO); 2306 if (mp == NULL) { 2307 if (src_ipif != NULL) 2308 ipif_refrele(src_ipif); 2309 return (B_TRUE); 2310 } 2311 bzero((char *)mp->b_rptr, len); 2312 mp->b_wptr = mp->b_rptr + len; 2313 2314 ip6i = (ip6i_t *)mp->b_rptr; 2315 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2316 ip6i->ip6i_nxt = IPPROTO_RAW; 2317 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 2318 if (flag & NDP_PROBE) 2319 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 2320 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2321 2322 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 2323 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2324 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2325 ip6h->ip6_nxt = IPPROTO_ICMPV6; 2326 ip6h->ip6_hops = IPV6_MAX_HOPS; 2327 ip6h->ip6_dst = *target; 2328 icmp6 = (icmp6_t *)&ip6h[1]; 2329 2330 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 2331 sizeof (nd_neighbor_advert_t)); 2332 2333 if (operation == ND_NEIGHBOR_SOLICIT) { 2334 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 2335 2336 if (!(flag & NDP_PROBE)) 2337 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 2338 ip6h->ip6_src = *sender; 2339 ns->nd_ns_target = *target; 2340 if (!(flag & NDP_UNICAST)) { 2341 /* Form multicast address of the target */ 2342 ip6h->ip6_dst = ipv6_solicited_node_mcast; 2343 ip6h->ip6_dst.s6_addr32[3] |= 2344 ns->nd_ns_target.s6_addr32[3]; 2345 } 2346 } else { 2347 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 2348 2349 ASSERT(!(flag & NDP_PROBE)); 2350 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 2351 ip6h->ip6_src = *sender; 2352 na->nd_na_target = *sender; 2353 if (flag & NDP_ISROUTER) 2354 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 2355 if (flag & NDP_SOLICITED) 2356 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 2357 if (flag & NDP_ORIDE) 2358 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 2359 } 2360 2361 hw_addr = NULL; 2362 if (!(flag & NDP_PROBE)) { 2363 hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla : 2364 hwaddr_ill->ill_phys_addr; 2365 if (hw_addr != NULL) { 2366 /* Fill in link layer address and option len */ 2367 opt->nd_opt_len = (uint8_t)plen; 2368 bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len); 2369 } 2370 } 2371 if (hw_addr == NULL) { 2372 /* If there's no link layer address option, then strip it. */ 2373 len -= plen * 8; 2374 mp->b_wptr = mp->b_rptr + len; 2375 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2376 } 2377 2378 icmp6->icmp6_type = (uint8_t)operation; 2379 icmp6->icmp6_code = 0; 2380 /* 2381 * Prepare for checksum by putting icmp length in the icmp 2382 * checksum field. The checksum is calculated in ip_wput_v6. 2383 */ 2384 icmp6->icmp6_cksum = ip6h->ip6_plen; 2385 2386 if (src_ipif != NULL) 2387 ipif_refrele(src_ipif); 2388 if (canput(ill->ill_wq)) { 2389 put(ill->ill_wq, mp); 2390 return (B_FALSE); 2391 } 2392 freemsg(mp); 2393 return (B_TRUE); 2394 } 2395 2396 /* 2397 * Make a link layer address (does not include the SAP) from an nce. 2398 * To form the link layer address, use the last four bytes of ipv6 2399 * address passed in and the fixed offset stored in nce. 2400 */ 2401 static void 2402 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 2403 { 2404 uchar_t *mask, *to; 2405 ill_t *ill = nce->nce_ill; 2406 int len; 2407 2408 if (ill->ill_net_type == IRE_IF_NORESOLVER) 2409 return; 2410 ASSERT(nce->nce_res_mp != NULL); 2411 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 2412 ASSERT(nce->nce_flags & NCE_F_MAPPING); 2413 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 2414 ASSERT(addr != NULL); 2415 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 2416 addrpos, ill->ill_nd_lla_len); 2417 len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 2418 IPV6_ADDR_LEN); 2419 mask = (uchar_t *)&nce->nce_extract_mask; 2420 mask += (IPV6_ADDR_LEN - len); 2421 addr += (IPV6_ADDR_LEN - len); 2422 to = addrpos + nce->nce_ll_extract_start; 2423 while (len-- > 0) 2424 *to++ |= *mask++ & *addr++; 2425 } 2426 2427 /* 2428 * Pass a cache report back out via NDD. 2429 */ 2430 /* ARGSUSED */ 2431 int 2432 ndp_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) 2433 { 2434 (void) mi_mpprintf(mp, "ifname hardware addr flags" 2435 " proto addr/mask"); 2436 ndp_walk(NULL, (pfi_t)nce_report1, (uchar_t *)mp); 2437 return (0); 2438 } 2439 2440 /* 2441 * Add a single line to the NDP Cache Entry Report. 2442 */ 2443 static void 2444 nce_report1(nce_t *nce, uchar_t *mp_arg) 2445 { 2446 ill_t *ill = nce->nce_ill; 2447 char local_buf[INET6_ADDRSTRLEN]; 2448 uchar_t flags_buf[10]; 2449 uint32_t flags = nce->nce_flags; 2450 mblk_t *mp = (mblk_t *)mp_arg; 2451 uchar_t *h; 2452 uchar_t *m = flags_buf; 2453 in6_addr_t v6addr; 2454 2455 /* 2456 * Lock the nce to protect nce_res_mp from being changed 2457 * if an external resolver address resolution completes 2458 * while nce_res_mp is being accessed here. 2459 * 2460 * Deal with all address formats, not just Ethernet-specific 2461 * In addition, make sure that the mblk has enough space 2462 * before writing to it. If is doesn't, allocate a new one. 2463 */ 2464 if (nce->nce_ipversion == IPV4_VERSION) 2465 /* Don't include v4 nce_ts in NDP cache entry report */ 2466 return; 2467 2468 ASSERT(ill != NULL); 2469 v6addr = nce->nce_mask; 2470 if (flags & NCE_F_PERMANENT) 2471 *m++ = 'P'; 2472 if (flags & NCE_F_ISROUTER) 2473 *m++ = 'R'; 2474 if (flags & NCE_F_MAPPING) 2475 *m++ = 'M'; 2476 *m = '\0'; 2477 2478 if (ill->ill_net_type == IRE_IF_RESOLVER) { 2479 size_t addrlen; 2480 char *addr_buf; 2481 dl_unitdata_req_t *dl; 2482 2483 mutex_enter(&nce->nce_lock); 2484 h = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2485 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 2486 if (ill->ill_flags & ILLF_XRESOLV) 2487 addrlen = (3 * (dl->dl_dest_addr_length)); 2488 else 2489 addrlen = (3 * (ill->ill_nd_lla_len)); 2490 if (addrlen <= 0) { 2491 mutex_exit(&nce->nce_lock); 2492 (void) mi_mpprintf(mp, 2493 "%8s %9s %5s %s/%d", 2494 ill->ill_name, 2495 "None", 2496 (uchar_t *)&flags_buf, 2497 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2498 (char *)local_buf, sizeof (local_buf)), 2499 ip_mask_to_plen_v6(&v6addr)); 2500 } else { 2501 /* 2502 * Convert the hardware/lla address to ascii 2503 */ 2504 addr_buf = kmem_zalloc(addrlen, KM_NOSLEEP); 2505 if (addr_buf == NULL) { 2506 mutex_exit(&nce->nce_lock); 2507 return; 2508 } 2509 (void) mac_colon_addr((uint8_t *)h, 2510 (ill->ill_flags & ILLF_XRESOLV) ? 2511 dl->dl_dest_addr_length : ill->ill_nd_lla_len, 2512 addr_buf, addrlen); 2513 mutex_exit(&nce->nce_lock); 2514 (void) mi_mpprintf(mp, "%8s %17s %5s %s/%d", 2515 ill->ill_name, addr_buf, (uchar_t *)&flags_buf, 2516 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2517 (char *)local_buf, sizeof (local_buf)), 2518 ip_mask_to_plen_v6(&v6addr)); 2519 kmem_free(addr_buf, addrlen); 2520 } 2521 } else { 2522 (void) mi_mpprintf(mp, 2523 "%8s %9s %5s %s/%d", 2524 ill->ill_name, 2525 "None", 2526 (uchar_t *)&flags_buf, 2527 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2528 (char *)local_buf, sizeof (local_buf)), 2529 ip_mask_to_plen_v6(&v6addr)); 2530 } 2531 } 2532 2533 mblk_t * 2534 nce_udreq_alloc(ill_t *ill) 2535 { 2536 mblk_t *template_mp = NULL; 2537 dl_unitdata_req_t *dlur; 2538 int sap_length; 2539 2540 ASSERT(ill->ill_isv6); 2541 2542 sap_length = ill->ill_sap_length; 2543 template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 2544 ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 2545 if (template_mp == NULL) 2546 return (NULL); 2547 2548 dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 2549 dlur->dl_priority.dl_min = 0; 2550 dlur->dl_priority.dl_max = 0; 2551 dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 2552 dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); 2553 2554 /* Copy in the SAP value. */ 2555 NCE_LL_SAP_COPY(ill, template_mp); 2556 2557 return (template_mp); 2558 } 2559 2560 /* 2561 * NDP retransmit timer. 2562 * This timer goes off when: 2563 * a. It is time to retransmit NS for resolver. 2564 * b. It is time to send reachability probes. 2565 */ 2566 void 2567 ndp_timer(void *arg) 2568 { 2569 nce_t *nce = arg; 2570 ill_t *ill = nce->nce_ill; 2571 uint32_t ms; 2572 char addrbuf[INET6_ADDRSTRLEN]; 2573 mblk_t *mp; 2574 boolean_t dropped = B_FALSE; 2575 2576 /* 2577 * The timer has to be cancelled by ndp_delete before doing the final 2578 * refrele. So the NCE is guaranteed to exist when the timer runs 2579 * until it clears the timeout_id. Before clearing the timeout_id 2580 * bump up the refcnt so that we can continue to use the nce 2581 */ 2582 ASSERT(nce != NULL); 2583 2584 /* 2585 * Grab the ill_g_lock now itself to avoid lock order problems. 2586 * nce_solicit needs ill_g_lock to be able to traverse ills 2587 */ 2588 rw_enter(&ill_g_lock, RW_READER); 2589 mutex_enter(&nce->nce_lock); 2590 NCE_REFHOLD_LOCKED(nce); 2591 nce->nce_timeout_id = 0; 2592 2593 /* 2594 * Check the reachability state first. 2595 */ 2596 switch (nce->nce_state) { 2597 case ND_DELAY: 2598 rw_exit(&ill_g_lock); 2599 nce->nce_state = ND_PROBE; 2600 mutex_exit(&nce->nce_lock); 2601 (void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 2602 &ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST); 2603 if (ip_debug > 3) { 2604 /* ip2dbg */ 2605 pr_addr_dbg("ndp_timer: state for %s changed " 2606 "to PROBE\n", AF_INET6, &nce->nce_addr); 2607 } 2608 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2609 NCE_REFRELE(nce); 2610 return; 2611 case ND_PROBE: 2612 /* must be retransmit timer */ 2613 rw_exit(&ill_g_lock); 2614 nce->nce_pcnt--; 2615 ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 2616 nce->nce_pcnt >= -1); 2617 if (nce->nce_pcnt > 0) { 2618 /* 2619 * As per RFC2461, the nce gets deleted after 2620 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 2621 * Note that the first unicast solicitation is sent 2622 * during the DELAY state. 2623 */ 2624 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2625 nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr, 2626 addrbuf, sizeof (addrbuf)))); 2627 mutex_exit(&nce->nce_lock); 2628 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, 2629 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, 2630 (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE : 2631 NDP_UNICAST); 2632 if (dropped) { 2633 mutex_enter(&nce->nce_lock); 2634 nce->nce_pcnt++; 2635 mutex_exit(&nce->nce_lock); 2636 } 2637 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 2638 } else if (nce->nce_pcnt < 0) { 2639 /* No hope, delete the nce */ 2640 nce->nce_state = ND_UNREACHABLE; 2641 mutex_exit(&nce->nce_lock); 2642 if (ip_debug > 2) { 2643 /* ip1dbg */ 2644 pr_addr_dbg("ndp_timer: Delete IRE for" 2645 " dst %s\n", AF_INET6, &nce->nce_addr); 2646 } 2647 ndp_delete(nce); 2648 } else if (!(nce->nce_flags & NCE_F_PERMANENT)) { 2649 /* Wait RetransTimer, before deleting the entry */ 2650 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2651 nce->nce_pcnt, inet_ntop(AF_INET6, 2652 &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 2653 mutex_exit(&nce->nce_lock); 2654 /* Wait one interval before killing */ 2655 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2656 } else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) { 2657 ipif_t *ipif; 2658 2659 /* 2660 * We're done probing, and we can now declare this 2661 * address to be usable. Let IP know that it's ok to 2662 * use. 2663 */ 2664 nce->nce_state = ND_REACHABLE; 2665 mutex_exit(&nce->nce_lock); 2666 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, 2667 ALL_ZONES, NULL, NULL, NULL, NULL); 2668 if (ipif != NULL) { 2669 if (ipif->ipif_was_dup) { 2670 char ibuf[LIFNAMSIZ + 10]; 2671 char sbuf[INET6_ADDRSTRLEN]; 2672 2673 ipif->ipif_was_dup = B_FALSE; 2674 (void) strlcpy(ibuf, ill->ill_name, 2675 sizeof (ibuf)); 2676 (void) inet_ntop(AF_INET6, 2677 &ipif->ipif_v6lcl_addr, 2678 sbuf, sizeof (sbuf)); 2679 if (ipif->ipif_id != 0) { 2680 (void) snprintf(ibuf + 2681 ill->ill_name_length - 1, 2682 sizeof (ibuf) - 2683 ill->ill_name_length + 1, 2684 ":%d", ipif->ipif_id); 2685 } 2686 cmn_err(CE_NOTE, "recovered address " 2687 "%s on %s", sbuf, ibuf); 2688 } 2689 if ((ipif->ipif_flags & IPIF_UP) && 2690 !ipif->ipif_addr_ready) { 2691 ip_rts_ifmsg(ipif); 2692 ip_rts_newaddrmsg(RTM_ADD, 0, ipif); 2693 sctp_update_ipif(ipif, SCTP_IPIF_UP); 2694 } 2695 ipif->ipif_addr_ready = 1; 2696 ipif_refrele(ipif); 2697 } 2698 /* Begin defending our new address */ 2699 nce->nce_unsolicit_count = 0; 2700 dropped = nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, 2701 B_FALSE, &nce->nce_addr, &ipv6_all_hosts_mcast, 2702 nce_advert_flags(nce)); 2703 if (dropped) { 2704 nce->nce_unsolicit_count = 1; 2705 NDP_RESTART_TIMER(nce, 2706 ip_ndp_unsolicit_interval); 2707 } else if (ip_ndp_defense_interval != 0) { 2708 NDP_RESTART_TIMER(nce, ip_ndp_defense_interval); 2709 } 2710 } else { 2711 /* 2712 * This is an address we're probing to be our own, but 2713 * the ill is down. Wait until it comes back before 2714 * doing anything, but switch to reachable state so 2715 * that the restart will work. 2716 */ 2717 nce->nce_state = ND_REACHABLE; 2718 mutex_exit(&nce->nce_lock); 2719 } 2720 NCE_REFRELE(nce); 2721 return; 2722 case ND_INCOMPLETE: 2723 /* 2724 * Must be resolvers retransmit timer. 2725 */ 2726 for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) { 2727 ip6i_t *ip6i; 2728 ip6_t *ip6h; 2729 mblk_t *data_mp; 2730 2731 /* 2732 * Walk the list of packets queued, and see if there 2733 * are any multipathing probe packets. Such packets 2734 * are always queued at the head. Since this is a 2735 * retransmit timer firing, mark such packets as 2736 * delayed in ND resolution. This info will be used 2737 * in ip_wput_v6(). Multipathing probe packets will 2738 * always have an ip6i_t. Once we hit a packet without 2739 * it, we can break out of this loop. 2740 */ 2741 if (mp->b_datap->db_type == M_CTL) 2742 data_mp = mp->b_cont; 2743 else 2744 data_mp = mp; 2745 2746 ip6h = (ip6_t *)data_mp->b_rptr; 2747 if (ip6h->ip6_nxt != IPPROTO_RAW) 2748 break; 2749 2750 /* 2751 * This message should have been pulled up already in 2752 * ip_wput_v6. We can't do pullups here because the 2753 * b_next/b_prev is non-NULL. 2754 */ 2755 ip6i = (ip6i_t *)ip6h; 2756 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2757 sizeof (ip6i_t) + IPV6_HDR_LEN); 2758 2759 /* Mark this packet as delayed due to ND resolution */ 2760 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2761 ip6i->ip6i_flags |= IP6I_ND_DELAYED; 2762 } 2763 if (nce->nce_qd_mp != NULL) { 2764 ms = nce_solicit(nce, NULL); 2765 rw_exit(&ill_g_lock); 2766 if (ms == 0) { 2767 if (nce->nce_state != ND_REACHABLE) { 2768 mutex_exit(&nce->nce_lock); 2769 nce_resolv_failed(nce); 2770 ndp_delete(nce); 2771 } else { 2772 mutex_exit(&nce->nce_lock); 2773 } 2774 } else { 2775 mutex_exit(&nce->nce_lock); 2776 NDP_RESTART_TIMER(nce, (clock_t)ms); 2777 } 2778 NCE_REFRELE(nce); 2779 return; 2780 } 2781 mutex_exit(&nce->nce_lock); 2782 rw_exit(&ill_g_lock); 2783 NCE_REFRELE(nce); 2784 break; 2785 case ND_REACHABLE : 2786 rw_exit(&ill_g_lock); 2787 if (((nce->nce_flags & NCE_F_UNSOL_ADV) && 2788 nce->nce_unsolicit_count != 0) || 2789 ((nce->nce_flags & NCE_F_PERMANENT) && 2790 ip_ndp_defense_interval != 0)) { 2791 if (nce->nce_unsolicit_count > 0) 2792 nce->nce_unsolicit_count--; 2793 mutex_exit(&nce->nce_lock); 2794 dropped = nce_xmit(ill, 2795 ND_NEIGHBOR_ADVERT, 2796 ill, /* ill to be used for hw addr */ 2797 B_FALSE, /* use ill_phys_addr */ 2798 &nce->nce_addr, 2799 &ipv6_all_hosts_mcast, 2800 nce_advert_flags(nce)); 2801 if (dropped) { 2802 mutex_enter(&nce->nce_lock); 2803 nce->nce_unsolicit_count++; 2804 mutex_exit(&nce->nce_lock); 2805 } 2806 if (nce->nce_unsolicit_count != 0) { 2807 NDP_RESTART_TIMER(nce, 2808 ip_ndp_unsolicit_interval); 2809 } else { 2810 NDP_RESTART_TIMER(nce, 2811 ip_ndp_defense_interval); 2812 } 2813 } else { 2814 mutex_exit(&nce->nce_lock); 2815 } 2816 NCE_REFRELE(nce); 2817 break; 2818 default: 2819 rw_exit(&ill_g_lock); 2820 mutex_exit(&nce->nce_lock); 2821 NCE_REFRELE(nce); 2822 break; 2823 } 2824 } 2825 2826 /* 2827 * Set a link layer address from the ll_addr passed in. 2828 * Copy SAP from ill. 2829 */ 2830 static void 2831 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2832 { 2833 ill_t *ill = nce->nce_ill; 2834 uchar_t *woffset; 2835 2836 ASSERT(ll_addr != NULL); 2837 /* Always called before fast_path_probe */ 2838 ASSERT(nce->nce_fp_mp == NULL); 2839 if (ill->ill_sap_length != 0) { 2840 /* 2841 * Copy the SAP type specified in the 2842 * request into the xmit template. 2843 */ 2844 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2845 } 2846 if (ill->ill_phys_addr_length > 0) { 2847 /* 2848 * The bcopy() below used to be called for the physical address 2849 * length rather than the link layer address length. For 2850 * ethernet and many other media, the phys_addr and lla are 2851 * identical. 2852 * However, with xresolv interfaces being introduced, the 2853 * phys_addr and lla are no longer the same, and the physical 2854 * address may not have any useful meaning, so we use the lla 2855 * for IPv6 address resolution and destination addressing. 2856 * 2857 * For PPP or other interfaces with a zero length 2858 * physical address, don't do anything here. 2859 * The bcopy() with a zero phys_addr length was previously 2860 * a no-op for interfaces with a zero-length physical address. 2861 * Using the lla for them would change the way they operate. 2862 * Doing nothing in such cases preserves expected behavior. 2863 */ 2864 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2865 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2866 } 2867 } 2868 2869 static boolean_t 2870 nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len) 2871 { 2872 ill_t *ill = nce->nce_ill; 2873 uchar_t *ll_offset; 2874 2875 ASSERT(nce->nce_res_mp != NULL); 2876 if (ll_addr == NULL) 2877 return (B_FALSE); 2878 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2879 if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0) 2880 return (B_TRUE); 2881 return (B_FALSE); 2882 } 2883 2884 /* 2885 * Updates the link layer address or the reachability state of 2886 * a cache entry. Reset probe counter if needed. 2887 */ 2888 static void 2889 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2890 { 2891 ill_t *ill = nce->nce_ill; 2892 boolean_t need_stop_timer = B_FALSE; 2893 boolean_t need_fastpath_update = B_FALSE; 2894 2895 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2896 ASSERT(nce->nce_ipversion == IPV6_VERSION); 2897 /* 2898 * If this interface does not do NUD, there is no point 2899 * in allowing an update to the cache entry. Although 2900 * we will respond to NS. 2901 * The only time we accept an update for a resolver when 2902 * NUD is turned off is when it has just been created. 2903 * Non-Resolvers will always be created as REACHABLE. 2904 */ 2905 if (new_state != ND_UNCHANGED) { 2906 if ((nce->nce_flags & NCE_F_NONUD) && 2907 (nce->nce_state != ND_INCOMPLETE)) 2908 return; 2909 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2910 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2911 need_stop_timer = B_TRUE; 2912 if (new_state == ND_REACHABLE) 2913 nce->nce_last = TICK_TO_MSEC(lbolt64); 2914 else { 2915 /* We force NUD in this case */ 2916 nce->nce_last = 0; 2917 } 2918 nce->nce_state = new_state; 2919 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2920 } 2921 /* 2922 * In case of fast path we need to free the the fastpath 2923 * M_DATA and do another probe. Otherwise we can just 2924 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2925 * whatever packets that happens to be transmitting at the time. 2926 */ 2927 if (new_ll_addr != NULL) { 2928 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2929 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2930 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2931 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2932 if (nce->nce_fp_mp != NULL) { 2933 freemsg(nce->nce_fp_mp); 2934 nce->nce_fp_mp = NULL; 2935 } 2936 need_fastpath_update = B_TRUE; 2937 } 2938 mutex_exit(&nce->nce_lock); 2939 if (need_stop_timer) { 2940 (void) untimeout(nce->nce_timeout_id); 2941 nce->nce_timeout_id = 0; 2942 } 2943 if (need_fastpath_update) 2944 nce_fastpath(nce); 2945 mutex_enter(&nce->nce_lock); 2946 } 2947 2948 void 2949 nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert) 2950 { 2951 uint_t count = 0; 2952 mblk_t **mpp; 2953 2954 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2955 2956 for (mpp = &nce->nce_qd_mp; *mpp != NULL; 2957 mpp = &(*mpp)->b_next) { 2958 if (++count > 2959 nce->nce_ill->ill_max_buf) { 2960 mblk_t *tmp = nce->nce_qd_mp->b_next; 2961 2962 nce->nce_qd_mp->b_next = NULL; 2963 nce->nce_qd_mp->b_prev = NULL; 2964 freemsg(nce->nce_qd_mp); 2965 nce->nce_qd_mp = tmp; 2966 } 2967 } 2968 /* put this on the list */ 2969 if (head_insert) { 2970 mp->b_next = nce->nce_qd_mp; 2971 nce->nce_qd_mp = mp; 2972 } else { 2973 *mpp = mp; 2974 } 2975 } 2976 2977 static void 2978 nce_queue_mp(nce_t *nce, mblk_t *mp) 2979 { 2980 boolean_t head_insert = B_FALSE; 2981 ip6_t *ip6h; 2982 ip6i_t *ip6i; 2983 mblk_t *data_mp; 2984 2985 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2986 2987 if (mp->b_datap->db_type == M_CTL) 2988 data_mp = mp->b_cont; 2989 else 2990 data_mp = mp; 2991 ip6h = (ip6_t *)data_mp->b_rptr; 2992 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2993 /* 2994 * This message should have been pulled up already in 2995 * ip_wput_v6. We can't do pullups here because the message 2996 * could be from the nce_qd_mp which could have b_next/b_prev 2997 * non-NULL. 2998 */ 2999 ip6i = (ip6i_t *)ip6h; 3000 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 3001 sizeof (ip6i_t) + IPV6_HDR_LEN); 3002 /* 3003 * Multipathing probe packets have IP6I_DROP_IFDELAYED set. 3004 * This has 2 aspects mentioned below. 3005 * 1. Perform head insertion in the nce_qd_mp for these packets. 3006 * This ensures that next retransmit of ND solicitation 3007 * will use the interface specified by the probe packet, 3008 * for both NS and NA. This corresponds to the src address 3009 * in the IPv6 packet. If we insert at tail, we will be 3010 * depending on the packet at the head for successful 3011 * ND resolution. This is not reliable, because the interface 3012 * on which the NA arrives could be different from the interface 3013 * on which the NS was sent, and if the receiving interface is 3014 * failed, it will appear that the sending interface is also 3015 * failed, causing in.mpathd to misdiagnose this as link 3016 * failure. 3017 * 2. Drop the original packet, if the ND resolution did not 3018 * succeed in the first attempt. However we will create the 3019 * nce and the ire, as soon as the ND resolution succeeds. 3020 * We don't gain anything by queueing multiple probe packets 3021 * and sending them back-to-back once resolution succeeds. 3022 * It is sufficient to send just 1 packet after ND resolution 3023 * succeeds. Since mpathd is sending down probe packets at a 3024 * constant rate, we don't need to send the queued packet. We 3025 * need to queue it only for NDP resolution. The benefit of 3026 * dropping the probe packets that were delayed in ND 3027 * resolution, is that in.mpathd will not see inflated 3028 * RTT. If the ND resolution does not succeed within 3029 * in.mpathd's failure detection time, mpathd may detect 3030 * a failure, and it does not matter whether the packet 3031 * was queued or dropped. 3032 */ 3033 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 3034 head_insert = B_TRUE; 3035 } 3036 3037 nce_queue_mp_common(nce, mp, head_insert); 3038 } 3039 3040 /* 3041 * Called when address resolution failed due to a timeout. 3042 * Send an ICMP unreachable in response to all queued packets. 3043 */ 3044 void 3045 nce_resolv_failed(nce_t *nce) 3046 { 3047 mblk_t *mp, *nxt_mp, *first_mp; 3048 char buf[INET6_ADDRSTRLEN]; 3049 ip6_t *ip6h; 3050 zoneid_t zoneid = GLOBAL_ZONEID; 3051 3052 ip1dbg(("nce_resolv_failed: dst %s\n", 3053 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 3054 mutex_enter(&nce->nce_lock); 3055 mp = nce->nce_qd_mp; 3056 nce->nce_qd_mp = NULL; 3057 mutex_exit(&nce->nce_lock); 3058 while (mp != NULL) { 3059 nxt_mp = mp->b_next; 3060 mp->b_next = NULL; 3061 mp->b_prev = NULL; 3062 3063 first_mp = mp; 3064 if (mp->b_datap->db_type == M_CTL) { 3065 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 3066 ASSERT(io->ipsec_out_type == IPSEC_OUT); 3067 zoneid = io->ipsec_out_zoneid; 3068 ASSERT(zoneid != ALL_ZONES); 3069 mp = mp->b_cont; 3070 } 3071 3072 ip6h = (ip6_t *)mp->b_rptr; 3073 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3074 ip6i_t *ip6i; 3075 /* 3076 * This message should have been pulled up already 3077 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 3078 * the header is pulled up. 3079 */ 3080 ip6i = (ip6i_t *)ip6h; 3081 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 3082 sizeof (ip6i_t) + IPV6_HDR_LEN); 3083 mp->b_rptr += sizeof (ip6i_t); 3084 } 3085 /* 3086 * Ignore failure since icmp_unreachable_v6 will silently 3087 * drop packets with an unspecified source address. 3088 */ 3089 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid); 3090 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 3091 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid); 3092 mp = nxt_mp; 3093 } 3094 } 3095 3096 /* 3097 * Called by SIOCSNDP* ioctl to add/change an nce entry 3098 * and the corresponding attributes. 3099 * Disallow states other than ND_REACHABLE or ND_STALE. 3100 */ 3101 int 3102 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 3103 { 3104 sin6_t *sin6; 3105 in6_addr_t *addr; 3106 nce_t *nce; 3107 int err; 3108 uint16_t new_flags = 0; 3109 uint16_t old_flags = 0; 3110 int inflags = lnr->lnr_flags; 3111 3112 ASSERT(ill->ill_isv6); 3113 if ((lnr->lnr_state_create != ND_REACHABLE) && 3114 (lnr->lnr_state_create != ND_STALE)) 3115 return (EINVAL); 3116 3117 sin6 = (sin6_t *)&lnr->lnr_addr; 3118 addr = &sin6->sin6_addr; 3119 3120 mutex_enter(&ndp6.ndp_g_lock); 3121 /* We know it can not be mapping so just look in the hash table */ 3122 nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); 3123 nce = nce_lookup_addr(ill, addr, nce); 3124 if (nce != NULL) 3125 new_flags = nce->nce_flags; 3126 3127 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 3128 case NDF_ISROUTER_ON: 3129 new_flags |= NCE_F_ISROUTER; 3130 break; 3131 case NDF_ISROUTER_OFF: 3132 new_flags &= ~NCE_F_ISROUTER; 3133 break; 3134 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 3135 mutex_exit(&ndp6.ndp_g_lock); 3136 if (nce != NULL) 3137 NCE_REFRELE(nce); 3138 return (EINVAL); 3139 } 3140 3141 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 3142 case NDF_ANYCAST_ON: 3143 new_flags |= NCE_F_ANYCAST; 3144 break; 3145 case NDF_ANYCAST_OFF: 3146 new_flags &= ~NCE_F_ANYCAST; 3147 break; 3148 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 3149 mutex_exit(&ndp6.ndp_g_lock); 3150 if (nce != NULL) 3151 NCE_REFRELE(nce); 3152 return (EINVAL); 3153 } 3154 3155 switch (inflags & (NDF_PROXY_ON|NDF_PROXY_OFF)) { 3156 case NDF_PROXY_ON: 3157 new_flags |= NCE_F_PROXY; 3158 break; 3159 case NDF_PROXY_OFF: 3160 new_flags &= ~NCE_F_PROXY; 3161 break; 3162 case (NDF_PROXY_OFF|NDF_PROXY_ON): 3163 mutex_exit(&ndp6.ndp_g_lock); 3164 if (nce != NULL) 3165 NCE_REFRELE(nce); 3166 return (EINVAL); 3167 } 3168 3169 if (nce == NULL) { 3170 err = ndp_add(ill, 3171 (uchar_t *)lnr->lnr_hdw_addr, 3172 addr, 3173 &ipv6_all_ones, 3174 &ipv6_all_zeros, 3175 0, 3176 new_flags, 3177 lnr->lnr_state_create, 3178 &nce, 3179 NULL, 3180 NULL); 3181 if (err != 0) { 3182 mutex_exit(&ndp6.ndp_g_lock); 3183 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 3184 return (err); 3185 } 3186 } 3187 old_flags = nce->nce_flags; 3188 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 3189 /* 3190 * Router turned to host, delete all ires. 3191 * XXX Just delete the entry, but we need to add too. 3192 */ 3193 nce->nce_flags &= ~NCE_F_ISROUTER; 3194 mutex_exit(&ndp6.ndp_g_lock); 3195 ndp_delete(nce); 3196 NCE_REFRELE(nce); 3197 return (0); 3198 } 3199 mutex_exit(&ndp6.ndp_g_lock); 3200 3201 mutex_enter(&nce->nce_lock); 3202 nce->nce_flags = new_flags; 3203 mutex_exit(&nce->nce_lock); 3204 /* 3205 * Note that we ignore the state at this point, which 3206 * should be either STALE or REACHABLE. Instead we let 3207 * the link layer address passed in to determine the state 3208 * much like incoming packets. 3209 */ 3210 ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 3211 NCE_REFRELE(nce); 3212 return (0); 3213 } 3214 3215 /* 3216 * If the device driver supports it, we make nce_fp_mp to have 3217 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 3218 * The caller insures there is hold on nce for this function. 3219 * Note that since ill_fastpath_probe() copies the mblk there is 3220 * no need for the hold beyond this function. 3221 */ 3222 void 3223 nce_fastpath(nce_t *nce) 3224 { 3225 ill_t *ill = nce->nce_ill; 3226 int res; 3227 3228 ASSERT(ill != NULL); 3229 if (nce->nce_fp_mp != NULL) { 3230 /* Already contains fastpath info */ 3231 return; 3232 } 3233 if (nce->nce_res_mp != NULL) { 3234 nce_fastpath_list_add(nce); 3235 res = ill_fastpath_probe(ill, nce->nce_res_mp); 3236 /* 3237 * EAGAIN is an indication of a transient error 3238 * i.e. allocation failure etc. leave the nce in the list it 3239 * will be updated when another probe happens for another ire 3240 * if not it will be taken out of the list when the ire is 3241 * deleted. 3242 */ 3243 3244 if (res != 0 && res != EAGAIN) 3245 nce_fastpath_list_delete(nce); 3246 } 3247 } 3248 3249 /* 3250 * Drain the list of nce's waiting for fastpath response. 3251 */ 3252 void 3253 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 3254 void *arg) 3255 { 3256 3257 nce_t *next_nce; 3258 nce_t *current_nce; 3259 nce_t *first_nce; 3260 nce_t *prev_nce = NULL; 3261 3262 mutex_enter(&ill->ill_lock); 3263 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 3264 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 3265 next_nce = current_nce->nce_fastpath; 3266 /* 3267 * Take it off the list if we're flushing, or if the callback 3268 * routine tells us to do so. Otherwise, leave the nce in the 3269 * fastpath list to handle any pending response from the lower 3270 * layer. We can't drain the list when the callback routine 3271 * comparison failed, because the response is asynchronous in 3272 * nature, and may not arrive in the same order as the list 3273 * insertion. 3274 */ 3275 if (func == NULL || func(current_nce, arg)) { 3276 current_nce->nce_fastpath = NULL; 3277 if (current_nce == first_nce) 3278 ill->ill_fastpath_list = first_nce = next_nce; 3279 else 3280 prev_nce->nce_fastpath = next_nce; 3281 } else { 3282 /* previous element that is still in the list */ 3283 prev_nce = current_nce; 3284 } 3285 current_nce = next_nce; 3286 } 3287 mutex_exit(&ill->ill_lock); 3288 } 3289 3290 /* 3291 * Add nce to the nce fastpath list. 3292 */ 3293 void 3294 nce_fastpath_list_add(nce_t *nce) 3295 { 3296 ill_t *ill; 3297 3298 ill = nce->nce_ill; 3299 3300 mutex_enter(&ill->ill_lock); 3301 mutex_enter(&nce->nce_lock); 3302 3303 /* 3304 * if nce has not been deleted and 3305 * is not already in the list add it. 3306 */ 3307 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 3308 (nce->nce_fastpath == NULL)) { 3309 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 3310 ill->ill_fastpath_list = nce; 3311 } 3312 3313 mutex_exit(&nce->nce_lock); 3314 mutex_exit(&ill->ill_lock); 3315 } 3316 3317 /* 3318 * remove nce from the nce fastpath list. 3319 */ 3320 void 3321 nce_fastpath_list_delete(nce_t *nce) 3322 { 3323 nce_t *nce_ptr; 3324 3325 ill_t *ill; 3326 3327 ill = nce->nce_ill; 3328 ASSERT(ill != NULL); 3329 3330 mutex_enter(&ill->ill_lock); 3331 if (nce->nce_fastpath == NULL) 3332 goto done; 3333 3334 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 3335 3336 if (ill->ill_fastpath_list == nce) { 3337 ill->ill_fastpath_list = nce->nce_fastpath; 3338 } else { 3339 nce_ptr = ill->ill_fastpath_list; 3340 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 3341 if (nce_ptr->nce_fastpath == nce) { 3342 nce_ptr->nce_fastpath = nce->nce_fastpath; 3343 break; 3344 } 3345 nce_ptr = nce_ptr->nce_fastpath; 3346 } 3347 } 3348 3349 nce->nce_fastpath = NULL; 3350 done: 3351 mutex_exit(&ill->ill_lock); 3352 } 3353 3354 /* 3355 * Update all NCE's that are not in fastpath mode and 3356 * have an nce_fp_mp that matches mp. mp->b_cont contains 3357 * the fastpath header. 3358 * 3359 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 3360 */ 3361 boolean_t 3362 ndp_fastpath_update(nce_t *nce, void *arg) 3363 { 3364 mblk_t *mp, *fp_mp; 3365 uchar_t *mp_rptr, *ud_mp_rptr; 3366 mblk_t *ud_mp = nce->nce_res_mp; 3367 ptrdiff_t cmplen; 3368 3369 if (nce->nce_flags & NCE_F_MAPPING) 3370 return (B_TRUE); 3371 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 3372 return (B_TRUE); 3373 3374 ip2dbg(("ndp_fastpath_update: trying\n")); 3375 mp = (mblk_t *)arg; 3376 mp_rptr = mp->b_rptr; 3377 cmplen = mp->b_wptr - mp_rptr; 3378 ASSERT(cmplen >= 0); 3379 ud_mp_rptr = ud_mp->b_rptr; 3380 /* 3381 * The nce is locked here to prevent any other threads 3382 * from accessing and changing nce_res_mp when the IPv6 address 3383 * becomes resolved to an lla while we're in the middle 3384 * of looking at and comparing the hardware address (lla). 3385 * It is also locked to prevent multiple threads in nce_fastpath_update 3386 * from examining nce_res_mp atthe same time. 3387 */ 3388 mutex_enter(&nce->nce_lock); 3389 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 3390 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 3391 mutex_exit(&nce->nce_lock); 3392 /* 3393 * Don't take the ire off the fastpath list yet, 3394 * since the response may come later. 3395 */ 3396 return (B_FALSE); 3397 } 3398 /* Matched - install mp as the fastpath mp */ 3399 ip1dbg(("ndp_fastpath_update: match\n")); 3400 fp_mp = dupb(mp->b_cont); 3401 if (fp_mp != NULL) { 3402 nce->nce_fp_mp = fp_mp; 3403 } 3404 mutex_exit(&nce->nce_lock); 3405 return (B_TRUE); 3406 } 3407 3408 /* 3409 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 3410 * driver. Note that it assumes IP is exclusive... 3411 */ 3412 /* ARGSUSED */ 3413 void 3414 ndp_fastpath_flush(nce_t *nce, char *arg) 3415 { 3416 if (nce->nce_flags & NCE_F_MAPPING) 3417 return; 3418 /* No fastpath info? */ 3419 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 3420 return; 3421 3422 if (nce->nce_ipversion == IPV4_VERSION && 3423 nce->nce_flags & NCE_F_BCAST) { 3424 /* 3425 * IPv4 BROADCAST entries: 3426 * We can't delete the nce since it is difficult to 3427 * recreate these without going through the 3428 * ipif down/up dance. 3429 * 3430 * All access to nce->nce_fp_mp in the case of these 3431 * is protected by nce_lock. 3432 */ 3433 mutex_enter(&nce->nce_lock); 3434 if (nce->nce_fp_mp != NULL) { 3435 freeb(nce->nce_fp_mp); 3436 nce->nce_fp_mp = NULL; 3437 mutex_exit(&nce->nce_lock); 3438 nce_fastpath(nce); 3439 } else { 3440 mutex_exit(&nce->nce_lock); 3441 } 3442 } else { 3443 /* Just delete the NCE... */ 3444 ndp_delete(nce); 3445 } 3446 } 3447 3448 /* 3449 * Return a pointer to a given option in the packet. 3450 * Assumes that option part of the packet have already been validated. 3451 */ 3452 nd_opt_hdr_t * 3453 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 3454 { 3455 while (optlen > 0) { 3456 if (opt->nd_opt_type == opt_type) 3457 return (opt); 3458 optlen -= 8 * opt->nd_opt_len; 3459 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3460 } 3461 return (NULL); 3462 } 3463 3464 /* 3465 * Verify all option lengths present are > 0, also check to see 3466 * if the option lengths and packet length are consistent. 3467 */ 3468 boolean_t 3469 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 3470 { 3471 ASSERT(opt != NULL); 3472 while (optlen > 0) { 3473 if (opt->nd_opt_len == 0) 3474 return (B_FALSE); 3475 optlen -= 8 * opt->nd_opt_len; 3476 if (optlen < 0) 3477 return (B_FALSE); 3478 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3479 } 3480 return (B_TRUE); 3481 } 3482 3483 /* 3484 * ndp_walk function. 3485 * Free a fraction of the NCE cache entries. 3486 * A fraction of zero means to not free any in that category. 3487 */ 3488 void 3489 ndp_cache_reclaim(nce_t *nce, char *arg) 3490 { 3491 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 3492 uint_t rand; 3493 3494 if (nce->nce_flags & NCE_F_PERMANENT) 3495 return; 3496 3497 rand = (uint_t)lbolt + 3498 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 3499 if (ncr->ncr_host != 0 && 3500 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 3501 ndp_delete(nce); 3502 return; 3503 } 3504 } 3505 3506 /* 3507 * ndp_walk function. 3508 * Count the number of NCEs that can be deleted. 3509 * These would be hosts but not routers. 3510 */ 3511 void 3512 ndp_cache_count(nce_t *nce, char *arg) 3513 { 3514 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 3515 3516 if (nce->nce_flags & NCE_F_PERMANENT) 3517 return; 3518 3519 ncc->ncc_total++; 3520 if (!(nce->nce_flags & NCE_F_ISROUTER)) 3521 ncc->ncc_host++; 3522 } 3523 3524 #ifdef NCE_DEBUG 3525 th_trace_t * 3526 th_trace_nce_lookup(nce_t *nce) 3527 { 3528 int bucket_id; 3529 th_trace_t *th_trace; 3530 3531 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3532 3533 bucket_id = IP_TR_HASH(curthread); 3534 ASSERT(bucket_id < IP_TR_HASH_MAX); 3535 3536 for (th_trace = nce->nce_trace[bucket_id]; th_trace != NULL; 3537 th_trace = th_trace->th_next) { 3538 if (th_trace->th_id == curthread) 3539 return (th_trace); 3540 } 3541 return (NULL); 3542 } 3543 3544 void 3545 nce_trace_ref(nce_t *nce) 3546 { 3547 int bucket_id; 3548 th_trace_t *th_trace; 3549 3550 /* 3551 * Attempt to locate the trace buffer for the curthread. 3552 * If it does not exist, then allocate a new trace buffer 3553 * and link it in list of trace bufs for this ipif, at the head 3554 */ 3555 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3556 3557 if (nce->nce_trace_disable == B_TRUE) 3558 return; 3559 3560 th_trace = th_trace_nce_lookup(nce); 3561 if (th_trace == NULL) { 3562 bucket_id = IP_TR_HASH(curthread); 3563 th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t), 3564 KM_NOSLEEP); 3565 if (th_trace == NULL) { 3566 nce->nce_trace_disable = B_TRUE; 3567 nce_trace_inactive(nce); 3568 return; 3569 } 3570 th_trace->th_id = curthread; 3571 th_trace->th_next = nce->nce_trace[bucket_id]; 3572 th_trace->th_prev = &nce->nce_trace[bucket_id]; 3573 if (th_trace->th_next != NULL) 3574 th_trace->th_next->th_prev = &th_trace->th_next; 3575 nce->nce_trace[bucket_id] = th_trace; 3576 } 3577 ASSERT(th_trace->th_refcnt < TR_BUF_MAX - 1); 3578 th_trace->th_refcnt++; 3579 th_trace_rrecord(th_trace); 3580 } 3581 3582 void 3583 nce_untrace_ref(nce_t *nce) 3584 { 3585 th_trace_t *th_trace; 3586 3587 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3588 3589 if (nce->nce_trace_disable == B_TRUE) 3590 return; 3591 3592 th_trace = th_trace_nce_lookup(nce); 3593 ASSERT(th_trace != NULL && th_trace->th_refcnt > 0); 3594 3595 th_trace_rrecord(th_trace); 3596 th_trace->th_refcnt--; 3597 } 3598 3599 void 3600 nce_trace_inactive(nce_t *nce) 3601 { 3602 th_trace_t *th_trace; 3603 int i; 3604 3605 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3606 3607 for (i = 0; i < IP_TR_HASH_MAX; i++) { 3608 while (nce->nce_trace[i] != NULL) { 3609 th_trace = nce->nce_trace[i]; 3610 3611 /* unlink th_trace and free it */ 3612 nce->nce_trace[i] = th_trace->th_next; 3613 if (th_trace->th_next != NULL) 3614 th_trace->th_next->th_prev = 3615 &nce->nce_trace[i]; 3616 3617 th_trace->th_next = NULL; 3618 th_trace->th_prev = NULL; 3619 kmem_free(th_trace, sizeof (th_trace_t)); 3620 } 3621 } 3622 3623 } 3624 3625 /* ARGSUSED */ 3626 int 3627 nce_thread_exit(nce_t *nce, caddr_t arg) 3628 { 3629 th_trace_t *th_trace; 3630 3631 mutex_enter(&nce->nce_lock); 3632 th_trace = th_trace_nce_lookup(nce); 3633 3634 if (th_trace == NULL) { 3635 mutex_exit(&nce->nce_lock); 3636 return (0); 3637 } 3638 3639 ASSERT(th_trace->th_refcnt == 0); 3640 3641 /* unlink th_trace and free it */ 3642 *th_trace->th_prev = th_trace->th_next; 3643 if (th_trace->th_next != NULL) 3644 th_trace->th_next->th_prev = th_trace->th_prev; 3645 th_trace->th_next = NULL; 3646 th_trace->th_prev = NULL; 3647 kmem_free(th_trace, sizeof (th_trace_t)); 3648 mutex_exit(&nce->nce_lock); 3649 return (0); 3650 } 3651 #endif 3652 3653 /* 3654 * Called when address resolution fails due to a timeout. 3655 * Send an ICMP unreachable in response to all queued packets. 3656 */ 3657 void 3658 arp_resolv_failed(nce_t *nce) 3659 { 3660 mblk_t *mp, *nxt_mp, *first_mp; 3661 char buf[INET6_ADDRSTRLEN]; 3662 zoneid_t zoneid = GLOBAL_ZONEID; 3663 struct in_addr ipv4addr; 3664 3665 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); 3666 ip3dbg(("arp_resolv_failed: dst %s\n", 3667 inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf)))); 3668 mutex_enter(&nce->nce_lock); 3669 mp = nce->nce_qd_mp; 3670 nce->nce_qd_mp = NULL; 3671 mutex_exit(&nce->nce_lock); 3672 3673 while (mp != NULL) { 3674 nxt_mp = mp->b_next; 3675 mp->b_next = NULL; 3676 mp->b_prev = NULL; 3677 3678 first_mp = mp; 3679 /* 3680 * Send icmp unreachable messages 3681 * to the hosts. 3682 */ 3683 (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid); 3684 ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); 3685 icmp_unreachable(nce->nce_ill->ill_wq, first_mp, 3686 ICMP_HOST_UNREACHABLE, zoneid); 3687 mp = nxt_mp; 3688 } 3689 } 3690 3691 static int 3692 ndp_lookup_then_add_v4(ill_t *ill, uchar_t *hw_addr, const in_addr_t *addr, 3693 const in_addr_t *mask, const in_addr_t *extract_mask, 3694 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 3695 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 3696 { 3697 int err = 0; 3698 nce_t *nce; 3699 in6_addr_t addr6; 3700 3701 mutex_enter(&ndp4.ndp_g_lock); 3702 nce = *((nce_t **)NCE_HASH_PTR_V4(*addr)); 3703 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3704 nce = nce_lookup_addr(ill, &addr6, nce); 3705 if (nce == NULL) { 3706 err = ndp_add_v4(ill, 3707 hw_addr, 3708 addr, 3709 mask, 3710 extract_mask, 3711 hw_extract_start, 3712 flags, 3713 state, 3714 newnce, 3715 fp_mp, 3716 res_mp); 3717 } else { 3718 *newnce = nce; 3719 err = EEXIST; 3720 } 3721 mutex_exit(&ndp4.ndp_g_lock); 3722 return (err); 3723 } 3724 3725 /* 3726 * NDP Cache Entry creation routine for IPv4. 3727 * Mapped entries are handled in arp. 3728 * This routine must always be called with ndp4.ndp_g_lock held. 3729 * Prior to return, nce_refcnt is incremented. 3730 */ 3731 static int 3732 ndp_add_v4(ill_t *ill, uchar_t *hw_addr, const in_addr_t *addr, 3733 const in_addr_t *mask, const in_addr_t *extract_mask, 3734 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 3735 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 3736 { 3737 static nce_t nce_nil; 3738 nce_t *nce; 3739 mblk_t *mp; 3740 mblk_t *template; 3741 nce_t **ncep; 3742 3743 ASSERT(MUTEX_HELD(&ndp4.ndp_g_lock)); 3744 ASSERT(ill != NULL); 3745 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 3746 return (EINVAL); 3747 } 3748 ASSERT((flags & NCE_F_MAPPING) == 0); 3749 ASSERT(extract_mask == NULL); 3750 /* 3751 * Allocate the mblk to hold the nce. 3752 */ 3753 mp = allocb(sizeof (nce_t), BPRI_MED); 3754 if (mp == NULL) 3755 return (ENOMEM); 3756 3757 nce = (nce_t *)mp->b_rptr; 3758 mp->b_wptr = (uchar_t *)&nce[1]; 3759 *nce = nce_nil; 3760 3761 /* 3762 * This one holds link layer address; if res_mp has been provided 3763 * by the caller, accept it without any further checks. Otherwise, 3764 * for V4, we fill it up with ill_resolver_mp here, then in 3765 * in ire_arpresolve(), we fill it up with the ARP query 3766 * once its formulated. 3767 */ 3768 if (res_mp != NULL) { 3769 template = res_mp; 3770 } else { 3771 if (ill->ill_resolver_mp == NULL) { 3772 freeb(mp); 3773 return (EINVAL); 3774 } 3775 template = copyb(ill->ill_resolver_mp); 3776 } 3777 if (template == NULL) { 3778 freeb(mp); 3779 return (ENOMEM); 3780 } 3781 nce->nce_ill = ill; 3782 nce->nce_ipversion = IPV4_VERSION; 3783 nce->nce_flags = flags; 3784 nce->nce_state = state; 3785 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 3786 nce->nce_rcnt = ill->ill_xmit_count; 3787 IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr); 3788 if (*mask == IP_HOST_MASK) { 3789 nce->nce_mask = ipv6_all_ones; 3790 } else { 3791 IN6_IPADDR_TO_V4MAPPED(*mask, &nce->nce_mask); 3792 } 3793 nce->nce_extract_mask = ipv6_all_zeros; 3794 nce->nce_ll_extract_start = hw_extract_start; 3795 nce->nce_fp_mp = (fp_mp? fp_mp : NULL); 3796 nce->nce_res_mp = template; 3797 if (state == ND_REACHABLE) 3798 nce->nce_last = TICK_TO_MSEC(lbolt64); 3799 else 3800 nce->nce_last = 0; 3801 nce->nce_qd_mp = NULL; 3802 nce->nce_mp = mp; 3803 if (hw_addr != NULL) 3804 nce_set_ll(nce, hw_addr); 3805 /* This one is for nce getting created */ 3806 nce->nce_refcnt = 1; 3807 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 3808 ncep = ((nce_t **)NCE_HASH_PTR_V4(*addr)); 3809 3810 #ifdef NCE_DEBUG 3811 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 3812 #endif 3813 /* 3814 * Atomically ensure that the ill is not CONDEMNED, before 3815 * adding the NCE. 3816 */ 3817 mutex_enter(&ill->ill_lock); 3818 if (ill->ill_state_flags & ILL_CONDEMNED) { 3819 mutex_exit(&ill->ill_lock); 3820 freeb(mp); 3821 if (res_mp == NULL) { 3822 /* 3823 * template was locally allocated. need to free it. 3824 */ 3825 freeb(template); 3826 } 3827 return (EINVAL); 3828 } 3829 if ((nce->nce_next = *ncep) != NULL) 3830 nce->nce_next->nce_ptpn = &nce->nce_next; 3831 *ncep = nce; 3832 nce->nce_ptpn = ncep; 3833 *newnce = nce; 3834 /* This one is for nce being used by an active thread */ 3835 NCE_REFHOLD(*newnce); 3836 3837 /* Bump up the number of nce's referencing this ill */ 3838 ill->ill_nce_cnt++; 3839 mutex_exit(&ill->ill_lock); 3840 return (0); 3841 } 3842 3843 void 3844 ndp_flush_qd_mp(nce_t *nce) 3845 { 3846 mblk_t *qd_mp, *qd_next; 3847 3848 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3849 qd_mp = nce->nce_qd_mp; 3850 nce->nce_qd_mp = NULL; 3851 while (qd_mp != NULL) { 3852 qd_next = qd_mp->b_next; 3853 qd_mp->b_next = NULL; 3854 qd_mp->b_prev = NULL; 3855 freemsg(qd_mp); 3856 qd_mp = qd_next; 3857 } 3858 } 3859 3860 nce_t * 3861 nce_reinit(nce_t *nce) 3862 { 3863 nce_t *newnce = NULL; 3864 in_addr_t nce_addr, nce_mask; 3865 3866 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3867 IN6_V4MAPPED_TO_IPADDR(&nce->nce_mask, nce_mask); 3868 /* 3869 * delete the old one. this will get rid of any ire's pointing 3870 * at this nce. 3871 */ 3872 ndp_delete(nce); 3873 /* 3874 * create a new nce with the same addr and mask. 3875 */ 3876 mutex_enter(&ndp4.ndp_g_lock); 3877 (void) ndp_add_v4(nce->nce_ill, NULL, &nce_addr, &nce_mask, NULL, 0, 0, 3878 ND_INITIAL, &newnce, NULL, NULL); 3879 mutex_exit(&ndp4.ndp_g_lock); 3880 /* 3881 * refrele the old nce. 3882 */ 3883 NCE_REFRELE(nce); 3884 return (newnce); 3885 } 3886 3887 /* 3888 * ndp_walk routine to delete all entries that have a given destination or 3889 * gateway address and cached link layer (MAC) address. This is used when ARP 3890 * informs us that a network-to-link-layer mapping may have changed. 3891 */ 3892 void 3893 nce_delete_hw_changed(nce_t *nce, void *arg) 3894 { 3895 nce_hw_map_t *hwm = arg; 3896 mblk_t *mp; 3897 dl_unitdata_req_t *dlu; 3898 uchar_t *macaddr; 3899 ill_t *ill; 3900 int saplen; 3901 ipaddr_t nce_addr; 3902 3903 if (nce->nce_state != ND_REACHABLE) 3904 return; 3905 3906 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3907 if (nce_addr != hwm->hwm_addr) 3908 return; 3909 3910 mutex_enter(&nce->nce_lock); 3911 if ((mp = nce->nce_res_mp) == NULL) { 3912 mutex_exit(&nce->nce_lock); 3913 return; 3914 } 3915 dlu = (dl_unitdata_req_t *)mp->b_rptr; 3916 macaddr = (uchar_t *)(dlu + 1); 3917 ill = nce->nce_ill; 3918 if ((saplen = ill->ill_sap_length) > 0) 3919 macaddr += saplen; 3920 else 3921 saplen = -saplen; 3922 3923 /* 3924 * If the hardware address is unchanged, then leave this one alone. 3925 * Note that saplen == abs(saplen) now. 3926 */ 3927 if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen && 3928 bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) { 3929 mutex_exit(&nce->nce_lock); 3930 return; 3931 } 3932 mutex_exit(&nce->nce_lock); 3933 3934 DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce); 3935 ndp_delete(nce); 3936 } 3937 3938 /* 3939 * This function verifies whether a given IPv4 address is potentially known to 3940 * the NCE subsystem. If so, then ARP must not delete the corresponding ace_t, 3941 * so that it can continue to look for hardware changes on that address. 3942 */ 3943 boolean_t 3944 ndp_lookup_ipaddr(in_addr_t addr) 3945 { 3946 nce_t *nce; 3947 struct in_addr nceaddr; 3948 3949 if (addr == INADDR_ANY) 3950 return (B_FALSE); 3951 3952 mutex_enter(&ndp4.ndp_g_lock); 3953 nce = *(nce_t **)NCE_HASH_PTR_V4(addr); 3954 for (; nce != NULL; nce = nce->nce_next) { 3955 /* Note that only v4 mapped entries are in the table. */ 3956 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr); 3957 if (addr == nceaddr.s_addr && 3958 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 3959 /* Single flag check; no lock needed */ 3960 if (!(nce->nce_flags & NCE_F_CONDEMNED)) 3961 break; 3962 } 3963 } 3964 mutex_exit(&ndp4.ndp_g_lock); 3965 return (nce != NULL); 3966 } 3967