1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/stropts.h> 31 #include <sys/sysmacros.h> 32 #include <sys/errno.h> 33 #include <sys/dlpi.h> 34 #include <sys/socket.h> 35 #include <sys/ddi.h> 36 #include <sys/cmn_err.h> 37 #include <sys/debug.h> 38 #include <sys/vtrace.h> 39 #include <sys/kmem.h> 40 #include <sys/zone.h> 41 42 #include <net/if.h> 43 #include <net/if_dl.h> 44 #include <net/route.h> 45 #include <netinet/in.h> 46 #include <netinet/ip6.h> 47 #include <netinet/icmp6.h> 48 49 #include <inet/common.h> 50 #include <inet/mi.h> 51 #include <inet/mib2.h> 52 #include <inet/nd.h> 53 #include <inet/ip.h> 54 #include <inet/ip_if.h> 55 #include <inet/ip_ire.h> 56 #include <inet/ip_rts.h> 57 #include <inet/ip6.h> 58 #include <inet/ip_ndp.h> 59 #include <inet/ipsec_impl.h> 60 #include <inet/ipsec_info.h> 61 62 /* 63 * Function names with nce_ prefix are static while function 64 * names with ndp_ prefix are used by rest of the IP. 
65 */ 66 67 static boolean_t nce_cmp_ll_addr(nce_t *nce, char *new_ll_addr, 68 uint32_t ll_addr_len); 69 static void nce_fastpath(nce_t *nce); 70 static void nce_ire_delete(nce_t *nce); 71 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 72 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 73 static nce_t *nce_lookup_addr(ill_t *, const in6_addr_t *, nce_t *); 74 static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); 75 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 76 uchar_t *addr); 77 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 78 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 79 static void nce_report1(nce_t *nce, uchar_t *mp_arg); 80 static mblk_t *nce_udreq_alloc(ill_t *ill); 81 static void nce_update(nce_t *nce, uint16_t new_state, 82 uchar_t *new_ll_addr); 83 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 84 static boolean_t nce_xmit(ill_t *ill, uint32_t operation, 85 ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, 86 const in6_addr_t *target, int flag); 87 static void lla2ascii(uint8_t *lla, int addrlen, uchar_t *buf); 88 extern void th_trace_rrecord(th_trace_t *); 89 static int ndp_lookup_then_add_v6(ill_t *, uchar_t *, 90 const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, 91 uint32_t, uint16_t, uint16_t, nce_t **, mblk_t *, mblk_t *); 92 static int ndp_lookup_then_add_v4(ill_t *, uchar_t *, 93 const in_addr_t *, const in_addr_t *, const in_addr_t *, 94 uint32_t, uint16_t, uint16_t, nce_t **, mblk_t *, mblk_t *); 95 static int ndp_add_v6(ill_t *, uchar_t *, const in6_addr_t *, 96 const in6_addr_t *, const in6_addr_t *, uint32_t, uint16_t, uint16_t, 97 nce_t **); 98 static int ndp_add_v4(ill_t *, uchar_t *, const in_addr_t *, 99 const in_addr_t *, const in_addr_t *, uint32_t, uint16_t, uint16_t, 100 nce_t **, mblk_t *, mblk_t *); 101 102 103 #ifdef NCE_DEBUG 104 void nce_trace_inactive(nce_t *); 105 #endif 106 107 ndp_g_t ndp4, ndp6; 108 109 #define 
NCE_HASH_PTR_V4(addr) \ 110 (&(ndp4.nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 111 112 #define NCE_HASH_PTR_V6(addr) \ 113 (&(ndp6.nce_hash_tbl[NCE_ADDR_HASH_V6(addr, NCE_TABLE_SIZE)])) 114 115 int 116 ndp_add(ill_t *ill, uchar_t *hw_addr, const void *addr, 117 const void *mask, const void *extract_mask, 118 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 119 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 120 { 121 int status; 122 123 if (ill->ill_isv6) 124 status = ndp_add_v6(ill, hw_addr, (in6_addr_t *)addr, 125 (in6_addr_t *)mask, (in6_addr_t *)extract_mask, 126 hw_extract_start, flags, state, newnce); 127 else 128 status = ndp_add_v4(ill, hw_addr, (in_addr_t *)addr, 129 (in_addr_t *)mask, (in_addr_t *)extract_mask, 130 hw_extract_start, flags, state, newnce, fp_mp, res_mp); 131 return (status); 132 } 133 134 /* 135 * NDP Cache Entry creation routine. 136 * Mapped entries will never do NUD . 137 * This routine must always be called with ndp6.ndp_g_lock held. 138 * Prior to return, nce_refcnt is incremented. 139 */ 140 static int 141 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 142 const in6_addr_t *mask, const in6_addr_t *extract_mask, 143 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 144 nce_t **newnce) 145 { 146 static nce_t nce_nil; 147 nce_t *nce; 148 mblk_t *mp; 149 mblk_t *template; 150 nce_t **ncep; 151 boolean_t dropped = B_FALSE; 152 153 ASSERT(MUTEX_HELD(&ndp6.ndp_g_lock)); 154 ASSERT(ill != NULL && ill->ill_isv6); 155 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 156 ip0dbg(("ndp_add: no addr\n")); 157 return (EINVAL); 158 } 159 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 160 ip0dbg(("ndp_add: flags = %x\n", (int)flags)); 161 return (EINVAL); 162 } 163 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 164 (flags & NCE_F_MAPPING)) { 165 ip0dbg(("ndp_add: extract mask zero for mapping")); 166 return (EINVAL); 167 } 168 /* 169 * Allocate the mblk to hold the nce. 
170 * 171 * XXX This can come out of a separate cache - nce_cache. 172 * We don't need the mp anymore as there are no more 173 * "qwriter"s 174 */ 175 mp = allocb(sizeof (nce_t), BPRI_MED); 176 if (mp == NULL) 177 return (ENOMEM); 178 179 nce = (nce_t *)mp->b_rptr; 180 mp->b_wptr = (uchar_t *)&nce[1]; 181 *nce = nce_nil; 182 183 /* 184 * This one holds link layer address 185 */ 186 if (ill->ill_net_type == IRE_IF_RESOLVER) { 187 template = nce_udreq_alloc(ill); 188 } else { 189 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 190 ASSERT((ill->ill_resolver_mp != NULL)); 191 template = copyb(ill->ill_resolver_mp); 192 } 193 if (template == NULL) { 194 freeb(mp); 195 return (ENOMEM); 196 } 197 nce->nce_ill = ill; 198 nce->nce_ipversion = IPV6_VERSION; 199 nce->nce_flags = flags; 200 nce->nce_state = state; 201 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 202 nce->nce_rcnt = ill->ill_xmit_count; 203 nce->nce_addr = *addr; 204 nce->nce_mask = *mask; 205 nce->nce_extract_mask = *extract_mask; 206 nce->nce_ll_extract_start = hw_extract_start; 207 nce->nce_fp_mp = NULL; 208 nce->nce_res_mp = template; 209 if (state == ND_REACHABLE) 210 nce->nce_last = TICK_TO_MSEC(lbolt64); 211 else 212 nce->nce_last = 0; 213 nce->nce_qd_mp = NULL; 214 nce->nce_mp = mp; 215 if (hw_addr != NULL) 216 nce_set_ll(nce, hw_addr); 217 /* This one is for nce getting created */ 218 nce->nce_refcnt = 1; 219 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 220 if (nce->nce_flags & NCE_F_MAPPING) { 221 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 222 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 223 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 224 ncep = &ndp6.nce_mask_entries; 225 } else { 226 ncep = ((nce_t **)NCE_HASH_PTR_V6(*addr)); 227 } 228 229 #ifdef NCE_DEBUG 230 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 231 #endif 232 /* 233 * Atomically ensure that the ill is not CONDEMNED, before 234 * adding the NCE. 
235 */ 236 mutex_enter(&ill->ill_lock); 237 if (ill->ill_state_flags & ILL_CONDEMNED) { 238 mutex_exit(&ill->ill_lock); 239 freeb(mp); 240 return (EINVAL); 241 } 242 if ((nce->nce_next = *ncep) != NULL) 243 nce->nce_next->nce_ptpn = &nce->nce_next; 244 *ncep = nce; 245 nce->nce_ptpn = ncep; 246 *newnce = nce; 247 /* This one is for nce being used by an active thread */ 248 NCE_REFHOLD(*newnce); 249 250 /* Bump up the number of nce's referencing this ill */ 251 ill->ill_nce_cnt++; 252 mutex_exit(&ill->ill_lock); 253 254 /* 255 * Before we insert the nce, honor the UNSOL_ADV flag. 256 * We cannot hold the ndp_g_lock and call nce_xmit 257 * which does a putnext. 258 */ 259 if (flags & NCE_F_UNSOL_ADV) { 260 flags |= NDP_ORIDE; 261 /* 262 * We account for the transmit below by assigning one 263 * less than the ndd variable. Subsequent decrements 264 * are done in ndp_timer. 265 */ 266 mutex_enter(&nce->nce_lock); 267 mutex_exit(&ndp6.ndp_g_lock); 268 nce->nce_unsolicit_count = ip_ndp_unsolicit_count - 1; 269 mutex_exit(&nce->nce_lock); 270 dropped = nce_xmit(ill, 271 ND_NEIGHBOR_ADVERT, 272 ill, /* ill to be used for extracting ill_nd_lla */ 273 B_TRUE, /* use ill_nd_lla */ 274 addr, /* Source and target of the advertisement pkt */ 275 &ipv6_all_hosts_mcast, /* Destination of the packet */ 276 flags); 277 mutex_enter(&nce->nce_lock); 278 if (dropped) 279 nce->nce_unsolicit_count++; 280 if (nce->nce_unsolicit_count != 0) { 281 nce->nce_timeout_id = timeout(ndp_timer, nce, 282 MSEC_TO_TICK(ip_ndp_unsolicit_interval)); 283 } 284 mutex_exit(&nce->nce_lock); 285 mutex_enter(&ndp6.ndp_g_lock); 286 } 287 /* 288 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 289 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 
290 * We call nce_fastpath from nce_update if the link layer address of 291 * the peer changes from nce_update 292 */ 293 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 294 nce_fastpath(nce); 295 return (0); 296 } 297 298 int 299 ndp_lookup_then_add(ill_t *ill, uchar_t *hw_addr, const void *addr, 300 const void *mask, const void *extract_mask, 301 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 302 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 303 { 304 int status; 305 306 if (ill->ill_isv6) { 307 status = ndp_lookup_then_add_v6(ill, hw_addr, 308 (in6_addr_t *)addr, (in6_addr_t *)mask, 309 (in6_addr_t *)extract_mask, hw_extract_start, flags, 310 state, newnce, fp_mp, res_mp); 311 } else { 312 status = ndp_lookup_then_add_v4(ill, hw_addr, 313 (in_addr_t *)addr, (in_addr_t *)mask, 314 (in_addr_t *)extract_mask, hw_extract_start, flags, 315 state, newnce, fp_mp, res_mp); 316 } 317 318 return (status); 319 } 320 321 static int 322 ndp_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 323 const in6_addr_t *mask, const in6_addr_t *extract_mask, 324 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 325 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 326 { 327 int err = 0; 328 nce_t *nce; 329 330 ASSERT(ill != NULL && ill->ill_isv6); 331 mutex_enter(&ndp6.ndp_g_lock); 332 nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); /* head of v6 hash table */ 333 nce = nce_lookup_addr(ill, addr, nce); 334 if (nce == NULL) { 335 err = ndp_add(ill, 336 hw_addr, 337 addr, 338 mask, 339 extract_mask, 340 hw_extract_start, 341 flags, 342 state, 343 newnce, 344 fp_mp, 345 res_mp); 346 } else { 347 *newnce = nce; 348 err = EEXIST; 349 } 350 mutex_exit(&ndp6.ndp_g_lock); 351 return (err); 352 } 353 354 /* 355 * Remove all the CONDEMNED nces from the appropriate hash table. 
356 * We create a private list of NCEs, these may have ires pointing 357 * to them, so the list will be passed through to clean up dependent 358 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 359 */ 360 static void 361 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 362 { 363 nce_t *nce1; 364 nce_t **ptpn; 365 366 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 367 ASSERT(ndp->ndp_g_walker == 0); 368 for (; nce; nce = nce1) { 369 nce1 = nce->nce_next; 370 mutex_enter(&nce->nce_lock); 371 if (nce->nce_flags & NCE_F_CONDEMNED) { 372 ptpn = nce->nce_ptpn; 373 nce1 = nce->nce_next; 374 if (nce1 != NULL) 375 nce1->nce_ptpn = ptpn; 376 *ptpn = nce1; 377 nce->nce_ptpn = NULL; 378 nce->nce_next = NULL; 379 nce->nce_next = *free_nce_list; 380 *free_nce_list = nce; 381 } 382 mutex_exit(&nce->nce_lock); 383 } 384 } 385 386 /* 387 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 388 * will return this NCE. Also no new IREs will be created that 389 * point to this NCE (See ire_add_v6). Also no new timeouts will 390 * be started (See NDP_RESTART_TIMER). 391 * 2. Cancel any currently running timeouts. 392 * 3. If there is an ndp walker, return. The walker will do the cleanup. 393 * This ensures that walkers see a consistent list of NCEs while walking. 394 * 4. Otherwise remove the NCE from the list of NCEs 395 * 5. Delete all IREs pointing to this NCE. 396 */ 397 void 398 ndp_delete(nce_t *nce) 399 { 400 nce_t **ptpn; 401 nce_t *nce1; 402 int ipversion = nce->nce_ipversion; 403 ndp_g_t *ndp = (ipversion == IPV4_VERSION ? &ndp4 : &ndp6); 404 405 /* Serialize deletes */ 406 mutex_enter(&nce->nce_lock); 407 if (nce->nce_flags & NCE_F_CONDEMNED) { 408 /* Some other thread is doing the delete */ 409 mutex_exit(&nce->nce_lock); 410 return; 411 } 412 /* 413 * Caller has a refhold. Also 1 ref for being in the list. 
Thus 414 * refcnt has to be >= 2 415 */ 416 ASSERT(nce->nce_refcnt >= 2); 417 nce->nce_flags |= NCE_F_CONDEMNED; 418 mutex_exit(&nce->nce_lock); 419 420 nce_fastpath_list_delete(nce); 421 422 /* 423 * Cancel any running timer. Timeout can't be restarted 424 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 425 * Passing invalid timeout id is fine. 426 */ 427 if (nce->nce_timeout_id != 0) { 428 (void) untimeout(nce->nce_timeout_id); 429 nce->nce_timeout_id = 0; 430 } 431 432 mutex_enter(&ndp->ndp_g_lock); 433 if (nce->nce_ptpn == NULL) { 434 /* 435 * The last ndp walker has already removed this nce from 436 * the list after we marked the nce CONDEMNED and before 437 * we grabbed the global lock. 438 */ 439 mutex_exit(&ndp->ndp_g_lock); 440 return; 441 } 442 if (ndp->ndp_g_walker > 0) { 443 /* 444 * Can't unlink. The walker will clean up 445 */ 446 ndp->ndp_g_walker_cleanup = B_TRUE; 447 mutex_exit(&ndp->ndp_g_lock); 448 return; 449 } 450 451 /* 452 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 453 * the timer since it is marked CONDEMNED. 
454 */ 455 ptpn = nce->nce_ptpn; 456 nce1 = nce->nce_next; 457 if (nce1 != NULL) 458 nce1->nce_ptpn = ptpn; 459 *ptpn = nce1; 460 nce->nce_ptpn = NULL; 461 nce->nce_next = NULL; 462 mutex_exit(&ndp->ndp_g_lock); 463 464 nce_ire_delete(nce); 465 } 466 467 void 468 ndp_inactive(nce_t *nce) 469 { 470 mblk_t **mpp; 471 ill_t *ill; 472 473 ASSERT(nce->nce_refcnt == 0); 474 ASSERT(MUTEX_HELD(&nce->nce_lock)); 475 ASSERT(nce->nce_fastpath == NULL); 476 477 /* Free all nce allocated messages */ 478 mpp = &nce->nce_first_mp_to_free; 479 do { 480 while (*mpp != NULL) { 481 mblk_t *mp; 482 483 mp = *mpp; 484 *mpp = mp->b_next; 485 mp->b_next = NULL; 486 mp->b_prev = NULL; 487 freemsg(mp); 488 } 489 } while (mpp++ != &nce->nce_last_mp_to_free); 490 491 #ifdef NCE_DEBUG 492 nce_trace_inactive(nce); 493 #endif 494 495 ill = nce->nce_ill; 496 mutex_enter(&ill->ill_lock); 497 ill->ill_nce_cnt--; 498 /* 499 * If the number of nce's associated with this ill have dropped 500 * to zero, check whether we need to restart any operation that 501 * is waiting for this to happen. 502 */ 503 if (ill->ill_nce_cnt == 0) { 504 /* ipif_ill_refrele_tail drops the ill_lock */ 505 ipif_ill_refrele_tail(ill); 506 } else { 507 mutex_exit(&ill->ill_lock); 508 } 509 mutex_destroy(&nce->nce_lock); 510 freeb(nce->nce_mp); 511 } 512 513 /* 514 * ndp_walk routine. Delete the nce if it is associated with the ill 515 * that is going away. Always called as a writer. 516 */ 517 void 518 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 519 { 520 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 521 ndp_delete(nce); 522 } 523 } 524 525 /* 526 * Walk a list of to be inactive NCEs and blow away all the ires. 
527 */ 528 static void 529 nce_ire_delete_list(nce_t *nce) 530 { 531 nce_t *nce_next; 532 533 ASSERT(nce != NULL); 534 while (nce != NULL) { 535 nce_next = nce->nce_next; 536 nce->nce_next = NULL; 537 538 /* 539 * It is possible for the last ndp walker (this thread) 540 * to come here after ndp_delete has marked the nce CONDEMNED 541 * and before it has removed the nce from the fastpath list 542 * or called untimeout. So we need to do it here. It is safe 543 * for both ndp_delete and this thread to do it twice or 544 * even simultaneously since each of the threads has a 545 * reference on the nce. 546 */ 547 nce_fastpath_list_delete(nce); 548 /* 549 * Cancel any running timer. Timeout can't be restarted 550 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 551 * Passing invalid timeout id is fine. 552 */ 553 if (nce->nce_timeout_id != 0) { 554 (void) untimeout(nce->nce_timeout_id); 555 nce->nce_timeout_id = 0; 556 } 557 /* 558 * We might hit this func thus in the v4 case: 559 * ipif_down->ipif_ndp_down->ndp_walk 560 */ 561 562 if (nce->nce_ipversion == IPV4_VERSION) { 563 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 564 IRE_CACHE, nce_ire_delete1, 565 (char *)nce, nce->nce_ill); 566 } else { 567 ASSERT(nce->nce_ipversion == IPV6_VERSION); 568 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 569 IRE_CACHE, nce_ire_delete1, 570 (char *)nce, nce->nce_ill); 571 } 572 NCE_REFRELE_NOTR(nce); 573 nce = nce_next; 574 } 575 } 576 577 /* 578 * Delete an ire when the nce goes away. 
579 */ 580 /* ARGSUSED */ 581 static void 582 nce_ire_delete(nce_t *nce) 583 { 584 if (nce->nce_ipversion == IPV6_VERSION) { 585 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 586 nce_ire_delete1, (char *)nce, nce->nce_ill); 587 NCE_REFRELE_NOTR(nce); 588 } else { 589 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 590 nce_ire_delete1, (char *)nce, nce->nce_ill); 591 NCE_REFRELE_NOTR(nce); 592 } 593 } 594 595 /* 596 * ire_walk routine used to delete every IRE that shares this nce 597 */ 598 static void 599 nce_ire_delete1(ire_t *ire, char *nce_arg) 600 { 601 nce_t *nce = (nce_t *)nce_arg; 602 603 ASSERT(ire->ire_type == IRE_CACHE); 604 605 if (ire->ire_nce == nce) { 606 ASSERT(ire->ire_ipversion == nce->nce_ipversion); 607 ire_delete(ire); 608 } 609 } 610 611 /* 612 * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 613 * If one is found, the refcnt on the nce will be incremented. 614 */ 615 nce_t * 616 ndp_lookup_v6(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) 617 { 618 nce_t *nce; 619 620 ASSERT(ill != NULL && ill->ill_isv6); 621 if (!caller_holds_lock) { 622 mutex_enter(&ndp6.ndp_g_lock); 623 } 624 nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); /* head of v6 hash table */ 625 nce = nce_lookup_addr(ill, addr, nce); 626 if (nce == NULL) 627 nce = nce_lookup_mapping(ill, addr); 628 if (!caller_holds_lock) 629 mutex_exit(&ndp6.ndp_g_lock); 630 return (nce); 631 } 632 /* 633 * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 634 * If one is found, the refcnt on the nce will be incremented. 635 * Since multicast mappings are handled in arp, there are no nce_mcast_entries 636 * so we skip the nce_lookup_mapping call. 
637 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 638 */ 639 nce_t * 640 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 641 { 642 nce_t *nce; 643 in6_addr_t addr6; 644 645 if (!caller_holds_lock) { 646 mutex_enter(&ndp4.ndp_g_lock); 647 } 648 nce = *((nce_t **)NCE_HASH_PTR_V4(*addr)); /* head of v6 hash table */ 649 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 650 nce = nce_lookup_addr(ill, &addr6, nce); 651 if (!caller_holds_lock) 652 mutex_exit(&ndp4.ndp_g_lock); 653 return (nce); 654 } 655 656 /* 657 * Cache entry lookup. Try to find an nce matching the parameters passed. 658 * Look only for exact entries (no mappings). If an nce is found, increment 659 * the hold count on that nce. The caller passes in the start of the 660 * appropriate hash table, and must be holding the appropriate global 661 * lock (ndp_g_lock). 662 */ 663 static nce_t * 664 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr, nce_t *nce) 665 { 666 ndp_g_t *ndp = (ill->ill_isv6 ? &ndp6 : &ndp4); 667 668 ASSERT(ill != NULL); 669 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 670 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 671 return (NULL); 672 for (; nce != NULL; nce = nce->nce_next) { 673 if (nce->nce_ill == ill) { 674 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 675 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 676 &ipv6_all_ones)) { 677 mutex_enter(&nce->nce_lock); 678 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 679 NCE_REFHOLD_LOCKED(nce); 680 mutex_exit(&nce->nce_lock); 681 break; 682 } 683 mutex_exit(&nce->nce_lock); 684 } 685 } 686 } 687 return (nce); 688 } 689 690 /* 691 * Cache entry lookup. Try to find an nce matching the parameters passed. 692 * Look only for mappings. 
693 */ 694 static nce_t * 695 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 696 { 697 nce_t *nce; 698 699 ASSERT(ill != NULL && ill->ill_isv6); 700 ASSERT(MUTEX_HELD(&ndp6.ndp_g_lock)); 701 if (!IN6_IS_ADDR_MULTICAST(addr)) 702 return (NULL); 703 nce = ndp6.nce_mask_entries; 704 for (; nce != NULL; nce = nce->nce_next) 705 if (nce->nce_ill == ill && 706 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 707 mutex_enter(&nce->nce_lock); 708 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 709 NCE_REFHOLD_LOCKED(nce); 710 mutex_exit(&nce->nce_lock); 711 break; 712 } 713 mutex_exit(&nce->nce_lock); 714 } 715 return (nce); 716 } 717 718 /* 719 * Process passed in parameters either from an incoming packet or via 720 * user ioctl. 721 */ 722 void 723 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 724 { 725 ill_t *ill = nce->nce_ill; 726 uint32_t hw_addr_len = ill->ill_nd_lla_len; 727 mblk_t *mp; 728 boolean_t ll_updated = B_FALSE; 729 boolean_t ll_changed; 730 731 ASSERT(nce->nce_ipversion == IPV6_VERSION); 732 /* 733 * No updates of link layer address or the neighbor state is 734 * allowed, when the cache is in NONUD state. This still 735 * allows for responding to reachability solicitation. 736 */ 737 mutex_enter(&nce->nce_lock); 738 if (nce->nce_state == ND_INCOMPLETE) { 739 if (hw_addr == NULL) { 740 mutex_exit(&nce->nce_lock); 741 return; 742 } 743 nce_set_ll(nce, hw_addr); 744 /* 745 * Update nce state and send the queued packets 746 * back to ip this time ire will be added. 
747 */ 748 if (flag & ND_NA_FLAG_SOLICITED) { 749 nce_update(nce, ND_REACHABLE, NULL); 750 } else { 751 nce_update(nce, ND_STALE, NULL); 752 } 753 mutex_exit(&nce->nce_lock); 754 nce_fastpath(nce); 755 mutex_enter(&nce->nce_lock); 756 mp = nce->nce_qd_mp; 757 nce->nce_qd_mp = NULL; 758 mutex_exit(&nce->nce_lock); 759 while (mp != NULL) { 760 mblk_t *nxt_mp; 761 762 nxt_mp = mp->b_next; 763 mp->b_next = NULL; 764 if (mp->b_prev != NULL) { 765 ill_t *inbound_ill; 766 queue_t *fwdq = NULL; 767 uint_t ifindex; 768 769 ifindex = (uint_t)(uintptr_t)mp->b_prev; 770 inbound_ill = ill_lookup_on_ifindex(ifindex, 771 B_TRUE, NULL, NULL, NULL, NULL); 772 if (inbound_ill == NULL) { 773 mp->b_prev = NULL; 774 freemsg(mp); 775 return; 776 } else { 777 fwdq = inbound_ill->ill_rq; 778 } 779 mp->b_prev = NULL; 780 /* 781 * Send a forwarded packet back into ip_rput_v6 782 * just as in ire_send_v6(). 783 * Extract the queue from b_prev (set in 784 * ip_rput_data_v6). 785 */ 786 if (fwdq != NULL) { 787 /* 788 * Forwarded packets hop count will 789 * get decremented in ip_rput_data_v6 790 */ 791 put(fwdq, mp); 792 } else { 793 /* 794 * Send locally originated packets back 795 * into * ip_wput_v6. 
796 */ 797 put(ill->ill_wq, mp); 798 } 799 ill_refrele(inbound_ill); 800 } else { 801 put(ill->ill_wq, mp); 802 } 803 mp = nxt_mp; 804 } 805 return; 806 } 807 ll_changed = nce_cmp_ll_addr(nce, (char *)hw_addr, hw_addr_len); 808 if (!is_adv) { 809 /* If this is a SOLICITATION request only */ 810 if (ll_changed) 811 nce_update(nce, ND_STALE, hw_addr); 812 mutex_exit(&nce->nce_lock); 813 return; 814 } 815 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 816 /* If in any other state than REACHABLE, ignore */ 817 if (nce->nce_state == ND_REACHABLE) { 818 nce_update(nce, ND_STALE, NULL); 819 } 820 mutex_exit(&nce->nce_lock); 821 return; 822 } else { 823 if (ll_changed) { 824 nce_update(nce, ND_UNCHANGED, hw_addr); 825 ll_updated = B_TRUE; 826 } 827 if (flag & ND_NA_FLAG_SOLICITED) { 828 nce_update(nce, ND_REACHABLE, NULL); 829 } else { 830 if (ll_updated) { 831 nce_update(nce, ND_STALE, NULL); 832 } 833 } 834 mutex_exit(&nce->nce_lock); 835 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 836 NCE_F_ISROUTER)) { 837 ire_t *ire; 838 839 /* 840 * Router turned to host. We need to remove the 841 * entry as well as any default route that may be 842 * using this as a next hop. This is required by 843 * section 7.2.5 of RFC 2461. 844 */ 845 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 846 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 847 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 848 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 849 MATCH_IRE_DEFAULT); 850 if (ire != NULL) { 851 ip_rts_rtmsg(RTM_DELETE, ire, 0); 852 ire_delete(ire); 853 ire_refrele(ire); 854 } 855 ndp_delete(nce); 856 } 857 } 858 } 859 860 /* 861 * Pass arg1 to the pfi supplied, along with each nce in existence. 862 * ndp_walk() places a REFHOLD on the nce and drops the lock when 863 * walking the hash list. 
864 */ 865 void 866 ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 867 boolean_t trace) 868 { 869 870 nce_t *nce; 871 nce_t *nce1; 872 nce_t **ncep; 873 nce_t *free_nce_list = NULL; 874 875 mutex_enter(&ndp->ndp_g_lock); 876 /* Prevent ndp_delete from unlink and free of NCE */ 877 ndp->ndp_g_walker++; 878 mutex_exit(&ndp->ndp_g_lock); 879 for (ncep = ndp->nce_hash_tbl; 880 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 881 for (nce = *ncep; nce != NULL; nce = nce1) { 882 nce1 = nce->nce_next; 883 if (ill == NULL || nce->nce_ill == ill) { 884 if (trace) { 885 NCE_REFHOLD(nce); 886 (*pfi)(nce, arg1); 887 NCE_REFRELE(nce); 888 } else { 889 NCE_REFHOLD_NOTR(nce); 890 (*pfi)(nce, arg1); 891 NCE_REFRELE_NOTR(nce); 892 } 893 } 894 } 895 } 896 for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { 897 nce1 = nce->nce_next; 898 if (ill == NULL || nce->nce_ill == ill) { 899 if (trace) { 900 NCE_REFHOLD(nce); 901 (*pfi)(nce, arg1); 902 NCE_REFRELE(nce); 903 } else { 904 NCE_REFHOLD_NOTR(nce); 905 (*pfi)(nce, arg1); 906 NCE_REFRELE_NOTR(nce); 907 } 908 } 909 } 910 mutex_enter(&ndp->ndp_g_lock); 911 ndp->ndp_g_walker--; 912 /* 913 * While NCE's are removed from global list they are placed 914 * in a private list, to be passed to nce_ire_delete_list(). 915 * The reason is, there may be ires pointing to this nce 916 * which needs to cleaned up. 
917 */ 918 if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { 919 /* Time to delete condemned entries */ 920 for (ncep = ndp->nce_hash_tbl; 921 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 922 nce = *ncep; 923 if (nce != NULL) { 924 nce_remove(ndp, nce, &free_nce_list); 925 } 926 } 927 nce = ndp->nce_mask_entries; 928 if (nce != NULL) { 929 nce_remove(ndp, nce, &free_nce_list); 930 } 931 ndp->ndp_g_walker_cleanup = B_FALSE; 932 } 933 mutex_exit(&ndp->ndp_g_lock); 934 935 if (free_nce_list != NULL) { 936 nce_ire_delete_list(free_nce_list); 937 } 938 } 939 940 void 941 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1) 942 { 943 ndp_walk_common(&ndp4, ill, pfi, arg1, B_TRUE); 944 ndp_walk_common(&ndp6, ill, pfi, arg1, B_TRUE); 945 } 946 947 /* 948 * Prepend the zoneid using an ipsec_out_t for later use by functions like 949 * ip_rput_v6() after neighbor discovery has taken place. If the message 950 * block already has a M_CTL at the front of it, then simply set the zoneid 951 * appropriately. 952 */ 953 static mblk_t * 954 ndp_prepend_zone(mblk_t *mp, zoneid_t zoneid) 955 { 956 mblk_t *first_mp; 957 ipsec_out_t *io; 958 959 ASSERT(zoneid != ALL_ZONES); 960 if (mp->b_datap->db_type == M_CTL) { 961 io = (ipsec_out_t *)mp->b_rptr; 962 ASSERT(io->ipsec_out_type == IPSEC_OUT); 963 io->ipsec_out_zoneid = zoneid; 964 return (mp); 965 } 966 967 first_mp = ipsec_alloc_ipsec_out(); 968 if (first_mp == NULL) 969 return (NULL); 970 io = (ipsec_out_t *)first_mp->b_rptr; 971 /* This is not a secure packet */ 972 io->ipsec_out_secure = B_FALSE; 973 io->ipsec_out_zoneid = zoneid; 974 first_mp->b_cont = mp; 975 return (first_mp); 976 } 977 978 /* 979 * Process resolve requests. Handles both mapped entries 980 * as well as cases that needs to be send out on the wire. 981 * Lookup a NCE for a given IRE. 
Regardless of whether one exists
 * or one is created, we defer making ire point to nce until the
 * ire is actually added at which point the nce_refcnt on the nce is
 * incremented.  This is done primarily to have symmetry between ire_add()
 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
 *
 * Return values visible here: 0 when a usable nce exists (or the ill is
 * XRESOLV), EINPROGRESS when mp was queued pending resolution, ENOMEM
 * when the zone header could not be prepended, EBUSY when nce_solicit()
 * returned 0, or the error from the multicast/lookup-then-add helpers.
 */
int
ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid)
{
	nce_t		*nce;
	int		err = 0;
	uint32_t	delay_ms;
	mblk_t		*queued_mp = NULL;

	ASSERT(ill != NULL);
	ASSERT(ill->ill_isv6);
	if (IN6_IS_ADDR_MULTICAST(dst))
		return (nce_set_multicast(ill, dst));

	err = ndp_lookup_then_add(ill,
	    NULL,	/* No hardware address */
	    dst,
	    &ipv6_all_ones,
	    &ipv6_all_zeros,
	    0,
	    (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0,
	    ND_INCOMPLETE,
	    &nce,
	    NULL, /* let ndp_add figure out fastpath mp and dlureq_mp for v6 */
	    NULL);

	if (err == EEXIST) {
		/* Resolution in progress just queue the packet */
		mutex_enter(&nce->nce_lock);
		if (nce->nce_state != ND_INCOMPLETE) {
			/*
			 * Any other state implies we have
			 * a nce but IRE needs to be added ...
			 * ire_add_v6() will take care of the
			 * the case when the nce becomes CONDEMNED
			 * before the ire is added to the table.
			 */
			err = 0;
		} else {
			queued_mp = ndp_prepend_zone(mp, zoneid);
			if (queued_mp != NULL) {
				nce_queue_mp(nce, queued_mp);
				err = EINPROGRESS;
			} else {
				err = ENOMEM;
			}
		}
		mutex_exit(&nce->nce_lock);
		NCE_REFRELE(nce);
		return (err);
	}

	if (err != 0) {
		ip1dbg(("ndp_resolver: Can't create NCE %d\n", err));
		return (err);
	}

	/*
	 * New cache entry was created. Make sure that the state
	 * is not ND_INCOMPLETE. It can be in some other state
	 * even before we send out the solicitation as we could
	 * get un-solicited advertisements.
	 *
	 * If this is an XRESOLV interface, simply return 0,
	 * since we don't want to solicit just yet.
	 */
	if (ill->ill_flags & ILLF_XRESOLV) {
		NCE_REFRELE(nce);
		return (0);
	}
	rw_enter(&ill_g_lock, RW_READER);
	mutex_enter(&nce->nce_lock);
	if (nce->nce_state != ND_INCOMPLETE) {
		mutex_exit(&nce->nce_lock);
		rw_exit(&ill_g_lock);
		NCE_REFRELE(nce);
		return (0);
	}
	queued_mp = ndp_prepend_zone(mp, zoneid);
	if (queued_mp == NULL) {
		/* The caller will free mp */
		mutex_exit(&nce->nce_lock);
		rw_exit(&ill_g_lock);
		ndp_delete(nce);
		NCE_REFRELE(nce);
		return (ENOMEM);
	}
	delay_ms = nce_solicit(nce, queued_mp);
	rw_exit(&ill_g_lock);
	if (delay_ms == 0) {
		/*
		 * The caller will free mp; only the header block that
		 * was prepended here (if any) is released.
		 */
		if (queued_mp != mp)
			freeb(queued_mp);
		mutex_exit(&nce->nce_lock);
		ndp_delete(nce);
		NCE_REFRELE(nce);
		return (EBUSY);
	}
	mutex_exit(&nce->nce_lock);
	NDP_RESTART_TIMER(nce, (clock_t)delay_ms);
	NCE_REFRELE(nce);
	return (EINPROGRESS);
}

/*
 * When there is no resolver, the link layer template is passed in
 * the IRE.
 * Lookup a NCE for a given IRE.
Regardless of whether one exists
 * or one is created, we defer making ire point to nce until the
 * ire is actually added at which point the nce_refcnt on the nce is
 * incremented.  This is done primarily to have symmetry between ire_add()
 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
 *
 * Returns 0 if an nce exists or was created, otherwise the error from
 * the multicast or lookup-then-add helper.
 */
int
ndp_noresolver(ill_t *ill, const in6_addr_t *dst)
{
	nce_t		*nce;
	int		err = 0;

	ASSERT(ill != NULL);
	ASSERT(ill->ill_isv6);
	if (IN6_IS_ADDR_MULTICAST(dst))
		return (nce_set_multicast(ill, dst));

	err = ndp_lookup_then_add(ill,
	    NULL,	/* hardware address */
	    dst,
	    &ipv6_all_ones,
	    &ipv6_all_zeros,
	    0,
	    (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0,
	    ND_REACHABLE,
	    &nce,
	    NULL, /* let ndp_add figure out fp_mp/dlureq_mp for v6 */
	    NULL);

	if (err != 0 && err != EEXIST) {
		ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err));
		return (err);
	}

	/*
	 * Either a cache entry with a proper resolver cookie was
	 * created, or one already existed.  In both cases only the
	 * hold taken on our behalf needs to be dropped.
	 */
	NCE_REFRELE(nce);
	return (0);
}

/*
 * For each interface an entry is added for the unspecified multicast group.
 * Here that mapping is used to form the multicast cache entry for a particular
 * multicast destination.
1148 */ 1149 static int 1150 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1151 { 1152 nce_t *mnce; /* Multicast mapping entry */ 1153 nce_t *nce; 1154 uchar_t *hw_addr = NULL; 1155 int err = 0; 1156 1157 ASSERT(ill != NULL); 1158 ASSERT(ill->ill_isv6); 1159 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1160 1161 mutex_enter(&ndp6.ndp_g_lock); 1162 nce = *((nce_t **)NCE_HASH_PTR_V6(*dst)); 1163 nce = nce_lookup_addr(ill, dst, nce); 1164 if (nce != NULL) { 1165 mutex_exit(&ndp6.ndp_g_lock); 1166 NCE_REFRELE(nce); 1167 return (0); 1168 } 1169 /* No entry, now lookup for a mapping this should never fail */ 1170 mnce = nce_lookup_mapping(ill, dst); 1171 if (mnce == NULL) { 1172 /* Something broken for the interface. */ 1173 mutex_exit(&ndp6.ndp_g_lock); 1174 return (ESRCH); 1175 } 1176 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1177 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1178 /* 1179 * For IRE_IF_RESOLVER a hardware mapping can be 1180 * generated, for IRE_IF_NORESOLVER, resolution cookie 1181 * in the ill is copied in ndp_add(). 1182 */ 1183 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1184 if (hw_addr == NULL) { 1185 mutex_exit(&ndp6.ndp_g_lock); 1186 NCE_REFRELE(mnce); 1187 return (ENOMEM); 1188 } 1189 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1190 } 1191 NCE_REFRELE(mnce); 1192 /* 1193 * IRE_IF_NORESOLVER type simply copies the resolution 1194 * cookie passed in. So no hw_addr is needed. 1195 */ 1196 err = ndp_add(ill, 1197 hw_addr, 1198 dst, 1199 &ipv6_all_ones, 1200 &ipv6_all_zeros, 1201 0, 1202 NCE_F_NONUD, 1203 ND_REACHABLE, 1204 &nce, 1205 NULL, 1206 NULL); 1207 mutex_exit(&ndp6.ndp_g_lock); 1208 if (hw_addr != NULL) 1209 kmem_free(hw_addr, ill->ill_nd_lla_len); 1210 if (err != 0) { 1211 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1212 return (err); 1213 } 1214 NCE_REFRELE(nce); 1215 return (0); 1216 } 1217 1218 /* 1219 * Return the link layer address, and any flags of a nce. 
1220 */ 1221 int 1222 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1223 { 1224 nce_t *nce; 1225 in6_addr_t *addr; 1226 sin6_t *sin6; 1227 dl_unitdata_req_t *dl; 1228 1229 ASSERT(ill != NULL && ill->ill_isv6); 1230 sin6 = (sin6_t *)&lnr->lnr_addr; 1231 addr = &sin6->sin6_addr; 1232 1233 nce = ndp_lookup_v6(ill, addr, B_FALSE); 1234 if (nce == NULL) 1235 return (ESRCH); 1236 /* If in INCOMPLETE state, no link layer address is available yet */ 1237 if (nce->nce_state == ND_INCOMPLETE) 1238 goto done; 1239 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1240 if (ill->ill_flags & ILLF_XRESOLV) 1241 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1242 else 1243 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1244 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1245 sizeof (lnr->lnr_hdw_addr)); 1246 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1247 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1248 if (nce->nce_flags & NCE_F_ISROUTER) 1249 lnr->lnr_flags = NDF_ISROUTER_ON; 1250 if (nce->nce_flags & NCE_F_PROXY) 1251 lnr->lnr_flags |= NDF_PROXY_ON; 1252 if (nce->nce_flags & NCE_F_ANYCAST) 1253 lnr->lnr_flags |= NDF_ANYCAST_ON; 1254 done: 1255 NCE_REFRELE(nce); 1256 return (0); 1257 } 1258 1259 /* 1260 * Send Enable/Disable multicast reqs to driver. 
1261 */ 1262 int 1263 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1264 uint32_t hw_addr_offset, mblk_t *mp) 1265 { 1266 nce_t *nce; 1267 uchar_t *hw_addr; 1268 1269 ASSERT(ill != NULL && ill->ill_isv6); 1270 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1271 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1272 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1273 freemsg(mp); 1274 return (EINVAL); 1275 } 1276 mutex_enter(&ndp6.ndp_g_lock); 1277 nce = nce_lookup_mapping(ill, addr); 1278 if (nce == NULL) { 1279 mutex_exit(&ndp6.ndp_g_lock); 1280 freemsg(mp); 1281 return (ESRCH); 1282 } 1283 mutex_exit(&ndp6.ndp_g_lock); 1284 /* 1285 * Update dl_addr_length and dl_addr_offset for primitives that 1286 * have physical addresses as opposed to full saps 1287 */ 1288 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1289 case DL_ENABMULTI_REQ: 1290 /* Track the state if this is the first enabmulti */ 1291 if (ill->ill_dlpi_multicast_state == IDMS_UNKNOWN) 1292 ill->ill_dlpi_multicast_state = IDMS_INPROGRESS; 1293 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1294 break; 1295 case DL_DISABMULTI_REQ: 1296 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1297 break; 1298 default: 1299 NCE_REFRELE(nce); 1300 ip1dbg(("ndp_mcastreq: default\n")); 1301 return (EINVAL); 1302 } 1303 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1304 NCE_REFRELE(nce); 1305 putnext(ill->ill_wq, mp); 1306 return (0); 1307 } 1308 1309 /* 1310 * Send a neighbor solicitation. 1311 * Returns number of milliseconds after which we should either rexmit or abort. 1312 * Return of zero means we should abort. 1313 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1314 * 1315 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1316 * the packet. 1317 * NOTE: This routine does not consume mp. 
1318 */ 1319 uint32_t 1320 nce_solicit(nce_t *nce, mblk_t *mp) 1321 { 1322 ill_t *ill; 1323 ill_t *src_ill; 1324 ip6_t *ip6h; 1325 in6_addr_t src; 1326 in6_addr_t dst; 1327 ipif_t *ipif; 1328 ip6i_t *ip6i; 1329 boolean_t dropped = B_FALSE; 1330 1331 ASSERT(RW_READ_HELD(&ill_g_lock)); 1332 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1333 ill = nce->nce_ill; 1334 ASSERT(ill != NULL); 1335 1336 if (nce->nce_rcnt == 0) { 1337 return (0); 1338 } 1339 1340 if (mp == NULL) { 1341 ASSERT(nce->nce_qd_mp != NULL); 1342 mp = nce->nce_qd_mp; 1343 } else { 1344 nce_queue_mp(nce, mp); 1345 } 1346 1347 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1348 if (mp->b_datap->db_type == M_CTL) 1349 mp = mp->b_cont; 1350 1351 ip6h = (ip6_t *)mp->b_rptr; 1352 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1353 /* 1354 * This message should have been pulled up already in 1355 * ip_wput_v6. We can't do pullups here because the message 1356 * could be from the nce_qd_mp which could have b_next/b_prev 1357 * non-NULL. 1358 */ 1359 ip6i = (ip6i_t *)ip6h; 1360 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 1361 sizeof (ip6i_t) + IPV6_HDR_LEN); 1362 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1363 } 1364 src = ip6h->ip6_src; 1365 /* 1366 * If the src of outgoing packet is one of the assigned interface 1367 * addresses use it, otherwise we will pick the source address below. 
1368 */ 1369 src_ill = ill; 1370 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1371 if (ill->ill_group != NULL) 1372 src_ill = ill->ill_group->illgrp_ill; 1373 for (; src_ill != NULL; src_ill = src_ill->ill_group_next) { 1374 for (ipif = src_ill->ill_ipif; ipif != NULL; 1375 ipif = ipif->ipif_next) { 1376 if (IN6_ARE_ADDR_EQUAL(&src, 1377 &ipif->ipif_v6lcl_addr)) { 1378 break; 1379 } 1380 } 1381 if (ipif != NULL) 1382 break; 1383 } 1384 if (src_ill == NULL) { 1385 /* May be a forwarding packet */ 1386 src_ill = ill; 1387 src = ipv6_all_zeros; 1388 } 1389 } 1390 dst = nce->nce_addr; 1391 /* 1392 * If source address is unspecified, nce_xmit will choose 1393 * one for us and initialize the hardware address also 1394 * appropriately. 1395 */ 1396 if (IN6_IS_ADDR_UNSPECIFIED(&src)) 1397 src_ill = NULL; 1398 nce->nce_rcnt--; 1399 mutex_exit(&nce->nce_lock); 1400 rw_exit(&ill_g_lock); 1401 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, 1402 &dst, 0); 1403 rw_enter(&ill_g_lock, RW_READER); 1404 mutex_enter(&nce->nce_lock); 1405 if (dropped) 1406 nce->nce_rcnt++; 1407 return (ill->ill_reachable_retrans_time); 1408 } 1409 1410 void 1411 ndp_input_solicit(ill_t *ill, mblk_t *mp) 1412 { 1413 nd_neighbor_solicit_t *ns; 1414 uint32_t hlen = ill->ill_nd_lla_len; 1415 uchar_t *haddr = NULL; 1416 icmp6_t *icmp_nd; 1417 ip6_t *ip6h; 1418 nce_t *our_nce = NULL; 1419 in6_addr_t target; 1420 in6_addr_t src; 1421 int len; 1422 int flag = 0; 1423 nd_opt_hdr_t *opt = NULL; 1424 boolean_t bad_solicit = B_FALSE; 1425 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1426 1427 ip6h = (ip6_t *)mp->b_rptr; 1428 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1429 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1430 src = ip6h->ip6_src; 1431 ns = (nd_neighbor_solicit_t *)icmp_nd; 1432 target = ns->nd_ns_target; 1433 if (IN6_IS_ADDR_MULTICAST(&target)) { 1434 if (ip_debug > 2) { 1435 /* ip1dbg */ 1436 pr_addr_dbg("ndp_input_solicit: Target is" 1437 " multicast! 
%s\n", AF_INET6, &target); 1438 } 1439 bad_solicit = B_TRUE; 1440 goto done; 1441 } 1442 if (len > sizeof (nd_neighbor_solicit_t)) { 1443 /* Options present */ 1444 opt = (nd_opt_hdr_t *)&ns[1]; 1445 len -= sizeof (nd_neighbor_solicit_t); 1446 if (!ndp_verify_optlen(opt, len)) { 1447 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1448 bad_solicit = B_TRUE; 1449 goto done; 1450 } 1451 } 1452 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1453 /* Check to see if this is a valid DAD solicitation */ 1454 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1455 if (ip_debug > 2) { 1456 /* ip1dbg */ 1457 pr_addr_dbg("ndp_input_solicit: IPv6 " 1458 "Destination is not solicited node " 1459 "multicast %s\n", AF_INET6, 1460 &ip6h->ip6_dst); 1461 } 1462 bad_solicit = B_TRUE; 1463 goto done; 1464 } 1465 } 1466 1467 our_nce = ndp_lookup_v6(ill, &target, B_FALSE); 1468 /* 1469 * If this is a valid Solicitation, a permanent 1470 * entry should exist in the cache 1471 */ 1472 if (our_nce == NULL || 1473 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1474 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1475 "ifname=%s ", ill->ill_name)); 1476 if (ip_debug > 2) { 1477 /* ip1dbg */ 1478 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1479 } 1480 bad_solicit = B_TRUE; 1481 goto done; 1482 } 1483 1484 /* At this point we should have a verified NS per spec */ 1485 if (opt != NULL) { 1486 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1487 if (opt != NULL) { 1488 /* 1489 * No source link layer address option should 1490 * be present in a valid DAD request. 1491 */ 1492 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1493 ip1dbg(("ndp_input_solicit: source link-layer " 1494 "address option present with an " 1495 "unspecified source. 
\n")); 1496 bad_solicit = B_TRUE; 1497 goto done; 1498 } 1499 haddr = (uchar_t *)&opt[1]; 1500 if (hlen > opt->nd_opt_len * 8 || 1501 hlen == 0) { 1502 bad_solicit = B_TRUE; 1503 goto done; 1504 } 1505 } 1506 } 1507 /* 1508 * haddr can be NULL if no options are present, 1509 * or no Source link layer address is present in, 1510 * recvd NDP options of solicitation message. 1511 */ 1512 if (haddr == NULL) { 1513 nce_t *nnce; 1514 mutex_enter(&ndp6.ndp_g_lock); 1515 nnce = *((nce_t **)NCE_HASH_PTR_V6(src)); 1516 nnce = nce_lookup_addr(ill, &src, nnce); 1517 mutex_exit(&ndp6.ndp_g_lock); 1518 1519 if (nnce == NULL) { 1520 in6_addr_t dst = ipv6_solicited_node_mcast; 1521 1522 /* Form solicited node multicast address */ 1523 dst.s6_addr32[3] |= src.s6_addr32[3]; 1524 (void) nce_xmit(ill, 1525 ND_NEIGHBOR_SOLICIT, 1526 ill, 1527 B_TRUE, 1528 &target, 1529 &dst, 1530 flag); 1531 bad_solicit = B_TRUE; 1532 goto done; 1533 } 1534 } 1535 /* Set override flag, it will be reset later if need be. */ 1536 flag |= NDP_ORIDE; 1537 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1538 flag |= NDP_UNICAST; 1539 } 1540 1541 /* 1542 * Create/update the entry for the soliciting node. 1543 * or respond to outstanding queries, don't if 1544 * the source is unspecified address. 1545 */ 1546 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1547 int err = 0; 1548 nce_t *nnce; 1549 1550 ASSERT(ill->ill_isv6); 1551 err = ndp_lookup_then_add(ill, 1552 haddr, 1553 &src, /* Soliciting nodes address */ 1554 &ipv6_all_ones, 1555 &ipv6_all_zeros, 1556 0, 1557 0, 1558 ND_STALE, 1559 &nnce, 1560 NULL, 1561 NULL); 1562 switch (err) { 1563 case 0: 1564 /* done with this entry */ 1565 NCE_REFRELE(nnce); 1566 break; 1567 case EEXIST: 1568 /* 1569 * B_FALSE indicates this is not an 1570 * an advertisement. 
1571 */ 1572 ndp_process(nnce, haddr, 0, B_FALSE); 1573 NCE_REFRELE(nnce); 1574 break; 1575 default: 1576 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 1577 err)); 1578 goto done; 1579 } 1580 flag |= NDP_SOLICITED; 1581 } else { 1582 /* 1583 * This is a DAD req, multicast the advertisement 1584 * to the all-nodes address. 1585 */ 1586 src = ipv6_all_hosts_mcast; 1587 } 1588 if (our_nce->nce_flags & NCE_F_ISROUTER) 1589 flag |= NDP_ISROUTER; 1590 if (our_nce->nce_flags & NCE_F_PROXY) 1591 flag &= ~NDP_ORIDE; 1592 /* Response to a solicitation */ 1593 (void) nce_xmit(ill, 1594 ND_NEIGHBOR_ADVERT, 1595 ill, /* ill to be used for extracting ill_nd_lla */ 1596 B_TRUE, /* use ill_nd_lla */ 1597 &target, /* Source and target of the advertisement pkt */ 1598 &src, /* IP Destination (source of original pkt) */ 1599 flag); 1600 done: 1601 if (bad_solicit) 1602 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 1603 if (our_nce != NULL) 1604 NCE_REFRELE(our_nce); 1605 } 1606 1607 void 1608 ndp_input_advert(ill_t *ill, mblk_t *mp) 1609 { 1610 nd_neighbor_advert_t *na; 1611 uint32_t hlen = ill->ill_nd_lla_len; 1612 uchar_t *haddr = NULL; 1613 icmp6_t *icmp_nd; 1614 ip6_t *ip6h; 1615 nce_t *dst_nce = NULL; 1616 in6_addr_t target; 1617 nd_opt_hdr_t *opt = NULL; 1618 int len; 1619 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1620 1621 ip6h = (ip6_t *)mp->b_rptr; 1622 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1623 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1624 na = (nd_neighbor_advert_t *)icmp_nd; 1625 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 1626 (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { 1627 ip1dbg(("ndp_input_advert: Target is multicast but the " 1628 "solicited flag is not zero\n")); 1629 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 1630 return; 1631 } 1632 target = na->nd_na_target; 1633 if (IN6_IS_ADDR_MULTICAST(&target)) { 1634 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 1635 BUMP_MIB(mib, 
ipv6IfIcmpInBadNeighborAdvertisements); 1636 return; 1637 } 1638 if (len > sizeof (nd_neighbor_advert_t)) { 1639 opt = (nd_opt_hdr_t *)&na[1]; 1640 if (!ndp_verify_optlen(opt, 1641 len - sizeof (nd_neighbor_advert_t))) { 1642 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 1643 return; 1644 } 1645 /* At this point we have a verified NA per spec */ 1646 len -= sizeof (nd_neighbor_advert_t); 1647 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 1648 if (opt != NULL) { 1649 haddr = (uchar_t *)&opt[1]; 1650 if (hlen > opt->nd_opt_len * 8 || 1651 hlen == 0) { 1652 BUMP_MIB(mib, 1653 ipv6IfIcmpInBadNeighborAdvertisements); 1654 return; 1655 } 1656 } 1657 } 1658 1659 /* 1660 * If this interface is part of the group look at all the 1661 * ills in the group. 1662 */ 1663 rw_enter(&ill_g_lock, RW_READER); 1664 if (ill->ill_group != NULL) 1665 ill = ill->ill_group->illgrp_ill; 1666 1667 for (; ill != NULL; ill = ill->ill_group_next) { 1668 mutex_enter(&ill->ill_lock); 1669 if (!ILL_CAN_LOOKUP(ill)) { 1670 mutex_exit(&ill->ill_lock); 1671 continue; 1672 } 1673 ill_refhold_locked(ill); 1674 mutex_exit(&ill->ill_lock); 1675 dst_nce = ndp_lookup_v6(ill, &target, B_FALSE); 1676 /* We have to drop the lock since ndp_process calls put* */ 1677 rw_exit(&ill_g_lock); 1678 if (dst_nce != NULL) { 1679 if (na->nd_na_flags_reserved & 1680 ND_NA_FLAG_ROUTER) { 1681 dst_nce->nce_flags |= NCE_F_ISROUTER; 1682 } 1683 /* B_TRUE indicates this an advertisement */ 1684 ndp_process(dst_nce, haddr, 1685 na->nd_na_flags_reserved, B_TRUE); 1686 NCE_REFRELE(dst_nce); 1687 } 1688 rw_enter(&ill_g_lock, RW_READER); 1689 ill_refrele(ill); 1690 } 1691 rw_exit(&ill_g_lock); 1692 } 1693 1694 /* 1695 * Process NDP neighbor solicitation/advertisement messages. 1696 * The checksum has already checked o.k before reaching here. 
1697 */ 1698 void 1699 ndp_input(ill_t *ill, mblk_t *mp) 1700 { 1701 icmp6_t *icmp_nd; 1702 ip6_t *ip6h; 1703 int len; 1704 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1705 1706 1707 if (!pullupmsg(mp, -1)) { 1708 ip1dbg(("ndp_input: pullupmsg failed\n")); 1709 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1710 goto done; 1711 } 1712 ip6h = (ip6_t *)mp->b_rptr; 1713 if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 1714 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 1715 BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 1716 goto done; 1717 } 1718 /* 1719 * NDP does not accept any extension headers between the 1720 * IP header and the ICMP header since e.g. a routing 1721 * header could be dangerous. 1722 * This assumes that any AH or ESP headers are removed 1723 * by ip prior to passing the packet to ndp_input. 1724 */ 1725 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 1726 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 1727 ip6h->ip6_nxt)); 1728 BUMP_MIB(mib, ipv6IfIcmpInErrors); 1729 goto done; 1730 } 1731 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1732 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 1733 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 1734 if (icmp_nd->icmp6_code != 0) { 1735 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 1736 BUMP_MIB(mib, ipv6IfIcmpInErrors); 1737 goto done; 1738 } 1739 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1740 /* 1741 * Make sure packet length is large enough for either 1742 * a NS or a NA icmp packet. 1743 */ 1744 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 1745 ip1dbg(("ndp_input: packet too short\n")); 1746 BUMP_MIB(mib, ipv6IfIcmpInErrors); 1747 goto done; 1748 } 1749 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 1750 ndp_input_solicit(ill, mp); 1751 } else { 1752 ndp_input_advert(ill, mp); 1753 } 1754 done: 1755 freemsg(mp); 1756 } 1757 1758 /* 1759 * nce_xmit is called to form and transmit a ND solicitation or 1760 * advertisement ICMP packet. 
1761 * If source address is unspecified, appropriate source address 1762 * and link layer address will be chosen here. This function 1763 * *always* sends the link layer option. 1764 * It returns B_FALSE only if it does a successful put() to the 1765 * corresponding ill's ill_wq otherwise returns B_TRUE. 1766 */ 1767 static boolean_t 1768 nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, 1769 boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target, 1770 int flag) 1771 { 1772 uint32_t len; 1773 icmp6_t *icmp6; 1774 mblk_t *mp; 1775 ip6_t *ip6h; 1776 nd_opt_hdr_t *opt; 1777 uint_t plen; 1778 ip6i_t *ip6i; 1779 ipif_t *src_ipif = NULL; 1780 1781 /* 1782 * If we have a unspecified source(sender) address, select a 1783 * proper source address for the solicitation here itself so 1784 * that we can initialize the h/w address correctly. This is 1785 * needed for interface groups as source address can come from 1786 * the whole group and the h/w address initialized from ill will 1787 * be wrong if the source address comes from a different ill. 1788 * 1789 * Note that the NA never comes here with the unspecified source 1790 * address. The following asserts that whenever the source 1791 * address is specified, the haddr also should be specified. 1792 */ 1793 ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); 1794 1795 if (IN6_IS_ADDR_UNSPECIFIED(sender)) { 1796 ASSERT(operation != ND_NEIGHBOR_ADVERT); 1797 /* 1798 * Pick a source address for this solicitation, but 1799 * restrict the selection to addresses assigned to the 1800 * output interface (or interface group). We do this 1801 * because the destination will create a neighbor cache 1802 * entry for the source address of this packet, so the 1803 * source address had better be a valid neighbor. 
1804 */ 1805 src_ipif = ipif_select_source_v6(ill, target, RESTRICT_TO_ILL, 1806 IPV6_PREFER_SRC_DEFAULT, GLOBAL_ZONEID); 1807 if (src_ipif == NULL) { 1808 char buf[INET6_ADDRSTRLEN]; 1809 1810 ip1dbg(("nce_xmit: No source ipif for dst %s\n", 1811 inet_ntop(AF_INET6, (char *)target, buf, 1812 sizeof (buf)))); 1813 return (B_TRUE); 1814 } 1815 sender = &src_ipif->ipif_v6src_addr; 1816 hwaddr_ill = src_ipif->ipif_ill; 1817 } 1818 1819 plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7)/8; 1820 /* 1821 * Always make sure that the NS/NA packets don't get load 1822 * spread. This is needed so that the probe packets sent 1823 * by the in.mpathd daemon can really go out on the desired 1824 * interface. Probe packets are made to go out on a desired 1825 * interface by including a ip6i with ATTACH_IF flag. As these 1826 * packets indirectly end up sending/receiving NS/NA packets 1827 * (neighbor doing NUD), we have to make sure that NA 1828 * also go out on the same interface. 1829 */ 1830 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 1831 plen * 8; 1832 mp = allocb(len, BPRI_LO); 1833 if (mp == NULL) { 1834 if (src_ipif != NULL) 1835 ipif_refrele(src_ipif); 1836 return (B_TRUE); 1837 } 1838 bzero((char *)mp->b_rptr, len); 1839 mp->b_wptr = mp->b_rptr + len; 1840 1841 ip6i = (ip6i_t *)mp->b_rptr; 1842 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1843 ip6i->ip6i_nxt = IPPROTO_RAW; 1844 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 1845 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 1846 1847 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1848 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1849 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 1850 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1851 ip6h->ip6_hops = IPV6_MAX_HOPS; 1852 ip6h->ip6_dst = *target; 1853 icmp6 = (icmp6_t *)&ip6h[1]; 1854 1855 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 1856 sizeof (nd_neighbor_advert_t)); 1857 1858 if (operation == 
ND_NEIGHBOR_SOLICIT) { 1859 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 1860 1861 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 1862 ip6h->ip6_src = *sender; 1863 ns->nd_ns_target = *target; 1864 if (!(flag & NDP_UNICAST)) { 1865 /* Form multicast address of the target */ 1866 ip6h->ip6_dst = ipv6_solicited_node_mcast; 1867 ip6h->ip6_dst.s6_addr32[3] |= 1868 ns->nd_ns_target.s6_addr32[3]; 1869 } 1870 } else { 1871 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 1872 1873 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1874 ip6h->ip6_src = *sender; 1875 na->nd_na_target = *sender; 1876 if (flag & NDP_ISROUTER) 1877 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 1878 if (flag & NDP_SOLICITED) 1879 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 1880 if (flag & NDP_ORIDE) 1881 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 1882 1883 } 1884 /* Fill in link layer address and option len */ 1885 opt->nd_opt_len = (uint8_t)plen; 1886 mutex_enter(&hwaddr_ill->ill_lock); 1887 bcopy(use_nd_lla ? hwaddr_ill->ill_nd_lla : hwaddr_ill->ill_phys_addr, 1888 &opt[1], hwaddr_ill->ill_nd_lla_len); 1889 mutex_exit(&hwaddr_ill->ill_lock); 1890 icmp6->icmp6_type = (uint8_t)operation; 1891 icmp6->icmp6_code = 0; 1892 /* 1893 * Prepare for checksum by putting icmp length in the icmp 1894 * checksum field. The checksum is calculated in ip_wput_v6. 1895 */ 1896 icmp6->icmp6_cksum = ip6h->ip6_plen; 1897 1898 if (src_ipif != NULL) 1899 ipif_refrele(src_ipif); 1900 if (canput(ill->ill_wq)) { 1901 put(ill->ill_wq, mp); 1902 return (B_FALSE); 1903 } 1904 freemsg(mp); 1905 return (B_TRUE); 1906 } 1907 1908 /* 1909 * Make a link layer address (does not include the SAP) from an nce. 1910 * To form the link layer address, use the last four bytes of ipv6 1911 * address passed in and the fixed offset stored in nce. 
1912 */ 1913 static void 1914 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 1915 { 1916 uchar_t *mask, *to; 1917 ill_t *ill = nce->nce_ill; 1918 int len; 1919 1920 if (ill->ill_net_type == IRE_IF_NORESOLVER) 1921 return; 1922 ASSERT(nce->nce_res_mp != NULL); 1923 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1924 ASSERT(nce->nce_flags & NCE_F_MAPPING); 1925 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 1926 ASSERT(addr != NULL); 1927 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1928 addrpos, ill->ill_nd_lla_len); 1929 len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 1930 IPV6_ADDR_LEN); 1931 mask = (uchar_t *)&nce->nce_extract_mask; 1932 mask += (IPV6_ADDR_LEN - len); 1933 addr += (IPV6_ADDR_LEN - len); 1934 to = addrpos + nce->nce_ll_extract_start; 1935 while (len-- > 0) 1936 *to++ |= *mask++ & *addr++; 1937 } 1938 1939 /* 1940 * Pass a cache report back out via NDD. 1941 */ 1942 /* ARGSUSED */ 1943 int 1944 ndp_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) 1945 { 1946 (void) mi_mpprintf(mp, "ifname hardware addr flags" 1947 " proto addr/mask"); 1948 ndp_walk(NULL, (pfi_t)nce_report1, (uchar_t *)mp); 1949 return (0); 1950 } 1951 1952 /* 1953 * convert a link level address of arbitrary length 1954 * to an ascii string. 1955 * The caller *must* have already verified that the string buffer 1956 * is large enough to hold the entire string, including trailing NULL. 1957 */ 1958 static void 1959 lla2ascii(uint8_t *lla, int addrlen, uchar_t *buf) 1960 { 1961 uchar_t addrbyte[8]; /* needs to hold ascii for a byte plus a NULL */ 1962 int i; 1963 size_t len; 1964 1965 buf[0] = '\0'; 1966 for (i = 0; i < addrlen; i++) { 1967 addrbyte[0] = '\0'; 1968 (void) sprintf((char *)addrbyte, "%02x:", (lla[i] & 0xff)); 1969 len = strlen((const char *)addrbyte); 1970 bcopy(addrbyte, buf, len); 1971 buf = buf + len; 1972 } 1973 *--buf = '\0'; 1974 } 1975 1976 /* 1977 * Add a single line to the NDP Cache Entry Report. 
1978 */ 1979 static void 1980 nce_report1(nce_t *nce, uchar_t *mp_arg) 1981 { 1982 ill_t *ill = nce->nce_ill; 1983 char local_buf[INET6_ADDRSTRLEN]; 1984 uchar_t flags_buf[10]; 1985 uint32_t flags = nce->nce_flags; 1986 mblk_t *mp = (mblk_t *)mp_arg; 1987 uchar_t *h; 1988 uchar_t *m = flags_buf; 1989 in6_addr_t v6addr; 1990 1991 /* 1992 * Lock the nce to protect nce_res_mp from being changed 1993 * if an external resolver address resolution completes 1994 * while nce_res_mp is being accessed here. 1995 * 1996 * Deal with all address formats, not just Ethernet-specific 1997 * In addition, make sure that the mblk has enough space 1998 * before writing to it. If is doesn't, allocate a new one. 1999 */ 2000 if (nce->nce_ipversion == IPV4_VERSION) 2001 /* Don't include v4 nce_ts in NDP cache entry report */ 2002 return; 2003 2004 ASSERT(ill != NULL); 2005 v6addr = nce->nce_mask; 2006 if (flags & NCE_F_PERMANENT) 2007 *m++ = 'P'; 2008 if (flags & NCE_F_ISROUTER) 2009 *m++ = 'R'; 2010 if (flags & NCE_F_MAPPING) 2011 *m++ = 'M'; 2012 *m = '\0'; 2013 2014 if (ill->ill_net_type == IRE_IF_RESOLVER) { 2015 size_t addrlen; 2016 uchar_t *addr_buf; 2017 dl_unitdata_req_t *dl; 2018 2019 mutex_enter(&nce->nce_lock); 2020 h = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2021 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 2022 if (ill->ill_flags & ILLF_XRESOLV) 2023 addrlen = (3 * (dl->dl_dest_addr_length)); 2024 else 2025 addrlen = (3 * (ill->ill_nd_lla_len)); 2026 if (addrlen <= 0) { 2027 mutex_exit(&nce->nce_lock); 2028 (void) mi_mpprintf(mp, 2029 "%8s %9s %5s %s/%d", 2030 ill->ill_name, 2031 "None", 2032 (uchar_t *)&flags_buf, 2033 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2034 (char *)local_buf, sizeof (local_buf)), 2035 ip_mask_to_plen_v6(&v6addr)); 2036 } else { 2037 /* 2038 * Convert the hardware/lla address to ascii 2039 */ 2040 addr_buf = kmem_zalloc(addrlen, KM_NOSLEEP); 2041 if (addr_buf == NULL) { 2042 mutex_exit(&nce->nce_lock); 2043 return; 2044 } 2045 if 
(ill->ill_flags & ILLF_XRESOLV) 2046 lla2ascii((uint8_t *)h, dl->dl_dest_addr_length, 2047 addr_buf); 2048 else 2049 lla2ascii((uint8_t *)h, ill->ill_nd_lla_len, 2050 addr_buf); 2051 mutex_exit(&nce->nce_lock); 2052 (void) mi_mpprintf(mp, "%8s %17s %5s %s/%d", 2053 ill->ill_name, addr_buf, (uchar_t *)&flags_buf, 2054 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2055 (char *)local_buf, sizeof (local_buf)), 2056 ip_mask_to_plen_v6(&v6addr)); 2057 kmem_free(addr_buf, addrlen); 2058 } 2059 } else { 2060 (void) mi_mpprintf(mp, 2061 "%8s %9s %5s %s/%d", 2062 ill->ill_name, 2063 "None", 2064 (uchar_t *)&flags_buf, 2065 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2066 (char *)local_buf, sizeof (local_buf)), 2067 ip_mask_to_plen_v6(&v6addr)); 2068 } 2069 } 2070 2071 mblk_t * 2072 nce_udreq_alloc(ill_t *ill) 2073 { 2074 mblk_t *template_mp = NULL; 2075 dl_unitdata_req_t *dlur; 2076 int sap_length; 2077 2078 ASSERT(ill->ill_isv6); 2079 2080 sap_length = ill->ill_sap_length; 2081 template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 2082 ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 2083 if (template_mp == NULL) 2084 return (NULL); 2085 2086 dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 2087 dlur->dl_priority.dl_min = 0; 2088 dlur->dl_priority.dl_max = 0; 2089 dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 2090 dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); 2091 2092 /* Copy in the SAP value. */ 2093 NCE_LL_SAP_COPY(ill, template_mp); 2094 2095 return (template_mp); 2096 } 2097 2098 /* 2099 * NDP retransmit timer. 2100 * This timer goes off when: 2101 * a. It is time to retransmit NS for resolver. 2102 * b. It is time to send reachability probes. 
2103 */ 2104 void 2105 ndp_timer(void *arg) 2106 { 2107 nce_t *nce = arg; 2108 ill_t *ill = nce->nce_ill; 2109 uint32_t ms; 2110 char addrbuf[INET6_ADDRSTRLEN]; 2111 mblk_t *mp; 2112 boolean_t dropped = B_FALSE; 2113 2114 /* 2115 * The timer has to be cancelled by ndp_delete before doing the final 2116 * refrele. So the NCE is guaranteed to exist when the timer runs 2117 * until it clears the timeout_id. Before clearing the timeout_id 2118 * bump up the refcnt so that we can continue to use the nce 2119 */ 2120 ASSERT(nce != NULL); 2121 2122 /* 2123 * Grab the ill_g_lock now itself to avoid lock order problems. 2124 * nce_solicit needs ill_g_lock to be able to traverse ills 2125 */ 2126 rw_enter(&ill_g_lock, RW_READER); 2127 mutex_enter(&nce->nce_lock); 2128 NCE_REFHOLD_LOCKED(nce); 2129 nce->nce_timeout_id = 0; 2130 2131 /* 2132 * Check the reachability state first. 2133 */ 2134 switch (nce->nce_state) { 2135 case ND_DELAY: 2136 rw_exit(&ill_g_lock); 2137 nce->nce_state = ND_PROBE; 2138 mutex_exit(&nce->nce_lock); 2139 (void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 2140 &ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST); 2141 if (ip_debug > 3) { 2142 /* ip2dbg */ 2143 pr_addr_dbg("ndp_timer: state for %s changed " 2144 "to PROBE\n", AF_INET6, &nce->nce_addr); 2145 } 2146 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2147 NCE_REFRELE(nce); 2148 return; 2149 case ND_PROBE: 2150 /* must be retransmit timer */ 2151 rw_exit(&ill_g_lock); 2152 nce->nce_pcnt--; 2153 ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 2154 nce->nce_pcnt >= -1); 2155 if (nce->nce_pcnt == 0) { 2156 /* Wait RetransTimer, before deleting the entry */ 2157 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2158 nce->nce_pcnt, inet_ntop(AF_INET6, 2159 &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 2160 mutex_exit(&nce->nce_lock); 2161 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2162 } else { 2163 /* 2164 * As per RFC2461, the nce gets deleted after 2165 * MAX_UNICAST_SOLICIT 
unsuccessful re-transmissions. 2166 * Note that the first unicast solicitation is sent 2167 * during the DELAY state. 2168 */ 2169 if (nce->nce_pcnt > 0) { 2170 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2171 nce->nce_pcnt, inet_ntop(AF_INET6, 2172 &nce->nce_addr, 2173 addrbuf, sizeof (addrbuf)))); 2174 mutex_exit(&nce->nce_lock); 2175 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, 2176 NULL, B_FALSE, &ipv6_all_zeros, 2177 &nce->nce_addr, NDP_UNICAST); 2178 if (dropped) { 2179 mutex_enter(&nce->nce_lock); 2180 nce->nce_pcnt++; 2181 mutex_exit(&nce->nce_lock); 2182 } 2183 NDP_RESTART_TIMER(nce, 2184 ill->ill_reachable_retrans_time); 2185 } else { 2186 /* No hope, delete the nce */ 2187 nce->nce_state = ND_UNREACHABLE; 2188 mutex_exit(&nce->nce_lock); 2189 if (ip_debug > 2) { 2190 /* ip1dbg */ 2191 pr_addr_dbg("ndp_timer: Delete IRE for" 2192 " dst %s\n", AF_INET6, 2193 &nce->nce_addr); 2194 } 2195 ndp_delete(nce); 2196 } 2197 } 2198 NCE_REFRELE(nce); 2199 return; 2200 case ND_INCOMPLETE: 2201 /* 2202 * Must be resolvers retransmit timer. 2203 */ 2204 for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) { 2205 ip6i_t *ip6i; 2206 ip6_t *ip6h; 2207 mblk_t *data_mp; 2208 2209 /* 2210 * Walk the list of packets queued, and see if there 2211 * are any multipathing probe packets. Such packets 2212 * are always queued at the head. Since this is a 2213 * retransmit timer firing, mark such packets as 2214 * delayed in ND resolution. This info will be used 2215 * in ip_wput_v6(). Multipathing probe packets will 2216 * always have an ip6i_t. Once we hit a packet without 2217 * it, we can break out of this loop. 2218 */ 2219 if (mp->b_datap->db_type == M_CTL) 2220 data_mp = mp->b_cont; 2221 else 2222 data_mp = mp; 2223 2224 ip6h = (ip6_t *)data_mp->b_rptr; 2225 if (ip6h->ip6_nxt != IPPROTO_RAW) 2226 break; 2227 2228 /* 2229 * This message should have been pulled up already in 2230 * ip_wput_v6. We can't do pullups here because the 2231 * b_next/b_prev is non-NULL. 
2232 */ 2233 ip6i = (ip6i_t *)ip6h; 2234 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2235 sizeof (ip6i_t) + IPV6_HDR_LEN); 2236 2237 /* Mark this packet as delayed due to ND resolution */ 2238 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2239 ip6i->ip6i_flags |= IP6I_ND_DELAYED; 2240 } 2241 if (nce->nce_qd_mp != NULL) { 2242 ms = nce_solicit(nce, NULL); 2243 rw_exit(&ill_g_lock); 2244 if (ms == 0) { 2245 if (nce->nce_state != ND_REACHABLE) { 2246 mutex_exit(&nce->nce_lock); 2247 nce_resolv_failed(nce); 2248 ndp_delete(nce); 2249 } else { 2250 mutex_exit(&nce->nce_lock); 2251 } 2252 } else { 2253 mutex_exit(&nce->nce_lock); 2254 NDP_RESTART_TIMER(nce, (clock_t)ms); 2255 } 2256 NCE_REFRELE(nce); 2257 return; 2258 } 2259 mutex_exit(&nce->nce_lock); 2260 rw_exit(&ill_g_lock); 2261 NCE_REFRELE(nce); 2262 break; 2263 case ND_REACHABLE : 2264 rw_exit(&ill_g_lock); 2265 if (nce->nce_flags & NCE_F_UNSOL_ADV && 2266 nce->nce_unsolicit_count != 0) { 2267 nce->nce_unsolicit_count--; 2268 mutex_exit(&nce->nce_lock); 2269 dropped = nce_xmit(ill, 2270 ND_NEIGHBOR_ADVERT, 2271 ill, /* ill to be used for hw addr */ 2272 B_FALSE, /* use ill_phys_addr */ 2273 &nce->nce_addr, 2274 &ipv6_all_hosts_mcast, 2275 nce->nce_flags | NDP_ORIDE); 2276 if (dropped) { 2277 mutex_enter(&nce->nce_lock); 2278 nce->nce_unsolicit_count++; 2279 mutex_exit(&nce->nce_lock); 2280 } 2281 if (nce->nce_unsolicit_count != 0) { 2282 NDP_RESTART_TIMER(nce, 2283 ip_ndp_unsolicit_interval); 2284 } 2285 } else { 2286 mutex_exit(&nce->nce_lock); 2287 } 2288 NCE_REFRELE(nce); 2289 break; 2290 default: 2291 rw_exit(&ill_g_lock); 2292 mutex_exit(&nce->nce_lock); 2293 NCE_REFRELE(nce); 2294 break; 2295 } 2296 } 2297 2298 /* 2299 * Set a link layer address from the ll_addr passed in. 2300 * Copy SAP from ill. 
2301 */ 2302 static void 2303 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2304 { 2305 ill_t *ill = nce->nce_ill; 2306 uchar_t *woffset; 2307 2308 ASSERT(ll_addr != NULL); 2309 /* Always called before fast_path_probe */ 2310 ASSERT(nce->nce_fp_mp == NULL); 2311 if (ill->ill_sap_length != 0) { 2312 /* 2313 * Copy the SAP type specified in the 2314 * request into the xmit template. 2315 */ 2316 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2317 } 2318 if (ill->ill_phys_addr_length > 0) { 2319 /* 2320 * The bcopy() below used to be called for the physical address 2321 * length rather than the link layer address length. For 2322 * ethernet and many other media, the phys_addr and lla are 2323 * identical. 2324 * However, with xresolv interfaces being introduced, the 2325 * phys_addr and lla are no longer the same, and the physical 2326 * address may not have any useful meaning, so we use the lla 2327 * for IPv6 address resolution and destination addressing. 2328 * 2329 * For PPP or other interfaces with a zero length 2330 * physical address, don't do anything here. 2331 * The bcopy() with a zero phys_addr length was previously 2332 * a no-op for interfaces with a zero-length physical address. 2333 * Using the lla for them would change the way they operate. 2334 * Doing nothing in such cases preserves expected behavior. 2335 */ 2336 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2337 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2338 } 2339 } 2340 2341 static boolean_t 2342 nce_cmp_ll_addr(nce_t *nce, char *ll_addr, uint32_t ll_addr_len) 2343 { 2344 ill_t *ill = nce->nce_ill; 2345 uchar_t *ll_offset; 2346 2347 ASSERT(nce->nce_res_mp != NULL); 2348 if (ll_addr == NULL) 2349 return (B_FALSE); 2350 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2351 if (bcmp(ll_addr, (char *)ll_offset, ll_addr_len) != 0) 2352 return (B_TRUE); 2353 return (B_FALSE); 2354 } 2355 2356 /* 2357 * Updates the link layer address or the reachability state of 2358 * a cache entry. 
Reset probe counter if needed. 2359 */ 2360 static void 2361 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2362 { 2363 ill_t *ill = nce->nce_ill; 2364 boolean_t need_stop_timer = B_FALSE; 2365 boolean_t need_fastpath_update = B_FALSE; 2366 2367 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2368 ASSERT(nce->nce_ipversion == IPV6_VERSION); 2369 /* 2370 * If this interface does not do NUD, there is no point 2371 * in allowing an update to the cache entry. Although 2372 * we will respond to NS. 2373 * The only time we accept an update for a resolver when 2374 * NUD is turned off is when it has just been created. 2375 * Non-Resolvers will always be created as REACHABLE. 2376 */ 2377 if (new_state != ND_UNCHANGED) { 2378 if ((nce->nce_flags & NCE_F_NONUD) && 2379 (nce->nce_state != ND_INCOMPLETE)) 2380 return; 2381 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2382 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2383 need_stop_timer = B_TRUE; 2384 if (new_state == ND_REACHABLE) 2385 nce->nce_last = TICK_TO_MSEC(lbolt64); 2386 else { 2387 /* We force NUD in this case */ 2388 nce->nce_last = 0; 2389 } 2390 nce->nce_state = new_state; 2391 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2392 } 2393 /* 2394 * In case of fast path we need to free the the fastpath 2395 * M_DATA and do another probe. Otherwise we can just 2396 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2397 * whatever packets that happens to be transmitting at the time. 
2398 */ 2399 if (new_ll_addr != NULL) { 2400 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2401 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2402 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2403 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2404 if (nce->nce_fp_mp != NULL) { 2405 freemsg(nce->nce_fp_mp); 2406 nce->nce_fp_mp = NULL; 2407 } 2408 need_fastpath_update = B_TRUE; 2409 } 2410 mutex_exit(&nce->nce_lock); 2411 if (need_stop_timer) { 2412 (void) untimeout(nce->nce_timeout_id); 2413 nce->nce_timeout_id = 0; 2414 } 2415 if (need_fastpath_update) 2416 nce_fastpath(nce); 2417 mutex_enter(&nce->nce_lock); 2418 } 2419 2420 void 2421 nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert) 2422 { 2423 uint_t count = 0; 2424 mblk_t **mpp; 2425 2426 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2427 2428 for (mpp = &nce->nce_qd_mp; *mpp != NULL; 2429 mpp = &(*mpp)->b_next) { 2430 if (++count > 2431 nce->nce_ill->ill_max_buf) { 2432 mblk_t *tmp = nce->nce_qd_mp->b_next; 2433 2434 nce->nce_qd_mp->b_next = NULL; 2435 nce->nce_qd_mp->b_prev = NULL; 2436 freemsg(nce->nce_qd_mp); 2437 nce->nce_qd_mp = tmp; 2438 } 2439 } 2440 /* put this on the list */ 2441 if (head_insert) { 2442 mp->b_next = nce->nce_qd_mp; 2443 nce->nce_qd_mp = mp; 2444 } else { 2445 *mpp = mp; 2446 } 2447 } 2448 2449 static void 2450 nce_queue_mp(nce_t *nce, mblk_t *mp) 2451 { 2452 boolean_t head_insert = B_FALSE; 2453 ip6_t *ip6h; 2454 ip6i_t *ip6i; 2455 mblk_t *data_mp; 2456 2457 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2458 2459 if (mp->b_datap->db_type == M_CTL) 2460 data_mp = mp->b_cont; 2461 else 2462 data_mp = mp; 2463 ip6h = (ip6_t *)data_mp->b_rptr; 2464 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2465 /* 2466 * This message should have been pulled up already in 2467 * ip_wput_v6. We can't do pullups here because the message 2468 * could be from the nce_qd_mp which could have b_next/b_prev 2469 * non-NULL. 
2470 */ 2471 ip6i = (ip6i_t *)ip6h; 2472 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2473 sizeof (ip6i_t) + IPV6_HDR_LEN); 2474 /* 2475 * Multipathing probe packets have IP6I_DROP_IFDELAYED set. 2476 * This has 2 aspects mentioned below. 2477 * 1. Perform head insertion in the nce_qd_mp for these packets. 2478 * This ensures that next retransmit of ND solicitation 2479 * will use the interface specified by the probe packet, 2480 * for both NS and NA. This corresponds to the src address 2481 * in the IPv6 packet. If we insert at tail, we will be 2482 * depending on the packet at the head for successful 2483 * ND resolution. This is not reliable, because the interface 2484 * on which the NA arrives could be different from the interface 2485 * on which the NS was sent, and if the receiving interface is 2486 * failed, it will appear that the sending interface is also 2487 * failed, causing in.mpathd to misdiagnose this as link 2488 * failure. 2489 * 2. Drop the original packet, if the ND resolution did not 2490 * succeed in the first attempt. However we will create the 2491 * nce and the ire, as soon as the ND resolution succeeds. 2492 * We don't gain anything by queueing multiple probe packets 2493 * and sending them back-to-back once resolution succeeds. 2494 * It is sufficient to send just 1 packet after ND resolution 2495 * succeeds. Since mpathd is sending down probe packets at a 2496 * constant rate, we don't need to send the queued packet. We 2497 * need to queue it only for NDP resolution. The benefit of 2498 * dropping the probe packets that were delayed in ND 2499 * resolution, is that in.mpathd will not see inflated 2500 * RTT. If the ND resolution does not succeed within 2501 * in.mpathd's failure detection time, mpathd may detect 2502 * a failure, and it does not matter whether the packet 2503 * was queued or dropped. 
2504 */ 2505 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2506 head_insert = B_TRUE; 2507 } 2508 2509 nce_queue_mp_common(nce, mp, head_insert); 2510 } 2511 2512 /* 2513 * Called when address resolution failed due to a timeout. 2514 * Send an ICMP unreachable in response to all queued packets. 2515 */ 2516 void 2517 nce_resolv_failed(nce_t *nce) 2518 { 2519 mblk_t *mp, *nxt_mp, *first_mp; 2520 char buf[INET6_ADDRSTRLEN]; 2521 ip6_t *ip6h; 2522 zoneid_t zoneid = GLOBAL_ZONEID; 2523 2524 ip1dbg(("nce_resolv_failed: dst %s\n", 2525 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 2526 mutex_enter(&nce->nce_lock); 2527 mp = nce->nce_qd_mp; 2528 nce->nce_qd_mp = NULL; 2529 mutex_exit(&nce->nce_lock); 2530 while (mp != NULL) { 2531 nxt_mp = mp->b_next; 2532 mp->b_next = NULL; 2533 mp->b_prev = NULL; 2534 2535 first_mp = mp; 2536 if (mp->b_datap->db_type == M_CTL) { 2537 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 2538 ASSERT(io->ipsec_out_type == IPSEC_OUT); 2539 zoneid = io->ipsec_out_zoneid; 2540 ASSERT(zoneid != ALL_ZONES); 2541 mp = mp->b_cont; 2542 } 2543 2544 ip6h = (ip6_t *)mp->b_rptr; 2545 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2546 ip6i_t *ip6i; 2547 /* 2548 * This message should have been pulled up already 2549 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 2550 * the header is pulled up. 2551 */ 2552 ip6i = (ip6i_t *)ip6h; 2553 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 2554 sizeof (ip6i_t) + IPV6_HDR_LEN); 2555 mp->b_rptr += sizeof (ip6i_t); 2556 } 2557 /* 2558 * Ignore failure since icmp_unreachable_v6 will silently 2559 * drop packets with an unspecified source address. 2560 */ 2561 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid); 2562 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 2563 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE); 2564 mp = nxt_mp; 2565 } 2566 } 2567 2568 /* 2569 * Called by SIOCSNDP* ioctl to add/change an nce entry 2570 * and the corresponding attributes. 2571 * Disallow states other than ND_REACHABLE or ND_STALE. 
2572 */ 2573 int 2574 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 2575 { 2576 sin6_t *sin6; 2577 in6_addr_t *addr; 2578 nce_t *nce; 2579 int err; 2580 uint16_t new_flags = 0; 2581 uint16_t old_flags = 0; 2582 int inflags = lnr->lnr_flags; 2583 2584 ASSERT(ill->ill_isv6); 2585 if ((lnr->lnr_state_create != ND_REACHABLE) && 2586 (lnr->lnr_state_create != ND_STALE)) 2587 return (EINVAL); 2588 2589 sin6 = (sin6_t *)&lnr->lnr_addr; 2590 addr = &sin6->sin6_addr; 2591 2592 mutex_enter(&ndp6.ndp_g_lock); 2593 /* We know it can not be mapping so just look in the hash table */ 2594 nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); 2595 nce = nce_lookup_addr(ill, addr, nce); 2596 if (nce != NULL) 2597 new_flags = nce->nce_flags; 2598 2599 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 2600 case NDF_ISROUTER_ON: 2601 new_flags |= NCE_F_ISROUTER; 2602 break; 2603 case NDF_ISROUTER_OFF: 2604 new_flags &= ~NCE_F_ISROUTER; 2605 break; 2606 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 2607 mutex_exit(&ndp6.ndp_g_lock); 2608 if (nce != NULL) 2609 NCE_REFRELE(nce); 2610 return (EINVAL); 2611 } 2612 2613 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 2614 case NDF_ANYCAST_ON: 2615 new_flags |= NCE_F_ANYCAST; 2616 break; 2617 case NDF_ANYCAST_OFF: 2618 new_flags &= ~NCE_F_ANYCAST; 2619 break; 2620 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 2621 mutex_exit(&ndp6.ndp_g_lock); 2622 if (nce != NULL) 2623 NCE_REFRELE(nce); 2624 return (EINVAL); 2625 } 2626 2627 switch (inflags & (NDF_PROXY_ON|NDF_PROXY_OFF)) { 2628 case NDF_PROXY_ON: 2629 new_flags |= NCE_F_PROXY; 2630 break; 2631 case NDF_PROXY_OFF: 2632 new_flags &= ~NCE_F_PROXY; 2633 break; 2634 case (NDF_PROXY_OFF|NDF_PROXY_ON): 2635 mutex_exit(&ndp6.ndp_g_lock); 2636 if (nce != NULL) 2637 NCE_REFRELE(nce); 2638 return (EINVAL); 2639 } 2640 2641 if (nce == NULL) { 2642 err = ndp_add(ill, 2643 (uchar_t *)lnr->lnr_hdw_addr, 2644 addr, 2645 &ipv6_all_ones, 2646 &ipv6_all_zeros, 2647 0, 2648 new_flags, 2649 lnr->lnr_state_create, 2650 
&nce, 2651 NULL, 2652 NULL); 2653 if (err != 0) { 2654 mutex_exit(&ndp6.ndp_g_lock); 2655 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 2656 return (err); 2657 } 2658 } 2659 old_flags = nce->nce_flags; 2660 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 2661 /* 2662 * Router turned to host, delete all ires. 2663 * XXX Just delete the entry, but we need to add too. 2664 */ 2665 nce->nce_flags &= ~NCE_F_ISROUTER; 2666 mutex_exit(&ndp6.ndp_g_lock); 2667 ndp_delete(nce); 2668 NCE_REFRELE(nce); 2669 return (0); 2670 } 2671 mutex_exit(&ndp6.ndp_g_lock); 2672 2673 mutex_enter(&nce->nce_lock); 2674 nce->nce_flags = new_flags; 2675 mutex_exit(&nce->nce_lock); 2676 /* 2677 * Note that we ignore the state at this point, which 2678 * should be either STALE or REACHABLE. Instead we let 2679 * the link layer address passed in to determine the state 2680 * much like incoming packets. 2681 */ 2682 ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 2683 NCE_REFRELE(nce); 2684 return (0); 2685 } 2686 2687 /* 2688 * If the device driver supports it, we make nce_fp_mp to have 2689 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 2690 * The caller insures there is hold on nce for this function. 2691 * Note that since ill_fastpath_probe() copies the mblk there is 2692 * no need for the hold beyond this function. 2693 */ 2694 static void 2695 nce_fastpath(nce_t *nce) 2696 { 2697 ill_t *ill = nce->nce_ill; 2698 int res; 2699 2700 ASSERT(ill != NULL); 2701 if (nce->nce_fp_mp != NULL) { 2702 /* Already contains fastpath info */ 2703 return; 2704 } 2705 if (nce->nce_res_mp != NULL) { 2706 nce_fastpath_list_add(nce); 2707 res = ill_fastpath_probe(ill, nce->nce_res_mp); 2708 /* 2709 * EAGAIN is an indication of a transient error 2710 * i.e. allocation failure etc. leave the nce in the list it 2711 * will be updated when another probe happens for another ire 2712 * if not it will be taken out of the list when the ire is 2713 * deleted. 
2714 */ 2715 2716 if (res != 0 && res != EAGAIN) 2717 nce_fastpath_list_delete(nce); 2718 } 2719 } 2720 2721 /* 2722 * Drain the list of nce's waiting for fastpath response. 2723 */ 2724 void 2725 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 2726 void *arg) 2727 { 2728 2729 nce_t *next_nce; 2730 nce_t *current_nce; 2731 nce_t *first_nce; 2732 nce_t *prev_nce = NULL; 2733 2734 ASSERT(ill != NULL && ill->ill_isv6); 2735 2736 mutex_enter(&ill->ill_lock); 2737 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 2738 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 2739 next_nce = current_nce->nce_fastpath; 2740 /* 2741 * Take it off the list if we're flushing, or if the callback 2742 * routine tells us to do so. Otherwise, leave the nce in the 2743 * fastpath list to handle any pending response from the lower 2744 * layer. We can't drain the list when the callback routine 2745 * comparison failed, because the response is asynchronous in 2746 * nature, and may not arrive in the same order as the list 2747 * insertion. 2748 */ 2749 if (func == NULL || func(current_nce, arg)) { 2750 current_nce->nce_fastpath = NULL; 2751 if (current_nce == first_nce) 2752 ill->ill_fastpath_list = first_nce = next_nce; 2753 else 2754 prev_nce->nce_fastpath = next_nce; 2755 } else { 2756 /* previous element that is still in the list */ 2757 prev_nce = current_nce; 2758 } 2759 current_nce = next_nce; 2760 } 2761 mutex_exit(&ill->ill_lock); 2762 } 2763 2764 /* 2765 * Add nce to the nce fastpath list. 2766 */ 2767 void 2768 nce_fastpath_list_add(nce_t *nce) 2769 { 2770 ill_t *ill; 2771 2772 ill = nce->nce_ill; 2773 ASSERT(ill != NULL && ill->ill_isv6); 2774 2775 mutex_enter(&ill->ill_lock); 2776 mutex_enter(&nce->nce_lock); 2777 2778 /* 2779 * if nce has not been deleted and 2780 * is not already in the list add it. 
2781 */ 2782 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 2783 (nce->nce_fastpath == NULL)) { 2784 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 2785 ill->ill_fastpath_list = nce; 2786 } 2787 2788 mutex_exit(&nce->nce_lock); 2789 mutex_exit(&ill->ill_lock); 2790 } 2791 2792 /* 2793 * remove nce from the nce fastpath list. 2794 */ 2795 void 2796 nce_fastpath_list_delete(nce_t *nce) 2797 { 2798 nce_t *nce_ptr; 2799 2800 ill_t *ill; 2801 2802 ill = nce->nce_ill; 2803 ASSERT(ill != NULL); 2804 if (!ill->ill_isv6) { 2805 /* 2806 * v4 nce_t's do not have nce_fastpath set. 2807 */ 2808 return; 2809 } 2810 2811 mutex_enter(&ill->ill_lock); 2812 if (nce->nce_fastpath == NULL) 2813 goto done; 2814 2815 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 2816 2817 if (ill->ill_fastpath_list == nce) { 2818 ill->ill_fastpath_list = nce->nce_fastpath; 2819 } else { 2820 nce_ptr = ill->ill_fastpath_list; 2821 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 2822 if (nce_ptr->nce_fastpath == nce) { 2823 nce_ptr->nce_fastpath = nce->nce_fastpath; 2824 break; 2825 } 2826 nce_ptr = nce_ptr->nce_fastpath; 2827 } 2828 } 2829 2830 nce->nce_fastpath = NULL; 2831 done: 2832 mutex_exit(&ill->ill_lock); 2833 } 2834 2835 /* 2836 * Update all NCE's that are not in fastpath mode and 2837 * have an nce_fp_mp that matches mp. mp->b_cont contains 2838 * the fastpath header. 2839 * 2840 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 
2841 */ 2842 boolean_t 2843 ndp_fastpath_update(nce_t *nce, void *arg) 2844 { 2845 mblk_t *mp, *fp_mp; 2846 uchar_t *mp_rptr, *ud_mp_rptr; 2847 mblk_t *ud_mp = nce->nce_res_mp; 2848 ptrdiff_t cmplen; 2849 2850 if (nce->nce_flags & NCE_F_MAPPING) 2851 return (B_TRUE); 2852 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 2853 return (B_TRUE); 2854 2855 ip2dbg(("ndp_fastpath_update: trying\n")); 2856 mp = (mblk_t *)arg; 2857 mp_rptr = mp->b_rptr; 2858 cmplen = mp->b_wptr - mp_rptr; 2859 ASSERT(cmplen >= 0); 2860 ud_mp_rptr = ud_mp->b_rptr; 2861 /* 2862 * The nce is locked here to prevent any other threads 2863 * from accessing and changing nce_res_mp when the IPv6 address 2864 * becomes resolved to an lla while we're in the middle 2865 * of looking at and comparing the hardware address (lla). 2866 * It is also locked to prevent multiple threads in nce_fastpath_update 2867 * from examining nce_res_mp atthe same time. 2868 */ 2869 mutex_enter(&nce->nce_lock); 2870 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 2871 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 2872 mutex_exit(&nce->nce_lock); 2873 /* 2874 * Don't take the ire off the fastpath list yet, 2875 * since the response may come later. 2876 */ 2877 return (B_FALSE); 2878 } 2879 /* Matched - install mp as the fastpath mp */ 2880 ip1dbg(("ndp_fastpath_update: match\n")); 2881 fp_mp = dupb(mp->b_cont); 2882 if (fp_mp != NULL) { 2883 nce->nce_fp_mp = fp_mp; 2884 } 2885 mutex_exit(&nce->nce_lock); 2886 return (B_TRUE); 2887 } 2888 2889 /* 2890 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 2891 * driver. Note that it assumes IP is exclusive... 2892 */ 2893 /* ARGSUSED */ 2894 void 2895 ndp_fastpath_flush(nce_t *nce, char *arg) 2896 { 2897 if (nce->nce_flags & NCE_F_MAPPING) 2898 return; 2899 /* No fastpath info? */ 2900 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 2901 return; 2902 2903 /* Just delete the NCE... 
*/ 2904 ndp_delete(nce); 2905 } 2906 2907 /* 2908 * Return a pointer to a given option in the packet. 2909 * Assumes that option part of the packet have already been validated. 2910 */ 2911 nd_opt_hdr_t * 2912 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 2913 { 2914 while (optlen > 0) { 2915 if (opt->nd_opt_type == opt_type) 2916 return (opt); 2917 optlen -= 8 * opt->nd_opt_len; 2918 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 2919 } 2920 return (NULL); 2921 } 2922 2923 /* 2924 * Verify all option lengths present are > 0, also check to see 2925 * if the option lengths and packet length are consistent. 2926 */ 2927 boolean_t 2928 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 2929 { 2930 ASSERT(opt != NULL); 2931 while (optlen > 0) { 2932 if (opt->nd_opt_len == 0) 2933 return (B_FALSE); 2934 optlen -= 8 * opt->nd_opt_len; 2935 if (optlen < 0) 2936 return (B_FALSE); 2937 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 2938 } 2939 return (B_TRUE); 2940 } 2941 2942 /* 2943 * ndp_walk function. 2944 * Free a fraction of the NCE cache entries. 2945 * A fraction of zero means to not free any in that category. 2946 */ 2947 void 2948 ndp_cache_reclaim(nce_t *nce, char *arg) 2949 { 2950 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 2951 uint_t rand; 2952 2953 if (nce->nce_flags & NCE_F_PERMANENT) 2954 return; 2955 2956 rand = (uint_t)lbolt + 2957 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 2958 if (ncr->ncr_host != 0 && 2959 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 2960 ndp_delete(nce); 2961 return; 2962 } 2963 } 2964 2965 /* 2966 * ndp_walk function. 2967 * Count the number of NCEs that can be deleted. 2968 * These would be hosts but not routers. 
2969 */ 2970 void 2971 ndp_cache_count(nce_t *nce, char *arg) 2972 { 2973 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 2974 2975 if (nce->nce_flags & NCE_F_PERMANENT) 2976 return; 2977 2978 ncc->ncc_total++; 2979 if (!(nce->nce_flags & NCE_F_ISROUTER)) 2980 ncc->ncc_host++; 2981 } 2982 2983 #ifdef NCE_DEBUG 2984 th_trace_t * 2985 th_trace_nce_lookup(nce_t *nce) 2986 { 2987 int bucket_id; 2988 th_trace_t *th_trace; 2989 2990 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2991 2992 bucket_id = IP_TR_HASH(curthread); 2993 ASSERT(bucket_id < IP_TR_HASH_MAX); 2994 2995 for (th_trace = nce->nce_trace[bucket_id]; th_trace != NULL; 2996 th_trace = th_trace->th_next) { 2997 if (th_trace->th_id == curthread) 2998 return (th_trace); 2999 } 3000 return (NULL); 3001 } 3002 3003 void 3004 nce_trace_ref(nce_t *nce) 3005 { 3006 int bucket_id; 3007 th_trace_t *th_trace; 3008 3009 /* 3010 * Attempt to locate the trace buffer for the curthread. 3011 * If it does not exist, then allocate a new trace buffer 3012 * and link it in list of trace bufs for this ipif, at the head 3013 */ 3014 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3015 3016 if (nce->nce_trace_disable == B_TRUE) 3017 return; 3018 3019 th_trace = th_trace_nce_lookup(nce); 3020 if (th_trace == NULL) { 3021 bucket_id = IP_TR_HASH(curthread); 3022 th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t), 3023 KM_NOSLEEP); 3024 if (th_trace == NULL) { 3025 nce->nce_trace_disable = B_TRUE; 3026 nce_trace_inactive(nce); 3027 return; 3028 } 3029 th_trace->th_id = curthread; 3030 th_trace->th_next = nce->nce_trace[bucket_id]; 3031 th_trace->th_prev = &nce->nce_trace[bucket_id]; 3032 if (th_trace->th_next != NULL) 3033 th_trace->th_next->th_prev = &th_trace->th_next; 3034 nce->nce_trace[bucket_id] = th_trace; 3035 } 3036 ASSERT(th_trace->th_refcnt < TR_BUF_MAX - 1); 3037 th_trace->th_refcnt++; 3038 th_trace_rrecord(th_trace); 3039 } 3040 3041 void 3042 nce_untrace_ref(nce_t *nce) 3043 { 3044 th_trace_t *th_trace; 3045 3046 
ASSERT(MUTEX_HELD(&nce->nce_lock)); 3047 3048 if (nce->nce_trace_disable == B_TRUE) 3049 return; 3050 3051 th_trace = th_trace_nce_lookup(nce); 3052 ASSERT(th_trace != NULL && th_trace->th_refcnt > 0); 3053 3054 th_trace_rrecord(th_trace); 3055 th_trace->th_refcnt--; 3056 } 3057 3058 void 3059 nce_trace_inactive(nce_t *nce) 3060 { 3061 th_trace_t *th_trace; 3062 int i; 3063 3064 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3065 3066 for (i = 0; i < IP_TR_HASH_MAX; i++) { 3067 while (nce->nce_trace[i] != NULL) { 3068 th_trace = nce->nce_trace[i]; 3069 3070 /* unlink th_trace and free it */ 3071 nce->nce_trace[i] = th_trace->th_next; 3072 if (th_trace->th_next != NULL) 3073 th_trace->th_next->th_prev = 3074 &nce->nce_trace[i]; 3075 3076 th_trace->th_next = NULL; 3077 th_trace->th_prev = NULL; 3078 kmem_free(th_trace, sizeof (th_trace_t)); 3079 } 3080 } 3081 3082 } 3083 3084 /* ARGSUSED */ 3085 int 3086 nce_thread_exit(nce_t *nce, caddr_t arg) 3087 { 3088 th_trace_t *th_trace; 3089 3090 mutex_enter(&nce->nce_lock); 3091 th_trace = th_trace_nce_lookup(nce); 3092 3093 if (th_trace == NULL) { 3094 mutex_exit(&nce->nce_lock); 3095 return (0); 3096 } 3097 3098 ASSERT(th_trace->th_refcnt == 0); 3099 3100 /* unlink th_trace and free it */ 3101 *th_trace->th_prev = th_trace->th_next; 3102 if (th_trace->th_next != NULL) 3103 th_trace->th_next->th_prev = th_trace->th_prev; 3104 th_trace->th_next = NULL; 3105 th_trace->th_prev = NULL; 3106 kmem_free(th_trace, sizeof (th_trace_t)); 3107 mutex_exit(&nce->nce_lock); 3108 return (0); 3109 } 3110 #endif 3111 3112 /* 3113 * Called when address resolution fails due to a timeout. 3114 * Send an ICMP unreachable in response to all queued packets. 
3115 */ 3116 void 3117 arp_resolv_failed(nce_t *nce) 3118 { 3119 mblk_t *mp, *nxt_mp, *first_mp; 3120 char buf[INET6_ADDRSTRLEN]; 3121 zoneid_t zoneid = GLOBAL_ZONEID; 3122 struct in_addr ipv4addr; 3123 3124 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); 3125 ip3dbg(("arp_resolv_failed: dst %s\n", 3126 inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf)))); 3127 mutex_enter(&nce->nce_lock); 3128 mp = nce->nce_qd_mp; 3129 nce->nce_qd_mp = NULL; 3130 mutex_exit(&nce->nce_lock); 3131 3132 while (mp != NULL) { 3133 nxt_mp = mp->b_next; 3134 mp->b_next = NULL; 3135 mp->b_prev = NULL; 3136 3137 first_mp = mp; 3138 /* 3139 * Send icmp unreachable messages 3140 * to the hosts. 3141 */ 3142 (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid); 3143 ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); 3144 icmp_unreachable(nce->nce_ill->ill_wq, first_mp, 3145 ICMP_HOST_UNREACHABLE); 3146 mp = nxt_mp; 3147 } 3148 } 3149 3150 static int 3151 ndp_lookup_then_add_v4(ill_t *ill, uchar_t *hw_addr, const in_addr_t *addr, 3152 const in_addr_t *mask, const in_addr_t *extract_mask, 3153 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 3154 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 3155 { 3156 int err = 0; 3157 nce_t *nce; 3158 in6_addr_t addr6; 3159 3160 mutex_enter(&ndp4.ndp_g_lock); 3161 nce = *((nce_t **)NCE_HASH_PTR_V4(*addr)); 3162 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3163 nce = nce_lookup_addr(ill, &addr6, nce); 3164 if (nce == NULL) { 3165 err = ndp_add_v4(ill, 3166 hw_addr, 3167 addr, 3168 mask, 3169 extract_mask, 3170 hw_extract_start, 3171 flags, 3172 state, 3173 newnce, 3174 fp_mp, 3175 res_mp); 3176 } else { 3177 *newnce = nce; 3178 err = EEXIST; 3179 } 3180 mutex_exit(&ndp4.ndp_g_lock); 3181 return (err); 3182 } 3183 3184 /* 3185 * NDP Cache Entry creation routine for IPv4. 3186 * Mapped entries are handled in arp. 3187 * This routine must always be called with ndp4.ndp_g_lock held. 3188 * Prior to return, nce_refcnt is incremented. 
3189 */ 3190 static int 3191 ndp_add_v4(ill_t *ill, uchar_t *hw_addr, const in_addr_t *addr, 3192 const in_addr_t *mask, const in_addr_t *extract_mask, 3193 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 3194 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 3195 { 3196 static nce_t nce_nil; 3197 nce_t *nce; 3198 mblk_t *mp; 3199 mblk_t *template; 3200 nce_t **ncep; 3201 3202 ASSERT(MUTEX_HELD(&ndp4.ndp_g_lock)); 3203 ASSERT(ill != NULL); 3204 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 3205 return (EINVAL); 3206 } 3207 ASSERT((flags & NCE_F_MAPPING) == 0); 3208 ASSERT(extract_mask == NULL); 3209 /* 3210 * Allocate the mblk to hold the nce. 3211 */ 3212 mp = allocb(sizeof (nce_t), BPRI_MED); 3213 if (mp == NULL) 3214 return (ENOMEM); 3215 3216 nce = (nce_t *)mp->b_rptr; 3217 mp->b_wptr = (uchar_t *)&nce[1]; 3218 *nce = nce_nil; 3219 3220 /* 3221 * This one holds link layer address; if res_mp has been provided 3222 * by the caller, accept it without any further checks. Otherwise, 3223 * for V4, we fill it up with ill_resolver_mp here, then in 3224 * in ire_arpresolve(), we fill it up with the ARP query 3225 * once its formulated. 3226 */ 3227 if (res_mp != NULL) { 3228 template = res_mp; 3229 } else { 3230 template = copyb(ill->ill_resolver_mp); 3231 } 3232 if (template == NULL) { 3233 freeb(mp); 3234 return (ENOMEM); 3235 } 3236 nce->nce_ill = ill; 3237 nce->nce_ipversion = IPV4_VERSION; 3238 nce->nce_flags = flags; 3239 nce->nce_state = state; 3240 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 3241 nce->nce_rcnt = ill->ill_xmit_count; 3242 IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr); 3243 if (*mask == IP_HOST_MASK) { 3244 nce->nce_mask = ipv6_all_ones; 3245 } else { 3246 IN6_IPADDR_TO_V4MAPPED(*mask, &nce->nce_mask); 3247 } 3248 nce->nce_extract_mask = ipv6_all_zeros; 3249 nce->nce_ll_extract_start = hw_extract_start; 3250 nce->nce_fp_mp = (fp_mp? 
fp_mp : NULL); 3251 nce->nce_res_mp = template; 3252 if (state == ND_REACHABLE) 3253 nce->nce_last = TICK_TO_MSEC(lbolt64); 3254 else 3255 nce->nce_last = 0; 3256 nce->nce_qd_mp = NULL; 3257 nce->nce_mp = mp; 3258 if (hw_addr != NULL) 3259 nce_set_ll(nce, hw_addr); 3260 /* This one is for nce getting created */ 3261 nce->nce_refcnt = 1; 3262 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 3263 ncep = ((nce_t **)NCE_HASH_PTR_V4(*addr)); 3264 3265 #ifdef NCE_DEBUG 3266 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 3267 #endif 3268 /* 3269 * Atomically ensure that the ill is not CONDEMNED, before 3270 * adding the NCE. 3271 */ 3272 mutex_enter(&ill->ill_lock); 3273 if (ill->ill_state_flags & ILL_CONDEMNED) { 3274 mutex_exit(&ill->ill_lock); 3275 freeb(mp); 3276 if (res_mp == NULL) { 3277 /* 3278 * template was locally allocated. need to free it. 3279 */ 3280 freeb(template); 3281 } 3282 return (EINVAL); 3283 } 3284 if ((nce->nce_next = *ncep) != NULL) 3285 nce->nce_next->nce_ptpn = &nce->nce_next; 3286 *ncep = nce; 3287 nce->nce_ptpn = ncep; 3288 *newnce = nce; 3289 /* This one is for nce being used by an active thread */ 3290 NCE_REFHOLD(*newnce); 3291 3292 /* Bump up the number of nce's referencing this ill */ 3293 ill->ill_nce_cnt++; 3294 mutex_exit(&ill->ill_lock); 3295 return (0); 3296 } 3297 3298 void 3299 ndp_flush_qd_mp(nce_t *nce) 3300 { 3301 mblk_t *qd_mp, *qd_next; 3302 3303 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3304 qd_mp = nce->nce_qd_mp; 3305 nce->nce_qd_mp = NULL; 3306 while (qd_mp != NULL) { 3307 qd_next = qd_mp->b_next; 3308 qd_mp->b_next = NULL; 3309 qd_mp->b_prev = NULL; 3310 freemsg(qd_mp); 3311 qd_mp = qd_next; 3312 } 3313 } 3314 3315 nce_t * 3316 nce_reinit(nce_t *nce) 3317 { 3318 nce_t *newnce = NULL; 3319 in_addr_t nce_addr, nce_mask; 3320 3321 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3322 IN6_V4MAPPED_TO_IPADDR(&nce->nce_mask, nce_mask); 3323 /* 3324 * delete the old one. 
this will get rid of any ire's pointing 3325 * at this nce. 3326 */ 3327 ndp_delete(nce); 3328 /* 3329 * create a new nce with the same addr and mask. 3330 */ 3331 mutex_enter(&ndp4.ndp_g_lock); 3332 (void) ndp_add_v4(nce->nce_ill, NULL, &nce_addr, &nce_mask, NULL, 0, 0, 3333 ND_INITIAL, &newnce, NULL, NULL); 3334 mutex_exit(&ndp4.ndp_g_lock); 3335 /* 3336 * refrele the old nce. 3337 */ 3338 NCE_REFRELE(nce); 3339 return (newnce); 3340 } 3341