1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/stropts.h> 31 #include <sys/sysmacros.h> 32 #include <sys/errno.h> 33 #include <sys/dlpi.h> 34 #include <sys/socket.h> 35 #include <sys/ddi.h> 36 #include <sys/cmn_err.h> 37 #include <sys/debug.h> 38 #include <sys/vtrace.h> 39 #include <sys/kmem.h> 40 #include <sys/zone.h> 41 42 #include <net/if.h> 43 #include <net/if_dl.h> 44 #include <net/route.h> 45 #include <netinet/in.h> 46 #include <netinet/ip6.h> 47 #include <netinet/icmp6.h> 48 49 #include <inet/common.h> 50 #include <inet/mi.h> 51 #include <inet/mib2.h> 52 #include <inet/nd.h> 53 #include <inet/ip.h> 54 #include <inet/ip_if.h> 55 #include <inet/ip_ire.h> 56 #include <inet/ip_rts.h> 57 #include <inet/ip6.h> 58 #include <inet/ip_ndp.h> 59 #include <inet/ipsec_impl.h> 60 #include <inet/ipsec_info.h> 61 62 /* 63 * Function names with nce_ prefix are static while function 64 * names with ndp_ prefix are used by rest of the IP. 65 */ 66 67 static boolean_t nce_cmp_ll_addr(nce_t *nce, char *new_ll_addr, 68 uint32_t ll_addr_len); 69 static void nce_fastpath(nce_t *nce); 70 static void nce_ire_delete(nce_t *nce); 71 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 72 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 73 static nce_t *nce_lookup_addr(ill_t *ill, const in6_addr_t *addr); 74 static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); 75 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 76 uchar_t *addr); 77 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 78 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 79 static void nce_report1(nce_t *nce, uchar_t *mp_arg); 80 static mblk_t *nce_udreq_alloc(ill_t *ill); 81 static void nce_update(nce_t *nce, uint16_t new_state, 82 uchar_t *new_ll_addr); 83 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 84 static boolean_t nce_xmit(ill_t *ill, uint32_t operation, 85 ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, 86 const in6_addr_t *target, int flag); 87 static void lla2ascii(uint8_t *lla, int addrlen, uchar_t *buf); 88 extern void th_trace_rrecord(th_trace_t *); 89 90 #ifdef NCE_DEBUG 91 void nce_trace_inactive(nce_t *); 92 #endif 93 94 /* NDP Cache Entry Hash Table */ 95 #define NCE_TABLE_SIZE 256 96 static nce_t *nce_hash_tbl[NCE_TABLE_SIZE]; 97 static nce_t *nce_mask_entries; /* mask not all ones */ 98 static int ndp_g_walker = 0; /* # of active thread */ 99 /* walking nce hash list */ 100 /* ndp_g_walker_cleanup will be true, when deletion have to be defered */ 101 static boolean_t ndp_g_walker_cleanup = B_FALSE; 102 103 #ifdef _BIG_ENDIAN 104 #define IN6_IS_ADDR_MC_SOLICITEDNODE(addr) \ 105 ((((addr)->s6_addr32[0] & 0xff020000) == 0xff020000) && \ 106 ((addr)->s6_addr32[1] == 0x0) && \ 107 ((addr)->s6_addr32[2] == 0x00000001) && \ 108 ((addr)->s6_addr32[3] & 0xff000000) == 0xff000000) 109 #else /* _BIG_ENDIAN */ 110 #define IN6_IS_ADDR_MC_SOLICITEDNODE(addr) \ 111 ((((addr)->s6_addr32[0] & 0x000002ff) == 0x000002ff) && \ 112 ((addr)->s6_addr32[1] == 0x0) && \ 113 ((addr)->s6_addr32[2] == 0x01000000) && \ 114 ((addr)->s6_addr32[3] & 0x000000ff) == 0x000000ff) 115 #endif 116 117 #define NCE_HASH_PTR(addr) \ 118 (&(nce_hash_tbl[NCE_ADDR_HASH_V6(addr, NCE_TABLE_SIZE)])) 119 120 /* 121 * NDP Cache Entry creation routine. 122 * Mapped entries will never do NUD . 123 * This routine must always be called with ndp_g_lock held. 124 * Prior to return, nce_refcnt is incremented. 125 */ 126 int 127 ndp_add(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 128 const in6_addr_t *mask, const in6_addr_t *extract_mask, 129 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 130 nce_t **newnce) 131 { 132 static nce_t nce_nil; 133 nce_t *nce; 134 mblk_t *mp; 135 mblk_t *template; 136 nce_t **ncep; 137 boolean_t dropped = B_FALSE; 138 139 ASSERT(MUTEX_HELD(&ndp_g_lock)); 140 ASSERT(ill != NULL); 141 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 142 ip0dbg(("ndp_add: no addr\n")); 143 return (EINVAL); 144 } 145 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 146 ip0dbg(("ndp_add: flags = %x\n", (int)flags)); 147 return (EINVAL); 148 } 149 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 150 (flags & NCE_F_MAPPING)) { 151 ip0dbg(("ndp_add: extract mask zero for mapping")); 152 return (EINVAL); 153 } 154 /* 155 * Allocate the mblk to hold the nce. 156 * 157 * XXX This can come out of a separate cache - nce_cache. 158 * We don't need the mp anymore as there are no more 159 * "qwriter"s 160 */ 161 mp = allocb(sizeof (nce_t), BPRI_MED); 162 if (mp == NULL) 163 return (ENOMEM); 164 165 nce = (nce_t *)mp->b_rptr; 166 mp->b_wptr = (uchar_t *)&nce[1]; 167 *nce = nce_nil; 168 169 /* 170 * This one holds link layer address 171 */ 172 if (ill->ill_net_type == IRE_IF_RESOLVER) { 173 template = nce_udreq_alloc(ill); 174 } else { 175 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 176 ASSERT((ill->ill_resolver_mp != NULL)); 177 template = copyb(ill->ill_resolver_mp); 178 } 179 if (template == NULL) { 180 freeb(mp); 181 return (ENOMEM); 182 } 183 nce->nce_ill = ill; 184 nce->nce_flags = flags; 185 nce->nce_state = state; 186 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 187 nce->nce_rcnt = ill->ill_xmit_count; 188 nce->nce_addr = *addr; 189 nce->nce_mask = *mask; 190 nce->nce_extract_mask = *extract_mask; 191 nce->nce_ll_extract_start = hw_extract_start; 192 nce->nce_fp_mp = NULL; 193 nce->nce_res_mp = template; 194 if (state == ND_REACHABLE) 195 nce->nce_last = TICK_TO_MSEC(lbolt64); 196 else 197 nce->nce_last = 0; 198 nce->nce_qd_mp = NULL; 199 nce->nce_mp = mp; 200 if (hw_addr != NULL) 201 nce_set_ll(nce, hw_addr); 202 /* This one is for nce getting created */ 203 nce->nce_refcnt = 1; 204 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 205 if (nce->nce_flags & NCE_F_MAPPING) { 206 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 207 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 208 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 209 ncep = &nce_mask_entries; 210 } else { 211 ncep = ((nce_t **)NCE_HASH_PTR(*addr)); 212 } 213 214 #ifdef NCE_DEBUG 215 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 216 #endif 217 /* 218 * Atomically ensure that the ill is not CONDEMNED, before 219 * adding the NCE. 220 */ 221 mutex_enter(&ill->ill_lock); 222 if (ill->ill_state_flags & ILL_CONDEMNED) { 223 mutex_exit(&ill->ill_lock); 224 freeb(mp); 225 return (EINVAL); 226 } 227 if ((nce->nce_next = *ncep) != NULL) 228 nce->nce_next->nce_ptpn = &nce->nce_next; 229 *ncep = nce; 230 nce->nce_ptpn = ncep; 231 *newnce = nce; 232 /* This one is for nce being used by an active thread */ 233 NCE_REFHOLD(*newnce); 234 235 /* Bump up the number of nce's referencing this ill */ 236 ill->ill_nce_cnt++; 237 mutex_exit(&ill->ill_lock); 238 239 /* 240 * Before we insert the nce, honor the UNSOL_ADV flag. 241 * We cannot hold the ndp_g_lock and call nce_xmit 242 * which does a putnext. 243 */ 244 if (flags & NCE_F_UNSOL_ADV) { 245 flags |= NDP_ORIDE; 246 /* 247 * We account for the transmit below by assigning one 248 * less than the ndd variable. Subsequent decrements 249 * are done in ndp_timer. 250 */ 251 mutex_enter(&nce->nce_lock); 252 mutex_exit(&ndp_g_lock); 253 nce->nce_unsolicit_count = ip_ndp_unsolicit_count - 1; 254 mutex_exit(&nce->nce_lock); 255 dropped = nce_xmit(ill, 256 ND_NEIGHBOR_ADVERT, 257 ill, /* ill to be used for extracting ill_nd_lla */ 258 B_TRUE, /* use ill_nd_lla */ 259 addr, /* Source and target of the advertisement pkt */ 260 &ipv6_all_hosts_mcast, /* Destination of the packet */ 261 flags); 262 mutex_enter(&nce->nce_lock); 263 if (dropped) 264 nce->nce_unsolicit_count++; 265 if (nce->nce_unsolicit_count != 0) { 266 nce->nce_timeout_id = timeout(ndp_timer, nce, 267 MSEC_TO_TICK(ip_ndp_unsolicit_interval)); 268 } 269 mutex_exit(&nce->nce_lock); 270 mutex_enter(&ndp_g_lock); 271 } 272 /* 273 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 274 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 275 * We call nce_fastpath from nce_update if the link layer address of 276 * the peer changes from nce_update 277 */ 278 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 279 nce_fastpath(nce); 280 return (0); 281 } 282 283 int 284 ndp_lookup_then_add(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 285 const in6_addr_t *mask, const in6_addr_t *extract_mask, 286 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 287 nce_t **newnce) 288 { 289 int err = 0; 290 nce_t *nce; 291 292 mutex_enter(&ndp_g_lock); 293 nce = nce_lookup_addr(ill, addr); 294 if (nce == NULL) { 295 err = ndp_add(ill, 296 hw_addr, 297 addr, 298 mask, 299 extract_mask, 300 hw_extract_start, 301 flags, 302 state, 303 newnce); 304 } else { 305 *newnce = nce; 306 err = EEXIST; 307 } 308 mutex_exit(&ndp_g_lock); 309 return (err); 310 } 311 312 /* 313 * Remove all the CONDEMNED nces from the appropriate hash table. 314 * We create a private list of NCEs, these may have ires pointing 315 * to them, so the list will be passed through to clean up dependent 316 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 317 */ 318 static void 319 nce_remove(nce_t *nce, nce_t **free_nce_list) 320 { 321 nce_t *nce1; 322 nce_t **ptpn; 323 324 ASSERT(MUTEX_HELD(&ndp_g_lock)); 325 ASSERT(ndp_g_walker == 0); 326 for (; nce; nce = nce1) { 327 nce1 = nce->nce_next; 328 mutex_enter(&nce->nce_lock); 329 if (nce->nce_flags & NCE_F_CONDEMNED) { 330 ptpn = nce->nce_ptpn; 331 nce1 = nce->nce_next; 332 if (nce1 != NULL) 333 nce1->nce_ptpn = ptpn; 334 *ptpn = nce1; 335 nce->nce_ptpn = NULL; 336 nce->nce_next = NULL; 337 nce->nce_next = *free_nce_list; 338 *free_nce_list = nce; 339 } 340 mutex_exit(&nce->nce_lock); 341 } 342 } 343 344 /* 345 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 346 * will return this NCE. Also no new IREs will be created that 347 * point to this NCE (See ire_add_v6). Also no new timeouts will 348 * be started (See NDP_RESTART_TIMER). 349 * 2. Cancel any currently running timeouts. 350 * 3. If there is an ndp walker, return. The walker will do the cleanup. 351 * This ensures that walkers see a consistent list of NCEs while walking. 352 * 4. Otherwise remove the NCE from the list of NCEs 353 * 5. Delete all IREs pointing to this NCE. 354 */ 355 void 356 ndp_delete(nce_t *nce) 357 { 358 nce_t **ptpn; 359 nce_t *nce1; 360 361 /* Serialize deletes */ 362 mutex_enter(&nce->nce_lock); 363 if (nce->nce_flags & NCE_F_CONDEMNED) { 364 /* Some other thread is doing the delete */ 365 mutex_exit(&nce->nce_lock); 366 return; 367 } 368 /* 369 * Caller has a refhold. Also 1 ref for being in the list. Thus 370 * refcnt has to be >= 2 371 */ 372 ASSERT(nce->nce_refcnt >= 2); 373 nce->nce_flags |= NCE_F_CONDEMNED; 374 mutex_exit(&nce->nce_lock); 375 376 nce_fastpath_list_delete(nce); 377 378 /* 379 * Cancel any running timer. Timeout can't be restarted 380 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 381 * Passing invalid timeout id is fine. 382 */ 383 if (nce->nce_timeout_id != 0) { 384 (void) untimeout(nce->nce_timeout_id); 385 nce->nce_timeout_id = 0; 386 } 387 388 mutex_enter(&ndp_g_lock); 389 if (nce->nce_ptpn == NULL) { 390 /* 391 * The last ndp walker has already removed this nce from 392 * the list after we marked the nce CONDEMNED and before 393 * we grabbed the ndp_g_lock. 394 */ 395 mutex_exit(&ndp_g_lock); 396 return; 397 } 398 if (ndp_g_walker > 0) { 399 /* 400 * Can't unlink. The walker will clean up 401 */ 402 ndp_g_walker_cleanup = B_TRUE; 403 mutex_exit(&ndp_g_lock); 404 return; 405 } 406 407 /* 408 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 409 * the timer since it is marked CONDEMNED. 410 */ 411 ptpn = nce->nce_ptpn; 412 nce1 = nce->nce_next; 413 if (nce1 != NULL) 414 nce1->nce_ptpn = ptpn; 415 *ptpn = nce1; 416 nce->nce_ptpn = NULL; 417 nce->nce_next = NULL; 418 mutex_exit(&ndp_g_lock); 419 420 nce_ire_delete(nce); 421 } 422 423 void 424 ndp_inactive(nce_t *nce) 425 { 426 mblk_t **mpp; 427 ill_t *ill; 428 429 ASSERT(nce->nce_refcnt == 0); 430 ASSERT(MUTEX_HELD(&nce->nce_lock)); 431 ASSERT(nce->nce_fastpath == NULL); 432 433 /* Free all nce allocated messages */ 434 mpp = &nce->nce_first_mp_to_free; 435 do { 436 while (*mpp != NULL) { 437 mblk_t *mp; 438 439 mp = *mpp; 440 *mpp = mp->b_next; 441 mp->b_next = NULL; 442 mp->b_prev = NULL; 443 freemsg(mp); 444 } 445 } while (mpp++ != &nce->nce_last_mp_to_free); 446 447 #ifdef NCE_DEBUG 448 nce_trace_inactive(nce); 449 #endif 450 451 ill = nce->nce_ill; 452 mutex_enter(&ill->ill_lock); 453 ill->ill_nce_cnt--; 454 /* 455 * If the number of nce's associated with this ill have dropped 456 * to zero, check whether we need to restart any operation that 457 * is waiting for this to happen. 458 */ 459 if (ill->ill_nce_cnt == 0) { 460 /* ipif_ill_refrele_tail drops the ill_lock */ 461 ipif_ill_refrele_tail(ill); 462 } else { 463 mutex_exit(&ill->ill_lock); 464 } 465 mutex_destroy(&nce->nce_lock); 466 freeb(nce->nce_mp); 467 } 468 469 /* 470 * ndp_walk routine. Delete the nce if it is associated with the ill 471 * that is going away. Always called as a writer. 472 */ 473 void 474 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 475 { 476 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 477 ndp_delete(nce); 478 } 479 } 480 481 /* 482 * Walk a list of to be inactive NCEs and blow away all the ires. 483 */ 484 static void 485 nce_ire_delete_list(nce_t *nce) 486 { 487 nce_t *nce_next; 488 489 ASSERT(nce != NULL); 490 while (nce != NULL) { 491 nce_next = nce->nce_next; 492 nce->nce_next = NULL; 493 494 /* 495 * It is possible for the last ndp walker (this thread) 496 * to come here after ndp_delete has marked the nce CONDEMNED 497 * and before it has removed the nce from the fastpath list 498 * or called untimeout. So we need to do it here. It is safe 499 * for both ndp_delete and this thread to do it twice or 500 * even simultaneously since each of the threads has a 501 * reference on the nce. 502 */ 503 nce_fastpath_list_delete(nce); 504 /* 505 * Cancel any running timer. Timeout can't be restarted 506 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 507 * Passing invalid timeout id is fine. 508 */ 509 if (nce->nce_timeout_id != 0) { 510 (void) untimeout(nce->nce_timeout_id); 511 nce->nce_timeout_id = 0; 512 } 513 514 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 515 nce_ire_delete1, (char *)nce, nce->nce_ill); 516 NCE_REFRELE_NOTR(nce); 517 nce = nce_next; 518 } 519 } 520 521 /* 522 * Delete an ire when the nce goes away. 523 */ 524 /* ARGSUSED */ 525 static void 526 nce_ire_delete(nce_t *nce) 527 { 528 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 529 nce_ire_delete1, (char *)nce, nce->nce_ill); 530 NCE_REFRELE_NOTR(nce); 531 } 532 533 /* 534 * ire_walk routine used to delete every IRE that shares this nce 535 */ 536 static void 537 nce_ire_delete1(ire_t *ire, char *nce_arg) 538 { 539 nce_t *nce = (nce_t *)nce_arg; 540 541 ASSERT(ire->ire_type == IRE_CACHE); 542 543 if (ire->ire_nce == nce) 544 ire_delete(ire); 545 } 546 547 /* 548 * Cache entry lookup. Try to find an nce matching the parameters passed. 549 * If one is found, the refcnt on the nce will be incremented. 550 */ 551 nce_t * 552 ndp_lookup(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) 553 { 554 nce_t *nce; 555 556 if (!caller_holds_lock) 557 mutex_enter(&ndp_g_lock); 558 nce = nce_lookup_addr(ill, addr); 559 if (nce == NULL) 560 nce = nce_lookup_mapping(ill, addr); 561 if (!caller_holds_lock) 562 mutex_exit(&ndp_g_lock); 563 return (nce); 564 } 565 566 /* 567 * Cache entry lookup. Try to find an nce matching the parameters passed. 568 * Look only for exact entries (no mappings). If an nce is found, increment 569 * the hold count on that nce. 570 */ 571 static nce_t * 572 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr) 573 { 574 nce_t *nce; 575 576 ASSERT(ill != NULL); 577 ASSERT(MUTEX_HELD(&ndp_g_lock)); 578 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 579 return (NULL); 580 nce = *((nce_t **)NCE_HASH_PTR(*addr)); 581 for (; nce != NULL; nce = nce->nce_next) { 582 if (nce->nce_ill == ill) { 583 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 584 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 585 &ipv6_all_ones)) { 586 mutex_enter(&nce->nce_lock); 587 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 588 NCE_REFHOLD_LOCKED(nce); 589 mutex_exit(&nce->nce_lock); 590 break; 591 } 592 mutex_exit(&nce->nce_lock); 593 } 594 } 595 } 596 return (nce); 597 } 598 599 /* 600 * Cache entry lookup. Try to find an nce matching the parameters passed. 601 * Look only for mappings. 602 */ 603 static nce_t * 604 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 605 { 606 nce_t *nce; 607 608 ASSERT(ill != NULL); 609 ASSERT(MUTEX_HELD(&ndp_g_lock)); 610 if (!IN6_IS_ADDR_MULTICAST(addr)) 611 return (NULL); 612 nce = nce_mask_entries; 613 for (; nce != NULL; nce = nce->nce_next) 614 if (nce->nce_ill == ill && 615 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 616 mutex_enter(&nce->nce_lock); 617 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 618 NCE_REFHOLD_LOCKED(nce); 619 mutex_exit(&nce->nce_lock); 620 break; 621 } 622 mutex_exit(&nce->nce_lock); 623 } 624 return (nce); 625 } 626 627 /* 628 * Process passed in parameters either from an incoming packet or via 629 * user ioctl. 630 */ 631 void 632 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 633 { 634 ill_t *ill = nce->nce_ill; 635 uint32_t hw_addr_len = ill->ill_nd_lla_len; 636 mblk_t *mp; 637 boolean_t ll_updated = B_FALSE; 638 boolean_t ll_changed; 639 640 /* 641 * No updates of link layer address or the neighbor state is 642 * allowed, when the cache is in NONUD state. This still 643 * allows for responding to reachability solicitation. 644 */ 645 mutex_enter(&nce->nce_lock); 646 if (nce->nce_state == ND_INCOMPLETE) { 647 if (hw_addr == NULL) { 648 mutex_exit(&nce->nce_lock); 649 return; 650 } 651 nce_set_ll(nce, hw_addr); 652 /* 653 * Update nce state and send the queued packets 654 * back to ip this time ire will be added. 655 */ 656 if (flag & ND_NA_FLAG_SOLICITED) { 657 nce_update(nce, ND_REACHABLE, NULL); 658 } else { 659 nce_update(nce, ND_STALE, NULL); 660 } 661 mutex_exit(&nce->nce_lock); 662 nce_fastpath(nce); 663 mutex_enter(&nce->nce_lock); 664 mp = nce->nce_qd_mp; 665 nce->nce_qd_mp = NULL; 666 mutex_exit(&nce->nce_lock); 667 while (mp != NULL) { 668 mblk_t *nxt_mp; 669 670 nxt_mp = mp->b_next; 671 mp->b_next = NULL; 672 if (mp->b_prev != NULL) { 673 ill_t *inbound_ill; 674 queue_t *fwdq = NULL; 675 uint_t ifindex; 676 677 ifindex = (uint_t)(uintptr_t)mp->b_prev; 678 inbound_ill = ill_lookup_on_ifindex(ifindex, 679 B_TRUE, NULL, NULL, NULL, NULL); 680 if (inbound_ill == NULL) { 681 mp->b_prev = NULL; 682 freemsg(mp); 683 return; 684 } else { 685 fwdq = inbound_ill->ill_rq; 686 } 687 mp->b_prev = NULL; 688 /* 689 * Send a forwarded packet back into ip_rput_v6 690 * just as in ire_send_v6(). 691 * Extract the queue from b_prev (set in 692 * ip_rput_data_v6). 693 */ 694 if (fwdq != NULL) { 695 /* 696 * Forwarded packets hop count will 697 * get decremented in ip_rput_data_v6 698 */ 699 put(fwdq, mp); 700 } else { 701 /* 702 * Send locally originated packets back 703 * into * ip_wput_v6. 704 */ 705 put(ill->ill_wq, mp); 706 } 707 ill_refrele(inbound_ill); 708 } else { 709 put(ill->ill_wq, mp); 710 } 711 mp = nxt_mp; 712 } 713 return; 714 } 715 ll_changed = nce_cmp_ll_addr(nce, (char *)hw_addr, hw_addr_len); 716 if (!is_adv) { 717 /* If this is a SOLICITATION request only */ 718 if (ll_changed) 719 nce_update(nce, ND_STALE, hw_addr); 720 mutex_exit(&nce->nce_lock); 721 return; 722 } 723 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 724 /* If in any other state than REACHABLE, ignore */ 725 if (nce->nce_state == ND_REACHABLE) { 726 nce_update(nce, ND_STALE, NULL); 727 } 728 mutex_exit(&nce->nce_lock); 729 return; 730 } else { 731 if (ll_changed) { 732 nce_update(nce, ND_UNCHANGED, hw_addr); 733 ll_updated = B_TRUE; 734 } 735 if (flag & ND_NA_FLAG_SOLICITED) { 736 nce_update(nce, ND_REACHABLE, NULL); 737 } else { 738 if (ll_updated) { 739 nce_update(nce, ND_STALE, NULL); 740 } 741 } 742 mutex_exit(&nce->nce_lock); 743 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 744 NCE_F_ISROUTER)) { 745 ire_t *ire; 746 747 /* 748 * Router turned to host. We need to remove the 749 * entry as well as any default route that may be 750 * using this as a next hop. This is required by 751 * section 7.2.5 of RFC 2461. 752 */ 753 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 754 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 755 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 756 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 757 MATCH_IRE_DEFAULT); 758 if (ire != NULL) { 759 ip_rts_rtmsg(RTM_DELETE, ire, 0); 760 ire_delete(ire); 761 ire_refrele(ire); 762 } 763 ndp_delete(nce); 764 } 765 } 766 } 767 768 /* 769 * Pass arg1 to the pfi supplied, along with each nce in existence. 770 * ndp_walk() places a REFHOLD on the nce and drops the lock when 771 * walking the hash list. 772 */ 773 void 774 ndp_walk_impl(ill_t *ill, pfi_t pfi, void *arg1, boolean_t trace) 775 { 776 777 nce_t *nce; 778 nce_t *nce1; 779 nce_t **ncep; 780 nce_t *free_nce_list = NULL; 781 782 mutex_enter(&ndp_g_lock); 783 ndp_g_walker++; /* Prevent ndp_delete from unlink and free of NCE */ 784 mutex_exit(&ndp_g_lock); 785 for (ncep = nce_hash_tbl; ncep < A_END(nce_hash_tbl); ncep++) { 786 for (nce = *ncep; nce; nce = nce1) { 787 nce1 = nce->nce_next; 788 if (ill == NULL || nce->nce_ill == ill) { 789 if (trace) { 790 NCE_REFHOLD(nce); 791 (*pfi)(nce, arg1); 792 NCE_REFRELE(nce); 793 } else { 794 NCE_REFHOLD_NOTR(nce); 795 (*pfi)(nce, arg1); 796 NCE_REFRELE_NOTR(nce); 797 } 798 } 799 } 800 } 801 for (nce = nce_mask_entries; nce; nce = nce1) { 802 nce1 = nce->nce_next; 803 if (ill == NULL || nce->nce_ill == ill) { 804 if (trace) { 805 NCE_REFHOLD(nce); 806 (*pfi)(nce, arg1); 807 NCE_REFRELE(nce); 808 } else { 809 NCE_REFHOLD_NOTR(nce); 810 (*pfi)(nce, arg1); 811 NCE_REFRELE_NOTR(nce); 812 } 813 } 814 } 815 mutex_enter(&ndp_g_lock); 816 ndp_g_walker--; 817 /* 818 * While NCE's are removed from global list they are placed 819 * in a private list, to be passed to nce_ire_delete_list(). 820 * The reason is, there may be ires pointing to this nce 821 * which needs to cleaned up. 822 */ 823 if (ndp_g_walker_cleanup && ndp_g_walker == 0) { 824 /* Time to delete condemned entries */ 825 for (ncep = nce_hash_tbl; ncep < A_END(nce_hash_tbl); ncep++) { 826 nce = *ncep; 827 if (nce != NULL) { 828 nce_remove(nce, &free_nce_list); 829 } 830 } 831 nce = nce_mask_entries; 832 if (nce != NULL) { 833 nce_remove(nce, &free_nce_list); 834 } 835 ndp_g_walker_cleanup = B_FALSE; 836 } 837 mutex_exit(&ndp_g_lock); 838 839 if (free_nce_list != NULL) { 840 nce_ire_delete_list(free_nce_list); 841 } 842 } 843 844 void 845 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1) 846 { 847 ndp_walk_impl(ill, pfi, arg1, B_TRUE); 848 } 849 850 /* 851 * Prepend the zoneid using an ipsec_out_t for later use by functions like 852 * ip_rput_v6() after neighbor discovery has taken place. If the message 853 * block already has a M_CTL at the front of it, then simply set the zoneid 854 * appropriately. 855 */ 856 static mblk_t * 857 ndp_prepend_zone(mblk_t *mp, zoneid_t zoneid) 858 { 859 mblk_t *first_mp; 860 ipsec_out_t *io; 861 862 ASSERT(zoneid != ALL_ZONES); 863 if (mp->b_datap->db_type == M_CTL) { 864 io = (ipsec_out_t *)mp->b_rptr; 865 ASSERT(io->ipsec_out_type == IPSEC_OUT); 866 io->ipsec_out_zoneid = zoneid; 867 return (mp); 868 } 869 870 first_mp = ipsec_alloc_ipsec_out(); 871 if (first_mp == NULL) 872 return (NULL); 873 io = (ipsec_out_t *)first_mp->b_rptr; 874 /* This is not a secure packet */ 875 io->ipsec_out_secure = B_FALSE; 876 io->ipsec_out_zoneid = zoneid; 877 first_mp->b_cont = mp; 878 return (first_mp); 879 } 880 881 /* 882 * Process resolve requests. Handles both mapped entries 883 * as well as cases that needs to be send out on the wire. 884 * Lookup a NCE for a given IRE. Regardless of whether one exists 885 * or one is created, we defer making ire point to nce until the 886 * ire is actually added at which point the nce_refcnt on the nce is 887 * incremented. This is done primarily to have symmetry between ire_add() 888 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 889 */ 890 int 891 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 892 { 893 nce_t *nce; 894 int err = 0; 895 uint32_t ms; 896 mblk_t *mp_nce = NULL; 897 898 ASSERT(ill != NULL); 899 if (IN6_IS_ADDR_MULTICAST(dst)) { 900 err = nce_set_multicast(ill, dst); 901 return (err); 902 } 903 err = ndp_lookup_then_add(ill, 904 NULL, /* No hardware address */ 905 dst, 906 &ipv6_all_ones, 907 &ipv6_all_zeros, 908 0, 909 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 910 ND_INCOMPLETE, 911 &nce); 912 913 switch (err) { 914 case 0: 915 /* 916 * New cache entry was created. Make sure that the state 917 * is not ND_INCOMPLETE. It can be in some other state 918 * even before we send out the solicitation as we could 919 * get un-solicited advertisements. 920 * 921 * If this is an XRESOLV interface, simply return 0, 922 * since we don't want to solicit just yet. 923 */ 924 if (ill->ill_flags & ILLF_XRESOLV) { 925 NCE_REFRELE(nce); 926 return (0); 927 } 928 rw_enter(&ill_g_lock, RW_READER); 929 mutex_enter(&nce->nce_lock); 930 if (nce->nce_state != ND_INCOMPLETE) { 931 mutex_exit(&nce->nce_lock); 932 rw_exit(&ill_g_lock); 933 NCE_REFRELE(nce); 934 return (0); 935 } 936 mp_nce = ndp_prepend_zone(mp, zoneid); 937 if (mp_nce == NULL) { 938 /* The caller will free mp */ 939 mutex_exit(&nce->nce_lock); 940 rw_exit(&ill_g_lock); 941 ndp_delete(nce); 942 NCE_REFRELE(nce); 943 return (ENOMEM); 944 } 945 ms = nce_solicit(nce, mp_nce); 946 rw_exit(&ill_g_lock); 947 if (ms == 0) { 948 /* The caller will free mp */ 949 if (mp_nce != mp) 950 freeb(mp_nce); 951 mutex_exit(&nce->nce_lock); 952 ndp_delete(nce); 953 NCE_REFRELE(nce); 954 return (EBUSY); 955 } 956 mutex_exit(&nce->nce_lock); 957 NDP_RESTART_TIMER(nce, (clock_t)ms); 958 NCE_REFRELE(nce); 959 return (EINPROGRESS); 960 case EEXIST: 961 /* Resolution in progress just queue the packet */ 962 mutex_enter(&nce->nce_lock); 963 if (nce->nce_state == ND_INCOMPLETE) { 964 mp_nce = ndp_prepend_zone(mp, zoneid); 965 if (mp_nce == NULL) { 966 err = ENOMEM; 967 } else { 968 nce_queue_mp(nce, mp_nce); 969 err = EINPROGRESS; 970 } 971 } else { 972 /* 973 * Any other state implies we have 974 * a nce but IRE needs to be added ... 975 * ire_add_v6() will take care of the 976 * the case when the nce becomes CONDEMNED 977 * before the ire is added to the table. 978 */ 979 err = 0; 980 } 981 mutex_exit(&nce->nce_lock); 982 NCE_REFRELE(nce); 983 break; 984 default: 985 ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); 986 break; 987 } 988 return (err); 989 } 990 991 /* 992 * When there is no resolver, the link layer template is passed in 993 * the IRE. 994 * Lookup a NCE for a given IRE. Regardless of whether one exists 995 * or one is created, we defer making ire point to nce until the 996 * ire is actually added at which point the nce_refcnt on the nce is 997 * incremented. This is done primarily to have symmetry between ire_add() 998 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 999 */ 1000 int 1001 ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 1002 { 1003 nce_t *nce; 1004 int err = 0; 1005 1006 ASSERT(ill != NULL); 1007 if (IN6_IS_ADDR_MULTICAST(dst)) { 1008 err = nce_set_multicast(ill, dst); 1009 return (err); 1010 } 1011 1012 err = ndp_lookup_then_add(ill, 1013 NULL, /* hardware address */ 1014 dst, 1015 &ipv6_all_ones, 1016 &ipv6_all_zeros, 1017 0, 1018 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1019 ND_REACHABLE, 1020 &nce); 1021 1022 switch (err) { 1023 case 0: 1024 /* 1025 * Cache entry with a proper resolver cookie was 1026 * created. 1027 */ 1028 NCE_REFRELE(nce); 1029 break; 1030 case EEXIST: 1031 err = 0; 1032 NCE_REFRELE(nce); 1033 break; 1034 default: 1035 ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 1036 break; 1037 } 1038 return (err); 1039 } 1040 1041 /* 1042 * For each interface an entry is added for the unspecified multicast group. 1043 * Here that mapping is used to form the multicast cache entry for a particular 1044 * multicast destination. 1045 */ 1046 static int 1047 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1048 { 1049 nce_t *mnce; /* Multicast mapping entry */ 1050 nce_t *nce; 1051 uchar_t *hw_addr = NULL; 1052 int err = 0; 1053 1054 ASSERT(ill != NULL); 1055 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1056 1057 mutex_enter(&ndp_g_lock); 1058 nce = nce_lookup_addr(ill, dst); 1059 if (nce != NULL) { 1060 mutex_exit(&ndp_g_lock); 1061 NCE_REFRELE(nce); 1062 return (0); 1063 } 1064 /* No entry, now lookup for a mapping this should never fail */ 1065 mnce = nce_lookup_mapping(ill, dst); 1066 if (mnce == NULL) { 1067 /* Something broken for the interface. */ 1068 mutex_exit(&ndp_g_lock); 1069 return (ESRCH); 1070 } 1071 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1072 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1073 /* 1074 * For IRE_IF_RESOLVER a hardware mapping can be 1075 * generated, for IRE_IF_NORESOLVER, resolution cookie 1076 * in the ill is copied in ndp_add(). 1077 */ 1078 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1079 if (hw_addr == NULL) { 1080 mutex_exit(&ndp_g_lock); 1081 NCE_REFRELE(mnce); 1082 return (ENOMEM); 1083 } 1084 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1085 } 1086 NCE_REFRELE(mnce); 1087 /* 1088 * IRE_IF_NORESOLVER type simply copies the resolution 1089 * cookie passed in. So no hw_addr is needed. 1090 */ 1091 err = ndp_add(ill, 1092 hw_addr, 1093 dst, 1094 &ipv6_all_ones, 1095 &ipv6_all_zeros, 1096 0, 1097 NCE_F_NONUD, 1098 ND_REACHABLE, 1099 &nce); 1100 mutex_exit(&ndp_g_lock); 1101 if (hw_addr != NULL) 1102 kmem_free(hw_addr, ill->ill_nd_lla_len); 1103 if (err != 0) { 1104 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1105 return (err); 1106 } 1107 NCE_REFRELE(nce); 1108 return (0); 1109 } 1110 1111 /* 1112 * Return the link layer address, and any flags of a nce. 1113 */ 1114 int 1115 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1116 { 1117 nce_t *nce; 1118 in6_addr_t *addr; 1119 sin6_t *sin6; 1120 dl_unitdata_req_t *dl; 1121 1122 ASSERT(ill != NULL); 1123 sin6 = (sin6_t *)&lnr->lnr_addr; 1124 addr = &sin6->sin6_addr; 1125 1126 nce = ndp_lookup(ill, addr, B_FALSE); 1127 if (nce == NULL) 1128 return (ESRCH); 1129 /* If in INCOMPLETE state, no link layer address is available yet */ 1130 if (nce->nce_state == ND_INCOMPLETE) 1131 goto done; 1132 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1133 if (ill->ill_flags & ILLF_XRESOLV) 1134 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1135 else 1136 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1137 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1138 sizeof (lnr->lnr_hdw_addr)); 1139 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1140 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1141 if (nce->nce_flags & NCE_F_ISROUTER) 1142 lnr->lnr_flags = NDF_ISROUTER_ON; 1143 if (nce->nce_flags & NCE_F_PROXY) 1144 lnr->lnr_flags |= NDF_PROXY_ON; 1145 if (nce->nce_flags & NCE_F_ANYCAST) 1146 lnr->lnr_flags |= NDF_ANYCAST_ON; 1147 done: 1148 NCE_REFRELE(nce); 1149 return (0); 1150 } 1151 1152 /* 1153 * Send Enable/Disable multicast reqs to driver. 1154 */ 1155 int 1156 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1157 uint32_t hw_addr_offset, mblk_t *mp) 1158 { 1159 nce_t *nce; 1160 uchar_t *hw_addr; 1161 1162 ASSERT(ill != NULL); 1163 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1164 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1165 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1166 freemsg(mp); 1167 return (EINVAL); 1168 } 1169 mutex_enter(&ndp_g_lock); 1170 nce = nce_lookup_mapping(ill, addr); 1171 if (nce == NULL) { 1172 mutex_exit(&ndp_g_lock); 1173 freemsg(mp); 1174 return (ESRCH); 1175 } 1176 mutex_exit(&ndp_g_lock); 1177 /* 1178 * Update dl_addr_length and dl_addr_offset for primitives that 1179 * have physical addresses as opposed to full saps 1180 */ 1181 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1182 case DL_ENABMULTI_REQ: 1183 /* Track the state if this is the first enabmulti */ 1184 if (ill->ill_dlpi_multicast_state == IDMS_UNKNOWN) 1185 ill->ill_dlpi_multicast_state = IDMS_INPROGRESS; 1186 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1187 break; 1188 case DL_DISABMULTI_REQ: 1189 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1190 break; 1191 default: 1192 NCE_REFRELE(nce); 1193 ip1dbg(("ndp_mcastreq: default\n")); 1194 return (EINVAL); 1195 } 1196 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1197 NCE_REFRELE(nce); 1198 putnext(ill->ill_wq, mp); 1199 return (0); 1200 } 1201 1202 /* 1203 * Send a neighbor solicitation. 1204 * Returns number of milliseconds after which we should either rexmit or abort. 1205 * Return of zero means we should abort. 1206 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1207 * 1208 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1209 * the packet. 1210 * NOTE: This routine does not consume mp. 1211 */ 1212 uint32_t 1213 nce_solicit(nce_t *nce, mblk_t *mp) 1214 { 1215 ill_t *ill; 1216 ill_t *src_ill; 1217 ip6_t *ip6h; 1218 in6_addr_t src; 1219 in6_addr_t dst; 1220 ipif_t *ipif; 1221 ip6i_t *ip6i; 1222 boolean_t dropped = B_FALSE; 1223 1224 ASSERT(RW_READ_HELD(&ill_g_lock)); 1225 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1226 ill = nce->nce_ill; 1227 ASSERT(ill != NULL); 1228 1229 if (nce->nce_rcnt == 0) { 1230 return (0); 1231 } 1232 1233 if (mp == NULL) { 1234 ASSERT(nce->nce_qd_mp != NULL); 1235 mp = nce->nce_qd_mp; 1236 } else { 1237 nce_queue_mp(nce, mp); 1238 } 1239 1240 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1241 if (mp->b_datap->db_type == M_CTL) 1242 mp = mp->b_cont; 1243 1244 ip6h = (ip6_t *)mp->b_rptr; 1245 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1246 /* 1247 * This message should have been pulled up already in 1248 * ip_wput_v6. We can't do pullups here because the message 1249 * could be from the nce_qd_mp which could have b_next/b_prev 1250 * non-NULL. 1251 */ 1252 ip6i = (ip6i_t *)ip6h; 1253 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 1254 sizeof (ip6i_t) + IPV6_HDR_LEN); 1255 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1256 } 1257 src = ip6h->ip6_src; 1258 /* 1259 * If the src of outgoing packet is one of the assigned interface 1260 * addresses use it, otherwise we will pick the source address below. 1261 */ 1262 src_ill = ill; 1263 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1264 if (ill->ill_group != NULL) 1265 src_ill = ill->ill_group->illgrp_ill; 1266 for (; src_ill != NULL; src_ill = src_ill->ill_group_next) { 1267 for (ipif = src_ill->ill_ipif; ipif != NULL; 1268 ipif = ipif->ipif_next) { 1269 if (IN6_ARE_ADDR_EQUAL(&src, 1270 &ipif->ipif_v6lcl_addr)) { 1271 break; 1272 } 1273 } 1274 if (ipif != NULL) 1275 break; 1276 } 1277 if (src_ill == NULL) { 1278 /* May be a forwarding packet */ 1279 src_ill = ill; 1280 src = ipv6_all_zeros; 1281 } 1282 } 1283 dst = nce->nce_addr; 1284 /* 1285 * If source address is unspecified, nce_xmit will choose 1286 * one for us and initialize the hardware address also 1287 * appropriately. 1288 */ 1289 if (IN6_IS_ADDR_UNSPECIFIED(&src)) 1290 src_ill = NULL; 1291 nce->nce_rcnt--; 1292 mutex_exit(&nce->nce_lock); 1293 rw_exit(&ill_g_lock); 1294 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, 1295 &dst, 0); 1296 rw_enter(&ill_g_lock, RW_READER); 1297 mutex_enter(&nce->nce_lock); 1298 if (dropped) 1299 nce->nce_rcnt++; 1300 return (ill->ill_reachable_retrans_time); 1301 } 1302 1303 void 1304 ndp_input_solicit(ill_t *ill, mblk_t *mp) 1305 { 1306 nd_neighbor_solicit_t *ns; 1307 uint32_t hlen = ill->ill_nd_lla_len; 1308 uchar_t *haddr = NULL; 1309 icmp6_t *icmp_nd; 1310 ip6_t *ip6h; 1311 nce_t *our_nce = NULL; 1312 in6_addr_t target; 1313 in6_addr_t src; 1314 int len; 1315 int flag = 0; 1316 nd_opt_hdr_t *opt = NULL; 1317 boolean_t bad_solicit = B_FALSE; 1318 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1319 1320 ip6h = (ip6_t *)mp->b_rptr; 1321 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1322 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1323 src = ip6h->ip6_src; 1324 ns = (nd_neighbor_solicit_t *)icmp_nd; 1325 target = ns->nd_ns_target; 1326 if (IN6_IS_ADDR_MULTICAST(&target)) { 1327 if (ip_debug > 2) { 1328 /* ip1dbg */ 1329 pr_addr_dbg("ndp_input_solicit: Target is" 1330 " multicast! %s\n", AF_INET6, &target); 1331 } 1332 bad_solicit = B_TRUE; 1333 goto done; 1334 } 1335 if (len > sizeof (nd_neighbor_solicit_t)) { 1336 /* Options present */ 1337 opt = (nd_opt_hdr_t *)&ns[1]; 1338 len -= sizeof (nd_neighbor_solicit_t); 1339 if (!ndp_verify_optlen(opt, len)) { 1340 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1341 bad_solicit = B_TRUE; 1342 goto done; 1343 } 1344 } 1345 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1346 /* Check to see if this is a valid DAD solicitation */ 1347 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1348 if (ip_debug > 2) { 1349 /* ip1dbg */ 1350 pr_addr_dbg("ndp_input_solicit: IPv6 " 1351 "Destination is not solicited node " 1352 "multicast %s\n", AF_INET6, 1353 &ip6h->ip6_dst); 1354 } 1355 bad_solicit = B_TRUE; 1356 goto done; 1357 } 1358 } 1359 1360 our_nce = ndp_lookup(ill, &target, B_FALSE); 1361 /* 1362 * If this is a valid Solicitation, a permanent 1363 * entry should exist in the cache 1364 */ 1365 if (our_nce == NULL || 1366 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1367 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1368 "ifname=%s ", ill->ill_name)); 1369 if (ip_debug > 2) { 1370 /* ip1dbg */ 1371 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1372 } 1373 bad_solicit = B_TRUE; 1374 goto done; 1375 } 1376 1377 /* At this point we should have a verified NS per spec */ 1378 if (opt != NULL) { 1379 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1380 if (opt != NULL) { 1381 /* 1382 * No source link layer address option should 1383 * be present in a valid DAD request. 1384 */ 1385 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1386 ip1dbg(("ndp_input_solicit: source link-layer " 1387 "address option present with an " 1388 "unspecified source. \n")); 1389 bad_solicit = B_TRUE; 1390 goto done; 1391 } 1392 haddr = (uchar_t *)&opt[1]; 1393 if (hlen > opt->nd_opt_len * 8 || 1394 hlen == 0) { 1395 bad_solicit = B_TRUE; 1396 goto done; 1397 } 1398 } 1399 } 1400 /* 1401 * haddr can be NULL if no options are present, 1402 * or no Source link layer address is present in, 1403 * recvd NDP options of solicitation message. 1404 */ 1405 if (haddr == NULL) { 1406 nce_t *nnce; 1407 mutex_enter(&ndp_g_lock); 1408 nnce = nce_lookup_addr(ill, &src); 1409 mutex_exit(&ndp_g_lock); 1410 1411 if (nnce == NULL) { 1412 in6_addr_t dst = ipv6_solicited_node_mcast; 1413 1414 /* Form solicited node multicast address */ 1415 dst.s6_addr32[3] |= src.s6_addr32[3]; 1416 (void) nce_xmit(ill, 1417 ND_NEIGHBOR_SOLICIT, 1418 ill, 1419 B_TRUE, 1420 &target, 1421 &dst, 1422 flag); 1423 bad_solicit = B_TRUE; 1424 goto done; 1425 } 1426 } 1427 /* Set override flag, it will be reset later if need be. */ 1428 flag |= NDP_ORIDE; 1429 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1430 flag |= NDP_UNICAST; 1431 } 1432 1433 /* 1434 * Create/update the entry for the soliciting node. 1435 * or respond to outstanding queries, don't if 1436 * the source is unspecified address. 1437 */ 1438 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1439 int err = 0; 1440 nce_t *nnce; 1441 1442 err = ndp_lookup_then_add(ill, 1443 haddr, 1444 &src, /* Soliciting nodes address */ 1445 &ipv6_all_ones, 1446 &ipv6_all_zeros, 1447 0, 1448 0, 1449 ND_STALE, 1450 &nnce); 1451 switch (err) { 1452 case 0: 1453 /* done with this entry */ 1454 NCE_REFRELE(nnce); 1455 break; 1456 case EEXIST: 1457 /* 1458 * B_FALSE indicates this is not an 1459 * an advertisement. 1460 */ 1461 ndp_process(nnce, haddr, 0, B_FALSE); 1462 NCE_REFRELE(nnce); 1463 break; 1464 default: 1465 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 1466 err)); 1467 goto done; 1468 } 1469 flag |= NDP_SOLICITED; 1470 } else { 1471 /* 1472 * This is a DAD req, multicast the advertisement 1473 * to the all-nodes address. 1474 */ 1475 src = ipv6_all_hosts_mcast; 1476 } 1477 if (our_nce->nce_flags & NCE_F_ISROUTER) 1478 flag |= NDP_ISROUTER; 1479 if (our_nce->nce_flags & NCE_F_PROXY) 1480 flag &= ~NDP_ORIDE; 1481 /* Response to a solicitation */ 1482 (void) nce_xmit(ill, 1483 ND_NEIGHBOR_ADVERT, 1484 ill, /* ill to be used for extracting ill_nd_lla */ 1485 B_TRUE, /* use ill_nd_lla */ 1486 &target, /* Source and target of the advertisement pkt */ 1487 &src, /* IP Destination (source of original pkt) */ 1488 flag); 1489 done: 1490 if (bad_solicit) 1491 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 1492 if (our_nce != NULL) 1493 NCE_REFRELE(our_nce); 1494 } 1495 1496 void 1497 ndp_input_advert(ill_t *ill, mblk_t *mp) 1498 { 1499 nd_neighbor_advert_t *na; 1500 uint32_t hlen = ill->ill_nd_lla_len; 1501 uchar_t *haddr = NULL; 1502 icmp6_t *icmp_nd; 1503 ip6_t *ip6h; 1504 nce_t *dst_nce = NULL; 1505 in6_addr_t target; 1506 nd_opt_hdr_t *opt = NULL; 1507 int len; 1508 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1509 1510 ip6h = (ip6_t *)mp->b_rptr; 1511 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1512 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1513 na = (nd_neighbor_advert_t *)icmp_nd; 1514 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 1515 (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { 1516 ip1dbg(("ndp_input_advert: Target is multicast but the " 1517 "solicited flag is not zero\n")); 1518 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 1519 return; 1520 } 1521 target = na->nd_na_target; 1522 if (IN6_IS_ADDR_MULTICAST(&target)) { 1523 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 1524 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 1525 return; 1526 } 1527 if (len > sizeof (nd_neighbor_advert_t)) { 1528 opt = (nd_opt_hdr_t *)&na[1]; 1529 if (!ndp_verify_optlen(opt, 1530 len - sizeof (nd_neighbor_advert_t))) { 1531 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 1532 return; 1533 } 1534 /* At this point we have a verified NA per spec */ 1535 len -= sizeof (nd_neighbor_advert_t); 1536 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 1537 if (opt != NULL) { 1538 haddr = (uchar_t *)&opt[1]; 1539 if (hlen > opt->nd_opt_len * 8 || 1540 hlen == 0) { 1541 BUMP_MIB(mib, 1542 ipv6IfIcmpInBadNeighborAdvertisements); 1543 return; 1544 } 1545 } 1546 } 1547 1548 /* 1549 * If this interface is part of the group look at all the 1550 * ills in the group. 1551 */ 1552 rw_enter(&ill_g_lock, RW_READER); 1553 if (ill->ill_group != NULL) 1554 ill = ill->ill_group->illgrp_ill; 1555 1556 for (; ill != NULL; ill = ill->ill_group_next) { 1557 mutex_enter(&ill->ill_lock); 1558 if (!ILL_CAN_LOOKUP(ill)) { 1559 mutex_exit(&ill->ill_lock); 1560 continue; 1561 } 1562 ill_refhold_locked(ill); 1563 mutex_exit(&ill->ill_lock); 1564 dst_nce = ndp_lookup(ill, &target, B_FALSE); 1565 /* We have to drop the lock since ndp_process calls put* */ 1566 rw_exit(&ill_g_lock); 1567 if (dst_nce != NULL) { 1568 if (na->nd_na_flags_reserved & 1569 ND_NA_FLAG_ROUTER) { 1570 dst_nce->nce_flags |= NCE_F_ISROUTER; 1571 } 1572 /* B_TRUE indicates this an advertisement */ 1573 ndp_process(dst_nce, haddr, 1574 na->nd_na_flags_reserved, B_TRUE); 1575 NCE_REFRELE(dst_nce); 1576 } 1577 rw_enter(&ill_g_lock, RW_READER); 1578 ill_refrele(ill); 1579 } 1580 rw_exit(&ill_g_lock); 1581 } 1582 1583 /* 1584 * Process NDP neighbor solicitation/advertisement messages. 1585 * The checksum has already checked o.k before reaching here. 1586 */ 1587 void 1588 ndp_input(ill_t *ill, mblk_t *mp) 1589 { 1590 icmp6_t *icmp_nd; 1591 ip6_t *ip6h; 1592 int len; 1593 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1594 1595 1596 if (!pullupmsg(mp, -1)) { 1597 ip1dbg(("ndp_input: pullupmsg failed\n")); 1598 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1599 goto done; 1600 } 1601 ip6h = (ip6_t *)mp->b_rptr; 1602 if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 1603 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 1604 BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 1605 goto done; 1606 } 1607 /* 1608 * NDP does not accept any extension headers between the 1609 * IP header and the ICMP header since e.g. a routing 1610 * header could be dangerous. 1611 * This assumes that any AH or ESP headers are removed 1612 * by ip prior to passing the packet to ndp_input. 1613 */ 1614 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 1615 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 1616 ip6h->ip6_nxt)); 1617 BUMP_MIB(mib, ipv6IfIcmpInErrors); 1618 goto done; 1619 } 1620 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1621 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 1622 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 1623 if (icmp_nd->icmp6_code != 0) { 1624 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 1625 BUMP_MIB(mib, ipv6IfIcmpInErrors); 1626 goto done; 1627 } 1628 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1629 /* 1630 * Make sure packet length is large enough for either 1631 * a NS or a NA icmp packet. 1632 */ 1633 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 1634 ip1dbg(("ndp_input: packet too short\n")); 1635 BUMP_MIB(mib, ipv6IfIcmpInErrors); 1636 goto done; 1637 } 1638 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 1639 ndp_input_solicit(ill, mp); 1640 } else { 1641 ndp_input_advert(ill, mp); 1642 } 1643 done: 1644 freemsg(mp); 1645 } 1646 1647 /* 1648 * nce_xmit is called to form and transmit a ND solicitation or 1649 * advertisement ICMP packet. 1650 * If source address is unspecified, appropriate source address 1651 * and link layer address will be chosen here. This function 1652 * *always* sends the link layer option. 1653 * It returns B_FALSE only if it does a successful put() to the 1654 * corresponding ill's ill_wq otherwise returns B_TRUE. 1655 */ 1656 static boolean_t 1657 nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, 1658 boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target, 1659 int flag) 1660 { 1661 uint32_t len; 1662 icmp6_t *icmp6; 1663 mblk_t *mp; 1664 ip6_t *ip6h; 1665 nd_opt_hdr_t *opt; 1666 uint_t plen; 1667 ip6i_t *ip6i; 1668 ipif_t *src_ipif = NULL; 1669 1670 /* 1671 * If we have a unspecified source(sender) address, select a 1672 * proper source address for the solicitation here itself so 1673 * that we can initialize the h/w address correctly. This is 1674 * needed for interface groups as source address can come from 1675 * the whole group and the h/w address initialized from ill will 1676 * be wrong if the source address comes from a different ill. 1677 * 1678 * Note that the NA never comes here with the unspecified source 1679 * address. The following asserts that whenever the source 1680 * address is specified, the haddr also should be specified. 1681 */ 1682 ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); 1683 1684 if (IN6_IS_ADDR_UNSPECIFIED(sender)) { 1685 ASSERT(operation != ND_NEIGHBOR_ADVERT); 1686 /* 1687 * Pick a source address for this solicitation, but 1688 * restrict the selection to addresses assigned to the 1689 * output interface (or interface group). We do this 1690 * because the destination will create a neighbor cache 1691 * entry for the source address of this packet, so the 1692 * source address had better be a valid neighbor. 1693 */ 1694 src_ipif = ipif_select_source_v6(ill, target, B_TRUE, 1695 IPV6_PREFER_SRC_DEFAULT, GLOBAL_ZONEID); 1696 if (src_ipif == NULL) { 1697 char buf[INET6_ADDRSTRLEN]; 1698 1699 ip0dbg(("nce_xmit: No source ipif for dst %s\n", 1700 inet_ntop(AF_INET6, (char *)target, buf, 1701 sizeof (buf)))); 1702 return (B_TRUE); 1703 } 1704 sender = &src_ipif->ipif_v6src_addr; 1705 hwaddr_ill = src_ipif->ipif_ill; 1706 } 1707 1708 plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7)/8; 1709 /* 1710 * Always make sure that the NS/NA packets don't get load 1711 * spread. This is needed so that the probe packets sent 1712 * by the in.mpathd daemon can really go out on the desired 1713 * interface. Probe packets are made to go out on a desired 1714 * interface by including a ip6i with ATTACH_IF flag. As these 1715 * packets indirectly end up sending/receiving NS/NA packets 1716 * (neighbor doing NUD), we have to make sure that NA 1717 * also go out on the same interface. 1718 */ 1719 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 1720 plen * 8; 1721 mp = allocb(len, BPRI_LO); 1722 if (mp == NULL) { 1723 if (src_ipif != NULL) 1724 ipif_refrele(src_ipif); 1725 return (B_TRUE); 1726 } 1727 bzero((char *)mp->b_rptr, len); 1728 mp->b_wptr = mp->b_rptr + len; 1729 1730 ip6i = (ip6i_t *)mp->b_rptr; 1731 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1732 ip6i->ip6i_nxt = IPPROTO_RAW; 1733 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 1734 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 1735 1736 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1737 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1738 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 1739 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1740 ip6h->ip6_hops = IPV6_MAX_HOPS; 1741 ip6h->ip6_dst = *target; 1742 icmp6 = (icmp6_t *)&ip6h[1]; 1743 1744 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 1745 sizeof (nd_neighbor_advert_t)); 1746 1747 if (operation == ND_NEIGHBOR_SOLICIT) { 1748 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 1749 1750 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 1751 ip6h->ip6_src = *sender; 1752 ns->nd_ns_target = *target; 1753 if (!(flag & NDP_UNICAST)) { 1754 /* Form multicast address of the target */ 1755 ip6h->ip6_dst = ipv6_solicited_node_mcast; 1756 ip6h->ip6_dst.s6_addr32[3] |= 1757 ns->nd_ns_target.s6_addr32[3]; 1758 } 1759 } else { 1760 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 1761 1762 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1763 ip6h->ip6_src = *sender; 1764 na->nd_na_target = *sender; 1765 if (flag & NDP_ISROUTER) 1766 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 1767 if (flag & NDP_SOLICITED) 1768 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 1769 if (flag & NDP_ORIDE) 1770 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 1771 1772 } 1773 /* Fill in link layer address and option len */ 1774 opt->nd_opt_len = (uint8_t)plen; 1775 mutex_enter(&hwaddr_ill->ill_lock); 1776 bcopy(use_nd_lla ? hwaddr_ill->ill_nd_lla : hwaddr_ill->ill_phys_addr, 1777 &opt[1], hwaddr_ill->ill_nd_lla_len); 1778 mutex_exit(&hwaddr_ill->ill_lock); 1779 icmp6->icmp6_type = (uint8_t)operation; 1780 icmp6->icmp6_code = 0; 1781 /* 1782 * Prepare for checksum by putting icmp length in the icmp 1783 * checksum field. The checksum is calculated in ip_wput_v6. 1784 */ 1785 icmp6->icmp6_cksum = ip6h->ip6_plen; 1786 1787 if (src_ipif != NULL) 1788 ipif_refrele(src_ipif); 1789 if (canput(ill->ill_wq)) { 1790 put(ill->ill_wq, mp); 1791 return (B_FALSE); 1792 } 1793 freemsg(mp); 1794 return (B_TRUE); 1795 } 1796 1797 /* 1798 * Make a link layer address (does not include the SAP) from an nce. 1799 * To form the link layer address, use the last four bytes of ipv6 1800 * address passed in and the fixed offset stored in nce. 1801 */ 1802 static void 1803 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 1804 { 1805 uchar_t *mask, *to; 1806 ill_t *ill = nce->nce_ill; 1807 int len; 1808 1809 if (ill->ill_net_type == IRE_IF_NORESOLVER) 1810 return; 1811 ASSERT(nce->nce_res_mp != NULL); 1812 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1813 ASSERT(nce->nce_flags & NCE_F_MAPPING); 1814 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 1815 ASSERT(addr != NULL); 1816 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1817 addrpos, ill->ill_nd_lla_len); 1818 len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 1819 IPV6_ADDR_LEN); 1820 mask = (uchar_t *)&nce->nce_extract_mask; 1821 mask += (IPV6_ADDR_LEN - len); 1822 addr += (IPV6_ADDR_LEN - len); 1823 to = addrpos + nce->nce_ll_extract_start; 1824 while (len-- > 0) 1825 *to++ |= *mask++ & *addr++; 1826 } 1827 1828 /* 1829 * Pass a cache report back out via NDD. 1830 */ 1831 /* ARGSUSED */ 1832 int 1833 ndp_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) 1834 { 1835 (void) mi_mpprintf(mp, "ifname hardware addr flags" 1836 " proto addr/mask"); 1837 ndp_walk(NULL, (pfi_t)nce_report1, (uchar_t *)mp); 1838 return (0); 1839 } 1840 1841 /* 1842 * convert a link level address of arbitrary length 1843 * to an ascii string. 1844 * The caller *must* have already verified that the string buffer 1845 * is large enough to hold the entire string, including trailing NULL. 1846 */ 1847 static void 1848 lla2ascii(uint8_t *lla, int addrlen, uchar_t *buf) 1849 { 1850 uchar_t addrbyte[8]; /* needs to hold ascii for a byte plus a NULL */ 1851 int i; 1852 size_t len; 1853 1854 buf[0] = '\0'; 1855 for (i = 0; i < addrlen; i++) { 1856 addrbyte[0] = '\0'; 1857 (void) sprintf((char *)addrbyte, "%02x:", (lla[i] & 0xff)); 1858 len = strlen((const char *)addrbyte); 1859 bcopy(addrbyte, buf, len); 1860 buf = buf + len; 1861 } 1862 *--buf = '\0'; 1863 } 1864 1865 /* 1866 * Add a single line to the NDP Cache Entry Report. 1867 */ 1868 static void 1869 nce_report1(nce_t *nce, uchar_t *mp_arg) 1870 { 1871 ill_t *ill = nce->nce_ill; 1872 char local_buf[INET6_ADDRSTRLEN]; 1873 uchar_t flags_buf[10]; 1874 uint32_t flags = nce->nce_flags; 1875 mblk_t *mp = (mblk_t *)mp_arg; 1876 uchar_t *h; 1877 uchar_t *m = flags_buf; 1878 in6_addr_t v6addr; 1879 1880 /* 1881 * Lock the nce to protect nce_res_mp from being changed 1882 * if an external resolver address resolution completes 1883 * while nce_res_mp is being accessed here. 1884 * 1885 * Deal with all address formats, not just Ethernet-specific 1886 * In addition, make sure that the mblk has enough space 1887 * before writing to it. If is doesn't, allocate a new one. 1888 */ 1889 ASSERT(ill != NULL); 1890 v6addr = nce->nce_mask; 1891 if (flags & NCE_F_PERMANENT) 1892 *m++ = 'P'; 1893 if (flags & NCE_F_ISROUTER) 1894 *m++ = 'R'; 1895 if (flags & NCE_F_MAPPING) 1896 *m++ = 'M'; 1897 *m = '\0'; 1898 1899 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1900 size_t addrlen; 1901 uchar_t *addr_buf; 1902 dl_unitdata_req_t *dl; 1903 1904 mutex_enter(&nce->nce_lock); 1905 h = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 1906 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1907 if (ill->ill_flags & ILLF_XRESOLV) 1908 addrlen = (3 * (dl->dl_dest_addr_length)); 1909 else 1910 addrlen = (3 * (ill->ill_nd_lla_len)); 1911 if (addrlen <= 0) { 1912 mutex_exit(&nce->nce_lock); 1913 (void) mi_mpprintf(mp, 1914 "%8s %9s %5s %s/%d", 1915 ill->ill_name, 1916 "None", 1917 (uchar_t *)&flags_buf, 1918 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 1919 (char *)local_buf, sizeof (local_buf)), 1920 ip_mask_to_plen_v6(&v6addr)); 1921 } else { 1922 /* 1923 * Convert the hardware/lla address to ascii 1924 */ 1925 addr_buf = kmem_zalloc(addrlen, KM_NOSLEEP); 1926 if (addr_buf == NULL) { 1927 mutex_exit(&nce->nce_lock); 1928 return; 1929 } 1930 if (ill->ill_flags & ILLF_XRESOLV) 1931 lla2ascii((uint8_t *)h, dl->dl_dest_addr_length, 1932 addr_buf); 1933 else 1934 lla2ascii((uint8_t *)h, ill->ill_nd_lla_len, 1935 addr_buf); 1936 mutex_exit(&nce->nce_lock); 1937 (void) mi_mpprintf(mp, "%8s %17s %5s %s/%d", 1938 ill->ill_name, addr_buf, (uchar_t *)&flags_buf, 1939 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 1940 (char *)local_buf, sizeof (local_buf)), 1941 ip_mask_to_plen_v6(&v6addr)); 1942 kmem_free(addr_buf, addrlen); 1943 } 1944 } else { 1945 (void) mi_mpprintf(mp, 1946 "%8s %9s %5s %s/%d", 1947 ill->ill_name, 1948 "None", 1949 (uchar_t *)&flags_buf, 1950 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 1951 (char *)local_buf, sizeof (local_buf)), 1952 ip_mask_to_plen_v6(&v6addr)); 1953 } 1954 } 1955 1956 mblk_t * 1957 nce_udreq_alloc(ill_t *ill) 1958 { 1959 mblk_t *template_mp = NULL; 1960 dl_unitdata_req_t *dlur; 1961 int sap_length; 1962 1963 sap_length = ill->ill_sap_length; 1964 template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 1965 ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 1966 if (template_mp == NULL) 1967 return (NULL); 1968 1969 dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 1970 dlur->dl_priority.dl_min = 0; 1971 dlur->dl_priority.dl_max = 0; 1972 dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 1973 dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); 1974 1975 /* Copy in the SAP value. */ 1976 NCE_LL_SAP_COPY(ill, template_mp); 1977 1978 return (template_mp); 1979 } 1980 1981 /* 1982 * NDP retransmit timer. 1983 * This timer goes off when: 1984 * a. It is time to retransmit NS for resolver. 1985 * b. It is time to send reachability probes. 1986 */ 1987 void 1988 ndp_timer(void *arg) 1989 { 1990 nce_t *nce = arg; 1991 ill_t *ill = nce->nce_ill; 1992 uint32_t ms; 1993 char addrbuf[INET6_ADDRSTRLEN]; 1994 mblk_t *mp; 1995 boolean_t dropped = B_FALSE; 1996 1997 /* 1998 * The timer has to be cancelled by ndp_delete before doing the final 1999 * refrele. So the NCE is guaranteed to exist when the timer runs 2000 * until it clears the timeout_id. Before clearing the timeout_id 2001 * bump up the refcnt so that we can continue to use the nce 2002 */ 2003 ASSERT(nce != NULL); 2004 2005 /* 2006 * Grab the ill_g_lock now itself to avoid lock order problems. 2007 * nce_solicit needs ill_g_lock to be able to traverse ills 2008 */ 2009 rw_enter(&ill_g_lock, RW_READER); 2010 mutex_enter(&nce->nce_lock); 2011 NCE_REFHOLD_LOCKED(nce); 2012 nce->nce_timeout_id = 0; 2013 2014 /* 2015 * Check the reachability state first. 2016 */ 2017 switch (nce->nce_state) { 2018 case ND_DELAY: 2019 rw_exit(&ill_g_lock); 2020 nce->nce_state = ND_PROBE; 2021 mutex_exit(&nce->nce_lock); 2022 (void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 2023 &ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST); 2024 if (ip_debug > 3) { 2025 /* ip2dbg */ 2026 pr_addr_dbg("ndp_timer: state for %s changed " 2027 "to PROBE\n", AF_INET6, &nce->nce_addr); 2028 } 2029 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2030 NCE_REFRELE(nce); 2031 return; 2032 case ND_PROBE: 2033 /* must be retransmit timer */ 2034 rw_exit(&ill_g_lock); 2035 nce->nce_pcnt--; 2036 ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 2037 nce->nce_pcnt >= -1); 2038 if (nce->nce_pcnt == 0) { 2039 /* Wait RetransTimer, before deleting the entry */ 2040 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2041 nce->nce_pcnt, inet_ntop(AF_INET6, 2042 &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 2043 mutex_exit(&nce->nce_lock); 2044 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2045 } else { 2046 /* 2047 * As per RFC2461, the nce gets deleted after 2048 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 2049 * Note that the first unicast solicitation is sent 2050 * during the DELAY state. 2051 */ 2052 if (nce->nce_pcnt > 0) { 2053 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2054 nce->nce_pcnt, inet_ntop(AF_INET6, 2055 &nce->nce_addr, 2056 addrbuf, sizeof (addrbuf)))); 2057 mutex_exit(&nce->nce_lock); 2058 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, 2059 NULL, B_FALSE, &ipv6_all_zeros, 2060 &nce->nce_addr, NDP_UNICAST); 2061 if (dropped) { 2062 mutex_enter(&nce->nce_lock); 2063 nce->nce_pcnt++; 2064 mutex_exit(&nce->nce_lock); 2065 } 2066 NDP_RESTART_TIMER(nce, 2067 ill->ill_reachable_retrans_time); 2068 } else { 2069 /* No hope, delete the nce */ 2070 nce->nce_state = ND_UNREACHABLE; 2071 mutex_exit(&nce->nce_lock); 2072 if (ip_debug > 2) { 2073 /* ip1dbg */ 2074 pr_addr_dbg("ndp_timer: Delete IRE for" 2075 " dst %s\n", AF_INET6, 2076 &nce->nce_addr); 2077 } 2078 ndp_delete(nce); 2079 } 2080 } 2081 NCE_REFRELE(nce); 2082 return; 2083 case ND_INCOMPLETE: 2084 /* 2085 * Must be resolvers retransmit timer. 2086 */ 2087 for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) { 2088 ip6i_t *ip6i; 2089 ip6_t *ip6h; 2090 mblk_t *data_mp; 2091 2092 /* 2093 * Walk the list of packets queued, and see if there 2094 * are any multipathing probe packets. Such packets 2095 * are always queued at the head. Since this is a 2096 * retransmit timer firing, mark such packets as 2097 * delayed in ND resolution. This info will be used 2098 * in ip_wput_v6(). Multipathing probe packets will 2099 * always have an ip6i_t. Once we hit a packet without 2100 * it, we can break out of this loop. 2101 */ 2102 if (mp->b_datap->db_type == M_CTL) 2103 data_mp = mp->b_cont; 2104 else 2105 data_mp = mp; 2106 2107 ip6h = (ip6_t *)data_mp->b_rptr; 2108 if (ip6h->ip6_nxt != IPPROTO_RAW) 2109 break; 2110 2111 /* 2112 * This message should have been pulled up already in 2113 * ip_wput_v6. We can't do pullups here because the 2114 * b_next/b_prev is non-NULL. 2115 */ 2116 ip6i = (ip6i_t *)ip6h; 2117 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2118 sizeof (ip6i_t) + IPV6_HDR_LEN); 2119 2120 /* Mark this packet as delayed due to ND resolution */ 2121 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2122 ip6i->ip6i_flags |= IP6I_ND_DELAYED; 2123 } 2124 if (nce->nce_qd_mp != NULL) { 2125 ms = nce_solicit(nce, NULL); 2126 rw_exit(&ill_g_lock); 2127 if (ms == 0) { 2128 if (nce->nce_state != ND_REACHABLE) { 2129 mutex_exit(&nce->nce_lock); 2130 nce_resolv_failed(nce); 2131 ndp_delete(nce); 2132 } else { 2133 mutex_exit(&nce->nce_lock); 2134 } 2135 } else { 2136 mutex_exit(&nce->nce_lock); 2137 NDP_RESTART_TIMER(nce, (clock_t)ms); 2138 } 2139 NCE_REFRELE(nce); 2140 return; 2141 } 2142 mutex_exit(&nce->nce_lock); 2143 rw_exit(&ill_g_lock); 2144 NCE_REFRELE(nce); 2145 break; 2146 case ND_REACHABLE : 2147 rw_exit(&ill_g_lock); 2148 if (nce->nce_flags & NCE_F_UNSOL_ADV && 2149 nce->nce_unsolicit_count != 0) { 2150 nce->nce_unsolicit_count--; 2151 mutex_exit(&nce->nce_lock); 2152 dropped = nce_xmit(ill, 2153 ND_NEIGHBOR_ADVERT, 2154 ill, /* ill to be used for hw addr */ 2155 B_FALSE, /* use ill_phys_addr */ 2156 &nce->nce_addr, 2157 &ipv6_all_hosts_mcast, 2158 nce->nce_flags | NDP_ORIDE); 2159 if (dropped) { 2160 mutex_enter(&nce->nce_lock); 2161 nce->nce_unsolicit_count++; 2162 mutex_exit(&nce->nce_lock); 2163 } 2164 if (nce->nce_unsolicit_count != 0) { 2165 NDP_RESTART_TIMER(nce, 2166 ip_ndp_unsolicit_interval); 2167 } 2168 } else { 2169 mutex_exit(&nce->nce_lock); 2170 } 2171 NCE_REFRELE(nce); 2172 break; 2173 default: 2174 rw_exit(&ill_g_lock); 2175 mutex_exit(&nce->nce_lock); 2176 NCE_REFRELE(nce); 2177 break; 2178 } 2179 } 2180 2181 /* 2182 * Set a link layer address from the ll_addr passed in. 2183 * Copy SAP from ill. 2184 */ 2185 static void 2186 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2187 { 2188 ill_t *ill = nce->nce_ill; 2189 uchar_t *woffset; 2190 2191 ASSERT(ll_addr != NULL); 2192 /* Always called before fast_path_probe */ 2193 ASSERT(nce->nce_fp_mp == NULL); 2194 if (ill->ill_sap_length != 0) { 2195 /* 2196 * Copy the SAP type specified in the 2197 * request into the xmit template. 2198 */ 2199 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2200 } 2201 if (ill->ill_phys_addr_length > 0) { 2202 /* 2203 * The bcopy() below used to be called for the physical address 2204 * length rather than the link layer address length. For 2205 * ethernet and many other media, the phys_addr and lla are 2206 * identical. 2207 * However, with xresolv interfaces being introduced, the 2208 * phys_addr and lla are no longer the same, and the physical 2209 * address may not have any useful meaning, so we use the lla 2210 * for IPv6 address resolution and destination addressing. 2211 * 2212 * For PPP or other interfaces with a zero length 2213 * physical address, don't do anything here. 2214 * The bcopy() with a zero phys_addr length was previously 2215 * a no-op for interfaces with a zero-length physical address. 2216 * Using the lla for them would change the way they operate. 2217 * Doing nothing in such cases preserves expected behavior. 2218 */ 2219 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2220 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2221 } 2222 } 2223 2224 static boolean_t 2225 nce_cmp_ll_addr(nce_t *nce, char *ll_addr, uint32_t ll_addr_len) 2226 { 2227 ill_t *ill = nce->nce_ill; 2228 uchar_t *ll_offset; 2229 2230 ASSERT(nce->nce_res_mp != NULL); 2231 if (ll_addr == NULL) 2232 return (B_FALSE); 2233 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2234 if (bcmp(ll_addr, (char *)ll_offset, ll_addr_len) != 0) 2235 return (B_TRUE); 2236 return (B_FALSE); 2237 } 2238 2239 /* 2240 * Updates the link layer address or the reachability state of 2241 * a cache entry. Reset probe counter if needed. 2242 */ 2243 static void 2244 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2245 { 2246 ill_t *ill = nce->nce_ill; 2247 boolean_t need_stop_timer = B_FALSE; 2248 boolean_t need_fastpath_update = B_FALSE; 2249 2250 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2251 /* 2252 * If this interface does not do NUD, there is no point 2253 * in allowing an update to the cache entry. Although 2254 * we will respond to NS. 2255 * The only time we accept an update for a resolver when 2256 * NUD is turned off is when it has just been created. 2257 * Non-Resolvers will always be created as REACHABLE. 2258 */ 2259 if (new_state != ND_UNCHANGED) { 2260 if ((nce->nce_flags & NCE_F_NONUD) && 2261 (nce->nce_state != ND_INCOMPLETE)) 2262 return; 2263 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2264 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2265 need_stop_timer = B_TRUE; 2266 if (new_state == ND_REACHABLE) 2267 nce->nce_last = TICK_TO_MSEC(lbolt64); 2268 else { 2269 /* We force NUD in this case */ 2270 nce->nce_last = 0; 2271 } 2272 nce->nce_state = new_state; 2273 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2274 } 2275 /* 2276 * In case of fast path we need to free the the fastpath 2277 * M_DATA and do another probe. Otherwise we can just 2278 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2279 * whatever packets that happens to be transmitting at the time. 2280 */ 2281 if (new_ll_addr != NULL) { 2282 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2283 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2284 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2285 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2286 if (nce->nce_fp_mp != NULL) { 2287 freemsg(nce->nce_fp_mp); 2288 nce->nce_fp_mp = NULL; 2289 } 2290 need_fastpath_update = B_TRUE; 2291 } 2292 mutex_exit(&nce->nce_lock); 2293 if (need_stop_timer) { 2294 (void) untimeout(nce->nce_timeout_id); 2295 nce->nce_timeout_id = 0; 2296 } 2297 if (need_fastpath_update) 2298 nce_fastpath(nce); 2299 mutex_enter(&nce->nce_lock); 2300 } 2301 2302 static void 2303 nce_queue_mp(nce_t *nce, mblk_t *mp) 2304 { 2305 uint_t count = 0; 2306 mblk_t **mpp; 2307 boolean_t head_insert = B_FALSE; 2308 ip6_t *ip6h; 2309 ip6i_t *ip6i; 2310 mblk_t *data_mp; 2311 2312 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2313 2314 if (mp->b_datap->db_type == M_CTL) 2315 data_mp = mp->b_cont; 2316 else 2317 data_mp = mp; 2318 ip6h = (ip6_t *)data_mp->b_rptr; 2319 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2320 /* 2321 * This message should have been pulled up already in 2322 * ip_wput_v6. We can't do pullups here because the message 2323 * could be from the nce_qd_mp which could have b_next/b_prev 2324 * non-NULL. 2325 */ 2326 ip6i = (ip6i_t *)ip6h; 2327 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2328 sizeof (ip6i_t) + IPV6_HDR_LEN); 2329 /* 2330 * Multipathing probe packets have IP6I_DROP_IFDELAYED set. 2331 * This has 2 aspects mentioned below. 2332 * 1. Perform head insertion in the nce_qd_mp for these packets. 2333 * This ensures that next retransmit of ND solicitation 2334 * will use the interface specified by the probe packet, 2335 * for both NS and NA. This corresponds to the src address 2336 * in the IPv6 packet. If we insert at tail, we will be 2337 * depending on the packet at the head for successful 2338 * ND resolution. This is not reliable, because the interface 2339 * on which the NA arrives could be different from the interface 2340 * on which the NS was sent, and if the receiving interface is 2341 * failed, it will appear that the sending interface is also 2342 * failed, causing in.mpathd to misdiagnose this as link 2343 * failure. 2344 * 2. Drop the original packet, if the ND resolution did not 2345 * succeed in the first attempt. However we will create the 2346 * nce and the ire, as soon as the ND resolution succeeds. 2347 * We don't gain anything by queueing multiple probe packets 2348 * and sending them back-to-back once resolution succeeds. 2349 * It is sufficient to send just 1 packet after ND resolution 2350 * succeeds. Since mpathd is sending down probe packets at a 2351 * constant rate, we don't need to send the queued packet. We 2352 * need to queue it only for NDP resolution. The benefit of 2353 * dropping the probe packets that were delayed in ND 2354 * resolution, is that in.mpathd will not see inflated 2355 * RTT. If the ND resolution does not succeed within 2356 * in.mpathd's failure detection time, mpathd may detect 2357 * a failure, and it does not matter whether the packet 2358 * was queued or dropped. 2359 */ 2360 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2361 head_insert = B_TRUE; 2362 } 2363 2364 for (mpp = &nce->nce_qd_mp; *mpp != NULL; 2365 mpp = &(*mpp)->b_next) { 2366 if (++count > 2367 nce->nce_ill->ill_max_buf) { 2368 mblk_t *tmp = nce->nce_qd_mp->b_next; 2369 2370 nce->nce_qd_mp->b_next = NULL; 2371 nce->nce_qd_mp->b_prev = NULL; 2372 freemsg(nce->nce_qd_mp); 2373 ip1dbg(("nce_queue_mp: pkt dropped\n")); 2374 nce->nce_qd_mp = tmp; 2375 } 2376 } 2377 /* put this on the list */ 2378 if (head_insert) { 2379 mp->b_next = nce->nce_qd_mp; 2380 nce->nce_qd_mp = mp; 2381 } else { 2382 *mpp = mp; 2383 } 2384 } 2385 2386 /* 2387 * Called when address resolution failed due to a timeout. 2388 * Send an ICMP unreachable in response to all queued packets. 2389 */ 2390 void 2391 nce_resolv_failed(nce_t *nce) 2392 { 2393 mblk_t *mp, *nxt_mp, *first_mp; 2394 char buf[INET6_ADDRSTRLEN]; 2395 ip6_t *ip6h; 2396 zoneid_t zoneid = GLOBAL_ZONEID; 2397 2398 ip1dbg(("nce_resolv_failed: dst %s\n", 2399 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 2400 mutex_enter(&nce->nce_lock); 2401 mp = nce->nce_qd_mp; 2402 nce->nce_qd_mp = NULL; 2403 mutex_exit(&nce->nce_lock); 2404 while (mp != NULL) { 2405 nxt_mp = mp->b_next; 2406 mp->b_next = NULL; 2407 mp->b_prev = NULL; 2408 2409 first_mp = mp; 2410 if (mp->b_datap->db_type == M_CTL) { 2411 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 2412 ASSERT(io->ipsec_out_type == IPSEC_OUT); 2413 zoneid = io->ipsec_out_zoneid; 2414 ASSERT(zoneid != ALL_ZONES); 2415 mp = mp->b_cont; 2416 } 2417 2418 ip6h = (ip6_t *)mp->b_rptr; 2419 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2420 ip6i_t *ip6i; 2421 /* 2422 * This message should have been pulled up already 2423 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 2424 * the header is pulled up. 2425 */ 2426 ip6i = (ip6i_t *)ip6h; 2427 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 2428 sizeof (ip6i_t) + IPV6_HDR_LEN); 2429 mp->b_rptr += sizeof (ip6i_t); 2430 } 2431 /* 2432 * Ignore failure since icmp_unreachable_v6 will silently 2433 * drop packets with an unspecified source address. 2434 */ 2435 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid); 2436 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 2437 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE); 2438 mp = nxt_mp; 2439 } 2440 } 2441 2442 /* 2443 * Called by SIOCSNDP* ioctl to add/change an nce entry 2444 * and the corresponding attributes. 2445 * Disallow states other than ND_REACHABLE or ND_STALE. 2446 */ 2447 int 2448 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 2449 { 2450 sin6_t *sin6; 2451 in6_addr_t *addr; 2452 nce_t *nce; 2453 int err; 2454 uint16_t new_flags = 0; 2455 uint16_t old_flags = 0; 2456 int inflags = lnr->lnr_flags; 2457 2458 if ((lnr->lnr_state_create != ND_REACHABLE) && 2459 (lnr->lnr_state_create != ND_STALE)) 2460 return (EINVAL); 2461 2462 sin6 = (sin6_t *)&lnr->lnr_addr; 2463 addr = &sin6->sin6_addr; 2464 2465 mutex_enter(&ndp_g_lock); 2466 /* We know it can not be mapping so just look in the hash table */ 2467 nce = nce_lookup_addr(ill, addr); 2468 if (nce != NULL) 2469 new_flags = nce->nce_flags; 2470 2471 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 2472 case NDF_ISROUTER_ON: 2473 new_flags |= NCE_F_ISROUTER; 2474 break; 2475 case NDF_ISROUTER_OFF: 2476 new_flags &= ~NCE_F_ISROUTER; 2477 break; 2478 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 2479 mutex_exit(&ndp_g_lock); 2480 if (nce != NULL) 2481 NCE_REFRELE(nce); 2482 return (EINVAL); 2483 } 2484 2485 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 2486 case NDF_ANYCAST_ON: 2487 new_flags |= NCE_F_ANYCAST; 2488 break; 2489 case NDF_ANYCAST_OFF: 2490 new_flags &= ~NCE_F_ANYCAST; 2491 break; 2492 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 2493 mutex_exit(&ndp_g_lock); 2494 if (nce != NULL) 2495 NCE_REFRELE(nce); 2496 return (EINVAL); 2497 } 2498 2499 switch (inflags & (NDF_PROXY_ON|NDF_PROXY_OFF)) { 2500 case NDF_PROXY_ON: 2501 new_flags |= NCE_F_PROXY; 2502 break; 2503 case NDF_PROXY_OFF: 2504 new_flags &= ~NCE_F_PROXY; 2505 break; 2506 case (NDF_PROXY_OFF|NDF_PROXY_ON): 2507 mutex_exit(&ndp_g_lock); 2508 if (nce != NULL) 2509 NCE_REFRELE(nce); 2510 return (EINVAL); 2511 } 2512 2513 if (nce == NULL) { 2514 err = ndp_add(ill, 2515 (uchar_t *)lnr->lnr_hdw_addr, 2516 addr, 2517 &ipv6_all_ones, 2518 &ipv6_all_zeros, 2519 0, 2520 new_flags, 2521 lnr->lnr_state_create, 2522 &nce); 2523 if (err != 0) { 2524 mutex_exit(&ndp_g_lock); 2525 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 2526 return (err); 2527 } 2528 } 2529 old_flags = nce->nce_flags; 2530 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 2531 /* 2532 * Router turned to host, delete all ires. 2533 * XXX Just delete the entry, but we need to add too. 2534 */ 2535 nce->nce_flags &= ~NCE_F_ISROUTER; 2536 mutex_exit(&ndp_g_lock); 2537 ndp_delete(nce); 2538 NCE_REFRELE(nce); 2539 return (0); 2540 } 2541 mutex_exit(&ndp_g_lock); 2542 2543 mutex_enter(&nce->nce_lock); 2544 nce->nce_flags = new_flags; 2545 mutex_exit(&nce->nce_lock); 2546 /* 2547 * Note that we ignore the state at this point, which 2548 * should be either STALE or REACHABLE. Instead we let 2549 * the link layer address passed in to determine the state 2550 * much like incoming packets. 2551 */ 2552 ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 2553 NCE_REFRELE(nce); 2554 return (0); 2555 } 2556 2557 /* 2558 * If the device driver supports it, we make nce_fp_mp to have 2559 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 2560 * The caller insures there is hold on nce for this function. 2561 * Note that since ill_fastpath_probe() copies the mblk there is 2562 * no need for the hold beyond this function. 2563 */ 2564 static void 2565 nce_fastpath(nce_t *nce) 2566 { 2567 ill_t *ill = nce->nce_ill; 2568 int res; 2569 2570 ASSERT(ill != NULL); 2571 if (nce->nce_fp_mp != NULL) { 2572 /* Already contains fastpath info */ 2573 return; 2574 } 2575 if (nce->nce_res_mp != NULL) { 2576 nce_fastpath_list_add(nce); 2577 res = ill_fastpath_probe(ill, nce->nce_res_mp); 2578 /* 2579 * EAGAIN is an indication of a transient error 2580 * i.e. allocation failure etc. leave the nce in the list it 2581 * will be updated when another probe happens for another ire 2582 * if not it will be taken out of the list when the ire is 2583 * deleted. 2584 */ 2585 2586 if (res != 0 && res != EAGAIN) 2587 nce_fastpath_list_delete(nce); 2588 } 2589 } 2590 2591 /* 2592 * Drain the list of nce's waiting for fastpath response. 2593 */ 2594 void 2595 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 2596 void *arg) 2597 { 2598 2599 nce_t *next_nce; 2600 nce_t *current_nce; 2601 nce_t *first_nce; 2602 nce_t *prev_nce = NULL; 2603 2604 ASSERT(ill != NULL); 2605 2606 mutex_enter(&ill->ill_lock); 2607 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 2608 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 2609 next_nce = current_nce->nce_fastpath; 2610 /* 2611 * Take it off the list if we're flushing, or if the callback 2612 * routine tells us to do so. Otherwise, leave the nce in the 2613 * fastpath list to handle any pending response from the lower 2614 * layer. We can't drain the list when the callback routine 2615 * comparison failed, because the response is asynchronous in 2616 * nature, and may not arrive in the same order as the list 2617 * insertion. 2618 */ 2619 if (func == NULL || func(current_nce, arg)) { 2620 current_nce->nce_fastpath = NULL; 2621 if (current_nce == first_nce) 2622 ill->ill_fastpath_list = first_nce = next_nce; 2623 else 2624 prev_nce->nce_fastpath = next_nce; 2625 } else { 2626 /* previous element that is still in the list */ 2627 prev_nce = current_nce; 2628 } 2629 current_nce = next_nce; 2630 } 2631 mutex_exit(&ill->ill_lock); 2632 } 2633 2634 /* 2635 * Add nce to the nce fastpath list. 2636 */ 2637 void 2638 nce_fastpath_list_add(nce_t *nce) 2639 { 2640 ill_t *ill; 2641 2642 ill = nce->nce_ill; 2643 ASSERT(ill != NULL); 2644 2645 mutex_enter(&ill->ill_lock); 2646 mutex_enter(&nce->nce_lock); 2647 2648 /* 2649 * if nce has not been deleted and 2650 * is not already in the list add it. 2651 */ 2652 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 2653 (nce->nce_fastpath == NULL)) { 2654 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 2655 ill->ill_fastpath_list = nce; 2656 } 2657 2658 mutex_exit(&nce->nce_lock); 2659 mutex_exit(&ill->ill_lock); 2660 } 2661 2662 /* 2663 * remove nce from the nce fastpath list. 2664 */ 2665 void 2666 nce_fastpath_list_delete(nce_t *nce) 2667 { 2668 nce_t *nce_ptr; 2669 2670 ill_t *ill; 2671 2672 ill = nce->nce_ill; 2673 ASSERT(ill != NULL); 2674 2675 mutex_enter(&ill->ill_lock); 2676 if (nce->nce_fastpath == NULL) 2677 goto done; 2678 2679 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 2680 2681 if (ill->ill_fastpath_list == nce) { 2682 ill->ill_fastpath_list = nce->nce_fastpath; 2683 } else { 2684 nce_ptr = ill->ill_fastpath_list; 2685 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 2686 if (nce_ptr->nce_fastpath == nce) { 2687 nce_ptr->nce_fastpath = nce->nce_fastpath; 2688 break; 2689 } 2690 nce_ptr = nce_ptr->nce_fastpath; 2691 } 2692 } 2693 2694 nce->nce_fastpath = NULL; 2695 done: 2696 mutex_exit(&ill->ill_lock); 2697 } 2698 2699 /* 2700 * Update all NCE's that are not in fastpath mode and 2701 * have an nce_fp_mp that matches mp. mp->b_cont contains 2702 * the fastpath header. 2703 * 2704 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 2705 */ 2706 boolean_t 2707 ndp_fastpath_update(nce_t *nce, void *arg) 2708 { 2709 mblk_t *mp, *fp_mp; 2710 uchar_t *mp_rptr, *ud_mp_rptr; 2711 mblk_t *ud_mp = nce->nce_res_mp; 2712 ptrdiff_t cmplen; 2713 2714 if (nce->nce_flags & NCE_F_MAPPING) 2715 return (B_TRUE); 2716 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 2717 return (B_TRUE); 2718 2719 ip2dbg(("ndp_fastpath_update: trying\n")); 2720 mp = (mblk_t *)arg; 2721 mp_rptr = mp->b_rptr; 2722 cmplen = mp->b_wptr - mp_rptr; 2723 ASSERT(cmplen >= 0); 2724 ud_mp_rptr = ud_mp->b_rptr; 2725 /* 2726 * The nce is locked here to prevent any other threads 2727 * from accessing and changing nce_res_mp when the IPv6 address 2728 * becomes resolved to an lla while we're in the middle 2729 * of looking at and comparing the hardware address (lla). 2730 * It is also locked to prevent multiple threads in nce_fastpath_update 2731 * from examining nce_res_mp atthe same time. 2732 */ 2733 mutex_enter(&nce->nce_lock); 2734 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 2735 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 2736 mutex_exit(&nce->nce_lock); 2737 /* 2738 * Don't take the ire off the fastpath list yet, 2739 * since the response may come later. 2740 */ 2741 return (B_FALSE); 2742 } 2743 /* Matched - install mp as the fastpath mp */ 2744 ip1dbg(("ndp_fastpath_update: match\n")); 2745 fp_mp = dupb(mp->b_cont); 2746 if (fp_mp != NULL) { 2747 nce->nce_fp_mp = fp_mp; 2748 } 2749 mutex_exit(&nce->nce_lock); 2750 return (B_TRUE); 2751 } 2752 2753 /* 2754 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 2755 * driver. Note that it assumes IP is exclusive... 2756 */ 2757 /* ARGSUSED */ 2758 void 2759 ndp_fastpath_flush(nce_t *nce, char *arg) 2760 { 2761 if (nce->nce_flags & NCE_F_MAPPING) 2762 return; 2763 /* No fastpath info? */ 2764 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 2765 return; 2766 2767 /* Just delete the NCE... */ 2768 ndp_delete(nce); 2769 } 2770 2771 /* 2772 * Return a pointer to a given option in the packet. 2773 * Assumes that option part of the packet have already been validated. 2774 */ 2775 nd_opt_hdr_t * 2776 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 2777 { 2778 while (optlen > 0) { 2779 if (opt->nd_opt_type == opt_type) 2780 return (opt); 2781 optlen -= 8 * opt->nd_opt_len; 2782 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 2783 } 2784 return (NULL); 2785 } 2786 2787 /* 2788 * Verify all option lengths present are > 0, also check to see 2789 * if the option lengths and packet length are consistent. 2790 */ 2791 boolean_t 2792 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 2793 { 2794 ASSERT(opt != NULL); 2795 while (optlen > 0) { 2796 if (opt->nd_opt_len == 0) 2797 return (B_FALSE); 2798 optlen -= 8 * opt->nd_opt_len; 2799 if (optlen < 0) 2800 return (B_FALSE); 2801 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 2802 } 2803 return (B_TRUE); 2804 } 2805 2806 /* 2807 * ndp_walk function. 2808 * Free a fraction of the NCE cache entries. 2809 * A fraction of zero means to not free any in that category. 2810 */ 2811 void 2812 ndp_cache_reclaim(nce_t *nce, char *arg) 2813 { 2814 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 2815 uint_t rand; 2816 2817 if (nce->nce_flags & NCE_F_PERMANENT) 2818 return; 2819 2820 rand = (uint_t)lbolt + 2821 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 2822 if (ncr->ncr_host != 0 && 2823 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 2824 ndp_delete(nce); 2825 return; 2826 } 2827 } 2828 2829 /* 2830 * ndp_walk function. 2831 * Count the number of NCEs that can be deleted. 2832 * These would be hosts but not routers. 2833 */ 2834 void 2835 ndp_cache_count(nce_t *nce, char *arg) 2836 { 2837 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 2838 2839 if (nce->nce_flags & NCE_F_PERMANENT) 2840 return; 2841 2842 ncc->ncc_total++; 2843 if (!(nce->nce_flags & NCE_F_ISROUTER)) 2844 ncc->ncc_host++; 2845 } 2846 2847 #ifdef NCE_DEBUG 2848 th_trace_t * 2849 th_trace_nce_lookup(nce_t *nce) 2850 { 2851 int bucket_id; 2852 th_trace_t *th_trace; 2853 2854 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2855 2856 bucket_id = IP_TR_HASH(curthread); 2857 ASSERT(bucket_id < IP_TR_HASH_MAX); 2858 2859 for (th_trace = nce->nce_trace[bucket_id]; th_trace != NULL; 2860 th_trace = th_trace->th_next) { 2861 if (th_trace->th_id == curthread) 2862 return (th_trace); 2863 } 2864 return (NULL); 2865 } 2866 2867 void 2868 nce_trace_ref(nce_t *nce) 2869 { 2870 int bucket_id; 2871 th_trace_t *th_trace; 2872 2873 /* 2874 * Attempt to locate the trace buffer for the curthread. 2875 * If it does not exist, then allocate a new trace buffer 2876 * and link it in list of trace bufs for this ipif, at the head 2877 */ 2878 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2879 2880 if (nce->nce_trace_disable == B_TRUE) 2881 return; 2882 2883 th_trace = th_trace_nce_lookup(nce); 2884 if (th_trace == NULL) { 2885 bucket_id = IP_TR_HASH(curthread); 2886 th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t), 2887 KM_NOSLEEP); 2888 if (th_trace == NULL) { 2889 nce->nce_trace_disable = B_TRUE; 2890 nce_trace_inactive(nce); 2891 return; 2892 } 2893 th_trace->th_id = curthread; 2894 th_trace->th_next = nce->nce_trace[bucket_id]; 2895 th_trace->th_prev = &nce->nce_trace[bucket_id]; 2896 if (th_trace->th_next != NULL) 2897 th_trace->th_next->th_prev = &th_trace->th_next; 2898 nce->nce_trace[bucket_id] = th_trace; 2899 } 2900 ASSERT(th_trace->th_refcnt < TR_BUF_MAX - 1); 2901 th_trace->th_refcnt++; 2902 th_trace_rrecord(th_trace); 2903 } 2904 2905 void 2906 nce_untrace_ref(nce_t *nce) 2907 { 2908 th_trace_t *th_trace; 2909 2910 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2911 2912 if (nce->nce_trace_disable == B_TRUE) 2913 return; 2914 2915 th_trace = th_trace_nce_lookup(nce); 2916 ASSERT(th_trace != NULL && th_trace->th_refcnt > 0); 2917 2918 th_trace_rrecord(th_trace); 2919 th_trace->th_refcnt--; 2920 } 2921 2922 void 2923 nce_trace_inactive(nce_t *nce) 2924 { 2925 th_trace_t *th_trace; 2926 int i; 2927 2928 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2929 2930 for (i = 0; i < IP_TR_HASH_MAX; i++) { 2931 while (nce->nce_trace[i] != NULL) { 2932 th_trace = nce->nce_trace[i]; 2933 2934 /* unlink th_trace and free it */ 2935 nce->nce_trace[i] = th_trace->th_next; 2936 if (th_trace->th_next != NULL) 2937 th_trace->th_next->th_prev = 2938 &nce->nce_trace[i]; 2939 2940 th_trace->th_next = NULL; 2941 th_trace->th_prev = NULL; 2942 kmem_free(th_trace, sizeof (th_trace_t)); 2943 } 2944 } 2945 2946 } 2947 2948 /* ARGSUSED */ 2949 int 2950 nce_thread_exit(nce_t *nce, caddr_t arg) 2951 { 2952 th_trace_t *th_trace; 2953 2954 mutex_enter(&nce->nce_lock); 2955 th_trace = th_trace_nce_lookup(nce); 2956 2957 if (th_trace == NULL) { 2958 mutex_exit(&nce->nce_lock); 2959 return (0); 2960 } 2961 2962 ASSERT(th_trace->th_refcnt == 0); 2963 2964 /* unlink th_trace and free it */ 2965 *th_trace->th_prev = th_trace->th_next; 2966 if (th_trace->th_next != NULL) 2967 th_trace->th_next->th_prev = th_trace->th_prev; 2968 th_trace->th_next = NULL; 2969 th_trace->th_prev = NULL; 2970 kmem_free(th_trace, sizeof (th_trace_t)); 2971 mutex_exit(&nce->nce_lock); 2972 return (0); 2973 } 2974 #endif 2975