1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/stropts.h> 31 #include <sys/sysmacros.h> 32 #include <sys/errno.h> 33 #include <sys/dlpi.h> 34 #include <sys/socket.h> 35 #include <sys/ddi.h> 36 #include <sys/cmn_err.h> 37 #include <sys/debug.h> 38 #include <sys/vtrace.h> 39 #include <sys/kmem.h> 40 #include <sys/zone.h> 41 42 #include <net/if.h> 43 #include <net/if_dl.h> 44 #include <net/route.h> 45 #include <netinet/in.h> 46 #include <netinet/ip6.h> 47 #include <netinet/icmp6.h> 48 49 #include <inet/common.h> 50 #include <inet/mi.h> 51 #include <inet/mib2.h> 52 #include <inet/nd.h> 53 #include <inet/ip.h> 54 #include <inet/ip_if.h> 55 #include <inet/ip_ire.h> 56 #include <inet/ip_rts.h> 57 #include <inet/ip6.h> 58 #include <inet/ip_ndp.h> 59 #include <inet/ipsec_impl.h> 60 #include <inet/ipsec_info.h> 61 62 /* 63 * Function names with nce_ prefix are static while function 64 * names with ndp_ prefix are used by rest of the IP. 65 */ 66 67 static boolean_t nce_cmp_ll_addr(nce_t *nce, char *new_ll_addr, 68 uint32_t ll_addr_len); 69 static void nce_fastpath(nce_t *nce); 70 static void nce_ire_delete(nce_t *nce); 71 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 72 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 73 static nce_t *nce_lookup_addr(ill_t *ill, const in6_addr_t *addr); 74 static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); 75 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 76 uchar_t *addr); 77 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 78 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 79 static void nce_report1(nce_t *nce, uchar_t *mp_arg); 80 static mblk_t *nce_udreq_alloc(ill_t *ill); 81 static void nce_update(nce_t *nce, uint16_t new_state, 82 uchar_t *new_ll_addr); 83 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 84 static boolean_t nce_xmit(ill_t *ill, uint32_t operation, 85 ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, 86 const in6_addr_t *target, int flag); 87 static void lla2ascii(uint8_t *lla, int addrlen, uchar_t *buf); 88 extern void th_trace_rrecord(th_trace_t *); 89 90 #ifdef NCE_DEBUG 91 void nce_trace_inactive(nce_t *); 92 #endif 93 94 /* NDP Cache Entry Hash Table */ 95 #define NCE_TABLE_SIZE 256 96 static nce_t *nce_hash_tbl[NCE_TABLE_SIZE]; 97 static nce_t *nce_mask_entries; /* mask not all ones */ 98 static int ndp_g_walker = 0; /* # of active thread */ 99 /* walking nce hash list */ 100 /* ndp_g_walker_cleanup will be true, when deletion have to be defered */ 101 static boolean_t ndp_g_walker_cleanup = B_FALSE; 102 103 #define NCE_HASH_PTR(addr) \ 104 (&(nce_hash_tbl[NCE_ADDR_HASH_V6(addr, NCE_TABLE_SIZE)])) 105 106 /* 107 * NDP Cache Entry creation routine. 108 * Mapped entries will never do NUD . 109 * This routine must always be called with ndp_g_lock held. 110 * Prior to return, nce_refcnt is incremented. 111 */ 112 int 113 ndp_add(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 114 const in6_addr_t *mask, const in6_addr_t *extract_mask, 115 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 116 nce_t **newnce) 117 { 118 static nce_t nce_nil; 119 nce_t *nce; 120 mblk_t *mp; 121 mblk_t *template; 122 nce_t **ncep; 123 boolean_t dropped = B_FALSE; 124 125 ASSERT(MUTEX_HELD(&ndp_g_lock)); 126 ASSERT(ill != NULL); 127 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 128 ip0dbg(("ndp_add: no addr\n")); 129 return (EINVAL); 130 } 131 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 132 ip0dbg(("ndp_add: flags = %x\n", (int)flags)); 133 return (EINVAL); 134 } 135 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 136 (flags & NCE_F_MAPPING)) { 137 ip0dbg(("ndp_add: extract mask zero for mapping")); 138 return (EINVAL); 139 } 140 /* 141 * Allocate the mblk to hold the nce. 142 * 143 * XXX This can come out of a separate cache - nce_cache. 144 * We don't need the mp anymore as there are no more 145 * "qwriter"s 146 */ 147 mp = allocb(sizeof (nce_t), BPRI_MED); 148 if (mp == NULL) 149 return (ENOMEM); 150 151 nce = (nce_t *)mp->b_rptr; 152 mp->b_wptr = (uchar_t *)&nce[1]; 153 *nce = nce_nil; 154 155 /* 156 * This one holds link layer address 157 */ 158 if (ill->ill_net_type == IRE_IF_RESOLVER) { 159 template = nce_udreq_alloc(ill); 160 } else { 161 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 162 ASSERT((ill->ill_resolver_mp != NULL)); 163 template = copyb(ill->ill_resolver_mp); 164 } 165 if (template == NULL) { 166 freeb(mp); 167 return (ENOMEM); 168 } 169 nce->nce_ill = ill; 170 nce->nce_flags = flags; 171 nce->nce_state = state; 172 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 173 nce->nce_rcnt = ill->ill_xmit_count; 174 nce->nce_addr = *addr; 175 nce->nce_mask = *mask; 176 nce->nce_extract_mask = *extract_mask; 177 nce->nce_ll_extract_start = hw_extract_start; 178 nce->nce_fp_mp = NULL; 179 nce->nce_res_mp = template; 180 if (state == ND_REACHABLE) 181 nce->nce_last = TICK_TO_MSEC(lbolt64); 182 else 183 nce->nce_last = 0; 184 nce->nce_qd_mp = NULL; 185 nce->nce_mp = mp; 186 if (hw_addr != NULL) 187 nce_set_ll(nce, hw_addr); 188 /* This one is for nce getting created */ 189 nce->nce_refcnt = 1; 190 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 191 if (nce->nce_flags & NCE_F_MAPPING) { 192 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 193 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 194 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 195 ncep = &nce_mask_entries; 196 } else { 197 ncep = ((nce_t **)NCE_HASH_PTR(*addr)); 198 } 199 200 #ifdef NCE_DEBUG 201 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 202 #endif 203 /* 204 * Atomically ensure that the ill is not CONDEMNED, before 205 * adding the NCE. 206 */ 207 mutex_enter(&ill->ill_lock); 208 if (ill->ill_state_flags & ILL_CONDEMNED) { 209 mutex_exit(&ill->ill_lock); 210 freeb(mp); 211 return (EINVAL); 212 } 213 if ((nce->nce_next = *ncep) != NULL) 214 nce->nce_next->nce_ptpn = &nce->nce_next; 215 *ncep = nce; 216 nce->nce_ptpn = ncep; 217 *newnce = nce; 218 /* This one is for nce being used by an active thread */ 219 NCE_REFHOLD(*newnce); 220 221 /* Bump up the number of nce's referencing this ill */ 222 ill->ill_nce_cnt++; 223 mutex_exit(&ill->ill_lock); 224 225 /* 226 * Before we insert the nce, honor the UNSOL_ADV flag. 227 * We cannot hold the ndp_g_lock and call nce_xmit 228 * which does a putnext. 229 */ 230 if (flags & NCE_F_UNSOL_ADV) { 231 flags |= NDP_ORIDE; 232 /* 233 * We account for the transmit below by assigning one 234 * less than the ndd variable. Subsequent decrements 235 * are done in ndp_timer. 236 */ 237 mutex_enter(&nce->nce_lock); 238 mutex_exit(&ndp_g_lock); 239 nce->nce_unsolicit_count = ip_ndp_unsolicit_count - 1; 240 mutex_exit(&nce->nce_lock); 241 dropped = nce_xmit(ill, 242 ND_NEIGHBOR_ADVERT, 243 ill, /* ill to be used for extracting ill_nd_lla */ 244 B_TRUE, /* use ill_nd_lla */ 245 addr, /* Source and target of the advertisement pkt */ 246 &ipv6_all_hosts_mcast, /* Destination of the packet */ 247 flags); 248 mutex_enter(&nce->nce_lock); 249 if (dropped) 250 nce->nce_unsolicit_count++; 251 if (nce->nce_unsolicit_count != 0) { 252 nce->nce_timeout_id = timeout(ndp_timer, nce, 253 MSEC_TO_TICK(ip_ndp_unsolicit_interval)); 254 } 255 mutex_exit(&nce->nce_lock); 256 mutex_enter(&ndp_g_lock); 257 } 258 /* 259 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 260 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 261 * We call nce_fastpath from nce_update if the link layer address of 262 * the peer changes from nce_update 263 */ 264 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 265 nce_fastpath(nce); 266 return (0); 267 } 268 269 int 270 ndp_lookup_then_add(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 271 const in6_addr_t *mask, const in6_addr_t *extract_mask, 272 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 273 nce_t **newnce) 274 { 275 int err = 0; 276 nce_t *nce; 277 278 mutex_enter(&ndp_g_lock); 279 nce = nce_lookup_addr(ill, addr); 280 if (nce == NULL) { 281 err = ndp_add(ill, 282 hw_addr, 283 addr, 284 mask, 285 extract_mask, 286 hw_extract_start, 287 flags, 288 state, 289 newnce); 290 } else { 291 *newnce = nce; 292 err = EEXIST; 293 } 294 mutex_exit(&ndp_g_lock); 295 return (err); 296 } 297 298 /* 299 * Remove all the CONDEMNED nces from the appropriate hash table. 300 * We create a private list of NCEs, these may have ires pointing 301 * to them, so the list will be passed through to clean up dependent 302 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 303 */ 304 static void 305 nce_remove(nce_t *nce, nce_t **free_nce_list) 306 { 307 nce_t *nce1; 308 nce_t **ptpn; 309 310 ASSERT(MUTEX_HELD(&ndp_g_lock)); 311 ASSERT(ndp_g_walker == 0); 312 for (; nce; nce = nce1) { 313 nce1 = nce->nce_next; 314 mutex_enter(&nce->nce_lock); 315 if (nce->nce_flags & NCE_F_CONDEMNED) { 316 ptpn = nce->nce_ptpn; 317 nce1 = nce->nce_next; 318 if (nce1 != NULL) 319 nce1->nce_ptpn = ptpn; 320 *ptpn = nce1; 321 nce->nce_ptpn = NULL; 322 nce->nce_next = NULL; 323 nce->nce_next = *free_nce_list; 324 *free_nce_list = nce; 325 } 326 mutex_exit(&nce->nce_lock); 327 } 328 } 329 330 /* 331 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 332 * will return this NCE. Also no new IREs will be created that 333 * point to this NCE (See ire_add_v6). Also no new timeouts will 334 * be started (See NDP_RESTART_TIMER). 335 * 2. Cancel any currently running timeouts. 336 * 3. If there is an ndp walker, return. The walker will do the cleanup. 337 * This ensures that walkers see a consistent list of NCEs while walking. 338 * 4. Otherwise remove the NCE from the list of NCEs 339 * 5. Delete all IREs pointing to this NCE. 340 */ 341 void 342 ndp_delete(nce_t *nce) 343 { 344 nce_t **ptpn; 345 nce_t *nce1; 346 347 /* Serialize deletes */ 348 mutex_enter(&nce->nce_lock); 349 if (nce->nce_flags & NCE_F_CONDEMNED) { 350 /* Some other thread is doing the delete */ 351 mutex_exit(&nce->nce_lock); 352 return; 353 } 354 /* 355 * Caller has a refhold. Also 1 ref for being in the list. Thus 356 * refcnt has to be >= 2 357 */ 358 ASSERT(nce->nce_refcnt >= 2); 359 nce->nce_flags |= NCE_F_CONDEMNED; 360 mutex_exit(&nce->nce_lock); 361 362 nce_fastpath_list_delete(nce); 363 364 /* 365 * Cancel any running timer. Timeout can't be restarted 366 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 367 * Passing invalid timeout id is fine. 368 */ 369 if (nce->nce_timeout_id != 0) { 370 (void) untimeout(nce->nce_timeout_id); 371 nce->nce_timeout_id = 0; 372 } 373 374 mutex_enter(&ndp_g_lock); 375 if (nce->nce_ptpn == NULL) { 376 /* 377 * The last ndp walker has already removed this nce from 378 * the list after we marked the nce CONDEMNED and before 379 * we grabbed the ndp_g_lock. 380 */ 381 mutex_exit(&ndp_g_lock); 382 return; 383 } 384 if (ndp_g_walker > 0) { 385 /* 386 * Can't unlink. The walker will clean up 387 */ 388 ndp_g_walker_cleanup = B_TRUE; 389 mutex_exit(&ndp_g_lock); 390 return; 391 } 392 393 /* 394 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 395 * the timer since it is marked CONDEMNED. 396 */ 397 ptpn = nce->nce_ptpn; 398 nce1 = nce->nce_next; 399 if (nce1 != NULL) 400 nce1->nce_ptpn = ptpn; 401 *ptpn = nce1; 402 nce->nce_ptpn = NULL; 403 nce->nce_next = NULL; 404 mutex_exit(&ndp_g_lock); 405 406 nce_ire_delete(nce); 407 } 408 409 void 410 ndp_inactive(nce_t *nce) 411 { 412 mblk_t **mpp; 413 ill_t *ill; 414 415 ASSERT(nce->nce_refcnt == 0); 416 ASSERT(MUTEX_HELD(&nce->nce_lock)); 417 ASSERT(nce->nce_fastpath == NULL); 418 419 /* Free all nce allocated messages */ 420 mpp = &nce->nce_first_mp_to_free; 421 do { 422 while (*mpp != NULL) { 423 mblk_t *mp; 424 425 mp = *mpp; 426 *mpp = mp->b_next; 427 mp->b_next = NULL; 428 mp->b_prev = NULL; 429 freemsg(mp); 430 } 431 } while (mpp++ != &nce->nce_last_mp_to_free); 432 433 #ifdef NCE_DEBUG 434 nce_trace_inactive(nce); 435 #endif 436 437 ill = nce->nce_ill; 438 mutex_enter(&ill->ill_lock); 439 ill->ill_nce_cnt--; 440 /* 441 * If the number of nce's associated with this ill have dropped 442 * to zero, check whether we need to restart any operation that 443 * is waiting for this to happen. 444 */ 445 if (ill->ill_nce_cnt == 0) { 446 /* ipif_ill_refrele_tail drops the ill_lock */ 447 ipif_ill_refrele_tail(ill); 448 } else { 449 mutex_exit(&ill->ill_lock); 450 } 451 mutex_destroy(&nce->nce_lock); 452 freeb(nce->nce_mp); 453 } 454 455 /* 456 * ndp_walk routine. Delete the nce if it is associated with the ill 457 * that is going away. Always called as a writer. 458 */ 459 void 460 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 461 { 462 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 463 ndp_delete(nce); 464 } 465 } 466 467 /* 468 * Walk a list of to be inactive NCEs and blow away all the ires. 469 */ 470 static void 471 nce_ire_delete_list(nce_t *nce) 472 { 473 nce_t *nce_next; 474 475 ASSERT(nce != NULL); 476 while (nce != NULL) { 477 nce_next = nce->nce_next; 478 nce->nce_next = NULL; 479 480 /* 481 * It is possible for the last ndp walker (this thread) 482 * to come here after ndp_delete has marked the nce CONDEMNED 483 * and before it has removed the nce from the fastpath list 484 * or called untimeout. So we need to do it here. It is safe 485 * for both ndp_delete and this thread to do it twice or 486 * even simultaneously since each of the threads has a 487 * reference on the nce. 488 */ 489 nce_fastpath_list_delete(nce); 490 /* 491 * Cancel any running timer. Timeout can't be restarted 492 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 493 * Passing invalid timeout id is fine. 494 */ 495 if (nce->nce_timeout_id != 0) { 496 (void) untimeout(nce->nce_timeout_id); 497 nce->nce_timeout_id = 0; 498 } 499 500 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 501 nce_ire_delete1, (char *)nce, nce->nce_ill); 502 NCE_REFRELE_NOTR(nce); 503 nce = nce_next; 504 } 505 } 506 507 /* 508 * Delete an ire when the nce goes away. 509 */ 510 /* ARGSUSED */ 511 static void 512 nce_ire_delete(nce_t *nce) 513 { 514 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 515 nce_ire_delete1, (char *)nce, nce->nce_ill); 516 NCE_REFRELE_NOTR(nce); 517 } 518 519 /* 520 * ire_walk routine used to delete every IRE that shares this nce 521 */ 522 static void 523 nce_ire_delete1(ire_t *ire, char *nce_arg) 524 { 525 nce_t *nce = (nce_t *)nce_arg; 526 527 ASSERT(ire->ire_type == IRE_CACHE); 528 529 if (ire->ire_nce == nce) 530 ire_delete(ire); 531 } 532 533 /* 534 * Cache entry lookup. Try to find an nce matching the parameters passed. 535 * If one is found, the refcnt on the nce will be incremented. 536 */ 537 nce_t * 538 ndp_lookup(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) 539 { 540 nce_t *nce; 541 542 if (!caller_holds_lock) 543 mutex_enter(&ndp_g_lock); 544 nce = nce_lookup_addr(ill, addr); 545 if (nce == NULL) 546 nce = nce_lookup_mapping(ill, addr); 547 if (!caller_holds_lock) 548 mutex_exit(&ndp_g_lock); 549 return (nce); 550 } 551 552 /* 553 * Cache entry lookup. Try to find an nce matching the parameters passed. 554 * Look only for exact entries (no mappings). If an nce is found, increment 555 * the hold count on that nce. 556 */ 557 static nce_t * 558 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr) 559 { 560 nce_t *nce; 561 562 ASSERT(ill != NULL); 563 ASSERT(MUTEX_HELD(&ndp_g_lock)); 564 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 565 return (NULL); 566 nce = *((nce_t **)NCE_HASH_PTR(*addr)); 567 for (; nce != NULL; nce = nce->nce_next) { 568 if (nce->nce_ill == ill) { 569 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 570 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 571 &ipv6_all_ones)) { 572 mutex_enter(&nce->nce_lock); 573 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 574 NCE_REFHOLD_LOCKED(nce); 575 mutex_exit(&nce->nce_lock); 576 break; 577 } 578 mutex_exit(&nce->nce_lock); 579 } 580 } 581 } 582 return (nce); 583 } 584 585 /* 586 * Cache entry lookup. Try to find an nce matching the parameters passed. 587 * Look only for mappings. 588 */ 589 static nce_t * 590 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 591 { 592 nce_t *nce; 593 594 ASSERT(ill != NULL); 595 ASSERT(MUTEX_HELD(&ndp_g_lock)); 596 if (!IN6_IS_ADDR_MULTICAST(addr)) 597 return (NULL); 598 nce = nce_mask_entries; 599 for (; nce != NULL; nce = nce->nce_next) 600 if (nce->nce_ill == ill && 601 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 602 mutex_enter(&nce->nce_lock); 603 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 604 NCE_REFHOLD_LOCKED(nce); 605 mutex_exit(&nce->nce_lock); 606 break; 607 } 608 mutex_exit(&nce->nce_lock); 609 } 610 return (nce); 611 } 612 613 /* 614 * Process passed in parameters either from an incoming packet or via 615 * user ioctl. 616 */ 617 void 618 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 619 { 620 ill_t *ill = nce->nce_ill; 621 uint32_t hw_addr_len = ill->ill_nd_lla_len; 622 mblk_t *mp; 623 boolean_t ll_updated = B_FALSE; 624 boolean_t ll_changed; 625 626 /* 627 * No updates of link layer address or the neighbor state is 628 * allowed, when the cache is in NONUD state. This still 629 * allows for responding to reachability solicitation. 630 */ 631 mutex_enter(&nce->nce_lock); 632 if (nce->nce_state == ND_INCOMPLETE) { 633 if (hw_addr == NULL) { 634 mutex_exit(&nce->nce_lock); 635 return; 636 } 637 nce_set_ll(nce, hw_addr); 638 /* 639 * Update nce state and send the queued packets 640 * back to ip this time ire will be added. 641 */ 642 if (flag & ND_NA_FLAG_SOLICITED) { 643 nce_update(nce, ND_REACHABLE, NULL); 644 } else { 645 nce_update(nce, ND_STALE, NULL); 646 } 647 mutex_exit(&nce->nce_lock); 648 nce_fastpath(nce); 649 mutex_enter(&nce->nce_lock); 650 mp = nce->nce_qd_mp; 651 nce->nce_qd_mp = NULL; 652 mutex_exit(&nce->nce_lock); 653 while (mp != NULL) { 654 mblk_t *nxt_mp; 655 656 nxt_mp = mp->b_next; 657 mp->b_next = NULL; 658 if (mp->b_prev != NULL) { 659 ill_t *inbound_ill; 660 queue_t *fwdq = NULL; 661 uint_t ifindex; 662 663 ifindex = (uint_t)(uintptr_t)mp->b_prev; 664 inbound_ill = ill_lookup_on_ifindex(ifindex, 665 B_TRUE, NULL, NULL, NULL, NULL); 666 if (inbound_ill == NULL) { 667 mp->b_prev = NULL; 668 freemsg(mp); 669 return; 670 } else { 671 fwdq = inbound_ill->ill_rq; 672 } 673 mp->b_prev = NULL; 674 /* 675 * Send a forwarded packet back into ip_rput_v6 676 * just as in ire_send_v6(). 677 * Extract the queue from b_prev (set in 678 * ip_rput_data_v6). 679 */ 680 if (fwdq != NULL) { 681 /* 682 * Forwarded packets hop count will 683 * get decremented in ip_rput_data_v6 684 */ 685 put(fwdq, mp); 686 } else { 687 /* 688 * Send locally originated packets back 689 * into * ip_wput_v6. 690 */ 691 put(ill->ill_wq, mp); 692 } 693 ill_refrele(inbound_ill); 694 } else { 695 put(ill->ill_wq, mp); 696 } 697 mp = nxt_mp; 698 } 699 return; 700 } 701 ll_changed = nce_cmp_ll_addr(nce, (char *)hw_addr, hw_addr_len); 702 if (!is_adv) { 703 /* If this is a SOLICITATION request only */ 704 if (ll_changed) 705 nce_update(nce, ND_STALE, hw_addr); 706 mutex_exit(&nce->nce_lock); 707 return; 708 } 709 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 710 /* If in any other state than REACHABLE, ignore */ 711 if (nce->nce_state == ND_REACHABLE) { 712 nce_update(nce, ND_STALE, NULL); 713 } 714 mutex_exit(&nce->nce_lock); 715 return; 716 } else { 717 if (ll_changed) { 718 nce_update(nce, ND_UNCHANGED, hw_addr); 719 ll_updated = B_TRUE; 720 } 721 if (flag & ND_NA_FLAG_SOLICITED) { 722 nce_update(nce, ND_REACHABLE, NULL); 723 } else { 724 if (ll_updated) { 725 nce_update(nce, ND_STALE, NULL); 726 } 727 } 728 mutex_exit(&nce->nce_lock); 729 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 730 NCE_F_ISROUTER)) { 731 ire_t *ire; 732 733 /* 734 * Router turned to host. We need to remove the 735 * entry as well as any default route that may be 736 * using this as a next hop. This is required by 737 * section 7.2.5 of RFC 2461. 738 */ 739 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 740 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 741 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 742 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 743 MATCH_IRE_DEFAULT); 744 if (ire != NULL) { 745 ip_rts_rtmsg(RTM_DELETE, ire, 0); 746 ire_delete(ire); 747 ire_refrele(ire); 748 } 749 ndp_delete(nce); 750 } 751 } 752 } 753 754 /* 755 * Pass arg1 to the pfi supplied, along with each nce in existence. 756 * ndp_walk() places a REFHOLD on the nce and drops the lock when 757 * walking the hash list. 758 */ 759 void 760 ndp_walk_impl(ill_t *ill, pfi_t pfi, void *arg1, boolean_t trace) 761 { 762 763 nce_t *nce; 764 nce_t *nce1; 765 nce_t **ncep; 766 nce_t *free_nce_list = NULL; 767 768 mutex_enter(&ndp_g_lock); 769 ndp_g_walker++; /* Prevent ndp_delete from unlink and free of NCE */ 770 mutex_exit(&ndp_g_lock); 771 for (ncep = nce_hash_tbl; ncep < A_END(nce_hash_tbl); ncep++) { 772 for (nce = *ncep; nce; nce = nce1) { 773 nce1 = nce->nce_next; 774 if (ill == NULL || nce->nce_ill == ill) { 775 if (trace) { 776 NCE_REFHOLD(nce); 777 (*pfi)(nce, arg1); 778 NCE_REFRELE(nce); 779 } else { 780 NCE_REFHOLD_NOTR(nce); 781 (*pfi)(nce, arg1); 782 NCE_REFRELE_NOTR(nce); 783 } 784 } 785 } 786 } 787 for (nce = nce_mask_entries; nce; nce = nce1) { 788 nce1 = nce->nce_next; 789 if (ill == NULL || nce->nce_ill == ill) { 790 if (trace) { 791 NCE_REFHOLD(nce); 792 (*pfi)(nce, arg1); 793 NCE_REFRELE(nce); 794 } else { 795 NCE_REFHOLD_NOTR(nce); 796 (*pfi)(nce, arg1); 797 NCE_REFRELE_NOTR(nce); 798 } 799 } 800 } 801 mutex_enter(&ndp_g_lock); 802 ndp_g_walker--; 803 /* 804 * While NCE's are removed from global list they are placed 805 * in a private list, to be passed to nce_ire_delete_list(). 806 * The reason is, there may be ires pointing to this nce 807 * which needs to cleaned up. 808 */ 809 if (ndp_g_walker_cleanup && ndp_g_walker == 0) { 810 /* Time to delete condemned entries */ 811 for (ncep = nce_hash_tbl; ncep < A_END(nce_hash_tbl); ncep++) { 812 nce = *ncep; 813 if (nce != NULL) { 814 nce_remove(nce, &free_nce_list); 815 } 816 } 817 nce = nce_mask_entries; 818 if (nce != NULL) { 819 nce_remove(nce, &free_nce_list); 820 } 821 ndp_g_walker_cleanup = B_FALSE; 822 } 823 mutex_exit(&ndp_g_lock); 824 825 if (free_nce_list != NULL) { 826 nce_ire_delete_list(free_nce_list); 827 } 828 } 829 830 void 831 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1) 832 { 833 ndp_walk_impl(ill, pfi, arg1, B_TRUE); 834 } 835 836 /* 837 * Prepend the zoneid using an ipsec_out_t for later use by functions like 838 * ip_rput_v6() after neighbor discovery has taken place. If the message 839 * block already has a M_CTL at the front of it, then simply set the zoneid 840 * appropriately. 841 */ 842 static mblk_t * 843 ndp_prepend_zone(mblk_t *mp, zoneid_t zoneid) 844 { 845 mblk_t *first_mp; 846 ipsec_out_t *io; 847 848 ASSERT(zoneid != ALL_ZONES); 849 if (mp->b_datap->db_type == M_CTL) { 850 io = (ipsec_out_t *)mp->b_rptr; 851 ASSERT(io->ipsec_out_type == IPSEC_OUT); 852 io->ipsec_out_zoneid = zoneid; 853 return (mp); 854 } 855 856 first_mp = ipsec_alloc_ipsec_out(); 857 if (first_mp == NULL) 858 return (NULL); 859 io = (ipsec_out_t *)first_mp->b_rptr; 860 /* This is not a secure packet */ 861 io->ipsec_out_secure = B_FALSE; 862 io->ipsec_out_zoneid = zoneid; 863 first_mp->b_cont = mp; 864 return (first_mp); 865 } 866 867 /* 868 * Process resolve requests. Handles both mapped entries 869 * as well as cases that needs to be send out on the wire. 870 * Lookup a NCE for a given IRE. Regardless of whether one exists 871 * or one is created, we defer making ire point to nce until the 872 * ire is actually added at which point the nce_refcnt on the nce is 873 * incremented. This is done primarily to have symmetry between ire_add() 874 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 875 */ 876 int 877 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 878 { 879 nce_t *nce; 880 int err = 0; 881 uint32_t ms; 882 mblk_t *mp_nce = NULL; 883 884 ASSERT(ill != NULL); 885 if (IN6_IS_ADDR_MULTICAST(dst)) { 886 err = nce_set_multicast(ill, dst); 887 return (err); 888 } 889 err = ndp_lookup_then_add(ill, 890 NULL, /* No hardware address */ 891 dst, 892 &ipv6_all_ones, 893 &ipv6_all_zeros, 894 0, 895 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 896 ND_INCOMPLETE, 897 &nce); 898 899 switch (err) { 900 case 0: 901 /* 902 * New cache entry was created. Make sure that the state 903 * is not ND_INCOMPLETE. It can be in some other state 904 * even before we send out the solicitation as we could 905 * get un-solicited advertisements. 906 * 907 * If this is an XRESOLV interface, simply return 0, 908 * since we don't want to solicit just yet. 909 */ 910 if (ill->ill_flags & ILLF_XRESOLV) { 911 NCE_REFRELE(nce); 912 return (0); 913 } 914 rw_enter(&ill_g_lock, RW_READER); 915 mutex_enter(&nce->nce_lock); 916 if (nce->nce_state != ND_INCOMPLETE) { 917 mutex_exit(&nce->nce_lock); 918 rw_exit(&ill_g_lock); 919 NCE_REFRELE(nce); 920 return (0); 921 } 922 mp_nce = ndp_prepend_zone(mp, zoneid); 923 if (mp_nce == NULL) { 924 /* The caller will free mp */ 925 mutex_exit(&nce->nce_lock); 926 rw_exit(&ill_g_lock); 927 ndp_delete(nce); 928 NCE_REFRELE(nce); 929 return (ENOMEM); 930 } 931 ms = nce_solicit(nce, mp_nce); 932 rw_exit(&ill_g_lock); 933 if (ms == 0) { 934 /* The caller will free mp */ 935 if (mp_nce != mp) 936 freeb(mp_nce); 937 mutex_exit(&nce->nce_lock); 938 ndp_delete(nce); 939 NCE_REFRELE(nce); 940 return (EBUSY); 941 } 942 mutex_exit(&nce->nce_lock); 943 NDP_RESTART_TIMER(nce, (clock_t)ms); 944 NCE_REFRELE(nce); 945 return (EINPROGRESS); 946 case EEXIST: 947 /* Resolution in progress just queue the packet */ 948 mutex_enter(&nce->nce_lock); 949 if (nce->nce_state == ND_INCOMPLETE) { 950 mp_nce = ndp_prepend_zone(mp, zoneid); 951 if (mp_nce == NULL) { 952 err = ENOMEM; 953 } else { 954 nce_queue_mp(nce, mp_nce); 955 err = EINPROGRESS; 956 } 957 } else { 958 /* 959 * Any other state implies we have 960 * a nce but IRE needs to be added ... 961 * ire_add_v6() will take care of the 962 * the case when the nce becomes CONDEMNED 963 * before the ire is added to the table. 964 */ 965 err = 0; 966 } 967 mutex_exit(&nce->nce_lock); 968 NCE_REFRELE(nce); 969 break; 970 default: 971 ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); 972 break; 973 } 974 return (err); 975 } 976 977 /* 978 * When there is no resolver, the link layer template is passed in 979 * the IRE. 980 * Lookup a NCE for a given IRE. Regardless of whether one exists 981 * or one is created, we defer making ire point to nce until the 982 * ire is actually added at which point the nce_refcnt on the nce is 983 * incremented. This is done primarily to have symmetry between ire_add() 984 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 985 */ 986 int 987 ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 988 { 989 nce_t *nce; 990 int err = 0; 991 992 ASSERT(ill != NULL); 993 if (IN6_IS_ADDR_MULTICAST(dst)) { 994 err = nce_set_multicast(ill, dst); 995 return (err); 996 } 997 998 err = ndp_lookup_then_add(ill, 999 NULL, /* hardware address */ 1000 dst, 1001 &ipv6_all_ones, 1002 &ipv6_all_zeros, 1003 0, 1004 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1005 ND_REACHABLE, 1006 &nce); 1007 1008 switch (err) { 1009 case 0: 1010 /* 1011 * Cache entry with a proper resolver cookie was 1012 * created. 1013 */ 1014 NCE_REFRELE(nce); 1015 break; 1016 case EEXIST: 1017 err = 0; 1018 NCE_REFRELE(nce); 1019 break; 1020 default: 1021 ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 1022 break; 1023 } 1024 return (err); 1025 } 1026 1027 /* 1028 * For each interface an entry is added for the unspecified multicast group. 1029 * Here that mapping is used to form the multicast cache entry for a particular 1030 * multicast destination. 1031 */ 1032 static int 1033 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1034 { 1035 nce_t *mnce; /* Multicast mapping entry */ 1036 nce_t *nce; 1037 uchar_t *hw_addr = NULL; 1038 int err = 0; 1039 1040 ASSERT(ill != NULL); 1041 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1042 1043 mutex_enter(&ndp_g_lock); 1044 nce = nce_lookup_addr(ill, dst); 1045 if (nce != NULL) { 1046 mutex_exit(&ndp_g_lock); 1047 NCE_REFRELE(nce); 1048 return (0); 1049 } 1050 /* No entry, now lookup for a mapping this should never fail */ 1051 mnce = nce_lookup_mapping(ill, dst); 1052 if (mnce == NULL) { 1053 /* Something broken for the interface. */ 1054 mutex_exit(&ndp_g_lock); 1055 return (ESRCH); 1056 } 1057 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1058 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1059 /* 1060 * For IRE_IF_RESOLVER a hardware mapping can be 1061 * generated, for IRE_IF_NORESOLVER, resolution cookie 1062 * in the ill is copied in ndp_add(). 1063 */ 1064 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1065 if (hw_addr == NULL) { 1066 mutex_exit(&ndp_g_lock); 1067 NCE_REFRELE(mnce); 1068 return (ENOMEM); 1069 } 1070 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1071 } 1072 NCE_REFRELE(mnce); 1073 /* 1074 * IRE_IF_NORESOLVER type simply copies the resolution 1075 * cookie passed in. So no hw_addr is needed. 1076 */ 1077 err = ndp_add(ill, 1078 hw_addr, 1079 dst, 1080 &ipv6_all_ones, 1081 &ipv6_all_zeros, 1082 0, 1083 NCE_F_NONUD, 1084 ND_REACHABLE, 1085 &nce); 1086 mutex_exit(&ndp_g_lock); 1087 if (hw_addr != NULL) 1088 kmem_free(hw_addr, ill->ill_nd_lla_len); 1089 if (err != 0) { 1090 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1091 return (err); 1092 } 1093 NCE_REFRELE(nce); 1094 return (0); 1095 } 1096 1097 /* 1098 * Return the link layer address, and any flags of a nce. 1099 */ 1100 int 1101 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1102 { 1103 nce_t *nce; 1104 in6_addr_t *addr; 1105 sin6_t *sin6; 1106 dl_unitdata_req_t *dl; 1107 1108 ASSERT(ill != NULL); 1109 sin6 = (sin6_t *)&lnr->lnr_addr; 1110 addr = &sin6->sin6_addr; 1111 1112 nce = ndp_lookup(ill, addr, B_FALSE); 1113 if (nce == NULL) 1114 return (ESRCH); 1115 /* If in INCOMPLETE state, no link layer address is available yet */ 1116 if (nce->nce_state == ND_INCOMPLETE) 1117 goto done; 1118 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1119 if (ill->ill_flags & ILLF_XRESOLV) 1120 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1121 else 1122 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1123 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1124 sizeof (lnr->lnr_hdw_addr)); 1125 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1126 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1127 if (nce->nce_flags & NCE_F_ISROUTER) 1128 lnr->lnr_flags = NDF_ISROUTER_ON; 1129 if (nce->nce_flags & NCE_F_PROXY) 1130 lnr->lnr_flags |= NDF_PROXY_ON; 1131 if (nce->nce_flags & NCE_F_ANYCAST) 1132 lnr->lnr_flags |= NDF_ANYCAST_ON; 1133 done: 1134 NCE_REFRELE(nce); 1135 return (0); 1136 } 1137 1138 /* 1139 * Send Enable/Disable multicast reqs to driver. 1140 */ 1141 int 1142 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1143 uint32_t hw_addr_offset, mblk_t *mp) 1144 { 1145 nce_t *nce; 1146 uchar_t *hw_addr; 1147 1148 ASSERT(ill != NULL); 1149 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1150 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1151 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1152 freemsg(mp); 1153 return (EINVAL); 1154 } 1155 mutex_enter(&ndp_g_lock); 1156 nce = nce_lookup_mapping(ill, addr); 1157 if (nce == NULL) { 1158 mutex_exit(&ndp_g_lock); 1159 freemsg(mp); 1160 return (ESRCH); 1161 } 1162 mutex_exit(&ndp_g_lock); 1163 /* 1164 * Update dl_addr_length and dl_addr_offset for primitives that 1165 * have physical addresses as opposed to full saps 1166 */ 1167 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1168 case DL_ENABMULTI_REQ: 1169 /* Track the state if this is the first enabmulti */ 1170 if (ill->ill_dlpi_multicast_state == IDMS_UNKNOWN) 1171 ill->ill_dlpi_multicast_state = IDMS_INPROGRESS; 1172 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1173 break; 1174 case DL_DISABMULTI_REQ: 1175 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1176 break; 1177 default: 1178 NCE_REFRELE(nce); 1179 ip1dbg(("ndp_mcastreq: default\n")); 1180 return (EINVAL); 1181 } 1182 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1183 NCE_REFRELE(nce); 1184 putnext(ill->ill_wq, mp); 1185 return (0); 1186 } 1187 1188 /* 1189 * Send a neighbor solicitation. 1190 * Returns number of milliseconds after which we should either rexmit or abort. 1191 * Return of zero means we should abort. 1192 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1193 * 1194 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1195 * the packet. 1196 * NOTE: This routine does not consume mp. 1197 */ 1198 uint32_t 1199 nce_solicit(nce_t *nce, mblk_t *mp) 1200 { 1201 ill_t *ill; 1202 ill_t *src_ill; 1203 ip6_t *ip6h; 1204 in6_addr_t src; 1205 in6_addr_t dst; 1206 ipif_t *ipif; 1207 ip6i_t *ip6i; 1208 boolean_t dropped = B_FALSE; 1209 1210 ASSERT(RW_READ_HELD(&ill_g_lock)); 1211 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1212 ill = nce->nce_ill; 1213 ASSERT(ill != NULL); 1214 1215 if (nce->nce_rcnt == 0) { 1216 return (0); 1217 } 1218 1219 if (mp == NULL) { 1220 ASSERT(nce->nce_qd_mp != NULL); 1221 mp = nce->nce_qd_mp; 1222 } else { 1223 nce_queue_mp(nce, mp); 1224 } 1225 1226 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1227 if (mp->b_datap->db_type == M_CTL) 1228 mp = mp->b_cont; 1229 1230 ip6h = (ip6_t *)mp->b_rptr; 1231 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1232 /* 1233 * This message should have been pulled up already in 1234 * ip_wput_v6. We can't do pullups here because the message 1235 * could be from the nce_qd_mp which could have b_next/b_prev 1236 * non-NULL. 1237 */ 1238 ip6i = (ip6i_t *)ip6h; 1239 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 1240 sizeof (ip6i_t) + IPV6_HDR_LEN); 1241 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1242 } 1243 src = ip6h->ip6_src; 1244 /* 1245 * If the src of outgoing packet is one of the assigned interface 1246 * addresses use it, otherwise we will pick the source address below. 1247 */ 1248 src_ill = ill; 1249 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1250 if (ill->ill_group != NULL) 1251 src_ill = ill->ill_group->illgrp_ill; 1252 for (; src_ill != NULL; src_ill = src_ill->ill_group_next) { 1253 for (ipif = src_ill->ill_ipif; ipif != NULL; 1254 ipif = ipif->ipif_next) { 1255 if (IN6_ARE_ADDR_EQUAL(&src, 1256 &ipif->ipif_v6lcl_addr)) { 1257 break; 1258 } 1259 } 1260 if (ipif != NULL) 1261 break; 1262 } 1263 if (src_ill == NULL) { 1264 /* May be a forwarding packet */ 1265 src_ill = ill; 1266 src = ipv6_all_zeros; 1267 } 1268 } 1269 dst = nce->nce_addr; 1270 /* 1271 * If source address is unspecified, nce_xmit will choose 1272 * one for us and initialize the hardware address also 1273 * appropriately. 1274 */ 1275 if (IN6_IS_ADDR_UNSPECIFIED(&src)) 1276 src_ill = NULL; 1277 nce->nce_rcnt--; 1278 mutex_exit(&nce->nce_lock); 1279 rw_exit(&ill_g_lock); 1280 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, 1281 &dst, 0); 1282 rw_enter(&ill_g_lock, RW_READER); 1283 mutex_enter(&nce->nce_lock); 1284 if (dropped) 1285 nce->nce_rcnt++; 1286 return (ill->ill_reachable_retrans_time); 1287 } 1288 1289 void 1290 ndp_input_solicit(ill_t *ill, mblk_t *mp) 1291 { 1292 nd_neighbor_solicit_t *ns; 1293 uint32_t hlen = ill->ill_nd_lla_len; 1294 uchar_t *haddr = NULL; 1295 icmp6_t *icmp_nd; 1296 ip6_t *ip6h; 1297 nce_t *our_nce = NULL; 1298 in6_addr_t target; 1299 in6_addr_t src; 1300 int len; 1301 int flag = 0; 1302 nd_opt_hdr_t *opt = NULL; 1303 boolean_t bad_solicit = B_FALSE; 1304 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1305 1306 ip6h = (ip6_t *)mp->b_rptr; 1307 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1308 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1309 src = ip6h->ip6_src; 1310 ns = (nd_neighbor_solicit_t *)icmp_nd; 1311 target = ns->nd_ns_target; 1312 if (IN6_IS_ADDR_MULTICAST(&target)) { 1313 if (ip_debug > 2) { 1314 /* ip1dbg */ 1315 pr_addr_dbg("ndp_input_solicit: Target is" 1316 " multicast! %s\n", AF_INET6, &target); 1317 } 1318 bad_solicit = B_TRUE; 1319 goto done; 1320 } 1321 if (len > sizeof (nd_neighbor_solicit_t)) { 1322 /* Options present */ 1323 opt = (nd_opt_hdr_t *)&ns[1]; 1324 len -= sizeof (nd_neighbor_solicit_t); 1325 if (!ndp_verify_optlen(opt, len)) { 1326 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1327 bad_solicit = B_TRUE; 1328 goto done; 1329 } 1330 } 1331 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1332 /* Check to see if this is a valid DAD solicitation */ 1333 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1334 if (ip_debug > 2) { 1335 /* ip1dbg */ 1336 pr_addr_dbg("ndp_input_solicit: IPv6 " 1337 "Destination is not solicited node " 1338 "multicast %s\n", AF_INET6, 1339 &ip6h->ip6_dst); 1340 } 1341 bad_solicit = B_TRUE; 1342 goto done; 1343 } 1344 } 1345 1346 our_nce = ndp_lookup(ill, &target, B_FALSE); 1347 /* 1348 * If this is a valid Solicitation, a permanent 1349 * entry should exist in the cache 1350 */ 1351 if (our_nce == NULL || 1352 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1353 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1354 "ifname=%s ", ill->ill_name)); 1355 if (ip_debug > 2) { 1356 /* ip1dbg */ 1357 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1358 } 1359 bad_solicit = B_TRUE; 1360 goto done; 1361 } 1362 1363 /* At this point we should have a verified NS per spec */ 1364 if (opt != NULL) { 1365 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1366 if (opt != NULL) { 1367 /* 1368 * No source link layer address option should 1369 * be present in a valid DAD request. 1370 */ 1371 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1372 ip1dbg(("ndp_input_solicit: source link-layer " 1373 "address option present with an " 1374 "unspecified source. \n")); 1375 bad_solicit = B_TRUE; 1376 goto done; 1377 } 1378 haddr = (uchar_t *)&opt[1]; 1379 if (hlen > opt->nd_opt_len * 8 || 1380 hlen == 0) { 1381 bad_solicit = B_TRUE; 1382 goto done; 1383 } 1384 } 1385 } 1386 /* 1387 * haddr can be NULL if no options are present, 1388 * or no Source link layer address is present in, 1389 * recvd NDP options of solicitation message. 1390 */ 1391 if (haddr == NULL) { 1392 nce_t *nnce; 1393 mutex_enter(&ndp_g_lock); 1394 nnce = nce_lookup_addr(ill, &src); 1395 mutex_exit(&ndp_g_lock); 1396 1397 if (nnce == NULL) { 1398 in6_addr_t dst = ipv6_solicited_node_mcast; 1399 1400 /* Form solicited node multicast address */ 1401 dst.s6_addr32[3] |= src.s6_addr32[3]; 1402 (void) nce_xmit(ill, 1403 ND_NEIGHBOR_SOLICIT, 1404 ill, 1405 B_TRUE, 1406 &target, 1407 &dst, 1408 flag); 1409 bad_solicit = B_TRUE; 1410 goto done; 1411 } 1412 } 1413 /* Set override flag, it will be reset later if need be. */ 1414 flag |= NDP_ORIDE; 1415 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1416 flag |= NDP_UNICAST; 1417 } 1418 1419 /* 1420 * Create/update the entry for the soliciting node. 1421 * or respond to outstanding queries, don't if 1422 * the source is unspecified address. 1423 */ 1424 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1425 int err = 0; 1426 nce_t *nnce; 1427 1428 err = ndp_lookup_then_add(ill, 1429 haddr, 1430 &src, /* Soliciting nodes address */ 1431 &ipv6_all_ones, 1432 &ipv6_all_zeros, 1433 0, 1434 0, 1435 ND_STALE, 1436 &nnce); 1437 switch (err) { 1438 case 0: 1439 /* done with this entry */ 1440 NCE_REFRELE(nnce); 1441 break; 1442 case EEXIST: 1443 /* 1444 * B_FALSE indicates this is not an 1445 * an advertisement. 1446 */ 1447 ndp_process(nnce, haddr, 0, B_FALSE); 1448 NCE_REFRELE(nnce); 1449 break; 1450 default: 1451 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 1452 err)); 1453 goto done; 1454 } 1455 flag |= NDP_SOLICITED; 1456 } else { 1457 /* 1458 * This is a DAD req, multicast the advertisement 1459 * to the all-nodes address. 1460 */ 1461 src = ipv6_all_hosts_mcast; 1462 } 1463 if (our_nce->nce_flags & NCE_F_ISROUTER) 1464 flag |= NDP_ISROUTER; 1465 if (our_nce->nce_flags & NCE_F_PROXY) 1466 flag &= ~NDP_ORIDE; 1467 /* Response to a solicitation */ 1468 (void) nce_xmit(ill, 1469 ND_NEIGHBOR_ADVERT, 1470 ill, /* ill to be used for extracting ill_nd_lla */ 1471 B_TRUE, /* use ill_nd_lla */ 1472 &target, /* Source and target of the advertisement pkt */ 1473 &src, /* IP Destination (source of original pkt) */ 1474 flag); 1475 done: 1476 if (bad_solicit) 1477 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 1478 if (our_nce != NULL) 1479 NCE_REFRELE(our_nce); 1480 } 1481 1482 void 1483 ndp_input_advert(ill_t *ill, mblk_t *mp) 1484 { 1485 nd_neighbor_advert_t *na; 1486 uint32_t hlen = ill->ill_nd_lla_len; 1487 uchar_t *haddr = NULL; 1488 icmp6_t *icmp_nd; 1489 ip6_t *ip6h; 1490 nce_t *dst_nce = NULL; 1491 in6_addr_t target; 1492 nd_opt_hdr_t *opt = NULL; 1493 int len; 1494 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1495 1496 ip6h = (ip6_t *)mp->b_rptr; 1497 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1498 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1499 na = (nd_neighbor_advert_t *)icmp_nd; 1500 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 1501 (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { 1502 ip1dbg(("ndp_input_advert: Target is multicast but the " 1503 "solicited flag is not zero\n")); 1504 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 1505 return; 1506 } 1507 target = na->nd_na_target; 1508 if (IN6_IS_ADDR_MULTICAST(&target)) { 1509 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 1510 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 1511 return; 1512 } 1513 if (len > sizeof (nd_neighbor_advert_t)) { 1514 opt = (nd_opt_hdr_t *)&na[1]; 1515 if (!ndp_verify_optlen(opt, 1516 len - sizeof (nd_neighbor_advert_t))) { 1517 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 1518 return; 1519 } 1520 /* At this point we have a verified NA per spec */ 1521 len -= sizeof (nd_neighbor_advert_t); 1522 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 1523 if (opt != NULL) { 1524 haddr = (uchar_t *)&opt[1]; 1525 if (hlen > opt->nd_opt_len * 8 || 1526 hlen == 0) { 1527 BUMP_MIB(mib, 1528 ipv6IfIcmpInBadNeighborAdvertisements); 1529 return; 1530 } 1531 } 1532 } 1533 1534 /* 1535 * If this interface is part of the group look at all the 1536 * ills in the group. 1537 */ 1538 rw_enter(&ill_g_lock, RW_READER); 1539 if (ill->ill_group != NULL) 1540 ill = ill->ill_group->illgrp_ill; 1541 1542 for (; ill != NULL; ill = ill->ill_group_next) { 1543 mutex_enter(&ill->ill_lock); 1544 if (!ILL_CAN_LOOKUP(ill)) { 1545 mutex_exit(&ill->ill_lock); 1546 continue; 1547 } 1548 ill_refhold_locked(ill); 1549 mutex_exit(&ill->ill_lock); 1550 dst_nce = ndp_lookup(ill, &target, B_FALSE); 1551 /* We have to drop the lock since ndp_process calls put* */ 1552 rw_exit(&ill_g_lock); 1553 if (dst_nce != NULL) { 1554 if (na->nd_na_flags_reserved & 1555 ND_NA_FLAG_ROUTER) { 1556 dst_nce->nce_flags |= NCE_F_ISROUTER; 1557 } 1558 /* B_TRUE indicates this an advertisement */ 1559 ndp_process(dst_nce, haddr, 1560 na->nd_na_flags_reserved, B_TRUE); 1561 NCE_REFRELE(dst_nce); 1562 } 1563 rw_enter(&ill_g_lock, RW_READER); 1564 ill_refrele(ill); 1565 } 1566 rw_exit(&ill_g_lock); 1567 } 1568 1569 /* 1570 * Process NDP neighbor solicitation/advertisement messages. 1571 * The checksum has already checked o.k before reaching here. 1572 */ 1573 void 1574 ndp_input(ill_t *ill, mblk_t *mp) 1575 { 1576 icmp6_t *icmp_nd; 1577 ip6_t *ip6h; 1578 int len; 1579 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1580 1581 1582 if (!pullupmsg(mp, -1)) { 1583 ip1dbg(("ndp_input: pullupmsg failed\n")); 1584 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1585 goto done; 1586 } 1587 ip6h = (ip6_t *)mp->b_rptr; 1588 if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 1589 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 1590 BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 1591 goto done; 1592 } 1593 /* 1594 * NDP does not accept any extension headers between the 1595 * IP header and the ICMP header since e.g. a routing 1596 * header could be dangerous. 1597 * This assumes that any AH or ESP headers are removed 1598 * by ip prior to passing the packet to ndp_input. 1599 */ 1600 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 1601 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 1602 ip6h->ip6_nxt)); 1603 BUMP_MIB(mib, ipv6IfIcmpInErrors); 1604 goto done; 1605 } 1606 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1607 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 1608 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 1609 if (icmp_nd->icmp6_code != 0) { 1610 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 1611 BUMP_MIB(mib, ipv6IfIcmpInErrors); 1612 goto done; 1613 } 1614 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1615 /* 1616 * Make sure packet length is large enough for either 1617 * a NS or a NA icmp packet. 1618 */ 1619 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 1620 ip1dbg(("ndp_input: packet too short\n")); 1621 BUMP_MIB(mib, ipv6IfIcmpInErrors); 1622 goto done; 1623 } 1624 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 1625 ndp_input_solicit(ill, mp); 1626 } else { 1627 ndp_input_advert(ill, mp); 1628 } 1629 done: 1630 freemsg(mp); 1631 } 1632 1633 /* 1634 * nce_xmit is called to form and transmit a ND solicitation or 1635 * advertisement ICMP packet. 1636 * If source address is unspecified, appropriate source address 1637 * and link layer address will be chosen here. This function 1638 * *always* sends the link layer option. 1639 * It returns B_FALSE only if it does a successful put() to the 1640 * corresponding ill's ill_wq otherwise returns B_TRUE. 1641 */ 1642 static boolean_t 1643 nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, 1644 boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target, 1645 int flag) 1646 { 1647 uint32_t len; 1648 icmp6_t *icmp6; 1649 mblk_t *mp; 1650 ip6_t *ip6h; 1651 nd_opt_hdr_t *opt; 1652 uint_t plen; 1653 ip6i_t *ip6i; 1654 ipif_t *src_ipif = NULL; 1655 1656 /* 1657 * If we have a unspecified source(sender) address, select a 1658 * proper source address for the solicitation here itself so 1659 * that we can initialize the h/w address correctly. This is 1660 * needed for interface groups as source address can come from 1661 * the whole group and the h/w address initialized from ill will 1662 * be wrong if the source address comes from a different ill. 1663 * 1664 * Note that the NA never comes here with the unspecified source 1665 * address. The following asserts that whenever the source 1666 * address is specified, the haddr also should be specified. 1667 */ 1668 ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); 1669 1670 if (IN6_IS_ADDR_UNSPECIFIED(sender)) { 1671 ASSERT(operation != ND_NEIGHBOR_ADVERT); 1672 /* 1673 * Pick a source address for this solicitation, but 1674 * restrict the selection to addresses assigned to the 1675 * output interface (or interface group). We do this 1676 * because the destination will create a neighbor cache 1677 * entry for the source address of this packet, so the 1678 * source address had better be a valid neighbor. 1679 */ 1680 src_ipif = ipif_select_source_v6(ill, target, B_TRUE, 1681 IPV6_PREFER_SRC_DEFAULT, GLOBAL_ZONEID); 1682 if (src_ipif == NULL) { 1683 char buf[INET6_ADDRSTRLEN]; 1684 1685 ip0dbg(("nce_xmit: No source ipif for dst %s\n", 1686 inet_ntop(AF_INET6, (char *)target, buf, 1687 sizeof (buf)))); 1688 return (B_TRUE); 1689 } 1690 sender = &src_ipif->ipif_v6src_addr; 1691 hwaddr_ill = src_ipif->ipif_ill; 1692 } 1693 1694 plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7)/8; 1695 /* 1696 * Always make sure that the NS/NA packets don't get load 1697 * spread. This is needed so that the probe packets sent 1698 * by the in.mpathd daemon can really go out on the desired 1699 * interface. Probe packets are made to go out on a desired 1700 * interface by including a ip6i with ATTACH_IF flag. As these 1701 * packets indirectly end up sending/receiving NS/NA packets 1702 * (neighbor doing NUD), we have to make sure that NA 1703 * also go out on the same interface. 1704 */ 1705 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 1706 plen * 8; 1707 mp = allocb(len, BPRI_LO); 1708 if (mp == NULL) { 1709 if (src_ipif != NULL) 1710 ipif_refrele(src_ipif); 1711 return (B_TRUE); 1712 } 1713 bzero((char *)mp->b_rptr, len); 1714 mp->b_wptr = mp->b_rptr + len; 1715 1716 ip6i = (ip6i_t *)mp->b_rptr; 1717 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1718 ip6i->ip6i_nxt = IPPROTO_RAW; 1719 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 1720 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 1721 1722 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1723 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1724 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 1725 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1726 ip6h->ip6_hops = IPV6_MAX_HOPS; 1727 ip6h->ip6_dst = *target; 1728 icmp6 = (icmp6_t *)&ip6h[1]; 1729 1730 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 1731 sizeof (nd_neighbor_advert_t)); 1732 1733 if (operation == ND_NEIGHBOR_SOLICIT) { 1734 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 1735 1736 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 1737 ip6h->ip6_src = *sender; 1738 ns->nd_ns_target = *target; 1739 if (!(flag & NDP_UNICAST)) { 1740 /* Form multicast address of the target */ 1741 ip6h->ip6_dst = ipv6_solicited_node_mcast; 1742 ip6h->ip6_dst.s6_addr32[3] |= 1743 ns->nd_ns_target.s6_addr32[3]; 1744 } 1745 } else { 1746 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 1747 1748 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1749 ip6h->ip6_src = *sender; 1750 na->nd_na_target = *sender; 1751 if (flag & NDP_ISROUTER) 1752 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 1753 if (flag & NDP_SOLICITED) 1754 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 1755 if (flag & NDP_ORIDE) 1756 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 1757 1758 } 1759 /* Fill in link layer address and option len */ 1760 opt->nd_opt_len = (uint8_t)plen; 1761 mutex_enter(&hwaddr_ill->ill_lock); 1762 bcopy(use_nd_lla ? hwaddr_ill->ill_nd_lla : hwaddr_ill->ill_phys_addr, 1763 &opt[1], hwaddr_ill->ill_nd_lla_len); 1764 mutex_exit(&hwaddr_ill->ill_lock); 1765 icmp6->icmp6_type = (uint8_t)operation; 1766 icmp6->icmp6_code = 0; 1767 /* 1768 * Prepare for checksum by putting icmp length in the icmp 1769 * checksum field. The checksum is calculated in ip_wput_v6. 1770 */ 1771 icmp6->icmp6_cksum = ip6h->ip6_plen; 1772 1773 if (src_ipif != NULL) 1774 ipif_refrele(src_ipif); 1775 if (canput(ill->ill_wq)) { 1776 put(ill->ill_wq, mp); 1777 return (B_FALSE); 1778 } 1779 freemsg(mp); 1780 return (B_TRUE); 1781 } 1782 1783 /* 1784 * Make a link layer address (does not include the SAP) from an nce. 1785 * To form the link layer address, use the last four bytes of ipv6 1786 * address passed in and the fixed offset stored in nce. 1787 */ 1788 static void 1789 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 1790 { 1791 uchar_t *mask, *to; 1792 ill_t *ill = nce->nce_ill; 1793 int len; 1794 1795 if (ill->ill_net_type == IRE_IF_NORESOLVER) 1796 return; 1797 ASSERT(nce->nce_res_mp != NULL); 1798 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1799 ASSERT(nce->nce_flags & NCE_F_MAPPING); 1800 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 1801 ASSERT(addr != NULL); 1802 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1803 addrpos, ill->ill_nd_lla_len); 1804 len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 1805 IPV6_ADDR_LEN); 1806 mask = (uchar_t *)&nce->nce_extract_mask; 1807 mask += (IPV6_ADDR_LEN - len); 1808 addr += (IPV6_ADDR_LEN - len); 1809 to = addrpos + nce->nce_ll_extract_start; 1810 while (len-- > 0) 1811 *to++ |= *mask++ & *addr++; 1812 } 1813 1814 /* 1815 * Pass a cache report back out via NDD. 1816 */ 1817 /* ARGSUSED */ 1818 int 1819 ndp_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) 1820 { 1821 (void) mi_mpprintf(mp, "ifname hardware addr flags" 1822 " proto addr/mask"); 1823 ndp_walk(NULL, (pfi_t)nce_report1, (uchar_t *)mp); 1824 return (0); 1825 } 1826 1827 /* 1828 * convert a link level address of arbitrary length 1829 * to an ascii string. 1830 * The caller *must* have already verified that the string buffer 1831 * is large enough to hold the entire string, including trailing NULL. 1832 */ 1833 static void 1834 lla2ascii(uint8_t *lla, int addrlen, uchar_t *buf) 1835 { 1836 uchar_t addrbyte[8]; /* needs to hold ascii for a byte plus a NULL */ 1837 int i; 1838 size_t len; 1839 1840 buf[0] = '\0'; 1841 for (i = 0; i < addrlen; i++) { 1842 addrbyte[0] = '\0'; 1843 (void) sprintf((char *)addrbyte, "%02x:", (lla[i] & 0xff)); 1844 len = strlen((const char *)addrbyte); 1845 bcopy(addrbyte, buf, len); 1846 buf = buf + len; 1847 } 1848 *--buf = '\0'; 1849 } 1850 1851 /* 1852 * Add a single line to the NDP Cache Entry Report. 1853 */ 1854 static void 1855 nce_report1(nce_t *nce, uchar_t *mp_arg) 1856 { 1857 ill_t *ill = nce->nce_ill; 1858 char local_buf[INET6_ADDRSTRLEN]; 1859 uchar_t flags_buf[10]; 1860 uint32_t flags = nce->nce_flags; 1861 mblk_t *mp = (mblk_t *)mp_arg; 1862 uchar_t *h; 1863 uchar_t *m = flags_buf; 1864 in6_addr_t v6addr; 1865 1866 /* 1867 * Lock the nce to protect nce_res_mp from being changed 1868 * if an external resolver address resolution completes 1869 * while nce_res_mp is being accessed here. 1870 * 1871 * Deal with all address formats, not just Ethernet-specific 1872 * In addition, make sure that the mblk has enough space 1873 * before writing to it. If is doesn't, allocate a new one. 1874 */ 1875 ASSERT(ill != NULL); 1876 v6addr = nce->nce_mask; 1877 if (flags & NCE_F_PERMANENT) 1878 *m++ = 'P'; 1879 if (flags & NCE_F_ISROUTER) 1880 *m++ = 'R'; 1881 if (flags & NCE_F_MAPPING) 1882 *m++ = 'M'; 1883 *m = '\0'; 1884 1885 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1886 size_t addrlen; 1887 uchar_t *addr_buf; 1888 dl_unitdata_req_t *dl; 1889 1890 mutex_enter(&nce->nce_lock); 1891 h = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 1892 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1893 if (ill->ill_flags & ILLF_XRESOLV) 1894 addrlen = (3 * (dl->dl_dest_addr_length)); 1895 else 1896 addrlen = (3 * (ill->ill_nd_lla_len)); 1897 if (addrlen <= 0) { 1898 mutex_exit(&nce->nce_lock); 1899 (void) mi_mpprintf(mp, 1900 "%8s %9s %5s %s/%d", 1901 ill->ill_name, 1902 "None", 1903 (uchar_t *)&flags_buf, 1904 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 1905 (char *)local_buf, sizeof (local_buf)), 1906 ip_mask_to_plen_v6(&v6addr)); 1907 } else { 1908 /* 1909 * Convert the hardware/lla address to ascii 1910 */ 1911 addr_buf = kmem_zalloc(addrlen, KM_NOSLEEP); 1912 if (addr_buf == NULL) { 1913 mutex_exit(&nce->nce_lock); 1914 return; 1915 } 1916 if (ill->ill_flags & ILLF_XRESOLV) 1917 lla2ascii((uint8_t *)h, dl->dl_dest_addr_length, 1918 addr_buf); 1919 else 1920 lla2ascii((uint8_t *)h, ill->ill_nd_lla_len, 1921 addr_buf); 1922 mutex_exit(&nce->nce_lock); 1923 (void) mi_mpprintf(mp, "%8s %17s %5s %s/%d", 1924 ill->ill_name, addr_buf, (uchar_t *)&flags_buf, 1925 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 1926 (char *)local_buf, sizeof (local_buf)), 1927 ip_mask_to_plen_v6(&v6addr)); 1928 kmem_free(addr_buf, addrlen); 1929 } 1930 } else { 1931 (void) mi_mpprintf(mp, 1932 "%8s %9s %5s %s/%d", 1933 ill->ill_name, 1934 "None", 1935 (uchar_t *)&flags_buf, 1936 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 1937 (char *)local_buf, sizeof (local_buf)), 1938 ip_mask_to_plen_v6(&v6addr)); 1939 } 1940 } 1941 1942 mblk_t * 1943 nce_udreq_alloc(ill_t *ill) 1944 { 1945 mblk_t *template_mp = NULL; 1946 dl_unitdata_req_t *dlur; 1947 int sap_length; 1948 1949 sap_length = ill->ill_sap_length; 1950 template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 1951 ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 1952 if (template_mp == NULL) 1953 return (NULL); 1954 1955 dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 1956 dlur->dl_priority.dl_min = 0; 1957 dlur->dl_priority.dl_max = 0; 1958 dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 1959 dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); 1960 1961 /* Copy in the SAP value. */ 1962 NCE_LL_SAP_COPY(ill, template_mp); 1963 1964 return (template_mp); 1965 } 1966 1967 /* 1968 * NDP retransmit timer. 1969 * This timer goes off when: 1970 * a. It is time to retransmit NS for resolver. 1971 * b. It is time to send reachability probes. 1972 */ 1973 void 1974 ndp_timer(void *arg) 1975 { 1976 nce_t *nce = arg; 1977 ill_t *ill = nce->nce_ill; 1978 uint32_t ms; 1979 char addrbuf[INET6_ADDRSTRLEN]; 1980 mblk_t *mp; 1981 boolean_t dropped = B_FALSE; 1982 1983 /* 1984 * The timer has to be cancelled by ndp_delete before doing the final 1985 * refrele. So the NCE is guaranteed to exist when the timer runs 1986 * until it clears the timeout_id. Before clearing the timeout_id 1987 * bump up the refcnt so that we can continue to use the nce 1988 */ 1989 ASSERT(nce != NULL); 1990 1991 /* 1992 * Grab the ill_g_lock now itself to avoid lock order problems. 1993 * nce_solicit needs ill_g_lock to be able to traverse ills 1994 */ 1995 rw_enter(&ill_g_lock, RW_READER); 1996 mutex_enter(&nce->nce_lock); 1997 NCE_REFHOLD_LOCKED(nce); 1998 nce->nce_timeout_id = 0; 1999 2000 /* 2001 * Check the reachability state first. 2002 */ 2003 switch (nce->nce_state) { 2004 case ND_DELAY: 2005 rw_exit(&ill_g_lock); 2006 nce->nce_state = ND_PROBE; 2007 mutex_exit(&nce->nce_lock); 2008 (void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 2009 &ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST); 2010 if (ip_debug > 3) { 2011 /* ip2dbg */ 2012 pr_addr_dbg("ndp_timer: state for %s changed " 2013 "to PROBE\n", AF_INET6, &nce->nce_addr); 2014 } 2015 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2016 NCE_REFRELE(nce); 2017 return; 2018 case ND_PROBE: 2019 /* must be retransmit timer */ 2020 rw_exit(&ill_g_lock); 2021 nce->nce_pcnt--; 2022 ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 2023 nce->nce_pcnt >= -1); 2024 if (nce->nce_pcnt == 0) { 2025 /* Wait RetransTimer, before deleting the entry */ 2026 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2027 nce->nce_pcnt, inet_ntop(AF_INET6, 2028 &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 2029 mutex_exit(&nce->nce_lock); 2030 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2031 } else { 2032 /* 2033 * As per RFC2461, the nce gets deleted after 2034 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 2035 * Note that the first unicast solicitation is sent 2036 * during the DELAY state. 2037 */ 2038 if (nce->nce_pcnt > 0) { 2039 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2040 nce->nce_pcnt, inet_ntop(AF_INET6, 2041 &nce->nce_addr, 2042 addrbuf, sizeof (addrbuf)))); 2043 mutex_exit(&nce->nce_lock); 2044 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, 2045 NULL, B_FALSE, &ipv6_all_zeros, 2046 &nce->nce_addr, NDP_UNICAST); 2047 if (dropped) { 2048 mutex_enter(&nce->nce_lock); 2049 nce->nce_pcnt++; 2050 mutex_exit(&nce->nce_lock); 2051 } 2052 NDP_RESTART_TIMER(nce, 2053 ill->ill_reachable_retrans_time); 2054 } else { 2055 /* No hope, delete the nce */ 2056 nce->nce_state = ND_UNREACHABLE; 2057 mutex_exit(&nce->nce_lock); 2058 if (ip_debug > 2) { 2059 /* ip1dbg */ 2060 pr_addr_dbg("ndp_timer: Delete IRE for" 2061 " dst %s\n", AF_INET6, 2062 &nce->nce_addr); 2063 } 2064 ndp_delete(nce); 2065 } 2066 } 2067 NCE_REFRELE(nce); 2068 return; 2069 case ND_INCOMPLETE: 2070 /* 2071 * Must be resolvers retransmit timer. 2072 */ 2073 for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) { 2074 ip6i_t *ip6i; 2075 ip6_t *ip6h; 2076 mblk_t *data_mp; 2077 2078 /* 2079 * Walk the list of packets queued, and see if there 2080 * are any multipathing probe packets. Such packets 2081 * are always queued at the head. Since this is a 2082 * retransmit timer firing, mark such packets as 2083 * delayed in ND resolution. This info will be used 2084 * in ip_wput_v6(). Multipathing probe packets will 2085 * always have an ip6i_t. Once we hit a packet without 2086 * it, we can break out of this loop. 2087 */ 2088 if (mp->b_datap->db_type == M_CTL) 2089 data_mp = mp->b_cont; 2090 else 2091 data_mp = mp; 2092 2093 ip6h = (ip6_t *)data_mp->b_rptr; 2094 if (ip6h->ip6_nxt != IPPROTO_RAW) 2095 break; 2096 2097 /* 2098 * This message should have been pulled up already in 2099 * ip_wput_v6. We can't do pullups here because the 2100 * b_next/b_prev is non-NULL. 2101 */ 2102 ip6i = (ip6i_t *)ip6h; 2103 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2104 sizeof (ip6i_t) + IPV6_HDR_LEN); 2105 2106 /* Mark this packet as delayed due to ND resolution */ 2107 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2108 ip6i->ip6i_flags |= IP6I_ND_DELAYED; 2109 } 2110 if (nce->nce_qd_mp != NULL) { 2111 ms = nce_solicit(nce, NULL); 2112 rw_exit(&ill_g_lock); 2113 if (ms == 0) { 2114 if (nce->nce_state != ND_REACHABLE) { 2115 mutex_exit(&nce->nce_lock); 2116 nce_resolv_failed(nce); 2117 ndp_delete(nce); 2118 } else { 2119 mutex_exit(&nce->nce_lock); 2120 } 2121 } else { 2122 mutex_exit(&nce->nce_lock); 2123 NDP_RESTART_TIMER(nce, (clock_t)ms); 2124 } 2125 NCE_REFRELE(nce); 2126 return; 2127 } 2128 mutex_exit(&nce->nce_lock); 2129 rw_exit(&ill_g_lock); 2130 NCE_REFRELE(nce); 2131 break; 2132 case ND_REACHABLE : 2133 rw_exit(&ill_g_lock); 2134 if (nce->nce_flags & NCE_F_UNSOL_ADV && 2135 nce->nce_unsolicit_count != 0) { 2136 nce->nce_unsolicit_count--; 2137 mutex_exit(&nce->nce_lock); 2138 dropped = nce_xmit(ill, 2139 ND_NEIGHBOR_ADVERT, 2140 ill, /* ill to be used for hw addr */ 2141 B_FALSE, /* use ill_phys_addr */ 2142 &nce->nce_addr, 2143 &ipv6_all_hosts_mcast, 2144 nce->nce_flags | NDP_ORIDE); 2145 if (dropped) { 2146 mutex_enter(&nce->nce_lock); 2147 nce->nce_unsolicit_count++; 2148 mutex_exit(&nce->nce_lock); 2149 } 2150 if (nce->nce_unsolicit_count != 0) { 2151 NDP_RESTART_TIMER(nce, 2152 ip_ndp_unsolicit_interval); 2153 } 2154 } else { 2155 mutex_exit(&nce->nce_lock); 2156 } 2157 NCE_REFRELE(nce); 2158 break; 2159 default: 2160 rw_exit(&ill_g_lock); 2161 mutex_exit(&nce->nce_lock); 2162 NCE_REFRELE(nce); 2163 break; 2164 } 2165 } 2166 2167 /* 2168 * Set a link layer address from the ll_addr passed in. 2169 * Copy SAP from ill. 2170 */ 2171 static void 2172 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2173 { 2174 ill_t *ill = nce->nce_ill; 2175 uchar_t *woffset; 2176 2177 ASSERT(ll_addr != NULL); 2178 /* Always called before fast_path_probe */ 2179 ASSERT(nce->nce_fp_mp == NULL); 2180 if (ill->ill_sap_length != 0) { 2181 /* 2182 * Copy the SAP type specified in the 2183 * request into the xmit template. 2184 */ 2185 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2186 } 2187 if (ill->ill_phys_addr_length > 0) { 2188 /* 2189 * The bcopy() below used to be called for the physical address 2190 * length rather than the link layer address length. For 2191 * ethernet and many other media, the phys_addr and lla are 2192 * identical. 2193 * However, with xresolv interfaces being introduced, the 2194 * phys_addr and lla are no longer the same, and the physical 2195 * address may not have any useful meaning, so we use the lla 2196 * for IPv6 address resolution and destination addressing. 2197 * 2198 * For PPP or other interfaces with a zero length 2199 * physical address, don't do anything here. 2200 * The bcopy() with a zero phys_addr length was previously 2201 * a no-op for interfaces with a zero-length physical address. 2202 * Using the lla for them would change the way they operate. 2203 * Doing nothing in such cases preserves expected behavior. 2204 */ 2205 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2206 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2207 } 2208 } 2209 2210 static boolean_t 2211 nce_cmp_ll_addr(nce_t *nce, char *ll_addr, uint32_t ll_addr_len) 2212 { 2213 ill_t *ill = nce->nce_ill; 2214 uchar_t *ll_offset; 2215 2216 ASSERT(nce->nce_res_mp != NULL); 2217 if (ll_addr == NULL) 2218 return (B_FALSE); 2219 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2220 if (bcmp(ll_addr, (char *)ll_offset, ll_addr_len) != 0) 2221 return (B_TRUE); 2222 return (B_FALSE); 2223 } 2224 2225 /* 2226 * Updates the link layer address or the reachability state of 2227 * a cache entry. Reset probe counter if needed. 2228 */ 2229 static void 2230 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2231 { 2232 ill_t *ill = nce->nce_ill; 2233 boolean_t need_stop_timer = B_FALSE; 2234 boolean_t need_fastpath_update = B_FALSE; 2235 2236 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2237 /* 2238 * If this interface does not do NUD, there is no point 2239 * in allowing an update to the cache entry. Although 2240 * we will respond to NS. 2241 * The only time we accept an update for a resolver when 2242 * NUD is turned off is when it has just been created. 2243 * Non-Resolvers will always be created as REACHABLE. 2244 */ 2245 if (new_state != ND_UNCHANGED) { 2246 if ((nce->nce_flags & NCE_F_NONUD) && 2247 (nce->nce_state != ND_INCOMPLETE)) 2248 return; 2249 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2250 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2251 need_stop_timer = B_TRUE; 2252 if (new_state == ND_REACHABLE) 2253 nce->nce_last = TICK_TO_MSEC(lbolt64); 2254 else { 2255 /* We force NUD in this case */ 2256 nce->nce_last = 0; 2257 } 2258 nce->nce_state = new_state; 2259 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2260 } 2261 /* 2262 * In case of fast path we need to free the the fastpath 2263 * M_DATA and do another probe. Otherwise we can just 2264 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2265 * whatever packets that happens to be transmitting at the time. 2266 */ 2267 if (new_ll_addr != NULL) { 2268 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2269 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2270 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2271 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2272 if (nce->nce_fp_mp != NULL) { 2273 freemsg(nce->nce_fp_mp); 2274 nce->nce_fp_mp = NULL; 2275 } 2276 need_fastpath_update = B_TRUE; 2277 } 2278 mutex_exit(&nce->nce_lock); 2279 if (need_stop_timer) { 2280 (void) untimeout(nce->nce_timeout_id); 2281 nce->nce_timeout_id = 0; 2282 } 2283 if (need_fastpath_update) 2284 nce_fastpath(nce); 2285 mutex_enter(&nce->nce_lock); 2286 } 2287 2288 static void 2289 nce_queue_mp(nce_t *nce, mblk_t *mp) 2290 { 2291 uint_t count = 0; 2292 mblk_t **mpp; 2293 boolean_t head_insert = B_FALSE; 2294 ip6_t *ip6h; 2295 ip6i_t *ip6i; 2296 mblk_t *data_mp; 2297 2298 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2299 2300 if (mp->b_datap->db_type == M_CTL) 2301 data_mp = mp->b_cont; 2302 else 2303 data_mp = mp; 2304 ip6h = (ip6_t *)data_mp->b_rptr; 2305 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2306 /* 2307 * This message should have been pulled up already in 2308 * ip_wput_v6. We can't do pullups here because the message 2309 * could be from the nce_qd_mp which could have b_next/b_prev 2310 * non-NULL. 2311 */ 2312 ip6i = (ip6i_t *)ip6h; 2313 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2314 sizeof (ip6i_t) + IPV6_HDR_LEN); 2315 /* 2316 * Multipathing probe packets have IP6I_DROP_IFDELAYED set. 2317 * This has 2 aspects mentioned below. 2318 * 1. Perform head insertion in the nce_qd_mp for these packets. 2319 * This ensures that next retransmit of ND solicitation 2320 * will use the interface specified by the probe packet, 2321 * for both NS and NA. This corresponds to the src address 2322 * in the IPv6 packet. If we insert at tail, we will be 2323 * depending on the packet at the head for successful 2324 * ND resolution. This is not reliable, because the interface 2325 * on which the NA arrives could be different from the interface 2326 * on which the NS was sent, and if the receiving interface is 2327 * failed, it will appear that the sending interface is also 2328 * failed, causing in.mpathd to misdiagnose this as link 2329 * failure. 2330 * 2. Drop the original packet, if the ND resolution did not 2331 * succeed in the first attempt. However we will create the 2332 * nce and the ire, as soon as the ND resolution succeeds. 2333 * We don't gain anything by queueing multiple probe packets 2334 * and sending them back-to-back once resolution succeeds. 2335 * It is sufficient to send just 1 packet after ND resolution 2336 * succeeds. Since mpathd is sending down probe packets at a 2337 * constant rate, we don't need to send the queued packet. We 2338 * need to queue it only for NDP resolution. The benefit of 2339 * dropping the probe packets that were delayed in ND 2340 * resolution, is that in.mpathd will not see inflated 2341 * RTT. If the ND resolution does not succeed within 2342 * in.mpathd's failure detection time, mpathd may detect 2343 * a failure, and it does not matter whether the packet 2344 * was queued or dropped. 2345 */ 2346 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2347 head_insert = B_TRUE; 2348 } 2349 2350 for (mpp = &nce->nce_qd_mp; *mpp != NULL; 2351 mpp = &(*mpp)->b_next) { 2352 if (++count > 2353 nce->nce_ill->ill_max_buf) { 2354 mblk_t *tmp = nce->nce_qd_mp->b_next; 2355 2356 nce->nce_qd_mp->b_next = NULL; 2357 nce->nce_qd_mp->b_prev = NULL; 2358 freemsg(nce->nce_qd_mp); 2359 ip1dbg(("nce_queue_mp: pkt dropped\n")); 2360 nce->nce_qd_mp = tmp; 2361 } 2362 } 2363 /* put this on the list */ 2364 if (head_insert) { 2365 mp->b_next = nce->nce_qd_mp; 2366 nce->nce_qd_mp = mp; 2367 } else { 2368 *mpp = mp; 2369 } 2370 } 2371 2372 /* 2373 * Called when address resolution failed due to a timeout. 2374 * Send an ICMP unreachable in response to all queued packets. 2375 */ 2376 void 2377 nce_resolv_failed(nce_t *nce) 2378 { 2379 mblk_t *mp, *nxt_mp, *first_mp; 2380 char buf[INET6_ADDRSTRLEN]; 2381 ip6_t *ip6h; 2382 zoneid_t zoneid = GLOBAL_ZONEID; 2383 2384 ip1dbg(("nce_resolv_failed: dst %s\n", 2385 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 2386 mutex_enter(&nce->nce_lock); 2387 mp = nce->nce_qd_mp; 2388 nce->nce_qd_mp = NULL; 2389 mutex_exit(&nce->nce_lock); 2390 while (mp != NULL) { 2391 nxt_mp = mp->b_next; 2392 mp->b_next = NULL; 2393 mp->b_prev = NULL; 2394 2395 first_mp = mp; 2396 if (mp->b_datap->db_type == M_CTL) { 2397 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 2398 ASSERT(io->ipsec_out_type == IPSEC_OUT); 2399 zoneid = io->ipsec_out_zoneid; 2400 ASSERT(zoneid != ALL_ZONES); 2401 mp = mp->b_cont; 2402 } 2403 2404 ip6h = (ip6_t *)mp->b_rptr; 2405 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2406 ip6i_t *ip6i; 2407 /* 2408 * This message should have been pulled up already 2409 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 2410 * the header is pulled up. 2411 */ 2412 ip6i = (ip6i_t *)ip6h; 2413 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 2414 sizeof (ip6i_t) + IPV6_HDR_LEN); 2415 mp->b_rptr += sizeof (ip6i_t); 2416 } 2417 /* 2418 * Ignore failure since icmp_unreachable_v6 will silently 2419 * drop packets with an unspecified source address. 2420 */ 2421 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid); 2422 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 2423 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE); 2424 mp = nxt_mp; 2425 } 2426 } 2427 2428 /* 2429 * Called by SIOCSNDP* ioctl to add/change an nce entry 2430 * and the corresponding attributes. 2431 * Disallow states other than ND_REACHABLE or ND_STALE. 2432 */ 2433 int 2434 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 2435 { 2436 sin6_t *sin6; 2437 in6_addr_t *addr; 2438 nce_t *nce; 2439 int err; 2440 uint16_t new_flags = 0; 2441 uint16_t old_flags = 0; 2442 int inflags = lnr->lnr_flags; 2443 2444 if ((lnr->lnr_state_create != ND_REACHABLE) && 2445 (lnr->lnr_state_create != ND_STALE)) 2446 return (EINVAL); 2447 2448 sin6 = (sin6_t *)&lnr->lnr_addr; 2449 addr = &sin6->sin6_addr; 2450 2451 mutex_enter(&ndp_g_lock); 2452 /* We know it can not be mapping so just look in the hash table */ 2453 nce = nce_lookup_addr(ill, addr); 2454 if (nce != NULL) 2455 new_flags = nce->nce_flags; 2456 2457 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 2458 case NDF_ISROUTER_ON: 2459 new_flags |= NCE_F_ISROUTER; 2460 break; 2461 case NDF_ISROUTER_OFF: 2462 new_flags &= ~NCE_F_ISROUTER; 2463 break; 2464 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 2465 mutex_exit(&ndp_g_lock); 2466 if (nce != NULL) 2467 NCE_REFRELE(nce); 2468 return (EINVAL); 2469 } 2470 2471 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 2472 case NDF_ANYCAST_ON: 2473 new_flags |= NCE_F_ANYCAST; 2474 break; 2475 case NDF_ANYCAST_OFF: 2476 new_flags &= ~NCE_F_ANYCAST; 2477 break; 2478 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 2479 mutex_exit(&ndp_g_lock); 2480 if (nce != NULL) 2481 NCE_REFRELE(nce); 2482 return (EINVAL); 2483 } 2484 2485 switch (inflags & (NDF_PROXY_ON|NDF_PROXY_OFF)) { 2486 case NDF_PROXY_ON: 2487 new_flags |= NCE_F_PROXY; 2488 break; 2489 case NDF_PROXY_OFF: 2490 new_flags &= ~NCE_F_PROXY; 2491 break; 2492 case (NDF_PROXY_OFF|NDF_PROXY_ON): 2493 mutex_exit(&ndp_g_lock); 2494 if (nce != NULL) 2495 NCE_REFRELE(nce); 2496 return (EINVAL); 2497 } 2498 2499 if (nce == NULL) { 2500 err = ndp_add(ill, 2501 (uchar_t *)lnr->lnr_hdw_addr, 2502 addr, 2503 &ipv6_all_ones, 2504 &ipv6_all_zeros, 2505 0, 2506 new_flags, 2507 lnr->lnr_state_create, 2508 &nce); 2509 if (err != 0) { 2510 mutex_exit(&ndp_g_lock); 2511 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 2512 return (err); 2513 } 2514 } 2515 old_flags = nce->nce_flags; 2516 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 2517 /* 2518 * Router turned to host, delete all ires. 2519 * XXX Just delete the entry, but we need to add too. 2520 */ 2521 nce->nce_flags &= ~NCE_F_ISROUTER; 2522 mutex_exit(&ndp_g_lock); 2523 ndp_delete(nce); 2524 NCE_REFRELE(nce); 2525 return (0); 2526 } 2527 mutex_exit(&ndp_g_lock); 2528 2529 mutex_enter(&nce->nce_lock); 2530 nce->nce_flags = new_flags; 2531 mutex_exit(&nce->nce_lock); 2532 /* 2533 * Note that we ignore the state at this point, which 2534 * should be either STALE or REACHABLE. Instead we let 2535 * the link layer address passed in to determine the state 2536 * much like incoming packets. 2537 */ 2538 ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 2539 NCE_REFRELE(nce); 2540 return (0); 2541 } 2542 2543 /* 2544 * If the device driver supports it, we make nce_fp_mp to have 2545 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 2546 * The caller insures there is hold on nce for this function. 2547 * Note that since ill_fastpath_probe() copies the mblk there is 2548 * no need for the hold beyond this function. 2549 */ 2550 static void 2551 nce_fastpath(nce_t *nce) 2552 { 2553 ill_t *ill = nce->nce_ill; 2554 int res; 2555 2556 ASSERT(ill != NULL); 2557 if (nce->nce_fp_mp != NULL) { 2558 /* Already contains fastpath info */ 2559 return; 2560 } 2561 if (nce->nce_res_mp != NULL) { 2562 nce_fastpath_list_add(nce); 2563 res = ill_fastpath_probe(ill, nce->nce_res_mp); 2564 /* 2565 * EAGAIN is an indication of a transient error 2566 * i.e. allocation failure etc. leave the nce in the list it 2567 * will be updated when another probe happens for another ire 2568 * if not it will be taken out of the list when the ire is 2569 * deleted. 2570 */ 2571 2572 if (res != 0 && res != EAGAIN) 2573 nce_fastpath_list_delete(nce); 2574 } 2575 } 2576 2577 /* 2578 * Drain the list of nce's waiting for fastpath response. 2579 */ 2580 void 2581 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 2582 void *arg) 2583 { 2584 2585 nce_t *next_nce; 2586 nce_t *current_nce; 2587 nce_t *first_nce; 2588 nce_t *prev_nce = NULL; 2589 2590 ASSERT(ill != NULL); 2591 2592 mutex_enter(&ill->ill_lock); 2593 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 2594 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 2595 next_nce = current_nce->nce_fastpath; 2596 /* 2597 * Take it off the list if we're flushing, or if the callback 2598 * routine tells us to do so. Otherwise, leave the nce in the 2599 * fastpath list to handle any pending response from the lower 2600 * layer. We can't drain the list when the callback routine 2601 * comparison failed, because the response is asynchronous in 2602 * nature, and may not arrive in the same order as the list 2603 * insertion. 2604 */ 2605 if (func == NULL || func(current_nce, arg)) { 2606 current_nce->nce_fastpath = NULL; 2607 if (current_nce == first_nce) 2608 ill->ill_fastpath_list = first_nce = next_nce; 2609 else 2610 prev_nce->nce_fastpath = next_nce; 2611 } else { 2612 /* previous element that is still in the list */ 2613 prev_nce = current_nce; 2614 } 2615 current_nce = next_nce; 2616 } 2617 mutex_exit(&ill->ill_lock); 2618 } 2619 2620 /* 2621 * Add nce to the nce fastpath list. 2622 */ 2623 void 2624 nce_fastpath_list_add(nce_t *nce) 2625 { 2626 ill_t *ill; 2627 2628 ill = nce->nce_ill; 2629 ASSERT(ill != NULL); 2630 2631 mutex_enter(&ill->ill_lock); 2632 mutex_enter(&nce->nce_lock); 2633 2634 /* 2635 * if nce has not been deleted and 2636 * is not already in the list add it. 2637 */ 2638 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 2639 (nce->nce_fastpath == NULL)) { 2640 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 2641 ill->ill_fastpath_list = nce; 2642 } 2643 2644 mutex_exit(&nce->nce_lock); 2645 mutex_exit(&ill->ill_lock); 2646 } 2647 2648 /* 2649 * remove nce from the nce fastpath list. 2650 */ 2651 void 2652 nce_fastpath_list_delete(nce_t *nce) 2653 { 2654 nce_t *nce_ptr; 2655 2656 ill_t *ill; 2657 2658 ill = nce->nce_ill; 2659 ASSERT(ill != NULL); 2660 2661 mutex_enter(&ill->ill_lock); 2662 if (nce->nce_fastpath == NULL) 2663 goto done; 2664 2665 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 2666 2667 if (ill->ill_fastpath_list == nce) { 2668 ill->ill_fastpath_list = nce->nce_fastpath; 2669 } else { 2670 nce_ptr = ill->ill_fastpath_list; 2671 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 2672 if (nce_ptr->nce_fastpath == nce) { 2673 nce_ptr->nce_fastpath = nce->nce_fastpath; 2674 break; 2675 } 2676 nce_ptr = nce_ptr->nce_fastpath; 2677 } 2678 } 2679 2680 nce->nce_fastpath = NULL; 2681 done: 2682 mutex_exit(&ill->ill_lock); 2683 } 2684 2685 /* 2686 * Update all NCE's that are not in fastpath mode and 2687 * have an nce_fp_mp that matches mp. mp->b_cont contains 2688 * the fastpath header. 2689 * 2690 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 2691 */ 2692 boolean_t 2693 ndp_fastpath_update(nce_t *nce, void *arg) 2694 { 2695 mblk_t *mp, *fp_mp; 2696 uchar_t *mp_rptr, *ud_mp_rptr; 2697 mblk_t *ud_mp = nce->nce_res_mp; 2698 ptrdiff_t cmplen; 2699 2700 if (nce->nce_flags & NCE_F_MAPPING) 2701 return (B_TRUE); 2702 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 2703 return (B_TRUE); 2704 2705 ip2dbg(("ndp_fastpath_update: trying\n")); 2706 mp = (mblk_t *)arg; 2707 mp_rptr = mp->b_rptr; 2708 cmplen = mp->b_wptr - mp_rptr; 2709 ASSERT(cmplen >= 0); 2710 ud_mp_rptr = ud_mp->b_rptr; 2711 /* 2712 * The nce is locked here to prevent any other threads 2713 * from accessing and changing nce_res_mp when the IPv6 address 2714 * becomes resolved to an lla while we're in the middle 2715 * of looking at and comparing the hardware address (lla). 2716 * It is also locked to prevent multiple threads in nce_fastpath_update 2717 * from examining nce_res_mp atthe same time. 2718 */ 2719 mutex_enter(&nce->nce_lock); 2720 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 2721 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 2722 mutex_exit(&nce->nce_lock); 2723 /* 2724 * Don't take the ire off the fastpath list yet, 2725 * since the response may come later. 2726 */ 2727 return (B_FALSE); 2728 } 2729 /* Matched - install mp as the fastpath mp */ 2730 ip1dbg(("ndp_fastpath_update: match\n")); 2731 fp_mp = dupb(mp->b_cont); 2732 if (fp_mp != NULL) { 2733 nce->nce_fp_mp = fp_mp; 2734 } 2735 mutex_exit(&nce->nce_lock); 2736 return (B_TRUE); 2737 } 2738 2739 /* 2740 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 2741 * driver. Note that it assumes IP is exclusive... 2742 */ 2743 /* ARGSUSED */ 2744 void 2745 ndp_fastpath_flush(nce_t *nce, char *arg) 2746 { 2747 if (nce->nce_flags & NCE_F_MAPPING) 2748 return; 2749 /* No fastpath info? */ 2750 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 2751 return; 2752 2753 /* Just delete the NCE... */ 2754 ndp_delete(nce); 2755 } 2756 2757 /* 2758 * Return a pointer to a given option in the packet. 2759 * Assumes that option part of the packet have already been validated. 2760 */ 2761 nd_opt_hdr_t * 2762 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 2763 { 2764 while (optlen > 0) { 2765 if (opt->nd_opt_type == opt_type) 2766 return (opt); 2767 optlen -= 8 * opt->nd_opt_len; 2768 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 2769 } 2770 return (NULL); 2771 } 2772 2773 /* 2774 * Verify all option lengths present are > 0, also check to see 2775 * if the option lengths and packet length are consistent. 2776 */ 2777 boolean_t 2778 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 2779 { 2780 ASSERT(opt != NULL); 2781 while (optlen > 0) { 2782 if (opt->nd_opt_len == 0) 2783 return (B_FALSE); 2784 optlen -= 8 * opt->nd_opt_len; 2785 if (optlen < 0) 2786 return (B_FALSE); 2787 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 2788 } 2789 return (B_TRUE); 2790 } 2791 2792 /* 2793 * ndp_walk function. 2794 * Free a fraction of the NCE cache entries. 2795 * A fraction of zero means to not free any in that category. 2796 */ 2797 void 2798 ndp_cache_reclaim(nce_t *nce, char *arg) 2799 { 2800 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 2801 uint_t rand; 2802 2803 if (nce->nce_flags & NCE_F_PERMANENT) 2804 return; 2805 2806 rand = (uint_t)lbolt + 2807 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 2808 if (ncr->ncr_host != 0 && 2809 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 2810 ndp_delete(nce); 2811 return; 2812 } 2813 } 2814 2815 /* 2816 * ndp_walk function. 2817 * Count the number of NCEs that can be deleted. 2818 * These would be hosts but not routers. 2819 */ 2820 void 2821 ndp_cache_count(nce_t *nce, char *arg) 2822 { 2823 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 2824 2825 if (nce->nce_flags & NCE_F_PERMANENT) 2826 return; 2827 2828 ncc->ncc_total++; 2829 if (!(nce->nce_flags & NCE_F_ISROUTER)) 2830 ncc->ncc_host++; 2831 } 2832 2833 #ifdef NCE_DEBUG 2834 th_trace_t * 2835 th_trace_nce_lookup(nce_t *nce) 2836 { 2837 int bucket_id; 2838 th_trace_t *th_trace; 2839 2840 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2841 2842 bucket_id = IP_TR_HASH(curthread); 2843 ASSERT(bucket_id < IP_TR_HASH_MAX); 2844 2845 for (th_trace = nce->nce_trace[bucket_id]; th_trace != NULL; 2846 th_trace = th_trace->th_next) { 2847 if (th_trace->th_id == curthread) 2848 return (th_trace); 2849 } 2850 return (NULL); 2851 } 2852 2853 void 2854 nce_trace_ref(nce_t *nce) 2855 { 2856 int bucket_id; 2857 th_trace_t *th_trace; 2858 2859 /* 2860 * Attempt to locate the trace buffer for the curthread. 2861 * If it does not exist, then allocate a new trace buffer 2862 * and link it in list of trace bufs for this ipif, at the head 2863 */ 2864 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2865 2866 if (nce->nce_trace_disable == B_TRUE) 2867 return; 2868 2869 th_trace = th_trace_nce_lookup(nce); 2870 if (th_trace == NULL) { 2871 bucket_id = IP_TR_HASH(curthread); 2872 th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t), 2873 KM_NOSLEEP); 2874 if (th_trace == NULL) { 2875 nce->nce_trace_disable = B_TRUE; 2876 nce_trace_inactive(nce); 2877 return; 2878 } 2879 th_trace->th_id = curthread; 2880 th_trace->th_next = nce->nce_trace[bucket_id]; 2881 th_trace->th_prev = &nce->nce_trace[bucket_id]; 2882 if (th_trace->th_next != NULL) 2883 th_trace->th_next->th_prev = &th_trace->th_next; 2884 nce->nce_trace[bucket_id] = th_trace; 2885 } 2886 ASSERT(th_trace->th_refcnt < TR_BUF_MAX - 1); 2887 th_trace->th_refcnt++; 2888 th_trace_rrecord(th_trace); 2889 } 2890 2891 void 2892 nce_untrace_ref(nce_t *nce) 2893 { 2894 th_trace_t *th_trace; 2895 2896 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2897 2898 if (nce->nce_trace_disable == B_TRUE) 2899 return; 2900 2901 th_trace = th_trace_nce_lookup(nce); 2902 ASSERT(th_trace != NULL && th_trace->th_refcnt > 0); 2903 2904 th_trace_rrecord(th_trace); 2905 th_trace->th_refcnt--; 2906 } 2907 2908 void 2909 nce_trace_inactive(nce_t *nce) 2910 { 2911 th_trace_t *th_trace; 2912 int i; 2913 2914 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2915 2916 for (i = 0; i < IP_TR_HASH_MAX; i++) { 2917 while (nce->nce_trace[i] != NULL) { 2918 th_trace = nce->nce_trace[i]; 2919 2920 /* unlink th_trace and free it */ 2921 nce->nce_trace[i] = th_trace->th_next; 2922 if (th_trace->th_next != NULL) 2923 th_trace->th_next->th_prev = 2924 &nce->nce_trace[i]; 2925 2926 th_trace->th_next = NULL; 2927 th_trace->th_prev = NULL; 2928 kmem_free(th_trace, sizeof (th_trace_t)); 2929 } 2930 } 2931 2932 } 2933 2934 /* ARGSUSED */ 2935 int 2936 nce_thread_exit(nce_t *nce, caddr_t arg) 2937 { 2938 th_trace_t *th_trace; 2939 2940 mutex_enter(&nce->nce_lock); 2941 th_trace = th_trace_nce_lookup(nce); 2942 2943 if (th_trace == NULL) { 2944 mutex_exit(&nce->nce_lock); 2945 return (0); 2946 } 2947 2948 ASSERT(th_trace->th_refcnt == 0); 2949 2950 /* unlink th_trace and free it */ 2951 *th_trace->th_prev = th_trace->th_next; 2952 if (th_trace->th_next != NULL) 2953 th_trace->th_next->th_prev = th_trace->th_prev; 2954 th_trace->th_next = NULL; 2955 th_trace->th_prev = NULL; 2956 kmem_free(th_trace, sizeof (th_trace_t)); 2957 mutex_exit(&nce->nce_lock); 2958 return (0); 2959 } 2960 #endif 2961