1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/stropts.h> 32 #include <sys/sysmacros.h> 33 #include <sys/errno.h> 34 #include <sys/strlog.h> 35 #include <sys/dlpi.h> 36 #include <sys/sockio.h> 37 #include <sys/tiuser.h> 38 #include <sys/tihdr.h> 39 #include <sys/socket.h> 40 #include <sys/ddi.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/vtrace.h> 44 #include <sys/kmem.h> 45 #include <sys/zone.h> 46 47 #include <net/if.h> 48 #include <net/if_types.h> 49 #include <net/if_dl.h> 50 #include <net/route.h> 51 #include <sys/sockio.h> 52 #include <netinet/in.h> 53 #include <netinet/in_systm.h> 54 #include <netinet/ip6.h> 55 #include <netinet/icmp6.h> 56 57 #include <inet/common.h> 58 #include <inet/mi.h> 59 #include <inet/mib2.h> 60 #include <inet/nd.h> 61 #include <inet/arp.h> 62 #include <inet/ip.h> 63 #include <inet/ip_multi.h> 64 #include <inet/ip_if.h> 65 #include <inet/ip_ire.h> 66 #include <inet/ip_rts.h> 67 #include <inet/ip6.h> 68 #include <inet/ip_ndp.h> 69 #include <inet/ipsec_impl.h> 70 #include <inet/ipsec_info.h> 71 72 /* 73 * Function names with nce_ prefix are static while function 74 * names with ndp_ prefix are used by rest of the IP. 75 */ 76 77 static boolean_t nce_cmp_ll_addr(nce_t *nce, char *new_ll_addr, 78 uint32_t ll_addr_len); 79 static void nce_fastpath(nce_t *nce); 80 static void nce_ire_delete(nce_t *nce); 81 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 82 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 83 static nce_t *nce_lookup_addr(ill_t *ill, const in6_addr_t *addr); 84 static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); 85 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 86 uchar_t *addr); 87 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 88 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 89 static void nce_report1(nce_t *nce, uchar_t *mp_arg); 90 static mblk_t *nce_udreq_alloc(ill_t *ill); 91 static void nce_update(nce_t *nce, uint16_t new_state, 92 uchar_t *new_ll_addr); 93 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 94 static boolean_t nce_xmit(ill_t *ill, uint32_t operation, 95 ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, 96 const in6_addr_t *target, int flag); 97 static void lla2ascii(uint8_t *lla, int addrlen, uchar_t *buf); 98 extern void th_trace_rrecord(th_trace_t *); 99 100 #ifdef NCE_DEBUG 101 void nce_trace_inactive(nce_t *); 102 #endif 103 104 /* NDP Cache Entry Hash Table */ 105 #define NCE_TABLE_SIZE 256 106 static nce_t *nce_hash_tbl[NCE_TABLE_SIZE]; 107 static nce_t *nce_mask_entries; /* mask not all ones */ 108 static int ndp_g_walker = 0; /* # of active thread */ 109 /* walking nce hash list */ 110 /* ndp_g_walker_cleanup will be true, when deletion have to be defered */ 111 static boolean_t ndp_g_walker_cleanup = B_FALSE; 112 113 #ifdef _BIG_ENDIAN 114 #define IN6_IS_ADDR_MC_SOLICITEDNODE(addr) \ 115 ((((addr)->s6_addr32[0] & 0xff020000) == 0xff020000) && \ 116 ((addr)->s6_addr32[1] == 0x0) && \ 117 ((addr)->s6_addr32[2] == 0x00000001) && \ 118 ((addr)->s6_addr32[3] & 0xff000000) == 0xff000000) 119 #else /* _BIG_ENDIAN */ 120 #define IN6_IS_ADDR_MC_SOLICITEDNODE(addr) \ 121 ((((addr)->s6_addr32[0] & 0x000002ff) == 0x000002ff) && \ 122 ((addr)->s6_addr32[1] == 0x0) && \ 123 ((addr)->s6_addr32[2] == 0x01000000) && \ 124 ((addr)->s6_addr32[3] & 0x000000ff) == 0x000000ff) 125 #endif 126 127 #define NCE_HASH_PTR(addr) \ 128 (&(nce_hash_tbl[NCE_ADDR_HASH_V6(addr, NCE_TABLE_SIZE)])) 129 130 /* 131 * NDP Cache Entry creation routine. 132 * Mapped entries will never do NUD . 133 * This routine must always be called with ndp_g_lock held. 134 * Prior to return, nce_refcnt is incremented. 135 */ 136 int 137 ndp_add(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 138 const in6_addr_t *mask, const in6_addr_t *extract_mask, 139 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 140 nce_t **newnce) 141 { 142 static nce_t nce_nil; 143 nce_t *nce; 144 mblk_t *mp; 145 mblk_t *template; 146 nce_t **ncep; 147 boolean_t dropped = B_FALSE; 148 149 ASSERT(MUTEX_HELD(&ndp_g_lock)); 150 ASSERT(ill != NULL); 151 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 152 ip0dbg(("ndp_add: no addr\n")); 153 return (EINVAL); 154 } 155 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 156 ip0dbg(("ndp_add: flags = %x\n", (int)flags)); 157 return (EINVAL); 158 } 159 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 160 (flags & NCE_F_MAPPING)) { 161 ip0dbg(("ndp_add: extract mask zero for mapping")); 162 return (EINVAL); 163 } 164 /* 165 * Allocate the mblk to hold the nce. 166 * 167 * XXX This can come out of a separate cache - nce_cache. 168 * We don't need the mp anymore as there are no more 169 * "qwriter"s 170 */ 171 mp = allocb(sizeof (nce_t), BPRI_MED); 172 if (mp == NULL) 173 return (ENOMEM); 174 175 nce = (nce_t *)mp->b_rptr; 176 mp->b_wptr = (uchar_t *)&nce[1]; 177 *nce = nce_nil; 178 179 /* 180 * This one holds link layer address 181 */ 182 if (ill->ill_net_type == IRE_IF_RESOLVER) { 183 template = nce_udreq_alloc(ill); 184 } else { 185 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 186 ASSERT((ill->ill_resolver_mp != NULL)); 187 template = copyb(ill->ill_resolver_mp); 188 } 189 if (template == NULL) { 190 freeb(mp); 191 return (ENOMEM); 192 } 193 nce->nce_ill = ill; 194 nce->nce_flags = flags; 195 nce->nce_state = state; 196 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 197 nce->nce_rcnt = ill->ill_xmit_count; 198 nce->nce_addr = *addr; 199 nce->nce_mask = *mask; 200 nce->nce_extract_mask = *extract_mask; 201 nce->nce_ll_extract_start = hw_extract_start; 202 nce->nce_fp_mp = NULL; 203 nce->nce_res_mp = template; 204 if (state == ND_REACHABLE) 205 nce->nce_last = TICK_TO_MSEC(lbolt64); 206 else 207 nce->nce_last = 0; 208 nce->nce_qd_mp = NULL; 209 nce->nce_mp = mp; 210 if (hw_addr != NULL) 211 nce_set_ll(nce, hw_addr); 212 /* This one is for nce getting created */ 213 nce->nce_refcnt = 1; 214 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 215 if (nce->nce_flags & NCE_F_MAPPING) { 216 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 217 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 218 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 219 ncep = &nce_mask_entries; 220 } else { 221 ncep = ((nce_t **)NCE_HASH_PTR(*addr)); 222 } 223 224 #ifdef NCE_DEBUG 225 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 226 #endif 227 /* 228 * Atomically ensure that the ill is not CONDEMNED, before 229 * adding the NCE. 230 */ 231 mutex_enter(&ill->ill_lock); 232 if (ill->ill_state_flags & ILL_CONDEMNED) { 233 mutex_exit(&ill->ill_lock); 234 freeb(mp); 235 return (EINVAL); 236 } 237 if ((nce->nce_next = *ncep) != NULL) 238 nce->nce_next->nce_ptpn = &nce->nce_next; 239 *ncep = nce; 240 nce->nce_ptpn = ncep; 241 *newnce = nce; 242 /* This one is for nce being used by an active thread */ 243 NCE_REFHOLD(*newnce); 244 245 /* Bump up the number of nce's referencing this ill */ 246 ill->ill_nce_cnt++; 247 mutex_exit(&ill->ill_lock); 248 249 /* 250 * Before we insert the nce, honor the UNSOL_ADV flag. 251 * We cannot hold the ndp_g_lock and call nce_xmit 252 * which does a putnext. 253 */ 254 if (flags & NCE_F_UNSOL_ADV) { 255 flags |= NDP_ORIDE; 256 /* 257 * We account for the transmit below by assigning one 258 * less than the ndd variable. Subsequent decrements 259 * are done in ndp_timer. 260 */ 261 mutex_enter(&nce->nce_lock); 262 mutex_exit(&ndp_g_lock); 263 nce->nce_unsolicit_count = ip_ndp_unsolicit_count - 1; 264 mutex_exit(&nce->nce_lock); 265 dropped = nce_xmit(ill, 266 ND_NEIGHBOR_ADVERT, 267 ill, /* ill to be used for extracting ill_nd_lla */ 268 B_TRUE, /* use ill_nd_lla */ 269 addr, /* Source and target of the advertisement pkt */ 270 &ipv6_all_hosts_mcast, /* Destination of the packet */ 271 flags); 272 mutex_enter(&nce->nce_lock); 273 if (dropped) 274 nce->nce_unsolicit_count++; 275 if (nce->nce_unsolicit_count != 0) { 276 nce->nce_timeout_id = timeout(ndp_timer, nce, 277 MSEC_TO_TICK(ip_ndp_unsolicit_interval)); 278 } 279 mutex_exit(&nce->nce_lock); 280 mutex_enter(&ndp_g_lock); 281 } 282 /* 283 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 284 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 285 * We call nce_fastpath from nce_update if the link layer address of 286 * the peer changes from nce_update 287 */ 288 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 289 nce_fastpath(nce); 290 return (0); 291 } 292 293 int 294 ndp_lookup_then_add(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 295 const in6_addr_t *mask, const in6_addr_t *extract_mask, 296 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 297 nce_t **newnce) 298 { 299 int err = 0; 300 nce_t *nce; 301 302 mutex_enter(&ndp_g_lock); 303 nce = nce_lookup_addr(ill, addr); 304 if (nce == NULL) { 305 err = ndp_add(ill, 306 hw_addr, 307 addr, 308 mask, 309 extract_mask, 310 hw_extract_start, 311 flags, 312 state, 313 newnce); 314 } else { 315 *newnce = nce; 316 err = EEXIST; 317 } 318 mutex_exit(&ndp_g_lock); 319 return (err); 320 } 321 322 /* 323 * Remove all the CONDEMNED nces from the appropriate hash table. 324 * We create a private list of NCEs, these may have ires pointing 325 * to them, so the list will be passed through to clean up dependent 326 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 327 */ 328 static void 329 nce_remove(nce_t *nce, nce_t **free_nce_list) 330 { 331 nce_t *nce1; 332 nce_t **ptpn; 333 334 ASSERT(MUTEX_HELD(&ndp_g_lock)); 335 ASSERT(ndp_g_walker == 0); 336 for (; nce; nce = nce1) { 337 nce1 = nce->nce_next; 338 mutex_enter(&nce->nce_lock); 339 if (nce->nce_flags & NCE_F_CONDEMNED) { 340 ptpn = nce->nce_ptpn; 341 nce1 = nce->nce_next; 342 if (nce1 != NULL) 343 nce1->nce_ptpn = ptpn; 344 *ptpn = nce1; 345 nce->nce_ptpn = NULL; 346 nce->nce_next = NULL; 347 nce->nce_next = *free_nce_list; 348 *free_nce_list = nce; 349 } 350 mutex_exit(&nce->nce_lock); 351 } 352 } 353 354 /* 355 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 356 * will return this NCE. Also no new IREs will be created that 357 * point to this NCE (See ire_add_v6). Also no new timeouts will 358 * be started (See NDP_RESTART_TIMER). 359 * 2. Cancel any currently running timeouts. 360 * 3. If there is an ndp walker, return. The walker will do the cleanup. 361 * This ensures that walkers see a consistent list of NCEs while walking. 362 * 4. Otherwise remove the NCE from the list of NCEs 363 * 5. Delete all IREs pointing to this NCE. 364 */ 365 void 366 ndp_delete(nce_t *nce) 367 { 368 nce_t **ptpn; 369 nce_t *nce1; 370 371 /* Serialize deletes */ 372 mutex_enter(&nce->nce_lock); 373 if (nce->nce_flags & NCE_F_CONDEMNED) { 374 /* Some other thread is doing the delete */ 375 mutex_exit(&nce->nce_lock); 376 return; 377 } 378 /* 379 * Caller has a refhold. Also 1 ref for being in the list. Thus 380 * refcnt has to be >= 2 381 */ 382 ASSERT(nce->nce_refcnt >= 2); 383 nce->nce_flags |= NCE_F_CONDEMNED; 384 mutex_exit(&nce->nce_lock); 385 386 nce_fastpath_list_delete(nce); 387 388 /* 389 * Cancel any running timer. Timeout can't be restarted 390 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 391 * Passing invalid timeout id is fine. 392 */ 393 if (nce->nce_timeout_id != 0) { 394 (void) untimeout(nce->nce_timeout_id); 395 nce->nce_timeout_id = 0; 396 } 397 398 mutex_enter(&ndp_g_lock); 399 if (nce->nce_ptpn == NULL) { 400 /* 401 * The last ndp walker has already removed this nce from 402 * the list after we marked the nce CONDEMNED and before 403 * we grabbed the ndp_g_lock. 404 */ 405 mutex_exit(&ndp_g_lock); 406 return; 407 } 408 if (ndp_g_walker > 0) { 409 /* 410 * Can't unlink. The walker will clean up 411 */ 412 ndp_g_walker_cleanup = B_TRUE; 413 mutex_exit(&ndp_g_lock); 414 return; 415 } 416 417 /* 418 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 419 * the timer since it is marked CONDEMNED. 420 */ 421 ptpn = nce->nce_ptpn; 422 nce1 = nce->nce_next; 423 if (nce1 != NULL) 424 nce1->nce_ptpn = ptpn; 425 *ptpn = nce1; 426 nce->nce_ptpn = NULL; 427 nce->nce_next = NULL; 428 mutex_exit(&ndp_g_lock); 429 430 nce_ire_delete(nce); 431 } 432 433 void 434 ndp_inactive(nce_t *nce) 435 { 436 mblk_t **mpp; 437 ill_t *ill; 438 439 ASSERT(nce->nce_refcnt == 0); 440 ASSERT(MUTEX_HELD(&nce->nce_lock)); 441 ASSERT(nce->nce_fastpath == NULL); 442 443 /* Free all nce allocated messages */ 444 mpp = &nce->nce_first_mp_to_free; 445 do { 446 while (*mpp != NULL) { 447 mblk_t *mp; 448 449 mp = *mpp; 450 *mpp = mp->b_next; 451 mp->b_next = NULL; 452 mp->b_prev = NULL; 453 freemsg(mp); 454 } 455 } while (mpp++ != &nce->nce_last_mp_to_free); 456 457 #ifdef NCE_DEBUG 458 nce_trace_inactive(nce); 459 #endif 460 461 ill = nce->nce_ill; 462 mutex_enter(&ill->ill_lock); 463 ill->ill_nce_cnt--; 464 /* 465 * If the number of nce's associated with this ill have dropped 466 * to zero, check whether we need to restart any operation that 467 * is waiting for this to happen. 468 */ 469 if (ill->ill_nce_cnt == 0) { 470 /* ipif_ill_refrele_tail drops the ill_lock */ 471 ipif_ill_refrele_tail(ill); 472 } else { 473 mutex_exit(&ill->ill_lock); 474 } 475 mutex_destroy(&nce->nce_lock); 476 freeb(nce->nce_mp); 477 } 478 479 /* 480 * ndp_walk routine. Delete the nce if it is associated with the ill 481 * that is going away. Always called as a writer. 482 */ 483 void 484 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 485 { 486 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 487 ndp_delete(nce); 488 } 489 } 490 491 /* 492 * Walk a list of to be inactive NCEs and blow away all the ires. 493 */ 494 static void 495 nce_ire_delete_list(nce_t *nce) 496 { 497 nce_t *nce_next; 498 499 ASSERT(nce != NULL); 500 while (nce != NULL) { 501 nce_next = nce->nce_next; 502 nce->nce_next = NULL; 503 504 /* 505 * It is possible for the last ndp walker (this thread) 506 * to come here after ndp_delete has marked the nce CONDEMNED 507 * and before it has removed the nce from the fastpath list 508 * or called untimeout. So we need to do it here. It is safe 509 * for both ndp_delete and this thread to do it twice or 510 * even simultaneously since each of the threads has a 511 * reference on the nce. 512 */ 513 nce_fastpath_list_delete(nce); 514 /* 515 * Cancel any running timer. Timeout can't be restarted 516 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 517 * Passing invalid timeout id is fine. 518 */ 519 if (nce->nce_timeout_id != 0) { 520 (void) untimeout(nce->nce_timeout_id); 521 nce->nce_timeout_id = 0; 522 } 523 524 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 525 nce_ire_delete1, (char *)nce, nce->nce_ill); 526 NCE_REFRELE_NOTR(nce); 527 nce = nce_next; 528 } 529 } 530 531 /* 532 * Delete an ire when the nce goes away. 533 */ 534 /* ARGSUSED */ 535 static void 536 nce_ire_delete(nce_t *nce) 537 { 538 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 539 nce_ire_delete1, (char *)nce, nce->nce_ill); 540 NCE_REFRELE_NOTR(nce); 541 } 542 543 /* 544 * ire_walk routine used to delete every IRE that shares this nce 545 */ 546 static void 547 nce_ire_delete1(ire_t *ire, char *nce_arg) 548 { 549 nce_t *nce = (nce_t *)nce_arg; 550 551 ASSERT(ire->ire_type == IRE_CACHE); 552 553 if (ire->ire_nce == nce) 554 ire_delete(ire); 555 } 556 557 /* 558 * Cache entry lookup. Try to find an nce matching the parameters passed. 559 * If one is found, the refcnt on the nce will be incremented. 560 */ 561 nce_t * 562 ndp_lookup(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) 563 { 564 nce_t *nce; 565 566 if (!caller_holds_lock) 567 mutex_enter(&ndp_g_lock); 568 nce = nce_lookup_addr(ill, addr); 569 if (nce == NULL) 570 nce = nce_lookup_mapping(ill, addr); 571 if (!caller_holds_lock) 572 mutex_exit(&ndp_g_lock); 573 return (nce); 574 } 575 576 /* 577 * Cache entry lookup. Try to find an nce matching the parameters passed. 578 * Look only for exact entries (no mappings). If an nce is found, increment 579 * the hold count on that nce. 580 */ 581 static nce_t * 582 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr) 583 { 584 nce_t *nce; 585 586 ASSERT(ill != NULL); 587 ASSERT(MUTEX_HELD(&ndp_g_lock)); 588 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 589 return (NULL); 590 nce = *((nce_t **)NCE_HASH_PTR(*addr)); 591 for (; nce != NULL; nce = nce->nce_next) { 592 if (nce->nce_ill == ill) { 593 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 594 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 595 &ipv6_all_ones)) { 596 mutex_enter(&nce->nce_lock); 597 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 598 NCE_REFHOLD_LOCKED(nce); 599 mutex_exit(&nce->nce_lock); 600 break; 601 } 602 mutex_exit(&nce->nce_lock); 603 } 604 } 605 } 606 return (nce); 607 } 608 609 /* 610 * Cache entry lookup. Try to find an nce matching the parameters passed. 611 * Look only for mappings. 612 */ 613 static nce_t * 614 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 615 { 616 nce_t *nce; 617 618 ASSERT(ill != NULL); 619 ASSERT(MUTEX_HELD(&ndp_g_lock)); 620 if (!IN6_IS_ADDR_MULTICAST(addr)) 621 return (NULL); 622 nce = nce_mask_entries; 623 for (; nce != NULL; nce = nce->nce_next) 624 if (nce->nce_ill == ill && 625 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 626 mutex_enter(&nce->nce_lock); 627 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 628 NCE_REFHOLD_LOCKED(nce); 629 mutex_exit(&nce->nce_lock); 630 break; 631 } 632 mutex_exit(&nce->nce_lock); 633 } 634 return (nce); 635 } 636 637 /* 638 * Process passed in parameters either from an incoming packet or via 639 * user ioctl. 640 */ 641 void 642 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 643 { 644 ill_t *ill = nce->nce_ill; 645 uint32_t hw_addr_len = ill->ill_nd_lla_len; 646 mblk_t *mp; 647 boolean_t ll_updated = B_FALSE; 648 boolean_t ll_changed; 649 650 /* 651 * No updates of link layer address or the neighbor state is 652 * allowed, when the cache is in NONUD state. This still 653 * allows for responding to reachability solicitation. 654 */ 655 mutex_enter(&nce->nce_lock); 656 if (nce->nce_state == ND_INCOMPLETE) { 657 if (hw_addr == NULL) { 658 mutex_exit(&nce->nce_lock); 659 return; 660 } 661 nce_set_ll(nce, hw_addr); 662 /* 663 * Update nce state and send the queued packets 664 * back to ip this time ire will be added. 665 */ 666 if (flag & ND_NA_FLAG_SOLICITED) { 667 nce_update(nce, ND_REACHABLE, NULL); 668 } else { 669 nce_update(nce, ND_STALE, NULL); 670 } 671 mutex_exit(&nce->nce_lock); 672 nce_fastpath(nce); 673 mutex_enter(&nce->nce_lock); 674 mp = nce->nce_qd_mp; 675 nce->nce_qd_mp = NULL; 676 mutex_exit(&nce->nce_lock); 677 while (mp != NULL) { 678 mblk_t *nxt_mp; 679 680 nxt_mp = mp->b_next; 681 mp->b_next = NULL; 682 if (mp->b_prev != NULL) { 683 ill_t *inbound_ill; 684 queue_t *fwdq = NULL; 685 uint_t ifindex; 686 687 ifindex = (uint_t)(uintptr_t)mp->b_prev; 688 inbound_ill = ill_lookup_on_ifindex(ifindex, 689 B_TRUE, NULL, NULL, NULL, NULL); 690 if (inbound_ill == NULL) { 691 mp->b_prev = NULL; 692 freemsg(mp); 693 return; 694 } else { 695 fwdq = inbound_ill->ill_rq; 696 } 697 mp->b_prev = NULL; 698 /* 699 * Send a forwarded packet back into ip_rput_v6 700 * just as in ire_send_v6(). 701 * Extract the queue from b_prev (set in 702 * ip_rput_data_v6). 703 */ 704 if (fwdq != NULL) { 705 /* 706 * Forwarded packets hop count will 707 * get decremented in ip_rput_data_v6 708 */ 709 put(fwdq, mp); 710 } else { 711 /* 712 * Send locally originated packets back 713 * into * ip_wput_v6. 714 */ 715 put(ill->ill_wq, mp); 716 } 717 ill_refrele(inbound_ill); 718 } else { 719 put(ill->ill_wq, mp); 720 } 721 mp = nxt_mp; 722 } 723 return; 724 } 725 ll_changed = nce_cmp_ll_addr(nce, (char *)hw_addr, hw_addr_len); 726 if (!is_adv) { 727 /* If this is a SOLICITATION request only */ 728 if (ll_changed) 729 nce_update(nce, ND_STALE, hw_addr); 730 mutex_exit(&nce->nce_lock); 731 return; 732 } 733 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 734 /* If in any other state than REACHABLE, ignore */ 735 if (nce->nce_state == ND_REACHABLE) { 736 nce_update(nce, ND_STALE, NULL); 737 } 738 mutex_exit(&nce->nce_lock); 739 return; 740 } else { 741 if (ll_changed) { 742 nce_update(nce, ND_UNCHANGED, hw_addr); 743 ll_updated = B_TRUE; 744 } 745 if (flag & ND_NA_FLAG_SOLICITED) { 746 nce_update(nce, ND_REACHABLE, NULL); 747 } else { 748 if (ll_updated) { 749 nce_update(nce, ND_STALE, NULL); 750 } 751 } 752 mutex_exit(&nce->nce_lock); 753 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 754 NCE_F_ISROUTER)) { 755 ire_t *ire; 756 757 /* 758 * Router turned to host. We need to remove the 759 * entry as well as any default route that may be 760 * using this as a next hop. This is required by 761 * section 7.2.5 of RFC 2461. 762 */ 763 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 764 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 765 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, 766 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 767 MATCH_IRE_DEFAULT); 768 if (ire != NULL) { 769 ip_rts_rtmsg(RTM_DELETE, ire, 0); 770 ire_delete(ire); 771 ire_refrele(ire); 772 } 773 ndp_delete(nce); 774 } 775 } 776 } 777 778 /* 779 * Pass arg1 to the pfi supplied, along with each nce in existence. 780 * ndp_walk() places a REFHOLD on the nce and drops the lock when 781 * walking the hash list. 782 */ 783 void 784 ndp_walk_impl(ill_t *ill, pfi_t pfi, uchar_t *arg1, boolean_t trace) 785 { 786 787 nce_t *nce; 788 nce_t *nce1; 789 nce_t **ncep; 790 nce_t *free_nce_list = NULL; 791 792 mutex_enter(&ndp_g_lock); 793 ndp_g_walker++; /* Prevent ndp_delete from unlink and free of NCE */ 794 mutex_exit(&ndp_g_lock); 795 for (ncep = nce_hash_tbl; ncep < A_END(nce_hash_tbl); ncep++) { 796 for (nce = *ncep; nce; nce = nce1) { 797 nce1 = nce->nce_next; 798 if (ill == NULL || nce->nce_ill == ill) { 799 if (trace) { 800 NCE_REFHOLD(nce); 801 (*pfi)(nce, arg1); 802 NCE_REFRELE(nce); 803 } else { 804 NCE_REFHOLD_NOTR(nce); 805 (*pfi)(nce, arg1); 806 NCE_REFRELE_NOTR(nce); 807 } 808 } 809 } 810 } 811 for (nce = nce_mask_entries; nce; nce = nce1) { 812 nce1 = nce->nce_next; 813 if (ill == NULL || nce->nce_ill == ill) { 814 if (trace) { 815 NCE_REFHOLD(nce); 816 (*pfi)(nce, arg1); 817 NCE_REFRELE(nce); 818 } else { 819 NCE_REFHOLD_NOTR(nce); 820 (*pfi)(nce, arg1); 821 NCE_REFRELE_NOTR(nce); 822 } 823 } 824 } 825 mutex_enter(&ndp_g_lock); 826 ndp_g_walker--; 827 /* 828 * While NCE's are removed from global list they are placed 829 * in a private list, to be passed to nce_ire_delete_list(). 830 * The reason is, there may be ires pointing to this nce 831 * which needs to cleaned up. 832 */ 833 if (ndp_g_walker_cleanup && ndp_g_walker == 0) { 834 /* Time to delete condemned entries */ 835 for (ncep = nce_hash_tbl; ncep < A_END(nce_hash_tbl); ncep++) { 836 nce = *ncep; 837 if (nce != NULL) { 838 nce_remove(nce, &free_nce_list); 839 } 840 } 841 nce = nce_mask_entries; 842 if (nce != NULL) { 843 nce_remove(nce, &free_nce_list); 844 } 845 ndp_g_walker_cleanup = B_FALSE; 846 } 847 mutex_exit(&ndp_g_lock); 848 849 if (free_nce_list != NULL) { 850 nce_ire_delete_list(free_nce_list); 851 } 852 } 853 854 void 855 ndp_walk(ill_t *ill, pfi_t pfi, uchar_t *arg1) 856 { 857 ndp_walk_impl(ill, pfi, arg1, B_TRUE); 858 } 859 860 /* 861 * Prepend the zoneid using an ipsec_out_t for later use by functions like 862 * ip_rput_v6() after neighbor discovery has taken place. If the message 863 * block already has a M_CTL at the front of it, then simply set the zoneid 864 * appropriately. 865 */ 866 static mblk_t * 867 ndp_prepend_zone(mblk_t *mp, zoneid_t zoneid) 868 { 869 mblk_t *first_mp; 870 ipsec_out_t *io; 871 872 if (mp->b_datap->db_type == M_CTL) { 873 io = (ipsec_out_t *)mp->b_rptr; 874 ASSERT(io->ipsec_out_type == IPSEC_OUT); 875 io->ipsec_out_zoneid = zoneid; 876 return (mp); 877 } 878 879 first_mp = ipsec_alloc_ipsec_out(); 880 if (first_mp == NULL) 881 return (NULL); 882 io = (ipsec_out_t *)first_mp->b_rptr; 883 /* This is not a secure packet */ 884 io->ipsec_out_secure = B_FALSE; 885 io->ipsec_out_zoneid = zoneid; 886 first_mp->b_cont = mp; 887 return (first_mp); 888 } 889 890 /* 891 * Process resolve requests. Handles both mapped entries 892 * as well as cases that needs to be send out on the wire. 893 * Lookup a NCE for a given IRE. Regardless of whether one exists 894 * or one is created, we defer making ire point to nce until the 895 * ire is actually added at which point the nce_refcnt on the nce is 896 * incremented. This is done primarily to have symmetry between ire_add() 897 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 898 */ 899 int 900 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 901 { 902 nce_t *nce; 903 int err = 0; 904 uint32_t ms; 905 mblk_t *mp_nce = NULL; 906 907 ASSERT(ill != NULL); 908 if (IN6_IS_ADDR_MULTICAST(dst)) { 909 err = nce_set_multicast(ill, dst); 910 return (err); 911 } 912 err = ndp_lookup_then_add(ill, 913 NULL, /* No hardware address */ 914 dst, 915 &ipv6_all_ones, 916 &ipv6_all_zeros, 917 0, 918 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 919 ND_INCOMPLETE, 920 &nce); 921 922 switch (err) { 923 case 0: 924 /* 925 * New cache entry was created. Make sure that the state 926 * is not ND_INCOMPLETE. It can be in some other state 927 * even before we send out the solicitation as we could 928 * get un-solicited advertisements. 929 * 930 * If this is an XRESOLV interface, simply return 0, 931 * since we don't want to solicit just yet. 932 */ 933 if (ill->ill_flags & ILLF_XRESOLV) { 934 NCE_REFRELE(nce); 935 return (0); 936 } 937 rw_enter(&ill_g_lock, RW_READER); 938 mutex_enter(&nce->nce_lock); 939 if (nce->nce_state != ND_INCOMPLETE) { 940 mutex_exit(&nce->nce_lock); 941 rw_exit(&ill_g_lock); 942 NCE_REFRELE(nce); 943 return (0); 944 } 945 mp_nce = ndp_prepend_zone(mp, zoneid); 946 if (mp_nce == NULL) { 947 /* The caller will free mp */ 948 mutex_exit(&nce->nce_lock); 949 rw_exit(&ill_g_lock); 950 ndp_delete(nce); 951 NCE_REFRELE(nce); 952 return (ENOMEM); 953 } 954 ms = nce_solicit(nce, mp_nce); 955 rw_exit(&ill_g_lock); 956 if (ms == 0) { 957 /* The caller will free mp */ 958 if (mp_nce != mp) 959 freeb(mp_nce); 960 mutex_exit(&nce->nce_lock); 961 ndp_delete(nce); 962 NCE_REFRELE(nce); 963 return (EBUSY); 964 } 965 mutex_exit(&nce->nce_lock); 966 NDP_RESTART_TIMER(nce, (clock_t)ms); 967 NCE_REFRELE(nce); 968 return (EINPROGRESS); 969 case EEXIST: 970 /* Resolution in progress just queue the packet */ 971 mutex_enter(&nce->nce_lock); 972 if (nce->nce_state == ND_INCOMPLETE) { 973 mp_nce = ndp_prepend_zone(mp, zoneid); 974 if (mp_nce == NULL) { 975 err = ENOMEM; 976 } else { 977 nce_queue_mp(nce, mp_nce); 978 err = EINPROGRESS; 979 } 980 } else { 981 /* 982 * Any other state implies we have 983 * a nce but IRE needs to be added ... 984 * ire_add_v6() will take care of the 985 * the case when the nce becomes CONDEMNED 986 * before the ire is added to the table. 987 */ 988 err = 0; 989 } 990 mutex_exit(&nce->nce_lock); 991 NCE_REFRELE(nce); 992 break; 993 default: 994 ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); 995 break; 996 } 997 return (err); 998 } 999 1000 /* 1001 * When there is no resolver, the link layer template is passed in 1002 * the IRE. 1003 * Lookup a NCE for a given IRE. Regardless of whether one exists 1004 * or one is created, we defer making ire point to nce until the 1005 * ire is actually added at which point the nce_refcnt on the nce is 1006 * incremented. This is done primarily to have symmetry between ire_add() 1007 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1008 */ 1009 int 1010 ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 1011 { 1012 nce_t *nce; 1013 int err = 0; 1014 1015 ASSERT(ill != NULL); 1016 if (IN6_IS_ADDR_MULTICAST(dst)) { 1017 err = nce_set_multicast(ill, dst); 1018 return (err); 1019 } 1020 1021 err = ndp_lookup_then_add(ill, 1022 NULL, /* hardware address */ 1023 dst, 1024 &ipv6_all_ones, 1025 &ipv6_all_zeros, 1026 0, 1027 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1028 ND_REACHABLE, 1029 &nce); 1030 1031 switch (err) { 1032 case 0: 1033 /* 1034 * Cache entry with a proper resolver cookie was 1035 * created. 1036 */ 1037 NCE_REFRELE(nce); 1038 break; 1039 case EEXIST: 1040 err = 0; 1041 NCE_REFRELE(nce); 1042 break; 1043 default: 1044 ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 1045 break; 1046 } 1047 return (err); 1048 } 1049 1050 /* 1051 * For each interface an entry is added for the unspecified multicast group. 1052 * Here that mapping is used to form the multicast cache entry for a particular 1053 * multicast destination. 1054 */ 1055 static int 1056 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1057 { 1058 nce_t *mnce; /* Multicast mapping entry */ 1059 nce_t *nce; 1060 uchar_t *hw_addr = NULL; 1061 int err = 0; 1062 1063 ASSERT(ill != NULL); 1064 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1065 1066 mutex_enter(&ndp_g_lock); 1067 nce = nce_lookup_addr(ill, dst); 1068 if (nce != NULL) { 1069 mutex_exit(&ndp_g_lock); 1070 NCE_REFRELE(nce); 1071 return (0); 1072 } 1073 /* No entry, now lookup for a mapping this should never fail */ 1074 mnce = nce_lookup_mapping(ill, dst); 1075 if (mnce == NULL) { 1076 /* Something broken for the interface. */ 1077 mutex_exit(&ndp_g_lock); 1078 return (ESRCH); 1079 } 1080 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1081 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1082 /* 1083 * For IRE_IF_RESOLVER a hardware mapping can be 1084 * generated, for IRE_IF_NORESOLVER, resolution cookie 1085 * in the ill is copied in ndp_add(). 1086 */ 1087 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1088 if (hw_addr == NULL) { 1089 mutex_exit(&ndp_g_lock); 1090 NCE_REFRELE(mnce); 1091 return (ENOMEM); 1092 } 1093 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1094 } 1095 NCE_REFRELE(mnce); 1096 /* 1097 * IRE_IF_NORESOLVER type simply copies the resolution 1098 * cookie passed in. So no hw_addr is needed. 1099 */ 1100 err = ndp_add(ill, 1101 hw_addr, 1102 dst, 1103 &ipv6_all_ones, 1104 &ipv6_all_zeros, 1105 0, 1106 NCE_F_NONUD, 1107 ND_REACHABLE, 1108 &nce); 1109 mutex_exit(&ndp_g_lock); 1110 if (hw_addr != NULL) 1111 kmem_free(hw_addr, ill->ill_nd_lla_len); 1112 if (err != 0) { 1113 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1114 return (err); 1115 } 1116 NCE_REFRELE(nce); 1117 return (0); 1118 } 1119 1120 /* 1121 * Return the link layer address, and any flags of a nce. 1122 */ 1123 int 1124 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1125 { 1126 nce_t *nce; 1127 in6_addr_t *addr; 1128 sin6_t *sin6; 1129 dl_unitdata_req_t *dl; 1130 1131 ASSERT(ill != NULL); 1132 sin6 = (sin6_t *)&lnr->lnr_addr; 1133 addr = &sin6->sin6_addr; 1134 1135 nce = ndp_lookup(ill, addr, B_FALSE); 1136 if (nce == NULL) 1137 return (ESRCH); 1138 /* If in INCOMPLETE state, no link layer address is available yet */ 1139 if (nce->nce_state == ND_INCOMPLETE) 1140 goto done; 1141 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1142 if (ill->ill_flags & ILLF_XRESOLV) 1143 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1144 else 1145 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1146 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1147 sizeof (lnr->lnr_hdw_addr)); 1148 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1149 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1150 if (nce->nce_flags & NCE_F_ISROUTER) 1151 lnr->lnr_flags = NDF_ISROUTER_ON; 1152 if (nce->nce_flags & NCE_F_PROXY) 1153 lnr->lnr_flags |= NDF_PROXY_ON; 1154 if (nce->nce_flags & NCE_F_ANYCAST) 1155 lnr->lnr_flags |= NDF_ANYCAST_ON; 1156 done: 1157 NCE_REFRELE(nce); 1158 return (0); 1159 } 1160 1161 /* 1162 * Send Enable/Disable multicast reqs to driver. 1163 */ 1164 int 1165 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1166 uint32_t hw_addr_offset, mblk_t *mp) 1167 { 1168 nce_t *nce; 1169 uchar_t *hw_addr; 1170 1171 ASSERT(ill != NULL); 1172 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1173 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1174 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1175 freemsg(mp); 1176 return (EINVAL); 1177 } 1178 mutex_enter(&ndp_g_lock); 1179 nce = nce_lookup_mapping(ill, addr); 1180 if (nce == NULL) { 1181 mutex_exit(&ndp_g_lock); 1182 freemsg(mp); 1183 return (ESRCH); 1184 } 1185 mutex_exit(&ndp_g_lock); 1186 /* 1187 * Update dl_addr_length and dl_addr_offset for primitives that 1188 * have physical addresses as opposed to full saps 1189 */ 1190 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1191 case DL_ENABMULTI_REQ: 1192 /* Track the state if this is the first enabmulti */ 1193 if (ill->ill_dlpi_multicast_state == IDMS_UNKNOWN) 1194 ill->ill_dlpi_multicast_state = IDMS_INPROGRESS; 1195 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1196 break; 1197 case DL_DISABMULTI_REQ: 1198 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1199 break; 1200 default: 1201 NCE_REFRELE(nce); 1202 ip1dbg(("ndp_mcastreq: default\n")); 1203 return (EINVAL); 1204 } 1205 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1206 NCE_REFRELE(nce); 1207 putnext(ill->ill_wq, mp); 1208 return (0); 1209 } 1210 1211 /* 1212 * Send a neighbor solicitation. 1213 * Returns number of milliseconds after which we should either rexmit or abort. 1214 * Return of zero means we should abort. 1215 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1216 * 1217 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1218 * the packet. 1219 * NOTE: This routine does not consume mp. 1220 */ 1221 uint32_t 1222 nce_solicit(nce_t *nce, mblk_t *mp) 1223 { 1224 ill_t *ill; 1225 ill_t *src_ill; 1226 ip6_t *ip6h; 1227 in6_addr_t src; 1228 in6_addr_t dst; 1229 ipif_t *ipif; 1230 ip6i_t *ip6i; 1231 boolean_t dropped = B_FALSE; 1232 1233 ASSERT(RW_READ_HELD(&ill_g_lock)); 1234 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1235 ill = nce->nce_ill; 1236 ASSERT(ill != NULL); 1237 1238 if (nce->nce_rcnt == 0) { 1239 return (0); 1240 } 1241 1242 if (mp == NULL) { 1243 ASSERT(nce->nce_qd_mp != NULL); 1244 mp = nce->nce_qd_mp; 1245 } else { 1246 nce_queue_mp(nce, mp); 1247 } 1248 1249 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1250 if (mp->b_datap->db_type == M_CTL) 1251 mp = mp->b_cont; 1252 1253 ip6h = (ip6_t *)mp->b_rptr; 1254 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1255 /* 1256 * This message should have been pulled up already in 1257 * ip_wput_v6. We can't do pullups here because the message 1258 * could be from the nce_qd_mp which could have b_next/b_prev 1259 * non-NULL. 1260 */ 1261 ip6i = (ip6i_t *)ip6h; 1262 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 1263 sizeof (ip6i_t) + IPV6_HDR_LEN); 1264 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1265 } 1266 src = ip6h->ip6_src; 1267 /* 1268 * If the src of outgoing packet is one of the assigned interface 1269 * addresses use it, otherwise we will pick the source address below. 1270 */ 1271 src_ill = ill; 1272 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1273 if (ill->ill_group != NULL) 1274 src_ill = ill->ill_group->illgrp_ill; 1275 for (; src_ill != NULL; src_ill = src_ill->ill_group_next) { 1276 for (ipif = src_ill->ill_ipif; ipif != NULL; 1277 ipif = ipif->ipif_next) { 1278 if (IN6_ARE_ADDR_EQUAL(&src, 1279 &ipif->ipif_v6lcl_addr)) { 1280 break; 1281 } 1282 } 1283 if (ipif != NULL) 1284 break; 1285 } 1286 if (src_ill == NULL) { 1287 /* May be a forwarding packet */ 1288 src_ill = ill; 1289 src = ipv6_all_zeros; 1290 } 1291 } 1292 dst = nce->nce_addr; 1293 /* 1294 * If source address is unspecified, nce_xmit will choose 1295 * one for us and initialize the hardware address also 1296 * appropriately. 1297 */ 1298 if (IN6_IS_ADDR_UNSPECIFIED(&src)) 1299 src_ill = NULL; 1300 nce->nce_rcnt--; 1301 mutex_exit(&nce->nce_lock); 1302 rw_exit(&ill_g_lock); 1303 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, 1304 &dst, 0); 1305 rw_enter(&ill_g_lock, RW_READER); 1306 mutex_enter(&nce->nce_lock); 1307 if (dropped) 1308 nce->nce_rcnt++; 1309 return (ill->ill_reachable_retrans_time); 1310 } 1311 1312 void 1313 ndp_input_solicit(ill_t *ill, mblk_t *mp) 1314 { 1315 nd_neighbor_solicit_t *ns; 1316 uint32_t hlen = ill->ill_nd_lla_len; 1317 uchar_t *haddr = NULL; 1318 icmp6_t *icmp_nd; 1319 ip6_t *ip6h; 1320 nce_t *our_nce = NULL; 1321 in6_addr_t target; 1322 in6_addr_t src; 1323 int len; 1324 int flag = 0; 1325 nd_opt_hdr_t *opt = NULL; 1326 boolean_t bad_solicit = B_FALSE; 1327 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1328 1329 ip6h = (ip6_t *)mp->b_rptr; 1330 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1331 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1332 src = ip6h->ip6_src; 1333 ns = (nd_neighbor_solicit_t *)icmp_nd; 1334 target = ns->nd_ns_target; 1335 if (IN6_IS_ADDR_MULTICAST(&target)) { 1336 if (ip_debug > 2) { 1337 /* ip1dbg */ 1338 pr_addr_dbg("ndp_input_solicit: Target is" 1339 " multicast! %s\n", AF_INET6, &target); 1340 } 1341 bad_solicit = B_TRUE; 1342 goto done; 1343 } 1344 if (len > sizeof (nd_neighbor_solicit_t)) { 1345 /* Options present */ 1346 opt = (nd_opt_hdr_t *)&ns[1]; 1347 len -= sizeof (nd_neighbor_solicit_t); 1348 if (!ndp_verify_optlen(opt, len)) { 1349 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1350 bad_solicit = B_TRUE; 1351 goto done; 1352 } 1353 } 1354 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1355 /* Check to see if this is a valid DAD solicitation */ 1356 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1357 if (ip_debug > 2) { 1358 /* ip1dbg */ 1359 pr_addr_dbg("ndp_input_solicit: IPv6 " 1360 "Destination is not solicited node " 1361 "multicast %s\n", AF_INET6, 1362 &ip6h->ip6_dst); 1363 } 1364 bad_solicit = B_TRUE; 1365 goto done; 1366 } 1367 } 1368 1369 our_nce = ndp_lookup(ill, &target, B_FALSE); 1370 /* 1371 * If this is a valid Solicitation, a permanent 1372 * entry should exist in the cache 1373 */ 1374 if (our_nce == NULL || 1375 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1376 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1377 "ifname=%s ", ill->ill_name)); 1378 if (ip_debug > 2) { 1379 /* ip1dbg */ 1380 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1381 } 1382 bad_solicit = B_TRUE; 1383 goto done; 1384 } 1385 1386 /* At this point we should have a verified NS per spec */ 1387 if (opt != NULL) { 1388 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1389 if (opt != NULL) { 1390 /* 1391 * No source link layer address option should 1392 * be present in a valid DAD request. 1393 */ 1394 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1395 ip1dbg(("ndp_input_solicit: source link-layer " 1396 "address option present with an " 1397 "unspecified source. \n")); 1398 bad_solicit = B_TRUE; 1399 goto done; 1400 } 1401 haddr = (uchar_t *)&opt[1]; 1402 if (hlen > opt->nd_opt_len * 8 || 1403 hlen == 0) { 1404 bad_solicit = B_TRUE; 1405 goto done; 1406 } 1407 } 1408 } 1409 /* 1410 * haddr can be NULL if no options are present, 1411 * or no Source link layer address is present in, 1412 * recvd NDP options of solicitation message. 1413 */ 1414 if (haddr == NULL) { 1415 nce_t *nnce; 1416 mutex_enter(&ndp_g_lock); 1417 nnce = nce_lookup_addr(ill, &src); 1418 mutex_exit(&ndp_g_lock); 1419 1420 if (nnce == NULL) { 1421 in6_addr_t dst = ipv6_solicited_node_mcast; 1422 1423 /* Form solicited node multicast address */ 1424 dst.s6_addr32[3] |= src.s6_addr32[3]; 1425 (void) nce_xmit(ill, 1426 ND_NEIGHBOR_SOLICIT, 1427 ill, 1428 B_TRUE, 1429 &target, 1430 &dst, 1431 flag); 1432 bad_solicit = B_TRUE; 1433 goto done; 1434 } 1435 } 1436 /* Set override flag, it will be reset later if need be. */ 1437 flag |= NDP_ORIDE; 1438 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1439 flag |= NDP_UNICAST; 1440 } 1441 1442 /* 1443 * Create/update the entry for the soliciting node. 1444 * or respond to outstanding queries, don't if 1445 * the source is unspecified address. 1446 */ 1447 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1448 int err = 0; 1449 nce_t *nnce; 1450 1451 err = ndp_lookup_then_add(ill, 1452 haddr, 1453 &src, /* Soliciting nodes address */ 1454 &ipv6_all_ones, 1455 &ipv6_all_zeros, 1456 0, 1457 0, 1458 ND_STALE, 1459 &nnce); 1460 switch (err) { 1461 case 0: 1462 /* done with this entry */ 1463 NCE_REFRELE(nnce); 1464 break; 1465 case EEXIST: 1466 /* 1467 * B_FALSE indicates this is not an 1468 * an advertisement. 1469 */ 1470 ndp_process(nnce, haddr, 0, B_FALSE); 1471 NCE_REFRELE(nnce); 1472 break; 1473 default: 1474 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 1475 err)); 1476 goto done; 1477 } 1478 flag |= NDP_SOLICITED; 1479 } else { 1480 /* 1481 * This is a DAD req, multicast the advertisement 1482 * to the all-nodes address. 1483 */ 1484 src = ipv6_all_hosts_mcast; 1485 } 1486 if (our_nce->nce_flags & NCE_F_ISROUTER) 1487 flag |= NDP_ISROUTER; 1488 if (our_nce->nce_flags & NCE_F_PROXY) 1489 flag &= ~NDP_ORIDE; 1490 /* Response to a solicitation */ 1491 (void) nce_xmit(ill, 1492 ND_NEIGHBOR_ADVERT, 1493 ill, /* ill to be used for extracting ill_nd_lla */ 1494 B_TRUE, /* use ill_nd_lla */ 1495 &target, /* Source and target of the advertisement pkt */ 1496 &src, /* IP Destination (source of original pkt) */ 1497 flag); 1498 done: 1499 if (bad_solicit) 1500 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 1501 if (our_nce != NULL) 1502 NCE_REFRELE(our_nce); 1503 } 1504 1505 void 1506 ndp_input_advert(ill_t *ill, mblk_t *mp) 1507 { 1508 nd_neighbor_advert_t *na; 1509 uint32_t hlen = ill->ill_nd_lla_len; 1510 uchar_t *haddr = NULL; 1511 icmp6_t *icmp_nd; 1512 ip6_t *ip6h; 1513 nce_t *dst_nce = NULL; 1514 in6_addr_t target; 1515 nd_opt_hdr_t *opt = NULL; 1516 int len; 1517 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1518 1519 ip6h = (ip6_t *)mp->b_rptr; 1520 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1521 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1522 na = (nd_neighbor_advert_t *)icmp_nd; 1523 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 1524 (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { 1525 ip1dbg(("ndp_input_advert: Target is multicast but the " 1526 "solicited flag is not zero\n")); 1527 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 1528 return; 1529 } 1530 target = na->nd_na_target; 1531 if (IN6_IS_ADDR_MULTICAST(&target)) { 1532 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 1533 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 1534 return; 1535 } 1536 if (len > sizeof (nd_neighbor_advert_t)) { 1537 opt = (nd_opt_hdr_t *)&na[1]; 1538 if (!ndp_verify_optlen(opt, 1539 len - sizeof (nd_neighbor_advert_t))) { 1540 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 1541 return; 1542 } 1543 /* At this point we have a verified NA per spec */ 1544 len -= sizeof (nd_neighbor_advert_t); 1545 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 1546 if (opt != NULL) { 1547 haddr = (uchar_t *)&opt[1]; 1548 if (hlen > opt->nd_opt_len * 8 || 1549 hlen == 0) { 1550 BUMP_MIB(mib, 1551 ipv6IfIcmpInBadNeighborAdvertisements); 1552 return; 1553 } 1554 } 1555 } 1556 1557 /* 1558 * If this interface is part of the group look at all the 1559 * ills in the group. 1560 */ 1561 rw_enter(&ill_g_lock, RW_READER); 1562 if (ill->ill_group != NULL) 1563 ill = ill->ill_group->illgrp_ill; 1564 1565 for (; ill != NULL; ill = ill->ill_group_next) { 1566 mutex_enter(&ill->ill_lock); 1567 if (!ILL_CAN_LOOKUP(ill)) { 1568 mutex_exit(&ill->ill_lock); 1569 continue; 1570 } 1571 ill_refhold_locked(ill); 1572 mutex_exit(&ill->ill_lock); 1573 dst_nce = ndp_lookup(ill, &target, B_FALSE); 1574 /* We have to drop the lock since ndp_process calls put* */ 1575 rw_exit(&ill_g_lock); 1576 if (dst_nce != NULL) { 1577 if (na->nd_na_flags_reserved & 1578 ND_NA_FLAG_ROUTER) { 1579 dst_nce->nce_flags |= NCE_F_ISROUTER; 1580 } 1581 /* B_TRUE indicates this an advertisement */ 1582 ndp_process(dst_nce, haddr, 1583 na->nd_na_flags_reserved, B_TRUE); 1584 NCE_REFRELE(dst_nce); 1585 } 1586 rw_enter(&ill_g_lock, RW_READER); 1587 ill_refrele(ill); 1588 } 1589 rw_exit(&ill_g_lock); 1590 } 1591 1592 /* 1593 * Process NDP neighbor solicitation/advertisement messages. 1594 * The checksum has already checked o.k before reaching here. 1595 */ 1596 void 1597 ndp_input(ill_t *ill, mblk_t *mp) 1598 { 1599 icmp6_t *icmp_nd; 1600 ip6_t *ip6h; 1601 int len; 1602 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1603 1604 1605 if (!pullupmsg(mp, -1)) { 1606 ip1dbg(("ndp_input: pullupmsg failed\n")); 1607 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1608 goto done; 1609 } 1610 ip6h = (ip6_t *)mp->b_rptr; 1611 if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 1612 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 1613 BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 1614 goto done; 1615 } 1616 /* 1617 * NDP does not accept any extension headers between the 1618 * IP header and the ICMP header since e.g. a routing 1619 * header could be dangerous. 1620 * This assumes that any AH or ESP headers are removed 1621 * by ip prior to passing the packet to ndp_input. 1622 */ 1623 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 1624 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 1625 ip6h->ip6_nxt)); 1626 BUMP_MIB(mib, ipv6IfIcmpInErrors); 1627 goto done; 1628 } 1629 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1630 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 1631 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 1632 if (icmp_nd->icmp6_code != 0) { 1633 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 1634 BUMP_MIB(mib, ipv6IfIcmpInErrors); 1635 goto done; 1636 } 1637 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1638 /* 1639 * Make sure packet length is large enough for either 1640 * a NS or a NA icmp packet. 1641 */ 1642 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 1643 ip1dbg(("ndp_input: packet too short\n")); 1644 BUMP_MIB(mib, ipv6IfIcmpInErrors); 1645 goto done; 1646 } 1647 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 1648 ndp_input_solicit(ill, mp); 1649 } else { 1650 ndp_input_advert(ill, mp); 1651 } 1652 done: 1653 freemsg(mp); 1654 } 1655 1656 /* 1657 * nce_xmit is called to form and transmit a ND solicitation or 1658 * advertisement ICMP packet. 1659 * If source address is unspecified, appropriate source address 1660 * and link layer address will be chosen here. This function 1661 * *always* sends the link layer option. 1662 * It returns B_FALSE only if it does a successful put() to the 1663 * corresponding ill's ill_wq otherwise returns B_TRUE. 1664 */ 1665 static boolean_t 1666 nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, 1667 boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target, 1668 int flag) 1669 { 1670 uint32_t len; 1671 icmp6_t *icmp6; 1672 mblk_t *mp; 1673 ip6_t *ip6h; 1674 nd_opt_hdr_t *opt; 1675 uint_t plen; 1676 ip6i_t *ip6i; 1677 ipif_t *src_ipif = NULL; 1678 1679 /* 1680 * If we have a unspecified source(sender) address, select a 1681 * proper source address for the solicitation here itself so 1682 * that we can initialize the h/w address correctly. This is 1683 * needed for interface groups as source address can come from 1684 * the whole group and the h/w address initialized from ill will 1685 * be wrong if the source address comes from a different ill. 1686 * 1687 * Note that the NA never comes here with the unspecified source 1688 * address. The following asserts that whenever the source 1689 * address is specified, the haddr also should be specified. 1690 */ 1691 ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); 1692 1693 if (IN6_IS_ADDR_UNSPECIFIED(sender)) { 1694 ASSERT(operation != ND_NEIGHBOR_ADVERT); 1695 /* 1696 * Pick a source address for this solicitation, but 1697 * restrict the selection to addresses assigned to the 1698 * output interface (or interface group). We do this 1699 * because the destination will create a neighbor cache 1700 * entry for the source address of this packet, so the 1701 * source address had better be a valid neighbor. 1702 */ 1703 src_ipif = ipif_select_source_v6(ill, target, B_TRUE, 1704 IPV6_PREFER_SRC_DEFAULT, GLOBAL_ZONEID); 1705 if (src_ipif == NULL) { 1706 char buf[INET6_ADDRSTRLEN]; 1707 1708 ip0dbg(("nce_xmit: No source ipif for dst %s\n", 1709 inet_ntop(AF_INET6, (char *)target, buf, 1710 sizeof (buf)))); 1711 return (B_TRUE); 1712 } 1713 sender = &src_ipif->ipif_v6src_addr; 1714 hwaddr_ill = src_ipif->ipif_ill; 1715 } 1716 1717 plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7)/8; 1718 /* 1719 * Always make sure that the NS/NA packets don't get load 1720 * spread. This is needed so that the probe packets sent 1721 * by the in.mpathd daemon can really go out on the desired 1722 * interface. Probe packets are made to go out on a desired 1723 * interface by including a ip6i with ATTACH_IF flag. As these 1724 * packets indirectly end up sending/receiving NS/NA packets 1725 * (neighbor doing NUD), we have to make sure that NA 1726 * also go out on the same interface. 1727 */ 1728 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 1729 plen * 8; 1730 mp = allocb(len, BPRI_LO); 1731 if (mp == NULL) { 1732 if (src_ipif != NULL) 1733 ipif_refrele(src_ipif); 1734 return (B_TRUE); 1735 } 1736 bzero((char *)mp->b_rptr, len); 1737 mp->b_wptr = mp->b_rptr + len; 1738 1739 ip6i = (ip6i_t *)mp->b_rptr; 1740 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1741 ip6i->ip6i_nxt = IPPROTO_RAW; 1742 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 1743 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 1744 1745 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1746 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1747 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 1748 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1749 ip6h->ip6_hops = IPV6_MAX_HOPS; 1750 ip6h->ip6_dst = *target; 1751 icmp6 = (icmp6_t *)&ip6h[1]; 1752 1753 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 1754 sizeof (nd_neighbor_advert_t)); 1755 1756 if (operation == ND_NEIGHBOR_SOLICIT) { 1757 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 1758 1759 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 1760 ip6h->ip6_src = *sender; 1761 ns->nd_ns_target = *target; 1762 if (!(flag & NDP_UNICAST)) { 1763 /* Form multicast address of the target */ 1764 ip6h->ip6_dst = ipv6_solicited_node_mcast; 1765 ip6h->ip6_dst.s6_addr32[3] |= 1766 ns->nd_ns_target.s6_addr32[3]; 1767 } 1768 } else { 1769 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 1770 1771 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1772 ip6h->ip6_src = *sender; 1773 na->nd_na_target = *sender; 1774 if (flag & NDP_ISROUTER) 1775 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 1776 if (flag & NDP_SOLICITED) 1777 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 1778 if (flag & NDP_ORIDE) 1779 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 1780 1781 } 1782 /* Fill in link layer address and option len */ 1783 opt->nd_opt_len = (uint8_t)plen; 1784 mutex_enter(&hwaddr_ill->ill_lock); 1785 bcopy(use_nd_lla ? hwaddr_ill->ill_nd_lla : hwaddr_ill->ill_phys_addr, 1786 &opt[1], hwaddr_ill->ill_nd_lla_len); 1787 mutex_exit(&hwaddr_ill->ill_lock); 1788 icmp6->icmp6_type = (uint8_t)operation; 1789 icmp6->icmp6_code = 0; 1790 /* 1791 * Prepare for checksum by putting icmp length in the icmp 1792 * checksum field. The checksum is calculated in ip_wput_v6. 1793 */ 1794 icmp6->icmp6_cksum = ip6h->ip6_plen; 1795 1796 if (src_ipif != NULL) 1797 ipif_refrele(src_ipif); 1798 if (canput(ill->ill_wq)) { 1799 put(ill->ill_wq, mp); 1800 return (B_FALSE); 1801 } 1802 freemsg(mp); 1803 return (B_TRUE); 1804 } 1805 1806 /* 1807 * Make a link layer address (does not include the SAP) from an nce. 1808 * To form the link layer address, use the last four bytes of ipv6 1809 * address passed in and the fixed offset stored in nce. 1810 */ 1811 static void 1812 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 1813 { 1814 uchar_t *mask, *to; 1815 ill_t *ill = nce->nce_ill; 1816 int len; 1817 1818 if (ill->ill_net_type == IRE_IF_NORESOLVER) 1819 return; 1820 ASSERT(nce->nce_res_mp != NULL); 1821 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1822 ASSERT(nce->nce_flags & NCE_F_MAPPING); 1823 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 1824 ASSERT(addr != NULL); 1825 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1826 addrpos, ill->ill_nd_lla_len); 1827 len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 1828 IPV6_ADDR_LEN); 1829 mask = (uchar_t *)&nce->nce_extract_mask; 1830 mask += (IPV6_ADDR_LEN - len); 1831 addr += (IPV6_ADDR_LEN - len); 1832 to = addrpos + nce->nce_ll_extract_start; 1833 while (len-- > 0) 1834 *to++ |= *mask++ & *addr++; 1835 } 1836 1837 /* 1838 * Pass a cache report back out via NDD. 1839 */ 1840 /* ARGSUSED */ 1841 int 1842 ndp_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) 1843 { 1844 (void) mi_mpprintf(mp, "ifname hardware addr flags" 1845 " proto addr/mask"); 1846 ndp_walk(NULL, (pfi_t)nce_report1, (uchar_t *)mp); 1847 return (0); 1848 } 1849 1850 /* 1851 * convert a link level address of arbitrary length 1852 * to an ascii string. 1853 * The caller *must* have already verified that the string buffer 1854 * is large enough to hold the entire string, including trailing NULL. 1855 */ 1856 static void 1857 lla2ascii(uint8_t *lla, int addrlen, uchar_t *buf) 1858 { 1859 uchar_t addrbyte[8]; /* needs to hold ascii for a byte plus a NULL */ 1860 int i; 1861 size_t len; 1862 1863 buf[0] = '\0'; 1864 for (i = 0; i < addrlen; i++) { 1865 addrbyte[0] = '\0'; 1866 (void) sprintf((char *)addrbyte, "%02x:", (lla[i] & 0xff)); 1867 len = strlen((const char *)addrbyte); 1868 bcopy(addrbyte, buf, len); 1869 buf = buf + len; 1870 } 1871 *--buf = '\0'; 1872 } 1873 1874 /* 1875 * Add a single line to the NDP Cache Entry Report. 1876 */ 1877 static void 1878 nce_report1(nce_t *nce, uchar_t *mp_arg) 1879 { 1880 ill_t *ill = nce->nce_ill; 1881 char local_buf[INET6_ADDRSTRLEN]; 1882 uchar_t flags_buf[10]; 1883 uint32_t flags = nce->nce_flags; 1884 mblk_t *mp = (mblk_t *)mp_arg; 1885 uchar_t *h; 1886 uchar_t *m = flags_buf; 1887 in6_addr_t v6addr; 1888 1889 /* 1890 * Lock the nce to protect nce_res_mp from being changed 1891 * if an external resolver address resolution completes 1892 * while nce_res_mp is being accessed here. 1893 * 1894 * Deal with all address formats, not just Ethernet-specific 1895 * In addition, make sure that the mblk has enough space 1896 * before writing to it. If is doesn't, allocate a new one. 1897 */ 1898 ASSERT(ill != NULL); 1899 v6addr = nce->nce_mask; 1900 if (flags & NCE_F_PERMANENT) 1901 *m++ = 'P'; 1902 if (flags & NCE_F_ISROUTER) 1903 *m++ = 'R'; 1904 if (flags & NCE_F_MAPPING) 1905 *m++ = 'M'; 1906 *m = '\0'; 1907 1908 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1909 size_t addrlen; 1910 uchar_t *addr_buf; 1911 dl_unitdata_req_t *dl; 1912 1913 mutex_enter(&nce->nce_lock); 1914 h = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 1915 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1916 if (ill->ill_flags & ILLF_XRESOLV) 1917 addrlen = (3 * (dl->dl_dest_addr_length)); 1918 else 1919 addrlen = (3 * (ill->ill_nd_lla_len)); 1920 if (addrlen <= 0) { 1921 mutex_exit(&nce->nce_lock); 1922 (void) mi_mpprintf(mp, 1923 "%8s %9s %5s %s/%d", 1924 ill->ill_name, 1925 "None", 1926 (uchar_t *)&flags_buf, 1927 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 1928 (char *)local_buf, sizeof (local_buf)), 1929 ip_mask_to_plen_v6(&v6addr)); 1930 } else { 1931 /* 1932 * Convert the hardware/lla address to ascii 1933 */ 1934 addr_buf = kmem_zalloc(addrlen, KM_NOSLEEP); 1935 if (addr_buf == NULL) { 1936 mutex_exit(&nce->nce_lock); 1937 return; 1938 } 1939 if (ill->ill_flags & ILLF_XRESOLV) 1940 lla2ascii((uint8_t *)h, dl->dl_dest_addr_length, 1941 addr_buf); 1942 else 1943 lla2ascii((uint8_t *)h, ill->ill_nd_lla_len, 1944 addr_buf); 1945 mutex_exit(&nce->nce_lock); 1946 (void) mi_mpprintf(mp, "%8s %17s %5s %s/%d", 1947 ill->ill_name, addr_buf, (uchar_t *)&flags_buf, 1948 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 1949 (char *)local_buf, sizeof (local_buf)), 1950 ip_mask_to_plen_v6(&v6addr)); 1951 kmem_free(addr_buf, addrlen); 1952 } 1953 } else { 1954 (void) mi_mpprintf(mp, 1955 "%8s %9s %5s %s/%d", 1956 ill->ill_name, 1957 "None", 1958 (uchar_t *)&flags_buf, 1959 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 1960 (char *)local_buf, sizeof (local_buf)), 1961 ip_mask_to_plen_v6(&v6addr)); 1962 } 1963 } 1964 1965 mblk_t * 1966 nce_udreq_alloc(ill_t *ill) 1967 { 1968 mblk_t *template_mp = NULL; 1969 dl_unitdata_req_t *dlur; 1970 int sap_length; 1971 1972 sap_length = ill->ill_sap_length; 1973 template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 1974 ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 1975 if (template_mp == NULL) 1976 return (NULL); 1977 1978 dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 1979 dlur->dl_priority.dl_min = 0; 1980 dlur->dl_priority.dl_max = 0; 1981 dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 1982 dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); 1983 1984 /* Copy in the SAP value. */ 1985 NCE_LL_SAP_COPY(ill, template_mp); 1986 1987 return (template_mp); 1988 } 1989 1990 /* 1991 * NDP retransmit timer. 1992 * This timer goes off when: 1993 * a. It is time to retransmit NS for resolver. 1994 * b. It is time to send reachability probes. 1995 */ 1996 void 1997 ndp_timer(void *arg) 1998 { 1999 nce_t *nce = arg; 2000 ill_t *ill = nce->nce_ill; 2001 uint32_t ms; 2002 char addrbuf[INET6_ADDRSTRLEN]; 2003 mblk_t *mp; 2004 boolean_t dropped = B_FALSE; 2005 2006 /* 2007 * The timer has to be cancelled by ndp_delete before doing the final 2008 * refrele. So the NCE is guaranteed to exist when the timer runs 2009 * until it clears the timeout_id. Before clearing the timeout_id 2010 * bump up the refcnt so that we can continue to use the nce 2011 */ 2012 ASSERT(nce != NULL); 2013 2014 /* 2015 * Grab the ill_g_lock now itself to avoid lock order problems. 2016 * nce_solicit needs ill_g_lock to be able to traverse ills 2017 */ 2018 rw_enter(&ill_g_lock, RW_READER); 2019 mutex_enter(&nce->nce_lock); 2020 NCE_REFHOLD_LOCKED(nce); 2021 nce->nce_timeout_id = 0; 2022 2023 /* 2024 * Check the reachability state first. 2025 */ 2026 switch (nce->nce_state) { 2027 case ND_DELAY: 2028 rw_exit(&ill_g_lock); 2029 nce->nce_state = ND_PROBE; 2030 mutex_exit(&nce->nce_lock); 2031 (void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 2032 &ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST); 2033 if (ip_debug > 3) { 2034 /* ip2dbg */ 2035 pr_addr_dbg("ndp_timer: state for %s changed " 2036 "to PROBE\n", AF_INET6, &nce->nce_addr); 2037 } 2038 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2039 NCE_REFRELE(nce); 2040 return; 2041 case ND_PROBE: 2042 /* must be retransmit timer */ 2043 rw_exit(&ill_g_lock); 2044 nce->nce_pcnt--; 2045 ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 2046 nce->nce_pcnt >= -1); 2047 if (nce->nce_pcnt == 0) { 2048 /* Wait RetransTimer, before deleting the entry */ 2049 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2050 nce->nce_pcnt, inet_ntop(AF_INET6, 2051 &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 2052 mutex_exit(&nce->nce_lock); 2053 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2054 } else { 2055 /* 2056 * As per RFC2461, the nce gets deleted after 2057 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 2058 * Note that the first unicast solicitation is sent 2059 * during the DELAY state. 2060 */ 2061 if (nce->nce_pcnt > 0) { 2062 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2063 nce->nce_pcnt, inet_ntop(AF_INET6, 2064 &nce->nce_addr, 2065 addrbuf, sizeof (addrbuf)))); 2066 mutex_exit(&nce->nce_lock); 2067 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, 2068 NULL, B_FALSE, &ipv6_all_zeros, 2069 &nce->nce_addr, NDP_UNICAST); 2070 if (dropped) { 2071 mutex_enter(&nce->nce_lock); 2072 nce->nce_pcnt++; 2073 mutex_exit(&nce->nce_lock); 2074 } 2075 NDP_RESTART_TIMER(nce, 2076 ill->ill_reachable_retrans_time); 2077 } else { 2078 /* No hope, delete the nce */ 2079 nce->nce_state = ND_UNREACHABLE; 2080 mutex_exit(&nce->nce_lock); 2081 if (ip_debug > 2) { 2082 /* ip1dbg */ 2083 pr_addr_dbg("ndp_timer: Delete IRE for" 2084 " dst %s\n", AF_INET6, 2085 &nce->nce_addr); 2086 } 2087 ndp_delete(nce); 2088 } 2089 } 2090 NCE_REFRELE(nce); 2091 return; 2092 case ND_INCOMPLETE: 2093 /* 2094 * Must be resolvers retransmit timer. 2095 */ 2096 for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) { 2097 ip6i_t *ip6i; 2098 ip6_t *ip6h; 2099 mblk_t *data_mp; 2100 2101 /* 2102 * Walk the list of packets queued, and see if there 2103 * are any multipathing probe packets. Such packets 2104 * are always queued at the head. Since this is a 2105 * retransmit timer firing, mark such packets as 2106 * delayed in ND resolution. This info will be used 2107 * in ip_wput_v6(). Multipathing probe packets will 2108 * always have an ip6i_t. Once we hit a packet without 2109 * it, we can break out of this loop. 2110 */ 2111 if (mp->b_datap->db_type == M_CTL) 2112 data_mp = mp->b_cont; 2113 else 2114 data_mp = mp; 2115 2116 ip6h = (ip6_t *)data_mp->b_rptr; 2117 if (ip6h->ip6_nxt != IPPROTO_RAW) 2118 break; 2119 2120 /* 2121 * This message should have been pulled up already in 2122 * ip_wput_v6. We can't do pullups here because the 2123 * b_next/b_prev is non-NULL. 2124 */ 2125 ip6i = (ip6i_t *)ip6h; 2126 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2127 sizeof (ip6i_t) + IPV6_HDR_LEN); 2128 2129 /* Mark this packet as delayed due to ND resolution */ 2130 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2131 ip6i->ip6i_flags |= IP6I_ND_DELAYED; 2132 } 2133 if (nce->nce_qd_mp != NULL) { 2134 ms = nce_solicit(nce, NULL); 2135 rw_exit(&ill_g_lock); 2136 if (ms == 0) { 2137 if (nce->nce_state != ND_REACHABLE) { 2138 mutex_exit(&nce->nce_lock); 2139 nce_resolv_failed(nce); 2140 ndp_delete(nce); 2141 } else { 2142 mutex_exit(&nce->nce_lock); 2143 } 2144 } else { 2145 mutex_exit(&nce->nce_lock); 2146 NDP_RESTART_TIMER(nce, (clock_t)ms); 2147 } 2148 NCE_REFRELE(nce); 2149 return; 2150 } 2151 mutex_exit(&nce->nce_lock); 2152 rw_exit(&ill_g_lock); 2153 NCE_REFRELE(nce); 2154 break; 2155 case ND_REACHABLE : 2156 rw_exit(&ill_g_lock); 2157 if (nce->nce_flags & NCE_F_UNSOL_ADV && 2158 nce->nce_unsolicit_count != 0) { 2159 nce->nce_unsolicit_count--; 2160 mutex_exit(&nce->nce_lock); 2161 dropped = nce_xmit(ill, 2162 ND_NEIGHBOR_ADVERT, 2163 ill, /* ill to be used for hw addr */ 2164 B_FALSE, /* use ill_phys_addr */ 2165 &nce->nce_addr, 2166 &ipv6_all_hosts_mcast, 2167 nce->nce_flags | NDP_ORIDE); 2168 if (dropped) { 2169 mutex_enter(&nce->nce_lock); 2170 nce->nce_unsolicit_count++; 2171 mutex_exit(&nce->nce_lock); 2172 } 2173 if (nce->nce_unsolicit_count != 0) { 2174 NDP_RESTART_TIMER(nce, 2175 ip_ndp_unsolicit_interval); 2176 } 2177 } else { 2178 mutex_exit(&nce->nce_lock); 2179 } 2180 NCE_REFRELE(nce); 2181 break; 2182 default: 2183 rw_exit(&ill_g_lock); 2184 mutex_exit(&nce->nce_lock); 2185 NCE_REFRELE(nce); 2186 break; 2187 } 2188 } 2189 2190 /* 2191 * Set a link layer address from the ll_addr passed in. 2192 * Copy SAP from ill. 2193 */ 2194 static void 2195 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2196 { 2197 ill_t *ill = nce->nce_ill; 2198 uchar_t *woffset; 2199 2200 ASSERT(ll_addr != NULL); 2201 /* Always called before fast_path_probe */ 2202 ASSERT(nce->nce_fp_mp == NULL); 2203 if (ill->ill_sap_length != 0) { 2204 /* 2205 * Copy the SAP type specified in the 2206 * request into the xmit template. 2207 */ 2208 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2209 } 2210 if (ill->ill_phys_addr_length > 0) { 2211 /* 2212 * The bcopy() below used to be called for the physical address 2213 * length rather than the link layer address length. For 2214 * ethernet and many other media, the phys_addr and lla are 2215 * identical. 2216 * However, with xresolv interfaces being introduced, the 2217 * phys_addr and lla are no longer the same, and the physical 2218 * address may not have any useful meaning, so we use the lla 2219 * for IPv6 address resolution and destination addressing. 2220 * 2221 * For PPP or other interfaces with a zero length 2222 * physical address, don't do anything here. 2223 * The bcopy() with a zero phys_addr length was previously 2224 * a no-op for interfaces with a zero-length physical address. 2225 * Using the lla for them would change the way they operate. 2226 * Doing nothing in such cases preserves expected behavior. 2227 */ 2228 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2229 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2230 } 2231 } 2232 2233 static boolean_t 2234 nce_cmp_ll_addr(nce_t *nce, char *ll_addr, uint32_t ll_addr_len) 2235 { 2236 ill_t *ill = nce->nce_ill; 2237 uchar_t *ll_offset; 2238 2239 ASSERT(nce->nce_res_mp != NULL); 2240 if (ll_addr == NULL) 2241 return (B_FALSE); 2242 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2243 if (bcmp(ll_addr, (char *)ll_offset, ll_addr_len) != 0) 2244 return (B_TRUE); 2245 return (B_FALSE); 2246 } 2247 2248 /* 2249 * Updates the link layer address or the reachability state of 2250 * a cache entry. Reset probe counter if needed. 2251 */ 2252 static void 2253 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2254 { 2255 ill_t *ill = nce->nce_ill; 2256 boolean_t need_stop_timer = B_FALSE; 2257 boolean_t need_fastpath_update = B_FALSE; 2258 2259 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2260 /* 2261 * If this interface does not do NUD, there is no point 2262 * in allowing an update to the cache entry. Although 2263 * we will respond to NS. 2264 * The only time we accept an update for a resolver when 2265 * NUD is turned off is when it has just been created. 2266 * Non-Resolvers will always be created as REACHABLE. 2267 */ 2268 if (new_state != ND_UNCHANGED) { 2269 if ((nce->nce_flags & NCE_F_NONUD) && 2270 (nce->nce_state != ND_INCOMPLETE)) 2271 return; 2272 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2273 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2274 need_stop_timer = B_TRUE; 2275 if (new_state == ND_REACHABLE) 2276 nce->nce_last = TICK_TO_MSEC(lbolt64); 2277 else { 2278 /* We force NUD in this case */ 2279 nce->nce_last = 0; 2280 } 2281 nce->nce_state = new_state; 2282 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2283 } 2284 /* 2285 * In case of fast path we need to free the the fastpath 2286 * M_DATA and do another probe. Otherwise we can just 2287 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2288 * whatever packets that happens to be transmitting at the time. 2289 */ 2290 if (new_ll_addr != NULL) { 2291 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2292 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2293 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2294 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2295 if (nce->nce_fp_mp != NULL) { 2296 freemsg(nce->nce_fp_mp); 2297 nce->nce_fp_mp = NULL; 2298 } 2299 need_fastpath_update = B_TRUE; 2300 } 2301 mutex_exit(&nce->nce_lock); 2302 if (need_stop_timer) { 2303 (void) untimeout(nce->nce_timeout_id); 2304 nce->nce_timeout_id = 0; 2305 } 2306 if (need_fastpath_update) 2307 nce_fastpath(nce); 2308 mutex_enter(&nce->nce_lock); 2309 } 2310 2311 static void 2312 nce_queue_mp(nce_t *nce, mblk_t *mp) 2313 { 2314 uint_t count = 0; 2315 mblk_t **mpp; 2316 boolean_t head_insert = B_FALSE; 2317 ip6_t *ip6h; 2318 ip6i_t *ip6i; 2319 mblk_t *data_mp; 2320 2321 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2322 2323 if (mp->b_datap->db_type == M_CTL) 2324 data_mp = mp->b_cont; 2325 else 2326 data_mp = mp; 2327 ip6h = (ip6_t *)data_mp->b_rptr; 2328 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2329 /* 2330 * This message should have been pulled up already in 2331 * ip_wput_v6. We can't do pullups here because the message 2332 * could be from the nce_qd_mp which could have b_next/b_prev 2333 * non-NULL. 2334 */ 2335 ip6i = (ip6i_t *)ip6h; 2336 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2337 sizeof (ip6i_t) + IPV6_HDR_LEN); 2338 /* 2339 * Multipathing probe packets have IP6I_DROP_IFDELAYED set. 2340 * This has 2 aspects mentioned below. 2341 * 1. Perform head insertion in the nce_qd_mp for these packets. 2342 * This ensures that next retransmit of ND solicitation 2343 * will use the interface specified by the probe packet, 2344 * for both NS and NA. This corresponds to the src address 2345 * in the IPv6 packet. If we insert at tail, we will be 2346 * depending on the packet at the head for successful 2347 * ND resolution. This is not reliable, because the interface 2348 * on which the NA arrives could be different from the interface 2349 * on which the NS was sent, and if the receiving interface is 2350 * failed, it will appear that the sending interface is also 2351 * failed, causing in.mpathd to misdiagnose this as link 2352 * failure. 2353 * 2. Drop the original packet, if the ND resolution did not 2354 * succeed in the first attempt. However we will create the 2355 * nce and the ire, as soon as the ND resolution succeeds. 2356 * We don't gain anything by queueing multiple probe packets 2357 * and sending them back-to-back once resolution succeeds. 2358 * It is sufficient to send just 1 packet after ND resolution 2359 * succeeds. Since mpathd is sending down probe packets at a 2360 * constant rate, we don't need to send the queued packet. We 2361 * need to queue it only for NDP resolution. The benefit of 2362 * dropping the probe packets that were delayed in ND 2363 * resolution, is that in.mpathd will not see inflated 2364 * RTT. If the ND resolution does not succeed within 2365 * in.mpathd's failure detection time, mpathd may detect 2366 * a failure, and it does not matter whether the packet 2367 * was queued or dropped. 2368 */ 2369 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2370 head_insert = B_TRUE; 2371 } 2372 2373 for (mpp = &nce->nce_qd_mp; *mpp != NULL; 2374 mpp = &(*mpp)->b_next) { 2375 if (++count > 2376 nce->nce_ill->ill_max_buf) { 2377 mblk_t *tmp = nce->nce_qd_mp->b_next; 2378 2379 nce->nce_qd_mp->b_next = NULL; 2380 nce->nce_qd_mp->b_prev = NULL; 2381 freemsg(nce->nce_qd_mp); 2382 ip1dbg(("nce_queue_mp: pkt dropped\n")); 2383 nce->nce_qd_mp = tmp; 2384 } 2385 } 2386 /* put this on the list */ 2387 if (head_insert) { 2388 mp->b_next = nce->nce_qd_mp; 2389 nce->nce_qd_mp = mp; 2390 } else { 2391 *mpp = mp; 2392 } 2393 } 2394 2395 /* 2396 * Called when address resolution failed due to a timeout. 2397 * Send an ICMP unreachable in response to all queued packets. 2398 */ 2399 void 2400 nce_resolv_failed(nce_t *nce) 2401 { 2402 mblk_t *mp, *nxt_mp, *first_mp; 2403 char buf[INET6_ADDRSTRLEN]; 2404 ip6_t *ip6h; 2405 zoneid_t zoneid = GLOBAL_ZONEID; 2406 2407 ip1dbg(("nce_resolv_failed: dst %s\n", 2408 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 2409 mutex_enter(&nce->nce_lock); 2410 mp = nce->nce_qd_mp; 2411 nce->nce_qd_mp = NULL; 2412 mutex_exit(&nce->nce_lock); 2413 while (mp != NULL) { 2414 nxt_mp = mp->b_next; 2415 mp->b_next = NULL; 2416 mp->b_prev = NULL; 2417 2418 first_mp = mp; 2419 if (mp->b_datap->db_type == M_CTL) { 2420 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 2421 ASSERT(io->ipsec_out_type == IPSEC_OUT); 2422 zoneid = io->ipsec_out_zoneid; 2423 ASSERT(zoneid != ALL_ZONES); 2424 mp = mp->b_cont; 2425 } 2426 2427 ip6h = (ip6_t *)mp->b_rptr; 2428 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2429 ip6i_t *ip6i; 2430 /* 2431 * This message should have been pulled up already 2432 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 2433 * the header is pulled up. 2434 */ 2435 ip6i = (ip6i_t *)ip6h; 2436 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 2437 sizeof (ip6i_t) + IPV6_HDR_LEN); 2438 mp->b_rptr += sizeof (ip6i_t); 2439 } 2440 /* 2441 * Ignore failure since icmp_unreachable_v6 will silently 2442 * drop packets with an unspecified source address. 2443 */ 2444 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid); 2445 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 2446 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE); 2447 mp = nxt_mp; 2448 } 2449 } 2450 2451 /* 2452 * Called by SIOCSNDP* ioctl to add/change an nce entry 2453 * and the corresponding attributes. 2454 * Disallow states other than ND_REACHABLE or ND_STALE. 2455 */ 2456 int 2457 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 2458 { 2459 sin6_t *sin6; 2460 in6_addr_t *addr; 2461 nce_t *nce; 2462 int err; 2463 uint16_t new_flags = 0; 2464 uint16_t old_flags = 0; 2465 int inflags = lnr->lnr_flags; 2466 2467 if ((lnr->lnr_state_create != ND_REACHABLE) && 2468 (lnr->lnr_state_create != ND_STALE)) 2469 return (EINVAL); 2470 2471 sin6 = (sin6_t *)&lnr->lnr_addr; 2472 addr = &sin6->sin6_addr; 2473 2474 mutex_enter(&ndp_g_lock); 2475 /* We know it can not be mapping so just look in the hash table */ 2476 nce = nce_lookup_addr(ill, addr); 2477 if (nce != NULL) 2478 new_flags = nce->nce_flags; 2479 2480 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 2481 case NDF_ISROUTER_ON: 2482 new_flags |= NCE_F_ISROUTER; 2483 break; 2484 case NDF_ISROUTER_OFF: 2485 new_flags &= ~NCE_F_ISROUTER; 2486 break; 2487 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 2488 mutex_exit(&ndp_g_lock); 2489 if (nce != NULL) 2490 NCE_REFRELE(nce); 2491 return (EINVAL); 2492 } 2493 2494 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 2495 case NDF_ANYCAST_ON: 2496 new_flags |= NCE_F_ANYCAST; 2497 break; 2498 case NDF_ANYCAST_OFF: 2499 new_flags &= ~NCE_F_ANYCAST; 2500 break; 2501 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 2502 mutex_exit(&ndp_g_lock); 2503 if (nce != NULL) 2504 NCE_REFRELE(nce); 2505 return (EINVAL); 2506 } 2507 2508 switch (inflags & (NDF_PROXY_ON|NDF_PROXY_OFF)) { 2509 case NDF_PROXY_ON: 2510 new_flags |= NCE_F_PROXY; 2511 break; 2512 case NDF_PROXY_OFF: 2513 new_flags &= ~NCE_F_PROXY; 2514 break; 2515 case (NDF_PROXY_OFF|NDF_PROXY_ON): 2516 mutex_exit(&ndp_g_lock); 2517 if (nce != NULL) 2518 NCE_REFRELE(nce); 2519 return (EINVAL); 2520 } 2521 2522 if (nce == NULL) { 2523 err = ndp_add(ill, 2524 (uchar_t *)lnr->lnr_hdw_addr, 2525 addr, 2526 &ipv6_all_ones, 2527 &ipv6_all_zeros, 2528 0, 2529 new_flags, 2530 lnr->lnr_state_create, 2531 &nce); 2532 if (err != 0) { 2533 mutex_exit(&ndp_g_lock); 2534 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 2535 return (err); 2536 } 2537 } 2538 old_flags = nce->nce_flags; 2539 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 2540 /* 2541 * Router turned to host, delete all ires. 2542 * XXX Just delete the entry, but we need to add too. 2543 */ 2544 nce->nce_flags &= ~NCE_F_ISROUTER; 2545 mutex_exit(&ndp_g_lock); 2546 ndp_delete(nce); 2547 NCE_REFRELE(nce); 2548 return (0); 2549 } 2550 mutex_exit(&ndp_g_lock); 2551 2552 mutex_enter(&nce->nce_lock); 2553 nce->nce_flags = new_flags; 2554 mutex_exit(&nce->nce_lock); 2555 /* 2556 * Note that we ignore the state at this point, which 2557 * should be either STALE or REACHABLE. Instead we let 2558 * the link layer address passed in to determine the state 2559 * much like incoming packets. 2560 */ 2561 ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 2562 NCE_REFRELE(nce); 2563 return (0); 2564 } 2565 2566 /* 2567 * If the device driver supports it, we make nce_fp_mp to have 2568 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 2569 * The caller insures there is hold on nce for this function. 2570 * Note that since ill_fastpath_probe() copies the mblk there is 2571 * no need for the hold beyond this function. 2572 */ 2573 static void 2574 nce_fastpath(nce_t *nce) 2575 { 2576 ill_t *ill = nce->nce_ill; 2577 int res; 2578 2579 ASSERT(ill != NULL); 2580 if (nce->nce_fp_mp != NULL) { 2581 /* Already contains fastpath info */ 2582 return; 2583 } 2584 if (nce->nce_res_mp != NULL) { 2585 nce_fastpath_list_add(nce); 2586 res = ill_fastpath_probe(ill, nce->nce_res_mp); 2587 /* 2588 * EAGAIN is an indication of a transient error 2589 * i.e. allocation failure etc. leave the nce in the list it 2590 * will be updated when another probe happens for another ire 2591 * if not it will be taken out of the list when the ire is 2592 * deleted. 2593 */ 2594 2595 if (res != 0 && res != EAGAIN) 2596 nce_fastpath_list_delete(nce); 2597 } 2598 } 2599 2600 /* 2601 * Drain the list of nce's waiting for fastpath response. 2602 */ 2603 void 2604 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 2605 void *arg) 2606 { 2607 2608 nce_t *next_nce; 2609 nce_t *current_nce; 2610 nce_t *first_nce; 2611 nce_t *prev_nce = NULL; 2612 2613 ASSERT(ill != NULL); 2614 2615 mutex_enter(&ill->ill_lock); 2616 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 2617 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 2618 next_nce = current_nce->nce_fastpath; 2619 /* 2620 * Take it off the list if we're flushing, or if the callback 2621 * routine tells us to do so. Otherwise, leave the nce in the 2622 * fastpath list to handle any pending response from the lower 2623 * layer. We can't drain the list when the callback routine 2624 * comparison failed, because the response is asynchronous in 2625 * nature, and may not arrive in the same order as the list 2626 * insertion. 2627 */ 2628 if (func == NULL || func(current_nce, arg)) { 2629 current_nce->nce_fastpath = NULL; 2630 if (current_nce == first_nce) 2631 ill->ill_fastpath_list = first_nce = next_nce; 2632 else 2633 prev_nce->nce_fastpath = next_nce; 2634 } else { 2635 /* previous element that is still in the list */ 2636 prev_nce = current_nce; 2637 } 2638 current_nce = next_nce; 2639 } 2640 mutex_exit(&ill->ill_lock); 2641 } 2642 2643 /* 2644 * Add nce to the nce fastpath list. 2645 */ 2646 void 2647 nce_fastpath_list_add(nce_t *nce) 2648 { 2649 ill_t *ill; 2650 2651 ill = nce->nce_ill; 2652 ASSERT(ill != NULL); 2653 2654 mutex_enter(&ill->ill_lock); 2655 mutex_enter(&nce->nce_lock); 2656 2657 /* 2658 * if nce has not been deleted and 2659 * is not already in the list add it. 2660 */ 2661 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 2662 (nce->nce_fastpath == NULL)) { 2663 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 2664 ill->ill_fastpath_list = nce; 2665 } 2666 2667 mutex_exit(&nce->nce_lock); 2668 mutex_exit(&ill->ill_lock); 2669 } 2670 2671 /* 2672 * remove nce from the nce fastpath list. 2673 */ 2674 void 2675 nce_fastpath_list_delete(nce_t *nce) 2676 { 2677 nce_t *nce_ptr; 2678 2679 ill_t *ill; 2680 2681 ill = nce->nce_ill; 2682 ASSERT(ill != NULL); 2683 2684 mutex_enter(&ill->ill_lock); 2685 if (nce->nce_fastpath == NULL) 2686 goto done; 2687 2688 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 2689 2690 if (ill->ill_fastpath_list == nce) { 2691 ill->ill_fastpath_list = nce->nce_fastpath; 2692 } else { 2693 nce_ptr = ill->ill_fastpath_list; 2694 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 2695 if (nce_ptr->nce_fastpath == nce) { 2696 nce_ptr->nce_fastpath = nce->nce_fastpath; 2697 break; 2698 } 2699 nce_ptr = nce_ptr->nce_fastpath; 2700 } 2701 } 2702 2703 nce->nce_fastpath = NULL; 2704 done: 2705 mutex_exit(&ill->ill_lock); 2706 } 2707 2708 /* 2709 * Update all NCE's that are not in fastpath mode and 2710 * have an nce_fp_mp that matches mp. mp->b_cont contains 2711 * the fastpath header. 2712 * 2713 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 2714 */ 2715 boolean_t 2716 ndp_fastpath_update(nce_t *nce, void *arg) 2717 { 2718 mblk_t *mp, *fp_mp; 2719 uchar_t *mp_rptr, *ud_mp_rptr; 2720 mblk_t *ud_mp = nce->nce_res_mp; 2721 ptrdiff_t cmplen; 2722 2723 if (nce->nce_flags & NCE_F_MAPPING) 2724 return (B_TRUE); 2725 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 2726 return (B_TRUE); 2727 2728 ip2dbg(("ndp_fastpath_update: trying\n")); 2729 mp = (mblk_t *)arg; 2730 mp_rptr = mp->b_rptr; 2731 cmplen = mp->b_wptr - mp_rptr; 2732 ASSERT(cmplen >= 0); 2733 ud_mp_rptr = ud_mp->b_rptr; 2734 /* 2735 * The nce is locked here to prevent any other threads 2736 * from accessing and changing nce_res_mp when the IPv6 address 2737 * becomes resolved to an lla while we're in the middle 2738 * of looking at and comparing the hardware address (lla). 2739 * It is also locked to prevent multiple threads in nce_fastpath_update 2740 * from examining nce_res_mp atthe same time. 2741 */ 2742 mutex_enter(&nce->nce_lock); 2743 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 2744 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 2745 mutex_exit(&nce->nce_lock); 2746 /* 2747 * Don't take the ire off the fastpath list yet, 2748 * since the response may come later. 2749 */ 2750 return (B_FALSE); 2751 } 2752 /* Matched - install mp as the fastpath mp */ 2753 ip1dbg(("ndp_fastpath_update: match\n")); 2754 fp_mp = dupb(mp->b_cont); 2755 if (fp_mp != NULL) { 2756 nce->nce_fp_mp = fp_mp; 2757 } 2758 mutex_exit(&nce->nce_lock); 2759 return (B_TRUE); 2760 } 2761 2762 /* 2763 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 2764 * driver. Note that it assumes IP is exclusive... 2765 */ 2766 /* ARGSUSED */ 2767 void 2768 ndp_fastpath_flush(nce_t *nce, char *arg) 2769 { 2770 if (nce->nce_flags & NCE_F_MAPPING) 2771 return; 2772 /* No fastpath info? */ 2773 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 2774 return; 2775 2776 /* Just delete the NCE... */ 2777 ndp_delete(nce); 2778 } 2779 2780 /* 2781 * Return a pointer to a given option in the packet. 2782 * Assumes that option part of the packet have already been validated. 2783 */ 2784 nd_opt_hdr_t * 2785 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 2786 { 2787 while (optlen > 0) { 2788 if (opt->nd_opt_type == opt_type) 2789 return (opt); 2790 optlen -= 8 * opt->nd_opt_len; 2791 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 2792 } 2793 return (NULL); 2794 } 2795 2796 /* 2797 * Verify all option lengths present are > 0, also check to see 2798 * if the option lengths and packet length are consistent. 2799 */ 2800 boolean_t 2801 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 2802 { 2803 ASSERT(opt != NULL); 2804 while (optlen > 0) { 2805 if (opt->nd_opt_len == 0) 2806 return (B_FALSE); 2807 optlen -= 8 * opt->nd_opt_len; 2808 if (optlen < 0) 2809 return (B_FALSE); 2810 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 2811 } 2812 return (B_TRUE); 2813 } 2814 2815 /* 2816 * ndp_walk function. 2817 * Free a fraction of the NCE cache entries. 2818 * A fraction of zero means to not free any in that category. 2819 */ 2820 void 2821 ndp_cache_reclaim(nce_t *nce, char *arg) 2822 { 2823 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 2824 uint_t rand; 2825 2826 if (nce->nce_flags & NCE_F_PERMANENT) 2827 return; 2828 2829 rand = (uint_t)lbolt + 2830 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 2831 if (ncr->ncr_host != 0 && 2832 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 2833 ndp_delete(nce); 2834 return; 2835 } 2836 } 2837 2838 /* 2839 * ndp_walk function. 2840 * Count the number of NCEs that can be deleted. 2841 * These would be hosts but not routers. 2842 */ 2843 void 2844 ndp_cache_count(nce_t *nce, char *arg) 2845 { 2846 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 2847 2848 if (nce->nce_flags & NCE_F_PERMANENT) 2849 return; 2850 2851 ncc->ncc_total++; 2852 if (!(nce->nce_flags & NCE_F_ISROUTER)) 2853 ncc->ncc_host++; 2854 } 2855 2856 #ifdef NCE_DEBUG 2857 th_trace_t * 2858 th_trace_nce_lookup(nce_t *nce) 2859 { 2860 int bucket_id; 2861 th_trace_t *th_trace; 2862 2863 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2864 2865 bucket_id = IP_TR_HASH(curthread); 2866 ASSERT(bucket_id < IP_TR_HASH_MAX); 2867 2868 for (th_trace = nce->nce_trace[bucket_id]; th_trace != NULL; 2869 th_trace = th_trace->th_next) { 2870 if (th_trace->th_id == curthread) 2871 return (th_trace); 2872 } 2873 return (NULL); 2874 } 2875 2876 void 2877 nce_trace_ref(nce_t *nce) 2878 { 2879 int bucket_id; 2880 th_trace_t *th_trace; 2881 2882 /* 2883 * Attempt to locate the trace buffer for the curthread. 2884 * If it does not exist, then allocate a new trace buffer 2885 * and link it in list of trace bufs for this ipif, at the head 2886 */ 2887 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2888 2889 if (nce->nce_trace_disable == B_TRUE) 2890 return; 2891 2892 th_trace = th_trace_nce_lookup(nce); 2893 if (th_trace == NULL) { 2894 bucket_id = IP_TR_HASH(curthread); 2895 th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t), 2896 KM_NOSLEEP); 2897 if (th_trace == NULL) { 2898 nce->nce_trace_disable = B_TRUE; 2899 nce_trace_inactive(nce); 2900 return; 2901 } 2902 th_trace->th_id = curthread; 2903 th_trace->th_next = nce->nce_trace[bucket_id]; 2904 th_trace->th_prev = &nce->nce_trace[bucket_id]; 2905 if (th_trace->th_next != NULL) 2906 th_trace->th_next->th_prev = &th_trace->th_next; 2907 nce->nce_trace[bucket_id] = th_trace; 2908 } 2909 ASSERT(th_trace->th_refcnt < TR_BUF_MAX - 1); 2910 th_trace->th_refcnt++; 2911 th_trace_rrecord(th_trace); 2912 } 2913 2914 void 2915 nce_untrace_ref(nce_t *nce) 2916 { 2917 th_trace_t *th_trace; 2918 2919 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2920 2921 if (nce->nce_trace_disable == B_TRUE) 2922 return; 2923 2924 th_trace = th_trace_nce_lookup(nce); 2925 ASSERT(th_trace != NULL && th_trace->th_refcnt > 0); 2926 2927 th_trace_rrecord(th_trace); 2928 th_trace->th_refcnt--; 2929 } 2930 2931 void 2932 nce_trace_inactive(nce_t *nce) 2933 { 2934 th_trace_t *th_trace; 2935 int i; 2936 2937 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2938 2939 for (i = 0; i < IP_TR_HASH_MAX; i++) { 2940 while (nce->nce_trace[i] != NULL) { 2941 th_trace = nce->nce_trace[i]; 2942 2943 /* unlink th_trace and free it */ 2944 nce->nce_trace[i] = th_trace->th_next; 2945 if (th_trace->th_next != NULL) 2946 th_trace->th_next->th_prev = 2947 &nce->nce_trace[i]; 2948 2949 th_trace->th_next = NULL; 2950 th_trace->th_prev = NULL; 2951 kmem_free(th_trace, sizeof (th_trace_t)); 2952 } 2953 } 2954 2955 } 2956 2957 /* ARGSUSED */ 2958 int 2959 nce_thread_exit(nce_t *nce, caddr_t arg) 2960 { 2961 th_trace_t *th_trace; 2962 2963 mutex_enter(&nce->nce_lock); 2964 th_trace = th_trace_nce_lookup(nce); 2965 2966 if (th_trace == NULL) { 2967 mutex_exit(&nce->nce_lock); 2968 return (0); 2969 } 2970 2971 ASSERT(th_trace->th_refcnt == 0); 2972 2973 /* unlink th_trace and free it */ 2974 *th_trace->th_prev = th_trace->th_next; 2975 if (th_trace->th_next != NULL) 2976 th_trace->th_next->th_prev = th_trace->th_prev; 2977 th_trace->th_next = NULL; 2978 th_trace->th_prev = NULL; 2979 kmem_free(th_trace, sizeof (th_trace_t)); 2980 mutex_exit(&nce->nce_lock); 2981 return (0); 2982 } 2983 #endif 2984