1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/stropts.h> 32 #include <sys/sysmacros.h> 33 #include <sys/errno.h> 34 #include <sys/strlog.h> 35 #include <sys/dlpi.h> 36 #include <sys/sockio.h> 37 #include <sys/tiuser.h> 38 #include <sys/tihdr.h> 39 #include <sys/socket.h> 40 #include <sys/ddi.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/vtrace.h> 44 #include <sys/kmem.h> 45 #include <sys/zone.h> 46 47 #include <net/if.h> 48 #include <net/if_types.h> 49 #include <net/if_dl.h> 50 #include <net/route.h> 51 #include <sys/sockio.h> 52 #include <netinet/in.h> 53 #include <netinet/in_systm.h> 54 #include <netinet/ip6.h> 55 #include <netinet/icmp6.h> 56 57 #include <inet/common.h> 58 #include <inet/mi.h> 59 #include <inet/mib2.h> 60 #include <inet/nd.h> 61 #include <inet/arp.h> 62 #include <inet/ip.h> 63 #include <inet/ip_multi.h> 64 #include <inet/ip_if.h> 65 #include <inet/ip_ire.h> 66 #include 
<inet/ip_rts.h> 67 #include <inet/ip6.h> 68 #include <inet/ip_ndp.h> 69 #include <inet/ipsec_impl.h> 70 #include <inet/ipsec_info.h> 71 72 /* 73 * Function names with nce_ prefix are static while function 74 * names with ndp_ prefix are used by rest of the IP. 75 */ 76 77 static boolean_t nce_cmp_ll_addr(nce_t *nce, char *new_ll_addr, 78 uint32_t ll_addr_len); 79 static void nce_fastpath(nce_t *nce); 80 static void nce_ire_delete(nce_t *nce); 81 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 82 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 83 static nce_t *nce_lookup_addr(ill_t *ill, const in6_addr_t *addr); 84 static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); 85 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 86 uchar_t *addr); 87 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 88 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 89 static void nce_report1(nce_t *nce, uchar_t *mp_arg); 90 static mblk_t *nce_udreq_alloc(ill_t *ill); 91 static void nce_update(nce_t *nce, uint16_t new_state, 92 uchar_t *new_ll_addr); 93 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 94 static boolean_t nce_xmit(ill_t *ill, uint32_t operation, 95 ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, 96 const in6_addr_t *target, int flag); 97 static void lla2ascii(uint8_t *lla, int addrlen, uchar_t *buf); 98 extern void th_trace_rrecord(th_trace_t *); 99 100 #ifdef NCE_DEBUG 101 void nce_trace_inactive(nce_t *); 102 #endif 103 104 /* NDP Cache Entry Hash Table */ 105 #define NCE_TABLE_SIZE 256 106 static nce_t *nce_hash_tbl[NCE_TABLE_SIZE]; 107 static nce_t *nce_mask_entries; /* mask not all ones */ 108 static int ndp_g_walker = 0; /* # of active thread */ 109 /* walking nce hash list */ 110 /* ndp_g_walker_cleanup will be true, when deletion have to be defered */ 111 static boolean_t ndp_g_walker_cleanup = B_FALSE; 112 113 #ifdef _BIG_ENDIAN 114 #define IN6_IS_ADDR_MC_SOLICITEDNODE(addr) \ 115 
((((addr)->s6_addr32[0] & 0xff020000) == 0xff020000) && \ 116 ((addr)->s6_addr32[1] == 0x0) && \ 117 ((addr)->s6_addr32[2] == 0x00000001) && \ 118 ((addr)->s6_addr32[3] & 0xff000000) == 0xff000000) 119 #else /* _BIG_ENDIAN */ 120 #define IN6_IS_ADDR_MC_SOLICITEDNODE(addr) \ 121 ((((addr)->s6_addr32[0] & 0x000002ff) == 0x000002ff) && \ 122 ((addr)->s6_addr32[1] == 0x0) && \ 123 ((addr)->s6_addr32[2] == 0x01000000) && \ 124 ((addr)->s6_addr32[3] & 0x000000ff) == 0x000000ff) 125 #endif 126 127 #define NCE_HASH_PTR(addr) \ 128 (&(nce_hash_tbl[NCE_ADDR_HASH_V6(addr, NCE_TABLE_SIZE)])) 129 130 /* 131 * NDP Cache Entry creation routine. 132 * Mapped entries will never do NUD . 133 * This routine must always be called with ndp_g_lock held. 134 * Prior to return, nce_refcnt is incremented. 135 */ 136 int 137 ndp_add(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 138 const in6_addr_t *mask, const in6_addr_t *extract_mask, 139 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 140 nce_t **newnce) 141 { 142 static nce_t nce_nil; 143 nce_t *nce; 144 mblk_t *mp; 145 mblk_t *template; 146 nce_t **ncep; 147 boolean_t dropped = B_FALSE; 148 149 ASSERT(MUTEX_HELD(&ndp_g_lock)); 150 ASSERT(ill != NULL); 151 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 152 ip0dbg(("ndp_add: no addr\n")); 153 return (EINVAL); 154 } 155 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 156 ip0dbg(("ndp_add: flags = %x\n", (int)flags)); 157 return (EINVAL); 158 } 159 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 160 (flags & NCE_F_MAPPING)) { 161 ip0dbg(("ndp_add: extract mask zero for mapping")); 162 return (EINVAL); 163 } 164 /* 165 * Allocate the mblk to hold the nce. 166 * 167 * XXX This can come out of a separate cache - nce_cache. 
168 * We don't need the mp anymore as there are no more 169 * "qwriter"s 170 */ 171 mp = allocb(sizeof (nce_t), BPRI_MED); 172 if (mp == NULL) 173 return (ENOMEM); 174 175 nce = (nce_t *)mp->b_rptr; 176 mp->b_wptr = (uchar_t *)&nce[1]; 177 *nce = nce_nil; 178 179 /* 180 * This one holds link layer address 181 */ 182 if (ill->ill_net_type == IRE_IF_RESOLVER) { 183 template = nce_udreq_alloc(ill); 184 } else { 185 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 186 ASSERT((ill->ill_resolver_mp != NULL)); 187 template = copyb(ill->ill_resolver_mp); 188 } 189 if (template == NULL) { 190 freeb(mp); 191 return (ENOMEM); 192 } 193 nce->nce_ill = ill; 194 nce->nce_flags = flags; 195 nce->nce_state = state; 196 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 197 nce->nce_rcnt = ill->ill_xmit_count; 198 nce->nce_addr = *addr; 199 nce->nce_mask = *mask; 200 nce->nce_extract_mask = *extract_mask; 201 nce->nce_ll_extract_start = hw_extract_start; 202 nce->nce_fp_mp = NULL; 203 nce->nce_res_mp = template; 204 if (state == ND_REACHABLE) 205 nce->nce_last = TICK_TO_MSEC(lbolt64); 206 else 207 nce->nce_last = 0; 208 nce->nce_qd_mp = NULL; 209 nce->nce_mp = mp; 210 if (hw_addr != NULL) 211 nce_set_ll(nce, hw_addr); 212 /* This one is for nce getting created */ 213 nce->nce_refcnt = 1; 214 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 215 if (nce->nce_flags & NCE_F_MAPPING) { 216 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 217 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 218 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 219 ncep = &nce_mask_entries; 220 } else { 221 ncep = ((nce_t **)NCE_HASH_PTR(*addr)); 222 } 223 224 #ifdef NCE_DEBUG 225 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 226 #endif 227 /* 228 * Atomically ensure that the ill is not CONDEMNED, before 229 * adding the NCE. 
230 */ 231 mutex_enter(&ill->ill_lock); 232 if (ill->ill_state_flags & ILL_CONDEMNED) { 233 mutex_exit(&ill->ill_lock); 234 freeb(mp); 235 return (EINVAL); 236 } 237 if ((nce->nce_next = *ncep) != NULL) 238 nce->nce_next->nce_ptpn = &nce->nce_next; 239 *ncep = nce; 240 nce->nce_ptpn = ncep; 241 *newnce = nce; 242 /* This one is for nce being used by an active thread */ 243 NCE_REFHOLD(*newnce); 244 245 /* Bump up the number of nce's referencing this ill */ 246 ill->ill_nce_cnt++; 247 mutex_exit(&ill->ill_lock); 248 249 /* 250 * Before we insert the nce, honor the UNSOL_ADV flag. 251 * We cannot hold the ndp_g_lock and call nce_xmit 252 * which does a putnext. 253 */ 254 if (flags & NCE_F_UNSOL_ADV) { 255 flags |= NDP_ORIDE; 256 /* 257 * We account for the transmit below by assigning one 258 * less than the ndd variable. Subsequent decrements 259 * are done in ndp_timer. 260 */ 261 mutex_enter(&nce->nce_lock); 262 mutex_exit(&ndp_g_lock); 263 nce->nce_unsolicit_count = ip_ndp_unsolicit_count - 1; 264 mutex_exit(&nce->nce_lock); 265 dropped = nce_xmit(ill, 266 ND_NEIGHBOR_ADVERT, 267 ill, /* ill to be used for extracting ill_nd_lla */ 268 B_TRUE, /* use ill_nd_lla */ 269 addr, /* Source and target of the advertisement pkt */ 270 &ipv6_all_hosts_mcast, /* Destination of the packet */ 271 flags); 272 mutex_enter(&nce->nce_lock); 273 if (dropped) 274 nce->nce_unsolicit_count++; 275 if (nce->nce_unsolicit_count != 0) { 276 nce->nce_timeout_id = timeout(ndp_timer, nce, 277 MSEC_TO_TICK(ip_ndp_unsolicit_interval)); 278 } 279 mutex_exit(&nce->nce_lock); 280 mutex_enter(&ndp_g_lock); 281 } 282 /* 283 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 284 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 
285 * We call nce_fastpath from nce_update if the link layer address of 286 * the peer changes from nce_update 287 */ 288 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 289 nce_fastpath(nce); 290 return (0); 291 } 292 293 int 294 ndp_lookup_then_add(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 295 const in6_addr_t *mask, const in6_addr_t *extract_mask, 296 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 297 nce_t **newnce) 298 { 299 int err = 0; 300 nce_t *nce; 301 302 mutex_enter(&ndp_g_lock); 303 nce = nce_lookup_addr(ill, addr); 304 if (nce == NULL) { 305 err = ndp_add(ill, 306 hw_addr, 307 addr, 308 mask, 309 extract_mask, 310 hw_extract_start, 311 flags, 312 state, 313 newnce); 314 } else { 315 *newnce = nce; 316 err = EEXIST; 317 } 318 mutex_exit(&ndp_g_lock); 319 return (err); 320 } 321 322 /* 323 * Remove all the CONDEMNED nces from the appropriate hash table. 324 * We create a private list of NCEs, these may have ires pointing 325 * to them, so the list will be passed through to clean up dependent 326 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 327 */ 328 static void 329 nce_remove(nce_t *nce, nce_t **free_nce_list) 330 { 331 nce_t *nce1; 332 nce_t **ptpn; 333 334 ASSERT(MUTEX_HELD(&ndp_g_lock)); 335 ASSERT(ndp_g_walker == 0); 336 for (; nce; nce = nce1) { 337 nce1 = nce->nce_next; 338 mutex_enter(&nce->nce_lock); 339 if (nce->nce_flags & NCE_F_CONDEMNED) { 340 ptpn = nce->nce_ptpn; 341 nce1 = nce->nce_next; 342 if (nce1 != NULL) 343 nce1->nce_ptpn = ptpn; 344 *ptpn = nce1; 345 nce->nce_ptpn = NULL; 346 nce->nce_next = NULL; 347 nce->nce_next = *free_nce_list; 348 *free_nce_list = nce; 349 } 350 mutex_exit(&nce->nce_lock); 351 } 352 } 353 354 /* 355 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 356 * will return this NCE. Also no new IREs will be created that 357 * point to this NCE (See ire_add_v6). Also no new timeouts will 358 * be started (See NDP_RESTART_TIMER). 
359 * 2. Cancel any currently running timeouts. 360 * 3. If there is an ndp walker, return. The walker will do the cleanup. 361 * This ensures that walkers see a consistent list of NCEs while walking. 362 * 4. Otherwise remove the NCE from the list of NCEs 363 * 5. Delete all IREs pointing to this NCE. 364 */ 365 void 366 ndp_delete(nce_t *nce) 367 { 368 nce_t **ptpn; 369 nce_t *nce1; 370 371 /* Serialize deletes */ 372 mutex_enter(&nce->nce_lock); 373 if (nce->nce_flags & NCE_F_CONDEMNED) { 374 /* Some other thread is doing the delete */ 375 mutex_exit(&nce->nce_lock); 376 return; 377 } 378 /* 379 * Caller has a refhold. Also 1 ref for being in the list. Thus 380 * refcnt has to be >= 2 381 */ 382 ASSERT(nce->nce_refcnt >= 2); 383 nce->nce_flags |= NCE_F_CONDEMNED; 384 mutex_exit(&nce->nce_lock); 385 386 nce_fastpath_list_delete(nce); 387 388 /* 389 * Cancel any running timer. Timeout can't be restarted 390 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 391 * Passing invalid timeout id is fine. 392 */ 393 if (nce->nce_timeout_id != 0) { 394 (void) untimeout(nce->nce_timeout_id); 395 nce->nce_timeout_id = 0; 396 } 397 398 mutex_enter(&ndp_g_lock); 399 if (nce->nce_ptpn == NULL) { 400 /* 401 * The last ndp walker has already removed this nce from 402 * the list after we marked the nce CONDEMNED and before 403 * we grabbed the ndp_g_lock. 404 */ 405 mutex_exit(&ndp_g_lock); 406 return; 407 } 408 if (ndp_g_walker > 0) { 409 /* 410 * Can't unlink. The walker will clean up 411 */ 412 ndp_g_walker_cleanup = B_TRUE; 413 mutex_exit(&ndp_g_lock); 414 return; 415 } 416 417 /* 418 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 419 * the timer since it is marked CONDEMNED. 
420 */ 421 ptpn = nce->nce_ptpn; 422 nce1 = nce->nce_next; 423 if (nce1 != NULL) 424 nce1->nce_ptpn = ptpn; 425 *ptpn = nce1; 426 nce->nce_ptpn = NULL; 427 nce->nce_next = NULL; 428 mutex_exit(&ndp_g_lock); 429 430 nce_ire_delete(nce); 431 } 432 433 void 434 ndp_inactive(nce_t *nce) 435 { 436 mblk_t **mpp; 437 ill_t *ill; 438 439 ASSERT(nce->nce_refcnt == 0); 440 ASSERT(MUTEX_HELD(&nce->nce_lock)); 441 ASSERT(nce->nce_fastpath == NULL); 442 443 /* Free all nce allocated messages */ 444 mpp = &nce->nce_first_mp_to_free; 445 do { 446 while (*mpp != NULL) { 447 mblk_t *mp; 448 449 mp = *mpp; 450 *mpp = mp->b_next; 451 mp->b_next = NULL; 452 mp->b_prev = NULL; 453 freemsg(mp); 454 } 455 } while (mpp++ != &nce->nce_last_mp_to_free); 456 457 #ifdef NCE_DEBUG 458 nce_trace_inactive(nce); 459 #endif 460 461 ill = nce->nce_ill; 462 mutex_enter(&ill->ill_lock); 463 ill->ill_nce_cnt--; 464 /* 465 * If the number of nce's associated with this ill have dropped 466 * to zero, check whether we need to restart any operation that 467 * is waiting for this to happen. 468 */ 469 if (ill->ill_nce_cnt == 0) { 470 /* ipif_ill_refrele_tail drops the ill_lock */ 471 ipif_ill_refrele_tail(ill); 472 } else { 473 mutex_exit(&ill->ill_lock); 474 } 475 mutex_destroy(&nce->nce_lock); 476 freeb(nce->nce_mp); 477 } 478 479 /* 480 * ndp_walk routine. Delete the nce if it is associated with the ill 481 * that is going away. Always called as a writer. 482 */ 483 void 484 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 485 { 486 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 487 ndp_delete(nce); 488 } 489 } 490 491 /* 492 * Walk a list of to be inactive NCEs and blow away all the ires. 
493 */ 494 static void 495 nce_ire_delete_list(nce_t *nce) 496 { 497 nce_t *nce_next; 498 499 ASSERT(nce != NULL); 500 while (nce != NULL) { 501 nce_next = nce->nce_next; 502 nce->nce_next = NULL; 503 504 /* 505 * It is possible for the last ndp walker (this thread) 506 * to come here after ndp_delete has marked the nce CONDEMNED 507 * and before it has removed the nce from the fastpath list 508 * or called untimeout. So we need to do it here. It is safe 509 * for both ndp_delete and this thread to do it twice or 510 * even simultaneously since each of the threads has a 511 * reference on the nce. 512 */ 513 nce_fastpath_list_delete(nce); 514 /* 515 * Cancel any running timer. Timeout can't be restarted 516 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 517 * Passing invalid timeout id is fine. 518 */ 519 if (nce->nce_timeout_id != 0) { 520 (void) untimeout(nce->nce_timeout_id); 521 nce->nce_timeout_id = 0; 522 } 523 524 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 525 nce_ire_delete1, (char *)nce, nce->nce_ill); 526 NCE_REFRELE_NOTR(nce); 527 nce = nce_next; 528 } 529 } 530 531 /* 532 * Delete an ire when the nce goes away. 533 */ 534 /* ARGSUSED */ 535 static void 536 nce_ire_delete(nce_t *nce) 537 { 538 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 539 nce_ire_delete1, (char *)nce, nce->nce_ill); 540 NCE_REFRELE_NOTR(nce); 541 } 542 543 /* 544 * ire_walk routine used to delete every IRE that shares this nce 545 */ 546 static void 547 nce_ire_delete1(ire_t *ire, char *nce_arg) 548 { 549 nce_t *nce = (nce_t *)nce_arg; 550 551 ASSERT(ire->ire_type == IRE_CACHE); 552 553 if (ire->ire_nce == nce) 554 ire_delete(ire); 555 } 556 557 /* 558 * Cache entry lookup. Try to find an nce matching the parameters passed. 559 * If one is found, the refcnt on the nce will be incremented. 
560 */ 561 nce_t * 562 ndp_lookup(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) 563 { 564 nce_t *nce; 565 566 if (!caller_holds_lock) 567 mutex_enter(&ndp_g_lock); 568 nce = nce_lookup_addr(ill, addr); 569 if (nce == NULL) 570 nce = nce_lookup_mapping(ill, addr); 571 if (!caller_holds_lock) 572 mutex_exit(&ndp_g_lock); 573 return (nce); 574 } 575 576 /* 577 * Cache entry lookup. Try to find an nce matching the parameters passed. 578 * Look only for exact entries (no mappings). If an nce is found, increment 579 * the hold count on that nce. 580 */ 581 static nce_t * 582 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr) 583 { 584 nce_t *nce; 585 586 ASSERT(ill != NULL); 587 ASSERT(MUTEX_HELD(&ndp_g_lock)); 588 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 589 return (NULL); 590 nce = *((nce_t **)NCE_HASH_PTR(*addr)); 591 for (; nce != NULL; nce = nce->nce_next) { 592 if (nce->nce_ill == ill) { 593 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 594 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 595 &ipv6_all_ones)) { 596 mutex_enter(&nce->nce_lock); 597 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 598 NCE_REFHOLD_LOCKED(nce); 599 mutex_exit(&nce->nce_lock); 600 break; 601 } 602 mutex_exit(&nce->nce_lock); 603 } 604 } 605 } 606 return (nce); 607 } 608 609 /* 610 * Cache entry lookup. Try to find an nce matching the parameters passed. 611 * Look only for mappings. 
612 */ 613 static nce_t * 614 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 615 { 616 nce_t *nce; 617 618 ASSERT(ill != NULL); 619 ASSERT(MUTEX_HELD(&ndp_g_lock)); 620 if (!IN6_IS_ADDR_MULTICAST(addr)) 621 return (NULL); 622 nce = nce_mask_entries; 623 for (; nce != NULL; nce = nce->nce_next) 624 if (nce->nce_ill == ill && 625 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 626 mutex_enter(&nce->nce_lock); 627 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 628 NCE_REFHOLD_LOCKED(nce); 629 mutex_exit(&nce->nce_lock); 630 break; 631 } 632 mutex_exit(&nce->nce_lock); 633 } 634 return (nce); 635 } 636 637 /* 638 * Process passed in parameters either from an incoming packet or via 639 * user ioctl. 640 */ 641 void 642 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 643 { 644 ill_t *ill = nce->nce_ill; 645 uint32_t hw_addr_len = ill->ill_nd_lla_len; 646 mblk_t *mp; 647 boolean_t ll_updated = B_FALSE; 648 boolean_t ll_changed; 649 650 /* 651 * No updates of link layer address or the neighbor state is 652 * allowed, when the cache is in NONUD state. This still 653 * allows for responding to reachability solicitation. 654 */ 655 mutex_enter(&nce->nce_lock); 656 if (nce->nce_state == ND_INCOMPLETE) { 657 if (hw_addr == NULL) { 658 mutex_exit(&nce->nce_lock); 659 return; 660 } 661 nce_set_ll(nce, hw_addr); 662 /* 663 * Update nce state and send the queued packets 664 * back to ip this time ire will be added. 
665 */ 666 if (flag & ND_NA_FLAG_SOLICITED) { 667 nce_update(nce, ND_REACHABLE, NULL); 668 } else { 669 nce_update(nce, ND_STALE, NULL); 670 } 671 mutex_exit(&nce->nce_lock); 672 nce_fastpath(nce); 673 mutex_enter(&nce->nce_lock); 674 mp = nce->nce_qd_mp; 675 nce->nce_qd_mp = NULL; 676 mutex_exit(&nce->nce_lock); 677 while (mp != NULL) { 678 mblk_t *nxt_mp; 679 680 nxt_mp = mp->b_next; 681 mp->b_next = NULL; 682 if (mp->b_prev != NULL) { 683 ill_t *inbound_ill; 684 queue_t *fwdq = NULL; 685 uint_t ifindex; 686 687 ifindex = (uint_t)(uintptr_t)mp->b_prev; 688 inbound_ill = ill_lookup_on_ifindex(ifindex, 689 B_TRUE, NULL, NULL, NULL, NULL); 690 if (inbound_ill == NULL) { 691 mp->b_prev = NULL; 692 freemsg(mp); 693 return; 694 } else { 695 fwdq = inbound_ill->ill_rq; 696 } 697 mp->b_prev = NULL; 698 /* 699 * Send a forwarded packet back into ip_rput_v6 700 * just as in ire_send_v6(). 701 * Extract the queue from b_prev (set in 702 * ip_rput_data_v6). 703 */ 704 if (fwdq != NULL) { 705 /* 706 * Forwarded packets hop count will 707 * get decremented in ip_rput_data_v6 708 */ 709 put(fwdq, mp); 710 } else { 711 /* 712 * Send locally originated packets back 713 * into * ip_wput_v6. 
714 */ 715 put(ill->ill_wq, mp); 716 } 717 ill_refrele(inbound_ill); 718 } else { 719 put(ill->ill_wq, mp); 720 } 721 mp = nxt_mp; 722 } 723 return; 724 } 725 ll_changed = nce_cmp_ll_addr(nce, (char *)hw_addr, hw_addr_len); 726 if (!is_adv) { 727 /* If this is a SOLICITATION request only */ 728 if (ll_changed) 729 nce_update(nce, ND_STALE, hw_addr); 730 mutex_exit(&nce->nce_lock); 731 return; 732 } 733 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 734 /* If in any other state than REACHABLE, ignore */ 735 if (nce->nce_state == ND_REACHABLE) { 736 nce_update(nce, ND_STALE, NULL); 737 } 738 mutex_exit(&nce->nce_lock); 739 return; 740 } else { 741 if (ll_changed) { 742 nce_update(nce, ND_UNCHANGED, hw_addr); 743 ll_updated = B_TRUE; 744 } 745 if (flag & ND_NA_FLAG_SOLICITED) { 746 nce_update(nce, ND_REACHABLE, NULL); 747 } else { 748 if (ll_updated) { 749 nce_update(nce, ND_STALE, NULL); 750 } 751 } 752 mutex_exit(&nce->nce_lock); 753 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 754 NCE_F_ISROUTER)) { 755 ire_t *ire; 756 757 /* 758 * Router turned to host. We need to remove the 759 * entry as well as any default route that may be 760 * using this as a next hop. This is required by 761 * section 7.2.5 of RFC 2461. 762 */ 763 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 764 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 765 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, 766 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 767 MATCH_IRE_DEFAULT); 768 if (ire != NULL) { 769 ip_rts_rtmsg(RTM_DELETE, ire, 0); 770 ire_delete(ire); 771 ire_refrele(ire); 772 } 773 ndp_delete(nce); 774 } 775 } 776 } 777 778 /* 779 * Pass arg1 to the pfi supplied, along with each nce in existence. 780 * ndp_walk() places a REFHOLD on the nce and drops the lock when 781 * walking the hash list. 
782 */ 783 void 784 ndp_walk_impl(ill_t *ill, pfi_t pfi, uchar_t *arg1, boolean_t trace) 785 { 786 787 nce_t *nce; 788 nce_t *nce1; 789 nce_t **ncep; 790 nce_t *free_nce_list = NULL; 791 792 mutex_enter(&ndp_g_lock); 793 ndp_g_walker++; /* Prevent ndp_delete from unlink and free of NCE */ 794 mutex_exit(&ndp_g_lock); 795 for (ncep = nce_hash_tbl; ncep < A_END(nce_hash_tbl); ncep++) { 796 for (nce = *ncep; nce; nce = nce1) { 797 nce1 = nce->nce_next; 798 if (ill == NULL || nce->nce_ill == ill) { 799 if (trace) { 800 NCE_REFHOLD(nce); 801 (*pfi)(nce, arg1); 802 NCE_REFRELE(nce); 803 } else { 804 NCE_REFHOLD_NOTR(nce); 805 (*pfi)(nce, arg1); 806 NCE_REFRELE_NOTR(nce); 807 } 808 } 809 } 810 } 811 for (nce = nce_mask_entries; nce; nce = nce1) { 812 nce1 = nce->nce_next; 813 if (ill == NULL || nce->nce_ill == ill) { 814 if (trace) { 815 NCE_REFHOLD(nce); 816 (*pfi)(nce, arg1); 817 NCE_REFRELE(nce); 818 } else { 819 NCE_REFHOLD_NOTR(nce); 820 (*pfi)(nce, arg1); 821 NCE_REFRELE_NOTR(nce); 822 } 823 } 824 } 825 mutex_enter(&ndp_g_lock); 826 ndp_g_walker--; 827 /* 828 * While NCE's are removed from global list they are placed 829 * in a private list, to be passed to nce_ire_delete_list(). 830 * The reason is, there may be ires pointing to this nce 831 * which needs to cleaned up. 
832 */ 833 if (ndp_g_walker_cleanup && ndp_g_walker == 0) { 834 /* Time to delete condemned entries */ 835 for (ncep = nce_hash_tbl; ncep < A_END(nce_hash_tbl); ncep++) { 836 nce = *ncep; 837 if (nce != NULL) { 838 nce_remove(nce, &free_nce_list); 839 } 840 } 841 nce = nce_mask_entries; 842 if (nce != NULL) { 843 nce_remove(nce, &free_nce_list); 844 } 845 ndp_g_walker_cleanup = B_FALSE; 846 } 847 mutex_exit(&ndp_g_lock); 848 849 if (free_nce_list != NULL) { 850 nce_ire_delete_list(free_nce_list); 851 } 852 } 853 854 void 855 ndp_walk(ill_t *ill, pfi_t pfi, uchar_t *arg1) 856 { 857 ndp_walk_impl(ill, pfi, arg1, B_TRUE); 858 } 859 860 /* 861 * Prepend the zoneid using an ipsec_out_t for later use by functions like 862 * ip_rput_v6() after neighbor discovery has taken place. If the message 863 * block already has a M_CTL at the front of it, then simply set the zoneid 864 * appropriately. 865 */ 866 static mblk_t * 867 ndp_prepend_zone(mblk_t *mp, zoneid_t zoneid) 868 { 869 mblk_t *first_mp; 870 ipsec_out_t *io; 871 872 if (mp->b_datap->db_type == M_CTL) { 873 io = (ipsec_out_t *)mp->b_rptr; 874 ASSERT(io->ipsec_out_type == IPSEC_OUT); 875 io->ipsec_out_zoneid = zoneid; 876 return (mp); 877 } 878 879 first_mp = ipsec_alloc_ipsec_out(); 880 if (first_mp == NULL) 881 return (NULL); 882 io = (ipsec_out_t *)first_mp->b_rptr; 883 /* This is not a secure packet */ 884 io->ipsec_out_secure = B_FALSE; 885 io->ipsec_out_zoneid = zoneid; 886 first_mp->b_cont = mp; 887 return (first_mp); 888 } 889 890 /* 891 * Process resolve requests. Handles both mapped entries 892 * as well as cases that needs to be send out on the wire. 893 * Lookup a NCE for a given IRE. Regardless of whether one exists 894 * or one is created, we defer making ire point to nce until the 895 * ire is actually added at which point the nce_refcnt on the nce is 896 * incremented. 
This is done primarily to have symmetry between ire_add()
 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
 *
 * Returns 0 when a usable (or XRESOLV) entry exists, EINPROGRESS when mp
 * was queued on the entry awaiting resolution, or an errno otherwise.
 * On ENOMEM and EBUSY the caller still owns mp.
 */
int
ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid)
{
	nce_t		*nce;
	int		err;
	uint32_t	retrans_ms;
	uint16_t	nce_flags;
	mblk_t		*queued_mp = NULL;

	ASSERT(ill != NULL);
	if (IN6_IS_ADDR_MULTICAST(dst))
		return (nce_set_multicast(ill, dst));

	if (ill->ill_flags & ILLF_NONUD)
		nce_flags = NCE_F_NONUD;
	else
		nce_flags = 0;

	err = ndp_lookup_then_add(ill,
	    NULL,	/* No hardware address */
	    dst,
	    &ipv6_all_ones,
	    &ipv6_all_zeros,
	    0,
	    nce_flags,
	    ND_INCOMPLETE,
	    &nce);

	if (err != 0 && err != EEXIST) {
		ip1dbg(("ndp_resolver: Can't create NCE %d\n", err));
		return (err);
	}

	if (err == EEXIST) {
		/* An entry is already there; queue only while unresolved. */
		mutex_enter(&nce->nce_lock);
		if (nce->nce_state != ND_INCOMPLETE) {
			/*
			 * Any other state means the nce is usable and only
			 * the IRE is missing. ire_add_v6() copes with the
			 * nce becoming CONDEMNED before the ire is added
			 * to the table.
			 */
			err = 0;
		} else {
			queued_mp = ndp_prepend_zone(mp, zoneid);
			if (queued_mp != NULL) {
				nce_queue_mp(nce, queued_mp);
				err = EINPROGRESS;
			} else {
				err = ENOMEM;
			}
		}
		mutex_exit(&nce->nce_lock);
		NCE_REFRELE(nce);
		return (err);
	}

	/*
	 * A new cache entry was created. An unsolicited advertisement may
	 * have moved it out of ND_INCOMPLETE before we get to solicit, so
	 * the state is re-checked below under nce_lock.
	 *
	 * On an XRESOLV interface simply return 0, since we don't want
	 * to solicit just yet.
	 */
	if (ill->ill_flags & ILLF_XRESOLV) {
		NCE_REFRELE(nce);
		return (0);
	}

	rw_enter(&ill_g_lock, RW_READER);
	mutex_enter(&nce->nce_lock);
	if (nce->nce_state != ND_INCOMPLETE) {
		/* Already resolved; nothing to solicit. */
		mutex_exit(&nce->nce_lock);
		rw_exit(&ill_g_lock);
		NCE_REFRELE(nce);
		return (0);
	}

	queued_mp = ndp_prepend_zone(mp, zoneid);
	if (queued_mp == NULL) {
		/* The caller will free mp */
		mutex_exit(&nce->nce_lock);
		rw_exit(&ill_g_lock);
		ndp_delete(nce);
		NCE_REFRELE(nce);
		return (ENOMEM);
	}

	retrans_ms = nce_solicit(nce, queued_mp);
	rw_exit(&ill_g_lock);
	if (retrans_ms == 0) {
		/*
		 * The caller will free mp; only the M_CTL that was
		 * prepended here is ours to release.
		 */
		if (queued_mp != mp)
			freeb(queued_mp);
		mutex_exit(&nce->nce_lock);
		ndp_delete(nce);
		NCE_REFRELE(nce);
		return (EBUSY);
	}
	mutex_exit(&nce->nce_lock);
	NDP_RESTART_TIMER(nce, (clock_t)retrans_ms);
	NCE_REFRELE(nce);
	return (EINPROGRESS);
}

/*
 * When there is no resolver, the link layer template is passed in
 * the IRE.
 * Lookup a NCE for a given IRE.  Regardless of whether one exists
 * or one is created, we defer making ire point to nce until the
 * ire is actually added at which point the nce_refcnt on the nce is
 * incremented.
This is done primarily to have symmetry between ire_add() 1007 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1008 */ 1009 int 1010 ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 1011 { 1012 nce_t *nce; 1013 int err = 0; 1014 1015 ASSERT(ill != NULL); 1016 if (IN6_IS_ADDR_MULTICAST(dst)) { 1017 err = nce_set_multicast(ill, dst); 1018 return (err); 1019 } 1020 1021 err = ndp_lookup_then_add(ill, 1022 NULL, /* hardware address */ 1023 dst, 1024 &ipv6_all_ones, 1025 &ipv6_all_zeros, 1026 0, 1027 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1028 ND_REACHABLE, 1029 &nce); 1030 1031 switch (err) { 1032 case 0: 1033 /* 1034 * Cache entry with a proper resolver cookie was 1035 * created. 1036 */ 1037 NCE_REFRELE(nce); 1038 break; 1039 case EEXIST: 1040 err = 0; 1041 NCE_REFRELE(nce); 1042 break; 1043 default: 1044 ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 1045 break; 1046 } 1047 return (err); 1048 } 1049 1050 /* 1051 * For each interface an entry is added for the unspecified multicast group. 1052 * Here that mapping is used to form the multicast cache entry for a particular 1053 * multicast destination. 1054 */ 1055 static int 1056 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1057 { 1058 nce_t *mnce; /* Multicast mapping entry */ 1059 nce_t *nce; 1060 uchar_t *hw_addr = NULL; 1061 int err = 0; 1062 1063 ASSERT(ill != NULL); 1064 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1065 1066 mutex_enter(&ndp_g_lock); 1067 nce = nce_lookup_addr(ill, dst); 1068 if (nce != NULL) { 1069 mutex_exit(&ndp_g_lock); 1070 NCE_REFRELE(nce); 1071 return (0); 1072 } 1073 /* No entry, now lookup for a mapping this should never fail */ 1074 mnce = nce_lookup_mapping(ill, dst); 1075 if (mnce == NULL) { 1076 /* Something broken for the interface. 
*/ 1077 mutex_exit(&ndp_g_lock); 1078 return (ESRCH); 1079 } 1080 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1081 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1082 /* 1083 * For IRE_IF_RESOLVER a hardware mapping can be 1084 * generated, for IRE_IF_NORESOLVER, resolution cookie 1085 * in the ill is copied in ndp_add(). 1086 */ 1087 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1088 if (hw_addr == NULL) { 1089 mutex_exit(&ndp_g_lock); 1090 NCE_REFRELE(mnce); 1091 return (ENOMEM); 1092 } 1093 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1094 } 1095 NCE_REFRELE(mnce); 1096 /* 1097 * IRE_IF_NORESOLVER type simply copies the resolution 1098 * cookie passed in. So no hw_addr is needed. 1099 */ 1100 err = ndp_add(ill, 1101 hw_addr, 1102 dst, 1103 &ipv6_all_ones, 1104 &ipv6_all_zeros, 1105 0, 1106 NCE_F_NONUD, 1107 ND_REACHABLE, 1108 &nce); 1109 mutex_exit(&ndp_g_lock); 1110 if (hw_addr != NULL) 1111 kmem_free(hw_addr, ill->ill_nd_lla_len); 1112 if (err != 0) { 1113 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1114 return (err); 1115 } 1116 NCE_REFRELE(nce); 1117 return (0); 1118 } 1119 1120 /* 1121 * Return the link layer address, and any flags of a nce. 
1122 */ 1123 int 1124 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1125 { 1126 nce_t *nce; 1127 in6_addr_t *addr; 1128 sin6_t *sin6; 1129 dl_unitdata_req_t *dl; 1130 1131 ASSERT(ill != NULL); 1132 sin6 = (sin6_t *)&lnr->lnr_addr; 1133 addr = &sin6->sin6_addr; 1134 1135 nce = ndp_lookup(ill, addr, B_FALSE); 1136 if (nce == NULL) 1137 return (ESRCH); 1138 /* If in INCOMPLETE state, no link layer address is available yet */ 1139 if (nce->nce_state == ND_INCOMPLETE) 1140 goto done; 1141 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1142 if (ill->ill_flags & ILLF_XRESOLV) 1143 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1144 else 1145 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1146 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1147 sizeof (lnr->lnr_hdw_addr)); 1148 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1149 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1150 if (nce->nce_flags & NCE_F_ISROUTER) 1151 lnr->lnr_flags = NDF_ISROUTER_ON; 1152 if (nce->nce_flags & NCE_F_PROXY) 1153 lnr->lnr_flags |= NDF_PROXY_ON; 1154 if (nce->nce_flags & NCE_F_ANYCAST) 1155 lnr->lnr_flags |= NDF_ANYCAST_ON; 1156 done: 1157 NCE_REFRELE(nce); 1158 return (0); 1159 } 1160 1161 /* 1162 * Send Enable/Disable multicast reqs to driver. 
1163 */ 1164 int 1165 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1166 uint32_t hw_addr_offset, mblk_t *mp) 1167 { 1168 nce_t *nce; 1169 uchar_t *hw_addr; 1170 1171 ASSERT(ill != NULL); 1172 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1173 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1174 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1175 freemsg(mp); 1176 return (EINVAL); 1177 } 1178 mutex_enter(&ndp_g_lock); 1179 nce = nce_lookup_mapping(ill, addr); 1180 if (nce == NULL) { 1181 mutex_exit(&ndp_g_lock); 1182 freemsg(mp); 1183 return (ESRCH); 1184 } 1185 mutex_exit(&ndp_g_lock); 1186 /* 1187 * Update dl_addr_length and dl_addr_offset for primitives that 1188 * have physical addresses as opposed to full saps 1189 */ 1190 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1191 case DL_ENABMULTI_REQ: 1192 /* Track the state if this is the first enabmulti */ 1193 if (ill->ill_dlpi_multicast_state == IDMS_UNKNOWN) 1194 ill->ill_dlpi_multicast_state = IDMS_INPROGRESS; 1195 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1196 break; 1197 case DL_DISABMULTI_REQ: 1198 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1199 break; 1200 default: 1201 NCE_REFRELE(nce); 1202 ip1dbg(("ndp_mcastreq: default\n")); 1203 return (EINVAL); 1204 } 1205 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1206 NCE_REFRELE(nce); 1207 putnext(ill->ill_wq, mp); 1208 return (0); 1209 } 1210 1211 /* 1212 * Send a neighbor solicitation. 1213 * Returns number of milliseconds after which we should either rexmit or abort. 1214 * Return of zero means we should abort. 1215 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1216 * 1217 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1218 * the packet. 1219 * NOTE: This routine does not consume mp. 
1220 */ 1221 uint32_t 1222 nce_solicit(nce_t *nce, mblk_t *mp) 1223 { 1224 ill_t *ill; 1225 ill_t *src_ill; 1226 ip6_t *ip6h; 1227 in6_addr_t src; 1228 in6_addr_t dst; 1229 ipif_t *ipif; 1230 ip6i_t *ip6i; 1231 boolean_t dropped = B_FALSE; 1232 1233 ASSERT(RW_READ_HELD(&ill_g_lock)); 1234 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1235 ill = nce->nce_ill; 1236 ASSERT(ill != NULL); 1237 1238 if (nce->nce_rcnt == 0) { 1239 return (0); 1240 } 1241 1242 if (mp == NULL) { 1243 ASSERT(nce->nce_qd_mp != NULL); 1244 mp = nce->nce_qd_mp; 1245 } else { 1246 nce_queue_mp(nce, mp); 1247 } 1248 1249 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1250 if (mp->b_datap->db_type == M_CTL) 1251 mp = mp->b_cont; 1252 1253 ip6h = (ip6_t *)mp->b_rptr; 1254 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1255 /* 1256 * This message should have been pulled up already in 1257 * ip_wput_v6. We can't do pullups here because the message 1258 * could be from the nce_qd_mp which could have b_next/b_prev 1259 * non-NULL. 1260 */ 1261 ip6i = (ip6i_t *)ip6h; 1262 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 1263 sizeof (ip6i_t) + IPV6_HDR_LEN); 1264 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1265 } 1266 src = ip6h->ip6_src; 1267 /* 1268 * If the src of outgoing packet is one of the assigned interface 1269 * addresses use it, otherwise we will pick the source address below. 
1270 */ 1271 src_ill = ill; 1272 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1273 if (ill->ill_group != NULL) 1274 src_ill = ill->ill_group->illgrp_ill; 1275 for (; src_ill != NULL; src_ill = src_ill->ill_group_next) { 1276 for (ipif = src_ill->ill_ipif; ipif != NULL; 1277 ipif = ipif->ipif_next) { 1278 if (IN6_ARE_ADDR_EQUAL(&src, 1279 &ipif->ipif_v6lcl_addr)) { 1280 break; 1281 } 1282 } 1283 if (ipif != NULL) 1284 break; 1285 } 1286 if (src_ill == NULL) { 1287 /* May be a forwarding packet */ 1288 src_ill = ill; 1289 src = ipv6_all_zeros; 1290 } 1291 } 1292 dst = nce->nce_addr; 1293 /* 1294 * If source address is unspecified, nce_xmit will choose 1295 * one for us and initialize the hardware address also 1296 * appropriately. 1297 */ 1298 if (IN6_IS_ADDR_UNSPECIFIED(&src)) 1299 src_ill = NULL; 1300 nce->nce_rcnt--; 1301 mutex_exit(&nce->nce_lock); 1302 rw_exit(&ill_g_lock); 1303 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, 1304 &dst, 0); 1305 rw_enter(&ill_g_lock, RW_READER); 1306 mutex_enter(&nce->nce_lock); 1307 if (dropped) 1308 nce->nce_rcnt++; 1309 return (ill->ill_reachable_retrans_time); 1310 } 1311 1312 void 1313 ndp_input_solicit(ill_t *ill, mblk_t *mp) 1314 { 1315 nd_neighbor_solicit_t *ns; 1316 uint32_t hlen = ill->ill_nd_lla_len; 1317 uchar_t *haddr = NULL; 1318 icmp6_t *icmp_nd; 1319 ip6_t *ip6h; 1320 nce_t *our_nce = NULL; 1321 in6_addr_t target; 1322 in6_addr_t src; 1323 int len; 1324 int flag = 0; 1325 nd_opt_hdr_t *opt = NULL; 1326 boolean_t bad_solicit = B_FALSE; 1327 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1328 1329 ip6h = (ip6_t *)mp->b_rptr; 1330 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1331 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1332 src = ip6h->ip6_src; 1333 ns = (nd_neighbor_solicit_t *)icmp_nd; 1334 target = ns->nd_ns_target; 1335 if (IN6_IS_ADDR_MULTICAST(&target)) { 1336 if (ip_debug > 2) { 1337 /* ip1dbg */ 1338 pr_addr_dbg("ndp_input_solicit: Target is" 1339 " multicast! 
%s\n", AF_INET6, &target); 1340 } 1341 bad_solicit = B_TRUE; 1342 goto done; 1343 } 1344 if (len > sizeof (nd_neighbor_solicit_t)) { 1345 /* Options present */ 1346 opt = (nd_opt_hdr_t *)&ns[1]; 1347 len -= sizeof (nd_neighbor_solicit_t); 1348 if (!ndp_verify_optlen(opt, len)) { 1349 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1350 bad_solicit = B_TRUE; 1351 goto done; 1352 } 1353 } 1354 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1355 /* Check to see if this is a valid DAD solicitation */ 1356 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1357 if (ip_debug > 2) { 1358 /* ip1dbg */ 1359 pr_addr_dbg("ndp_input_solicit: IPv6 " 1360 "Destination is not solicited node " 1361 "multicast %s\n", AF_INET6, 1362 &ip6h->ip6_dst); 1363 } 1364 bad_solicit = B_TRUE; 1365 goto done; 1366 } 1367 } 1368 1369 our_nce = ndp_lookup(ill, &target, B_FALSE); 1370 /* 1371 * If this is a valid Solicitation, a permanent 1372 * entry should exist in the cache 1373 */ 1374 if (our_nce == NULL || 1375 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1376 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1377 "ifname=%s ", ill->ill_name)); 1378 if (ip_debug > 2) { 1379 /* ip1dbg */ 1380 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1381 } 1382 bad_solicit = B_TRUE; 1383 goto done; 1384 } 1385 1386 /* At this point we should have a verified NS per spec */ 1387 if (opt != NULL) { 1388 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1389 if (opt != NULL) { 1390 /* 1391 * No source link layer address option should 1392 * be present in a valid DAD request. 1393 */ 1394 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1395 ip1dbg(("ndp_input_solicit: source link-layer " 1396 "address option present with an " 1397 "unspecified source. \n")); 1398 bad_solicit = B_TRUE; 1399 goto done; 1400 } 1401 haddr = (uchar_t *)&opt[1]; 1402 if (hlen > opt->nd_opt_len * 8 || 1403 hlen == 0) { 1404 bad_solicit = B_TRUE; 1405 goto done; 1406 } 1407 } 1408 } 1409 /* Set override flag, it will be reset later if need be. 
*/ 1410 flag |= NDP_ORIDE; 1411 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1412 flag |= NDP_UNICAST; 1413 } 1414 1415 /* 1416 * Create/update the entry for the soliciting node. 1417 * or respond to outstanding queries, don't if 1418 * the source is unspecified address. 1419 */ 1420 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1421 int err = 0; 1422 nce_t *nnce; 1423 1424 err = ndp_lookup_then_add(ill, 1425 haddr, 1426 &src, /* Soliciting nodes address */ 1427 &ipv6_all_ones, 1428 &ipv6_all_zeros, 1429 0, 1430 0, 1431 ND_STALE, 1432 &nnce); 1433 switch (err) { 1434 case 0: 1435 /* done with this entry */ 1436 NCE_REFRELE(nnce); 1437 break; 1438 case EEXIST: 1439 /* 1440 * B_FALSE indicates this is not an 1441 * an advertisement. 1442 */ 1443 ndp_process(nnce, haddr, 0, B_FALSE); 1444 NCE_REFRELE(nnce); 1445 break; 1446 default: 1447 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 1448 err)); 1449 goto done; 1450 } 1451 flag |= NDP_SOLICITED; 1452 } else { 1453 /* 1454 * This is a DAD req, multicast the advertisement 1455 * to the all-nodes address. 
1456 */ 1457 src = ipv6_all_hosts_mcast; 1458 } 1459 if (our_nce->nce_flags & NCE_F_ISROUTER) 1460 flag |= NDP_ISROUTER; 1461 if (our_nce->nce_flags & NCE_F_PROXY) 1462 flag &= ~NDP_ORIDE; 1463 /* Response to a solicitation */ 1464 (void) nce_xmit(ill, 1465 ND_NEIGHBOR_ADVERT, 1466 ill, /* ill to be used for extracting ill_nd_lla */ 1467 B_TRUE, /* use ill_nd_lla */ 1468 &target, /* Source and target of the advertisement pkt */ 1469 &src, /* IP Destination (source of original pkt) */ 1470 flag); 1471 done: 1472 if (bad_solicit) 1473 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 1474 if (our_nce != NULL) 1475 NCE_REFRELE(our_nce); 1476 } 1477 1478 void 1479 ndp_input_advert(ill_t *ill, mblk_t *mp) 1480 { 1481 nd_neighbor_advert_t *na; 1482 uint32_t hlen = ill->ill_nd_lla_len; 1483 uchar_t *haddr = NULL; 1484 icmp6_t *icmp_nd; 1485 ip6_t *ip6h; 1486 nce_t *dst_nce = NULL; 1487 in6_addr_t target; 1488 nd_opt_hdr_t *opt = NULL; 1489 int len; 1490 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1491 1492 ip6h = (ip6_t *)mp->b_rptr; 1493 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1494 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1495 na = (nd_neighbor_advert_t *)icmp_nd; 1496 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 1497 (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { 1498 ip1dbg(("ndp_input_advert: Target is multicast but the " 1499 "solicited flag is not zero\n")); 1500 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 1501 return; 1502 } 1503 target = na->nd_na_target; 1504 if (IN6_IS_ADDR_MULTICAST(&target)) { 1505 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 1506 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 1507 return; 1508 } 1509 if (len > sizeof (nd_neighbor_advert_t)) { 1510 opt = (nd_opt_hdr_t *)&na[1]; 1511 if (!ndp_verify_optlen(opt, 1512 len - sizeof (nd_neighbor_advert_t))) { 1513 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 1514 return; 1515 } 1516 /* At this point we have a verified NA per spec */ 
1517 len -= sizeof (nd_neighbor_advert_t); 1518 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 1519 if (opt != NULL) { 1520 haddr = (uchar_t *)&opt[1]; 1521 if (hlen > opt->nd_opt_len * 8 || 1522 hlen == 0) { 1523 BUMP_MIB(mib, 1524 ipv6IfIcmpInBadNeighborAdvertisements); 1525 return; 1526 } 1527 } 1528 } 1529 1530 /* 1531 * If this interface is part of the group look at all the 1532 * ills in the group. 1533 */ 1534 rw_enter(&ill_g_lock, RW_READER); 1535 if (ill->ill_group != NULL) 1536 ill = ill->ill_group->illgrp_ill; 1537 1538 for (; ill != NULL; ill = ill->ill_group_next) { 1539 mutex_enter(&ill->ill_lock); 1540 if (!ILL_CAN_LOOKUP(ill)) { 1541 mutex_exit(&ill->ill_lock); 1542 continue; 1543 } 1544 ill_refhold_locked(ill); 1545 mutex_exit(&ill->ill_lock); 1546 dst_nce = ndp_lookup(ill, &target, B_FALSE); 1547 /* We have to drop the lock since ndp_process calls put* */ 1548 rw_exit(&ill_g_lock); 1549 if (dst_nce != NULL) { 1550 if (na->nd_na_flags_reserved & 1551 ND_NA_FLAG_ROUTER) { 1552 dst_nce->nce_flags |= NCE_F_ISROUTER; 1553 } 1554 /* B_TRUE indicates this an advertisement */ 1555 ndp_process(dst_nce, haddr, 1556 na->nd_na_flags_reserved, B_TRUE); 1557 NCE_REFRELE(dst_nce); 1558 } 1559 rw_enter(&ill_g_lock, RW_READER); 1560 ill_refrele(ill); 1561 } 1562 rw_exit(&ill_g_lock); 1563 } 1564 1565 /* 1566 * Process NDP neighbor solicitation/advertisement messages. 1567 * The checksum has already checked o.k before reaching here. 
1568 */ 1569 void 1570 ndp_input(ill_t *ill, mblk_t *mp) 1571 { 1572 icmp6_t *icmp_nd; 1573 ip6_t *ip6h; 1574 int len; 1575 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1576 1577 1578 if (!pullupmsg(mp, -1)) { 1579 ip1dbg(("ndp_input: pullupmsg failed\n")); 1580 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1581 goto done; 1582 } 1583 ip6h = (ip6_t *)mp->b_rptr; 1584 if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 1585 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 1586 BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 1587 goto done; 1588 } 1589 /* 1590 * NDP does not accept any extension headers between the 1591 * IP header and the ICMP header since e.g. a routing 1592 * header could be dangerous. 1593 * This assumes that any AH or ESP headers are removed 1594 * by ip prior to passing the packet to ndp_input. 1595 */ 1596 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 1597 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 1598 ip6h->ip6_nxt)); 1599 BUMP_MIB(mib, ipv6IfIcmpInErrors); 1600 goto done; 1601 } 1602 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1603 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 1604 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 1605 if (icmp_nd->icmp6_code != 0) { 1606 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 1607 BUMP_MIB(mib, ipv6IfIcmpInErrors); 1608 goto done; 1609 } 1610 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1611 /* 1612 * Make sure packet length is large enough for either 1613 * a NS or a NA icmp packet. 1614 */ 1615 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 1616 ip1dbg(("ndp_input: packet too short\n")); 1617 BUMP_MIB(mib, ipv6IfIcmpInErrors); 1618 goto done; 1619 } 1620 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 1621 ndp_input_solicit(ill, mp); 1622 } else { 1623 ndp_input_advert(ill, mp); 1624 } 1625 done: 1626 freemsg(mp); 1627 } 1628 1629 /* 1630 * nce_xmit is called to form and transmit a ND solicitation or 1631 * advertisement ICMP packet. 
1632 * If source address is unspecified, appropriate source address 1633 * and link layer address will be chosen here. This function 1634 * *always* sends the link layer option. 1635 * It returns B_FALSE only if it does a successful put() to the 1636 * corresponding ill's ill_wq otherwise returns B_TRUE. 1637 */ 1638 static boolean_t 1639 nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, 1640 boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target, 1641 int flag) 1642 { 1643 uint32_t len; 1644 icmp6_t *icmp6; 1645 mblk_t *mp; 1646 ip6_t *ip6h; 1647 nd_opt_hdr_t *opt; 1648 uint_t plen; 1649 ip6i_t *ip6i; 1650 ipif_t *src_ipif = NULL; 1651 1652 /* 1653 * If we have a unspecified source(sender) address, select a 1654 * proper source address for the solicitation here itself so 1655 * that we can initialize the h/w address correctly. This is 1656 * needed for interface groups as source address can come from 1657 * the whole group and the h/w address initialized from ill will 1658 * be wrong if the source address comes from a different ill. 1659 * 1660 * Note that the NA never comes here with the unspecified source 1661 * address. The following asserts that whenever the source 1662 * address is specified, the haddr also should be specified. 1663 */ 1664 ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); 1665 1666 if (IN6_IS_ADDR_UNSPECIFIED(sender)) { 1667 ASSERT(operation != ND_NEIGHBOR_ADVERT); 1668 /* 1669 * Pick a source address for this solicitation, but 1670 * restrict the selection to addresses assigned to the 1671 * output interface (or interface group). We do this 1672 * because the destination will create a neighbor cache 1673 * entry for the source address of this packet, so the 1674 * source address had better be a valid neighbor. 
1675 */ 1676 src_ipif = ipif_select_source_v6(ill, target, B_TRUE, 1677 IPV6_PREFER_SRC_DEFAULT, GLOBAL_ZONEID); 1678 if (src_ipif == NULL) { 1679 char buf[INET6_ADDRSTRLEN]; 1680 1681 ip0dbg(("nce_xmit: No source ipif for dst %s\n", 1682 inet_ntop(AF_INET6, (char *)target, buf, 1683 sizeof (buf)))); 1684 return (B_TRUE); 1685 } 1686 sender = &src_ipif->ipif_v6src_addr; 1687 hwaddr_ill = src_ipif->ipif_ill; 1688 } 1689 1690 plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7)/8; 1691 /* 1692 * Always make sure that the NS/NA packets don't get load 1693 * spread. This is needed so that the probe packets sent 1694 * by the in.mpathd daemon can really go out on the desired 1695 * interface. Probe packets are made to go out on a desired 1696 * interface by including a ip6i with ATTACH_IF flag. As these 1697 * packets indirectly end up sending/receiving NS/NA packets 1698 * (neighbor doing NUD), we have to make sure that NA 1699 * also go out on the same interface. 1700 */ 1701 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 1702 plen * 8; 1703 mp = allocb(len, BPRI_LO); 1704 if (mp == NULL) { 1705 if (src_ipif != NULL) 1706 ipif_refrele(src_ipif); 1707 return (B_TRUE); 1708 } 1709 bzero((char *)mp->b_rptr, len); 1710 mp->b_wptr = mp->b_rptr + len; 1711 1712 ip6i = (ip6i_t *)mp->b_rptr; 1713 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1714 ip6i->ip6i_nxt = IPPROTO_RAW; 1715 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 1716 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 1717 1718 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1719 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1720 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 1721 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1722 ip6h->ip6_hops = IPV6_MAX_HOPS; 1723 ip6h->ip6_dst = *target; 1724 icmp6 = (icmp6_t *)&ip6h[1]; 1725 1726 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 1727 sizeof (nd_neighbor_advert_t)); 1728 1729 if (operation == ND_NEIGHBOR_SOLICIT) { 1730 
nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 1731 1732 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 1733 ip6h->ip6_src = *sender; 1734 ns->nd_ns_target = *target; 1735 if (!(flag & NDP_UNICAST)) { 1736 /* Form multicast address of the target */ 1737 ip6h->ip6_dst = ipv6_solicited_node_mcast; 1738 ip6h->ip6_dst.s6_addr32[3] |= 1739 ns->nd_ns_target.s6_addr32[3]; 1740 } 1741 } else { 1742 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 1743 1744 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1745 ip6h->ip6_src = *sender; 1746 na->nd_na_target = *sender; 1747 if (flag & NDP_ISROUTER) 1748 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 1749 if (flag & NDP_SOLICITED) 1750 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 1751 if (flag & NDP_ORIDE) 1752 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 1753 1754 } 1755 /* Fill in link layer address and option len */ 1756 opt->nd_opt_len = (uint8_t)plen; 1757 mutex_enter(&hwaddr_ill->ill_lock); 1758 bcopy(use_nd_lla ? hwaddr_ill->ill_nd_lla : hwaddr_ill->ill_phys_addr, 1759 &opt[1], hwaddr_ill->ill_nd_lla_len); 1760 mutex_exit(&hwaddr_ill->ill_lock); 1761 icmp6->icmp6_type = (uint8_t)operation; 1762 icmp6->icmp6_code = 0; 1763 /* 1764 * Prepare for checksum by putting icmp length in the icmp 1765 * checksum field. The checksum is calculated in ip_wput_v6. 1766 */ 1767 icmp6->icmp6_cksum = ip6h->ip6_plen; 1768 1769 if (src_ipif != NULL) 1770 ipif_refrele(src_ipif); 1771 if (canput(ill->ill_wq)) { 1772 put(ill->ill_wq, mp); 1773 return (B_FALSE); 1774 } 1775 freemsg(mp); 1776 return (B_TRUE); 1777 } 1778 1779 /* 1780 * Make a link layer address (does not include the SAP) from an nce. 1781 * To form the link layer address, use the last four bytes of ipv6 1782 * address passed in and the fixed offset stored in nce. 
1783 */ 1784 static void 1785 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 1786 { 1787 uchar_t *mask, *to; 1788 ill_t *ill = nce->nce_ill; 1789 int len; 1790 1791 if (ill->ill_net_type == IRE_IF_NORESOLVER) 1792 return; 1793 ASSERT(nce->nce_res_mp != NULL); 1794 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1795 ASSERT(nce->nce_flags & NCE_F_MAPPING); 1796 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 1797 ASSERT(addr != NULL); 1798 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1799 addrpos, ill->ill_nd_lla_len); 1800 len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 1801 IPV6_ADDR_LEN); 1802 mask = (uchar_t *)&nce->nce_extract_mask; 1803 mask += (IPV6_ADDR_LEN - len); 1804 addr += (IPV6_ADDR_LEN - len); 1805 to = addrpos + nce->nce_ll_extract_start; 1806 while (len-- > 0) 1807 *to++ |= *mask++ & *addr++; 1808 } 1809 1810 /* 1811 * Pass a cache report back out via NDD. 1812 */ 1813 /* ARGSUSED */ 1814 int 1815 ndp_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) 1816 { 1817 (void) mi_mpprintf(mp, "ifname hardware addr flags" 1818 " proto addr/mask"); 1819 ndp_walk(NULL, (pfi_t)nce_report1, (uchar_t *)mp); 1820 return (0); 1821 } 1822 1823 /* 1824 * convert a link level address of arbitrary length 1825 * to an ascii string. 1826 * The caller *must* have already verified that the string buffer 1827 * is large enough to hold the entire string, including trailing NULL. 1828 */ 1829 static void 1830 lla2ascii(uint8_t *lla, int addrlen, uchar_t *buf) 1831 { 1832 uchar_t addrbyte[8]; /* needs to hold ascii for a byte plus a NULL */ 1833 int i; 1834 size_t len; 1835 1836 buf[0] = '\0'; 1837 for (i = 0; i < addrlen; i++) { 1838 addrbyte[0] = '\0'; 1839 (void) sprintf((char *)addrbyte, "%02x:", (lla[i] & 0xff)); 1840 len = strlen((const char *)addrbyte); 1841 bcopy(addrbyte, buf, len); 1842 buf = buf + len; 1843 } 1844 *--buf = '\0'; 1845 } 1846 1847 /* 1848 * Add a single line to the NDP Cache Entry Report. 
1849 */ 1850 static void 1851 nce_report1(nce_t *nce, uchar_t *mp_arg) 1852 { 1853 ill_t *ill = nce->nce_ill; 1854 char local_buf[INET6_ADDRSTRLEN]; 1855 uchar_t flags_buf[10]; 1856 uint32_t flags = nce->nce_flags; 1857 mblk_t *mp = (mblk_t *)mp_arg; 1858 uchar_t *h; 1859 uchar_t *m = flags_buf; 1860 in6_addr_t v6addr; 1861 1862 /* 1863 * Lock the nce to protect nce_res_mp from being changed 1864 * if an external resolver address resolution completes 1865 * while nce_res_mp is being accessed here. 1866 * 1867 * Deal with all address formats, not just Ethernet-specific 1868 * In addition, make sure that the mblk has enough space 1869 * before writing to it. If is doesn't, allocate a new one. 1870 */ 1871 ASSERT(ill != NULL); 1872 v6addr = nce->nce_mask; 1873 if (flags & NCE_F_PERMANENT) 1874 *m++ = 'P'; 1875 if (flags & NCE_F_ISROUTER) 1876 *m++ = 'R'; 1877 if (flags & NCE_F_MAPPING) 1878 *m++ = 'M'; 1879 *m = '\0'; 1880 1881 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1882 size_t addrlen; 1883 uchar_t *addr_buf; 1884 dl_unitdata_req_t *dl; 1885 1886 mutex_enter(&nce->nce_lock); 1887 h = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 1888 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1889 if (ill->ill_flags & ILLF_XRESOLV) 1890 addrlen = (3 * (dl->dl_dest_addr_length)); 1891 else 1892 addrlen = (3 * (ill->ill_nd_lla_len)); 1893 if (addrlen <= 0) { 1894 mutex_exit(&nce->nce_lock); 1895 (void) mi_mpprintf(mp, 1896 "%8s %9s %5s %s/%d", 1897 ill->ill_name, 1898 "None", 1899 (uchar_t *)&flags_buf, 1900 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 1901 (char *)local_buf, sizeof (local_buf)), 1902 ip_mask_to_plen_v6(&v6addr)); 1903 } else { 1904 /* 1905 * Convert the hardware/lla address to ascii 1906 */ 1907 addr_buf = kmem_zalloc(addrlen, KM_NOSLEEP); 1908 if (addr_buf == NULL) { 1909 mutex_exit(&nce->nce_lock); 1910 return; 1911 } 1912 if (ill->ill_flags & ILLF_XRESOLV) 1913 lla2ascii((uint8_t *)h, dl->dl_dest_addr_length, 1914 addr_buf); 1915 else 1916 
lla2ascii((uint8_t *)h, ill->ill_nd_lla_len, 1917 addr_buf); 1918 mutex_exit(&nce->nce_lock); 1919 (void) mi_mpprintf(mp, "%8s %17s %5s %s/%d", 1920 ill->ill_name, addr_buf, (uchar_t *)&flags_buf, 1921 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 1922 (char *)local_buf, sizeof (local_buf)), 1923 ip_mask_to_plen_v6(&v6addr)); 1924 kmem_free(addr_buf, addrlen); 1925 } 1926 } else { 1927 (void) mi_mpprintf(mp, 1928 "%8s %9s %5s %s/%d", 1929 ill->ill_name, 1930 "None", 1931 (uchar_t *)&flags_buf, 1932 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 1933 (char *)local_buf, sizeof (local_buf)), 1934 ip_mask_to_plen_v6(&v6addr)); 1935 } 1936 } 1937 1938 mblk_t * 1939 nce_udreq_alloc(ill_t *ill) 1940 { 1941 mblk_t *template_mp = NULL; 1942 dl_unitdata_req_t *dlur; 1943 int sap_length; 1944 1945 sap_length = ill->ill_sap_length; 1946 template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 1947 ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 1948 if (template_mp == NULL) 1949 return (NULL); 1950 1951 dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 1952 dlur->dl_priority.dl_min = 0; 1953 dlur->dl_priority.dl_max = 0; 1954 dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 1955 dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); 1956 1957 /* Copy in the SAP value. */ 1958 NCE_LL_SAP_COPY(ill, template_mp); 1959 1960 return (template_mp); 1961 } 1962 1963 /* 1964 * NDP retransmit timer. 1965 * This timer goes off when: 1966 * a. It is time to retransmit NS for resolver. 1967 * b. It is time to send reachability probes. 1968 */ 1969 void 1970 ndp_timer(void *arg) 1971 { 1972 nce_t *nce = arg; 1973 ill_t *ill = nce->nce_ill; 1974 uint32_t ms; 1975 char addrbuf[INET6_ADDRSTRLEN]; 1976 mblk_t *mp; 1977 boolean_t dropped = B_FALSE; 1978 1979 /* 1980 * The timer has to be cancelled by ndp_delete before doing the final 1981 * refrele. So the NCE is guaranteed to exist when the timer runs 1982 * until it clears the timeout_id. 
Before clearing the timeout_id 1983 * bump up the refcnt so that we can continue to use the nce 1984 */ 1985 ASSERT(nce != NULL); 1986 1987 /* 1988 * Grab the ill_g_lock now itself to avoid lock order problems. 1989 * nce_solicit needs ill_g_lock to be able to traverse ills 1990 */ 1991 rw_enter(&ill_g_lock, RW_READER); 1992 mutex_enter(&nce->nce_lock); 1993 NCE_REFHOLD_LOCKED(nce); 1994 nce->nce_timeout_id = 0; 1995 1996 /* 1997 * Check the reachability state first. 1998 */ 1999 switch (nce->nce_state) { 2000 case ND_DELAY: 2001 rw_exit(&ill_g_lock); 2002 nce->nce_state = ND_PROBE; 2003 mutex_exit(&nce->nce_lock); 2004 (void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 2005 &ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST); 2006 if (ip_debug > 3) { 2007 /* ip2dbg */ 2008 pr_addr_dbg("ndp_timer: state for %s changed " 2009 "to PROBE\n", AF_INET6, &nce->nce_addr); 2010 } 2011 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2012 NCE_REFRELE(nce); 2013 return; 2014 case ND_PROBE: 2015 /* must be retransmit timer */ 2016 rw_exit(&ill_g_lock); 2017 nce->nce_pcnt--; 2018 ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 2019 nce->nce_pcnt >= -1); 2020 if (nce->nce_pcnt == 0) { 2021 /* Wait RetransTimer, before deleting the entry */ 2022 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2023 nce->nce_pcnt, inet_ntop(AF_INET6, 2024 &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 2025 mutex_exit(&nce->nce_lock); 2026 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2027 } else { 2028 /* 2029 * As per RFC2461, the nce gets deleted after 2030 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 2031 * Note that the first unicast solicitation is sent 2032 * during the DELAY state. 
2033 */ 2034 if (nce->nce_pcnt > 0) { 2035 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2036 nce->nce_pcnt, inet_ntop(AF_INET6, 2037 &nce->nce_addr, 2038 addrbuf, sizeof (addrbuf)))); 2039 mutex_exit(&nce->nce_lock); 2040 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, 2041 NULL, B_FALSE, &ipv6_all_zeros, 2042 &nce->nce_addr, NDP_UNICAST); 2043 if (dropped) { 2044 mutex_enter(&nce->nce_lock); 2045 nce->nce_pcnt++; 2046 mutex_exit(&nce->nce_lock); 2047 } 2048 NDP_RESTART_TIMER(nce, 2049 ill->ill_reachable_retrans_time); 2050 } else { 2051 /* No hope, delete the nce */ 2052 nce->nce_state = ND_UNREACHABLE; 2053 mutex_exit(&nce->nce_lock); 2054 if (ip_debug > 2) { 2055 /* ip1dbg */ 2056 pr_addr_dbg("ndp_timer: Delete IRE for" 2057 " dst %s\n", AF_INET6, 2058 &nce->nce_addr); 2059 } 2060 ndp_delete(nce); 2061 } 2062 } 2063 NCE_REFRELE(nce); 2064 return; 2065 case ND_INCOMPLETE: 2066 /* 2067 * Must be resolvers retransmit timer. 2068 */ 2069 for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) { 2070 ip6i_t *ip6i; 2071 ip6_t *ip6h; 2072 mblk_t *data_mp; 2073 2074 /* 2075 * Walk the list of packets queued, and see if there 2076 * are any multipathing probe packets. Such packets 2077 * are always queued at the head. Since this is a 2078 * retransmit timer firing, mark such packets as 2079 * delayed in ND resolution. This info will be used 2080 * in ip_wput_v6(). Multipathing probe packets will 2081 * always have an ip6i_t. Once we hit a packet without 2082 * it, we can break out of this loop. 2083 */ 2084 if (mp->b_datap->db_type == M_CTL) 2085 data_mp = mp->b_cont; 2086 else 2087 data_mp = mp; 2088 2089 ip6h = (ip6_t *)data_mp->b_rptr; 2090 if (ip6h->ip6_nxt != IPPROTO_RAW) 2091 break; 2092 2093 /* 2094 * This message should have been pulled up already in 2095 * ip_wput_v6. We can't do pullups here because the 2096 * b_next/b_prev is non-NULL. 
2097 */ 2098 ip6i = (ip6i_t *)ip6h; 2099 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2100 sizeof (ip6i_t) + IPV6_HDR_LEN); 2101 2102 /* Mark this packet as delayed due to ND resolution */ 2103 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2104 ip6i->ip6i_flags |= IP6I_ND_DELAYED; 2105 } 2106 if (nce->nce_qd_mp != NULL) { 2107 ms = nce_solicit(nce, NULL); 2108 rw_exit(&ill_g_lock); 2109 if (ms == 0) { 2110 if (nce->nce_state != ND_REACHABLE) { 2111 mutex_exit(&nce->nce_lock); 2112 nce_resolv_failed(nce); 2113 ndp_delete(nce); 2114 } else { 2115 mutex_exit(&nce->nce_lock); 2116 } 2117 } else { 2118 mutex_exit(&nce->nce_lock); 2119 NDP_RESTART_TIMER(nce, (clock_t)ms); 2120 } 2121 NCE_REFRELE(nce); 2122 return; 2123 } 2124 mutex_exit(&nce->nce_lock); 2125 rw_exit(&ill_g_lock); 2126 NCE_REFRELE(nce); 2127 break; 2128 case ND_REACHABLE : 2129 rw_exit(&ill_g_lock); 2130 if (nce->nce_flags & NCE_F_UNSOL_ADV && 2131 nce->nce_unsolicit_count != 0) { 2132 nce->nce_unsolicit_count--; 2133 mutex_exit(&nce->nce_lock); 2134 dropped = nce_xmit(ill, 2135 ND_NEIGHBOR_ADVERT, 2136 ill, /* ill to be used for hw addr */ 2137 B_FALSE, /* use ill_phys_addr */ 2138 &nce->nce_addr, 2139 &ipv6_all_hosts_mcast, 2140 nce->nce_flags | NDP_ORIDE); 2141 if (dropped) { 2142 mutex_enter(&nce->nce_lock); 2143 nce->nce_unsolicit_count++; 2144 mutex_exit(&nce->nce_lock); 2145 } 2146 if (nce->nce_unsolicit_count != 0) { 2147 NDP_RESTART_TIMER(nce, 2148 ip_ndp_unsolicit_interval); 2149 } 2150 } else { 2151 mutex_exit(&nce->nce_lock); 2152 } 2153 NCE_REFRELE(nce); 2154 break; 2155 default: 2156 rw_exit(&ill_g_lock); 2157 mutex_exit(&nce->nce_lock); 2158 NCE_REFRELE(nce); 2159 break; 2160 } 2161 } 2162 2163 /* 2164 * Set a link layer address from the ll_addr passed in. 2165 * Copy SAP from ill. 
2166 */ 2167 static void 2168 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2169 { 2170 ill_t *ill = nce->nce_ill; 2171 uchar_t *woffset; 2172 2173 ASSERT(ll_addr != NULL); 2174 /* Always called before fast_path_probe */ 2175 ASSERT(nce->nce_fp_mp == NULL); 2176 if (ill->ill_sap_length != 0) { 2177 /* 2178 * Copy the SAP type specified in the 2179 * request into the xmit template. 2180 */ 2181 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2182 } 2183 if (ill->ill_phys_addr_length > 0) { 2184 /* 2185 * The bcopy() below used to be called for the physical address 2186 * length rather than the link layer address length. For 2187 * ethernet and many other media, the phys_addr and lla are 2188 * identical. 2189 * However, with xresolv interfaces being introduced, the 2190 * phys_addr and lla are no longer the same, and the physical 2191 * address may not have any useful meaning, so we use the lla 2192 * for IPv6 address resolution and destination addressing. 2193 * 2194 * For PPP or other interfaces with a zero length 2195 * physical address, don't do anything here. 2196 * The bcopy() with a zero phys_addr length was previously 2197 * a no-op for interfaces with a zero-length physical address. 2198 * Using the lla for them would change the way they operate. 2199 * Doing nothing in such cases preserves expected behavior. 2200 */ 2201 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2202 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2203 } 2204 } 2205 2206 static boolean_t 2207 nce_cmp_ll_addr(nce_t *nce, char *ll_addr, uint32_t ll_addr_len) 2208 { 2209 ill_t *ill = nce->nce_ill; 2210 uchar_t *ll_offset; 2211 2212 ASSERT(nce->nce_res_mp != NULL); 2213 if (ll_addr == NULL) 2214 return (B_FALSE); 2215 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2216 if (bcmp(ll_addr, (char *)ll_offset, ll_addr_len) != 0) 2217 return (B_TRUE); 2218 return (B_FALSE); 2219 } 2220 2221 /* 2222 * Updates the link layer address or the reachability state of 2223 * a cache entry. 
Reset probe counter if needed. 2224 */ 2225 static void 2226 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2227 { 2228 ill_t *ill = nce->nce_ill; 2229 boolean_t need_stop_timer = B_FALSE; 2230 boolean_t need_fastpath_update = B_FALSE; 2231 2232 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2233 /* 2234 * If this interface does not do NUD, there is no point 2235 * in allowing an update to the cache entry. Although 2236 * we will respond to NS. 2237 * The only time we accept an update for a resolver when 2238 * NUD is turned off is when it has just been created. 2239 * Non-Resolvers will always be created as REACHABLE. 2240 */ 2241 if (new_state != ND_UNCHANGED) { 2242 if ((nce->nce_flags & NCE_F_NONUD) && 2243 (nce->nce_state != ND_INCOMPLETE)) 2244 return; 2245 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2246 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2247 need_stop_timer = B_TRUE; 2248 if (new_state == ND_REACHABLE) 2249 nce->nce_last = TICK_TO_MSEC(lbolt64); 2250 else { 2251 /* We force NUD in this case */ 2252 nce->nce_last = 0; 2253 } 2254 nce->nce_state = new_state; 2255 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2256 } 2257 /* 2258 * In case of fast path we need to free the the fastpath 2259 * M_DATA and do another probe. Otherwise we can just 2260 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2261 * whatever packets that happens to be transmitting at the time. 
2262 */ 2263 if (new_ll_addr != NULL) { 2264 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2265 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2266 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2267 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2268 if (nce->nce_fp_mp != NULL) { 2269 freemsg(nce->nce_fp_mp); 2270 nce->nce_fp_mp = NULL; 2271 } 2272 need_fastpath_update = B_TRUE; 2273 } 2274 mutex_exit(&nce->nce_lock); 2275 if (need_stop_timer) { 2276 (void) untimeout(nce->nce_timeout_id); 2277 nce->nce_timeout_id = 0; 2278 } 2279 if (need_fastpath_update) 2280 nce_fastpath(nce); 2281 mutex_enter(&nce->nce_lock); 2282 } 2283 2284 static void 2285 nce_queue_mp(nce_t *nce, mblk_t *mp) 2286 { 2287 uint_t count = 0; 2288 mblk_t **mpp; 2289 boolean_t head_insert = B_FALSE; 2290 ip6_t *ip6h; 2291 ip6i_t *ip6i; 2292 mblk_t *data_mp; 2293 2294 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2295 2296 if (mp->b_datap->db_type == M_CTL) 2297 data_mp = mp->b_cont; 2298 else 2299 data_mp = mp; 2300 ip6h = (ip6_t *)data_mp->b_rptr; 2301 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2302 /* 2303 * This message should have been pulled up already in 2304 * ip_wput_v6. We can't do pullups here because the message 2305 * could be from the nce_qd_mp which could have b_next/b_prev 2306 * non-NULL. 2307 */ 2308 ip6i = (ip6i_t *)ip6h; 2309 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2310 sizeof (ip6i_t) + IPV6_HDR_LEN); 2311 /* 2312 * Multipathing probe packets have IP6I_DROP_IFDELAYED set. 2313 * This has 2 aspects mentioned below. 2314 * 1. Perform head insertion in the nce_qd_mp for these packets. 2315 * This ensures that next retransmit of ND solicitation 2316 * will use the interface specified by the probe packet, 2317 * for both NS and NA. This corresponds to the src address 2318 * in the IPv6 packet. If we insert at tail, we will be 2319 * depending on the packet at the head for successful 2320 * ND resolution. 
This is not reliable, because the interface 2321 * on which the NA arrives could be different from the interface 2322 * on which the NS was sent, and if the receiving interface is 2323 * failed, it will appear that the sending interface is also 2324 * failed, causing in.mpathd to misdiagnose this as link 2325 * failure. 2326 * 2. Drop the original packet, if the ND resolution did not 2327 * succeed in the first attempt. However we will create the 2328 * nce and the ire, as soon as the ND resolution succeeds. 2329 * We don't gain anything by queueing multiple probe packets 2330 * and sending them back-to-back once resolution succeeds. 2331 * It is sufficient to send just 1 packet after ND resolution 2332 * succeeds. Since mpathd is sending down probe packets at a 2333 * constant rate, we don't need to send the queued packet. We 2334 * need to queue it only for NDP resolution. The benefit of 2335 * dropping the probe packets that were delayed in ND 2336 * resolution, is that in.mpathd will not see inflated 2337 * RTT. If the ND resolution does not succeed within 2338 * in.mpathd's failure detection time, mpathd may detect 2339 * a failure, and it does not matter whether the packet 2340 * was queued or dropped. 2341 */ 2342 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2343 head_insert = B_TRUE; 2344 } 2345 2346 for (mpp = &nce->nce_qd_mp; *mpp != NULL; 2347 mpp = &(*mpp)->b_next) { 2348 if (++count > 2349 nce->nce_ill->ill_max_buf) { 2350 mblk_t *tmp = nce->nce_qd_mp->b_next; 2351 2352 nce->nce_qd_mp->b_next = NULL; 2353 nce->nce_qd_mp->b_prev = NULL; 2354 freemsg(nce->nce_qd_mp); 2355 ip1dbg(("nce_queue_mp: pkt dropped\n")); 2356 nce->nce_qd_mp = tmp; 2357 } 2358 } 2359 /* put this on the list */ 2360 if (head_insert) { 2361 mp->b_next = nce->nce_qd_mp; 2362 nce->nce_qd_mp = mp; 2363 } else { 2364 *mpp = mp; 2365 } 2366 } 2367 2368 /* 2369 * Called when address resolution failed due to a timeout. 2370 * Send an ICMP unreachable in response to all queued packets. 
2371 */ 2372 void 2373 nce_resolv_failed(nce_t *nce) 2374 { 2375 mblk_t *mp, *nxt_mp, *first_mp; 2376 char buf[INET6_ADDRSTRLEN]; 2377 ip6_t *ip6h; 2378 zoneid_t zoneid = GLOBAL_ZONEID; 2379 2380 ip1dbg(("nce_resolv_failed: dst %s\n", 2381 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 2382 mutex_enter(&nce->nce_lock); 2383 mp = nce->nce_qd_mp; 2384 nce->nce_qd_mp = NULL; 2385 mutex_exit(&nce->nce_lock); 2386 while (mp != NULL) { 2387 nxt_mp = mp->b_next; 2388 mp->b_next = NULL; 2389 mp->b_prev = NULL; 2390 2391 first_mp = mp; 2392 if (mp->b_datap->db_type == M_CTL) { 2393 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 2394 ASSERT(io->ipsec_out_type == IPSEC_OUT); 2395 zoneid = io->ipsec_out_zoneid; 2396 ASSERT(zoneid != ALL_ZONES); 2397 mp = mp->b_cont; 2398 } 2399 2400 ip6h = (ip6_t *)mp->b_rptr; 2401 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2402 ip6i_t *ip6i; 2403 /* 2404 * This message should have been pulled up already 2405 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 2406 * the header is pulled up. 2407 */ 2408 ip6i = (ip6i_t *)ip6h; 2409 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 2410 sizeof (ip6i_t) + IPV6_HDR_LEN); 2411 mp->b_rptr += sizeof (ip6i_t); 2412 } 2413 /* 2414 * Ignore failure since icmp_unreachable_v6 will silently 2415 * drop packets with an unspecified source address. 2416 */ 2417 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid); 2418 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 2419 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE); 2420 mp = nxt_mp; 2421 } 2422 } 2423 2424 /* 2425 * Called by SIOCSNDP* ioctl to add/change an nce entry 2426 * and the corresponding attributes. 2427 * Disallow states other than ND_REACHABLE or ND_STALE. 
2428 */ 2429 int 2430 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 2431 { 2432 sin6_t *sin6; 2433 in6_addr_t *addr; 2434 nce_t *nce; 2435 int err; 2436 uint16_t new_flags = 0; 2437 uint16_t old_flags = 0; 2438 int inflags = lnr->lnr_flags; 2439 2440 if ((lnr->lnr_state_create != ND_REACHABLE) && 2441 (lnr->lnr_state_create != ND_STALE)) 2442 return (EINVAL); 2443 2444 sin6 = (sin6_t *)&lnr->lnr_addr; 2445 addr = &sin6->sin6_addr; 2446 2447 mutex_enter(&ndp_g_lock); 2448 /* We know it can not be mapping so just look in the hash table */ 2449 nce = nce_lookup_addr(ill, addr); 2450 if (nce != NULL) 2451 new_flags = nce->nce_flags; 2452 2453 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 2454 case NDF_ISROUTER_ON: 2455 new_flags |= NCE_F_ISROUTER; 2456 break; 2457 case NDF_ISROUTER_OFF: 2458 new_flags &= ~NCE_F_ISROUTER; 2459 break; 2460 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 2461 mutex_exit(&ndp_g_lock); 2462 if (nce != NULL) 2463 NCE_REFRELE(nce); 2464 return (EINVAL); 2465 } 2466 2467 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 2468 case NDF_ANYCAST_ON: 2469 new_flags |= NCE_F_ANYCAST; 2470 break; 2471 case NDF_ANYCAST_OFF: 2472 new_flags &= ~NCE_F_ANYCAST; 2473 break; 2474 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 2475 mutex_exit(&ndp_g_lock); 2476 if (nce != NULL) 2477 NCE_REFRELE(nce); 2478 return (EINVAL); 2479 } 2480 2481 switch (inflags & (NDF_PROXY_ON|NDF_PROXY_OFF)) { 2482 case NDF_PROXY_ON: 2483 new_flags |= NCE_F_PROXY; 2484 break; 2485 case NDF_PROXY_OFF: 2486 new_flags &= ~NCE_F_PROXY; 2487 break; 2488 case (NDF_PROXY_OFF|NDF_PROXY_ON): 2489 mutex_exit(&ndp_g_lock); 2490 if (nce != NULL) 2491 NCE_REFRELE(nce); 2492 return (EINVAL); 2493 } 2494 2495 if (nce == NULL) { 2496 err = ndp_add(ill, 2497 (uchar_t *)lnr->lnr_hdw_addr, 2498 addr, 2499 &ipv6_all_ones, 2500 &ipv6_all_zeros, 2501 0, 2502 new_flags, 2503 lnr->lnr_state_create, 2504 &nce); 2505 if (err != 0) { 2506 mutex_exit(&ndp_g_lock); 2507 ip1dbg(("ndp_sioc_update: Can't create 
NCE %d\n", err)); 2508 return (err); 2509 } 2510 } 2511 old_flags = nce->nce_flags; 2512 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 2513 /* 2514 * Router turned to host, delete all ires. 2515 * XXX Just delete the entry, but we need to add too. 2516 */ 2517 nce->nce_flags &= ~NCE_F_ISROUTER; 2518 mutex_exit(&ndp_g_lock); 2519 ndp_delete(nce); 2520 NCE_REFRELE(nce); 2521 return (0); 2522 } 2523 mutex_exit(&ndp_g_lock); 2524 2525 mutex_enter(&nce->nce_lock); 2526 nce->nce_flags = new_flags; 2527 mutex_exit(&nce->nce_lock); 2528 /* 2529 * Note that we ignore the state at this point, which 2530 * should be either STALE or REACHABLE. Instead we let 2531 * the link layer address passed in to determine the state 2532 * much like incoming packets. 2533 */ 2534 ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 2535 NCE_REFRELE(nce); 2536 return (0); 2537 } 2538 2539 /* 2540 * If the device driver supports it, we make nce_fp_mp to have 2541 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 2542 * The caller insures there is hold on nce for this function. 2543 * Note that since ill_fastpath_probe() copies the mblk there is 2544 * no need for the hold beyond this function. 2545 */ 2546 static void 2547 nce_fastpath(nce_t *nce) 2548 { 2549 ill_t *ill = nce->nce_ill; 2550 int res; 2551 2552 ASSERT(ill != NULL); 2553 if (nce->nce_fp_mp != NULL) { 2554 /* Already contains fastpath info */ 2555 return; 2556 } 2557 if (nce->nce_res_mp != NULL) { 2558 nce_fastpath_list_add(nce); 2559 res = ill_fastpath_probe(ill, nce->nce_res_mp); 2560 /* 2561 * EAGAIN is an indication of a transient error 2562 * i.e. allocation failure etc. leave the nce in the list it 2563 * will be updated when another probe happens for another ire 2564 * if not it will be taken out of the list when the ire is 2565 * deleted. 
2566 */ 2567 2568 if (res != 0 && res != EAGAIN) 2569 nce_fastpath_list_delete(nce); 2570 } 2571 } 2572 2573 /* 2574 * Drain the list of nce's waiting for fastpath response. 2575 */ 2576 void 2577 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 2578 void *arg) 2579 { 2580 2581 nce_t *next_nce; 2582 nce_t *current_nce; 2583 nce_t *first_nce; 2584 nce_t *prev_nce = NULL; 2585 2586 ASSERT(ill != NULL); 2587 2588 mutex_enter(&ill->ill_lock); 2589 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 2590 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 2591 next_nce = current_nce->nce_fastpath; 2592 /* 2593 * Take it off the list if we're flushing, or if the callback 2594 * routine tells us to do so. Otherwise, leave the nce in the 2595 * fastpath list to handle any pending response from the lower 2596 * layer. We can't drain the list when the callback routine 2597 * comparison failed, because the response is asynchronous in 2598 * nature, and may not arrive in the same order as the list 2599 * insertion. 2600 */ 2601 if (func == NULL || func(current_nce, arg)) { 2602 current_nce->nce_fastpath = NULL; 2603 if (current_nce == first_nce) 2604 ill->ill_fastpath_list = first_nce = next_nce; 2605 else 2606 prev_nce->nce_fastpath = next_nce; 2607 } else { 2608 /* previous element that is still in the list */ 2609 prev_nce = current_nce; 2610 } 2611 current_nce = next_nce; 2612 } 2613 mutex_exit(&ill->ill_lock); 2614 } 2615 2616 /* 2617 * Add nce to the nce fastpath list. 2618 */ 2619 void 2620 nce_fastpath_list_add(nce_t *nce) 2621 { 2622 ill_t *ill; 2623 2624 ill = nce->nce_ill; 2625 ASSERT(ill != NULL); 2626 2627 mutex_enter(&ill->ill_lock); 2628 mutex_enter(&nce->nce_lock); 2629 2630 /* 2631 * if nce has not been deleted and 2632 * is not already in the list add it. 
2633 */ 2634 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 2635 (nce->nce_fastpath == NULL)) { 2636 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 2637 ill->ill_fastpath_list = nce; 2638 } 2639 2640 mutex_exit(&nce->nce_lock); 2641 mutex_exit(&ill->ill_lock); 2642 } 2643 2644 /* 2645 * remove nce from the nce fastpath list. 2646 */ 2647 void 2648 nce_fastpath_list_delete(nce_t *nce) 2649 { 2650 nce_t *nce_ptr; 2651 2652 ill_t *ill; 2653 2654 ill = nce->nce_ill; 2655 ASSERT(ill != NULL); 2656 2657 mutex_enter(&ill->ill_lock); 2658 if (nce->nce_fastpath == NULL) 2659 goto done; 2660 2661 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 2662 2663 if (ill->ill_fastpath_list == nce) { 2664 ill->ill_fastpath_list = nce->nce_fastpath; 2665 } else { 2666 nce_ptr = ill->ill_fastpath_list; 2667 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 2668 if (nce_ptr->nce_fastpath == nce) { 2669 nce_ptr->nce_fastpath = nce->nce_fastpath; 2670 break; 2671 } 2672 nce_ptr = nce_ptr->nce_fastpath; 2673 } 2674 } 2675 2676 nce->nce_fastpath = NULL; 2677 done: 2678 mutex_exit(&ill->ill_lock); 2679 } 2680 2681 /* 2682 * Update all NCE's that are not in fastpath mode and 2683 * have an nce_fp_mp that matches mp. mp->b_cont contains 2684 * the fastpath header. 2685 * 2686 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 
2687 */ 2688 boolean_t 2689 ndp_fastpath_update(nce_t *nce, void *arg) 2690 { 2691 mblk_t *mp, *fp_mp; 2692 uchar_t *mp_rptr, *ud_mp_rptr; 2693 mblk_t *ud_mp = nce->nce_res_mp; 2694 ptrdiff_t cmplen; 2695 2696 if (nce->nce_flags & NCE_F_MAPPING) 2697 return (B_TRUE); 2698 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 2699 return (B_TRUE); 2700 2701 ip2dbg(("ndp_fastpath_update: trying\n")); 2702 mp = (mblk_t *)arg; 2703 mp_rptr = mp->b_rptr; 2704 cmplen = mp->b_wptr - mp_rptr; 2705 ASSERT(cmplen >= 0); 2706 ud_mp_rptr = ud_mp->b_rptr; 2707 /* 2708 * The nce is locked here to prevent any other threads 2709 * from accessing and changing nce_res_mp when the IPv6 address 2710 * becomes resolved to an lla while we're in the middle 2711 * of looking at and comparing the hardware address (lla). 2712 * It is also locked to prevent multiple threads in nce_fastpath_update 2713 * from examining nce_res_mp atthe same time. 2714 */ 2715 mutex_enter(&nce->nce_lock); 2716 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 2717 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 2718 mutex_exit(&nce->nce_lock); 2719 /* 2720 * Don't take the ire off the fastpath list yet, 2721 * since the response may come later. 2722 */ 2723 return (B_FALSE); 2724 } 2725 /* Matched - install mp as the fastpath mp */ 2726 ip1dbg(("ndp_fastpath_update: match\n")); 2727 fp_mp = dupb(mp->b_cont); 2728 if (fp_mp != NULL) { 2729 nce->nce_fp_mp = fp_mp; 2730 } 2731 mutex_exit(&nce->nce_lock); 2732 return (B_TRUE); 2733 } 2734 2735 /* 2736 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 2737 * driver. Note that it assumes IP is exclusive... 2738 */ 2739 /* ARGSUSED */ 2740 void 2741 ndp_fastpath_flush(nce_t *nce, char *arg) 2742 { 2743 if (nce->nce_flags & NCE_F_MAPPING) 2744 return; 2745 /* No fastpath info? */ 2746 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 2747 return; 2748 2749 /* Just delete the NCE... 
*/ 2750 ndp_delete(nce); 2751 } 2752 2753 /* 2754 * Return a pointer to a given option in the packet. 2755 * Assumes that option part of the packet have already been validated. 2756 */ 2757 nd_opt_hdr_t * 2758 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 2759 { 2760 while (optlen > 0) { 2761 if (opt->nd_opt_type == opt_type) 2762 return (opt); 2763 optlen -= 8 * opt->nd_opt_len; 2764 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 2765 } 2766 return (NULL); 2767 } 2768 2769 /* 2770 * Verify all option lengths present are > 0, also check to see 2771 * if the option lengths and packet length are consistent. 2772 */ 2773 boolean_t 2774 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 2775 { 2776 ASSERT(opt != NULL); 2777 while (optlen > 0) { 2778 if (opt->nd_opt_len == 0) 2779 return (B_FALSE); 2780 optlen -= 8 * opt->nd_opt_len; 2781 if (optlen < 0) 2782 return (B_FALSE); 2783 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 2784 } 2785 return (B_TRUE); 2786 } 2787 2788 /* 2789 * ndp_walk function. 2790 * Free a fraction of the NCE cache entries. 2791 * A fraction of zero means to not free any in that category. 2792 */ 2793 void 2794 ndp_cache_reclaim(nce_t *nce, char *arg) 2795 { 2796 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 2797 uint_t rand; 2798 2799 if (nce->nce_flags & NCE_F_PERMANENT) 2800 return; 2801 2802 rand = (uint_t)lbolt + 2803 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 2804 if (ncr->ncr_host != 0 && 2805 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 2806 ndp_delete(nce); 2807 return; 2808 } 2809 } 2810 2811 /* 2812 * ndp_walk function. 2813 * Count the number of NCEs that can be deleted. 2814 * These would be hosts but not routers. 
2815 */ 2816 void 2817 ndp_cache_count(nce_t *nce, char *arg) 2818 { 2819 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 2820 2821 if (nce->nce_flags & NCE_F_PERMANENT) 2822 return; 2823 2824 ncc->ncc_total++; 2825 if (!(nce->nce_flags & NCE_F_ISROUTER)) 2826 ncc->ncc_host++; 2827 } 2828 2829 #ifdef NCE_DEBUG 2830 th_trace_t * 2831 th_trace_nce_lookup(nce_t *nce) 2832 { 2833 int bucket_id; 2834 th_trace_t *th_trace; 2835 2836 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2837 2838 bucket_id = IP_TR_HASH(curthread); 2839 ASSERT(bucket_id < IP_TR_HASH_MAX); 2840 2841 for (th_trace = nce->nce_trace[bucket_id]; th_trace != NULL; 2842 th_trace = th_trace->th_next) { 2843 if (th_trace->th_id == curthread) 2844 return (th_trace); 2845 } 2846 return (NULL); 2847 } 2848 2849 void 2850 nce_trace_ref(nce_t *nce) 2851 { 2852 int bucket_id; 2853 th_trace_t *th_trace; 2854 2855 /* 2856 * Attempt to locate the trace buffer for the curthread. 2857 * If it does not exist, then allocate a new trace buffer 2858 * and link it in list of trace bufs for this ipif, at the head 2859 */ 2860 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2861 2862 if (nce->nce_trace_disable == B_TRUE) 2863 return; 2864 2865 th_trace = th_trace_nce_lookup(nce); 2866 if (th_trace == NULL) { 2867 bucket_id = IP_TR_HASH(curthread); 2868 th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t), 2869 KM_NOSLEEP); 2870 if (th_trace == NULL) { 2871 nce->nce_trace_disable = B_TRUE; 2872 nce_trace_inactive(nce); 2873 return; 2874 } 2875 th_trace->th_id = curthread; 2876 th_trace->th_next = nce->nce_trace[bucket_id]; 2877 th_trace->th_prev = &nce->nce_trace[bucket_id]; 2878 if (th_trace->th_next != NULL) 2879 th_trace->th_next->th_prev = &th_trace->th_next; 2880 nce->nce_trace[bucket_id] = th_trace; 2881 } 2882 ASSERT(th_trace->th_refcnt < TR_BUF_MAX - 1); 2883 th_trace->th_refcnt++; 2884 th_trace_rrecord(th_trace); 2885 } 2886 2887 void 2888 nce_untrace_ref(nce_t *nce) 2889 { 2890 th_trace_t *th_trace; 2891 2892 
ASSERT(MUTEX_HELD(&nce->nce_lock)); 2893 2894 if (nce->nce_trace_disable == B_TRUE) 2895 return; 2896 2897 th_trace = th_trace_nce_lookup(nce); 2898 ASSERT(th_trace != NULL && th_trace->th_refcnt > 0); 2899 2900 th_trace_rrecord(th_trace); 2901 th_trace->th_refcnt--; 2902 } 2903 2904 void 2905 nce_trace_inactive(nce_t *nce) 2906 { 2907 th_trace_t *th_trace; 2908 int i; 2909 2910 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2911 2912 for (i = 0; i < IP_TR_HASH_MAX; i++) { 2913 while (nce->nce_trace[i] != NULL) { 2914 th_trace = nce->nce_trace[i]; 2915 2916 /* unlink th_trace and free it */ 2917 nce->nce_trace[i] = th_trace->th_next; 2918 if (th_trace->th_next != NULL) 2919 th_trace->th_next->th_prev = 2920 &nce->nce_trace[i]; 2921 2922 th_trace->th_next = NULL; 2923 th_trace->th_prev = NULL; 2924 kmem_free(th_trace, sizeof (th_trace_t)); 2925 } 2926 } 2927 2928 } 2929 2930 /* ARGSUSED */ 2931 int 2932 nce_thread_exit(nce_t *nce, caddr_t arg) 2933 { 2934 th_trace_t *th_trace; 2935 2936 mutex_enter(&nce->nce_lock); 2937 th_trace = th_trace_nce_lookup(nce); 2938 2939 if (th_trace == NULL) { 2940 mutex_exit(&nce->nce_lock); 2941 return (0); 2942 } 2943 2944 ASSERT(th_trace->th_refcnt == 0); 2945 2946 /* unlink th_trace and free it */ 2947 *th_trace->th_prev = th_trace->th_next; 2948 if (th_trace->th_next != NULL) 2949 th_trace->th_next->th_prev = th_trace->th_prev; 2950 th_trace->th_next = NULL; 2951 th_trace->th_prev = NULL; 2952 kmem_free(th_trace, sizeof (th_trace_t)); 2953 mutex_exit(&nce->nce_lock); 2954 return (0); 2955 } 2956 #endif 2957