1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/stropts.h> 32 #include <sys/sysmacros.h> 33 #include <sys/errno.h> 34 #include <sys/strlog.h> 35 #include <sys/dlpi.h> 36 #include <sys/sockio.h> 37 #include <sys/tiuser.h> 38 #include <sys/tihdr.h> 39 #include <sys/socket.h> 40 #include <sys/ddi.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/vtrace.h> 44 #include <sys/kmem.h> 45 #include <sys/zone.h> 46 47 #include <net/if.h> 48 #include <net/if_types.h> 49 #include <net/if_dl.h> 50 #include <net/route.h> 51 #include <sys/sockio.h> 52 #include <netinet/in.h> 53 #include <netinet/in_systm.h> 54 #include <netinet/ip6.h> 55 #include <netinet/icmp6.h> 56 57 #include <inet/common.h> 58 #include <inet/mi.h> 59 #include <inet/mib2.h> 60 #include <inet/nd.h> 61 #include <inet/arp.h> 62 #include <inet/ip.h> 63 #include <inet/ip_multi.h> 64 #include <inet/ip_if.h> 65 #include <inet/ip_ire.h> 66 #include <inet/ip_rts.h> 67 #include <inet/ip6.h> 68 #include <inet/ip_ndp.h> 69 #include <inet/ipsec_impl.h> 70 #include <inet/ipsec_info.h> 71 72 /* 73 * Function names with nce_ prefix are static while function 74 * names with ndp_ prefix are used by rest of the IP. 75 */ 76 77 static boolean_t nce_cmp_ll_addr(nce_t *nce, char *new_ll_addr, 78 uint32_t ll_addr_len); 79 static void nce_fastpath(nce_t *nce); 80 static void nce_ire_delete(nce_t *nce); 81 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 82 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 83 static nce_t *nce_lookup_addr(ill_t *ill, const in6_addr_t *addr); 84 static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); 85 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 86 uchar_t *addr); 87 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 88 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 89 static void nce_report1(nce_t *nce, uchar_t *mp_arg); 90 static mblk_t *nce_udreq_alloc(ill_t *ill); 91 static void nce_update(nce_t *nce, uint16_t new_state, 92 uchar_t *new_ll_addr); 93 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 94 static boolean_t nce_xmit(ill_t *ill, uint32_t operation, 95 ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, 96 const in6_addr_t *target, int flag); 97 static void lla2ascii(uint8_t *lla, int addrlen, uchar_t *buf); 98 extern void th_trace_rrecord(th_trace_t *); 99 100 #ifdef NCE_DEBUG 101 void nce_trace_inactive(nce_t *); 102 #endif 103 104 /* NDP Cache Entry Hash Table */ 105 #define NCE_TABLE_SIZE 256 106 static nce_t *nce_hash_tbl[NCE_TABLE_SIZE]; 107 static nce_t *nce_mask_entries; /* mask not all ones */ 108 static int ndp_g_walker = 0; /* # of active thread */ 109 /* walking nce hash list */ 110 /* ndp_g_walker_cleanup will be true, when deletion have to be defered */ 111 static boolean_t ndp_g_walker_cleanup = B_FALSE; 112 113 #ifdef _BIG_ENDIAN 114 #define IN6_IS_ADDR_MC_SOLICITEDNODE(addr) \ 115 ((((addr)->s6_addr32[0] & 0xff020000) == 0xff020000) && \ 116 ((addr)->s6_addr32[1] == 0x0) && \ 117 ((addr)->s6_addr32[2] == 0x00000001) && \ 118 ((addr)->s6_addr32[3] & 0xff000000) == 0xff000000) 119 #else /* _BIG_ENDIAN */ 120 #define IN6_IS_ADDR_MC_SOLICITEDNODE(addr) \ 121 ((((addr)->s6_addr32[0] & 0x000002ff) == 0x000002ff) && \ 122 ((addr)->s6_addr32[1] == 0x0) && \ 123 ((addr)->s6_addr32[2] == 0x01000000) && \ 124 ((addr)->s6_addr32[3] & 0x000000ff) == 0x000000ff) 125 #endif 126 127 #define NCE_HASH_PTR(addr) \ 128 (&(nce_hash_tbl[NCE_ADDR_HASH_V6(addr, NCE_TABLE_SIZE)])) 129 130 /* 131 * NDP Cache Entry creation routine. 132 * Mapped entries will never do NUD . 133 * This routine must always be called with ndp_g_lock held. 134 * Prior to return, nce_refcnt is incremented. 135 */ 136 int 137 ndp_add(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 138 const in6_addr_t *mask, const in6_addr_t *extract_mask, 139 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 140 nce_t **newnce) 141 { 142 static nce_t nce_nil; 143 nce_t *nce; 144 mblk_t *mp; 145 mblk_t *template; 146 nce_t **ncep; 147 int err = 0; 148 boolean_t dropped = B_FALSE; 149 150 ASSERT(MUTEX_HELD(&ndp_g_lock)); 151 ASSERT(ill != NULL); 152 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 153 ip0dbg(("ndp_add: no addr\n")); 154 return (EINVAL); 155 } 156 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 157 ip0dbg(("ndp_add: flags = %x\n", (int)flags)); 158 return (EINVAL); 159 } 160 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 161 (flags & NCE_F_MAPPING)) { 162 ip0dbg(("ndp_add: extract mask zero for mapping")); 163 return (EINVAL); 164 } 165 /* 166 * Allocate the mblk to hold the nce. 167 * 168 * XXX This can come out of a separate cache - nce_cache. 169 * We don't need the mp anymore as there are no more 170 * "qwriter"s 171 */ 172 mp = allocb(sizeof (nce_t), BPRI_MED); 173 if (mp == NULL) 174 return (ENOMEM); 175 176 nce = (nce_t *)mp->b_rptr; 177 mp->b_wptr = (uchar_t *)&nce[1]; 178 *nce = nce_nil; 179 180 /* 181 * This one holds link layer address 182 */ 183 if (ill->ill_net_type == IRE_IF_RESOLVER) { 184 template = nce_udreq_alloc(ill); 185 } else { 186 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 187 ASSERT((ill->ill_resolver_mp != NULL)); 188 template = copyb(ill->ill_resolver_mp); 189 } 190 if (template == NULL) { 191 freeb(mp); 192 return (ENOMEM); 193 } 194 nce->nce_ill = ill; 195 nce->nce_flags = flags; 196 nce->nce_state = state; 197 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 198 nce->nce_rcnt = ill->ill_xmit_count; 199 nce->nce_addr = *addr; 200 nce->nce_mask = *mask; 201 nce->nce_extract_mask = *extract_mask; 202 nce->nce_ll_extract_start = hw_extract_start; 203 nce->nce_fp_mp = NULL; 204 nce->nce_res_mp = template; 205 if (state == ND_REACHABLE) 206 nce->nce_last = TICK_TO_MSEC(lbolt64); 207 else 208 nce->nce_last = 0; 209 nce->nce_qd_mp = NULL; 210 nce->nce_mp = mp; 211 if (hw_addr != NULL) 212 nce_set_ll(nce, hw_addr); 213 /* This one is for nce getting created */ 214 nce->nce_refcnt = 1; 215 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 216 if (nce->nce_flags & NCE_F_MAPPING) { 217 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 218 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 219 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 220 ncep = &nce_mask_entries; 221 } else { 222 ncep = ((nce_t **)NCE_HASH_PTR(*addr)); 223 } 224 225 #ifdef NCE_DEBUG 226 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 227 #endif 228 /* 229 * Atomically ensure that the ill is not CONDEMNED, before 230 * adding the NCE. 231 */ 232 mutex_enter(&ill->ill_lock); 233 if (ill->ill_state_flags & ILL_CONDEMNED) { 234 mutex_exit(&ill->ill_lock); 235 freeb(mp); 236 return (EINVAL); 237 } 238 if ((nce->nce_next = *ncep) != NULL) 239 nce->nce_next->nce_ptpn = &nce->nce_next; 240 *ncep = nce; 241 nce->nce_ptpn = ncep; 242 *newnce = nce; 243 /* This one is for nce being used by an active thread */ 244 NCE_REFHOLD(*newnce); 245 246 /* Bump up the number of nce's referencing this ill */ 247 ill->ill_nce_cnt++; 248 mutex_exit(&ill->ill_lock); 249 250 /* 251 * Before we insert the nce, honor the UNSOL_ADV flag. 252 * We cannot hold the ndp_g_lock and call nce_xmit 253 * which does a putnext. 254 */ 255 if (flags & NCE_F_UNSOL_ADV) { 256 flags |= NDP_ORIDE; 257 /* 258 * We account for the transmit below by assigning one 259 * less than the ndd variable. Subsequent decrements 260 * are done in ndp_timer. 261 */ 262 mutex_enter(&nce->nce_lock); 263 mutex_exit(&ndp_g_lock); 264 nce->nce_unsolicit_count = ip_ndp_unsolicit_count - 1; 265 mutex_exit(&nce->nce_lock); 266 dropped = nce_xmit(ill, 267 ND_NEIGHBOR_ADVERT, 268 ill, /* ill to be used for extracting ill_nd_lla */ 269 B_TRUE, /* use ill_nd_lla */ 270 addr, /* Source and target of the advertisement pkt */ 271 &ipv6_all_hosts_mcast, /* Destination of the packet */ 272 flags); 273 mutex_enter(&nce->nce_lock); 274 if (dropped) 275 nce->nce_unsolicit_count++; 276 if (nce->nce_unsolicit_count != 0) { 277 nce->nce_timeout_id = timeout(ndp_timer, nce, 278 MSEC_TO_TICK(ip_ndp_unsolicit_interval)); 279 } 280 mutex_exit(&nce->nce_lock); 281 mutex_enter(&ndp_g_lock); 282 } 283 done: 284 return (err); 285 } 286 287 int 288 ndp_lookup_then_add(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 289 const in6_addr_t *mask, const in6_addr_t *extract_mask, 290 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 291 nce_t **newnce) 292 { 293 int err = 0; 294 nce_t *nce; 295 296 mutex_enter(&ndp_g_lock); 297 nce = nce_lookup_addr(ill, addr); 298 if (nce == NULL) { 299 err = ndp_add(ill, 300 hw_addr, 301 addr, 302 mask, 303 extract_mask, 304 hw_extract_start, 305 flags, 306 state, 307 newnce); 308 } else { 309 *newnce = nce; 310 err = EEXIST; 311 } 312 mutex_exit(&ndp_g_lock); 313 return (err); 314 } 315 316 /* 317 * Remove all the CONDEMNED nces from the appropriate hash table. 318 * We create a private list of NCEs, these may have ires pointing 319 * to them, so the list will be passed through to clean up dependent 320 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 321 */ 322 static void 323 nce_remove(nce_t *nce, nce_t **free_nce_list) 324 { 325 nce_t *nce1; 326 nce_t **ptpn; 327 328 ASSERT(MUTEX_HELD(&ndp_g_lock)); 329 ASSERT(ndp_g_walker == 0); 330 for (; nce; nce = nce1) { 331 nce1 = nce->nce_next; 332 mutex_enter(&nce->nce_lock); 333 if (nce->nce_flags & NCE_F_CONDEMNED) { 334 ptpn = nce->nce_ptpn; 335 nce1 = nce->nce_next; 336 if (nce1 != NULL) 337 nce1->nce_ptpn = ptpn; 338 *ptpn = nce1; 339 nce->nce_ptpn = NULL; 340 nce->nce_next = NULL; 341 nce->nce_next = *free_nce_list; 342 *free_nce_list = nce; 343 } 344 mutex_exit(&nce->nce_lock); 345 } 346 } 347 348 /* 349 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 350 * will return this NCE. Also no new IREs will be created that 351 * point to this NCE (See ire_add_v6). Also no new timeouts will 352 * be started (See NDP_RESTART_TIMER). 353 * 2. Cancel any currently running timeouts. 354 * 3. If there is an ndp walker, return. The walker will do the cleanup. 355 * This ensures that walkers see a consistent list of NCEs while walking. 356 * 4. Otherwise remove the NCE from the list of NCEs 357 * 5. Delete all IREs pointing to this NCE. 358 */ 359 void 360 ndp_delete(nce_t *nce) 361 { 362 nce_t **ptpn; 363 nce_t *nce1; 364 365 /* Serialize deletes */ 366 mutex_enter(&nce->nce_lock); 367 if (nce->nce_flags & NCE_F_CONDEMNED) { 368 /* Some other thread is doing the delete */ 369 mutex_exit(&nce->nce_lock); 370 return; 371 } 372 /* 373 * Caller has a refhold. Also 1 ref for being in the list. Thus 374 * refcnt has to be >= 2 375 */ 376 ASSERT(nce->nce_refcnt >= 2); 377 nce->nce_flags |= NCE_F_CONDEMNED; 378 mutex_exit(&nce->nce_lock); 379 380 nce_fastpath_list_delete(nce); 381 382 /* 383 * Cancel any running timer. Timeout can't be restarted 384 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 385 * Passing invalid timeout id is fine. 386 */ 387 if (nce->nce_timeout_id != 0) { 388 (void) untimeout(nce->nce_timeout_id); 389 nce->nce_timeout_id = 0; 390 } 391 392 mutex_enter(&ndp_g_lock); 393 if (nce->nce_ptpn == NULL) { 394 /* 395 * The last ndp walker has already removed this nce from 396 * the list after we marked the nce CONDEMNED and before 397 * we grabbed the ndp_g_lock. 398 */ 399 mutex_exit(&ndp_g_lock); 400 return; 401 } 402 if (ndp_g_walker > 0) { 403 /* 404 * Can't unlink. The walker will clean up 405 */ 406 ndp_g_walker_cleanup = B_TRUE; 407 mutex_exit(&ndp_g_lock); 408 return; 409 } 410 411 /* 412 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 413 * the timer since it is marked CONDEMNED. 414 */ 415 ptpn = nce->nce_ptpn; 416 nce1 = nce->nce_next; 417 if (nce1 != NULL) 418 nce1->nce_ptpn = ptpn; 419 *ptpn = nce1; 420 nce->nce_ptpn = NULL; 421 nce->nce_next = NULL; 422 mutex_exit(&ndp_g_lock); 423 424 nce_ire_delete(nce); 425 } 426 427 void 428 ndp_inactive(nce_t *nce) 429 { 430 mblk_t **mpp; 431 ill_t *ill; 432 433 ASSERT(nce->nce_refcnt == 0); 434 ASSERT(MUTEX_HELD(&nce->nce_lock)); 435 ASSERT(nce->nce_fastpath == NULL); 436 437 /* Free all nce allocated messages */ 438 mpp = &nce->nce_first_mp_to_free; 439 do { 440 while (*mpp != NULL) { 441 mblk_t *mp; 442 443 mp = *mpp; 444 *mpp = mp->b_next; 445 mp->b_next = NULL; 446 mp->b_prev = NULL; 447 freemsg(mp); 448 } 449 } while (mpp++ != &nce->nce_last_mp_to_free); 450 451 #ifdef NCE_DEBUG 452 nce_trace_inactive(nce); 453 #endif 454 455 ill = nce->nce_ill; 456 mutex_enter(&ill->ill_lock); 457 ill->ill_nce_cnt--; 458 /* 459 * If the number of nce's associated with this ill have dropped 460 * to zero, check whether we need to restart any operation that 461 * is waiting for this to happen. 462 */ 463 if (ill->ill_nce_cnt == 0) { 464 /* ipif_ill_refrele_tail drops the ill_lock */ 465 ipif_ill_refrele_tail(ill); 466 } else { 467 mutex_exit(&ill->ill_lock); 468 } 469 mutex_destroy(&nce->nce_lock); 470 freeb(nce->nce_mp); 471 } 472 473 /* 474 * ndp_walk routine. Delete the nce if it is associated with the ill 475 * that is going away. Always called as a writer. 476 */ 477 void 478 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 479 { 480 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 481 ndp_delete(nce); 482 } 483 } 484 485 /* 486 * Walk a list of to be inactive NCEs and blow away all the ires. 487 */ 488 static void 489 nce_ire_delete_list(nce_t *nce) 490 { 491 nce_t *nce_next; 492 493 ASSERT(nce != NULL); 494 while (nce != NULL) { 495 nce_next = nce->nce_next; 496 nce->nce_next = NULL; 497 498 /* 499 * It is possible for the last ndp walker (this thread) 500 * to come here after ndp_delete has marked the nce CONDEMNED 501 * and before it has removed the nce from the fastpath list 502 * or called untimeout. So we need to do it here. It is safe 503 * for both ndp_delete and this thread to do it twice or 504 * even simultaneously since each of the threads has a 505 * reference on the nce. 506 */ 507 nce_fastpath_list_delete(nce); 508 /* 509 * Cancel any running timer. Timeout can't be restarted 510 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 511 * Passing invalid timeout id is fine. 512 */ 513 if (nce->nce_timeout_id != 0) { 514 (void) untimeout(nce->nce_timeout_id); 515 nce->nce_timeout_id = 0; 516 } 517 518 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 519 nce_ire_delete1, (char *)nce, nce->nce_ill); 520 NCE_REFRELE_NOTR(nce); 521 nce = nce_next; 522 } 523 } 524 525 /* 526 * Delete an ire when the nce goes away. 527 */ 528 /* ARGSUSED */ 529 static void 530 nce_ire_delete(nce_t *nce) 531 { 532 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 533 nce_ire_delete1, (char *)nce, nce->nce_ill); 534 NCE_REFRELE_NOTR(nce); 535 } 536 537 /* 538 * ire_walk routine used to delete every IRE that shares this nce 539 */ 540 static void 541 nce_ire_delete1(ire_t *ire, char *nce_arg) 542 { 543 nce_t *nce = (nce_t *)nce_arg; 544 545 ASSERT(ire->ire_type == IRE_CACHE); 546 547 if (ire->ire_nce == nce) 548 ire_delete(ire); 549 } 550 551 /* 552 * Cache entry lookup. Try to find an nce matching the parameters passed. 553 * If one is found, the refcnt on the nce will be incremented. 554 */ 555 nce_t * 556 ndp_lookup(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) 557 { 558 nce_t *nce; 559 560 if (!caller_holds_lock) 561 mutex_enter(&ndp_g_lock); 562 nce = nce_lookup_addr(ill, addr); 563 if (nce == NULL) 564 nce = nce_lookup_mapping(ill, addr); 565 if (!caller_holds_lock) 566 mutex_exit(&ndp_g_lock); 567 return (nce); 568 } 569 570 /* 571 * Cache entry lookup. Try to find an nce matching the parameters passed. 572 * Look only for exact entries (no mappings). If an nce is found, increment 573 * the hold count on that nce. 574 */ 575 static nce_t * 576 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr) 577 { 578 nce_t *nce; 579 580 ASSERT(ill != NULL); 581 ASSERT(MUTEX_HELD(&ndp_g_lock)); 582 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 583 return (NULL); 584 nce = *((nce_t **)NCE_HASH_PTR(*addr)); 585 for (; nce != NULL; nce = nce->nce_next) { 586 if (nce->nce_ill == ill) { 587 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 588 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 589 &ipv6_all_ones)) { 590 mutex_enter(&nce->nce_lock); 591 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 592 NCE_REFHOLD_LOCKED(nce); 593 mutex_exit(&nce->nce_lock); 594 break; 595 } 596 mutex_exit(&nce->nce_lock); 597 } 598 } 599 } 600 return (nce); 601 } 602 603 /* 604 * Cache entry lookup. Try to find an nce matching the parameters passed. 605 * Look only for mappings. 606 */ 607 static nce_t * 608 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 609 { 610 nce_t *nce; 611 612 ASSERT(ill != NULL); 613 ASSERT(MUTEX_HELD(&ndp_g_lock)); 614 if (!IN6_IS_ADDR_MULTICAST(addr)) 615 return (NULL); 616 nce = nce_mask_entries; 617 for (; nce != NULL; nce = nce->nce_next) 618 if (nce->nce_ill == ill && 619 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 620 mutex_enter(&nce->nce_lock); 621 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 622 NCE_REFHOLD_LOCKED(nce); 623 mutex_exit(&nce->nce_lock); 624 break; 625 } 626 mutex_exit(&nce->nce_lock); 627 } 628 return (nce); 629 } 630 631 /* 632 * Process passed in parameters either from an incoming packet or via 633 * user ioctl. 634 */ 635 void 636 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 637 { 638 ill_t *ill = nce->nce_ill; 639 uint32_t hw_addr_len = ill->ill_nd_lla_len; 640 mblk_t *mp; 641 boolean_t ll_updated = B_FALSE; 642 boolean_t ll_changed; 643 644 /* 645 * No updates of link layer address or the neighbor state is 646 * allowed, when the cache is in NONUD state. This still 647 * allows for responding to reachability solicitation. 648 */ 649 mutex_enter(&nce->nce_lock); 650 if (nce->nce_state == ND_INCOMPLETE) { 651 if (hw_addr == NULL) { 652 mutex_exit(&nce->nce_lock); 653 return; 654 } 655 nce_set_ll(nce, hw_addr); 656 /* 657 * Update nce state and send the queued packets 658 * back to ip this time ire will be added. 659 */ 660 if (flag & ND_NA_FLAG_SOLICITED) { 661 nce_update(nce, ND_REACHABLE, NULL); 662 } else { 663 nce_update(nce, ND_STALE, NULL); 664 } 665 mutex_exit(&nce->nce_lock); 666 nce_fastpath(nce); 667 mutex_enter(&nce->nce_lock); 668 mp = nce->nce_qd_mp; 669 nce->nce_qd_mp = NULL; 670 mutex_exit(&nce->nce_lock); 671 while (mp != NULL) { 672 mblk_t *nxt_mp; 673 674 nxt_mp = mp->b_next; 675 mp->b_next = NULL; 676 if (mp->b_prev != NULL) { 677 ill_t *inbound_ill; 678 queue_t *fwdq = NULL; 679 uint_t ifindex; 680 681 ifindex = (uint_t)(uintptr_t)mp->b_prev; 682 inbound_ill = ill_lookup_on_ifindex(ifindex, 683 B_TRUE, NULL, NULL, NULL, NULL); 684 if (inbound_ill == NULL) { 685 mp->b_prev = NULL; 686 freemsg(mp); 687 return; 688 } else { 689 fwdq = inbound_ill->ill_rq; 690 } 691 mp->b_prev = NULL; 692 /* 693 * Send a forwarded packet back into ip_rput_v6 694 * just as in ire_send_v6(). 695 * Extract the queue from b_prev (set in 696 * ip_rput_data_v6). 697 */ 698 if (fwdq != NULL) { 699 /* 700 * Forwarded packets hop count will 701 * get decremented in ip_rput_data_v6 702 */ 703 put(fwdq, mp); 704 } else { 705 /* 706 * Send locally originated packets back 707 * into * ip_wput_v6. 708 */ 709 put(ill->ill_wq, mp); 710 } 711 ill_refrele(inbound_ill); 712 } else { 713 put(ill->ill_wq, mp); 714 } 715 mp = nxt_mp; 716 } 717 return; 718 } 719 ll_changed = nce_cmp_ll_addr(nce, (char *)hw_addr, hw_addr_len); 720 if (!is_adv) { 721 /* If this is a SOLICITATION request only */ 722 if (ll_changed) 723 nce_update(nce, ND_STALE, hw_addr); 724 mutex_exit(&nce->nce_lock); 725 return; 726 } 727 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 728 /* If in any other state than REACHABLE, ignore */ 729 if (nce->nce_state == ND_REACHABLE) { 730 nce_update(nce, ND_STALE, NULL); 731 } 732 mutex_exit(&nce->nce_lock); 733 return; 734 } else { 735 if (ll_changed) { 736 nce_update(nce, ND_UNCHANGED, hw_addr); 737 ll_updated = B_TRUE; 738 } 739 if (flag & ND_NA_FLAG_SOLICITED) { 740 nce_update(nce, ND_REACHABLE, NULL); 741 } else { 742 if (ll_updated) { 743 nce_update(nce, ND_STALE, NULL); 744 } 745 } 746 mutex_exit(&nce->nce_lock); 747 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 748 NCE_F_ISROUTER)) { 749 ire_t *ire; 750 751 /* 752 * Router turned to host. We need to remove the 753 * entry as well as any default route that may be 754 * using this as a next hop. This is required by 755 * section 7.2.5 of RFC 2461. 756 */ 757 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 758 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 759 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, 760 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 761 MATCH_IRE_DEFAULT); 762 if (ire != NULL) { 763 ip_rts_rtmsg(RTM_DELETE, ire, 0); 764 ire_delete(ire); 765 ire_refrele(ire); 766 } 767 ndp_delete(nce); 768 } 769 } 770 } 771 772 /* 773 * Pass arg1 to the pfi supplied, along with each nce in existence. 774 * ndp_walk() places a REFHOLD on the nce and drops the lock when 775 * walking the hash list. 776 */ 777 void 778 ndp_walk_impl(ill_t *ill, pfi_t pfi, uchar_t *arg1, boolean_t trace) 779 { 780 781 nce_t *nce; 782 nce_t *nce1; 783 nce_t **ncep; 784 nce_t *free_nce_list = NULL; 785 786 mutex_enter(&ndp_g_lock); 787 ndp_g_walker++; /* Prevent ndp_delete from unlink and free of NCE */ 788 mutex_exit(&ndp_g_lock); 789 for (ncep = nce_hash_tbl; ncep < A_END(nce_hash_tbl); ncep++) { 790 for (nce = *ncep; nce; nce = nce1) { 791 nce1 = nce->nce_next; 792 if (ill == NULL || nce->nce_ill == ill) { 793 if (trace) { 794 NCE_REFHOLD(nce); 795 (*pfi)(nce, arg1); 796 NCE_REFRELE(nce); 797 } else { 798 NCE_REFHOLD_NOTR(nce); 799 (*pfi)(nce, arg1); 800 NCE_REFRELE_NOTR(nce); 801 } 802 } 803 } 804 } 805 for (nce = nce_mask_entries; nce; nce = nce1) { 806 nce1 = nce->nce_next; 807 if (ill == NULL || nce->nce_ill == ill) { 808 if (trace) { 809 NCE_REFHOLD(nce); 810 (*pfi)(nce, arg1); 811 NCE_REFRELE(nce); 812 } else { 813 NCE_REFHOLD_NOTR(nce); 814 (*pfi)(nce, arg1); 815 NCE_REFRELE_NOTR(nce); 816 } 817 } 818 } 819 mutex_enter(&ndp_g_lock); 820 ndp_g_walker--; 821 /* 822 * While NCE's are removed from global list they are placed 823 * in a private list, to be passed to nce_ire_delete_list(). 824 * The reason is, there may be ires pointing to this nce 825 * which needs to cleaned up. 826 */ 827 if (ndp_g_walker_cleanup && ndp_g_walker == 0) { 828 /* Time to delete condemned entries */ 829 for (ncep = nce_hash_tbl; ncep < A_END(nce_hash_tbl); ncep++) { 830 nce = *ncep; 831 if (nce != NULL) { 832 nce_remove(nce, &free_nce_list); 833 } 834 } 835 nce = nce_mask_entries; 836 if (nce != NULL) { 837 nce_remove(nce, &free_nce_list); 838 } 839 ndp_g_walker_cleanup = B_FALSE; 840 } 841 mutex_exit(&ndp_g_lock); 842 843 if (free_nce_list != NULL) { 844 nce_ire_delete_list(free_nce_list); 845 } 846 } 847 848 void 849 ndp_walk(ill_t *ill, pfi_t pfi, uchar_t *arg1) 850 { 851 ndp_walk_impl(ill, pfi, arg1, B_TRUE); 852 } 853 854 /* 855 * Prepend the zoneid using an ipsec_out_t for later use by functions like 856 * ip_rput_v6() after neighbor discovery has taken place. If the message 857 * block already has a M_CTL at the front of it, then simply set the zoneid 858 * appropriately. 859 */ 860 static mblk_t * 861 ndp_prepend_zone(mblk_t *mp, zoneid_t zoneid) 862 { 863 mblk_t *first_mp; 864 ipsec_out_t *io; 865 866 if (mp->b_datap->db_type == M_CTL) { 867 io = (ipsec_out_t *)mp->b_rptr; 868 ASSERT(io->ipsec_out_type == IPSEC_OUT); 869 io->ipsec_out_zoneid = zoneid; 870 return (mp); 871 } 872 873 first_mp = ipsec_alloc_ipsec_out(); 874 if (first_mp == NULL) 875 return (NULL); 876 io = (ipsec_out_t *)first_mp->b_rptr; 877 /* This is not a secure packet */ 878 io->ipsec_out_secure = B_FALSE; 879 io->ipsec_out_zoneid = zoneid; 880 first_mp->b_cont = mp; 881 return (first_mp); 882 } 883 884 /* 885 * Process resolve requests. Handles both mapped entries 886 * as well as cases that needs to be send out on the wire. 887 * Lookup a NCE for a given IRE. Regardless of whether one exists 888 * or one is created, we defer making ire point to nce until the 889 * ire is actually added at which point the nce_refcnt on the nce is 890 * incremented. This is done primarily to have symmetry between ire_add() 891 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 892 */ 893 int 894 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 895 { 896 nce_t *nce; 897 int err = 0; 898 uint32_t ms; 899 mblk_t *mp_nce = NULL; 900 901 ASSERT(ill != NULL); 902 if (IN6_IS_ADDR_MULTICAST(dst)) { 903 err = nce_set_multicast(ill, dst); 904 return (err); 905 } 906 err = ndp_lookup_then_add(ill, 907 NULL, /* No hardware address */ 908 dst, 909 &ipv6_all_ones, 910 &ipv6_all_zeros, 911 0, 912 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 913 ND_INCOMPLETE, 914 &nce); 915 916 switch (err) { 917 case 0: 918 /* 919 * New cache entry was created. Make sure that the state 920 * is not ND_INCOMPLETE. It can be in some other state 921 * even before we send out the solicitation as we could 922 * get un-solicited advertisements. 923 * 924 * If this is an XRESOLV interface, simply return 0, 925 * since we don't want to solicit just yet. 926 */ 927 if (ill->ill_flags & ILLF_XRESOLV) { 928 NCE_REFRELE(nce); 929 return (0); 930 } 931 rw_enter(&ill_g_lock, RW_READER); 932 mutex_enter(&nce->nce_lock); 933 if (nce->nce_state != ND_INCOMPLETE) { 934 mutex_exit(&nce->nce_lock); 935 rw_exit(&ill_g_lock); 936 NCE_REFRELE(nce); 937 return (0); 938 } 939 mp_nce = ndp_prepend_zone(mp, zoneid); 940 if (mp_nce == NULL) { 941 /* The caller will free mp */ 942 mutex_exit(&nce->nce_lock); 943 rw_exit(&ill_g_lock); 944 ndp_delete(nce); 945 NCE_REFRELE(nce); 946 return (ENOMEM); 947 } 948 ms = nce_solicit(nce, mp_nce); 949 rw_exit(&ill_g_lock); 950 if (ms == 0) { 951 /* The caller will free mp */ 952 if (mp_nce != mp) 953 freeb(mp_nce); 954 mutex_exit(&nce->nce_lock); 955 ndp_delete(nce); 956 NCE_REFRELE(nce); 957 return (EBUSY); 958 } 959 mutex_exit(&nce->nce_lock); 960 NDP_RESTART_TIMER(nce, (clock_t)ms); 961 NCE_REFRELE(nce); 962 return (EINPROGRESS); 963 case EEXIST: 964 /* Resolution in progress just queue the packet */ 965 mutex_enter(&nce->nce_lock); 966 if (nce->nce_state == ND_INCOMPLETE) { 967 mp_nce = ndp_prepend_zone(mp, zoneid); 968 if (mp_nce == NULL) { 969 err = ENOMEM; 970 } else { 971 nce_queue_mp(nce, mp_nce); 972 err = EINPROGRESS; 973 } 974 } else { 975 /* 976 * Any other state implies we have 977 * a nce but IRE needs to be added ... 978 * ire_add_v6() will take care of the 979 * the case when the nce becomes CONDEMNED 980 * before the ire is added to the table. 981 */ 982 err = 0; 983 } 984 mutex_exit(&nce->nce_lock); 985 NCE_REFRELE(nce); 986 break; 987 default: 988 ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); 989 break; 990 } 991 return (err); 992 } 993 994 /* 995 * When there is no resolver, the link layer template is passed in 996 * the IRE. 997 * Lookup a NCE for a given IRE. Regardless of whether one exists 998 * or one is created, we defer making ire point to nce until the 999 * ire is actually added at which point the nce_refcnt on the nce is 1000 * incremented. This is done primarily to have symmetry between ire_add() 1001 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1002 */ 1003 int 1004 ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 1005 { 1006 nce_t *nce; 1007 int err = 0; 1008 1009 ASSERT(ill != NULL); 1010 if (IN6_IS_ADDR_MULTICAST(dst)) { 1011 err = nce_set_multicast(ill, dst); 1012 return (err); 1013 } 1014 1015 err = ndp_lookup_then_add(ill, 1016 NULL, /* hardware address */ 1017 dst, 1018 &ipv6_all_ones, 1019 &ipv6_all_zeros, 1020 0, 1021 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1022 ND_REACHABLE, 1023 &nce); 1024 1025 switch (err) { 1026 case 0: 1027 /* 1028 * Cache entry with a proper resolver cookie was 1029 * created. 1030 */ 1031 nce_fastpath(nce); 1032 NCE_REFRELE(nce); 1033 break; 1034 case EEXIST: 1035 err = 0; 1036 NCE_REFRELE(nce); 1037 break; 1038 default: 1039 ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 1040 break; 1041 } 1042 return (err); 1043 } 1044 1045 /* 1046 * For each interface an entry is added for the unspecified multicast group. 1047 * Here that mapping is used to form the multicast cache entry for a particular 1048 * multicast destination. 1049 */ 1050 static int 1051 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1052 { 1053 nce_t *mnce; /* Multicast mapping entry */ 1054 nce_t *nce; 1055 uchar_t *hw_addr = NULL; 1056 int err = 0; 1057 1058 ASSERT(ill != NULL); 1059 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1060 1061 mutex_enter(&ndp_g_lock); 1062 nce = nce_lookup_addr(ill, dst); 1063 if (nce != NULL) { 1064 mutex_exit(&ndp_g_lock); 1065 NCE_REFRELE(nce); 1066 return (0); 1067 } 1068 /* No entry, now lookup for a mapping this should never fail */ 1069 mnce = nce_lookup_mapping(ill, dst); 1070 if (mnce == NULL) { 1071 /* Something broken for the interface. */ 1072 mutex_exit(&ndp_g_lock); 1073 return (ESRCH); 1074 } 1075 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1076 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1077 /* 1078 * For IRE_IF_RESOLVER a hardware mapping can be 1079 * generated, for IRE_IF_NORESOLVER, resolution cookie 1080 * in the ill is copied in ndp_add(). 1081 */ 1082 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1083 if (hw_addr == NULL) { 1084 mutex_exit(&ndp_g_lock); 1085 NCE_REFRELE(mnce); 1086 return (ENOMEM); 1087 } 1088 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1089 } 1090 NCE_REFRELE(mnce); 1091 /* 1092 * IRE_IF_NORESOLVER type simply copies the resolution 1093 * cookie passed in. So no hw_addr is needed. 1094 */ 1095 err = ndp_add(ill, 1096 hw_addr, 1097 dst, 1098 &ipv6_all_ones, 1099 &ipv6_all_zeros, 1100 0, 1101 NCE_F_NONUD, 1102 ND_REACHABLE, 1103 &nce); 1104 mutex_exit(&ndp_g_lock); 1105 if (hw_addr != NULL) 1106 kmem_free(hw_addr, ill->ill_nd_lla_len); 1107 if (err != 0) { 1108 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1109 return (err); 1110 } 1111 nce_fastpath(nce); 1112 NCE_REFRELE(nce); 1113 return (0); 1114 } 1115 1116 /* 1117 * Return the link layer address, and any flags of a nce. 1118 */ 1119 int 1120 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1121 { 1122 nce_t *nce; 1123 in6_addr_t *addr; 1124 sin6_t *sin6; 1125 dl_unitdata_req_t *dl; 1126 1127 ASSERT(ill != NULL); 1128 sin6 = (sin6_t *)&lnr->lnr_addr; 1129 addr = &sin6->sin6_addr; 1130 1131 nce = ndp_lookup(ill, addr, B_FALSE); 1132 if (nce == NULL) 1133 return (ESRCH); 1134 /* If in INCOMPLETE state, no link layer address is available yet */ 1135 if (nce->nce_state == ND_INCOMPLETE) 1136 goto done; 1137 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1138 if (ill->ill_flags & ILLF_XRESOLV) 1139 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1140 else 1141 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1142 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1143 sizeof (lnr->lnr_hdw_addr)); 1144 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1145 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1146 if (nce->nce_flags & NCE_F_ISROUTER) 1147 lnr->lnr_flags = NDF_ISROUTER_ON; 1148 if (nce->nce_flags & NCE_F_PROXY) 1149 lnr->lnr_flags |= NDF_PROXY_ON; 1150 if (nce->nce_flags & NCE_F_ANYCAST) 1151 lnr->lnr_flags |= NDF_ANYCAST_ON; 1152 done: 1153 NCE_REFRELE(nce); 1154 return (0); 1155 } 1156 1157 /* 1158 * Send Enable/Disable multicast reqs to driver. 1159 */ 1160 int 1161 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1162 uint32_t hw_addr_offset, mblk_t *mp) 1163 { 1164 nce_t *nce; 1165 uchar_t *hw_addr; 1166 1167 ASSERT(ill != NULL); 1168 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1169 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1170 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1171 freemsg(mp); 1172 return (EINVAL); 1173 } 1174 mutex_enter(&ndp_g_lock); 1175 nce = nce_lookup_mapping(ill, addr); 1176 if (nce == NULL) { 1177 mutex_exit(&ndp_g_lock); 1178 freemsg(mp); 1179 return (ESRCH); 1180 } 1181 mutex_exit(&ndp_g_lock); 1182 /* 1183 * Update dl_addr_length and dl_addr_offset for primitives that 1184 * have physical addresses as opposed to full saps 1185 */ 1186 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1187 case DL_ENABMULTI_REQ: 1188 /* Track the state if this is the first enabmulti */ 1189 if (ill->ill_dlpi_multicast_state == IDMS_UNKNOWN) 1190 ill->ill_dlpi_multicast_state = IDMS_INPROGRESS; 1191 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1192 break; 1193 case DL_DISABMULTI_REQ: 1194 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1195 break; 1196 default: 1197 NCE_REFRELE(nce); 1198 ip1dbg(("ndp_mcastreq: default\n")); 1199 return (EINVAL); 1200 } 1201 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1202 NCE_REFRELE(nce); 1203 putnext(ill->ill_wq, mp); 1204 return (0); 1205 } 1206 1207 /* 1208 * Send a neighbor solicitation. 1209 * Returns number of milliseconds after which we should either rexmit or abort. 1210 * Return of zero means we should abort. 1211 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1212 * 1213 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1214 * the packet. 1215 * NOTE: This routine does not consume mp. 1216 */ 1217 uint32_t 1218 nce_solicit(nce_t *nce, mblk_t *mp) 1219 { 1220 ill_t *ill; 1221 ill_t *src_ill; 1222 ip6_t *ip6h; 1223 in6_addr_t src; 1224 in6_addr_t dst; 1225 ipif_t *ipif; 1226 ip6i_t *ip6i; 1227 boolean_t dropped = B_FALSE; 1228 1229 ASSERT(RW_READ_HELD(&ill_g_lock)); 1230 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1231 ill = nce->nce_ill; 1232 ASSERT(ill != NULL); 1233 1234 if (nce->nce_rcnt == 0) { 1235 return (0); 1236 } 1237 1238 if (mp == NULL) { 1239 ASSERT(nce->nce_qd_mp != NULL); 1240 mp = nce->nce_qd_mp; 1241 } else { 1242 nce_queue_mp(nce, mp); 1243 } 1244 1245 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1246 if (mp->b_datap->db_type == M_CTL) 1247 mp = mp->b_cont; 1248 1249 ip6h = (ip6_t *)mp->b_rptr; 1250 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1251 /* 1252 * This message should have been pulled up already in 1253 * ip_wput_v6. We can't do pullups here because the message 1254 * could be from the nce_qd_mp which could have b_next/b_prev 1255 * non-NULL. 1256 */ 1257 ip6i = (ip6i_t *)ip6h; 1258 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 1259 sizeof (ip6i_t) + IPV6_HDR_LEN); 1260 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1261 } 1262 src = ip6h->ip6_src; 1263 /* 1264 * If the src of outgoing packet is one of the assigned interface 1265 * addresses use it, otherwise we will pick the source address below. 1266 */ 1267 src_ill = ill; 1268 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1269 if (ill->ill_group != NULL) 1270 src_ill = ill->ill_group->illgrp_ill; 1271 for (; src_ill != NULL; src_ill = src_ill->ill_group_next) { 1272 for (ipif = src_ill->ill_ipif; ipif != NULL; 1273 ipif = ipif->ipif_next) { 1274 if (IN6_ARE_ADDR_EQUAL(&src, 1275 &ipif->ipif_v6lcl_addr)) { 1276 break; 1277 } 1278 } 1279 if (ipif != NULL) 1280 break; 1281 } 1282 if (src_ill == NULL) { 1283 /* May be a forwarding packet */ 1284 src_ill = ill; 1285 src = ipv6_all_zeros; 1286 } 1287 } 1288 dst = nce->nce_addr; 1289 /* 1290 * If source address is unspecified, nce_xmit will choose 1291 * one for us and initialize the hardware address also 1292 * appropriately. 1293 */ 1294 if (IN6_IS_ADDR_UNSPECIFIED(&src)) 1295 src_ill = NULL; 1296 nce->nce_rcnt--; 1297 mutex_exit(&nce->nce_lock); 1298 rw_exit(&ill_g_lock); 1299 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, 1300 &dst, 0); 1301 rw_enter(&ill_g_lock, RW_READER); 1302 mutex_enter(&nce->nce_lock); 1303 if (dropped) 1304 nce->nce_rcnt++; 1305 return (ill->ill_reachable_retrans_time); 1306 } 1307 1308 void 1309 ndp_input_solicit(ill_t *ill, mblk_t *mp) 1310 { 1311 nd_neighbor_solicit_t *ns; 1312 uint32_t hlen = ill->ill_nd_lla_len; 1313 uchar_t *haddr = NULL; 1314 icmp6_t *icmp_nd; 1315 ip6_t *ip6h; 1316 nce_t *our_nce = NULL; 1317 in6_addr_t target; 1318 in6_addr_t src; 1319 int len; 1320 int flag = 0; 1321 nd_opt_hdr_t *opt = NULL; 1322 boolean_t bad_solicit = B_FALSE; 1323 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1324 1325 ip6h = (ip6_t *)mp->b_rptr; 1326 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1327 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1328 src = ip6h->ip6_src; 1329 ns = (nd_neighbor_solicit_t *)icmp_nd; 1330 target = ns->nd_ns_target; 1331 if (IN6_IS_ADDR_MULTICAST(&target)) { 1332 if (ip_debug > 2) { 1333 /* ip1dbg */ 1334 pr_addr_dbg("ndp_input_solicit: Target is" 1335 " multicast! %s\n", AF_INET6, &target); 1336 } 1337 bad_solicit = B_TRUE; 1338 goto done; 1339 } 1340 if (len > sizeof (nd_neighbor_solicit_t)) { 1341 /* Options present */ 1342 opt = (nd_opt_hdr_t *)&ns[1]; 1343 len -= sizeof (nd_neighbor_solicit_t); 1344 if (!ndp_verify_optlen(opt, len)) { 1345 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1346 bad_solicit = B_TRUE; 1347 goto done; 1348 } 1349 } 1350 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1351 /* Check to see if this is a valid DAD solicitation */ 1352 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1353 if (ip_debug > 2) { 1354 /* ip1dbg */ 1355 pr_addr_dbg("ndp_input_solicit: IPv6 " 1356 "Destination is not solicited node " 1357 "multicast %s\n", AF_INET6, 1358 &ip6h->ip6_dst); 1359 } 1360 bad_solicit = B_TRUE; 1361 goto done; 1362 } 1363 } 1364 1365 our_nce = ndp_lookup(ill, &target, B_FALSE); 1366 /* 1367 * If this is a valid Solicitation, a permanent 1368 * entry should exist in the cache 1369 */ 1370 if (our_nce == NULL || 1371 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1372 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1373 "ifname=%s ", ill->ill_name)); 1374 if (ip_debug > 2) { 1375 /* ip1dbg */ 1376 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1377 } 1378 bad_solicit = B_TRUE; 1379 goto done; 1380 } 1381 1382 /* At this point we should have a verified NS per spec */ 1383 if (opt != NULL) { 1384 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1385 if (opt != NULL) { 1386 /* 1387 * No source link layer address option should 1388 * be present in a valid DAD request. 1389 */ 1390 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1391 ip1dbg(("ndp_input_solicit: source link-layer " 1392 "address option present with an " 1393 "unspecified source. \n")); 1394 bad_solicit = B_TRUE; 1395 goto done; 1396 } 1397 haddr = (uchar_t *)&opt[1]; 1398 if (hlen > opt->nd_opt_len * 8 || 1399 hlen == 0) { 1400 bad_solicit = B_TRUE; 1401 goto done; 1402 } 1403 } 1404 } 1405 /* Set override flag, it will be reset later if need be. */ 1406 flag |= NDP_ORIDE; 1407 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1408 flag |= NDP_UNICAST; 1409 } 1410 1411 /* 1412 * Create/update the entry for the soliciting node. 1413 * or respond to outstanding queries, don't if 1414 * the source is unspecified address. 1415 */ 1416 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1417 int err = 0; 1418 nce_t *nnce; 1419 1420 err = ndp_lookup_then_add(ill, 1421 haddr, 1422 &src, /* Soliciting nodes address */ 1423 &ipv6_all_ones, 1424 &ipv6_all_zeros, 1425 0, 1426 0, 1427 ND_STALE, 1428 &nnce); 1429 switch (err) { 1430 case 0: 1431 /* done with this entry */ 1432 NCE_REFRELE(nnce); 1433 break; 1434 case EEXIST: 1435 /* 1436 * B_FALSE indicates this is not an 1437 * an advertisement. 1438 */ 1439 ndp_process(nnce, haddr, 0, B_FALSE); 1440 NCE_REFRELE(nnce); 1441 break; 1442 default: 1443 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 1444 err)); 1445 goto done; 1446 } 1447 flag |= NDP_SOLICITED; 1448 } else { 1449 /* 1450 * This is a DAD req, multicast the advertisement 1451 * to the all-nodes address. 1452 */ 1453 src = ipv6_all_hosts_mcast; 1454 } 1455 if (our_nce->nce_flags & NCE_F_ISROUTER) 1456 flag |= NDP_ISROUTER; 1457 if (our_nce->nce_flags & NCE_F_PROXY) 1458 flag &= ~NDP_ORIDE; 1459 /* Response to a solicitation */ 1460 (void) nce_xmit(ill, 1461 ND_NEIGHBOR_ADVERT, 1462 ill, /* ill to be used for extracting ill_nd_lla */ 1463 B_TRUE, /* use ill_nd_lla */ 1464 &target, /* Source and target of the advertisement pkt */ 1465 &src, /* IP Destination (source of original pkt) */ 1466 flag); 1467 done: 1468 if (bad_solicit) 1469 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 1470 if (our_nce != NULL) 1471 NCE_REFRELE(our_nce); 1472 } 1473 1474 void 1475 ndp_input_advert(ill_t *ill, mblk_t *mp) 1476 { 1477 nd_neighbor_advert_t *na; 1478 uint32_t hlen = ill->ill_nd_lla_len; 1479 uchar_t *haddr = NULL; 1480 icmp6_t *icmp_nd; 1481 ip6_t *ip6h; 1482 nce_t *dst_nce = NULL; 1483 in6_addr_t target; 1484 nd_opt_hdr_t *opt = NULL; 1485 int len; 1486 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1487 1488 ip6h = (ip6_t *)mp->b_rptr; 1489 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1490 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1491 na = (nd_neighbor_advert_t *)icmp_nd; 1492 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 1493 (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { 1494 ip1dbg(("ndp_input_advert: Target is multicast but the " 1495 "solicited flag is not zero\n")); 1496 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 1497 return; 1498 } 1499 target = na->nd_na_target; 1500 if (IN6_IS_ADDR_MULTICAST(&target)) { 1501 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 1502 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 1503 return; 1504 } 1505 if (len > sizeof (nd_neighbor_advert_t)) { 1506 opt = (nd_opt_hdr_t *)&na[1]; 1507 if (!ndp_verify_optlen(opt, 1508 len - sizeof (nd_neighbor_advert_t))) { 1509 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 1510 return; 1511 } 1512 /* At this point we have a verified NA per spec */ 1513 len -= sizeof (nd_neighbor_advert_t); 1514 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 1515 if (opt != NULL) { 1516 haddr = (uchar_t *)&opt[1]; 1517 if (hlen > opt->nd_opt_len * 8 || 1518 hlen == 0) { 1519 BUMP_MIB(mib, 1520 ipv6IfIcmpInBadNeighborAdvertisements); 1521 return; 1522 } 1523 } 1524 } 1525 1526 /* 1527 * If this interface is part of the group look at all the 1528 * ills in the group. 1529 */ 1530 rw_enter(&ill_g_lock, RW_READER); 1531 if (ill->ill_group != NULL) 1532 ill = ill->ill_group->illgrp_ill; 1533 1534 for (; ill != NULL; ill = ill->ill_group_next) { 1535 mutex_enter(&ill->ill_lock); 1536 if (!ILL_CAN_LOOKUP(ill)) { 1537 mutex_exit(&ill->ill_lock); 1538 continue; 1539 } 1540 ill_refhold_locked(ill); 1541 mutex_exit(&ill->ill_lock); 1542 dst_nce = ndp_lookup(ill, &target, B_FALSE); 1543 /* We have to drop the lock since ndp_process calls put* */ 1544 rw_exit(&ill_g_lock); 1545 if (dst_nce != NULL) { 1546 if (na->nd_na_flags_reserved & 1547 ND_NA_FLAG_ROUTER) { 1548 dst_nce->nce_flags |= NCE_F_ISROUTER; 1549 } 1550 /* B_TRUE indicates this an advertisement */ 1551 ndp_process(dst_nce, haddr, 1552 na->nd_na_flags_reserved, B_TRUE); 1553 NCE_REFRELE(dst_nce); 1554 } 1555 rw_enter(&ill_g_lock, RW_READER); 1556 ill_refrele(ill); 1557 } 1558 rw_exit(&ill_g_lock); 1559 } 1560 1561 /* 1562 * Process NDP neighbor solicitation/advertisement messages. 1563 * The checksum has already checked o.k before reaching here. 1564 */ 1565 void 1566 ndp_input(ill_t *ill, mblk_t *mp) 1567 { 1568 icmp6_t *icmp_nd; 1569 ip6_t *ip6h; 1570 int len; 1571 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1572 1573 1574 if (!pullupmsg(mp, -1)) { 1575 ip1dbg(("ndp_input: pullupmsg failed\n")); 1576 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 1577 goto done; 1578 } 1579 ip6h = (ip6_t *)mp->b_rptr; 1580 if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 1581 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 1582 BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 1583 goto done; 1584 } 1585 /* 1586 * NDP does not accept any extension headers between the 1587 * IP header and the ICMP header since e.g. a routing 1588 * header could be dangerous. 1589 * This assumes that any AH or ESP headers are removed 1590 * by ip prior to passing the packet to ndp_input. 1591 */ 1592 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 1593 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 1594 ip6h->ip6_nxt)); 1595 BUMP_MIB(mib, ipv6IfIcmpInErrors); 1596 goto done; 1597 } 1598 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1599 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 1600 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 1601 if (icmp_nd->icmp6_code != 0) { 1602 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 1603 BUMP_MIB(mib, ipv6IfIcmpInErrors); 1604 goto done; 1605 } 1606 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1607 /* 1608 * Make sure packet length is large enough for either 1609 * a NS or a NA icmp packet. 1610 */ 1611 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 1612 ip1dbg(("ndp_input: packet too short\n")); 1613 BUMP_MIB(mib, ipv6IfIcmpInErrors); 1614 goto done; 1615 } 1616 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 1617 ndp_input_solicit(ill, mp); 1618 } else { 1619 ndp_input_advert(ill, mp); 1620 } 1621 done: 1622 freemsg(mp); 1623 } 1624 1625 /* 1626 * nce_xmit is called to form and transmit a ND solicitation or 1627 * advertisement ICMP packet. 1628 * If source address is unspecified, appropriate source address 1629 * and link layer address will be chosen here. This function 1630 * *always* sends the link layer option. 1631 * It returns B_FALSE only if it does a successful put() to the 1632 * corresponding ill's ill_wq otherwise returns B_TRUE. 1633 */ 1634 static boolean_t 1635 nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, 1636 boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target, 1637 int flag) 1638 { 1639 uint32_t len; 1640 icmp6_t *icmp6; 1641 mblk_t *mp; 1642 ip6_t *ip6h; 1643 nd_opt_hdr_t *opt; 1644 uint_t plen; 1645 ip6i_t *ip6i; 1646 ipif_t *src_ipif = NULL; 1647 1648 /* 1649 * If we have a unspecified source(sender) address, select a 1650 * proper source address for the solicitation here itself so 1651 * that we can initialize the h/w address correctly. This is 1652 * needed for interface groups as source address can come from 1653 * the whole group and the h/w address initialized from ill will 1654 * be wrong if the source address comes from a different ill. 1655 * 1656 * Note that the NA never comes here with the unspecified source 1657 * address. The following asserts that whenever the source 1658 * address is specified, the haddr also should be specified. 1659 */ 1660 ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); 1661 1662 if (IN6_IS_ADDR_UNSPECIFIED(sender)) { 1663 ASSERT(operation != ND_NEIGHBOR_ADVERT); 1664 /* 1665 * Pick a source address for this solicitation, but 1666 * restrict the selection to addresses assigned to the 1667 * output interface (or interface group). We do this 1668 * because the destination will create a neighbor cache 1669 * entry for the source address of this packet, so the 1670 * source address had better be a valid neighbor. 1671 */ 1672 src_ipif = ipif_select_source_v6(ill, target, B_TRUE, 1673 IPV6_PREFER_SRC_DEFAULT, GLOBAL_ZONEID); 1674 if (src_ipif == NULL) { 1675 char buf[INET6_ADDRSTRLEN]; 1676 1677 ip0dbg(("nce_xmit: No source ipif for dst %s\n", 1678 inet_ntop(AF_INET6, (char *)target, buf, 1679 sizeof (buf)))); 1680 return (B_TRUE); 1681 } 1682 sender = &src_ipif->ipif_v6src_addr; 1683 hwaddr_ill = src_ipif->ipif_ill; 1684 } 1685 1686 plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7)/8; 1687 /* 1688 * Always make sure that the NS/NA packets don't get load 1689 * spread. This is needed so that the probe packets sent 1690 * by the in.mpathd daemon can really go out on the desired 1691 * interface. Probe packets are made to go out on a desired 1692 * interface by including a ip6i with ATTACH_IF flag. As these 1693 * packets indirectly end up sending/receiving NS/NA packets 1694 * (neighbor doing NUD), we have to make sure that NA 1695 * also go out on the same interface. 1696 */ 1697 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 1698 plen * 8; 1699 mp = allocb(len, BPRI_LO); 1700 if (mp == NULL) { 1701 if (src_ipif != NULL) 1702 ipif_refrele(src_ipif); 1703 return (B_TRUE); 1704 } 1705 bzero((char *)mp->b_rptr, len); 1706 mp->b_wptr = mp->b_rptr + len; 1707 1708 ip6i = (ip6i_t *)mp->b_rptr; 1709 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1710 ip6i->ip6i_nxt = IPPROTO_RAW; 1711 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 1712 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 1713 1714 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1715 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1716 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 1717 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1718 ip6h->ip6_hops = IPV6_MAX_HOPS; 1719 ip6h->ip6_dst = *target; 1720 icmp6 = (icmp6_t *)&ip6h[1]; 1721 1722 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 1723 sizeof (nd_neighbor_advert_t)); 1724 1725 if (operation == ND_NEIGHBOR_SOLICIT) { 1726 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 1727 1728 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 1729 ip6h->ip6_src = *sender; 1730 ns->nd_ns_target = *target; 1731 if (!(flag & NDP_UNICAST)) { 1732 /* Form multicast address of the target */ 1733 ip6h->ip6_dst = ipv6_solicited_node_mcast; 1734 ip6h->ip6_dst.s6_addr32[3] |= 1735 ns->nd_ns_target.s6_addr32[3]; 1736 } 1737 } else { 1738 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 1739 1740 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1741 ip6h->ip6_src = *sender; 1742 na->nd_na_target = *sender; 1743 if (flag & NDP_ISROUTER) 1744 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 1745 if (flag & NDP_SOLICITED) 1746 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 1747 if (flag & NDP_ORIDE) 1748 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 1749 1750 } 1751 /* Fill in link layer address and option len */ 1752 opt->nd_opt_len = (uint8_t)plen; 1753 mutex_enter(&hwaddr_ill->ill_lock); 1754 bcopy(use_nd_lla ? hwaddr_ill->ill_nd_lla : hwaddr_ill->ill_phys_addr, 1755 &opt[1], hwaddr_ill->ill_nd_lla_len); 1756 mutex_exit(&hwaddr_ill->ill_lock); 1757 icmp6->icmp6_type = (uint8_t)operation; 1758 icmp6->icmp6_code = 0; 1759 /* 1760 * Prepare for checksum by putting icmp length in the icmp 1761 * checksum field. The checksum is calculated in ip_wput_v6. 1762 */ 1763 icmp6->icmp6_cksum = ip6h->ip6_plen; 1764 1765 if (src_ipif != NULL) 1766 ipif_refrele(src_ipif); 1767 if (canput(ill->ill_wq)) { 1768 put(ill->ill_wq, mp); 1769 return (B_FALSE); 1770 } 1771 freemsg(mp); 1772 return (B_TRUE); 1773 } 1774 1775 /* 1776 * Make a link layer address (does not include the SAP) from an nce. 1777 * To form the link layer address, use the last four bytes of ipv6 1778 * address passed in and the fixed offset stored in nce. 1779 */ 1780 static void 1781 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 1782 { 1783 uchar_t *mask, *to; 1784 ill_t *ill = nce->nce_ill; 1785 int len; 1786 1787 if (ill->ill_net_type == IRE_IF_NORESOLVER) 1788 return; 1789 ASSERT(nce->nce_res_mp != NULL); 1790 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1791 ASSERT(nce->nce_flags & NCE_F_MAPPING); 1792 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 1793 ASSERT(addr != NULL); 1794 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1795 addrpos, ill->ill_nd_lla_len); 1796 len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 1797 IPV6_ADDR_LEN); 1798 mask = (uchar_t *)&nce->nce_extract_mask; 1799 mask += (IPV6_ADDR_LEN - len); 1800 addr += (IPV6_ADDR_LEN - len); 1801 to = addrpos + nce->nce_ll_extract_start; 1802 while (len-- > 0) 1803 *to++ |= *mask++ & *addr++; 1804 } 1805 1806 /* 1807 * Pass a cache report back out via NDD. 1808 */ 1809 /* ARGSUSED */ 1810 int 1811 ndp_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) 1812 { 1813 (void) mi_mpprintf(mp, "ifname hardware addr flags" 1814 " proto addr/mask"); 1815 ndp_walk(NULL, (pfi_t)nce_report1, (uchar_t *)mp); 1816 return (0); 1817 } 1818 1819 /* 1820 * convert a link level address of arbitrary length 1821 * to an ascii string. 1822 * The caller *must* have already verified that the string buffer 1823 * is large enough to hold the entire string, including trailing NULL. 1824 */ 1825 static void 1826 lla2ascii(uint8_t *lla, int addrlen, uchar_t *buf) 1827 { 1828 uchar_t addrbyte[8]; /* needs to hold ascii for a byte plus a NULL */ 1829 int i; 1830 size_t len; 1831 1832 buf[0] = '\0'; 1833 for (i = 0; i < addrlen; i++) { 1834 addrbyte[0] = '\0'; 1835 (void) sprintf((char *)addrbyte, "%02x:", (lla[i] & 0xff)); 1836 len = strlen((const char *)addrbyte); 1837 bcopy(addrbyte, buf, len); 1838 buf = buf + len; 1839 } 1840 *--buf = '\0'; 1841 } 1842 1843 /* 1844 * Add a single line to the NDP Cache Entry Report. 1845 */ 1846 static void 1847 nce_report1(nce_t *nce, uchar_t *mp_arg) 1848 { 1849 ill_t *ill = nce->nce_ill; 1850 char local_buf[INET6_ADDRSTRLEN]; 1851 uchar_t flags_buf[10]; 1852 uint32_t flags = nce->nce_flags; 1853 mblk_t *mp = (mblk_t *)mp_arg; 1854 uchar_t *h; 1855 uchar_t *m = flags_buf; 1856 in6_addr_t v6addr; 1857 1858 /* 1859 * Lock the nce to protect nce_res_mp from being changed 1860 * if an external resolver address resolution completes 1861 * while nce_res_mp is being accessed here. 1862 * 1863 * Deal with all address formats, not just Ethernet-specific 1864 * In addition, make sure that the mblk has enough space 1865 * before writing to it. If is doesn't, allocate a new one. 1866 */ 1867 ASSERT(ill != NULL); 1868 v6addr = nce->nce_mask; 1869 if (flags & NCE_F_PERMANENT) 1870 *m++ = 'P'; 1871 if (flags & NCE_F_ISROUTER) 1872 *m++ = 'R'; 1873 if (flags & NCE_F_MAPPING) 1874 *m++ = 'M'; 1875 *m = '\0'; 1876 1877 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1878 size_t addrlen; 1879 uchar_t *addr_buf; 1880 dl_unitdata_req_t *dl; 1881 1882 mutex_enter(&nce->nce_lock); 1883 h = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 1884 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1885 if (ill->ill_flags & ILLF_XRESOLV) 1886 addrlen = (3 * (dl->dl_dest_addr_length)); 1887 else 1888 addrlen = (3 * (ill->ill_nd_lla_len)); 1889 if (addrlen <= 0) { 1890 mutex_exit(&nce->nce_lock); 1891 (void) mi_mpprintf(mp, 1892 "%8s %9s %5s %s/%d", 1893 ill->ill_name, 1894 "None", 1895 (uchar_t *)&flags_buf, 1896 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 1897 (char *)local_buf, sizeof (local_buf)), 1898 ip_mask_to_plen_v6(&v6addr)); 1899 } else { 1900 /* 1901 * Convert the hardware/lla address to ascii 1902 */ 1903 addr_buf = kmem_zalloc(addrlen, KM_NOSLEEP); 1904 if (addr_buf == NULL) { 1905 mutex_exit(&nce->nce_lock); 1906 return; 1907 } 1908 if (ill->ill_flags & ILLF_XRESOLV) 1909 lla2ascii((uint8_t *)h, dl->dl_dest_addr_length, 1910 addr_buf); 1911 else 1912 lla2ascii((uint8_t *)h, ill->ill_nd_lla_len, 1913 addr_buf); 1914 mutex_exit(&nce->nce_lock); 1915 (void) mi_mpprintf(mp, "%8s %17s %5s %s/%d", 1916 ill->ill_name, addr_buf, (uchar_t *)&flags_buf, 1917 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 1918 (char *)local_buf, sizeof (local_buf)), 1919 ip_mask_to_plen_v6(&v6addr)); 1920 kmem_free(addr_buf, addrlen); 1921 } 1922 } else { 1923 (void) mi_mpprintf(mp, 1924 "%8s %9s %5s %s/%d", 1925 ill->ill_name, 1926 "None", 1927 (uchar_t *)&flags_buf, 1928 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 1929 (char *)local_buf, sizeof (local_buf)), 1930 ip_mask_to_plen_v6(&v6addr)); 1931 } 1932 } 1933 1934 mblk_t * 1935 nce_udreq_alloc(ill_t *ill) 1936 { 1937 mblk_t *template_mp = NULL; 1938 dl_unitdata_req_t *dlur; 1939 int sap_length; 1940 1941 sap_length = ill->ill_sap_length; 1942 template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 1943 ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 1944 if (template_mp == NULL) 1945 return (NULL); 1946 1947 dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 1948 dlur->dl_priority.dl_min = 0; 1949 dlur->dl_priority.dl_max = 0; 1950 dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 1951 dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); 1952 1953 /* Copy in the SAP value. */ 1954 NCE_LL_SAP_COPY(ill, template_mp); 1955 1956 return (template_mp); 1957 } 1958 1959 /* 1960 * NDP retransmit timer. 1961 * This timer goes off when: 1962 * a. It is time to retransmit NS for resolver. 1963 * b. It is time to send reachability probes. 1964 */ 1965 void 1966 ndp_timer(void *arg) 1967 { 1968 nce_t *nce = arg; 1969 ill_t *ill = nce->nce_ill; 1970 uint32_t ms; 1971 char addrbuf[INET6_ADDRSTRLEN]; 1972 mblk_t *mp; 1973 boolean_t dropped = B_FALSE; 1974 1975 /* 1976 * The timer has to be cancelled by ndp_delete before doing the final 1977 * refrele. So the NCE is guaranteed to exist when the timer runs 1978 * until it clears the timeout_id. Before clearing the timeout_id 1979 * bump up the refcnt so that we can continue to use the nce 1980 */ 1981 ASSERT(nce != NULL); 1982 1983 /* 1984 * Grab the ill_g_lock now itself to avoid lock order problems. 1985 * nce_solicit needs ill_g_lock to be able to traverse ills 1986 */ 1987 rw_enter(&ill_g_lock, RW_READER); 1988 mutex_enter(&nce->nce_lock); 1989 NCE_REFHOLD_LOCKED(nce); 1990 nce->nce_timeout_id = 0; 1991 1992 /* 1993 * Check the reachability state first. 1994 */ 1995 switch (nce->nce_state) { 1996 case ND_DELAY: 1997 rw_exit(&ill_g_lock); 1998 nce->nce_state = ND_PROBE; 1999 mutex_exit(&nce->nce_lock); 2000 (void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 2001 &ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST); 2002 if (ip_debug > 3) { 2003 /* ip2dbg */ 2004 pr_addr_dbg("ndp_timer: state for %s changed " 2005 "to PROBE\n", AF_INET6, &nce->nce_addr); 2006 } 2007 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2008 NCE_REFRELE(nce); 2009 return; 2010 case ND_PROBE: 2011 /* must be retransmit timer */ 2012 rw_exit(&ill_g_lock); 2013 nce->nce_pcnt--; 2014 ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 2015 nce->nce_pcnt >= -1); 2016 if (nce->nce_pcnt == 0) { 2017 /* Wait RetransTimer, before deleting the entry */ 2018 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2019 nce->nce_pcnt, inet_ntop(AF_INET6, 2020 &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 2021 mutex_exit(&nce->nce_lock); 2022 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2023 } else { 2024 /* 2025 * As per RFC2461, the nce gets deleted after 2026 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 2027 * Note that the first unicast solicitation is sent 2028 * during the DELAY state. 2029 */ 2030 if (nce->nce_pcnt > 0) { 2031 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2032 nce->nce_pcnt, inet_ntop(AF_INET6, 2033 &nce->nce_addr, 2034 addrbuf, sizeof (addrbuf)))); 2035 mutex_exit(&nce->nce_lock); 2036 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, 2037 NULL, B_FALSE, &ipv6_all_zeros, 2038 &nce->nce_addr, NDP_UNICAST); 2039 if (dropped) { 2040 mutex_enter(&nce->nce_lock); 2041 nce->nce_pcnt++; 2042 mutex_exit(&nce->nce_lock); 2043 } 2044 NDP_RESTART_TIMER(nce, 2045 ill->ill_reachable_retrans_time); 2046 } else { 2047 /* No hope, delete the nce */ 2048 nce->nce_state = ND_UNREACHABLE; 2049 mutex_exit(&nce->nce_lock); 2050 if (ip_debug > 2) { 2051 /* ip1dbg */ 2052 pr_addr_dbg("ndp_timer: Delete IRE for" 2053 " dst %s\n", AF_INET6, 2054 &nce->nce_addr); 2055 } 2056 ndp_delete(nce); 2057 } 2058 } 2059 NCE_REFRELE(nce); 2060 return; 2061 case ND_INCOMPLETE: 2062 /* 2063 * Must be resolvers retransmit timer. 2064 */ 2065 for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) { 2066 ip6i_t *ip6i; 2067 ip6_t *ip6h; 2068 mblk_t *data_mp; 2069 2070 /* 2071 * Walk the list of packets queued, and see if there 2072 * are any multipathing probe packets. Such packets 2073 * are always queued at the head. Since this is a 2074 * retransmit timer firing, mark such packets as 2075 * delayed in ND resolution. This info will be used 2076 * in ip_wput_v6(). Multipathing probe packets will 2077 * always have an ip6i_t. Once we hit a packet without 2078 * it, we can break out of this loop. 2079 */ 2080 if (mp->b_datap->db_type == M_CTL) 2081 data_mp = mp->b_cont; 2082 else 2083 data_mp = mp; 2084 2085 ip6h = (ip6_t *)data_mp->b_rptr; 2086 if (ip6h->ip6_nxt != IPPROTO_RAW) 2087 break; 2088 2089 /* 2090 * This message should have been pulled up already in 2091 * ip_wput_v6. We can't do pullups here because the 2092 * b_next/b_prev is non-NULL. 2093 */ 2094 ip6i = (ip6i_t *)ip6h; 2095 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2096 sizeof (ip6i_t) + IPV6_HDR_LEN); 2097 2098 /* Mark this packet as delayed due to ND resolution */ 2099 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2100 ip6i->ip6i_flags |= IP6I_ND_DELAYED; 2101 } 2102 if (nce->nce_qd_mp != NULL) { 2103 ms = nce_solicit(nce, NULL); 2104 rw_exit(&ill_g_lock); 2105 if (ms == 0) { 2106 if (nce->nce_state != ND_REACHABLE) { 2107 mutex_exit(&nce->nce_lock); 2108 nce_resolv_failed(nce); 2109 ndp_delete(nce); 2110 } else { 2111 mutex_exit(&nce->nce_lock); 2112 } 2113 } else { 2114 mutex_exit(&nce->nce_lock); 2115 NDP_RESTART_TIMER(nce, (clock_t)ms); 2116 } 2117 NCE_REFRELE(nce); 2118 return; 2119 } 2120 mutex_exit(&nce->nce_lock); 2121 rw_exit(&ill_g_lock); 2122 NCE_REFRELE(nce); 2123 break; 2124 case ND_REACHABLE : 2125 rw_exit(&ill_g_lock); 2126 if (nce->nce_flags & NCE_F_UNSOL_ADV && 2127 nce->nce_unsolicit_count != 0) { 2128 nce->nce_unsolicit_count--; 2129 mutex_exit(&nce->nce_lock); 2130 dropped = nce_xmit(ill, 2131 ND_NEIGHBOR_ADVERT, 2132 ill, /* ill to be used for hw addr */ 2133 B_FALSE, /* use ill_phys_addr */ 2134 &nce->nce_addr, 2135 &ipv6_all_hosts_mcast, 2136 nce->nce_flags | NDP_ORIDE); 2137 if (dropped) { 2138 mutex_enter(&nce->nce_lock); 2139 nce->nce_unsolicit_count++; 2140 mutex_exit(&nce->nce_lock); 2141 } 2142 if (nce->nce_unsolicit_count != 0) { 2143 NDP_RESTART_TIMER(nce, 2144 ip_ndp_unsolicit_interval); 2145 } 2146 } else { 2147 mutex_exit(&nce->nce_lock); 2148 } 2149 NCE_REFRELE(nce); 2150 break; 2151 default: 2152 rw_exit(&ill_g_lock); 2153 mutex_exit(&nce->nce_lock); 2154 NCE_REFRELE(nce); 2155 break; 2156 } 2157 } 2158 2159 /* 2160 * Set a link layer address from the ll_addr passed in. 2161 * Copy SAP from ill. 2162 */ 2163 static void 2164 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2165 { 2166 ill_t *ill = nce->nce_ill; 2167 uchar_t *woffset; 2168 2169 ASSERT(ll_addr != NULL); 2170 /* Always called before fast_path_probe */ 2171 if (nce->nce_fp_mp != NULL) 2172 return; 2173 if (ill->ill_sap_length != 0) { 2174 /* 2175 * Copy the SAP type specified in the 2176 * request into the xmit template. 2177 */ 2178 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2179 } 2180 if (ill->ill_phys_addr_length > 0) { 2181 /* 2182 * The bcopy() below used to be called for the physical address 2183 * length rather than the link layer address length. For 2184 * ethernet and many other media, the phys_addr and lla are 2185 * identical. 2186 * However, with xresolv interfaces being introduced, the 2187 * phys_addr and lla are no longer the same, and the physical 2188 * address may not have any useful meaning, so we use the lla 2189 * for IPv6 address resolution and destination addressing. 2190 * 2191 * For PPP or other interfaces with a zero length 2192 * physical address, don't do anything here. 2193 * The bcopy() with a zero phys_addr length was previously 2194 * a no-op for interfaces with a zero-length physical address. 2195 * Using the lla for them would change the way they operate. 2196 * Doing nothing in such cases preserves expected behavior. 2197 */ 2198 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2199 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2200 } 2201 } 2202 2203 static boolean_t 2204 nce_cmp_ll_addr(nce_t *nce, char *ll_addr, uint32_t ll_addr_len) 2205 { 2206 ill_t *ill = nce->nce_ill; 2207 uchar_t *ll_offset; 2208 2209 ASSERT(nce->nce_res_mp != NULL); 2210 if (ll_addr == NULL) 2211 return (B_FALSE); 2212 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2213 if (bcmp(ll_addr, (char *)ll_offset, ll_addr_len) != 0) 2214 return (B_TRUE); 2215 return (B_FALSE); 2216 } 2217 2218 /* 2219 * Updates the link layer address or the reachability state of 2220 * a cache entry. Reset probe counter if needed. 2221 */ 2222 static void 2223 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2224 { 2225 ill_t *ill = nce->nce_ill; 2226 boolean_t need_stop_timer = B_FALSE; 2227 boolean_t need_fastpath_update = B_FALSE; 2228 2229 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2230 /* 2231 * If this interface does not do NUD, there is no point 2232 * in allowing an update to the cache entry. Although 2233 * we will respond to NS. 2234 * The only time we accept an update for a resolver when 2235 * NUD is turned off is when it has just been created. 2236 * Non-Resolvers will always be created as REACHABLE. 2237 */ 2238 if (new_state != ND_UNCHANGED) { 2239 if ((nce->nce_flags & NCE_F_NONUD) && 2240 (nce->nce_state != ND_INCOMPLETE)) 2241 return; 2242 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2243 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2244 need_stop_timer = B_TRUE; 2245 if (new_state == ND_REACHABLE) 2246 nce->nce_last = TICK_TO_MSEC(lbolt64); 2247 else { 2248 /* We force NUD in this case */ 2249 nce->nce_last = 0; 2250 } 2251 nce->nce_state = new_state; 2252 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2253 } 2254 /* 2255 * In case of fast path we need to free the the fastpath 2256 * M_DATA and do another probe. Otherwise we can just 2257 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2258 * whatever packets that happens to be transmitting at the time. 2259 */ 2260 if (new_ll_addr != NULL) { 2261 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2262 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2263 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2264 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2265 if (nce->nce_fp_mp != NULL) { 2266 freemsg(nce->nce_fp_mp); 2267 nce->nce_fp_mp = NULL; 2268 need_fastpath_update = B_TRUE; 2269 } 2270 } 2271 mutex_exit(&nce->nce_lock); 2272 if (need_stop_timer) { 2273 (void) untimeout(nce->nce_timeout_id); 2274 nce->nce_timeout_id = 0; 2275 } 2276 if (need_fastpath_update) 2277 nce_fastpath(nce); 2278 mutex_enter(&nce->nce_lock); 2279 } 2280 2281 static void 2282 nce_queue_mp(nce_t *nce, mblk_t *mp) 2283 { 2284 uint_t count = 0; 2285 mblk_t **mpp; 2286 boolean_t head_insert = B_FALSE; 2287 ip6_t *ip6h; 2288 ip6i_t *ip6i; 2289 mblk_t *data_mp; 2290 2291 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2292 2293 if (mp->b_datap->db_type == M_CTL) 2294 data_mp = mp->b_cont; 2295 else 2296 data_mp = mp; 2297 ip6h = (ip6_t *)data_mp->b_rptr; 2298 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2299 /* 2300 * This message should have been pulled up already in 2301 * ip_wput_v6. We can't do pullups here because the message 2302 * could be from the nce_qd_mp which could have b_next/b_prev 2303 * non-NULL. 2304 */ 2305 ip6i = (ip6i_t *)ip6h; 2306 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2307 sizeof (ip6i_t) + IPV6_HDR_LEN); 2308 /* 2309 * Multipathing probe packets have IP6I_DROP_IFDELAYED set. 2310 * This has 2 aspects mentioned below. 2311 * 1. Perform head insertion in the nce_qd_mp for these packets. 2312 * This ensures that next retransmit of ND solicitation 2313 * will use the interface specified by the probe packet, 2314 * for both NS and NA. This corresponds to the src address 2315 * in the IPv6 packet. If we insert at tail, we will be 2316 * depending on the packet at the head for successful 2317 * ND resolution. This is not reliable, because the interface 2318 * on which the NA arrives could be different from the interface 2319 * on which the NS was sent, and if the receiving interface is 2320 * failed, it will appear that the sending interface is also 2321 * failed, causing in.mpathd to misdiagnose this as link 2322 * failure. 2323 * 2. Drop the original packet, if the ND resolution did not 2324 * succeed in the first attempt. However we will create the 2325 * nce and the ire, as soon as the ND resolution succeeds. 2326 * We don't gain anything by queueing multiple probe packets 2327 * and sending them back-to-back once resolution succeeds. 2328 * It is sufficient to send just 1 packet after ND resolution 2329 * succeeds. Since mpathd is sending down probe packets at a 2330 * constant rate, we don't need to send the queued packet. We 2331 * need to queue it only for NDP resolution. The benefit of 2332 * dropping the probe packets that were delayed in ND 2333 * resolution, is that in.mpathd will not see inflated 2334 * RTT. If the ND resolution does not succeed within 2335 * in.mpathd's failure detection time, mpathd may detect 2336 * a failure, and it does not matter whether the packet 2337 * was queued or dropped. 2338 */ 2339 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2340 head_insert = B_TRUE; 2341 } 2342 2343 for (mpp = &nce->nce_qd_mp; *mpp != NULL; 2344 mpp = &(*mpp)->b_next) { 2345 if (++count > 2346 nce->nce_ill->ill_max_buf) { 2347 mblk_t *tmp = nce->nce_qd_mp->b_next; 2348 2349 nce->nce_qd_mp->b_next = NULL; 2350 nce->nce_qd_mp->b_prev = NULL; 2351 freemsg(nce->nce_qd_mp); 2352 ip1dbg(("nce_queue_mp: pkt dropped\n")); 2353 nce->nce_qd_mp = tmp; 2354 } 2355 } 2356 /* put this on the list */ 2357 if (head_insert) { 2358 mp->b_next = nce->nce_qd_mp; 2359 nce->nce_qd_mp = mp; 2360 } else { 2361 *mpp = mp; 2362 } 2363 } 2364 2365 /* 2366 * Called when address resolution failed due to a timeout. 2367 * Send an ICMP unreachable in response to all queued packets. 2368 */ 2369 void 2370 nce_resolv_failed(nce_t *nce) 2371 { 2372 mblk_t *mp, *nxt_mp, *first_mp; 2373 char buf[INET6_ADDRSTRLEN]; 2374 ip6_t *ip6h; 2375 zoneid_t zoneid = GLOBAL_ZONEID; 2376 2377 ip1dbg(("nce_resolv_failed: dst %s\n", 2378 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 2379 mutex_enter(&nce->nce_lock); 2380 mp = nce->nce_qd_mp; 2381 nce->nce_qd_mp = NULL; 2382 mutex_exit(&nce->nce_lock); 2383 while (mp != NULL) { 2384 nxt_mp = mp->b_next; 2385 mp->b_next = NULL; 2386 mp->b_prev = NULL; 2387 2388 first_mp = mp; 2389 if (mp->b_datap->db_type == M_CTL) { 2390 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 2391 ASSERT(io->ipsec_out_type == IPSEC_OUT); 2392 zoneid = io->ipsec_out_zoneid; 2393 ASSERT(zoneid != ALL_ZONES); 2394 mp = mp->b_cont; 2395 } 2396 2397 ip6h = (ip6_t *)mp->b_rptr; 2398 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2399 ip6i_t *ip6i; 2400 /* 2401 * This message should have been pulled up already 2402 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 2403 * the header is pulled up. 2404 */ 2405 ip6i = (ip6i_t *)ip6h; 2406 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 2407 sizeof (ip6i_t) + IPV6_HDR_LEN); 2408 mp->b_rptr += sizeof (ip6i_t); 2409 } 2410 /* 2411 * Ignore failure since icmp_unreachable_v6 will silently 2412 * drop packets with an unspecified source address. 2413 */ 2414 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid); 2415 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 2416 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE); 2417 mp = nxt_mp; 2418 } 2419 } 2420 2421 /* 2422 * Called by SIOCSNDP* ioctl to add/change an nce entry 2423 * and the corresponding attributes. 2424 * Disallow states other than ND_REACHABLE or ND_STALE. 2425 */ 2426 int 2427 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 2428 { 2429 sin6_t *sin6; 2430 in6_addr_t *addr; 2431 nce_t *nce; 2432 int err; 2433 uint16_t new_flags = 0; 2434 uint16_t old_flags = 0; 2435 int inflags = lnr->lnr_flags; 2436 2437 if ((lnr->lnr_state_create != ND_REACHABLE) && 2438 (lnr->lnr_state_create != ND_STALE)) 2439 return (EINVAL); 2440 2441 sin6 = (sin6_t *)&lnr->lnr_addr; 2442 addr = &sin6->sin6_addr; 2443 2444 mutex_enter(&ndp_g_lock); 2445 /* We know it can not be mapping so just look in the hash table */ 2446 nce = nce_lookup_addr(ill, addr); 2447 if (nce != NULL) 2448 new_flags = nce->nce_flags; 2449 2450 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 2451 case NDF_ISROUTER_ON: 2452 new_flags |= NCE_F_ISROUTER; 2453 break; 2454 case NDF_ISROUTER_OFF: 2455 new_flags &= ~NCE_F_ISROUTER; 2456 break; 2457 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 2458 mutex_exit(&ndp_g_lock); 2459 if (nce != NULL) 2460 NCE_REFRELE(nce); 2461 return (EINVAL); 2462 } 2463 2464 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 2465 case NDF_ANYCAST_ON: 2466 new_flags |= NCE_F_ANYCAST; 2467 break; 2468 case NDF_ANYCAST_OFF: 2469 new_flags &= ~NCE_F_ANYCAST; 2470 break; 2471 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 2472 mutex_exit(&ndp_g_lock); 2473 if (nce != NULL) 2474 NCE_REFRELE(nce); 2475 return (EINVAL); 2476 } 2477 2478 switch (inflags & (NDF_PROXY_ON|NDF_PROXY_OFF)) { 2479 case NDF_PROXY_ON: 2480 new_flags |= NCE_F_PROXY; 2481 break; 2482 case NDF_PROXY_OFF: 2483 new_flags &= ~NCE_F_PROXY; 2484 break; 2485 case (NDF_PROXY_OFF|NDF_PROXY_ON): 2486 mutex_exit(&ndp_g_lock); 2487 if (nce != NULL) 2488 NCE_REFRELE(nce); 2489 return (EINVAL); 2490 } 2491 2492 if (nce == NULL) { 2493 err = ndp_add(ill, 2494 (uchar_t *)lnr->lnr_hdw_addr, 2495 addr, 2496 &ipv6_all_ones, 2497 &ipv6_all_zeros, 2498 0, 2499 new_flags, 2500 lnr->lnr_state_create, 2501 &nce); 2502 if (err != 0) { 2503 mutex_exit(&ndp_g_lock); 2504 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 2505 return (err); 2506 } 2507 } 2508 old_flags = nce->nce_flags; 2509 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 2510 /* 2511 * Router turned to host, delete all ires. 2512 * XXX Just delete the entry, but we need to add too. 2513 */ 2514 nce->nce_flags &= ~NCE_F_ISROUTER; 2515 mutex_exit(&ndp_g_lock); 2516 ndp_delete(nce); 2517 NCE_REFRELE(nce); 2518 return (0); 2519 } 2520 mutex_exit(&ndp_g_lock); 2521 2522 mutex_enter(&nce->nce_lock); 2523 nce->nce_flags = new_flags; 2524 mutex_exit(&nce->nce_lock); 2525 /* 2526 * Note that we ignore the state at this point, which 2527 * should be either STALE or REACHABLE. Instead we let 2528 * the link layer address passed in to determine the state 2529 * much like incoming packets. 2530 */ 2531 ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 2532 NCE_REFRELE(nce); 2533 return (0); 2534 } 2535 2536 /* 2537 * If the device driver supports it, we make nce_fp_mp to have 2538 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 2539 * The caller insures there is hold on nce for this function. 2540 * Note that since ill_fastpath_probe() copies the mblk there is 2541 * no need for the hold beyond this function. 2542 */ 2543 static void 2544 nce_fastpath(nce_t *nce) 2545 { 2546 ill_t *ill = nce->nce_ill; 2547 int res; 2548 2549 ASSERT(ill != NULL); 2550 if (nce->nce_fp_mp != NULL) { 2551 /* Already contains fastpath info */ 2552 return; 2553 } 2554 if (nce->nce_res_mp != NULL) { 2555 nce_fastpath_list_add(nce); 2556 res = ill_fastpath_probe(ill, nce->nce_res_mp); 2557 /* 2558 * EAGAIN is an indication of a transient error 2559 * i.e. allocation failure etc. leave the nce in the list it 2560 * will be updated when another probe happens for another ire 2561 * if not it will be taken out of the list when the ire is 2562 * deleted. 2563 */ 2564 2565 if (res != 0 && res != EAGAIN) 2566 nce_fastpath_list_delete(nce); 2567 } 2568 } 2569 2570 /* 2571 * Drain the list of nce's waiting for fastpath response. 2572 */ 2573 void 2574 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 2575 void *arg) 2576 { 2577 2578 nce_t *next_nce; 2579 nce_t *current_nce; 2580 nce_t *first_nce; 2581 nce_t *prev_nce = NULL; 2582 2583 ASSERT(ill != NULL); 2584 2585 mutex_enter(&ill->ill_lock); 2586 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 2587 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 2588 next_nce = current_nce->nce_fastpath; 2589 /* 2590 * Take it off the list if we're flushing, or if the callback 2591 * routine tells us to do so. Otherwise, leave the nce in the 2592 * fastpath list to handle any pending response from the lower 2593 * layer. We can't drain the list when the callback routine 2594 * comparison failed, because the response is asynchronous in 2595 * nature, and may not arrive in the same order as the list 2596 * insertion. 2597 */ 2598 if (func == NULL || func(current_nce, arg)) { 2599 current_nce->nce_fastpath = NULL; 2600 if (current_nce == first_nce) 2601 ill->ill_fastpath_list = first_nce = next_nce; 2602 else 2603 prev_nce->nce_fastpath = next_nce; 2604 } else { 2605 /* previous element that is still in the list */ 2606 prev_nce = current_nce; 2607 } 2608 current_nce = next_nce; 2609 } 2610 mutex_exit(&ill->ill_lock); 2611 } 2612 2613 /* 2614 * Add nce to the nce fastpath list. 2615 */ 2616 void 2617 nce_fastpath_list_add(nce_t *nce) 2618 { 2619 ill_t *ill; 2620 2621 ill = nce->nce_ill; 2622 ASSERT(ill != NULL); 2623 2624 mutex_enter(&ill->ill_lock); 2625 mutex_enter(&nce->nce_lock); 2626 2627 /* 2628 * if nce has not been deleted and 2629 * is not already in the list add it. 2630 */ 2631 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 2632 (nce->nce_fastpath == NULL)) { 2633 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 2634 ill->ill_fastpath_list = nce; 2635 } 2636 2637 mutex_exit(&nce->nce_lock); 2638 mutex_exit(&ill->ill_lock); 2639 } 2640 2641 /* 2642 * remove nce from the nce fastpath list. 2643 */ 2644 void 2645 nce_fastpath_list_delete(nce_t *nce) 2646 { 2647 nce_t *nce_ptr; 2648 2649 ill_t *ill; 2650 2651 ill = nce->nce_ill; 2652 ASSERT(ill != NULL); 2653 2654 mutex_enter(&ill->ill_lock); 2655 if (nce->nce_fastpath == NULL) 2656 goto done; 2657 2658 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 2659 2660 if (ill->ill_fastpath_list == nce) { 2661 ill->ill_fastpath_list = nce->nce_fastpath; 2662 } else { 2663 nce_ptr = ill->ill_fastpath_list; 2664 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 2665 if (nce_ptr->nce_fastpath == nce) { 2666 nce_ptr->nce_fastpath = nce->nce_fastpath; 2667 break; 2668 } 2669 nce_ptr = nce_ptr->nce_fastpath; 2670 } 2671 } 2672 2673 nce->nce_fastpath = NULL; 2674 done: 2675 mutex_exit(&ill->ill_lock); 2676 } 2677 2678 /* 2679 * Update all NCE's that are not in fastpath mode and 2680 * have an nce_fp_mp that matches mp. mp->b_cont contains 2681 * the fastpath header. 2682 * 2683 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 2684 */ 2685 boolean_t 2686 ndp_fastpath_update(nce_t *nce, void *arg) 2687 { 2688 mblk_t *mp, *fp_mp; 2689 uchar_t *mp_rptr, *ud_mp_rptr; 2690 mblk_t *ud_mp = nce->nce_res_mp; 2691 ptrdiff_t cmplen; 2692 2693 if (nce->nce_flags & NCE_F_MAPPING) 2694 return (B_TRUE); 2695 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 2696 return (B_TRUE); 2697 2698 ip2dbg(("ndp_fastpath_update: trying\n")); 2699 mp = (mblk_t *)arg; 2700 mp_rptr = mp->b_rptr; 2701 cmplen = mp->b_wptr - mp_rptr; 2702 ASSERT(cmplen >= 0); 2703 ud_mp_rptr = ud_mp->b_rptr; 2704 /* 2705 * The nce is locked here to prevent any other threads 2706 * from accessing and changing nce_res_mp when the IPv6 address 2707 * becomes resolved to an lla while we're in the middle 2708 * of looking at and comparing the hardware address (lla). 2709 * It is also locked to prevent multiple threads in nce_fastpath_update 2710 * from examining nce_res_mp atthe same time. 2711 */ 2712 mutex_enter(&nce->nce_lock); 2713 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 2714 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 2715 mutex_exit(&nce->nce_lock); 2716 /* 2717 * Don't take the ire off the fastpath list yet, 2718 * since the response may come later. 2719 */ 2720 return (B_FALSE); 2721 } 2722 /* Matched - install mp as the fastpath mp */ 2723 ip1dbg(("ndp_fastpath_update: match\n")); 2724 fp_mp = dupb(mp->b_cont); 2725 if (fp_mp != NULL) { 2726 nce->nce_fp_mp = fp_mp; 2727 } 2728 mutex_exit(&nce->nce_lock); 2729 return (B_TRUE); 2730 } 2731 2732 /* 2733 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 2734 * driver. Note that it assumes IP is exclusive... 2735 */ 2736 /* ARGSUSED */ 2737 void 2738 ndp_fastpath_flush(nce_t *nce, char *arg) 2739 { 2740 if (nce->nce_flags & NCE_F_MAPPING) 2741 return; 2742 /* No fastpath info? */ 2743 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 2744 return; 2745 2746 /* Just delete the NCE... */ 2747 ndp_delete(nce); 2748 } 2749 2750 /* 2751 * Return a pointer to a given option in the packet. 2752 * Assumes that option part of the packet have already been validated. 2753 */ 2754 nd_opt_hdr_t * 2755 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 2756 { 2757 while (optlen > 0) { 2758 if (opt->nd_opt_type == opt_type) 2759 return (opt); 2760 optlen -= 8 * opt->nd_opt_len; 2761 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 2762 } 2763 return (NULL); 2764 } 2765 2766 /* 2767 * Verify all option lengths present are > 0, also check to see 2768 * if the option lengths and packet length are consistent. 2769 */ 2770 boolean_t 2771 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 2772 { 2773 ASSERT(opt != NULL); 2774 while (optlen > 0) { 2775 if (opt->nd_opt_len == 0) 2776 return (B_FALSE); 2777 optlen -= 8 * opt->nd_opt_len; 2778 if (optlen < 0) 2779 return (B_FALSE); 2780 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 2781 } 2782 return (B_TRUE); 2783 } 2784 2785 /* 2786 * ndp_walk function. 2787 * Free a fraction of the NCE cache entries. 2788 * A fraction of zero means to not free any in that category. 2789 */ 2790 void 2791 ndp_cache_reclaim(nce_t *nce, char *arg) 2792 { 2793 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 2794 uint_t rand; 2795 2796 if (nce->nce_flags & NCE_F_PERMANENT) 2797 return; 2798 2799 rand = (uint_t)lbolt + 2800 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 2801 if (ncr->ncr_host != 0 && 2802 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 2803 ndp_delete(nce); 2804 return; 2805 } 2806 } 2807 2808 /* 2809 * ndp_walk function. 2810 * Count the number of NCEs that can be deleted. 2811 * These would be hosts but not routers. 2812 */ 2813 void 2814 ndp_cache_count(nce_t *nce, char *arg) 2815 { 2816 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 2817 2818 if (nce->nce_flags & NCE_F_PERMANENT) 2819 return; 2820 2821 ncc->ncc_total++; 2822 if (!(nce->nce_flags & NCE_F_ISROUTER)) 2823 ncc->ncc_host++; 2824 } 2825 2826 #ifdef NCE_DEBUG 2827 th_trace_t * 2828 th_trace_nce_lookup(nce_t *nce) 2829 { 2830 int bucket_id; 2831 th_trace_t *th_trace; 2832 2833 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2834 2835 bucket_id = IP_TR_HASH(curthread); 2836 ASSERT(bucket_id < IP_TR_HASH_MAX); 2837 2838 for (th_trace = nce->nce_trace[bucket_id]; th_trace != NULL; 2839 th_trace = th_trace->th_next) { 2840 if (th_trace->th_id == curthread) 2841 return (th_trace); 2842 } 2843 return (NULL); 2844 } 2845 2846 void 2847 nce_trace_ref(nce_t *nce) 2848 { 2849 int bucket_id; 2850 th_trace_t *th_trace; 2851 2852 /* 2853 * Attempt to locate the trace buffer for the curthread. 2854 * If it does not exist, then allocate a new trace buffer 2855 * and link it in list of trace bufs for this ipif, at the head 2856 */ 2857 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2858 2859 if (nce->nce_trace_disable == B_TRUE) 2860 return; 2861 2862 th_trace = th_trace_nce_lookup(nce); 2863 if (th_trace == NULL) { 2864 bucket_id = IP_TR_HASH(curthread); 2865 th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t), 2866 KM_NOSLEEP); 2867 if (th_trace == NULL) { 2868 nce->nce_trace_disable = B_TRUE; 2869 nce_trace_inactive(nce); 2870 return; 2871 } 2872 th_trace->th_id = curthread; 2873 th_trace->th_next = nce->nce_trace[bucket_id]; 2874 th_trace->th_prev = &nce->nce_trace[bucket_id]; 2875 if (th_trace->th_next != NULL) 2876 th_trace->th_next->th_prev = &th_trace->th_next; 2877 nce->nce_trace[bucket_id] = th_trace; 2878 } 2879 ASSERT(th_trace->th_refcnt < TR_BUF_MAX - 1); 2880 th_trace->th_refcnt++; 2881 th_trace_rrecord(th_trace); 2882 } 2883 2884 void 2885 nce_untrace_ref(nce_t *nce) 2886 { 2887 th_trace_t *th_trace; 2888 2889 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2890 2891 if (nce->nce_trace_disable == B_TRUE) 2892 return; 2893 2894 th_trace = th_trace_nce_lookup(nce); 2895 ASSERT(th_trace != NULL && th_trace->th_refcnt > 0); 2896 2897 th_trace_rrecord(th_trace); 2898 th_trace->th_refcnt--; 2899 } 2900 2901 void 2902 nce_trace_inactive(nce_t *nce) 2903 { 2904 th_trace_t *th_trace; 2905 int i; 2906 2907 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2908 2909 for (i = 0; i < IP_TR_HASH_MAX; i++) { 2910 while (nce->nce_trace[i] != NULL) { 2911 th_trace = nce->nce_trace[i]; 2912 2913 /* unlink th_trace and free it */ 2914 nce->nce_trace[i] = th_trace->th_next; 2915 if (th_trace->th_next != NULL) 2916 th_trace->th_next->th_prev = 2917 &nce->nce_trace[i]; 2918 2919 th_trace->th_next = NULL; 2920 th_trace->th_prev = NULL; 2921 kmem_free(th_trace, sizeof (th_trace_t)); 2922 } 2923 } 2924 2925 } 2926 2927 /* ARGSUSED */ 2928 int 2929 nce_thread_exit(nce_t *nce, caddr_t arg) 2930 { 2931 th_trace_t *th_trace; 2932 2933 mutex_enter(&nce->nce_lock); 2934 th_trace = th_trace_nce_lookup(nce); 2935 2936 if (th_trace == NULL) { 2937 mutex_exit(&nce->nce_lock); 2938 return (0); 2939 } 2940 2941 ASSERT(th_trace->th_refcnt == 0); 2942 2943 /* unlink th_trace and free it */ 2944 *th_trace->th_prev = th_trace->th_next; 2945 if (th_trace->th_next != NULL) 2946 th_trace->th_next->th_prev = th_trace->th_prev; 2947 th_trace->th_next = NULL; 2948 th_trace->th_prev = NULL; 2949 kmem_free(th_trace, sizeof (th_trace_t)); 2950 mutex_exit(&nce->nce_lock); 2951 return (0); 2952 } 2953 #endif 2954