1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/stream.h> 28 #include <sys/stropts.h> 29 #include <sys/strsun.h> 30 #include <sys/sysmacros.h> 31 #include <sys/errno.h> 32 #include <sys/dlpi.h> 33 #include <sys/socket.h> 34 #include <sys/ddi.h> 35 #include <sys/sunddi.h> 36 #include <sys/cmn_err.h> 37 #include <sys/debug.h> 38 #include <sys/vtrace.h> 39 #include <sys/kmem.h> 40 #include <sys/zone.h> 41 #include <sys/ethernet.h> 42 #include <sys/sdt.h> 43 #include <sys/mac.h> 44 45 #include <net/if.h> 46 #include <net/if_types.h> 47 #include <net/if_dl.h> 48 #include <net/route.h> 49 #include <netinet/in.h> 50 #include <netinet/ip6.h> 51 #include <netinet/icmp6.h> 52 53 #include <inet/common.h> 54 #include <inet/mi.h> 55 #include <inet/mib2.h> 56 #include <inet/nd.h> 57 #include <inet/ip.h> 58 #include <inet/ip_impl.h> 59 #include <inet/ipclassifier.h> 60 #include <inet/ip_if.h> 61 #include <inet/ip_ire.h> 62 #include <inet/ip_rts.h> 63 #include <inet/ip6.h> 64 #include <inet/ip_ndp.h> 65 #include <inet/sctp_ip.h> 66 #include <inet/ip_arp.h> 67 #include <inet/ip2mac_impl.h> 68 69 #define ANNOUNCE_INTERVAL(isv6) \ 70 (isv6 ? ipst->ips_ip_ndp_unsolicit_interval : \ 71 ipst->ips_ip_arp_publish_interval) 72 73 #define DEFENSE_INTERVAL(isv6) \ 74 (isv6 ? ipst->ips_ndp_defend_interval : \ 75 ipst->ips_arp_defend_interval) 76 77 /* Non-tunable probe interval, based on link capabilities */ 78 #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 79 80 /* 81 * The IPv4 Link Local address space is special; we do extra duplicate checking 82 * there, as the entire assignment mechanism rests on random numbers. 83 */ 84 #define IS_IPV4_LL_SPACE(ptr) (((uchar_t *)ptr)[0] == 169 && \ 85 ((uchar_t *)ptr)[1] == 254) 86 87 /* 88 * NCE_EXTERNAL_FLAGS_MASK defines the set of ncec_flags that may be passed 89 * in to the ncec*add* functions. 90 * 91 * NCE_F_AUTHORITY means that we ignore any incoming adverts for that 92 * mapping (though DAD is performed for the mapping). NCE_F_PUBLISH means 93 * that we will respond to requests for the protocol address. 94 */ 95 #define NCE_EXTERNAL_FLAGS_MASK \ 96 (NCE_F_MYADDR | NCE_F_ISROUTER | NCE_F_NONUD | \ 97 NCE_F_ANYCAST | NCE_F_UNSOL_ADV | NCE_F_BCAST | NCE_F_MCAST | \ 98 NCE_F_AUTHORITY | NCE_F_PUBLISH | NCE_F_STATIC) 99 100 /* 101 * Lock ordering: 102 * 103 * ndp_g_lock -> ill_lock -> ncec_lock 104 * 105 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 106 * ncec_next. ncec_lock protects the contents of the NCE (particularly 107 * ncec_refcnt). 
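 *
 * As an illustrative sketch only (not a literal code path in this file),
 * a thread that needed all three locks would take them in the order
 * documented above and drop them in the reverse order:
 *
 *	mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
 *	mutex_enter(&ill->ill_lock);
 *	mutex_enter(&ncec->ncec_lock);
 *	... examine or update the entry ...
 *	mutex_exit(&ncec->ncec_lock);
 *	mutex_exit(&ill->ill_lock);
 *	mutex_exit(&ipst->ips_ndp6->ndp_g_lock);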
108 */ 109 110 static void nce_cleanup_list(ncec_t *ncec); 111 static void nce_set_ll(ncec_t *ncec, uchar_t *ll_addr); 112 static ncec_t *ncec_lookup_illgrp(ill_t *, const in6_addr_t *, 113 ncec_t *); 114 static nce_t *nce_lookup_addr(ill_t *, const in6_addr_t *); 115 static int nce_set_multicast_v6(ill_t *ill, const in6_addr_t *addr, 116 uint16_t ncec_flags, nce_t **newnce); 117 static int nce_set_multicast_v4(ill_t *ill, const in_addr_t *dst, 118 uint16_t ncec_flags, nce_t **newnce); 119 static boolean_t ndp_xmit(ill_t *ill, uint32_t operation, 120 uint8_t *hwaddr, uint_t hwaddr_len, const in6_addr_t *sender, 121 const in6_addr_t *target, int flag); 122 static void ncec_refhold_locked(ncec_t *); 123 static boolean_t ill_defend_rate_limit(ill_t *, ncec_t *); 124 static void nce_queue_mp_common(ncec_t *, mblk_t *, boolean_t); 125 static int nce_add_common(ill_t *, uchar_t *, uint_t, const in6_addr_t *, 126 uint16_t, uint16_t, nce_t **); 127 static nce_t *nce_add_impl(ill_t *, ncec_t *, nce_t *, mblk_t *); 128 static nce_t *nce_add(ill_t *, ncec_t *); 129 static void nce_inactive(nce_t *); 130 extern nce_t *nce_lookup(ill_t *, const in6_addr_t *); 131 static nce_t *nce_ill_lookup_then_add(ill_t *, ncec_t *); 132 static int nce_add_v6(ill_t *, uchar_t *, uint_t, const in6_addr_t *, 133 uint16_t, uint16_t, nce_t **); 134 static int nce_add_v4(ill_t *, uchar_t *, uint_t, const in_addr_t *, 135 uint16_t, uint16_t, nce_t **); 136 static int nce_add_v6_postprocess(nce_t *); 137 static int nce_add_v4_postprocess(nce_t *); 138 static ill_t *nce_resolve_src(ncec_t *, in6_addr_t *); 139 static clock_t nce_fuzz_interval(clock_t, boolean_t); 140 static void nce_resolv_ipmp_ok(ncec_t *); 141 static void nce_walk_common(ill_t *, pfi_t, void *); 142 static void nce_start_timer(ncec_t *, uint_t); 143 static nce_t *nce_fastpath_create(ill_t *, ncec_t *); 144 static void nce_fastpath_trigger(nce_t *); 145 static nce_t *nce_fastpath(ncec_t *, boolean_t, nce_t *); 146 147 #ifdef DEBUG 148 static void ncec_trace_cleanup(const ncec_t *); 149 #endif 150 151 #define NCE_HASH_PTR_V4(ipst, addr) \ 152 (&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 153 154 #define NCE_HASH_PTR_V6(ipst, addr) \ 155 (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \ 156 NCE_TABLE_SIZE)])) 157 158 extern kmem_cache_t *ncec_cache; 159 extern kmem_cache_t *nce_cache; 160 161 /* 162 * Send out a IPv6 (unicast) or IPv4 (broadcast) DAD probe 163 * If src_ill is not null, the ncec_addr is bound to src_ill. The 164 * src_ill is ignored by nce_dad for IPv4 Neighbor Cache entries where 165 * the probe is sent on the ncec_ill (in the non-IPMP case) or the 166 * IPMP cast_ill (in the IPMP case). 167 * 168 * Note that the probe interval is based on ncec->ncec_ill which 169 * may be the ipmp_ill. 170 */ 171 static void 172 nce_dad(ncec_t *ncec, ill_t *src_ill, boolean_t send_probe) 173 { 174 boolean_t dropped; 175 uint32_t probe_interval; 176 177 ASSERT(!(ncec->ncec_flags & NCE_F_MCAST)); 178 ASSERT(!(ncec->ncec_flags & NCE_F_BCAST)); 179 if (ncec->ncec_ipversion == IPV6_VERSION) { 180 dropped = ndp_xmit(src_ill, ND_NEIGHBOR_SOLICIT, 181 ncec->ncec_lladdr, ncec->ncec_lladdr_length, 182 &ipv6_all_zeros, &ncec->ncec_addr, NDP_PROBE); 183 probe_interval = ILL_PROBE_INTERVAL(ncec->ncec_ill); 184 } else { 185 /* IPv4 DAD delay the initial probe. 
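 * When send_probe is B_FALSE the transmission is skipped (dropped stays
 * B_TRUE, so ncec_pcnt is not decremented below) and only a fuzzed timer
 * is armed via nce_fuzz_interval(); the arp_probe() itself is then
 * expected to go out on a later pass through this function once that
 * timer fires.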
*/ 186 if (send_probe) 187 dropped = arp_probe(ncec); 188 else 189 dropped = B_TRUE; 190 probe_interval = nce_fuzz_interval(ncec->ncec_xmit_interval, 191 !send_probe); 192 } 193 if (!dropped) { 194 mutex_enter(&ncec->ncec_lock); 195 ncec->ncec_pcnt--; 196 mutex_exit(&ncec->ncec_lock); 197 } 198 nce_restart_timer(ncec, probe_interval); 199 } 200 201 /* 202 * Compute default flags to use for an advertisement of this ncec's address. 203 */ 204 static int 205 nce_advert_flags(const ncec_t *ncec) 206 { 207 int flag = 0; 208 209 if (ncec->ncec_flags & NCE_F_ISROUTER) 210 flag |= NDP_ISROUTER; 211 if (!(ncec->ncec_flags & NCE_F_ANYCAST)) 212 flag |= NDP_ORIDE; 213 214 return (flag); 215 } 216 217 /* 218 * NDP Cache Entry creation routine. 219 * This routine must always be called with ndp6->ndp_g_lock held. 220 */ 221 int 222 nce_add_v6(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len, 223 const in6_addr_t *addr, uint16_t flags, uint16_t state, nce_t **newnce) 224 { 225 int err; 226 nce_t *nce; 227 228 ASSERT(MUTEX_HELD(&ill->ill_ipst->ips_ndp6->ndp_g_lock)); 229 ASSERT(ill != NULL && ill->ill_isv6); 230 231 err = nce_add_common(ill, hw_addr, hw_addr_len, addr, flags, state, 232 &nce); 233 if (err != 0) 234 return (err); 235 ASSERT(newnce != NULL); 236 *newnce = nce; 237 return (err); 238 } 239 240 /* 241 * Post-processing routine to be executed after nce_add_v6(). This function 242 * triggers fastpath (if appropriate) and DAD on the newly added nce entry 243 * and must be called without any locks held. 244 */ 245 int 246 nce_add_v6_postprocess(nce_t *nce) 247 { 248 ncec_t *ncec = nce->nce_common; 249 boolean_t dropped = B_FALSE; 250 uchar_t *hw_addr = ncec->ncec_lladdr; 251 uint_t hw_addr_len = ncec->ncec_lladdr_length; 252 ill_t *ill = ncec->ncec_ill; 253 int err = 0; 254 uint16_t flags = ncec->ncec_flags; 255 ip_stack_t *ipst = ill->ill_ipst; 256 boolean_t trigger_fastpath = B_TRUE; 257 258 /* 259 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 260 * we call nce_fastpath as soon as the ncec is resolved in nce_process. 261 * We call nce_fastpath from nce_update if the link layer address of 262 * the peer changes from nce_update 263 */ 264 if (NCE_PUBLISH(ncec) || !NCE_ISREACHABLE(ncec) || 265 (hw_addr == NULL && ill->ill_net_type != IRE_IF_NORESOLVER)) 266 trigger_fastpath = B_FALSE; 267 268 if (trigger_fastpath) 269 nce_fastpath_trigger(nce); 270 if (NCE_PUBLISH(ncec) && ncec->ncec_state == ND_PROBE) { 271 ill_t *hwaddr_ill; 272 /* 273 * Unicast entry that needs DAD. 274 */ 275 if (IS_IPMP(ill)) { 276 hwaddr_ill = ipmp_illgrp_find_ill(ill->ill_grp, 277 hw_addr, hw_addr_len); 278 } else { 279 hwaddr_ill = ill; 280 } 281 nce_dad(ncec, hwaddr_ill, B_TRUE); 282 err = EINPROGRESS; 283 } else if (flags & NCE_F_UNSOL_ADV) { 284 /* 285 * We account for the transmit below by assigning one 286 * less than the ndd variable. Subsequent decrements 287 * are done in nce_timer. 
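 *
 * For example, if ips_ip_ndp_unsolicit_count is 3, the sequence is
 * roughly: ncec_unsolicit_count is set to 2 and advertisement #1 is sent
 * below; each subsequent nce_timer() expiry sends another advertisement
 * and decrements the count, so #2 and #3 go out before the count reaches
 * zero and the timer is no longer restarted.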
288 */ 289 mutex_enter(&ncec->ncec_lock); 290 ncec->ncec_unsolicit_count = 291 ipst->ips_ip_ndp_unsolicit_count - 1; 292 mutex_exit(&ncec->ncec_lock); 293 dropped = ndp_xmit(ill, 294 ND_NEIGHBOR_ADVERT, 295 hw_addr, 296 hw_addr_len, 297 &ncec->ncec_addr, /* Source and target of the adv */ 298 &ipv6_all_hosts_mcast, /* Destination of the packet */ 299 nce_advert_flags(ncec)); 300 mutex_enter(&ncec->ncec_lock); 301 if (dropped) 302 ncec->ncec_unsolicit_count++; 303 else 304 ncec->ncec_last_time_defended = ddi_get_lbolt(); 305 if (ncec->ncec_unsolicit_count != 0) { 306 nce_start_timer(ncec, 307 ipst->ips_ip_ndp_unsolicit_interval); 308 } 309 mutex_exit(&ncec->ncec_lock); 310 } 311 return (err); 312 } 313 314 /* 315 * Atomically lookup and add (if needed) Neighbor Cache information for 316 * an address. 317 * 318 * IPMP notes: the ncec for non-local (i.e., !NCE_MYADDR(ncec) addresses 319 * are always added pointing at the ipmp_ill. Thus, when the ill passed 320 * to nce_add_v6 is an under_ill (i.e., IS_UNDER_IPMP(ill)) two nce_t 321 * entries will be created, both pointing at the same ncec_t. The nce_t 322 * entries will have their nce_ill set to the ipmp_ill and the under_ill 323 * respectively, with the ncec_t having its ncec_ill pointing at the ipmp_ill. 324 * Local addresses are always created on the ill passed to nce_add_v6. 325 */ 326 int 327 nce_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len, 328 const in6_addr_t *addr, uint16_t flags, uint16_t state, nce_t **newnce) 329 { 330 int err = 0; 331 ip_stack_t *ipst = ill->ill_ipst; 332 nce_t *nce, *upper_nce = NULL; 333 ill_t *in_ill = ill; 334 boolean_t need_ill_refrele = B_FALSE; 335 336 if (flags & NCE_F_MCAST) { 337 /* 338 * hw_addr will be figured out in nce_set_multicast_v6; 339 * caller has to select the cast_ill 340 */ 341 ASSERT(hw_addr == NULL); 342 ASSERT(!IS_IPMP(ill)); 343 err = nce_set_multicast_v6(ill, addr, flags, newnce); 344 return (err); 345 } 346 ASSERT(ill->ill_isv6); 347 if (IS_UNDER_IPMP(ill) && !(flags & NCE_F_MYADDR)) { 348 ill = ipmp_ill_hold_ipmp_ill(ill); 349 if (ill == NULL) 350 return (ENXIO); 351 need_ill_refrele = B_TRUE; 352 } 353 354 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 355 nce = nce_lookup_addr(ill, addr); 356 if (nce == NULL) { 357 err = nce_add_v6(ill, hw_addr, hw_addr_len, addr, flags, state, 358 &nce); 359 } else { 360 err = EEXIST; 361 } 362 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 363 if (err == 0) 364 err = nce_add_v6_postprocess(nce); 365 if (in_ill != ill && nce != NULL) { 366 nce_t *under_nce; 367 368 /* 369 * in_ill was the under_ill. Try to create the under_nce. 370 * Hold the ill_g_lock to prevent changes to group membership 371 * until we are done. 372 */ 373 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 374 if (IS_IN_SAME_ILLGRP(in_ill, ill)) { 375 under_nce = nce_fastpath_create(in_ill, 376 nce->nce_common); 377 upper_nce = nce; 378 if ((nce = under_nce) == NULL) 379 err = EINVAL; 380 } 381 rw_exit(&ipst->ips_ill_g_lock); 382 if (under_nce != NULL && NCE_ISREACHABLE(nce->nce_common)) 383 nce_fastpath_trigger(under_nce); 384 } 385 if (nce != NULL) { 386 if (newnce != NULL) 387 *newnce = nce; 388 else 389 nce_refrele(nce); 390 } 391 /* nce_refrele is deferred until the lock is dropped */ 392 if (upper_nce != NULL) 393 nce_refrele(upper_nce); 394 if (need_ill_refrele) 395 ill_refrele(ill); 396 return (err); 397 } 398 399 /* 400 * Remove all the CONDEMNED nces from the appropriate hash table. 
401 * We create a private list of NCEs, these may have ires pointing 402 * to them, so the list will be passed through to clean up dependent 403 * ires and only then we can do ncec_refrele() which can make NCE inactive. 404 */ 405 static void 406 nce_remove(ndp_g_t *ndp, ncec_t *ncec, ncec_t **free_nce_list) 407 { 408 ncec_t *ncec1; 409 ncec_t **ptpn; 410 411 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 412 ASSERT(ndp->ndp_g_walker == 0); 413 for (; ncec; ncec = ncec1) { 414 ncec1 = ncec->ncec_next; 415 mutex_enter(&ncec->ncec_lock); 416 if (NCE_ISCONDEMNED(ncec)) { 417 ptpn = ncec->ncec_ptpn; 418 ncec1 = ncec->ncec_next; 419 if (ncec1 != NULL) 420 ncec1->ncec_ptpn = ptpn; 421 *ptpn = ncec1; 422 ncec->ncec_ptpn = NULL; 423 ncec->ncec_next = NULL; 424 ncec->ncec_next = *free_nce_list; 425 *free_nce_list = ncec; 426 } 427 mutex_exit(&ncec->ncec_lock); 428 } 429 } 430 431 /* 432 * 1. Mark the entry CONDEMNED. This ensures that no new nce_lookup() 433 * will return this NCE. Also no new timeouts will 434 * be started (See nce_restart_timer). 435 * 2. Cancel any currently running timeouts. 436 * 3. If there is an ndp walker, return. The walker will do the cleanup. 437 * This ensures that walkers see a consistent list of NCEs while walking. 438 * 4. Otherwise remove the NCE from the list of NCEs 439 */ 440 void 441 ncec_delete(ncec_t *ncec) 442 { 443 ncec_t **ptpn; 444 ncec_t *ncec1; 445 int ipversion = ncec->ncec_ipversion; 446 ndp_g_t *ndp; 447 ip_stack_t *ipst = ncec->ncec_ipst; 448 449 if (ipversion == IPV4_VERSION) 450 ndp = ipst->ips_ndp4; 451 else 452 ndp = ipst->ips_ndp6; 453 454 /* Serialize deletes */ 455 mutex_enter(&ncec->ncec_lock); 456 if (NCE_ISCONDEMNED(ncec)) { 457 /* Some other thread is doing the delete */ 458 mutex_exit(&ncec->ncec_lock); 459 return; 460 } 461 /* 462 * Caller has a refhold. Also 1 ref for being in the list. Thus 463 * refcnt has to be >= 2 464 */ 465 ASSERT(ncec->ncec_refcnt >= 2); 466 ncec->ncec_flags |= NCE_F_CONDEMNED; 467 mutex_exit(&ncec->ncec_lock); 468 469 /* Count how many condemned ires for kmem_cache callback */ 470 atomic_add_32(&ipst->ips_num_nce_condemned, 1); 471 nce_fastpath_list_delete(ncec->ncec_ill, ncec, NULL); 472 473 /* Complete any waiting callbacks */ 474 ncec_cb_dispatch(ncec); 475 476 /* 477 * Cancel any running timer. Timeout can't be restarted 478 * since CONDEMNED is set. Can't hold ncec_lock across untimeout. 479 * Passing invalid timeout id is fine. 480 */ 481 if (ncec->ncec_timeout_id != 0) { 482 (void) untimeout(ncec->ncec_timeout_id); 483 ncec->ncec_timeout_id = 0; 484 } 485 486 mutex_enter(&ndp->ndp_g_lock); 487 if (ncec->ncec_ptpn == NULL) { 488 /* 489 * The last ndp walker has already removed this ncec from 490 * the list after we marked the ncec CONDEMNED and before 491 * we grabbed the global lock. 492 */ 493 mutex_exit(&ndp->ndp_g_lock); 494 return; 495 } 496 if (ndp->ndp_g_walker > 0) { 497 /* 498 * Can't unlink. The walker will clean up 499 */ 500 ndp->ndp_g_walker_cleanup = B_TRUE; 501 mutex_exit(&ndp->ndp_g_lock); 502 return; 503 } 504 505 /* 506 * Now remove the ncec from the list. nce_restart_timer won't restart 507 * the timer since it is marked CONDEMNED. 
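 * The splice below relies on ncec_ptpn, which points back at whichever
 * pointer currently references this entry (the hash bucket head or the
 * previous entry's ncec_next); redirecting *ptpn to our successor and
 * handing the successor our ncec_ptpn unlinks the entry in constant time
 * without rescanning the bucket.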
508 */ 509 ptpn = ncec->ncec_ptpn; 510 ncec1 = ncec->ncec_next; 511 if (ncec1 != NULL) 512 ncec1->ncec_ptpn = ptpn; 513 *ptpn = ncec1; 514 ncec->ncec_ptpn = NULL; 515 ncec->ncec_next = NULL; 516 mutex_exit(&ndp->ndp_g_lock); 517 518 /* Removed from ncec_ptpn/ncec_next list */ 519 ncec_refrele_notr(ncec); 520 } 521 522 void 523 ncec_inactive(ncec_t *ncec) 524 { 525 mblk_t **mpp; 526 ill_t *ill = ncec->ncec_ill; 527 ip_stack_t *ipst = ncec->ncec_ipst; 528 529 ASSERT(ncec->ncec_refcnt == 0); 530 ASSERT(MUTEX_HELD(&ncec->ncec_lock)); 531 532 /* Count how many condemned nces for kmem_cache callback */ 533 if (NCE_ISCONDEMNED(ncec)) 534 atomic_add_32(&ipst->ips_num_nce_condemned, -1); 535 536 /* Free all allocated messages */ 537 mpp = &ncec->ncec_qd_mp; 538 while (*mpp != NULL) { 539 mblk_t *mp; 540 541 mp = *mpp; 542 *mpp = mp->b_next; 543 544 inet_freemsg(mp); 545 } 546 /* 547 * must have been cleaned up in ncec_delete 548 */ 549 ASSERT(list_is_empty(&ncec->ncec_cb)); 550 list_destroy(&ncec->ncec_cb); 551 /* 552 * free the ncec_lladdr if one was allocated in nce_add_common() 553 */ 554 if (ncec->ncec_lladdr_length > 0) 555 kmem_free(ncec->ncec_lladdr, ncec->ncec_lladdr_length); 556 557 #ifdef DEBUG 558 ncec_trace_cleanup(ncec); 559 #endif 560 561 mutex_enter(&ill->ill_lock); 562 DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill, 563 (char *), "ncec", (void *), ncec); 564 ill->ill_ncec_cnt--; 565 ncec->ncec_ill = NULL; 566 /* 567 * If the number of ncec's associated with this ill have dropped 568 * to zero, check whether we need to restart any operation that 569 * is waiting for this to happen. 570 */ 571 if (ILL_DOWN_OK(ill)) { 572 /* ipif_ill_refrele_tail drops the ill_lock */ 573 ipif_ill_refrele_tail(ill); 574 } else { 575 mutex_exit(&ill->ill_lock); 576 } 577 578 mutex_destroy(&ncec->ncec_lock); 579 kmem_cache_free(ncec_cache, ncec); 580 } 581 582 /* 583 * ncec_walk routine. Delete the ncec if it is associated with the ill 584 * that is going away. Always called as a writer. 585 */ 586 void 587 ncec_delete_per_ill(ncec_t *ncec, uchar_t *arg) 588 { 589 if ((ncec != NULL) && ncec->ncec_ill == (ill_t *)arg) { 590 ncec_delete(ncec); 591 } 592 } 593 594 /* 595 * Neighbor Cache cleanup logic for a list of ncec_t entries. 596 */ 597 static void 598 nce_cleanup_list(ncec_t *ncec) 599 { 600 ncec_t *ncec_next; 601 602 ASSERT(ncec != NULL); 603 while (ncec != NULL) { 604 ncec_next = ncec->ncec_next; 605 ncec->ncec_next = NULL; 606 607 /* 608 * It is possible for the last ndp walker (this thread) 609 * to come here after ncec_delete has marked the ncec CONDEMNED 610 * and before it has removed the ncec from the fastpath list 611 * or called untimeout. So we need to do it here. It is safe 612 * for both ncec_delete and this thread to do it twice or 613 * even simultaneously since each of the threads has a 614 * reference on the ncec. 615 */ 616 nce_fastpath_list_delete(ncec->ncec_ill, ncec, NULL); 617 /* 618 * Cancel any running timer. Timeout can't be restarted 619 * since CONDEMNED is set. The ncec_lock can't be 620 * held across untimeout though passing invalid timeout 621 * id is fine. 622 */ 623 if (ncec->ncec_timeout_id != 0) { 624 (void) untimeout(ncec->ncec_timeout_id); 625 ncec->ncec_timeout_id = 0; 626 } 627 /* Removed from ncec_ptpn/ncec_next list */ 628 ncec_refrele_notr(ncec); 629 ncec = ncec_next; 630 } 631 } 632 633 /* 634 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 
635 */ 636 boolean_t 637 nce_restart_dad(ncec_t *ncec) 638 { 639 boolean_t started; 640 ill_t *ill, *hwaddr_ill; 641 642 if (ncec == NULL) 643 return (B_FALSE); 644 ill = ncec->ncec_ill; 645 mutex_enter(&ncec->ncec_lock); 646 if (ncec->ncec_state == ND_PROBE) { 647 mutex_exit(&ncec->ncec_lock); 648 started = B_TRUE; 649 } else if (ncec->ncec_state == ND_REACHABLE) { 650 ASSERT(ncec->ncec_lladdr != NULL); 651 ncec->ncec_state = ND_PROBE; 652 ncec->ncec_pcnt = ND_MAX_UNICAST_SOLICIT; 653 /* 654 * Slight cheat here: we don't use the initial probe delay 655 * for IPv4 in this obscure case. 656 */ 657 mutex_exit(&ncec->ncec_lock); 658 if (IS_IPMP(ill)) { 659 hwaddr_ill = ipmp_illgrp_find_ill(ill->ill_grp, 660 ncec->ncec_lladdr, ncec->ncec_lladdr_length); 661 } else { 662 hwaddr_ill = ill; 663 } 664 nce_dad(ncec, hwaddr_ill, B_TRUE); 665 started = B_TRUE; 666 } else { 667 mutex_exit(&ncec->ncec_lock); 668 started = B_FALSE; 669 } 670 return (started); 671 } 672 673 /* 674 * IPv6 Cache entry lookup. Try to find an ncec matching the parameters passed. 675 * If one is found, the refcnt on the ncec will be incremented. 676 */ 677 ncec_t * 678 ncec_lookup_illgrp_v6(ill_t *ill, const in6_addr_t *addr) 679 { 680 ncec_t *ncec; 681 ip_stack_t *ipst = ill->ill_ipst; 682 683 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 684 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 685 686 /* Get head of v6 hash table */ 687 ncec = *((ncec_t **)NCE_HASH_PTR_V6(ipst, *addr)); 688 ncec = ncec_lookup_illgrp(ill, addr, ncec); 689 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 690 rw_exit(&ipst->ips_ill_g_lock); 691 return (ncec); 692 } 693 /* 694 * IPv4 Cache entry lookup. Try to find an ncec matching the parameters passed. 695 * If one is found, the refcnt on the ncec will be incremented. 696 */ 697 ncec_t * 698 ncec_lookup_illgrp_v4(ill_t *ill, const in_addr_t *addr) 699 { 700 ncec_t *ncec = NULL; 701 in6_addr_t addr6; 702 ip_stack_t *ipst = ill->ill_ipst; 703 704 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 705 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 706 707 /* Get head of v4 hash table */ 708 ncec = *((ncec_t **)NCE_HASH_PTR_V4(ipst, *addr)); 709 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 710 ncec = ncec_lookup_illgrp(ill, &addr6, ncec); 711 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 712 rw_exit(&ipst->ips_ill_g_lock); 713 return (ncec); 714 } 715 716 /* 717 * Cache entry lookup. Try to find an ncec matching the parameters passed. 718 * If an ncec is found, increment the hold count on that ncec. 719 * The caller passes in the start of the appropriate hash table, and must 720 * be holding the appropriate global lock (ndp_g_lock). In addition, since 721 * this function matches ncec_t entries across the illgrp, the ips_ill_g_lock 722 * must be held as reader. 723 * 724 * This function always matches across the ipmp group. 
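 *
 * A minimal usage sketch (mirroring what ndp_query() does later in this
 * file, with v6addr standing in for the address of interest) goes through
 * one of the public wrappers above, which take the required locks, and
 * drops the hold when done:
 *
 *	ncec_t *ncec = ncec_lookup_illgrp_v6(ill, &v6addr);
 *	if (ncec != NULL) {
 *		... use ncec->ncec_lladdr, ncec->ncec_flags ...
 *		ncec_refrele(ncec);
 *	}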
725 */ 726 ncec_t * 727 ncec_lookup_illgrp(ill_t *ill, const in6_addr_t *addr, ncec_t *ncec) 728 { 729 ndp_g_t *ndp; 730 ip_stack_t *ipst = ill->ill_ipst; 731 732 if (ill->ill_isv6) 733 ndp = ipst->ips_ndp6; 734 else 735 ndp = ipst->ips_ndp4; 736 737 ASSERT(ill != NULL); 738 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 739 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 740 return (NULL); 741 for (; ncec != NULL; ncec = ncec->ncec_next) { 742 if (ncec->ncec_ill == ill || 743 IS_IN_SAME_ILLGRP(ill, ncec->ncec_ill)) { 744 if (IN6_ARE_ADDR_EQUAL(&ncec->ncec_addr, addr)) { 745 mutex_enter(&ncec->ncec_lock); 746 if (!NCE_ISCONDEMNED(ncec)) { 747 ncec_refhold_locked(ncec); 748 mutex_exit(&ncec->ncec_lock); 749 break; 750 } 751 mutex_exit(&ncec->ncec_lock); 752 } 753 } 754 } 755 return (ncec); 756 } 757 758 /* 759 * Find an nce_t on ill with nce_addr == addr. Lookup the nce_t 760 * entries for ill only, i.e., when ill is part of an ipmp group, 761 * nce_lookup_v4 will never try to match across the group. 762 */ 763 nce_t * 764 nce_lookup_v4(ill_t *ill, const in_addr_t *addr) 765 { 766 nce_t *nce; 767 in6_addr_t addr6; 768 ip_stack_t *ipst = ill->ill_ipst; 769 770 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 771 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 772 nce = nce_lookup_addr(ill, &addr6); 773 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 774 return (nce); 775 } 776 777 /* 778 * Find an nce_t on ill with nce_addr == addr. Lookup the nce_t 779 * entries for ill only, i.e., when ill is part of an ipmp group, 780 * nce_lookup_v6 will never try to match across the group. 781 */ 782 nce_t * 783 nce_lookup_v6(ill_t *ill, const in6_addr_t *addr6) 784 { 785 nce_t *nce; 786 ip_stack_t *ipst = ill->ill_ipst; 787 788 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 789 nce = nce_lookup_addr(ill, addr6); 790 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 791 return (nce); 792 } 793 794 static nce_t * 795 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr) 796 { 797 nce_t *nce; 798 799 ASSERT(ill != NULL); 800 #ifdef DEBUG 801 if (ill->ill_isv6) 802 ASSERT(MUTEX_HELD(&ill->ill_ipst->ips_ndp6->ndp_g_lock)); 803 else 804 ASSERT(MUTEX_HELD(&ill->ill_ipst->ips_ndp4->ndp_g_lock)); 805 #endif 806 mutex_enter(&ill->ill_lock); 807 nce = nce_lookup(ill, addr); 808 mutex_exit(&ill->ill_lock); 809 return (nce); 810 } 811 812 813 /* 814 * Router turned to host. We need to make sure that cached copies of the ncec 815 * are not used for forwarding packets if they were derived from the default 816 * route, and that the default route itself is removed, as required by 817 * section 7.2.5 of RFC 2461. 818 * 819 * Note that the ncec itself probably has valid link-layer information for the 820 * nexthop, so that there is no reason to delete the ncec, as long as the 821 * ISROUTER flag is turned off. 822 */ 823 static void 824 ncec_router_to_host(ncec_t *ncec) 825 { 826 ire_t *ire; 827 ip_stack_t *ipst = ncec->ncec_ipst; 828 829 mutex_enter(&ncec->ncec_lock); 830 ncec->ncec_flags &= ~NCE_F_ISROUTER; 831 mutex_exit(&ncec->ncec_lock); 832 833 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, &ipv6_all_zeros, 834 &ncec->ncec_addr, IRE_DEFAULT, ncec->ncec_ill, ALL_ZONES, NULL, 835 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW, 0, ipst, NULL); 836 if (ire != NULL) { 837 ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst); 838 ire_delete(ire); 839 ire_refrele(ire); 840 } 841 } 842 843 /* 844 * Process passed in parameters either from an incoming packet or via 845 * user ioctl. 
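 *
 * In outline (the full rules are in RFC 4861 section 7.2.5): an
 * ND_INCOMPLETE entry that learns a link-layer address becomes
 * ND_REACHABLE if the advertisement was solicited and ND_STALE otherwise;
 * for an already-resolved entry, a changed link-layer address without the
 * Override flag only demotes a REACHABLE entry to STALE, while with
 * Override (or with an unchanged address) the address is recorded and the
 * entry moves to REACHABLE or STALE according to the Solicited flag.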
 */
void
nce_process(ncec_t *ncec, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv)
{
	ill_t		*ill = ncec->ncec_ill;
	uint32_t	hw_addr_len = ill->ill_phys_addr_length;
	boolean_t	ll_updated = B_FALSE;
	boolean_t	ll_changed;
	nce_t		*nce;

	ASSERT(ncec->ncec_ipversion == IPV6_VERSION);
	/*
	 * No updates of the link layer address or the neighbor state are
	 * allowed when the cache is in NONUD state.  This still
	 * allows for responding to reachability solicitations.
	 */
	mutex_enter(&ncec->ncec_lock);
	if (ncec->ncec_state == ND_INCOMPLETE) {
		if (hw_addr == NULL) {
			mutex_exit(&ncec->ncec_lock);
			return;
		}
		nce_set_ll(ncec, hw_addr);
		/*
		 * Update the ncec state and send the queued packets
		 * back to ip; this time the ire will be added.
		 */
		if (flag & ND_NA_FLAG_SOLICITED) {
			nce_update(ncec, ND_REACHABLE, NULL);
		} else {
			nce_update(ncec, ND_STALE, NULL);
		}
		mutex_exit(&ncec->ncec_lock);
		nce = nce_fastpath(ncec, B_TRUE, NULL);
		nce_resolv_ok(ncec);
		if (nce != NULL)
			nce_refrele(nce);
		return;
	}
	ll_changed = nce_cmp_ll_addr(ncec, hw_addr, hw_addr_len);
	if (!is_adv) {
		/* If this is a SOLICITATION request only */
		if (ll_changed)
			nce_update(ncec, ND_STALE, hw_addr);
		mutex_exit(&ncec->ncec_lock);
		ncec_cb_dispatch(ncec);
		return;
	}
	if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) {
		/* If in any other state than REACHABLE, ignore */
		if (ncec->ncec_state == ND_REACHABLE) {
			nce_update(ncec, ND_STALE, NULL);
		}
		mutex_exit(&ncec->ncec_lock);
		ncec_cb_dispatch(ncec);
		return;
	} else {
		if (ll_changed) {
			nce_update(ncec, ND_UNCHANGED, hw_addr);
			ll_updated = B_TRUE;
		}
		if (flag & ND_NA_FLAG_SOLICITED) {
			nce_update(ncec, ND_REACHABLE, NULL);
		} else {
			if (ll_updated) {
				nce_update(ncec, ND_STALE, NULL);
			}
		}
		mutex_exit(&ncec->ncec_lock);
		if (!(flag & ND_NA_FLAG_ROUTER) && (ncec->ncec_flags &
		    NCE_F_ISROUTER)) {
			ncec_router_to_host(ncec);
		} else {
			ncec_cb_dispatch(ncec);
		}
	}
}

/*
 * Pass arg1 to the pfi supplied, along with each ncec in existence.
 * ncec_walk() places a REFHOLD on the ncec and drops the lock when
 * walking the hash list.
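 *
 * A walker callback is an ordinary function of the ncec and the opaque
 * argument; ncec_delete_per_ill() earlier in this file is one example. A
 * hypothetical callback that merely counts entries could look like:
 *
 *	static void
 *	ncec_count_cb(ncec_t *ncec, void *arg)
 *	{
 *		(*(uint_t *)arg)++;
 *	}
 *
 *	uint_t cnt = 0;
 *	ncec_walk(NULL, (pfi_t)ncec_count_cb, &cnt, ipst);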
928 */ 929 void 930 ncec_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 931 boolean_t trace) 932 { 933 ncec_t *ncec; 934 ncec_t *ncec1; 935 ncec_t **ncep; 936 ncec_t *free_nce_list = NULL; 937 938 mutex_enter(&ndp->ndp_g_lock); 939 /* Prevent ncec_delete from unlink and free of NCE */ 940 ndp->ndp_g_walker++; 941 mutex_exit(&ndp->ndp_g_lock); 942 for (ncep = ndp->nce_hash_tbl; 943 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 944 for (ncec = *ncep; ncec != NULL; ncec = ncec1) { 945 ncec1 = ncec->ncec_next; 946 if (ill == NULL || ncec->ncec_ill == ill) { 947 if (trace) { 948 ncec_refhold(ncec); 949 (*pfi)(ncec, arg1); 950 ncec_refrele(ncec); 951 } else { 952 ncec_refhold_notr(ncec); 953 (*pfi)(ncec, arg1); 954 ncec_refrele_notr(ncec); 955 } 956 } 957 } 958 } 959 mutex_enter(&ndp->ndp_g_lock); 960 ndp->ndp_g_walker--; 961 if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { 962 /* Time to delete condemned entries */ 963 for (ncep = ndp->nce_hash_tbl; 964 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 965 ncec = *ncep; 966 if (ncec != NULL) { 967 nce_remove(ndp, ncec, &free_nce_list); 968 } 969 } 970 ndp->ndp_g_walker_cleanup = B_FALSE; 971 } 972 973 mutex_exit(&ndp->ndp_g_lock); 974 975 if (free_nce_list != NULL) { 976 nce_cleanup_list(free_nce_list); 977 } 978 } 979 980 /* 981 * Walk everything. 982 * Note that ill can be NULL hence can't derive the ipst from it. 983 */ 984 void 985 ncec_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst) 986 { 987 ncec_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE); 988 ncec_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE); 989 } 990 991 /* 992 * For each interface an entry is added for the unspecified multicast group. 993 * Here that mapping is used to form the multicast cache entry for a particular 994 * multicast destination. 995 */ 996 static int 997 nce_set_multicast_v6(ill_t *ill, const in6_addr_t *dst, 998 uint16_t flags, nce_t **newnce) 999 { 1000 uchar_t *hw_addr; 1001 int err = 0; 1002 ip_stack_t *ipst = ill->ill_ipst; 1003 nce_t *nce; 1004 1005 ASSERT(ill != NULL); 1006 ASSERT(ill->ill_isv6); 1007 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1008 1009 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 1010 nce = nce_lookup_addr(ill, dst); 1011 if (nce != NULL) { 1012 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1013 goto done; 1014 } 1015 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1016 /* 1017 * For IRE_IF_RESOLVER a hardware mapping can be 1018 * generated. 1019 */ 1020 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1021 if (hw_addr == NULL) { 1022 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1023 return (ENOMEM); 1024 } 1025 ip_mcast_mapping(ill, (uchar_t *)dst, hw_addr); 1026 } else { 1027 /* No hw_addr is needed for IRE_IF_NORESOLVER. 
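 * For the IRE_IF_RESOLVER case above, the mapping on Ethernet-like media
 * is the standard RFC 2464 one: the group maps to 33:33:xx:xx:xx:xx with
 * the low four octets copied from the IPv6 group address. A stand-alone
 * sketch of that mapping, assuming a six-byte MAC, is:
 *
 *	mac[0] = 0x33;
 *	mac[1] = 0x33;
 *	bcopy(&dst->s6_addr[12], &mac[2], 4);
 *
 * The bytes actually used here are whatever ip_mcast_mapping() computes
 * for the ill's medium.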
*/ 1028 hw_addr = NULL; 1029 } 1030 ASSERT((flags & NCE_F_MCAST) != 0); 1031 ASSERT((flags & NCE_F_NONUD) != 0); 1032 /* nce_state will be computed by nce_add_common() */ 1033 err = nce_add_v6(ill, hw_addr, ill->ill_phys_addr_length, dst, flags, 1034 ND_UNCHANGED, &nce); 1035 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 1036 if (err == 0) 1037 err = nce_add_v6_postprocess(nce); 1038 if (hw_addr != NULL) 1039 kmem_free(hw_addr, ill->ill_nd_lla_len); 1040 if (err != 0) { 1041 ip1dbg(("nce_set_multicast_v6: create failed" "%d\n", err)); 1042 return (err); 1043 } 1044 done: 1045 ASSERT(nce->nce_common->ncec_state == ND_REACHABLE); 1046 if (newnce != NULL) 1047 *newnce = nce; 1048 else 1049 nce_refrele(nce); 1050 return (0); 1051 } 1052 1053 /* 1054 * Return the link layer address, and any flags of a ncec. 1055 */ 1056 int 1057 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1058 { 1059 ncec_t *ncec; 1060 in6_addr_t *addr; 1061 sin6_t *sin6; 1062 1063 ASSERT(ill != NULL && ill->ill_isv6); 1064 sin6 = (sin6_t *)&lnr->lnr_addr; 1065 addr = &sin6->sin6_addr; 1066 1067 /* 1068 * NOTE: if the ill is an IPMP interface, then match against the whole 1069 * illgrp. This e.g. allows in.ndpd to retrieve the link layer 1070 * addresses for the data addresses on an IPMP interface even though 1071 * ipif_ndp_up() created them with an ncec_ill of ipif_bound_ill. 1072 */ 1073 ncec = ncec_lookup_illgrp_v6(ill, addr); 1074 if (ncec == NULL) 1075 return (ESRCH); 1076 /* If no link layer address is available yet, return ESRCH */ 1077 if (!NCE_ISREACHABLE(ncec)) { 1078 ncec_refrele(ncec); 1079 return (ESRCH); 1080 } 1081 lnr->lnr_hdw_len = ill->ill_phys_addr_length; 1082 bcopy(ncec->ncec_lladdr, (uchar_t *)&lnr->lnr_hdw_addr, 1083 lnr->lnr_hdw_len); 1084 if (ncec->ncec_flags & NCE_F_ISROUTER) 1085 lnr->lnr_flags = NDF_ISROUTER_ON; 1086 if (ncec->ncec_flags & NCE_F_ANYCAST) 1087 lnr->lnr_flags |= NDF_ANYCAST_ON; 1088 ncec_refrele(ncec); 1089 return (0); 1090 } 1091 1092 /* 1093 * Finish setting up the Enable/Disable multicast for the driver. 
1094 */ 1095 mblk_t * 1096 ndp_mcastreq(ill_t *ill, const in6_addr_t *v6group, uint32_t hw_addr_len, 1097 uint32_t hw_addr_offset, mblk_t *mp) 1098 { 1099 uchar_t *hw_addr; 1100 ipaddr_t v4group; 1101 uchar_t *addr; 1102 1103 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1104 if (IN6_IS_ADDR_V4MAPPED(v6group)) { 1105 IN6_V4MAPPED_TO_IPADDR(v6group, v4group); 1106 1107 ASSERT(CLASSD(v4group)); 1108 ASSERT(!(ill->ill_isv6)); 1109 1110 addr = (uchar_t *)&v4group; 1111 } else { 1112 ASSERT(IN6_IS_ADDR_MULTICAST(v6group)); 1113 ASSERT(ill->ill_isv6); 1114 1115 addr = (uchar_t *)v6group; 1116 } 1117 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1118 if (hw_addr == NULL) { 1119 ip0dbg(("ndp_mcastreq NULL hw_addr\n")); 1120 freemsg(mp); 1121 return (NULL); 1122 } 1123 1124 ip_mcast_mapping(ill, addr, hw_addr); 1125 return (mp); 1126 } 1127 1128 void 1129 ip_ndp_resolve(ncec_t *ncec) 1130 { 1131 in_addr_t sender4 = INADDR_ANY; 1132 in6_addr_t sender6 = ipv6_all_zeros; 1133 ill_t *src_ill; 1134 uint32_t ms; 1135 1136 src_ill = nce_resolve_src(ncec, &sender6); 1137 if (src_ill == NULL) { 1138 /* Make sure we try again later */ 1139 ms = ncec->ncec_ill->ill_reachable_retrans_time; 1140 nce_restart_timer(ncec, (clock_t)ms); 1141 return; 1142 } 1143 if (ncec->ncec_ipversion == IPV4_VERSION) 1144 IN6_V4MAPPED_TO_IPADDR(&sender6, sender4); 1145 mutex_enter(&ncec->ncec_lock); 1146 if (ncec->ncec_ipversion == IPV6_VERSION) 1147 ms = ndp_solicit(ncec, sender6, src_ill); 1148 else 1149 ms = arp_request(ncec, sender4, src_ill); 1150 mutex_exit(&ncec->ncec_lock); 1151 if (ms == 0) { 1152 if (ncec->ncec_state != ND_REACHABLE) { 1153 if (ncec->ncec_ipversion == IPV6_VERSION) 1154 ndp_resolv_failed(ncec); 1155 else 1156 arp_resolv_failed(ncec); 1157 ASSERT((ncec->ncec_flags & NCE_F_STATIC) == 0); 1158 nce_make_unreachable(ncec); 1159 ncec_delete(ncec); 1160 } 1161 } else { 1162 nce_restart_timer(ncec, (clock_t)ms); 1163 } 1164 done: 1165 ill_refrele(src_ill); 1166 } 1167 1168 /* 1169 * Send an IPv6 neighbor solicitation. 1170 * Returns number of milliseconds after which we should either rexmit or abort. 1171 * Return of zero means we should abort. 1172 * The caller holds the ncec_lock to protect ncec_qd_mp and ncec_rcnt. 1173 * The optional source address is used as a hint to ndp_solicit for 1174 * which source to use in the packet. 1175 * 1176 * NOTE: This routine drops ncec_lock (and later reacquires it) when sending 1177 * the packet. 1178 */ 1179 uint32_t 1180 ndp_solicit(ncec_t *ncec, in6_addr_t src, ill_t *ill) 1181 { 1182 in6_addr_t dst; 1183 boolean_t dropped = B_FALSE; 1184 1185 ASSERT(ncec->ncec_ipversion == IPV6_VERSION); 1186 ASSERT(MUTEX_HELD(&ncec->ncec_lock)); 1187 1188 if (ncec->ncec_rcnt == 0) 1189 return (0); 1190 1191 dst = ncec->ncec_addr; 1192 ncec->ncec_rcnt--; 1193 mutex_exit(&ncec->ncec_lock); 1194 dropped = ndp_xmit(ill, ND_NEIGHBOR_SOLICIT, ill->ill_phys_addr, 1195 ill->ill_phys_addr_length, &src, &dst, 0); 1196 mutex_enter(&ncec->ncec_lock); 1197 if (dropped) 1198 ncec->ncec_rcnt++; 1199 return (ncec->ncec_ill->ill_reachable_retrans_time); 1200 } 1201 1202 /* 1203 * Attempt to recover an address on an interface that's been marked as a 1204 * duplicate. Because NCEs are destroyed when the interface goes down, there's 1205 * no easy way to just probe the address and have the right thing happen if 1206 * it's no longer in use. 
Instead, we just bring it up normally and allow the 1207 * regular interface start-up logic to probe for a remaining duplicate and take 1208 * us back down if necessary. 1209 * Neither DHCP nor temporary addresses arrive here; they're excluded by 1210 * ip_ndp_excl. 1211 */ 1212 /* ARGSUSED */ 1213 void 1214 ip_addr_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1215 { 1216 ill_t *ill = rq->q_ptr; 1217 ipif_t *ipif; 1218 in6_addr_t *addr6 = (in6_addr_t *)mp->b_rptr; 1219 in_addr_t *addr4 = (in_addr_t *)mp->b_rptr; 1220 boolean_t addr_equal; 1221 1222 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1223 /* 1224 * We do not support recovery of proxy ARP'd interfaces, 1225 * because the system lacks a complete proxy ARP mechanism. 1226 */ 1227 if (ill->ill_isv6) { 1228 addr_equal = IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 1229 addr6); 1230 } else { 1231 addr_equal = (ipif->ipif_lcl_addr == *addr4); 1232 } 1233 1234 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || !addr_equal) 1235 continue; 1236 1237 /* 1238 * If we have already recovered or if the interface is going 1239 * away, then ignore. 1240 */ 1241 mutex_enter(&ill->ill_lock); 1242 if (!(ipif->ipif_flags & IPIF_DUPLICATE) || 1243 (ipif->ipif_state_flags & IPIF_CONDEMNED)) { 1244 mutex_exit(&ill->ill_lock); 1245 continue; 1246 } 1247 1248 ipif->ipif_flags &= ~IPIF_DUPLICATE; 1249 ill->ill_ipif_dup_count--; 1250 mutex_exit(&ill->ill_lock); 1251 ipif->ipif_was_dup = B_TRUE; 1252 1253 if (ill->ill_isv6) { 1254 VERIFY(ipif_ndp_up(ipif, B_TRUE) != EINPROGRESS); 1255 (void) ipif_up_done_v6(ipif); 1256 } else { 1257 VERIFY(ipif_arp_up(ipif, Res_act_initial, B_TRUE) != 1258 EINPROGRESS); 1259 (void) ipif_up_done(ipif); 1260 } 1261 } 1262 freeb(mp); 1263 } 1264 1265 /* 1266 * Attempt to recover an IPv6 interface that's been shut down as a duplicate. 1267 * As long as someone else holds the address, the interface will stay down. 1268 * When that conflict goes away, the interface is brought back up. This is 1269 * done so that accidental shutdowns of addresses aren't made permanent. Your 1270 * server will recover from a failure. 1271 * 1272 * For DHCP and temporary addresses, recovery is not done in the kernel. 1273 * Instead, it's handled by user space processes (dhcpagent and in.ndpd). 1274 * 1275 * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 1276 */ 1277 void 1278 ipif_dup_recovery(void *arg) 1279 { 1280 ipif_t *ipif = arg; 1281 1282 ipif->ipif_recovery_id = 0; 1283 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) 1284 return; 1285 1286 /* 1287 * No lock, because this is just an optimization. 1288 */ 1289 if (ipif->ipif_state_flags & IPIF_CONDEMNED) 1290 return; 1291 1292 /* If the link is down, we'll retry this later */ 1293 if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) 1294 return; 1295 1296 ipif_do_recovery(ipif); 1297 } 1298 1299 /* 1300 * Perform interface recovery by forcing the duplicate interfaces up and 1301 * allowing the system to determine which ones should stay up. 1302 * 1303 * Called both by recovery timer expiry and link-up notification. 
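 *
 * If the mblk for the qwriter cannot be allocated below, recovery is not
 * abandoned: the routine simply re-arms ipif_recovery_id (provided
 * ip_dup_recovery is non-zero and the ipif is not condemned) so that the
 * attempt is retried after another ip_dup_recovery interval.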
1304 */ 1305 void 1306 ipif_do_recovery(ipif_t *ipif) 1307 { 1308 ill_t *ill = ipif->ipif_ill; 1309 mblk_t *mp; 1310 ip_stack_t *ipst = ill->ill_ipst; 1311 size_t mp_size; 1312 1313 if (ipif->ipif_isv6) 1314 mp_size = sizeof (ipif->ipif_v6lcl_addr); 1315 else 1316 mp_size = sizeof (ipif->ipif_lcl_addr); 1317 mp = allocb(mp_size, BPRI_MED); 1318 if (mp == NULL) { 1319 mutex_enter(&ill->ill_lock); 1320 if (ipst->ips_ip_dup_recovery > 0 && 1321 ipif->ipif_recovery_id == 0 && 1322 !(ipif->ipif_state_flags & IPIF_CONDEMNED)) { 1323 ipif->ipif_recovery_id = timeout(ipif_dup_recovery, 1324 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1325 } 1326 mutex_exit(&ill->ill_lock); 1327 } else { 1328 /* 1329 * A recovery timer may still be running if we got here from 1330 * ill_restart_dad(); cancel that timer. 1331 */ 1332 if (ipif->ipif_recovery_id != 0) 1333 (void) untimeout(ipif->ipif_recovery_id); 1334 ipif->ipif_recovery_id = 0; 1335 1336 if (ipif->ipif_isv6) { 1337 bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, 1338 sizeof (ipif->ipif_v6lcl_addr)); 1339 } else { 1340 bcopy(&ipif->ipif_lcl_addr, mp->b_rptr, 1341 sizeof (ipif->ipif_lcl_addr)); 1342 } 1343 ill_refhold(ill); 1344 qwriter_ip(ill, ill->ill_rq, mp, ip_addr_recover, NEW_OP, 1345 B_FALSE); 1346 } 1347 } 1348 1349 /* 1350 * Find the MAC and IP addresses in an NA/NS message. 1351 */ 1352 static void 1353 ip_ndp_find_addresses(mblk_t *mp, ip_recv_attr_t *ira, ill_t *ill, 1354 in6_addr_t *targp, uchar_t **haddr, uint_t *haddrlenp) 1355 { 1356 icmp6_t *icmp6 = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1357 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 1358 uchar_t *addr; 1359 int alen; 1360 1361 /* icmp_inbound_v6 ensures this */ 1362 ASSERT(ira->ira_flags & IRAF_L2SRC_SET); 1363 1364 addr = ira->ira_l2src; 1365 alen = ill->ill_phys_addr_length; 1366 if (alen > 0) { 1367 *haddr = addr; 1368 *haddrlenp = alen; 1369 } else { 1370 *haddr = NULL; 1371 *haddrlenp = 0; 1372 } 1373 1374 /* nd_ns_target and nd_na_target are at the same offset, so we cheat */ 1375 *targp = ns->nd_ns_target; 1376 } 1377 1378 /* 1379 * This is for exclusive changes due to NDP duplicate address detection 1380 * failure. 1381 */ 1382 /* ARGSUSED */ 1383 static void 1384 ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1385 { 1386 ill_t *ill = rq->q_ptr; 1387 ipif_t *ipif; 1388 uchar_t *haddr; 1389 uint_t haddrlen; 1390 ip_stack_t *ipst = ill->ill_ipst; 1391 in6_addr_t targ; 1392 ip_recv_attr_t iras; 1393 mblk_t *attrmp; 1394 1395 attrmp = mp; 1396 mp = mp->b_cont; 1397 attrmp->b_cont = NULL; 1398 if (!ip_recv_attr_from_mblk(attrmp, &iras)) { 1399 /* The ill or ip_stack_t disappeared on us */ 1400 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1401 ip_drop_input("ip_recv_attr_from_mblk", mp, ill); 1402 freemsg(mp); 1403 ira_cleanup(&iras, B_TRUE); 1404 return; 1405 } 1406 1407 ASSERT(ill == iras.ira_rill); 1408 1409 ip_ndp_find_addresses(mp, &iras, ill, &targ, &haddr, &haddrlen); 1410 if (haddr != NULL && haddrlen == ill->ill_phys_addr_length) { 1411 /* 1412 * Ignore conflicts generated by misbehaving switches that 1413 * just reflect our own messages back to us. For IPMP, we may 1414 * see reflections across any ill in the illgrp. 1415 * 1416 * RFC2462 and revisions tried to detect both the case 1417 * when a statically configured IPv6 address is a duplicate, 1418 * and the case when the L2 address itself is a duplicate. 
The 1419 * later is important because, with stateles address autoconf, 1420 * if the L2 address is a duplicate, the resulting IPv6 1421 * address(es) would also be duplicates. We rely on DAD of the 1422 * IPv6 address itself to detect the latter case. 1423 */ 1424 /* For an under ill_grp can change under lock */ 1425 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1426 if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 || 1427 IS_UNDER_IPMP(ill) && 1428 ipmp_illgrp_find_ill(ill->ill_grp, haddr, 1429 haddrlen) != NULL) { 1430 rw_exit(&ipst->ips_ill_g_lock); 1431 goto ignore_conflict; 1432 } 1433 rw_exit(&ipst->ips_ill_g_lock); 1434 } 1435 1436 /* 1437 * Look up the appropriate ipif. 1438 */ 1439 ipif = ipif_lookup_addr_v6(&targ, ill, ALL_ZONES, ipst); 1440 if (ipif == NULL) 1441 goto ignore_conflict; 1442 1443 /* Reload the ill to match the ipif */ 1444 ill = ipif->ipif_ill; 1445 1446 /* If it's already duplicate or ineligible, then don't do anything. */ 1447 if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) { 1448 ipif_refrele(ipif); 1449 goto ignore_conflict; 1450 } 1451 1452 /* 1453 * If this is a failure during duplicate recovery, then don't 1454 * complain. It may take a long time to recover. 1455 */ 1456 if (!ipif->ipif_was_dup) { 1457 char ibuf[LIFNAMSIZ]; 1458 char hbuf[MAC_STR_LEN]; 1459 char sbuf[INET6_ADDRSTRLEN]; 1460 1461 ipif_get_name(ipif, ibuf, sizeof (ibuf)); 1462 cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);" 1463 " disabled", ibuf, 1464 inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)), 1465 mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf))); 1466 } 1467 mutex_enter(&ill->ill_lock); 1468 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 1469 ipif->ipif_flags |= IPIF_DUPLICATE; 1470 ill->ill_ipif_dup_count++; 1471 mutex_exit(&ill->ill_lock); 1472 (void) ipif_down(ipif, NULL, NULL); 1473 (void) ipif_down_tail(ipif); 1474 mutex_enter(&ill->ill_lock); 1475 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 1476 ill->ill_net_type == IRE_IF_RESOLVER && 1477 !(ipif->ipif_state_flags & IPIF_CONDEMNED) && 1478 ipst->ips_ip_dup_recovery > 0) { 1479 ASSERT(ipif->ipif_recovery_id == 0); 1480 ipif->ipif_recovery_id = timeout(ipif_dup_recovery, 1481 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1482 } 1483 mutex_exit(&ill->ill_lock); 1484 ipif_refrele(ipif); 1485 1486 ignore_conflict: 1487 freemsg(mp); 1488 ira_cleanup(&iras, B_TRUE); 1489 } 1490 1491 /* 1492 * Handle failure by tearing down the ipifs with the specified address. Note 1493 * that tearing down the ipif also means deleting the ncec through ipif_down, so 1494 * it's not possible to do recovery by just restarting the ncec timer. Instead, 1495 * we start a timer on the ipif. 1496 * Caller has to free mp; 1497 */ 1498 static void 1499 ndp_failure(mblk_t *mp, ip_recv_attr_t *ira) 1500 { 1501 const uchar_t *haddr; 1502 ill_t *ill = ira->ira_rill; 1503 1504 /* 1505 * Ignore conflicts generated by misbehaving switches that just 1506 * reflect our own messages back to us. 
1507 */ 1508 1509 /* icmp_inbound_v6 ensures this */ 1510 ASSERT(ira->ira_flags & IRAF_L2SRC_SET); 1511 haddr = ira->ira_l2src; 1512 if (haddr != NULL && 1513 bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) { 1514 return; 1515 } 1516 1517 if ((mp = copymsg(mp)) != NULL) { 1518 mblk_t *attrmp; 1519 1520 attrmp = ip_recv_attr_to_mblk(ira); 1521 if (attrmp == NULL) { 1522 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1523 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1524 freemsg(mp); 1525 } else { 1526 ASSERT(attrmp->b_cont == NULL); 1527 attrmp->b_cont = mp; 1528 mp = attrmp; 1529 ill_refhold(ill); 1530 qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_excl, NEW_OP, 1531 B_FALSE); 1532 } 1533 } 1534 } 1535 1536 /* 1537 * Handle a discovered conflict: some other system is advertising that it owns 1538 * one of our IP addresses. We need to defend ourselves, or just shut down the 1539 * interface. 1540 * 1541 * Handles both IPv4 and IPv6 1542 */ 1543 boolean_t 1544 ip_nce_conflict(mblk_t *mp, ip_recv_attr_t *ira, ncec_t *ncec) 1545 { 1546 ipif_t *ipif; 1547 clock_t now; 1548 uint_t maxdefense; 1549 uint_t defs; 1550 ill_t *ill = ira->ira_ill; 1551 ip_stack_t *ipst = ill->ill_ipst; 1552 uint32_t elapsed; 1553 boolean_t isv6 = ill->ill_isv6; 1554 ipaddr_t ncec_addr; 1555 1556 if (isv6) { 1557 ipif = ipif_lookup_addr_v6(&ncec->ncec_addr, ill, ALL_ZONES, 1558 ipst); 1559 } else { 1560 if (arp_no_defense) { 1561 /* 1562 * Yes, there is a conflict, but no, we do not 1563 * defend ourself. 1564 */ 1565 return (B_TRUE); 1566 } 1567 IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr, ncec_addr); 1568 ipif = ipif_lookup_addr(ncec_addr, ill, ALL_ZONES, 1569 ipst); 1570 } 1571 if (ipif == NULL) 1572 return (B_FALSE); 1573 1574 /* 1575 * First, figure out if this address is disposable. 1576 */ 1577 if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) 1578 maxdefense = ipst->ips_ip_max_temp_defend; 1579 else 1580 maxdefense = ipst->ips_ip_max_defend; 1581 1582 /* 1583 * Now figure out how many times we've defended ourselves. Ignore 1584 * defenses that happened long in the past. 1585 */ 1586 now = ddi_get_lbolt(); 1587 elapsed = (drv_hztousec(now - ncec->ncec_last_time_defended))/1000000; 1588 mutex_enter(&ncec->ncec_lock); 1589 if ((defs = ncec->ncec_defense_count) > 0 && 1590 elapsed > ipst->ips_ip_defend_interval) { 1591 /* 1592 * ip_defend_interval has elapsed. 1593 * reset the defense count. 1594 */ 1595 ncec->ncec_defense_count = defs = 0; 1596 } 1597 ncec->ncec_defense_count++; 1598 ncec->ncec_last_time_defended = now; 1599 mutex_exit(&ncec->ncec_lock); 1600 ipif_refrele(ipif); 1601 1602 /* 1603 * If we've defended ourselves too many times already, then give up and 1604 * tear down the interface(s) using this address. 1605 * Otherwise, caller has to defend by sending out an announce. 1606 */ 1607 if (defs >= maxdefense) { 1608 if (isv6) 1609 ndp_failure(mp, ira); 1610 else 1611 arp_failure(mp, ira); 1612 } else { 1613 return (B_TRUE); /* caller must defend this address */ 1614 } 1615 return (B_FALSE); 1616 } 1617 1618 /* 1619 * Handle reception of Neighbor Solicitation messages. 
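 *
 * DAD solicitations (those with an unspecified source) are only accepted
 * when the IPv6 destination is a solicited-node multicast address. That
 * form is defined in RFC 4291 as ff02::1:ffXX:XXXX, i.e. the fixed prefix
 * ff02::1:ff00:0/104 with the low 24 bits copied from the target address;
 * a stand-alone sketch of building it is:
 *
 *	in6_addr_t snm;
 *
 *	bzero(&snm, sizeof (snm));
 *	snm.s6_addr[0] = 0xff;
 *	snm.s6_addr[1] = 0x02;
 *	snm.s6_addr[11] = 0x01;
 *	snm.s6_addr[12] = 0xff;
 *	snm.s6_addr[13] = target.s6_addr[13];
 *	snm.s6_addr[14] = target.s6_addr[14];
 *	snm.s6_addr[15] = target.s6_addr[15];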
1620 */ 1621 static void 1622 ndp_input_solicit(mblk_t *mp, ip_recv_attr_t *ira) 1623 { 1624 ill_t *ill = ira->ira_ill, *under_ill; 1625 nd_neighbor_solicit_t *ns; 1626 uint32_t hlen = ill->ill_phys_addr_length; 1627 uchar_t *haddr = NULL; 1628 icmp6_t *icmp_nd; 1629 ip6_t *ip6h; 1630 ncec_t *our_ncec = NULL; 1631 in6_addr_t target; 1632 in6_addr_t src; 1633 int len; 1634 int flag = 0; 1635 nd_opt_hdr_t *opt = NULL; 1636 boolean_t bad_solicit = B_FALSE; 1637 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1638 boolean_t need_ill_refrele = B_FALSE; 1639 1640 ip6h = (ip6_t *)mp->b_rptr; 1641 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1642 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1643 src = ip6h->ip6_src; 1644 ns = (nd_neighbor_solicit_t *)icmp_nd; 1645 target = ns->nd_ns_target; 1646 if (IN6_IS_ADDR_MULTICAST(&target)) { 1647 if (ip_debug > 2) { 1648 /* ip1dbg */ 1649 pr_addr_dbg("ndp_input_solicit: Target is" 1650 " multicast! %s\n", AF_INET6, &target); 1651 } 1652 bad_solicit = B_TRUE; 1653 goto done; 1654 } 1655 if (len > sizeof (nd_neighbor_solicit_t)) { 1656 /* Options present */ 1657 opt = (nd_opt_hdr_t *)&ns[1]; 1658 len -= sizeof (nd_neighbor_solicit_t); 1659 if (!ndp_verify_optlen(opt, len)) { 1660 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1661 bad_solicit = B_TRUE; 1662 goto done; 1663 } 1664 } 1665 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1666 /* Check to see if this is a valid DAD solicitation */ 1667 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1668 if (ip_debug > 2) { 1669 /* ip1dbg */ 1670 pr_addr_dbg("ndp_input_solicit: IPv6 " 1671 "Destination is not solicited node " 1672 "multicast %s\n", AF_INET6, 1673 &ip6h->ip6_dst); 1674 } 1675 bad_solicit = B_TRUE; 1676 goto done; 1677 } 1678 } 1679 1680 /* 1681 * NOTE: with IPMP, it's possible the nominated multicast ill (which 1682 * received this packet if it's multicast) is not the ill tied to 1683 * e.g. the IPMP ill's data link-local. So we match across the illgrp 1684 * to ensure we find the associated NCE. 1685 */ 1686 our_ncec = ncec_lookup_illgrp_v6(ill, &target); 1687 /* 1688 * If this is a valid Solicitation for an address we are publishing, 1689 * then a PUBLISH entry should exist in the cache 1690 */ 1691 if (our_ncec == NULL || !NCE_PUBLISH(our_ncec)) { 1692 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1693 "ifname=%s ", ill->ill_name)); 1694 if (ip_debug > 2) { 1695 /* ip1dbg */ 1696 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1697 } 1698 if (our_ncec == NULL) 1699 bad_solicit = B_TRUE; 1700 goto done; 1701 } 1702 1703 /* At this point we should have a verified NS per spec */ 1704 if (opt != NULL) { 1705 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1706 if (opt != NULL) { 1707 haddr = (uchar_t *)&opt[1]; 1708 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 1709 hlen == 0) { 1710 ip1dbg(("ndp_input_advert: bad SLLA\n")); 1711 bad_solicit = B_TRUE; 1712 goto done; 1713 } 1714 } 1715 } 1716 1717 /* If sending directly to peer, set the unicast flag */ 1718 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) 1719 flag |= NDP_UNICAST; 1720 1721 /* 1722 * Create/update the entry for the soliciting node on the ipmp_ill. 1723 * or respond to outstanding queries, don't if 1724 * the source is unspecified address. 1725 */ 1726 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1727 int err; 1728 nce_t *nnce; 1729 1730 ASSERT(ill->ill_isv6); 1731 /* 1732 * Regular solicitations *must* include the Source Link-Layer 1733 * Address option. Ignore messages that do not. 
1734 */ 1735 if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1736 ip1dbg(("ndp_input_solicit: source link-layer address " 1737 "option missing with a specified source.\n")); 1738 bad_solicit = B_TRUE; 1739 goto done; 1740 } 1741 1742 /* 1743 * This is a regular solicitation. If we're still in the 1744 * process of verifying the address, then don't respond at all 1745 * and don't keep track of the sender. 1746 */ 1747 if (our_ncec->ncec_state == ND_PROBE) 1748 goto done; 1749 1750 /* 1751 * If the solicitation doesn't have sender hardware address 1752 * (legal for unicast solicitation), then process without 1753 * installing the return NCE. Either we already know it, or 1754 * we'll be forced to look it up when (and if) we reply to the 1755 * packet. 1756 */ 1757 if (haddr == NULL) 1758 goto no_source; 1759 1760 under_ill = ill; 1761 if (IS_UNDER_IPMP(under_ill)) { 1762 ill = ipmp_ill_hold_ipmp_ill(under_ill); 1763 if (ill == NULL) 1764 ill = under_ill; 1765 else 1766 need_ill_refrele = B_TRUE; 1767 } 1768 err = nce_lookup_then_add_v6(ill, 1769 haddr, hlen, 1770 &src, /* Soliciting nodes address */ 1771 0, 1772 ND_STALE, 1773 &nnce); 1774 1775 if (need_ill_refrele) { 1776 ill_refrele(ill); 1777 ill = under_ill; 1778 need_ill_refrele = B_FALSE; 1779 } 1780 switch (err) { 1781 case 0: 1782 /* done with this entry */ 1783 nce_refrele(nnce); 1784 break; 1785 case EEXIST: 1786 /* 1787 * B_FALSE indicates this is not an an advertisement. 1788 */ 1789 nce_process(nnce->nce_common, haddr, 0, B_FALSE); 1790 nce_refrele(nnce); 1791 break; 1792 default: 1793 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 1794 err)); 1795 goto done; 1796 } 1797 no_source: 1798 flag |= NDP_SOLICITED; 1799 } else { 1800 /* 1801 * No source link layer address option should be present in a 1802 * valid DAD request. 1803 */ 1804 if (haddr != NULL) { 1805 ip1dbg(("ndp_input_solicit: source link-layer address " 1806 "option present with an unspecified source.\n")); 1807 bad_solicit = B_TRUE; 1808 goto done; 1809 } 1810 if (our_ncec->ncec_state == ND_PROBE) { 1811 /* 1812 * Internally looped-back probes will have 1813 * IRAF_L2SRC_LOOPBACK set so we can ignore our own 1814 * transmissions. 1815 */ 1816 if (!(ira->ira_flags & IRAF_L2SRC_LOOPBACK)) { 1817 /* 1818 * If someone else is probing our address, then 1819 * we've crossed wires. Declare failure. 1820 */ 1821 ndp_failure(mp, ira); 1822 } 1823 goto done; 1824 } 1825 /* 1826 * This is a DAD probe. Multicast the advertisement to the 1827 * all-nodes address. 
		 */
		src = ipv6_all_hosts_mcast;
	}
	flag |= nce_advert_flags(our_ncec);
	(void) ndp_xmit(ill,
	    ND_NEIGHBOR_ADVERT,
	    our_ncec->ncec_lladdr,
	    our_ncec->ncec_lladdr_length,
	    &target,	/* Source and target of the advertisement pkt */
	    &src,	/* IP Destination (source of original pkt) */
	    flag);
done:
	if (bad_solicit)
		BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations);
	if (our_ncec != NULL)
		ncec_refrele(our_ncec);
}

/*
 * Handle reception of Neighbor Advertisement messages.
 */
void
ndp_input_advert(mblk_t *mp, ip_recv_attr_t *ira)
{
	ill_t		*ill = ira->ira_ill;
	nd_neighbor_advert_t *na;
	uint32_t	hlen = ill->ill_phys_addr_length;
	uchar_t		*haddr = NULL;
	icmp6_t		*icmp_nd;
	ip6_t		*ip6h;
	ncec_t		*dst_ncec = NULL;
	in6_addr_t	target;
	nd_opt_hdr_t	*opt = NULL;
	int		len;
	ip_stack_t	*ipst = ill->ill_ipst;
	mib2_ipv6IfIcmpEntry_t	*mib = ill->ill_icmp6_mib;

	ip6h = (ip6_t *)mp->b_rptr;
	icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
	len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
	na = (nd_neighbor_advert_t *)icmp_nd;

	if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
	    (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) {
		ip1dbg(("ndp_input_advert: Target is multicast but the "
		    "solicited flag is not zero\n"));
		BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
		return;
	}
	target = na->nd_na_target;
	if (IN6_IS_ADDR_MULTICAST(&target)) {
		ip1dbg(("ndp_input_advert: Target is multicast!\n"));
		BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
		return;
	}
	if (len > sizeof (nd_neighbor_advert_t)) {
		opt = (nd_opt_hdr_t *)&na[1];
		if (!ndp_verify_optlen(opt,
		    len - sizeof (nd_neighbor_advert_t))) {
			ip1dbg(("ndp_input_advert: cannot verify SLLA\n"));
			BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
			return;
		}
		/* At this point we have a verified NA per spec */
		len -= sizeof (nd_neighbor_advert_t);
		opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR);
		if (opt != NULL) {
			haddr = (uchar_t *)&opt[1];
			if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) ||
			    hlen == 0) {
				ip1dbg(("ndp_input_advert: bad SLLA\n"));
				BUMP_MIB(mib,
				    ipv6IfIcmpInBadNeighborAdvertisements);
				return;
			}
		}
	}

	/*
	 * NOTE: we match across the illgrp since we need to do DAD for all of
	 * our local addresses, and those are spread across all the active
	 * ills in the group.
	 */
	if ((dst_ncec = ncec_lookup_illgrp_v6(ill, &target)) == NULL)
		return;

	if (NCE_PUBLISH(dst_ncec)) {
		/*
		 * Someone just advertised an address that we publish. First,
		 * check if it was us -- if so, we can safely ignore it.
		 * We don't get the haddr from the ira_l2src because, in the
		 * case that the packet originated from us on an IPMP group,
		 * the ira_l2src may be the link-layer address of the
		 * cast_ill used to send the packet, which may not be the same
		 * as the dst_ncec->ncec_lladdr of the address.
		 */
		if (haddr != NULL) {
			if (ira->ira_flags & IRAF_L2SRC_LOOPBACK)
				goto out;

			if (!nce_cmp_ll_addr(dst_ncec, haddr, hlen))
				goto out; /* from us -- no conflict */

			/*
			 * If we're in an IPMP group, check if this is an echo
			 * from another ill in the group.
Use the double- 1934 * checked locking pattern to avoid grabbing 1935 * ill_g_lock in the non-IPMP case. 1936 */ 1937 if (IS_UNDER_IPMP(ill)) { 1938 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1939 if (IS_UNDER_IPMP(ill) && ipmp_illgrp_find_ill( 1940 ill->ill_grp, haddr, hlen) != NULL) { 1941 rw_exit(&ipst->ips_ill_g_lock); 1942 goto out; 1943 } 1944 rw_exit(&ipst->ips_ill_g_lock); 1945 } 1946 } 1947 1948 /* 1949 * This appears to be a real conflict. If we're trying to 1950 * configure this NCE (ND_PROBE), then shut it down. 1951 * Otherwise, handle the discovered conflict. 1952 */ 1953 if (dst_ncec->ncec_state == ND_PROBE) { 1954 ndp_failure(mp, ira); 1955 } else { 1956 if (ip_nce_conflict(mp, ira, dst_ncec)) { 1957 char hbuf[MAC_STR_LEN]; 1958 char sbuf[INET6_ADDRSTRLEN]; 1959 1960 cmn_err(CE_WARN, 1961 "node '%s' is using %s on %s", 1962 inet_ntop(AF_INET6, &target, sbuf, 1963 sizeof (sbuf)), 1964 haddr == NULL ? "<none>" : 1965 mac_colon_addr(haddr, hlen, hbuf, 1966 sizeof (hbuf)), ill->ill_name); 1967 /* 1968 * RFC 4862, Section 5.4.4 does not mandate 1969 * any specific behavior when an NA matches 1970 * a non-tentative address assigned to the 1971 * receiver. We make the choice of defending 1972 * our address, based on the assumption that 1973 * the sender has not detected the Duplicate. 1974 * 1975 * ncec_last_time_defended has been adjusted 1976 * in ip_nce_conflict() 1977 */ 1978 (void) ndp_announce(dst_ncec); 1979 } 1980 } 1981 } else { 1982 if (na->nd_na_flags_reserved & ND_NA_FLAG_ROUTER) 1983 dst_ncec->ncec_flags |= NCE_F_ISROUTER; 1984 1985 /* B_TRUE indicates this an advertisement */ 1986 nce_process(dst_ncec, haddr, na->nd_na_flags_reserved, B_TRUE); 1987 } 1988 out: 1989 ncec_refrele(dst_ncec); 1990 } 1991 1992 /* 1993 * Process NDP neighbor solicitation/advertisement messages. 1994 * The checksum has already checked o.k before reaching here. 1995 * Information about the datalink header is contained in ira_l2src, but 1996 * that should be ignored for loopback packets. 1997 */ 1998 void 1999 ndp_input(mblk_t *mp, ip_recv_attr_t *ira) 2000 { 2001 ill_t *ill = ira->ira_rill; 2002 icmp6_t *icmp_nd; 2003 ip6_t *ip6h; 2004 int len; 2005 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2006 ill_t *orig_ill = NULL; 2007 2008 /* 2009 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill 2010 * and make it be the IPMP upper so avoid being confused by a packet 2011 * addressed to a unicast address on a different ill. 2012 */ 2013 if (IS_UNDER_IPMP(ill)) { 2014 orig_ill = ill; 2015 ill = ipmp_ill_hold_ipmp_ill(orig_ill); 2016 if (ill == NULL) { 2017 ill = orig_ill; 2018 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2019 ip_drop_input("ipIfStatsInDiscards - IPMP ill", 2020 mp, ill); 2021 freemsg(mp); 2022 return; 2023 } 2024 ASSERT(ill != orig_ill); 2025 orig_ill = ira->ira_ill; 2026 ira->ira_ill = ill; 2027 mib = ill->ill_icmp6_mib; 2028 } 2029 if (!pullupmsg(mp, -1)) { 2030 ip1dbg(("ndp_input: pullupmsg failed\n")); 2031 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2032 ip_drop_input("ipIfStatsInDiscards - pullupmsg", mp, ill); 2033 goto done; 2034 } 2035 ip6h = (ip6_t *)mp->b_rptr; 2036 if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 2037 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 2038 ip_drop_input("ipv6IfIcmpBadHoplimit", mp, ill); 2039 BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 2040 goto done; 2041 } 2042 /* 2043 * NDP does not accept any extension headers between the 2044 * IP header and the ICMP header since e.g. a routing 2045 * header could be dangerous. 
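	 * Requiring the ICMPv6 header to immediately follow the IPv6 header
	 * keeps this validation simple and conservative.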
2046 * This assumes that any AH or ESP headers are removed 2047 * by ip prior to passing the packet to ndp_input. 2048 */ 2049 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 2050 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 2051 ip6h->ip6_nxt)); 2052 ip_drop_input("Wrong next header", mp, ill); 2053 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2054 goto done; 2055 } 2056 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2057 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 2058 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 2059 if (icmp_nd->icmp6_code != 0) { 2060 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 2061 ip_drop_input("code non-zero", mp, ill); 2062 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2063 goto done; 2064 } 2065 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2066 /* 2067 * Make sure packet length is large enough for either 2068 * a NS or a NA icmp packet. 2069 */ 2070 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 2071 ip1dbg(("ndp_input: packet too short\n")); 2072 ip_drop_input("packet too short", mp, ill); 2073 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2074 goto done; 2075 } 2076 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 2077 ndp_input_solicit(mp, ira); 2078 } else { 2079 ndp_input_advert(mp, ira); 2080 } 2081 done: 2082 freemsg(mp); 2083 if (orig_ill != NULL) { 2084 ill_refrele(ill); 2085 ira->ira_ill = orig_ill; 2086 } 2087 } 2088 2089 /* 2090 * ndp_xmit is called to form and transmit a ND solicitation or 2091 * advertisement ICMP packet. 2092 * 2093 * If the source address is unspecified and this isn't a probe (used for 2094 * duplicate address detection), an appropriate source address and link layer 2095 * address will be chosen here. The link layer address option is included if 2096 * the source is specified (i.e., all non-probe packets), and omitted (per the 2097 * specification) otherwise. 2098 * 2099 * It returns B_FALSE only if it does a successful put() to the 2100 * corresponding ill's ill_wq otherwise returns B_TRUE. 2101 */ 2102 static boolean_t 2103 ndp_xmit(ill_t *ill, uint32_t operation, uint8_t *hw_addr, uint_t hw_addr_len, 2104 const in6_addr_t *sender, const in6_addr_t *target, int flag) 2105 { 2106 uint32_t len; 2107 icmp6_t *icmp6; 2108 mblk_t *mp; 2109 ip6_t *ip6h; 2110 nd_opt_hdr_t *opt; 2111 uint_t plen; 2112 zoneid_t zoneid = GLOBAL_ZONEID; 2113 ill_t *hwaddr_ill = ill; 2114 ip_xmit_attr_t ixas; 2115 ip_stack_t *ipst = ill->ill_ipst; 2116 boolean_t need_refrele = B_FALSE; 2117 boolean_t probe = B_FALSE; 2118 2119 if (IS_UNDER_IPMP(ill)) { 2120 probe = ipif_lookup_testaddr_v6(ill, sender, NULL); 2121 /* 2122 * We send non-probe packets on the upper IPMP interface. 2123 * ip_output_simple() will use cast_ill for sending any 2124 * multicast packets. Note that we can't follow the same 2125 * logic for probe packets because all interfaces in the ipmp 2126 * group may have failed, so that we really want to only try 2127 * to send the ND packet on the ill corresponding to the src 2128 * address. 2129 */ 2130 if (!probe) { 2131 ill = ipmp_ill_hold_ipmp_ill(ill); 2132 if (ill != NULL) 2133 need_refrele = B_TRUE; 2134 else 2135 ill = hwaddr_ill; 2136 } 2137 } 2138 2139 /* 2140 * If we have a unspecified source(sender) address, select a 2141 * proper source address for the solicitation here itself so 2142 * that we can initialize the h/w address correctly. 2143 * 2144 * If the sender is specified then we use this address in order 2145 * to lookup the zoneid before calling ip_output_v6(). 
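	 * (The looked-up zoneid is placed in ixa_zoneid below.)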
This is to 2146 * enable unicast ND_NEIGHBOR_ADVERT packets to be routed correctly 2147 * by IP (we cannot guarantee that the global zone has an interface 2148 * route to the destination). 2149 * 2150 * Note that the NA never comes here with the unspecified source 2151 * address. 2152 */ 2153 2154 /* 2155 * Probes will have unspec src at this point. 2156 */ 2157 if (!(IN6_IS_ADDR_UNSPECIFIED(sender))) { 2158 zoneid = ipif_lookup_addr_zoneid_v6(sender, ill, ipst); 2159 /* 2160 * It's possible for ipif_lookup_addr_zoneid_v6() to return 2161 * ALL_ZONES if it cannot find a matching ipif for the address 2162 * we are trying to use. In this case we err on the side of 2163 * trying to send the packet by defaulting to the GLOBAL_ZONEID. 2164 */ 2165 if (zoneid == ALL_ZONES) 2166 zoneid = GLOBAL_ZONEID; 2167 } 2168 2169 plen = (sizeof (nd_opt_hdr_t) + hw_addr_len + 7) / 8; 2170 len = IPV6_HDR_LEN + sizeof (nd_neighbor_advert_t) + plen * 8; 2171 mp = allocb(len, BPRI_LO); 2172 if (mp == NULL) { 2173 if (need_refrele) 2174 ill_refrele(ill); 2175 return (B_TRUE); 2176 } 2177 2178 bzero((char *)mp->b_rptr, len); 2179 mp->b_wptr = mp->b_rptr + len; 2180 2181 bzero(&ixas, sizeof (ixas)); 2182 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6 | IXAF_NO_HW_CKSUM; 2183 2184 ixas.ixa_ifindex = ill->ill_phyint->phyint_ifindex; 2185 ixas.ixa_ipst = ipst; 2186 ixas.ixa_cred = kcred; 2187 ixas.ixa_cpid = NOPID; 2188 ixas.ixa_tsl = NULL; 2189 ixas.ixa_zoneid = zoneid; 2190 2191 ip6h = (ip6_t *)mp->b_rptr; 2192 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2193 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN); 2194 ip6h->ip6_nxt = IPPROTO_ICMPV6; 2195 ip6h->ip6_hops = IPV6_MAX_HOPS; 2196 ixas.ixa_multicast_ttl = ip6h->ip6_hops; 2197 ip6h->ip6_dst = *target; 2198 icmp6 = (icmp6_t *)&ip6h[1]; 2199 2200 if (hw_addr_len != 0) { 2201 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 2202 sizeof (nd_neighbor_advert_t)); 2203 } else { 2204 opt = NULL; 2205 } 2206 if (operation == ND_NEIGHBOR_SOLICIT) { 2207 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 2208 2209 if (opt != NULL && !(flag & NDP_PROBE)) { 2210 /* 2211 * Note that we don't send out SLLA for ND probes 2212 * per RFC 4862, even though we do send out the src 2213 * haddr for IPv4 DAD probes, even though both IPv4 2214 * and IPv6 go out with the unspecified/INADDR_ANY 2215 * src IP addr. 2216 */ 2217 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 2218 } 2219 ip6h->ip6_src = *sender; 2220 ns->nd_ns_target = *target; 2221 if (!(flag & NDP_UNICAST)) { 2222 /* Form multicast address of the target */ 2223 ip6h->ip6_dst = ipv6_solicited_node_mcast; 2224 ip6h->ip6_dst.s6_addr32[3] |= 2225 ns->nd_ns_target.s6_addr32[3]; 2226 } 2227 } else { 2228 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 2229 2230 ASSERT(!(flag & NDP_PROBE)); 2231 if (opt != NULL) 2232 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 2233 ip6h->ip6_src = *sender; 2234 na->nd_na_target = *sender; 2235 if (flag & NDP_ISROUTER) 2236 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 2237 if (flag & NDP_SOLICITED) 2238 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 2239 if (flag & NDP_ORIDE) 2240 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 2241 } 2242 2243 if (!(flag & NDP_PROBE)) { 2244 if (hw_addr != NULL && opt != NULL) { 2245 /* Fill in link layer address and option len */ 2246 opt->nd_opt_len = (uint8_t)plen; 2247 bcopy(hw_addr, &opt[1], hw_addr_len); 2248 } 2249 } 2250 if (opt != NULL && opt->nd_opt_type == 0) { 2251 /* If there's no link layer address option, then strip it. 
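	 * (this is the NDP_PROBE case, where space for the option was
	 * reserved but never filled in)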
*/ 2252 len -= plen * 8; 2253 mp->b_wptr = mp->b_rptr + len; 2254 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN); 2255 } 2256 2257 icmp6->icmp6_type = (uint8_t)operation; 2258 icmp6->icmp6_code = 0; 2259 /* 2260 * Prepare for checksum by putting icmp length in the icmp 2261 * checksum field. The checksum is calculated in ip_output.c. 2262 */ 2263 icmp6->icmp6_cksum = ip6h->ip6_plen; 2264 2265 (void) ip_output_simple(mp, &ixas); 2266 ixa_cleanup(&ixas); 2267 if (need_refrele) 2268 ill_refrele(ill); 2269 return (B_FALSE); 2270 } 2271 2272 /* 2273 * Used to set ND_UNREACHBLE before ncec_delete sets it NCE_F_CONDEMNED. 2274 * The datapath uses this as an indication that there 2275 * is a problem (as opposed to a NCE that was just 2276 * reclaimed due to lack of memory. 2277 * Note that static ARP entries never become unreachable. 2278 */ 2279 void 2280 nce_make_unreachable(ncec_t *ncec) 2281 { 2282 mutex_enter(&ncec->ncec_lock); 2283 ncec->ncec_state = ND_UNREACHABLE; 2284 mutex_exit(&ncec->ncec_lock); 2285 } 2286 2287 /* 2288 * NCE retransmit timer. Common to IPv4 and IPv6. 2289 * This timer goes off when: 2290 * a. It is time to retransmit a resolution for resolver. 2291 * b. It is time to send reachability probes. 2292 */ 2293 void 2294 nce_timer(void *arg) 2295 { 2296 ncec_t *ncec = arg; 2297 ill_t *ill = ncec->ncec_ill, *src_ill; 2298 char addrbuf[INET6_ADDRSTRLEN]; 2299 boolean_t dropped = B_FALSE; 2300 ip_stack_t *ipst = ncec->ncec_ipst; 2301 boolean_t isv6 = (ncec->ncec_ipversion == IPV6_VERSION); 2302 in_addr_t sender4 = INADDR_ANY; 2303 in6_addr_t sender6 = ipv6_all_zeros; 2304 2305 /* 2306 * The timer has to be cancelled by ncec_delete before doing the final 2307 * refrele. So the NCE is guaranteed to exist when the timer runs 2308 * until it clears the timeout_id. Before clearing the timeout_id 2309 * bump up the refcnt so that we can continue to use the ncec 2310 */ 2311 ASSERT(ncec != NULL); 2312 mutex_enter(&ncec->ncec_lock); 2313 ncec_refhold_locked(ncec); 2314 ncec->ncec_timeout_id = 0; 2315 mutex_exit(&ncec->ncec_lock); 2316 2317 src_ill = nce_resolve_src(ncec, &sender6); 2318 /* if we could not find a sender address, return */ 2319 if (src_ill == NULL) { 2320 if (!isv6) { 2321 IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr, sender4); 2322 ip1dbg(("no src ill for %s\n", inet_ntop(AF_INET, 2323 &sender4, addrbuf, sizeof (addrbuf)))); 2324 } else { 2325 ip1dbg(("no src ill for %s\n", inet_ntop(AF_INET6, 2326 &ncec->ncec_addr, addrbuf, sizeof (addrbuf)))); 2327 } 2328 nce_restart_timer(ncec, ill->ill_reachable_retrans_time); 2329 ncec_refrele(ncec); 2330 return; 2331 } 2332 if (!isv6) 2333 IN6_V4MAPPED_TO_IPADDR(&sender6, sender4); 2334 2335 mutex_enter(&ncec->ncec_lock); 2336 /* 2337 * Check the reachability state. 
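	 * Each case below decides whether to probe, announce, defend,
	 * resolve, or delete the entry.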
2338 */ 2339 switch (ncec->ncec_state) { 2340 case ND_DELAY: 2341 ASSERT(ncec->ncec_lladdr != NULL); 2342 ncec->ncec_state = ND_PROBE; 2343 ncec->ncec_pcnt = ND_MAX_UNICAST_SOLICIT; 2344 if (isv6) { 2345 mutex_exit(&ncec->ncec_lock); 2346 dropped = ndp_xmit(src_ill, ND_NEIGHBOR_SOLICIT, 2347 src_ill->ill_phys_addr, 2348 src_ill->ill_phys_addr_length, 2349 &sender6, &ncec->ncec_addr, 2350 NDP_UNICAST); 2351 } else { 2352 dropped = arp_request(ncec, sender4, src_ill); 2353 mutex_exit(&ncec->ncec_lock); 2354 } 2355 if (!dropped) { 2356 mutex_enter(&ncec->ncec_lock); 2357 ncec->ncec_pcnt--; 2358 mutex_exit(&ncec->ncec_lock); 2359 } 2360 if (ip_debug > 3) { 2361 /* ip2dbg */ 2362 pr_addr_dbg("nce_timer: state for %s changed " 2363 "to PROBE\n", AF_INET6, &ncec->ncec_addr); 2364 } 2365 nce_restart_timer(ncec, ill->ill_reachable_retrans_time); 2366 break; 2367 case ND_PROBE: 2368 /* must be retransmit timer */ 2369 ASSERT(ncec->ncec_pcnt >= -1); 2370 if (ncec->ncec_pcnt > 0) { 2371 /* 2372 * As per RFC2461, the ncec gets deleted after 2373 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 2374 * Note that the first unicast solicitation is sent 2375 * during the DELAY state. 2376 */ 2377 ip2dbg(("nce_timer: pcount=%x dst %s\n", 2378 ncec->ncec_pcnt, 2379 inet_ntop((isv6? AF_INET6 : AF_INET), 2380 &ncec->ncec_addr, addrbuf, sizeof (addrbuf)))); 2381 if (NCE_PUBLISH(ncec)) { 2382 mutex_exit(&ncec->ncec_lock); 2383 /* 2384 * send out a probe; note that src_ill 2385 * is ignored by nce_dad() for all 2386 * DAD message types other than IPv6 2387 * unicast probes 2388 */ 2389 nce_dad(ncec, src_ill, B_TRUE); 2390 } else { 2391 ASSERT(src_ill != NULL); 2392 if (isv6) { 2393 mutex_exit(&ncec->ncec_lock); 2394 dropped = ndp_xmit(src_ill, 2395 ND_NEIGHBOR_SOLICIT, 2396 src_ill->ill_phys_addr, 2397 src_ill->ill_phys_addr_length, 2398 &sender6, &ncec->ncec_addr, 2399 NDP_UNICAST); 2400 } else { 2401 /* 2402 * since the nce is REACHABLE, 2403 * the ARP request will be sent out 2404 * as a link-layer unicast. 2405 */ 2406 dropped = arp_request(ncec, sender4, 2407 src_ill); 2408 mutex_exit(&ncec->ncec_lock); 2409 } 2410 if (!dropped) { 2411 mutex_enter(&ncec->ncec_lock); 2412 ncec->ncec_pcnt--; 2413 mutex_exit(&ncec->ncec_lock); 2414 } 2415 nce_restart_timer(ncec, 2416 ill->ill_reachable_retrans_time); 2417 } 2418 } else if (ncec->ncec_pcnt < 0) { 2419 /* No hope, delete the ncec */ 2420 /* Tell datapath it went bad */ 2421 ncec->ncec_state = ND_UNREACHABLE; 2422 mutex_exit(&ncec->ncec_lock); 2423 if (ip_debug > 2) { 2424 /* ip1dbg */ 2425 pr_addr_dbg("nce_timer: Delete NCE for" 2426 " dst %s\n", (isv6? AF_INET6: AF_INET), 2427 &ncec->ncec_addr); 2428 } 2429 /* if static ARP can't delete. */ 2430 if ((ncec->ncec_flags & NCE_F_STATIC) == 0) 2431 ncec_delete(ncec); 2432 2433 } else if (!NCE_PUBLISH(ncec)) { 2434 /* 2435 * Probe count is 0 for a dynamic entry (one that we 2436 * ourselves are not publishing). We should never get 2437 * here if NONUD was requested, hence the ASSERT below. 
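			 * After the decrement below, the next timer firing
			 * will find ncec_pcnt < 0 and delete the entry.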
2438 */ 2439 ASSERT((ncec->ncec_flags & NCE_F_NONUD) == 0); 2440 ip2dbg(("nce_timer: pcount=%x dst %s\n", 2441 ncec->ncec_pcnt, inet_ntop(AF_INET6, 2442 &ncec->ncec_addr, addrbuf, sizeof (addrbuf)))); 2443 ncec->ncec_pcnt--; 2444 mutex_exit(&ncec->ncec_lock); 2445 /* Wait one interval before killing */ 2446 nce_restart_timer(ncec, 2447 ill->ill_reachable_retrans_time); 2448 } else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) { 2449 ipif_t *ipif; 2450 ipaddr_t ncec_addr; 2451 2452 /* 2453 * We're done probing, and we can now declare this 2454 * address to be usable. Let IP know that it's ok to 2455 * use. 2456 */ 2457 ncec->ncec_state = ND_REACHABLE; 2458 ncec->ncec_flags &= ~NCE_F_UNVERIFIED; 2459 mutex_exit(&ncec->ncec_lock); 2460 if (isv6) { 2461 ipif = ipif_lookup_addr_exact_v6( 2462 &ncec->ncec_addr, ill, ipst); 2463 } else { 2464 IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr, 2465 ncec_addr); 2466 ipif = ipif_lookup_addr_exact(ncec_addr, ill, 2467 ipst); 2468 } 2469 if (ipif != NULL) { 2470 if (ipif->ipif_was_dup) { 2471 char ibuf[LIFNAMSIZ + 10]; 2472 char sbuf[INET6_ADDRSTRLEN]; 2473 2474 ipif->ipif_was_dup = B_FALSE; 2475 (void) inet_ntop(AF_INET6, 2476 &ipif->ipif_v6lcl_addr, 2477 sbuf, sizeof (sbuf)); 2478 ipif_get_name(ipif, ibuf, 2479 sizeof (ibuf)); 2480 cmn_err(CE_NOTE, "recovered address " 2481 "%s on %s", sbuf, ibuf); 2482 } 2483 if ((ipif->ipif_flags & IPIF_UP) && 2484 !ipif->ipif_addr_ready) 2485 ipif_up_notify(ipif); 2486 ipif->ipif_addr_ready = 1; 2487 ipif_refrele(ipif); 2488 } 2489 if (!isv6 && arp_no_defense) 2490 break; 2491 /* Begin defending our new address */ 2492 if (ncec->ncec_unsolicit_count > 0) { 2493 ncec->ncec_unsolicit_count--; 2494 if (isv6) { 2495 dropped = ndp_announce(ncec); 2496 } else { 2497 dropped = arp_announce(ncec); 2498 } 2499 2500 if (dropped) 2501 ncec->ncec_unsolicit_count++; 2502 else 2503 ncec->ncec_last_time_defended = 2504 ddi_get_lbolt(); 2505 } 2506 if (ncec->ncec_unsolicit_count > 0) { 2507 nce_restart_timer(ncec, 2508 ANNOUNCE_INTERVAL(isv6)); 2509 } else if (DEFENSE_INTERVAL(isv6) != 0) { 2510 nce_restart_timer(ncec, DEFENSE_INTERVAL(isv6)); 2511 } 2512 } else { 2513 /* 2514 * This is an address we're probing to be our own, but 2515 * the ill is down. Wait until it comes back before 2516 * doing anything, but switch to reachable state so 2517 * that the restart will work. 2518 */ 2519 ncec->ncec_state = ND_REACHABLE; 2520 mutex_exit(&ncec->ncec_lock); 2521 } 2522 break; 2523 case ND_INCOMPLETE: { 2524 mblk_t *mp, *nextmp; 2525 mblk_t **prevmpp; 2526 2527 /* 2528 * Per case (2) in the nce_queue_mp() comments, scan ncec_qd_mp 2529 * for any IPMP probe packets, and toss them. IPMP probe 2530 * packets will always be at the head of ncec_qd_mp, so that 2531 * we can stop at the first queued ND packet that is 2532 * not a probe packet. 2533 */ 2534 prevmpp = &ncec->ncec_qd_mp; 2535 for (mp = ncec->ncec_qd_mp; mp != NULL; mp = nextmp) { 2536 nextmp = mp->b_next; 2537 2538 if (IS_UNDER_IPMP(ill) && ncec->ncec_nprobes > 0) { 2539 inet_freemsg(mp); 2540 ncec->ncec_nprobes--; 2541 *prevmpp = nextmp; 2542 } else { 2543 prevmpp = &mp->b_next; 2544 } 2545 } 2546 2547 /* 2548 * Must be resolver's retransmit timer. 
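		 * ip_ndp_resolve() is expected to retransmit the
		 * solicitation or give up on the resolution attempt.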
2549 */ 2550 mutex_exit(&ncec->ncec_lock); 2551 ip_ndp_resolve(ncec); 2552 break; 2553 } 2554 case ND_REACHABLE: 2555 if (((ncec->ncec_flags & NCE_F_UNSOL_ADV) && 2556 ncec->ncec_unsolicit_count != 0) || 2557 (NCE_PUBLISH(ncec) && DEFENSE_INTERVAL(isv6) != 0)) { 2558 if (ncec->ncec_unsolicit_count > 0) { 2559 ncec->ncec_unsolicit_count--; 2560 mutex_exit(&ncec->ncec_lock); 2561 /* 2562 * When we get to zero announcements left, 2563 * switch to address defense 2564 */ 2565 } else { 2566 boolean_t rate_limit; 2567 2568 mutex_exit(&ncec->ncec_lock); 2569 rate_limit = ill_defend_rate_limit(ill, ncec); 2570 if (rate_limit) { 2571 nce_restart_timer(ncec, 2572 DEFENSE_INTERVAL(isv6)); 2573 break; 2574 } 2575 } 2576 if (isv6) { 2577 dropped = ndp_announce(ncec); 2578 } else { 2579 dropped = arp_announce(ncec); 2580 } 2581 mutex_enter(&ncec->ncec_lock); 2582 if (dropped) { 2583 ncec->ncec_unsolicit_count++; 2584 } else { 2585 ncec->ncec_last_time_defended = 2586 ddi_get_lbolt(); 2587 } 2588 mutex_exit(&ncec->ncec_lock); 2589 if (ncec->ncec_unsolicit_count != 0) { 2590 nce_restart_timer(ncec, 2591 ANNOUNCE_INTERVAL(isv6)); 2592 } else { 2593 nce_restart_timer(ncec, DEFENSE_INTERVAL(isv6)); 2594 } 2595 } else { 2596 mutex_exit(&ncec->ncec_lock); 2597 } 2598 break; 2599 default: 2600 mutex_exit(&ncec->ncec_lock); 2601 break; 2602 } 2603 done: 2604 ncec_refrele(ncec); 2605 ill_refrele(src_ill); 2606 } 2607 2608 /* 2609 * Set a link layer address from the ll_addr passed in. 2610 * Copy SAP from ill. 2611 */ 2612 static void 2613 nce_set_ll(ncec_t *ncec, uchar_t *ll_addr) 2614 { 2615 ill_t *ill = ncec->ncec_ill; 2616 2617 ASSERT(ll_addr != NULL); 2618 if (ill->ill_phys_addr_length > 0) { 2619 /* 2620 * The bcopy() below used to be called for the physical address 2621 * length rather than the link layer address length. For 2622 * ethernet and many other media, the phys_addr and lla are 2623 * identical. 2624 * 2625 * The phys_addr and lla may not be the same for devices that 2626 * support DL_IPV6_LINK_LAYER_ADDR, though there are currently 2627 * no known instances of these. 2628 * 2629 * For PPP or other interfaces with a zero length 2630 * physical address, don't do anything here. 2631 * The bcopy() with a zero phys_addr length was previously 2632 * a no-op for interfaces with a zero-length physical address. 2633 * Using the lla for them would change the way they operate. 2634 * Doing nothing in such cases preserves expected behavior. 2635 */ 2636 bcopy(ll_addr, ncec->ncec_lladdr, ill->ill_nd_lla_len); 2637 } 2638 } 2639 2640 boolean_t 2641 nce_cmp_ll_addr(const ncec_t *ncec, const uchar_t *ll_addr, 2642 uint32_t ll_addr_len) 2643 { 2644 ASSERT(ncec->ncec_lladdr != NULL); 2645 if (ll_addr == NULL) 2646 return (B_FALSE); 2647 if (bcmp(ll_addr, ncec->ncec_lladdr, ll_addr_len) != 0) 2648 return (B_TRUE); 2649 return (B_FALSE); 2650 } 2651 2652 /* 2653 * Updates the link layer address or the reachability state of 2654 * a cache entry. Reset probe counter if needed. 2655 */ 2656 void 2657 nce_update(ncec_t *ncec, uint16_t new_state, uchar_t *new_ll_addr) 2658 { 2659 ill_t *ill = ncec->ncec_ill; 2660 boolean_t need_stop_timer = B_FALSE; 2661 boolean_t need_fastpath_update = B_FALSE; 2662 nce_t *nce = NULL; 2663 timeout_id_t tid; 2664 2665 ASSERT(MUTEX_HELD(&ncec->ncec_lock)); 2666 /* 2667 * If this interface does not do NUD, there is no point 2668 * in allowing an update to the cache entry. Although 2669 * we will respond to NS. 
2670 * The only time we accept an update for a resolver when 2671 * NUD is turned off is when it has just been created. 2672 * Non-Resolvers will always be created as REACHABLE. 2673 */ 2674 if (new_state != ND_UNCHANGED) { 2675 if ((ncec->ncec_flags & NCE_F_NONUD) && 2676 (ncec->ncec_state != ND_INCOMPLETE)) 2677 return; 2678 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2679 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2680 need_stop_timer = B_TRUE; 2681 if (new_state == ND_REACHABLE) 2682 ncec->ncec_last = TICK_TO_MSEC(ddi_get_lbolt64()); 2683 else { 2684 /* We force NUD in this case */ 2685 ncec->ncec_last = 0; 2686 } 2687 ncec->ncec_state = new_state; 2688 ncec->ncec_pcnt = ND_MAX_UNICAST_SOLICIT; 2689 ASSERT(ncec->ncec_lladdr != NULL || new_state == ND_INITIAL || 2690 new_state == ND_INCOMPLETE); 2691 } 2692 if (need_stop_timer || (ncec->ncec_flags & NCE_F_STATIC)) { 2693 tid = ncec->ncec_timeout_id; 2694 ncec->ncec_timeout_id = 0; 2695 } 2696 /* 2697 * Re-trigger fastpath probe and 2698 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2699 * whatever packets that happens to be transmitting at the time. 2700 */ 2701 if (new_ll_addr != NULL) { 2702 bcopy(new_ll_addr, ncec->ncec_lladdr, 2703 ill->ill_phys_addr_length); 2704 need_fastpath_update = B_TRUE; 2705 } 2706 mutex_exit(&ncec->ncec_lock); 2707 if (need_stop_timer || (ncec->ncec_flags & NCE_F_STATIC)) { 2708 if (tid != 0) 2709 (void) untimeout(tid); 2710 } 2711 if (need_fastpath_update) { 2712 /* 2713 * Delete any existing existing dlur_mp and fp_mp information. 2714 * For IPMP interfaces, all underlying ill's must be checked 2715 * and purged. 2716 */ 2717 nce_fastpath_list_delete(ncec->ncec_ill, ncec, NULL); 2718 /* 2719 * add the new dlur_mp and fp_mp 2720 */ 2721 nce = nce_fastpath(ncec, B_TRUE, NULL); 2722 if (nce != NULL) 2723 nce_refrele(nce); 2724 } 2725 mutex_enter(&ncec->ncec_lock); 2726 } 2727 2728 static void 2729 nce_queue_mp_common(ncec_t *ncec, mblk_t *mp, boolean_t head_insert) 2730 { 2731 uint_t count = 0; 2732 mblk_t **mpp, *tmp; 2733 2734 ASSERT(MUTEX_HELD(&ncec->ncec_lock)); 2735 2736 for (mpp = &ncec->ncec_qd_mp; *mpp != NULL; mpp = &(*mpp)->b_next) { 2737 if (++count > ncec->ncec_ill->ill_max_buf) { 2738 tmp = ncec->ncec_qd_mp->b_next; 2739 ncec->ncec_qd_mp->b_next = NULL; 2740 /* 2741 * if we never create data addrs on the under_ill 2742 * does this matter? 2743 */ 2744 BUMP_MIB(ncec->ncec_ill->ill_ip_mib, 2745 ipIfStatsOutDiscards); 2746 ip_drop_output("ipIfStatsOutDiscards", ncec->ncec_qd_mp, 2747 ncec->ncec_ill); 2748 freemsg(ncec->ncec_qd_mp); 2749 ncec->ncec_qd_mp = tmp; 2750 } 2751 } 2752 2753 if (head_insert) { 2754 ncec->ncec_nprobes++; 2755 mp->b_next = ncec->ncec_qd_mp; 2756 ncec->ncec_qd_mp = mp; 2757 } else { 2758 *mpp = mp; 2759 } 2760 } 2761 2762 /* 2763 * nce_queue_mp will queue the packet into the ncec_qd_mp. The packet will be 2764 * queued at the head or tail of the queue based on the input argument 2765 * 'head_insert'. The caller should specify this argument as B_TRUE if this 2766 * packet is an IPMP probe packet, in which case the following happens: 2767 * 2768 * 1. Insert it at the head of the ncec_qd_mp list. Consider the normal 2769 * (non-ipmp_probe) load-speading case where the source address of the ND 2770 * packet is not tied to ncec_ill. If the ill bound to the source address 2771 * cannot receive, the response to the ND packet will not be received. 
2772 * However, if ND packets for ncec_ill's probes are queued behind that ND 2773 * packet, those probes will also fail to be sent, and thus in.mpathd will 2774 * erroneously conclude that ncec_ill has also failed. 2775 * 2776 * 2. Drop the ipmp_probe packet in ndp_timer() if the ND did not succeed on 2777 * the first attempt. This ensures that ND problems do not manifest as 2778 * probe RTT spikes. 2779 * 2780 * We achieve this by inserting ipmp_probe() packets at the head of the 2781 * nce_queue. 2782 * 2783 * The ncec for the probe target is created with ncec_ill set to the ipmp_ill, 2784 * but the caller needs to set head_insert to B_TRUE if this is a probe packet. 2785 */ 2786 void 2787 nce_queue_mp(ncec_t *ncec, mblk_t *mp, boolean_t head_insert) 2788 { 2789 ASSERT(MUTEX_HELD(&ncec->ncec_lock)); 2790 nce_queue_mp_common(ncec, mp, head_insert); 2791 } 2792 2793 /* 2794 * Called when address resolution failed due to a timeout. 2795 * Send an ICMP unreachable in response to all queued packets. 2796 */ 2797 void 2798 ndp_resolv_failed(ncec_t *ncec) 2799 { 2800 mblk_t *mp, *nxt_mp; 2801 char buf[INET6_ADDRSTRLEN]; 2802 ill_t *ill = ncec->ncec_ill; 2803 ip_recv_attr_t iras; 2804 2805 bzero(&iras, sizeof (iras)); 2806 iras.ira_flags = 0; 2807 /* 2808 * we are setting the ira_rill to the ipmp_ill (instead of 2809 * the actual ill on which the packet was received), but this 2810 * is ok because we don't actually need the real ira_rill. 2811 * to send the icmp unreachable to the sender. 2812 */ 2813 iras.ira_ill = iras.ira_rill = ill; 2814 iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex; 2815 iras.ira_rifindex = iras.ira_ruifindex; 2816 2817 ip1dbg(("ndp_resolv_failed: dst %s\n", 2818 inet_ntop(AF_INET6, (char *)&ncec->ncec_addr, buf, sizeof (buf)))); 2819 mutex_enter(&ncec->ncec_lock); 2820 mp = ncec->ncec_qd_mp; 2821 ncec->ncec_qd_mp = NULL; 2822 ncec->ncec_nprobes = 0; 2823 mutex_exit(&ncec->ncec_lock); 2824 while (mp != NULL) { 2825 nxt_mp = mp->b_next; 2826 mp->b_next = NULL; 2827 2828 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2829 ip_drop_output("ipIfStatsOutDiscards - address unreachable", 2830 mp, ill); 2831 icmp_unreachable_v6(mp, 2832 ICMP6_DST_UNREACH_ADDR, B_FALSE, &iras); 2833 ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE)); 2834 mp = nxt_mp; 2835 } 2836 ncec_cb_dispatch(ncec); /* finish off waiting callbacks */ 2837 } 2838 2839 /* 2840 * Handle the completion of NDP and ARP resolution. 
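 * The packets queued on ncec_qd_mp while resolution was in progress are
 * transmitted here.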
2841 */ 2842 void 2843 nce_resolv_ok(ncec_t *ncec) 2844 { 2845 mblk_t *mp; 2846 uint_t pkt_len; 2847 iaflags_t ixaflags = IXAF_NO_TRACE; 2848 nce_t *nce; 2849 ill_t *ill = ncec->ncec_ill; 2850 boolean_t isv6 = (ncec->ncec_ipversion == IPV6_VERSION); 2851 ip_stack_t *ipst = ill->ill_ipst; 2852 2853 if (IS_IPMP(ncec->ncec_ill)) { 2854 nce_resolv_ipmp_ok(ncec); 2855 return; 2856 } 2857 /* non IPMP case */ 2858 2859 mutex_enter(&ncec->ncec_lock); 2860 ASSERT(ncec->ncec_nprobes == 0); 2861 mp = ncec->ncec_qd_mp; 2862 ncec->ncec_qd_mp = NULL; 2863 mutex_exit(&ncec->ncec_lock); 2864 2865 while (mp != NULL) { 2866 mblk_t *nxt_mp; 2867 2868 if (ill->ill_isv6) { 2869 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 2870 2871 pkt_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 2872 } else { 2873 ipha_t *ipha = (ipha_t *)mp->b_rptr; 2874 2875 ixaflags |= IXAF_IS_IPV4; 2876 pkt_len = ntohs(ipha->ipha_length); 2877 } 2878 nxt_mp = mp->b_next; 2879 mp->b_next = NULL; 2880 /* 2881 * IXAF_NO_DEV_FLOW_CTL information for TCP packets is no 2882 * longer available, but it's ok to drop this flag because TCP 2883 * has its own flow-control in effect, so TCP packets 2884 * are not likely to get here when flow-control is in effect. 2885 */ 2886 mutex_enter(&ill->ill_lock); 2887 nce = nce_lookup(ill, &ncec->ncec_addr); 2888 mutex_exit(&ill->ill_lock); 2889 2890 if (nce == NULL) { 2891 if (isv6) { 2892 BUMP_MIB(&ipst->ips_ip6_mib, 2893 ipIfStatsOutDiscards); 2894 } else { 2895 BUMP_MIB(&ipst->ips_ip_mib, 2896 ipIfStatsOutDiscards); 2897 } 2898 ip_drop_output("ipIfStatsOutDiscards - no nce", 2899 mp, NULL); 2900 freemsg(mp); 2901 } else { 2902 /* 2903 * We don't know the zoneid, but 2904 * ip_xmit does not care since IXAF_NO_TRACE 2905 * is set. (We traced the packet the first 2906 * time through ip_xmit.) 2907 */ 2908 (void) ip_xmit(mp, nce, ixaflags, pkt_len, 0, 2909 ALL_ZONES, 0, NULL); 2910 nce_refrele(nce); 2911 } 2912 mp = nxt_mp; 2913 } 2914 2915 ncec_cb_dispatch(ncec); /* complete callbacks */ 2916 } 2917 2918 /* 2919 * Called by SIOCSNDP* ioctl to add/change an ncec entry 2920 * and the corresponding attributes. 2921 * Disallow states other than ND_REACHABLE or ND_STALE. 
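 * (lnr_state_create must be one of those two states; EINVAL is returned
 * otherwise.)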
2922 */ 2923 int 2924 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 2925 { 2926 sin6_t *sin6; 2927 in6_addr_t *addr; 2928 ncec_t *ncec; 2929 nce_t *nce; 2930 int err = 0; 2931 uint16_t new_flags = 0; 2932 uint16_t old_flags = 0; 2933 int inflags = lnr->lnr_flags; 2934 ip_stack_t *ipst = ill->ill_ipst; 2935 boolean_t do_postprocess = B_FALSE; 2936 2937 ASSERT(ill->ill_isv6); 2938 if ((lnr->lnr_state_create != ND_REACHABLE) && 2939 (lnr->lnr_state_create != ND_STALE)) 2940 return (EINVAL); 2941 2942 sin6 = (sin6_t *)&lnr->lnr_addr; 2943 addr = &sin6->sin6_addr; 2944 2945 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 2946 ASSERT(!IS_UNDER_IPMP(ill)); 2947 nce = nce_lookup_addr(ill, addr); 2948 if (nce != NULL) 2949 new_flags = nce->nce_common->ncec_flags; 2950 2951 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 2952 case NDF_ISROUTER_ON: 2953 new_flags |= NCE_F_ISROUTER; 2954 break; 2955 case NDF_ISROUTER_OFF: 2956 new_flags &= ~NCE_F_ISROUTER; 2957 break; 2958 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 2959 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 2960 if (nce != NULL) 2961 nce_refrele(nce); 2962 return (EINVAL); 2963 } 2964 2965 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 2966 case NDF_ANYCAST_ON: 2967 new_flags |= NCE_F_ANYCAST; 2968 break; 2969 case NDF_ANYCAST_OFF: 2970 new_flags &= ~NCE_F_ANYCAST; 2971 break; 2972 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 2973 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 2974 if (nce != NULL) 2975 nce_refrele(nce); 2976 return (EINVAL); 2977 } 2978 2979 if (nce == NULL) { 2980 err = nce_add_v6(ill, 2981 (uchar_t *)lnr->lnr_hdw_addr, 2982 ill->ill_phys_addr_length, 2983 addr, 2984 new_flags, 2985 lnr->lnr_state_create, 2986 &nce); 2987 if (err != 0) { 2988 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 2989 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 2990 return (err); 2991 } else { 2992 do_postprocess = B_TRUE; 2993 } 2994 } 2995 ncec = nce->nce_common; 2996 old_flags = ncec->ncec_flags; 2997 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 2998 ncec_router_to_host(ncec); 2999 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3000 if (do_postprocess) 3001 err = nce_add_v6_postprocess(nce); 3002 nce_refrele(nce); 3003 return (0); 3004 } 3005 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3006 3007 if (do_postprocess) 3008 err = nce_add_v6_postprocess(nce); 3009 /* 3010 * err cannot be anything other than 0 because we don't support 3011 * proxy arp of static addresses. 3012 */ 3013 ASSERT(err == 0); 3014 3015 mutex_enter(&ncec->ncec_lock); 3016 ncec->ncec_flags = new_flags; 3017 mutex_exit(&ncec->ncec_lock); 3018 /* 3019 * Note that we ignore the state at this point, which 3020 * should be either STALE or REACHABLE. Instead we let 3021 * the link layer address passed in to determine the state 3022 * much like incoming packets. 3023 */ 3024 nce_process(ncec, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 3025 nce_refrele(nce); 3026 return (0); 3027 } 3028 3029 /* 3030 * Create an nce_t structure for ill using the ncec->ncec_lladdr to set up 3031 * the nce_dlur_mp. If ill != ncec->ncec_ill, then the ips_ill_g_lock must 3032 * be held to ensure that they are in the same group. 
3033 */ 3034 static nce_t * 3035 nce_fastpath_create(ill_t *ill, ncec_t *ncec) 3036 { 3037 3038 nce_t *nce; 3039 3040 nce = nce_ill_lookup_then_add(ill, ncec); 3041 3042 if (nce == NULL || IS_LOOPBACK(nce->nce_ill) || IS_VNI(nce->nce_ill)) 3043 return (nce); 3044 3045 /* 3046 * hold the ncec_lock to synchronize with nce_update() so that, 3047 * at the end of this function, the contents of nce_dlur_mp are 3048 * consistent with ncec->ncec_lladdr, even though some intermediate 3049 * packet may have been sent out with a mangled address, which would 3050 * only be a transient condition. 3051 */ 3052 mutex_enter(&ncec->ncec_lock); 3053 if (ncec->ncec_lladdr != NULL) { 3054 bcopy(ncec->ncec_lladdr, nce->nce_dlur_mp->b_rptr + 3055 NCE_LL_ADDR_OFFSET(ill), ill->ill_phys_addr_length); 3056 } else { 3057 nce->nce_dlur_mp = ill_dlur_gen(NULL, 0, ill->ill_sap, 3058 ill->ill_sap_length); 3059 } 3060 mutex_exit(&ncec->ncec_lock); 3061 return (nce); 3062 } 3063 3064 /* 3065 * we make nce_fp_mp to have an M_DATA prepend. 3066 * The caller ensures there is hold on ncec for this function. 3067 * Note that since ill_fastpath_probe() copies the mblk there is 3068 * no need to hold the nce or ncec beyond this function. 3069 * 3070 * If the caller has passed in a non-null ncec_nce to nce_faspath() that 3071 * ncec_nce must correspond to the nce for ncec with nce_ill == ncec->ncec_ill 3072 * and will be returned back by this function, so that no extra nce_refrele 3073 * is required for the caller. The calls from nce_add_common() use this 3074 * method. All other callers (that pass in NULL ncec_nce) will have to do a 3075 * nce_refrele of the returned nce (when it is non-null). 3076 */ 3077 nce_t * 3078 nce_fastpath(ncec_t *ncec, boolean_t trigger_fp_req, nce_t *ncec_nce) 3079 { 3080 nce_t *nce; 3081 ill_t *ill = ncec->ncec_ill; 3082 3083 ASSERT(ill != NULL); 3084 3085 if (IS_IPMP(ill) && trigger_fp_req) { 3086 trigger_fp_req = B_FALSE; 3087 ipmp_ncec_fastpath(ncec, ill); 3088 3089 } 3090 /* 3091 * If the caller already has the nce corresponding to the ill, use 3092 * that one. Otherwise we have to lookup/add the nce. Calls from 3093 * nce_add_common() fall in the former category, and have just done 3094 * the nce lookup/add that can be reused. 3095 */ 3096 if (ncec_nce == NULL) 3097 nce = nce_fastpath_create(ill, ncec); 3098 else 3099 nce = ncec_nce; 3100 3101 if (nce == NULL || IS_LOOPBACK(nce->nce_ill) || IS_VNI(nce->nce_ill)) 3102 return (nce); 3103 3104 if (trigger_fp_req) 3105 nce_fastpath_trigger(nce); 3106 return (nce); 3107 } 3108 3109 /* 3110 * Trigger fastpath on nce. No locks may be held. 3111 */ 3112 static void 3113 nce_fastpath_trigger(nce_t *nce) 3114 { 3115 int res; 3116 ill_t *ill = nce->nce_ill; 3117 ncec_t *ncec = nce->nce_common; 3118 3119 res = ill_fastpath_probe(ill, nce->nce_dlur_mp); 3120 /* 3121 * EAGAIN is an indication of a transient error 3122 * i.e. allocation failure etc. leave the ncec in the list it 3123 * will be updated when another probe happens for another ire 3124 * if not it will be taken out of the list when the ire is 3125 * deleted. 3126 */ 3127 if (res != 0 && res != EAGAIN && res != ENOTSUP) 3128 nce_fastpath_list_delete(ill, ncec, NULL); 3129 } 3130 3131 /* 3132 * Add ncec to the nce fastpath list on ill. 
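 * Returns NULL if the ill is condemned or the ncec has already been deleted.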
3133 */ 3134 static nce_t * 3135 nce_ill_lookup_then_add_locked(ill_t *ill, ncec_t *ncec) 3136 { 3137 nce_t *nce = NULL; 3138 3139 ASSERT(MUTEX_HELD(&ill->ill_lock)); 3140 /* 3141 * Atomically ensure that the ill is not CONDEMNED and is not going 3142 * down, before adding the NCE. 3143 */ 3144 if (ill->ill_state_flags & ILL_CONDEMNED) 3145 return (NULL); 3146 mutex_enter(&ncec->ncec_lock); 3147 /* 3148 * if ncec has not been deleted and 3149 * is not already in the list add it. 3150 */ 3151 if (!NCE_ISCONDEMNED(ncec)) { 3152 nce = nce_lookup(ill, &ncec->ncec_addr); 3153 if (nce != NULL) 3154 goto done; 3155 nce = nce_add(ill, ncec); 3156 } 3157 done: 3158 mutex_exit(&ncec->ncec_lock); 3159 return (nce); 3160 } 3161 3162 nce_t * 3163 nce_ill_lookup_then_add(ill_t *ill, ncec_t *ncec) 3164 { 3165 nce_t *nce; 3166 3167 mutex_enter(&ill->ill_lock); 3168 nce = nce_ill_lookup_then_add_locked(ill, ncec); 3169 mutex_exit(&ill->ill_lock); 3170 return (nce); 3171 } 3172 3173 3174 /* 3175 * remove ncec from the ill_nce list. If 'dead' is non-null, the deleted 3176 * nce is added to the 'dead' list, and the caller must nce_refrele() the 3177 * entry after all locks have been dropped. 3178 */ 3179 void 3180 nce_fastpath_list_delete(ill_t *ill, ncec_t *ncec, list_t *dead) 3181 { 3182 nce_t *nce; 3183 3184 ASSERT(ill != NULL); 3185 3186 /* first clean out any nce pointers in the under_ills */ 3187 if (IS_IPMP(ill)) 3188 ipmp_ncec_flush_nce(ncec); 3189 3190 /* now the ill itself */ 3191 mutex_enter(&ill->ill_lock); 3192 for (nce = list_head(&ill->ill_nce); nce != NULL; 3193 nce = list_next(&ill->ill_nce, nce)) { 3194 if (nce->nce_common == ncec) { 3195 nce_refhold(nce); 3196 nce_delete(nce); 3197 break; 3198 } 3199 } 3200 mutex_exit(&ill->ill_lock); 3201 if (nce != NULL) { 3202 if (dead == NULL) 3203 nce_refrele(nce); 3204 else 3205 list_insert_tail(dead, nce); 3206 } 3207 } 3208 3209 /* 3210 * when the fastpath response does not fit in the datab 3211 * associated with the existing nce_fp_mp, we delete and 3212 * add the nce to retrigger fastpath based on the information 3213 * in the ncec_t. 3214 */ 3215 static nce_t * 3216 nce_delete_then_add(nce_t *nce) 3217 { 3218 ill_t *ill = nce->nce_ill; 3219 nce_t *newnce = NULL; 3220 3221 ip0dbg(("nce_delete_then_add nce %p ill %s\n", 3222 (void *)nce, ill->ill_name)); 3223 mutex_enter(&ill->ill_lock); 3224 mutex_enter(&nce->nce_common->ncec_lock); 3225 nce_delete(nce); 3226 /* 3227 * Make sure that ncec is not condemned before adding. We hold the 3228 * ill_lock and ncec_lock to synchronize with ncec_delete() and 3229 * ipmp_ncec_flush_nce() 3230 */ 3231 if (!NCE_ISCONDEMNED(nce->nce_common)) 3232 newnce = nce_add(ill, nce->nce_common); 3233 mutex_exit(&nce->nce_common->ncec_lock); 3234 mutex_exit(&ill->ill_lock); 3235 nce_refrele(nce); 3236 return (newnce); /* could be null if nomem */ 3237 } 3238 3239 typedef struct nce_fp_match_s { 3240 nce_t *nce_fp_match_res; 3241 mblk_t *nce_fp_match_ack_mp; 3242 } nce_fp_match_t; 3243 3244 /* ARGSUSED */ 3245 static int 3246 nce_fastpath_match_dlur(ill_t *ill, nce_t *nce, void *arg) 3247 { 3248 nce_fp_match_t *nce_fp_marg = arg; 3249 ncec_t *ncec = nce->nce_common; 3250 mblk_t *mp = nce_fp_marg->nce_fp_match_ack_mp; 3251 uchar_t *mp_rptr, *ud_mp_rptr; 3252 mblk_t *ud_mp = nce->nce_dlur_mp; 3253 ptrdiff_t cmplen; 3254 3255 /* 3256 * mp is the mp associated with the fastpath ack. 3257 * ud_mp is the outstanding DL_UNITDATA_REQ on the nce_t 3258 * under consideration. 
	 * If the contents match, then the fastpath ack is used to update
	 * the nce.
	 */
	if (ud_mp == NULL)
		return (0);
	mp_rptr = mp->b_rptr;
	cmplen = mp->b_wptr - mp_rptr;
	ASSERT(cmplen >= 0);

	ud_mp_rptr = ud_mp->b_rptr;
	/*
	 * The ncec is locked here to prevent any other threads from accessing
	 * and changing nce_dlur_mp when the address becomes resolved to an
	 * lla while we're in the middle of looking at and comparing the
	 * hardware address (lla). It is also locked to prevent multiple
	 * threads in nce_fastpath() from examining nce_dlur_mp at the same
	 * time.
	 */
	mutex_enter(&ncec->ncec_lock);
	if (ud_mp->b_wptr - ud_mp_rptr == cmplen &&
	    bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) == 0) {
		nce_fp_marg->nce_fp_match_res = nce;
		mutex_exit(&ncec->ncec_lock);
		nce_refhold(nce);
		return (1);
	}
	mutex_exit(&ncec->ncec_lock);
	return (0);
}

/*
 * Update all NCE's that are not in fastpath mode and
 * have an nce_fp_mp that matches mp. mp->b_cont contains
 * the fastpath header.
 */
void
nce_fastpath_update(ill_t *ill, mblk_t *mp)
{
	nce_fp_match_t	nce_fp_marg;
	nce_t		*nce;
	mblk_t		*nce_fp_mp, *fp_mp;

	nce_fp_marg.nce_fp_match_res = NULL;
	nce_fp_marg.nce_fp_match_ack_mp = mp;

	nce_walk(ill, nce_fastpath_match_dlur, &nce_fp_marg);

	if ((nce = nce_fp_marg.nce_fp_match_res) == NULL)
		return;

	mutex_enter(&nce->nce_lock);
	nce_fp_mp = nce->nce_fp_mp;

	if (nce_fp_mp != NULL) {
		fp_mp = mp->b_cont;
		if (nce_fp_mp->b_rptr + MBLKL(fp_mp) >
		    nce_fp_mp->b_datap->db_lim) {
			mutex_exit(&nce->nce_lock);
			nce = nce_delete_then_add(nce);
			if (nce == NULL) {
				return;
			}
			mutex_enter(&nce->nce_lock);
			nce_fp_mp = nce->nce_fp_mp;
		}
	}

	/* Matched - install mp as the fastpath mp */
	if (nce_fp_mp == NULL) {
		fp_mp = dupb(mp->b_cont);
		nce->nce_fp_mp = fp_mp;
	} else {
		fp_mp = mp->b_cont;
		bcopy(fp_mp->b_rptr, nce_fp_mp->b_rptr, MBLKL(fp_mp));
		nce->nce_fp_mp->b_wptr = nce->nce_fp_mp->b_rptr
		    + MBLKL(fp_mp);
	}
	mutex_exit(&nce->nce_lock);
	nce_refrele(nce);
}

/*
 * Return a pointer to a given option in the packet.
 * Assumes that the option part of the packet has already been validated.
 */
nd_opt_hdr_t *
ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type)
{
	while (optlen > 0) {
		if (opt->nd_opt_type == opt_type)
			return (opt);
		optlen -= 8 * opt->nd_opt_len;
		opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len);
	}
	return (NULL);
}

/*
 * Verify all option lengths present are > 0, also check to see
 * if the option lengths and packet length are consistent.
 */
boolean_t
ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen)
{
	ASSERT(opt != NULL);
	while (optlen > 0) {
		if (opt->nd_opt_len == 0)
			return (B_FALSE);
		optlen -= 8 * opt->nd_opt_len;
		if (optlen < 0)
			return (B_FALSE);
		opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len);
	}
	return (B_TRUE);
}

/*
 * ncec_walk function.
 * Free a fraction of the NCE cache entries.
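 * The fraction to free is passed in through arg (see ip_nce_reclaim_stack()
 * below).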
3379 * 3380 * A possible optimization here would be to use ncec_last where possible, and 3381 * delete the least-frequently used entry, which would require more complex 3382 * computation as we walk through the ncec's (e.g., track ncec entries by 3383 * order of ncec_last and/or maintain state) 3384 */ 3385 static void 3386 ncec_cache_reclaim(ncec_t *ncec, char *arg) 3387 { 3388 ip_stack_t *ipst = ncec->ncec_ipst; 3389 uint_t fraction = *(uint_t *)arg; 3390 uint_t rand; 3391 3392 if ((ncec->ncec_flags & 3393 (NCE_F_MYADDR | NCE_F_STATIC | NCE_F_BCAST)) != 0) { 3394 return; 3395 } 3396 3397 rand = (uint_t)ddi_get_lbolt() + 3398 NCE_ADDR_HASH_V6(ncec->ncec_addr, NCE_TABLE_SIZE); 3399 if ((rand/fraction)*fraction == rand) { 3400 IP_STAT(ipst, ip_nce_reclaim_deleted); 3401 ncec_delete(ncec); 3402 } 3403 } 3404 3405 /* 3406 * kmem_cache callback to free up memory. 3407 * 3408 * For now we just delete a fixed fraction. 3409 */ 3410 static void 3411 ip_nce_reclaim_stack(ip_stack_t *ipst) 3412 { 3413 uint_t fraction = ipst->ips_ip_nce_reclaim_fraction; 3414 3415 IP_STAT(ipst, ip_nce_reclaim_calls); 3416 3417 ncec_walk(NULL, (pfi_t)ncec_cache_reclaim, (uchar_t *)&fraction, ipst); 3418 3419 /* 3420 * Walk all CONNs that can have a reference on an ire, ncec or dce. 3421 * Get them to update any stale references to drop any refholds they 3422 * have. 3423 */ 3424 ipcl_walk(conn_ixa_cleanup, (void *)B_FALSE, ipst); 3425 } 3426 3427 /* 3428 * Called by the memory allocator subsystem directly, when the system 3429 * is running low on memory. 3430 */ 3431 /* ARGSUSED */ 3432 void 3433 ip_nce_reclaim(void *args) 3434 { 3435 netstack_handle_t nh; 3436 netstack_t *ns; 3437 3438 netstack_next_init(&nh); 3439 while ((ns = netstack_next(&nh)) != NULL) { 3440 ip_nce_reclaim_stack(ns->netstack_ip); 3441 netstack_rele(ns); 3442 } 3443 netstack_next_fini(&nh); 3444 } 3445 3446 #ifdef DEBUG 3447 void 3448 ncec_trace_ref(ncec_t *ncec) 3449 { 3450 ASSERT(MUTEX_HELD(&ncec->ncec_lock)); 3451 3452 if (ncec->ncec_trace_disable) 3453 return; 3454 3455 if (!th_trace_ref(ncec, ncec->ncec_ipst)) { 3456 ncec->ncec_trace_disable = B_TRUE; 3457 ncec_trace_cleanup(ncec); 3458 } 3459 } 3460 3461 void 3462 ncec_untrace_ref(ncec_t *ncec) 3463 { 3464 ASSERT(MUTEX_HELD(&ncec->ncec_lock)); 3465 3466 if (!ncec->ncec_trace_disable) 3467 th_trace_unref(ncec); 3468 } 3469 3470 static void 3471 ncec_trace_cleanup(const ncec_t *ncec) 3472 { 3473 th_trace_cleanup(ncec, ncec->ncec_trace_disable); 3474 } 3475 #endif 3476 3477 /* 3478 * Called when address resolution fails due to a timeout. 3479 * Send an ICMP unreachable in response to all queued packets. 3480 */ 3481 void 3482 arp_resolv_failed(ncec_t *ncec) 3483 { 3484 mblk_t *mp, *nxt_mp; 3485 char buf[INET6_ADDRSTRLEN]; 3486 struct in_addr ipv4addr; 3487 ill_t *ill = ncec->ncec_ill; 3488 ip_stack_t *ipst = ncec->ncec_ipst; 3489 ip_recv_attr_t iras; 3490 3491 bzero(&iras, sizeof (iras)); 3492 iras.ira_flags = IRAF_IS_IPV4; 3493 /* 3494 * we are setting the ira_rill to the ipmp_ill (instead of 3495 * the actual ill on which the packet was received), but this 3496 * is ok because we don't actually need the real ira_rill. 3497 * to send the icmp unreachable to the sender. 
 */
	iras.ira_ill = iras.ira_rill = ill;
	iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
	iras.ira_rifindex = iras.ira_ruifindex;

	IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &ipv4addr);
	ip3dbg(("arp_resolv_failed: dst %s\n",
	    inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf))));
	mutex_enter(&ncec->ncec_lock);
	mp = ncec->ncec_qd_mp;
	ncec->ncec_qd_mp = NULL;
	ncec->ncec_nprobes = 0;
	mutex_exit(&ncec->ncec_lock);
	while (mp != NULL) {
		nxt_mp = mp->b_next;
		mp->b_next = NULL;

		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
		ip_drop_output("ipIfStatsOutDiscards - address unreachable",
		    mp, ill);
		if (ipst->ips_ip_arp_icmp_error) {
			ip3dbg(("arp_resolv_failed: "
			    "Calling icmp_unreachable\n"));
			icmp_unreachable(mp, ICMP_HOST_UNREACHABLE, &iras);
		} else {
			freemsg(mp);
		}
		ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE));
		mp = nxt_mp;
	}
	ncec_cb_dispatch(ncec);	/* finish off waiting callbacks */
}

/*
 * if ill is an under_ill, translate it to the ipmp_ill and add the
 * nce on the ipmp_ill. Two nce_t entries (one on the ipmp_ill, and
 * one on the underlying in_ill) will be created for the
 * ncec_t in this case. The ncec_t itself will be created on the ipmp_ill.
 */
int
nce_lookup_then_add_v4(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len,
    const in_addr_t *addr, uint16_t flags, uint16_t state, nce_t **newnce)
{
	int		err;
	in6_addr_t	addr6;
	ip_stack_t	*ipst = ill->ill_ipst;
	nce_t		*nce, *upper_nce = NULL;
	ill_t		*in_ill = ill, *under = NULL;
	boolean_t	need_ill_refrele = B_FALSE;

	if (flags & NCE_F_MCAST) {
		/*
		 * hw_addr will be figured out in nce_set_multicast_v4;
		 * caller needs to pass in the cast_ill for ipmp
		 */
		ASSERT(hw_addr == NULL);
		ASSERT(!IS_IPMP(ill));
		err = nce_set_multicast_v4(ill, addr, flags, newnce);
		return (err);
	}

	if (IS_UNDER_IPMP(ill) && !(flags & NCE_F_MYADDR)) {
		ill = ipmp_ill_hold_ipmp_ill(ill);
		if (ill == NULL)
			return (ENXIO);
		need_ill_refrele = B_TRUE;
	}
	if ((flags & NCE_F_BCAST) != 0) {
		/*
		 * IPv4 broadcast ncec: compute the hwaddr.
		 */
		if (IS_IPMP(ill)) {
			under = ipmp_ill_get_xmit_ill(ill, B_FALSE);
			if (under == NULL) {
				if (need_ill_refrele)
					ill_refrele(ill);
				return (ENETDOWN);
			}
			hw_addr = under->ill_bcast_mp->b_rptr +
			    NCE_LL_ADDR_OFFSET(under);
			hw_addr_len = under->ill_phys_addr_length;
		} else {
			hw_addr = ill->ill_bcast_mp->b_rptr +
			    NCE_LL_ADDR_OFFSET(ill);
			hw_addr_len = ill->ill_phys_addr_length;
		}
	}

	mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
	IN6_IPADDR_TO_V4MAPPED(*addr, &addr6);
	nce = nce_lookup_addr(ill, &addr6);
	if (nce == NULL) {
		err = nce_add_v4(ill, hw_addr, hw_addr_len, addr, flags,
		    state, &nce);
	} else {
		err = EEXIST;
	}
	mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
	if (err == 0)
		err = nce_add_v4_postprocess(nce);

	if (in_ill != ill && nce != NULL) {
		nce_t *under_nce = NULL;

		/*
		 * in_ill was the under_ill. Try to create the under_nce.
		 * Hold the ill_g_lock to prevent changes to group membership
		 * until we are done.
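		 * If the two ills are no longer in the same illgrp, the
		 * under_nce is not created.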
3606 */ 3607 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 3608 if (IS_IN_SAME_ILLGRP(in_ill, ill)) { 3609 under_nce = nce_fastpath_create(in_ill, 3610 nce->nce_common); 3611 upper_nce = nce; 3612 if ((nce = under_nce) == NULL) 3613 err = EINVAL; 3614 } 3615 rw_exit(&ipst->ips_ill_g_lock); 3616 if (under_nce != NULL && NCE_ISREACHABLE(nce->nce_common)) 3617 nce_fastpath_trigger(under_nce); 3618 } 3619 if (nce != NULL) { 3620 if (newnce != NULL) 3621 *newnce = nce; 3622 else 3623 nce_refrele(nce); 3624 } 3625 3626 if (under != NULL) 3627 ill_refrele(under); 3628 3629 if (upper_nce != NULL) 3630 nce_refrele(upper_nce); 3631 3632 if (need_ill_refrele) 3633 ill_refrele(ill); 3634 3635 return (err); 3636 } 3637 3638 /* 3639 * NDP Cache Entry creation routine for IPv4. 3640 * This routine must always be called with ndp4->ndp_g_lock held. 3641 * Prior to return, ncec_refcnt is incremented. 3642 * 3643 * IPMP notes: the ncec for non-local (i.e., !NCE_MYADDR(ncec) addresses 3644 * are always added pointing at the ipmp_ill. Thus, when the ill passed 3645 * to nce_add_v4 is an under_ill (i.e., IS_UNDER_IPMP(ill)) two nce_t 3646 * entries will be created, both pointing at the same ncec_t. The nce_t 3647 * entries will have their nce_ill set to the ipmp_ill and the under_ill 3648 * respectively, with the ncec_t having its ncec_ill pointing at the ipmp_ill. 3649 * Local addresses are always created on the ill passed to nce_add_v4. 3650 */ 3651 int 3652 nce_add_v4(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len, 3653 const in_addr_t *addr, uint16_t flags, uint16_t state, nce_t **newnce) 3654 { 3655 int err; 3656 boolean_t is_multicast = (flags & NCE_F_MCAST); 3657 struct in6_addr addr6; 3658 nce_t *nce; 3659 3660 ASSERT(MUTEX_HELD(&ill->ill_ipst->ips_ndp4->ndp_g_lock)); 3661 ASSERT(!ill->ill_isv6); 3662 ASSERT(!IN_MULTICAST(htonl(*addr)) || is_multicast); 3663 3664 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3665 err = nce_add_common(ill, hw_addr, hw_addr_len, &addr6, flags, state, 3666 &nce); 3667 ASSERT(newnce != NULL); 3668 *newnce = nce; 3669 return (err); 3670 } 3671 3672 /* 3673 * Post-processing routine to be executed after nce_add_v4(). This function 3674 * triggers fastpath (if appropriate) and DAD on the newly added nce entry 3675 * and must be called without any locks held. 3676 * 3677 * Always returns 0, but we return an int to keep this symmetric with the 3678 * IPv6 counter-part. 3679 */ 3680 int 3681 nce_add_v4_postprocess(nce_t *nce) 3682 { 3683 ncec_t *ncec = nce->nce_common; 3684 uint16_t flags = ncec->ncec_flags; 3685 boolean_t ndp_need_dad = B_FALSE; 3686 boolean_t dropped; 3687 clock_t delay; 3688 ip_stack_t *ipst = ncec->ncec_ill->ill_ipst; 3689 uchar_t *hw_addr = ncec->ncec_lladdr; 3690 boolean_t trigger_fastpath = B_TRUE; 3691 3692 /* 3693 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 3694 * we call nce_fastpath as soon as the ncec is resolved in nce_process. 
	 * We call nce_fastpath from nce_update if the link layer address of
	 * the peer changes in nce_update.
	 */
	if (NCE_PUBLISH(ncec) || !NCE_ISREACHABLE(ncec) || (hw_addr == NULL &&
	    ncec->ncec_ill->ill_net_type != IRE_IF_NORESOLVER))
		trigger_fastpath = B_FALSE;

	if (trigger_fastpath)
		nce_fastpath_trigger(nce);

	if (NCE_PUBLISH(ncec) && ncec->ncec_state == ND_PROBE) {
		/*
		 * Either the caller (by passing in ND_PROBE)
		 * or nce_add_common() (by the internally computed state
		 * based on ncec_addr and ill_net_type) has determined
		 * that this unicast entry needs DAD. Trigger DAD.
		 */
		ndp_need_dad = B_TRUE;
	} else if (flags & NCE_F_UNSOL_ADV) {
		/*
		 * We account for the transmit below by assigning one
		 * less than the ndd variable. Subsequent decrements
		 * are done in nce_timer.
		 */
		mutex_enter(&ncec->ncec_lock);
		ncec->ncec_unsolicit_count =
		    ipst->ips_ip_arp_publish_count - 1;
		mutex_exit(&ncec->ncec_lock);
		dropped = arp_announce(ncec);
		mutex_enter(&ncec->ncec_lock);
		if (dropped)
			ncec->ncec_unsolicit_count++;
		else
			ncec->ncec_last_time_defended = ddi_get_lbolt();
		if (ncec->ncec_unsolicit_count != 0) {
			nce_start_timer(ncec,
			    ipst->ips_ip_arp_publish_interval);
		}
		mutex_exit(&ncec->ncec_lock);
	}

	/*
	 * If the probe delay is 0, the user has configured us to send the
	 * first probe right away. Do so, and set up for the subsequent probes.
	 */
	if (ndp_need_dad) {
		mutex_enter(&ncec->ncec_lock);
		if (ncec->ncec_pcnt == 0) {
			/*
			 * DAD probes and announce can be
			 * administratively disabled by setting the
			 * probe_count to zero. Restart the timer in
			 * this case to mark the ipif as ready.
			 */
			ncec->ncec_unsolicit_count = 0;
			mutex_exit(&ncec->ncec_lock);
			nce_restart_timer(ncec, 0);
		} else {
			mutex_exit(&ncec->ncec_lock);
			delay = ((ncec->ncec_flags & NCE_F_FAST) ?
			    ipst->ips_arp_fastprobe_delay :
			    ipst->ips_arp_probe_delay);
			nce_dad(ncec, NULL, (delay == 0 ? B_TRUE : B_FALSE));
		}
	}
	return (0);
}

/*
 * ncec_walk routine to update all entries that have a given destination or
 * gateway address and cached link layer (MAC) address. This is used when ARP
 * informs us that a network-to-link-layer mapping may have changed.
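 * Matching entries are moved to ND_STALE with the new hardware address so
 * that the mapping gets re-verified.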
3767 */ 3768 void 3769 nce_update_hw_changed(ncec_t *ncec, void *arg) 3770 { 3771 nce_hw_map_t *hwm = arg; 3772 ipaddr_t ncec_addr; 3773 3774 if (ncec->ncec_state != ND_REACHABLE) 3775 return; 3776 3777 IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr, ncec_addr); 3778 if (ncec_addr != hwm->hwm_addr) 3779 return; 3780 3781 mutex_enter(&ncec->ncec_lock); 3782 if (hwm->hwm_flags != 0) 3783 ncec->ncec_flags = hwm->hwm_flags; 3784 nce_update(ncec, ND_STALE, hwm->hwm_hwaddr); 3785 mutex_exit(&ncec->ncec_lock); 3786 } 3787 3788 void 3789 ncec_refhold(ncec_t *ncec) 3790 { 3791 mutex_enter(&(ncec)->ncec_lock); 3792 (ncec)->ncec_refcnt++; 3793 ASSERT((ncec)->ncec_refcnt != 0); 3794 #ifdef DEBUG 3795 ncec_trace_ref(ncec); 3796 #endif 3797 mutex_exit(&(ncec)->ncec_lock); 3798 } 3799 3800 void 3801 ncec_refhold_notr(ncec_t *ncec) 3802 { 3803 mutex_enter(&(ncec)->ncec_lock); 3804 (ncec)->ncec_refcnt++; 3805 ASSERT((ncec)->ncec_refcnt != 0); 3806 mutex_exit(&(ncec)->ncec_lock); 3807 } 3808 3809 static void 3810 ncec_refhold_locked(ncec_t *ncec) 3811 { 3812 ASSERT(MUTEX_HELD(&(ncec)->ncec_lock)); 3813 (ncec)->ncec_refcnt++; 3814 #ifdef DEBUG 3815 ncec_trace_ref(ncec); 3816 #endif 3817 } 3818 3819 /* ncec_inactive destroys the mutex thus no mutex_exit is needed */ 3820 void 3821 ncec_refrele(ncec_t *ncec) 3822 { 3823 mutex_enter(&(ncec)->ncec_lock); 3824 #ifdef DEBUG 3825 ncec_untrace_ref(ncec); 3826 #endif 3827 ASSERT((ncec)->ncec_refcnt != 0); 3828 if (--(ncec)->ncec_refcnt == 0) { 3829 ncec_inactive(ncec); 3830 } else { 3831 mutex_exit(&(ncec)->ncec_lock); 3832 } 3833 } 3834 3835 void 3836 ncec_refrele_notr(ncec_t *ncec) 3837 { 3838 mutex_enter(&(ncec)->ncec_lock); 3839 ASSERT((ncec)->ncec_refcnt != 0); 3840 if (--(ncec)->ncec_refcnt == 0) { 3841 ncec_inactive(ncec); 3842 } else { 3843 mutex_exit(&(ncec)->ncec_lock); 3844 } 3845 } 3846 3847 /* 3848 * Common to IPv4 and IPv6. 3849 */ 3850 void 3851 nce_restart_timer(ncec_t *ncec, uint_t ms) 3852 { 3853 timeout_id_t tid; 3854 3855 ASSERT(!MUTEX_HELD(&(ncec)->ncec_lock)); 3856 3857 /* First cancel any running timer */ 3858 mutex_enter(&ncec->ncec_lock); 3859 tid = ncec->ncec_timeout_id; 3860 ncec->ncec_timeout_id = 0; 3861 if (tid != 0) { 3862 mutex_exit(&ncec->ncec_lock); 3863 (void) untimeout(tid); 3864 mutex_enter(&ncec->ncec_lock); 3865 } 3866 3867 /* Restart timer */ 3868 nce_start_timer(ncec, ms); 3869 mutex_exit(&ncec->ncec_lock); 3870 } 3871 3872 static void 3873 nce_start_timer(ncec_t *ncec, uint_t ms) 3874 { 3875 ASSERT(MUTEX_HELD(&ncec->ncec_lock)); 3876 /* 3877 * Don't start the timer if the ncec has been deleted, or if the timer 3878 * is already running 3879 */ 3880 if (!NCE_ISCONDEMNED(ncec) && ncec->ncec_timeout_id == 0) { 3881 ncec->ncec_timeout_id = timeout(nce_timer, ncec, 3882 MSEC_TO_TICK(ms) == 0 ? 1 : MSEC_TO_TICK(ms)); 3883 } 3884 } 3885 3886 int 3887 nce_set_multicast_v4(ill_t *ill, const in_addr_t *dst, 3888 uint16_t flags, nce_t **newnce) 3889 { 3890 uchar_t *hw_addr; 3891 int err = 0; 3892 ip_stack_t *ipst = ill->ill_ipst; 3893 in6_addr_t dst6; 3894 nce_t *nce; 3895 3896 ASSERT(!ill->ill_isv6); 3897 3898 IN6_IPADDR_TO_V4MAPPED(*dst, &dst6); 3899 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 3900 if ((nce = nce_lookup_addr(ill, &dst6)) != NULL) { 3901 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3902 goto done; 3903 } 3904 if (ill->ill_net_type == IRE_IF_RESOLVER) { 3905 /* 3906 * For IRE_IF_RESOLVER a hardware mapping can be 3907 * generated, for IRE_IF_NORESOLVER, resolution cookie 3908 * in the ill is copied in nce_add_v4(). 
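		 * ip_mcast_mapping() below derives the link-layer multicast
		 * address for dst.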
3909 */ 3910 hw_addr = kmem_alloc(ill->ill_phys_addr_length, KM_NOSLEEP); 3911 if (hw_addr == NULL) { 3912 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3913 return (ENOMEM); 3914 } 3915 ip_mcast_mapping(ill, (uchar_t *)dst, hw_addr); 3916 } else { 3917 /* 3918 * IRE_IF_NORESOLVER type simply copies the resolution 3919 * cookie passed in. So no hw_addr is needed. 3920 */ 3921 hw_addr = NULL; 3922 } 3923 ASSERT(flags & NCE_F_MCAST); 3924 ASSERT(flags & NCE_F_NONUD); 3925 /* nce_state will be computed by nce_add_common() */ 3926 err = nce_add_v4(ill, hw_addr, ill->ill_phys_addr_length, dst, flags, 3927 ND_UNCHANGED, &nce); 3928 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 3929 if (err == 0) 3930 err = nce_add_v4_postprocess(nce); 3931 if (hw_addr != NULL) 3932 kmem_free(hw_addr, ill->ill_phys_addr_length); 3933 if (err != 0) { 3934 ip1dbg(("nce_set_multicast_v4: create failed" "%d\n", err)); 3935 return (err); 3936 } 3937 done: 3938 if (newnce != NULL) 3939 *newnce = nce; 3940 else 3941 nce_refrele(nce); 3942 return (0); 3943 } 3944 3945 /* 3946 * This is used when scanning for "old" (least recently broadcast) NCEs. We 3947 * don't want to have to walk the list for every single one, so we gather up 3948 * batches at a time. 3949 */ 3950 #define NCE_RESCHED_LIST_LEN 8 3951 3952 typedef struct { 3953 ill_t *ncert_ill; 3954 uint_t ncert_num; 3955 ncec_t *ncert_nces[NCE_RESCHED_LIST_LEN]; 3956 } nce_resched_t; 3957 3958 /* 3959 * Pick the longest waiting NCEs for defense. 3960 */ 3961 /* ARGSUSED */ 3962 static int 3963 ncec_reschedule(ill_t *ill, nce_t *nce, void *arg) 3964 { 3965 nce_resched_t *ncert = arg; 3966 ncec_t **ncecs; 3967 ncec_t **ncec_max; 3968 ncec_t *ncec_temp; 3969 ncec_t *ncec = nce->nce_common; 3970 3971 ASSERT(ncec->ncec_ill == ncert->ncert_ill); 3972 /* 3973 * Only reachable entries that are ready for announcement are eligible. 3974 */ 3975 if (!NCE_MYADDR(ncec) || ncec->ncec_state != ND_REACHABLE) 3976 return (0); 3977 if (ncert->ncert_num < NCE_RESCHED_LIST_LEN) { 3978 ncec_refhold(ncec); 3979 ncert->ncert_nces[ncert->ncert_num++] = ncec; 3980 } else { 3981 ncecs = ncert->ncert_nces; 3982 ncec_max = ncecs + NCE_RESCHED_LIST_LEN; 3983 ncec_refhold(ncec); 3984 for (; ncecs < ncec_max; ncecs++) { 3985 ASSERT(ncec != NULL); 3986 if ((*ncecs)->ncec_last_time_defended > 3987 ncec->ncec_last_time_defended) { 3988 ncec_temp = *ncecs; 3989 *ncecs = ncec; 3990 ncec = ncec_temp; 3991 } 3992 } 3993 ncec_refrele(ncec); 3994 } 3995 return (0); 3996 } 3997 3998 /* 3999 * Reschedule the ARP defense of any long-waiting NCEs. It's assumed that this 4000 * doesn't happen very often (if at all), and thus it needn't be highly 4001 * optimized. (Note, though, that it's actually O(N) complexity, because the 4002 * outer loop is bounded by a constant rather than by the length of the list.) 
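 * Each pass of nce_walk_common() below gathers at most
 * NCE_RESCHED_LIST_LEN of the longest-idle publishable (NCE_MYADDR,
 * ND_REACHABLE) entries; those are then marked NCE_F_DELAYED until the
 * defend_rate budget has been consumed.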
4003 */
4004 static void
4005 nce_ill_reschedule(ill_t *ill, nce_resched_t *ncert)
4006 {
4007 ncec_t *ncec;
4008 ip_stack_t *ipst = ill->ill_ipst;
4009 uint_t i, defend_rate;
4010
4011 i = ill->ill_defend_count;
4012 ill->ill_defend_count = 0;
4013 if (ill->ill_isv6)
4014 defend_rate = ipst->ips_ndp_defend_rate;
4015 else
4016 defend_rate = ipst->ips_arp_defend_rate;
4017 /* If none could be sitting around, then don't reschedule */
4018 if (i < defend_rate) {
4019 DTRACE_PROBE1(reschedule_none, ill_t *, ill);
4020 return;
4021 }
4022 ncert->ncert_ill = ill;
4023 while (ill->ill_defend_count < defend_rate) {
4024 nce_walk_common(ill, ncec_reschedule, ncert);
4025 for (i = 0; i < ncert->ncert_num; i++) {
4026
4027 ncec = ncert->ncert_nces[i];
4028 mutex_enter(&ncec->ncec_lock);
4029 ncec->ncec_flags |= NCE_F_DELAYED;
4030 mutex_exit(&ncec->ncec_lock);
4031 /*
4032 * we plan to schedule this ncec, so incr the
4033 * defend_count in anticipation.
4034 */
4035 if (++ill->ill_defend_count >= defend_rate)
4036 break;
4037 }
4038 if (ncert->ncert_num < NCE_RESCHED_LIST_LEN)
4039 break;
4040 }
4041 }
4042
4043 /*
4044 * Check if the current rate-limiting parameters permit the sending
4045 * of another address defense announcement for both IPv4 and IPv6.
4046 * Returns B_TRUE if rate-limiting is in effect (i.e., send is not
4047 * permitted), and B_FALSE otherwise. The `defend_rate' parameter
4048 * determines how many address defense announcements are permitted
4049 * in any `defend_period' interval.
4050 */
4051 static boolean_t
4052 ill_defend_rate_limit(ill_t *ill, ncec_t *ncec)
4053 {
4054 clock_t now = ddi_get_lbolt();
4055 ip_stack_t *ipst = ill->ill_ipst;
4056 clock_t start = ill->ill_defend_start;
4057 uint32_t elapsed, defend_period, defend_rate;
4058 nce_resched_t ncert;
4059 boolean_t ret;
4060 int i;
4061
4062 if (ill->ill_isv6) {
4063 defend_period = ipst->ips_ndp_defend_period;
4064 defend_rate = ipst->ips_ndp_defend_rate;
4065 } else {
4066 defend_period = ipst->ips_arp_defend_period;
4067 defend_rate = ipst->ips_arp_defend_rate;
4068 }
4069 if (defend_rate == 0)
4070 return (B_TRUE);
4071 bzero(&ncert, sizeof (ncert));
4072 mutex_enter(&ill->ill_lock);
4073 if (start > 0) {
4074 elapsed = now - start;
4075 if (elapsed > SEC_TO_TICK(defend_period)) {
4076 ill->ill_defend_start = now;
4077 /*
4078 * nce_ill_reschedule will attempt to
4079 * prevent starvation by rescheduling the
4080 * oldest entries, which are marked with
4081 * the NCE_F_DELAYED flag.
4082 */
4083 nce_ill_reschedule(ill, &ncert);
4084 }
4085 } else {
4086 ill->ill_defend_start = now;
4087 }
4088 ASSERT(ill->ill_defend_count <= defend_rate);
4089 mutex_enter(&ncec->ncec_lock);
4090 if (ncec->ncec_flags & NCE_F_DELAYED) {
4091 /*
4092 * This ncec was rescheduled as one of the really old
4093 * entries needing on-going defense. The
4094 * ill_defend_count was already incremented in
4095 * nce_ill_reschedule. Go ahead and send the announce.
4096 */
4097 ncec->ncec_flags &= ~NCE_F_DELAYED;
4098 mutex_exit(&ncec->ncec_lock);
4099 ret = B_FALSE;
4100 goto done;
4101 }
4102 mutex_exit(&ncec->ncec_lock);
4103 if (ill->ill_defend_count < defend_rate)
4104 ill->ill_defend_count++;
4105 if (ill->ill_defend_count == defend_rate) {
4106 /*
4107 * we are no longer allowed to send unbidden defense
4108 * messages. Wait for rescheduling.
4109 */ 4110 ret = B_TRUE; 4111 } else { 4112 ret = B_FALSE; 4113 } 4114 done: 4115 mutex_exit(&ill->ill_lock); 4116 /* 4117 * After all the locks have been dropped we can restart nce timer, 4118 * and refrele the delayed ncecs 4119 */ 4120 for (i = 0; i < ncert.ncert_num; i++) { 4121 clock_t xmit_interval; 4122 ncec_t *tmp; 4123 4124 tmp = ncert.ncert_nces[i]; 4125 xmit_interval = nce_fuzz_interval(tmp->ncec_xmit_interval, 4126 B_FALSE); 4127 nce_restart_timer(tmp, xmit_interval); 4128 ncec_refrele(tmp); 4129 } 4130 return (ret); 4131 } 4132 4133 boolean_t 4134 ndp_announce(ncec_t *ncec) 4135 { 4136 return (ndp_xmit(ncec->ncec_ill, ND_NEIGHBOR_ADVERT, ncec->ncec_lladdr, 4137 ncec->ncec_lladdr_length, &ncec->ncec_addr, &ipv6_all_hosts_mcast, 4138 nce_advert_flags(ncec))); 4139 } 4140 4141 ill_t * 4142 nce_resolve_src(ncec_t *ncec, in6_addr_t *src) 4143 { 4144 mblk_t *mp; 4145 in6_addr_t src6; 4146 ipaddr_t src4; 4147 ill_t *ill = ncec->ncec_ill; 4148 ill_t *src_ill = NULL; 4149 ipif_t *ipif = NULL; 4150 boolean_t is_myaddr = NCE_MYADDR(ncec); 4151 boolean_t isv6 = (ncec->ncec_ipversion == IPV6_VERSION); 4152 4153 ASSERT(src != NULL); 4154 ASSERT(IN6_IS_ADDR_UNSPECIFIED(src)); 4155 src6 = *src; 4156 if (is_myaddr) { 4157 src6 = ncec->ncec_addr; 4158 if (!isv6) 4159 IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr, src4); 4160 } else { 4161 /* 4162 * try to find one from the outgoing packet. 4163 */ 4164 mutex_enter(&ncec->ncec_lock); 4165 mp = ncec->ncec_qd_mp; 4166 if (mp != NULL) { 4167 if (isv6) { 4168 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 4169 4170 src6 = ip6h->ip6_src; 4171 } else { 4172 ipha_t *ipha = (ipha_t *)mp->b_rptr; 4173 4174 src4 = ipha->ipha_src; 4175 IN6_IPADDR_TO_V4MAPPED(src4, &src6); 4176 } 4177 } 4178 mutex_exit(&ncec->ncec_lock); 4179 } 4180 4181 /* 4182 * For outgoing packets, if the src of outgoing packet is one 4183 * of the assigned interface addresses use it, otherwise we 4184 * will pick the source address below. 4185 * For local addresses (is_myaddr) doing DAD, NDP announce 4186 * messages are mcast. So we use the (IPMP) cast_ill or the 4187 * (non-IPMP) ncec_ill for these message types. The only case 4188 * of unicast DAD messages are for IPv6 ND probes, for which 4189 * we find the ipif_bound_ill corresponding to the ncec_addr. 4190 */ 4191 if (!IN6_IS_ADDR_UNSPECIFIED(&src6) || is_myaddr) { 4192 if (isv6) { 4193 ipif = ipif_lookup_addr_nondup_v6(&src6, ill, ALL_ZONES, 4194 ill->ill_ipst); 4195 } else { 4196 ipif = ipif_lookup_addr_nondup(src4, ill, ALL_ZONES, 4197 ill->ill_ipst); 4198 } 4199 4200 /* 4201 * If no relevant ipif can be found, then it's not one of our 4202 * addresses. Reset to :: and try to find a src for the NS or 4203 * ARP request using ipif_select_source_v[4,6] below. 4204 * If an ipif can be found, but it's not yet done with 4205 * DAD verification, and we are not being invoked for 4206 * DAD (i.e., !is_myaddr), then just postpone this 4207 * transmission until later. 4208 */ 4209 if (ipif == NULL) { 4210 src6 = ipv6_all_zeros; 4211 src4 = INADDR_ANY; 4212 } else if (!ipif->ipif_addr_ready && !is_myaddr) { 4213 DTRACE_PROBE2(nce__resolve__ipif__not__ready, 4214 ncec_t *, ncec, ipif_t *, ipif); 4215 ipif_refrele(ipif); 4216 return (NULL); 4217 } 4218 } 4219 4220 if (IN6_IS_ADDR_UNSPECIFIED(&src6) && !is_myaddr) { 4221 /* 4222 * Pick a source address for this solicitation, but 4223 * restrict the selection to addresses assigned to the 4224 * output interface. 
We do this because the destination will 4225 * create a neighbor cache entry for the source address of 4226 * this packet, so the source address had better be a valid 4227 * neighbor. 4228 */ 4229 if (isv6) { 4230 ipif = ipif_select_source_v6(ill, &ncec->ncec_addr, 4231 B_TRUE, IPV6_PREFER_SRC_DEFAULT, ALL_ZONES, 4232 B_FALSE, NULL); 4233 } else { 4234 ipaddr_t nce_addr; 4235 4236 IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr, nce_addr); 4237 ipif = ipif_select_source_v4(ill, nce_addr, ALL_ZONES, 4238 B_FALSE, NULL); 4239 } 4240 if (ipif == NULL && IS_IPMP(ill)) { 4241 ill_t *send_ill = ipmp_ill_get_xmit_ill(ill, B_TRUE); 4242 4243 if (send_ill != NULL) { 4244 if (isv6) { 4245 ipif = ipif_select_source_v6(send_ill, 4246 &ncec->ncec_addr, B_TRUE, 4247 IPV6_PREFER_SRC_DEFAULT, ALL_ZONES, 4248 B_FALSE, NULL); 4249 } else { 4250 IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr, 4251 src4); 4252 ipif = ipif_select_source_v4(send_ill, 4253 src4, ALL_ZONES, B_TRUE, NULL); 4254 } 4255 ill_refrele(send_ill); 4256 } 4257 } 4258 4259 if (ipif == NULL) { 4260 char buf[INET6_ADDRSTRLEN]; 4261 4262 ip1dbg(("nce_resolve_src: No source ipif for dst %s\n", 4263 inet_ntop((isv6 ? AF_INET6 : AF_INET), 4264 (char *)&ncec->ncec_addr, buf, sizeof (buf)))); 4265 DTRACE_PROBE1(nce__resolve__no__ipif, ncec_t *, ncec); 4266 return (NULL); 4267 } 4268 src6 = ipif->ipif_v6lcl_addr; 4269 } 4270 *src = src6; 4271 if (ipif != NULL) { 4272 src_ill = ipif->ipif_ill; 4273 if (IS_IPMP(src_ill)) 4274 src_ill = ipmp_ipif_hold_bound_ill(ipif); 4275 else 4276 ill_refhold(src_ill); 4277 ipif_refrele(ipif); 4278 DTRACE_PROBE2(nce__resolve__src__ill, ncec_t *, ncec, 4279 ill_t *, src_ill); 4280 } 4281 return (src_ill); 4282 } 4283 4284 void 4285 ip_nce_lookup_and_update(ipaddr_t *addr, ipif_t *ipif, ip_stack_t *ipst, 4286 uchar_t *hwaddr, int hwaddr_len, int flags) 4287 { 4288 ill_t *ill; 4289 ncec_t *ncec; 4290 nce_t *nce; 4291 uint16_t new_state; 4292 4293 ill = (ipif ? ipif->ipif_ill : NULL); 4294 if (ill != NULL) { 4295 /* 4296 * only one ncec is possible 4297 */ 4298 nce = nce_lookup_v4(ill, addr); 4299 if (nce != NULL) { 4300 ncec = nce->nce_common; 4301 mutex_enter(&ncec->ncec_lock); 4302 if (NCE_ISREACHABLE(ncec)) 4303 new_state = ND_UNCHANGED; 4304 else 4305 new_state = ND_STALE; 4306 ncec->ncec_flags = flags; 4307 nce_update(ncec, new_state, hwaddr); 4308 mutex_exit(&ncec->ncec_lock); 4309 nce_refrele(nce); 4310 return; 4311 } 4312 } else { 4313 /* 4314 * ill is wildcard; clean up all ncec's and ire's 4315 * that match on addr. 4316 */ 4317 nce_hw_map_t hwm; 4318 4319 hwm.hwm_addr = *addr; 4320 hwm.hwm_hwlen = hwaddr_len; 4321 hwm.hwm_hwaddr = hwaddr; 4322 hwm.hwm_flags = flags; 4323 4324 ncec_walk_common(ipst->ips_ndp4, NULL, 4325 (pfi_t)nce_update_hw_changed, (uchar_t *)&hwm, B_TRUE); 4326 } 4327 } 4328 4329 /* 4330 * Common function to add ncec entries. 4331 * we always add the ncec with ncec_ill == ill, and always create 4332 * nce_t on ncec_ill. A dlpi fastpath message may be triggered if the 4333 * ncec is !reachable. 4334 * 4335 * When the caller passes in an nce_state of ND_UNCHANGED, 4336 * nce_add_common() will determine the state of the created nce based 4337 * on the ill_net_type and nce_flags used. Otherwise, the nce will 4338 * be created with state set to the passed in nce_state. 
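 *
 * A sketch of the typical IPv4 calling sequence, modeled on
 * nce_set_multicast_v4() above (the local names are illustrative):
 *
 *	mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
 *	err = nce_add_v4(ill, hw_addr, ill->ill_phys_addr_length, dst,
 *	    flags, ND_UNCHANGED, &nce);
 *	mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
 *	if (err == 0)
 *		err = nce_add_v4_postprocess(nce);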
4339 */
4340 static int
4341 nce_add_common(ill_t *ill, uchar_t *hw_addr, uint_t hw_addr_len,
4342 const in6_addr_t *addr, uint16_t flags, uint16_t nce_state, nce_t **retnce)
4343 {
4344 static ncec_t nce_nil;
4345 uchar_t *template = NULL;
4346 int err;
4347 ncec_t *ncec;
4348 ncec_t **ncep;
4349 ip_stack_t *ipst = ill->ill_ipst;
4350 uint16_t state;
4351 boolean_t fastprobe = B_FALSE;
4352 struct ndp_g_s *ndp;
4353 nce_t *nce = NULL;
4354 mblk_t *dlur_mp = NULL;
4355
4356 if (ill->ill_isv6)
4357 ndp = ill->ill_ipst->ips_ndp6;
4358 else
4359 ndp = ill->ill_ipst->ips_ndp4;
4360
4361 *retnce = NULL;
4362
4363 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock));
4364
4365 if (IN6_IS_ADDR_UNSPECIFIED(addr)) {
4366 ip0dbg(("nce_add_common: no addr\n"));
4367 return (EINVAL);
4368 }
4369 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) {
4370 ip0dbg(("nce_add_common: flags = %x\n", (int)flags));
4371 return (EINVAL);
4372 }
4373
4374 if (ill->ill_isv6) {
4375 ncep = ((ncec_t **)NCE_HASH_PTR_V6(ipst, *addr));
4376 } else {
4377 ipaddr_t v4addr;
4378
4379 IN6_V4MAPPED_TO_IPADDR(addr, v4addr);
4380 ncep = ((ncec_t **)NCE_HASH_PTR_V4(ipst, v4addr));
4381 }
4382
4383 /*
4384 * The caller has ensured that there is no nce on ill, but there could
4385 * still be an nce_common_t for the address, so that we find existing
4386 * ncec_t structures first, and atomically add a new nce_t if
4387 * one is found. The ndp_g_lock ensures that we don't cross threads
4388 * with an ncec_delete(). Unlike ncec_lookup_illgrp() we do not
4389 * compare for matches across the illgrp because this function is
4390 * called via nce_lookup_then_add_v* -> nce_add_v* -> nce_add_common,
4391 * with the nce_lookup_then_add_v* passing in the ipmp_ill where
4392 * appropriate.
4393 */
4394 ncec = *ncep;
4395 for (; ncec != NULL; ncec = ncec->ncec_next) {
4396 if (ncec->ncec_ill == ill) {
4397 if (IN6_ARE_ADDR_EQUAL(&ncec->ncec_addr, addr)) {
4398 *retnce = nce_ill_lookup_then_add(ill, ncec);
4399 if (*retnce != NULL)
4400 break;
4401 }
4402 }
4403 }
4404 if (*retnce != NULL) {
4405 /*
4406 * We should never find *retnce to be MYADDR, since the caller
4407 * may then incorrectly restart a DAD timer that's already
4408 * running.
4409 */
4410 ASSERT(!NCE_MYADDR(ncec));
4411 /* caller must trigger fastpath on nce */
4412 return (0);
4413 }
4414 ncec = kmem_cache_alloc(ncec_cache, KM_NOSLEEP);
4415 if (ncec == NULL)
4416 return (ENOMEM);
4417 *ncec = nce_nil;
4418 ncec->ncec_ill = ill;
4419 ncec->ncec_ipversion = (ill->ill_isv6 ? IPV6_VERSION : IPV4_VERSION);
4420 ncec->ncec_flags = flags;
4421 ncec->ncec_ipst = ipst; /* No netstack_hold */
4422
4423 if (!ill->ill_isv6) {
4424 ipaddr_t addr4;
4425
4426 /*
4427 * DAD probe interval and probe count are set based on
4428 * fast/slow probe settings. If the underlying link doesn't
4429 * have reliable up/down notifications or if we're working
4430 * with IPv4 169.254.0.0/16 Link Local Address space, then
4431 * don't use the fast timers. Otherwise, use them.
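 * (ill_note_link is nonzero when the driver supplies link state change
 * notifications, e.g. DL_NOTE_LINK_UP/DL_NOTE_LINK_DOWN. A fastprobe
 * entry is tagged NCE_F_FAST below so that the DAD logic picks the
 * shorter fastprobe delay and interval.)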
4432 */
4433 ASSERT(IN6_IS_ADDR_V4MAPPED(addr));
4434 IN6_V4MAPPED_TO_IPADDR(addr, addr4);
4435 if (ill->ill_note_link && !IS_IPV4_LL_SPACE(&addr4))
4436 fastprobe = B_TRUE;
4437 if (fastprobe) {
4438 ncec->ncec_xmit_interval =
4439 ipst->ips_arp_fastprobe_interval;
4440 ncec->ncec_pcnt =
4441 ipst->ips_arp_fastprobe_count;
4442 ncec->ncec_flags |= NCE_F_FAST;
4443 } else {
4444 ncec->ncec_xmit_interval =
4445 ipst->ips_arp_probe_interval;
4446 ncec->ncec_pcnt =
4447 ipst->ips_arp_probe_count;
4448 }
4449 if (NCE_PUBLISH(ncec)) {
4450 ncec->ncec_unsolicit_count =
4451 ipst->ips_ip_arp_publish_count;
4452 }
4453 } else {
4454 /*
4455 * probe interval is constant: ILL_PROBE_INTERVAL
4456 * probe count is constant: ND_MAX_UNICAST_SOLICIT
4457 */
4458 ncec->ncec_pcnt = ND_MAX_UNICAST_SOLICIT;
4459 if (NCE_PUBLISH(ncec)) {
4460 ncec->ncec_unsolicit_count =
4461 ipst->ips_ip_ndp_unsolicit_count;
4462 }
4463 }
4464 ncec->ncec_rcnt = ill->ill_xmit_count;
4465 ncec->ncec_addr = *addr;
4466 ncec->ncec_qd_mp = NULL;
4467 ncec->ncec_refcnt = 1; /* for ncec getting created */
4468 mutex_init(&ncec->ncec_lock, NULL, MUTEX_DEFAULT, NULL);
4469 ncec->ncec_trace_disable = B_FALSE;
4470
4471 /*
4472 * ncec_lladdr holds the link layer address
4473 */
4474 if (hw_addr_len > 0) {
4475 template = kmem_alloc(hw_addr_len, KM_NOSLEEP);
4476 if (template == NULL) {
4477 err = ENOMEM;
4478 goto err_ret;
4479 }
4480 ncec->ncec_lladdr = template;
4481 ncec->ncec_lladdr_length = hw_addr_len;
4482 bzero(ncec->ncec_lladdr, hw_addr_len);
4483 }
4484 if ((flags & NCE_F_BCAST) != 0) {
4485 state = ND_REACHABLE;
4486 ASSERT(hw_addr_len > 0);
4487 } else if (ill->ill_net_type == IRE_IF_RESOLVER) {
4488 state = ND_INITIAL;
4489 } else if (ill->ill_net_type == IRE_IF_NORESOLVER) {
4490 /*
4491 * NORESOLVER entries are always created in the REACHABLE
4492 * state.
4493 */
4494 state = ND_REACHABLE;
4495 if (ill->ill_phys_addr_length == IP_ADDR_LEN &&
4496 ill->ill_mactype != DL_IPV4 &&
4497 ill->ill_mactype != DL_6TO4) {
4498 /*
4499 * We create a nce_res_mp with the IP nexthop address
4500 * as the destination address if the physical length
4501 * is exactly 4 bytes for point-to-multipoint links
4502 * that do their own resolution from IP to link-layer
4503 * address (e.g. IP over X.25).
4504 */
4505 bcopy((uchar_t *)addr,
4506 ncec->ncec_lladdr, ill->ill_phys_addr_length);
4507 }
4508 if (ill->ill_phys_addr_length == IPV6_ADDR_LEN &&
4509 ill->ill_mactype != DL_IPV6) {
4510 /*
4511 * We create a nce_res_mp with the IP nexthop address
4512 * as the destination address if the physical length
4513 * is exactly 16 bytes for point-to-multipoint links
4514 * that do their own resolution from IP to link-layer
4515 * address.
4516 */
4517 bcopy((uchar_t *)addr,
4518 ncec->ncec_lladdr, ill->ill_phys_addr_length);
4519 }
4520 /*
4521 * Since NUD is not part of the base IPv4 protocol definition,
4522 * IPv4 neighbor entries on NORESOLVER interfaces will never
4523 * age, and are marked NCE_F_NONUD.
4524 */
4525 if (!ill->ill_isv6)
4526 ncec->ncec_flags |= NCE_F_NONUD;
4527 } else if (ill->ill_net_type == IRE_LOOPBACK) {
4528 state = ND_REACHABLE;
4529 }
4530
4531 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) {
4532 /*
4533 * We are adding an ncec with a deterministic hw_addr,
4534 * so the state can only be one of {REACHABLE, STALE, PROBE}.
4535 * 4536 * if we are adding a unicast ncec for the local address 4537 * it would be REACHABLE; we would be adding a ND_STALE entry 4538 * for the requestor of an ARP_REQUEST/ND_SOLICIT. Our own 4539 * addresses are added in PROBE to trigger DAD. 4540 */ 4541 if ((flags & (NCE_F_MCAST|NCE_F_BCAST)) || 4542 ill->ill_net_type == IRE_IF_NORESOLVER) 4543 state = ND_REACHABLE; 4544 else if (!NCE_PUBLISH(ncec)) 4545 state = ND_STALE; 4546 else 4547 state = ND_PROBE; 4548 if (hw_addr != NULL) 4549 nce_set_ll(ncec, hw_addr); 4550 } 4551 /* caller overrides internally computed state */ 4552 if (nce_state != ND_UNCHANGED) 4553 state = nce_state; 4554 4555 if (state == ND_PROBE) 4556 ncec->ncec_flags |= NCE_F_UNVERIFIED; 4557 4558 ncec->ncec_state = state; 4559 4560 if (state == ND_REACHABLE) { 4561 ncec->ncec_last = ncec->ncec_init_time = 4562 TICK_TO_MSEC(ddi_get_lbolt64()); 4563 } else { 4564 ncec->ncec_last = 0; 4565 if (state == ND_INITIAL) 4566 ncec->ncec_init_time = TICK_TO_MSEC(ddi_get_lbolt64()); 4567 } 4568 list_create(&ncec->ncec_cb, sizeof (ncec_cb_t), 4569 offsetof(ncec_cb_t, ncec_cb_node)); 4570 /* 4571 * have all the memory allocations out of the way before taking locks 4572 * and adding the nce. 4573 */ 4574 nce = kmem_cache_alloc(nce_cache, KM_NOSLEEP); 4575 if (nce == NULL) { 4576 err = ENOMEM; 4577 goto err_ret; 4578 } 4579 if (ncec->ncec_lladdr != NULL || 4580 ill->ill_net_type == IRE_IF_NORESOLVER) { 4581 dlur_mp = ill_dlur_gen(ncec->ncec_lladdr, 4582 ill->ill_phys_addr_length, ill->ill_sap, 4583 ill->ill_sap_length); 4584 if (dlur_mp == NULL) { 4585 err = ENOMEM; 4586 goto err_ret; 4587 } 4588 } 4589 4590 /* 4591 * Atomically ensure that the ill is not CONDEMNED, before 4592 * adding the NCE. 4593 */ 4594 mutex_enter(&ill->ill_lock); 4595 if (ill->ill_state_flags & ILL_CONDEMNED) { 4596 mutex_exit(&ill->ill_lock); 4597 err = EINVAL; 4598 goto err_ret; 4599 } 4600 if (!NCE_MYADDR(ncec) && 4601 (ill->ill_state_flags & ILL_DOWN_IN_PROGRESS)) { 4602 mutex_exit(&ill->ill_lock); 4603 DTRACE_PROBE1(nce__add__on__down__ill, ncec_t *, ncec); 4604 err = EINVAL; 4605 goto err_ret; 4606 } 4607 /* 4608 * Acquire the ncec_lock even before adding the ncec to the list 4609 * so that it cannot get deleted after the ncec is added, but 4610 * before we add the nce. 4611 */ 4612 mutex_enter(&ncec->ncec_lock); 4613 if ((ncec->ncec_next = *ncep) != NULL) 4614 ncec->ncec_next->ncec_ptpn = &ncec->ncec_next; 4615 *ncep = ncec; 4616 ncec->ncec_ptpn = ncep; 4617 4618 /* Bump up the number of ncec's referencing this ill */ 4619 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 4620 (char *), "ncec", (void *), ncec); 4621 ill->ill_ncec_cnt++; 4622 /* 4623 * Since we hold the ncec_lock at this time, the ncec cannot be 4624 * condemned, and we can safely add the nce. 
4625 */
4626 *retnce = nce_add_impl(ill, ncec, nce, dlur_mp);
4627 mutex_exit(&ncec->ncec_lock);
4628 mutex_exit(&ill->ill_lock);
4629
4630 /* caller must trigger fastpath on *retnce */
4631 return (0);
4632
4633 err_ret:
4634 if (ncec != NULL)
4635 kmem_cache_free(ncec_cache, ncec);
4636 if (nce != NULL)
4637 kmem_cache_free(nce_cache, nce);
4638 freemsg(dlur_mp);
4639 if (template != NULL)
4640 kmem_free(template, ill->ill_phys_addr_length);
4641 return (err);
4642 }
4643
4644 /*
4645 * take a ref on the nce
4646 */
4647 void
4648 nce_refhold(nce_t *nce)
4649 {
4650 mutex_enter(&nce->nce_lock);
4651 nce->nce_refcnt++;
4652 ASSERT((nce)->nce_refcnt != 0);
4653 mutex_exit(&nce->nce_lock);
4654 }
4655
4656 /*
4657 * release a ref on the nce; In general, this
4658 * cannot be called with locks held because dropping the last
4659 * reference results in nce_inactive, which will take the ill_lock,
4660 * do ipif_ill_refrele_tail etc. Thus the one exception
4661 * where this can be called with locks held is when the caller
4662 * is certain that the nce_refcnt is sufficient to prevent
4663 * the invocation of nce_inactive.
4664 */
4665 void
4666 nce_refrele(nce_t *nce)
4667 {
4668 ASSERT((nce)->nce_refcnt != 0);
4669 mutex_enter(&nce->nce_lock);
4670 if (--nce->nce_refcnt == 0)
4671 nce_inactive(nce); /* destroys the mutex */
4672 else
4673 mutex_exit(&nce->nce_lock);
4674 }
4675
4676 /*
4677 * free the nce after all refs have gone away.
4678 */
4679 static void
4680 nce_inactive(nce_t *nce)
4681 {
4682 ill_t *ill = nce->nce_ill;
4683
4684 ASSERT(nce->nce_refcnt == 0);
4685
4686 ncec_refrele_notr(nce->nce_common);
4687 nce->nce_common = NULL;
4688 freemsg(nce->nce_fp_mp);
4689 freemsg(nce->nce_dlur_mp);
4690
4691 mutex_enter(&ill->ill_lock);
4692 DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
4693 (char *), "nce", (void *), nce);
4694 ill->ill_nce_cnt--;
4695 nce->nce_ill = NULL;
4696 /*
4697 * If the number of nce's associated with this ill has dropped
4698 * to zero, check whether we need to restart any operation that
4699 * is waiting for this to happen.
4700 */
4701 if (ILL_DOWN_OK(ill)) {
4702 /* ipif_ill_refrele_tail drops the ill_lock */
4703 ipif_ill_refrele_tail(ill);
4704 } else {
4705 mutex_exit(&ill->ill_lock);
4706 }
4707
4708 mutex_destroy(&nce->nce_lock);
4709 kmem_cache_free(nce_cache, nce);
4710 }
4711
4712 /*
4713 * Add an nce to the ill_nce list.
4714 */
4715 static nce_t *
4716 nce_add_impl(ill_t *ill, ncec_t *ncec, nce_t *nce, mblk_t *dlur_mp)
4717 {
4718 bzero(nce, sizeof (*nce));
4719 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL);
4720 nce->nce_common = ncec;
4721 nce->nce_addr = ncec->ncec_addr;
4722 nce->nce_ill = ill;
4723 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
4724 (char *), "nce", (void *), nce);
4725 ill->ill_nce_cnt++;
4726
4727 nce->nce_refcnt = 1; /* for the thread */
4728 ncec->ncec_refcnt++; /* want ncec_refhold_locked_notr(ncec) */
4729 nce->nce_dlur_mp = dlur_mp;
4730
4731 /* add nce to the ill's fastpath list.
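 * An additional reference is taken on behalf of the list; it is the
 * reference that nce_delete() releases when it removes the nce from
 * ill_nce.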
*/
4732 nce->nce_refcnt++; /* for the list */
4733 list_insert_head(&ill->ill_nce, nce);
4734 return (nce);
4735 }
4736
4737 static nce_t *
4738 nce_add(ill_t *ill, ncec_t *ncec)
4739 {
4740 nce_t *nce;
4741 mblk_t *dlur_mp = NULL;
4742
4743 ASSERT(MUTEX_HELD(&ill->ill_lock));
4744 ASSERT(MUTEX_HELD(&ncec->ncec_lock));
4745
4746 nce = kmem_cache_alloc(nce_cache, KM_NOSLEEP);
4747 if (nce == NULL)
4748 return (NULL);
4749 if (ncec->ncec_lladdr != NULL ||
4750 ill->ill_net_type == IRE_IF_NORESOLVER) {
4751 dlur_mp = ill_dlur_gen(ncec->ncec_lladdr,
4752 ill->ill_phys_addr_length, ill->ill_sap,
4753 ill->ill_sap_length);
4754 if (dlur_mp == NULL) {
4755 kmem_cache_free(nce_cache, nce);
4756 return (NULL);
4757 }
4758 }
4759 return (nce_add_impl(ill, ncec, nce, dlur_mp));
4760 }
4761
4762 /*
4763 * remove the nce from the ill's fastpath list (ill_nce)
4764 */
4765 void
4766 nce_delete(nce_t *nce)
4767 {
4768 ill_t *ill = nce->nce_ill;
4769
4770 ASSERT(MUTEX_HELD(&ill->ill_lock));
4771
4772 mutex_enter(&nce->nce_lock);
4773 if (nce->nce_is_condemned) {
4774 /*
4775 * some other thread has removed this nce from the ill_nce list
4776 */
4777 mutex_exit(&nce->nce_lock);
4778 return;
4779 }
4780 nce->nce_is_condemned = B_TRUE;
4781 mutex_exit(&nce->nce_lock);
4782
4783 list_remove(&ill->ill_nce, nce);
4784 /*
4785 * even though we are holding the ill_lock, it is ok to
4786 * call nce_refrele here because we know that we should have
4787 * at least 2 refs on the nce: one for the thread, and one
4788 * for the list. The refrele below will release the one for
4789 * the list.
4790 */
4791 nce_refrele(nce);
4792 }
4793
4794 nce_t *
4795 nce_lookup(ill_t *ill, const in6_addr_t *addr)
4796 {
4797 nce_t *nce = NULL;
4798
4799 ASSERT(ill != NULL);
4800 ASSERT(MUTEX_HELD(&ill->ill_lock));
4801
4802 for (nce = list_head(&ill->ill_nce); nce != NULL;
4803 nce = list_next(&ill->ill_nce, nce)) {
4804 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr))
4805 break;
4806 }
4807
4808 /*
4809 * if we found the nce on the ill_nce list while holding
4810 * the ill_lock, then it cannot be condemned yet.
4811 */
4812 if (nce != NULL) {
4813 ASSERT(!nce->nce_is_condemned);
4814 nce_refhold(nce);
4815 }
4816 return (nce);
4817 }
4818
4819 /*
4820 * Walk the ill_nce list on ill. The callback function func() cannot perform
4821 * any destructive actions.
4822 */
4823 static void
4824 nce_walk_common(ill_t *ill, pfi_t func, void *arg)
4825 {
4826 nce_t *nce = NULL, *nce_next;
4827
4828 ASSERT(MUTEX_HELD(&ill->ill_lock));
4829 for (nce = list_head(&ill->ill_nce); nce != NULL; ) {
4830 nce_next = list_next(&ill->ill_nce, nce);
4831 if (func(ill, nce, arg) != 0)
4832 break;
4833 nce = nce_next;
4834 }
4835 }
4836
4837 void
4838 nce_walk(ill_t *ill, pfi_t func, void *arg)
4839 {
4840 mutex_enter(&ill->ill_lock);
4841 nce_walk_common(ill, func, arg);
4842 mutex_exit(&ill->ill_lock);
4843 }
4844
4845 void
4846 nce_flush(ill_t *ill, boolean_t flushall)
4847 {
4848 nce_t *nce, *nce_next;
4849 list_t dead;
4850
4851 list_create(&dead, sizeof (nce_t), offsetof(nce_t, nce_node));
4852 mutex_enter(&ill->ill_lock);
4853 for (nce = list_head(&ill->ill_nce); nce != NULL; ) {
4854 nce_next = list_next(&ill->ill_nce, nce);
4855 if (!flushall && NCE_PUBLISH(nce->nce_common)) {
4856 nce = nce_next;
4857 continue;
4858 }
4859 /*
4860 * nce_delete requires that the caller should either not
4861 * be holding locks, or should hold a ref to ensure that
4862 * we won't hit ncec_inactive. So take a ref and clean up
4863 * after the list is flushed.
4864 */ 4865 nce_refhold(nce); 4866 nce_delete(nce); 4867 list_insert_tail(&dead, nce); 4868 nce = nce_next; 4869 } 4870 mutex_exit(&ill->ill_lock); 4871 while ((nce = list_head(&dead)) != NULL) { 4872 list_remove(&dead, nce); 4873 nce_refrele(nce); 4874 } 4875 ASSERT(list_is_empty(&dead)); 4876 list_destroy(&dead); 4877 } 4878 4879 /* Return an interval that is anywhere in the [1 .. intv] range */ 4880 static clock_t 4881 nce_fuzz_interval(clock_t intv, boolean_t initial_time) 4882 { 4883 clock_t rnd, frac; 4884 4885 (void) random_get_pseudo_bytes((uint8_t *)&rnd, sizeof (rnd)); 4886 /* Note that clock_t is signed; must chop off bits */ 4887 rnd &= (1ul << (NBBY * sizeof (rnd) - 1)) - 1; 4888 if (initial_time) { 4889 if (intv <= 0) 4890 intv = 1; 4891 else 4892 intv = (rnd % intv) + 1; 4893 } else { 4894 /* Compute 'frac' as 20% of the configured interval */ 4895 if ((frac = intv / 5) <= 1) 4896 frac = 2; 4897 /* Set intv randomly in the range [intv-frac .. intv+frac] */ 4898 if ((intv = intv - frac + rnd % (2 * frac + 1)) <= 0) 4899 intv = 1; 4900 } 4901 return (intv); 4902 } 4903 4904 void 4905 nce_resolv_ipmp_ok(ncec_t *ncec) 4906 { 4907 mblk_t *mp; 4908 uint_t pkt_len; 4909 iaflags_t ixaflags = IXAF_NO_TRACE; 4910 nce_t *under_nce; 4911 ill_t *ill = ncec->ncec_ill; 4912 boolean_t isv6 = (ncec->ncec_ipversion == IPV6_VERSION); 4913 ipif_t *src_ipif = NULL; 4914 ip_stack_t *ipst = ill->ill_ipst; 4915 ill_t *send_ill; 4916 uint_t nprobes; 4917 4918 ASSERT(IS_IPMP(ill)); 4919 4920 mutex_enter(&ncec->ncec_lock); 4921 nprobes = ncec->ncec_nprobes; 4922 mp = ncec->ncec_qd_mp; 4923 ncec->ncec_qd_mp = NULL; 4924 ncec->ncec_nprobes = 0; 4925 mutex_exit(&ncec->ncec_lock); 4926 4927 while (mp != NULL) { 4928 mblk_t *nxt_mp; 4929 4930 nxt_mp = mp->b_next; 4931 mp->b_next = NULL; 4932 if (isv6) { 4933 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 4934 4935 pkt_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 4936 src_ipif = ipif_lookup_addr_nondup_v6(&ip6h->ip6_src, 4937 ill, ALL_ZONES, ipst); 4938 } else { 4939 ipha_t *ipha = (ipha_t *)mp->b_rptr; 4940 4941 ixaflags |= IXAF_IS_IPV4; 4942 pkt_len = ntohs(ipha->ipha_length); 4943 src_ipif = ipif_lookup_addr_nondup(ipha->ipha_src, 4944 ill, ALL_ZONES, ipst); 4945 } 4946 4947 /* 4948 * find a new nce based on an under_ill. The first IPMP probe 4949 * packet gets queued, so we could still find a src_ipif that 4950 * matches an IPMP test address. 4951 */ 4952 if (src_ipif == NULL || IS_IPMP(src_ipif->ipif_ill)) { 4953 /* 4954 * if src_ipif is null, this could be either a 4955 * forwarded packet or a probe whose src got deleted. 4956 * We identify the former case by looking for the 4957 * ncec_nprobes: the first ncec_nprobes packets are 4958 * probes; 4959 */ 4960 if (src_ipif == NULL && nprobes > 0) 4961 goto drop_pkt; 4962 4963 /* 4964 * For forwarded packets, we use the ipmp rotor 4965 * to find send_ill. 
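 * (ipmp_ill_get_xmit_ill() returns an active under-ill of the group,
 * held, rotating across the group members; a NULL return means no usable
 * under-ill is available and the packet is dropped below.)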
4966 */ 4967 send_ill = ipmp_ill_get_xmit_ill(ncec->ncec_ill, 4968 B_TRUE); 4969 } else { 4970 send_ill = src_ipif->ipif_ill; 4971 ill_refhold(send_ill); 4972 } 4973 4974 DTRACE_PROBE4(nce__resolve__ipmp, (mblk_t *), mp, 4975 (ncec_t *), ncec, (ipif_t *), 4976 src_ipif, (ill_t *), send_ill); 4977 4978 if (send_ill == NULL) { 4979 if (src_ipif != NULL) 4980 ipif_refrele(src_ipif); 4981 goto drop_pkt; 4982 } 4983 /* create an under_nce on send_ill */ 4984 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 4985 if (IS_IN_SAME_ILLGRP(send_ill, ncec->ncec_ill)) 4986 under_nce = nce_fastpath_create(send_ill, ncec); 4987 else 4988 under_nce = NULL; 4989 rw_exit(&ipst->ips_ill_g_lock); 4990 if (under_nce != NULL && NCE_ISREACHABLE(ncec)) 4991 nce_fastpath_trigger(under_nce); 4992 4993 ill_refrele(send_ill); 4994 if (src_ipif != NULL) 4995 ipif_refrele(src_ipif); 4996 4997 if (under_nce != NULL) { 4998 (void) ip_xmit(mp, under_nce, ixaflags, pkt_len, 0, 4999 ALL_ZONES, 0, NULL); 5000 nce_refrele(under_nce); 5001 if (nprobes > 0) 5002 nprobes--; 5003 mp = nxt_mp; 5004 continue; 5005 } 5006 drop_pkt: 5007 if (isv6) { 5008 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 5009 } else { 5010 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 5011 } 5012 ip_drop_output("ipIfStatsOutDiscards - no under_ill", mp, NULL); 5013 freemsg(mp); 5014 if (nprobes > 0) 5015 nprobes--; 5016 mp = nxt_mp; 5017 } 5018 ncec_cb_dispatch(ncec); /* complete callbacks */ 5019 } 5020