1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #include <sys/sysmacros.h> 33 #include <sys/errno.h> 34 #include <sys/dlpi.h> 35 #include <sys/socket.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/vtrace.h> 41 #include <sys/kmem.h> 42 #include <sys/zone.h> 43 #include <sys/ethernet.h> 44 #include <sys/sdt.h> 45 46 #include <net/if.h> 47 #include <net/if_types.h> 48 #include <net/if_dl.h> 49 #include <net/route.h> 50 #include <netinet/in.h> 51 #include <netinet/ip6.h> 52 #include <netinet/icmp6.h> 53 54 #include <inet/common.h> 55 #include <inet/mi.h> 56 #include <inet/mib2.h> 57 #include <inet/nd.h> 58 #include <inet/ip.h> 59 #include <inet/ip_if.h> 60 #include <inet/ip_ire.h> 61 #include <inet/ip_rts.h> 62 #include <inet/ip6.h> 63 #include <inet/ip_ndp.h> 64 #include <inet/ipsec_impl.h> 65 #include <inet/ipsec_info.h> 66 #include <inet/sctp_ip.h> 67 68 /* 69 * Function names with nce_ prefix are static while function 70 * names with ndp_ prefix are used by rest of the IP. 71 * 72 * Lock ordering: 73 * 74 * ndp_g_lock -> ill_lock -> nce_lock 75 * 76 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 77 * nce_next. Nce_lock protects the contents of the NCE (particularly 78 * nce_refcnt). 79 */ 80 81 static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, 82 uint32_t ll_addr_len); 83 static void nce_fastpath(nce_t *nce); 84 static void nce_ire_delete(nce_t *nce); 85 static void nce_ire_delete1(ire_t *ire, char *nce_arg); 86 static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 87 static nce_t *nce_lookup_addr(ill_t *, const in6_addr_t *, nce_t *); 88 static nce_t *nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr); 89 static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 90 uchar_t *addr); 91 static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 92 static void nce_queue_mp(nce_t *nce, mblk_t *mp); 93 static void nce_report1(nce_t *nce, uchar_t *mp_arg); 94 static mblk_t *nce_udreq_alloc(ill_t *ill); 95 static void nce_update(nce_t *nce, uint16_t new_state, 96 uchar_t *new_ll_addr); 97 static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 98 static boolean_t nce_xmit(ill_t *ill, uint32_t operation, 99 ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, 100 const in6_addr_t *target, int flag); 101 extern void th_trace_rrecord(th_trace_t *); 102 static int ndp_lookup_then_add_v6(ill_t *, uchar_t *, 103 const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, 104 uint32_t, uint16_t, uint16_t, nce_t **, mblk_t *, mblk_t *); 105 static int ndp_lookup_then_add_v4(ill_t *, uchar_t *, 106 const in_addr_t *, const in_addr_t *, const in_addr_t *, 107 uint32_t, uint16_t, uint16_t, nce_t **, mblk_t *, mblk_t *); 108 static int ndp_add_v6(ill_t *, uchar_t *, const in6_addr_t *, 109 const in6_addr_t *, const in6_addr_t *, uint32_t, uint16_t, uint16_t, 110 nce_t **); 111 static int ndp_add_v4(ill_t *, uchar_t *, const in_addr_t *, 112 const in_addr_t *, const in_addr_t *, uint32_t, uint16_t, uint16_t, 113 nce_t **, mblk_t *, mblk_t *); 114 115 116 #ifdef NCE_DEBUG 117 void nce_trace_inactive(nce_t *); 118 #endif 119 120 ndp_g_t ndp4, ndp6; 121 122 #define NCE_HASH_PTR_V4(addr) \ 123 (&(ndp4.nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 124 125 #define NCE_HASH_PTR_V6(addr) \ 126 (&(ndp6.nce_hash_tbl[NCE_ADDR_HASH_V6(addr, NCE_TABLE_SIZE)])) 127 128 int 129 ndp_add(ill_t *ill, uchar_t *hw_addr, const void *addr, 130 const void *mask, const void *extract_mask, 131 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 132 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 133 { 134 int status; 135 136 if (ill->ill_isv6) 137 status = ndp_add_v6(ill, hw_addr, (in6_addr_t *)addr, 138 (in6_addr_t *)mask, (in6_addr_t *)extract_mask, 139 hw_extract_start, flags, state, newnce); 140 else 141 status = ndp_add_v4(ill, hw_addr, (in_addr_t *)addr, 142 (in_addr_t *)mask, (in_addr_t *)extract_mask, 143 hw_extract_start, flags, state, newnce, fp_mp, res_mp); 144 return (status); 145 } 146 147 /* Non-tunable probe interval, based on link capabilities */ 148 #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 149 150 /* 151 * NDP Cache Entry creation routine. 152 * Mapped entries will never do NUD . 153 * This routine must always be called with ndp6.ndp_g_lock held. 154 * Prior to return, nce_refcnt is incremented. 155 */ 156 static int 157 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 158 const in6_addr_t *mask, const in6_addr_t *extract_mask, 159 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 160 nce_t **newnce) 161 { 162 static nce_t nce_nil; 163 nce_t *nce; 164 mblk_t *mp; 165 mblk_t *template; 166 nce_t **ncep; 167 int err; 168 boolean_t dropped = B_FALSE; 169 170 ASSERT(MUTEX_HELD(&ndp6.ndp_g_lock)); 171 ASSERT(ill != NULL && ill->ill_isv6); 172 if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 173 ip0dbg(("ndp_add: no addr\n")); 174 return (EINVAL); 175 } 176 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 177 ip0dbg(("ndp_add: flags = %x\n", (int)flags)); 178 return (EINVAL); 179 } 180 if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 181 (flags & NCE_F_MAPPING)) { 182 ip0dbg(("ndp_add: extract mask zero for mapping")); 183 return (EINVAL); 184 } 185 /* 186 * Allocate the mblk to hold the nce. 187 * 188 * XXX This can come out of a separate cache - nce_cache. 189 * We don't need the mp anymore as there are no more 190 * "qwriter"s 191 */ 192 mp = allocb(sizeof (nce_t), BPRI_MED); 193 if (mp == NULL) 194 return (ENOMEM); 195 196 nce = (nce_t *)mp->b_rptr; 197 mp->b_wptr = (uchar_t *)&nce[1]; 198 *nce = nce_nil; 199 200 /* 201 * This one holds link layer address 202 */ 203 if (ill->ill_net_type == IRE_IF_RESOLVER) { 204 template = nce_udreq_alloc(ill); 205 } else { 206 ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 207 ASSERT((ill->ill_resolver_mp != NULL)); 208 template = copyb(ill->ill_resolver_mp); 209 } 210 if (template == NULL) { 211 freeb(mp); 212 return (ENOMEM); 213 } 214 nce->nce_ill = ill; 215 nce->nce_ipversion = IPV6_VERSION; 216 nce->nce_flags = flags; 217 nce->nce_state = state; 218 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 219 nce->nce_rcnt = ill->ill_xmit_count; 220 nce->nce_addr = *addr; 221 nce->nce_mask = *mask; 222 nce->nce_extract_mask = *extract_mask; 223 nce->nce_ll_extract_start = hw_extract_start; 224 nce->nce_fp_mp = NULL; 225 nce->nce_res_mp = template; 226 if (state == ND_REACHABLE) 227 nce->nce_last = TICK_TO_MSEC(lbolt64); 228 else 229 nce->nce_last = 0; 230 nce->nce_qd_mp = NULL; 231 nce->nce_mp = mp; 232 if (hw_addr != NULL) 233 nce_set_ll(nce, hw_addr); 234 /* This one is for nce getting created */ 235 nce->nce_refcnt = 1; 236 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 237 if (nce->nce_flags & NCE_F_MAPPING) { 238 ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 239 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 240 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 241 ncep = &ndp6.nce_mask_entries; 242 } else { 243 ncep = ((nce_t **)NCE_HASH_PTR_V6(*addr)); 244 } 245 246 #ifdef NCE_DEBUG 247 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 248 #endif 249 /* 250 * Atomically ensure that the ill is not CONDEMNED, before 251 * adding the NCE. 252 */ 253 mutex_enter(&ill->ill_lock); 254 if (ill->ill_state_flags & ILL_CONDEMNED) { 255 mutex_exit(&ill->ill_lock); 256 freeb(mp); 257 freeb(template); 258 return (EINVAL); 259 } 260 if ((nce->nce_next = *ncep) != NULL) 261 nce->nce_next->nce_ptpn = &nce->nce_next; 262 *ncep = nce; 263 nce->nce_ptpn = ncep; 264 *newnce = nce; 265 /* This one is for nce being used by an active thread */ 266 NCE_REFHOLD(*newnce); 267 268 /* Bump up the number of nce's referencing this ill */ 269 ill->ill_nce_cnt++; 270 mutex_exit(&ill->ill_lock); 271 272 err = 0; 273 if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { 274 mutex_enter(&nce->nce_lock); 275 mutex_exit(&ndp6.ndp_g_lock); 276 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 277 mutex_exit(&nce->nce_lock); 278 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 279 &ipv6_all_zeros, addr, NDP_PROBE); 280 if (dropped) { 281 mutex_enter(&nce->nce_lock); 282 nce->nce_pcnt++; 283 mutex_exit(&nce->nce_lock); 284 } 285 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 286 mutex_enter(&ndp6.ndp_g_lock); 287 err = EINPROGRESS; 288 } else if (flags & NCE_F_UNSOL_ADV) { 289 /* 290 * We account for the transmit below by assigning one 291 * less than the ndd variable. Subsequent decrements 292 * are done in ndp_timer. 293 */ 294 mutex_enter(&nce->nce_lock); 295 mutex_exit(&ndp6.ndp_g_lock); 296 nce->nce_unsolicit_count = ip_ndp_unsolicit_count - 1; 297 mutex_exit(&nce->nce_lock); 298 dropped = nce_xmit(ill, 299 ND_NEIGHBOR_ADVERT, 300 ill, /* ill to be used for extracting ill_nd_lla */ 301 B_TRUE, /* use ill_nd_lla */ 302 addr, /* Source and target of the advertisement pkt */ 303 &ipv6_all_hosts_mcast, /* Destination of the packet */ 304 NDP_ORIDE); 305 mutex_enter(&nce->nce_lock); 306 if (dropped) 307 nce->nce_unsolicit_count++; 308 if (nce->nce_unsolicit_count != 0) { 309 nce->nce_timeout_id = timeout(ndp_timer, nce, 310 MSEC_TO_TICK(ip_ndp_unsolicit_interval)); 311 } 312 mutex_exit(&nce->nce_lock); 313 mutex_enter(&ndp6.ndp_g_lock); 314 } 315 /* 316 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 317 * we call nce_fastpath as soon as the nce is resolved in ndp_process. 318 * We call nce_fastpath from nce_update if the link layer address of 319 * the peer changes from nce_update 320 */ 321 if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 322 nce_fastpath(nce); 323 return (err); 324 } 325 326 int 327 ndp_lookup_then_add(ill_t *ill, uchar_t *hw_addr, const void *addr, 328 const void *mask, const void *extract_mask, 329 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 330 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 331 { 332 int status; 333 334 if (ill->ill_isv6) { 335 status = ndp_lookup_then_add_v6(ill, hw_addr, 336 (in6_addr_t *)addr, (in6_addr_t *)mask, 337 (in6_addr_t *)extract_mask, hw_extract_start, flags, 338 state, newnce, fp_mp, res_mp); 339 } else { 340 status = ndp_lookup_then_add_v4(ill, hw_addr, 341 (in_addr_t *)addr, (in_addr_t *)mask, 342 (in_addr_t *)extract_mask, hw_extract_start, flags, 343 state, newnce, fp_mp, res_mp); 344 } 345 346 return (status); 347 } 348 349 static int 350 ndp_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 351 const in6_addr_t *mask, const in6_addr_t *extract_mask, 352 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 353 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 354 { 355 int err = 0; 356 nce_t *nce; 357 358 ASSERT(ill != NULL && ill->ill_isv6); 359 mutex_enter(&ndp6.ndp_g_lock); 360 nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); /* head of v6 hash table */ 361 nce = nce_lookup_addr(ill, addr, nce); 362 if (nce == NULL) { 363 err = ndp_add(ill, 364 hw_addr, 365 addr, 366 mask, 367 extract_mask, 368 hw_extract_start, 369 flags, 370 state, 371 newnce, 372 fp_mp, 373 res_mp); 374 } else { 375 *newnce = nce; 376 err = EEXIST; 377 } 378 mutex_exit(&ndp6.ndp_g_lock); 379 return (err); 380 } 381 382 /* 383 * Remove all the CONDEMNED nces from the appropriate hash table. 384 * We create a private list of NCEs, these may have ires pointing 385 * to them, so the list will be passed through to clean up dependent 386 * ires and only then we can do NCE_REFRELE which can make NCE inactive. 387 */ 388 static void 389 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 390 { 391 nce_t *nce1; 392 nce_t **ptpn; 393 394 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 395 ASSERT(ndp->ndp_g_walker == 0); 396 for (; nce; nce = nce1) { 397 nce1 = nce->nce_next; 398 mutex_enter(&nce->nce_lock); 399 if (nce->nce_flags & NCE_F_CONDEMNED) { 400 ptpn = nce->nce_ptpn; 401 nce1 = nce->nce_next; 402 if (nce1 != NULL) 403 nce1->nce_ptpn = ptpn; 404 *ptpn = nce1; 405 nce->nce_ptpn = NULL; 406 nce->nce_next = NULL; 407 nce->nce_next = *free_nce_list; 408 *free_nce_list = nce; 409 } 410 mutex_exit(&nce->nce_lock); 411 } 412 } 413 414 /* 415 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 416 * will return this NCE. Also no new IREs will be created that 417 * point to this NCE (See ire_add_v6). Also no new timeouts will 418 * be started (See NDP_RESTART_TIMER). 419 * 2. Cancel any currently running timeouts. 420 * 3. If there is an ndp walker, return. The walker will do the cleanup. 421 * This ensures that walkers see a consistent list of NCEs while walking. 422 * 4. Otherwise remove the NCE from the list of NCEs 423 * 5. Delete all IREs pointing to this NCE. 424 */ 425 void 426 ndp_delete(nce_t *nce) 427 { 428 nce_t **ptpn; 429 nce_t *nce1; 430 int ipversion = nce->nce_ipversion; 431 ndp_g_t *ndp = (ipversion == IPV4_VERSION ? &ndp4 : &ndp6); 432 433 /* Serialize deletes */ 434 mutex_enter(&nce->nce_lock); 435 if (nce->nce_flags & NCE_F_CONDEMNED) { 436 /* Some other thread is doing the delete */ 437 mutex_exit(&nce->nce_lock); 438 return; 439 } 440 /* 441 * Caller has a refhold. Also 1 ref for being in the list. Thus 442 * refcnt has to be >= 2 443 */ 444 ASSERT(nce->nce_refcnt >= 2); 445 nce->nce_flags |= NCE_F_CONDEMNED; 446 mutex_exit(&nce->nce_lock); 447 448 nce_fastpath_list_delete(nce); 449 450 /* 451 * Cancel any running timer. Timeout can't be restarted 452 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 453 * Passing invalid timeout id is fine. 454 */ 455 if (nce->nce_timeout_id != 0) { 456 (void) untimeout(nce->nce_timeout_id); 457 nce->nce_timeout_id = 0; 458 } 459 460 mutex_enter(&ndp->ndp_g_lock); 461 if (nce->nce_ptpn == NULL) { 462 /* 463 * The last ndp walker has already removed this nce from 464 * the list after we marked the nce CONDEMNED and before 465 * we grabbed the global lock. 466 */ 467 mutex_exit(&ndp->ndp_g_lock); 468 return; 469 } 470 if (ndp->ndp_g_walker > 0) { 471 /* 472 * Can't unlink. The walker will clean up 473 */ 474 ndp->ndp_g_walker_cleanup = B_TRUE; 475 mutex_exit(&ndp->ndp_g_lock); 476 return; 477 } 478 479 /* 480 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 481 * the timer since it is marked CONDEMNED. 482 */ 483 ptpn = nce->nce_ptpn; 484 nce1 = nce->nce_next; 485 if (nce1 != NULL) 486 nce1->nce_ptpn = ptpn; 487 *ptpn = nce1; 488 nce->nce_ptpn = NULL; 489 nce->nce_next = NULL; 490 mutex_exit(&ndp->ndp_g_lock); 491 492 nce_ire_delete(nce); 493 } 494 495 void 496 ndp_inactive(nce_t *nce) 497 { 498 mblk_t **mpp; 499 ill_t *ill; 500 501 ASSERT(nce->nce_refcnt == 0); 502 ASSERT(MUTEX_HELD(&nce->nce_lock)); 503 ASSERT(nce->nce_fastpath == NULL); 504 505 /* Free all nce allocated messages */ 506 mpp = &nce->nce_first_mp_to_free; 507 do { 508 while (*mpp != NULL) { 509 mblk_t *mp; 510 511 mp = *mpp; 512 *mpp = mp->b_next; 513 mp->b_next = NULL; 514 mp->b_prev = NULL; 515 freemsg(mp); 516 } 517 } while (mpp++ != &nce->nce_last_mp_to_free); 518 519 #ifdef NCE_DEBUG 520 nce_trace_inactive(nce); 521 #endif 522 523 ill = nce->nce_ill; 524 mutex_enter(&ill->ill_lock); 525 ill->ill_nce_cnt--; 526 /* 527 * If the number of nce's associated with this ill have dropped 528 * to zero, check whether we need to restart any operation that 529 * is waiting for this to happen. 530 */ 531 if (ill->ill_nce_cnt == 0) { 532 /* ipif_ill_refrele_tail drops the ill_lock */ 533 ipif_ill_refrele_tail(ill); 534 } else { 535 mutex_exit(&ill->ill_lock); 536 } 537 mutex_destroy(&nce->nce_lock); 538 freeb(nce->nce_mp); 539 } 540 541 /* 542 * ndp_walk routine. Delete the nce if it is associated with the ill 543 * that is going away. Always called as a writer. 544 */ 545 void 546 ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 547 { 548 if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 549 ndp_delete(nce); 550 } 551 } 552 553 /* 554 * Walk a list of to be inactive NCEs and blow away all the ires. 555 */ 556 static void 557 nce_ire_delete_list(nce_t *nce) 558 { 559 nce_t *nce_next; 560 561 ASSERT(nce != NULL); 562 while (nce != NULL) { 563 nce_next = nce->nce_next; 564 nce->nce_next = NULL; 565 566 /* 567 * It is possible for the last ndp walker (this thread) 568 * to come here after ndp_delete has marked the nce CONDEMNED 569 * and before it has removed the nce from the fastpath list 570 * or called untimeout. So we need to do it here. It is safe 571 * for both ndp_delete and this thread to do it twice or 572 * even simultaneously since each of the threads has a 573 * reference on the nce. 574 */ 575 nce_fastpath_list_delete(nce); 576 /* 577 * Cancel any running timer. Timeout can't be restarted 578 * since CONDEMNED is set. Can't hold nce_lock across untimeout. 579 * Passing invalid timeout id is fine. 580 */ 581 if (nce->nce_timeout_id != 0) { 582 (void) untimeout(nce->nce_timeout_id); 583 nce->nce_timeout_id = 0; 584 } 585 /* 586 * We might hit this func thus in the v4 case: 587 * ipif_down->ipif_ndp_down->ndp_walk 588 */ 589 590 if (nce->nce_ipversion == IPV4_VERSION) { 591 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 592 IRE_CACHE, nce_ire_delete1, 593 (char *)nce, nce->nce_ill); 594 } else { 595 ASSERT(nce->nce_ipversion == IPV6_VERSION); 596 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 597 IRE_CACHE, nce_ire_delete1, 598 (char *)nce, nce->nce_ill); 599 } 600 NCE_REFRELE_NOTR(nce); 601 nce = nce_next; 602 } 603 } 604 605 /* 606 * Delete an ire when the nce goes away. 607 */ 608 /* ARGSUSED */ 609 static void 610 nce_ire_delete(nce_t *nce) 611 { 612 if (nce->nce_ipversion == IPV6_VERSION) { 613 ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 614 nce_ire_delete1, (char *)nce, nce->nce_ill); 615 NCE_REFRELE_NOTR(nce); 616 } else { 617 ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 618 nce_ire_delete1, (char *)nce, nce->nce_ill); 619 NCE_REFRELE_NOTR(nce); 620 } 621 } 622 623 /* 624 * ire_walk routine used to delete every IRE that shares this nce 625 */ 626 static void 627 nce_ire_delete1(ire_t *ire, char *nce_arg) 628 { 629 nce_t *nce = (nce_t *)nce_arg; 630 631 ASSERT(ire->ire_type == IRE_CACHE); 632 633 if (ire->ire_nce == nce) { 634 ASSERT(ire->ire_ipversion == nce->nce_ipversion); 635 ire_delete(ire); 636 } 637 } 638 639 /* 640 * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 641 */ 642 boolean_t 643 ndp_restart_dad(nce_t *nce) 644 { 645 boolean_t started; 646 boolean_t dropped; 647 648 if (nce == NULL) 649 return (B_FALSE); 650 mutex_enter(&nce->nce_lock); 651 if (nce->nce_state == ND_PROBE) { 652 mutex_exit(&nce->nce_lock); 653 started = B_TRUE; 654 } else if (nce->nce_state == ND_REACHABLE) { 655 nce->nce_state = ND_PROBE; 656 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; 657 mutex_exit(&nce->nce_lock); 658 dropped = nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, NULL, 659 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, NDP_PROBE); 660 if (dropped) { 661 mutex_enter(&nce->nce_lock); 662 nce->nce_pcnt++; 663 mutex_exit(&nce->nce_lock); 664 } 665 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); 666 started = B_TRUE; 667 } else { 668 mutex_exit(&nce->nce_lock); 669 started = B_FALSE; 670 } 671 return (started); 672 } 673 674 /* 675 * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 676 * If one is found, the refcnt on the nce will be incremented. 677 */ 678 nce_t * 679 ndp_lookup_v6(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock) 680 { 681 nce_t *nce; 682 683 ASSERT(ill != NULL && ill->ill_isv6); 684 if (!caller_holds_lock) { 685 mutex_enter(&ndp6.ndp_g_lock); 686 } 687 nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); /* head of v6 hash table */ 688 nce = nce_lookup_addr(ill, addr, nce); 689 if (nce == NULL) 690 nce = nce_lookup_mapping(ill, addr); 691 if (!caller_holds_lock) 692 mutex_exit(&ndp6.ndp_g_lock); 693 return (nce); 694 } 695 /* 696 * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 697 * If one is found, the refcnt on the nce will be incremented. 698 * Since multicast mappings are handled in arp, there are no nce_mcast_entries 699 * so we skip the nce_lookup_mapping call. 700 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 701 */ 702 nce_t * 703 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 704 { 705 nce_t *nce; 706 in6_addr_t addr6; 707 708 if (!caller_holds_lock) { 709 mutex_enter(&ndp4.ndp_g_lock); 710 } 711 nce = *((nce_t **)NCE_HASH_PTR_V4(*addr)); /* head of v6 hash table */ 712 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 713 nce = nce_lookup_addr(ill, &addr6, nce); 714 if (!caller_holds_lock) 715 mutex_exit(&ndp4.ndp_g_lock); 716 return (nce); 717 } 718 719 /* 720 * Cache entry lookup. Try to find an nce matching the parameters passed. 721 * Look only for exact entries (no mappings). If an nce is found, increment 722 * the hold count on that nce. The caller passes in the start of the 723 * appropriate hash table, and must be holding the appropriate global 724 * lock (ndp_g_lock). 725 */ 726 static nce_t * 727 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr, nce_t *nce) 728 { 729 ndp_g_t *ndp = (ill->ill_isv6 ? &ndp6 : &ndp4); 730 731 ASSERT(ill != NULL); 732 ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 733 if (IN6_IS_ADDR_UNSPECIFIED(addr)) 734 return (NULL); 735 for (; nce != NULL; nce = nce->nce_next) { 736 if (nce->nce_ill == ill) { 737 if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 738 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 739 &ipv6_all_ones)) { 740 mutex_enter(&nce->nce_lock); 741 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 742 NCE_REFHOLD_LOCKED(nce); 743 mutex_exit(&nce->nce_lock); 744 break; 745 } 746 mutex_exit(&nce->nce_lock); 747 } 748 } 749 } 750 return (nce); 751 } 752 753 /* 754 * Cache entry lookup. Try to find an nce matching the parameters passed. 755 * Look only for mappings. 756 */ 757 static nce_t * 758 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 759 { 760 nce_t *nce; 761 762 ASSERT(ill != NULL && ill->ill_isv6); 763 ASSERT(MUTEX_HELD(&ndp6.ndp_g_lock)); 764 if (!IN6_IS_ADDR_MULTICAST(addr)) 765 return (NULL); 766 nce = ndp6.nce_mask_entries; 767 for (; nce != NULL; nce = nce->nce_next) 768 if (nce->nce_ill == ill && 769 (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 770 mutex_enter(&nce->nce_lock); 771 if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 772 NCE_REFHOLD_LOCKED(nce); 773 mutex_exit(&nce->nce_lock); 774 break; 775 } 776 mutex_exit(&nce->nce_lock); 777 } 778 return (nce); 779 } 780 781 /* 782 * Process passed in parameters either from an incoming packet or via 783 * user ioctl. 784 */ 785 void 786 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 787 { 788 ill_t *ill = nce->nce_ill; 789 uint32_t hw_addr_len = ill->ill_nd_lla_len; 790 mblk_t *mp; 791 boolean_t ll_updated = B_FALSE; 792 boolean_t ll_changed; 793 794 ASSERT(nce->nce_ipversion == IPV6_VERSION); 795 /* 796 * No updates of link layer address or the neighbor state is 797 * allowed, when the cache is in NONUD state. This still 798 * allows for responding to reachability solicitation. 799 */ 800 mutex_enter(&nce->nce_lock); 801 if (nce->nce_state == ND_INCOMPLETE) { 802 if (hw_addr == NULL) { 803 mutex_exit(&nce->nce_lock); 804 return; 805 } 806 nce_set_ll(nce, hw_addr); 807 /* 808 * Update nce state and send the queued packets 809 * back to ip this time ire will be added. 810 */ 811 if (flag & ND_NA_FLAG_SOLICITED) { 812 nce_update(nce, ND_REACHABLE, NULL); 813 } else { 814 nce_update(nce, ND_STALE, NULL); 815 } 816 mutex_exit(&nce->nce_lock); 817 nce_fastpath(nce); 818 mutex_enter(&nce->nce_lock); 819 mp = nce->nce_qd_mp; 820 nce->nce_qd_mp = NULL; 821 mutex_exit(&nce->nce_lock); 822 while (mp != NULL) { 823 mblk_t *nxt_mp; 824 825 nxt_mp = mp->b_next; 826 mp->b_next = NULL; 827 if (mp->b_prev != NULL) { 828 ill_t *inbound_ill; 829 queue_t *fwdq = NULL; 830 uint_t ifindex; 831 832 ifindex = (uint_t)(uintptr_t)mp->b_prev; 833 inbound_ill = ill_lookup_on_ifindex(ifindex, 834 B_TRUE, NULL, NULL, NULL, NULL); 835 if (inbound_ill == NULL) { 836 mp->b_prev = NULL; 837 freemsg(mp); 838 return; 839 } else { 840 fwdq = inbound_ill->ill_rq; 841 } 842 mp->b_prev = NULL; 843 /* 844 * Send a forwarded packet back into ip_rput_v6 845 * just as in ire_send_v6(). 846 * Extract the queue from b_prev (set in 847 * ip_rput_data_v6). 848 */ 849 if (fwdq != NULL) { 850 /* 851 * Forwarded packets hop count will 852 * get decremented in ip_rput_data_v6 853 */ 854 put(fwdq, mp); 855 } else { 856 /* 857 * Send locally originated packets back 858 * into * ip_wput_v6. 859 */ 860 put(ill->ill_wq, mp); 861 } 862 ill_refrele(inbound_ill); 863 } else { 864 put(ill->ill_wq, mp); 865 } 866 mp = nxt_mp; 867 } 868 return; 869 } 870 ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); 871 if (!is_adv) { 872 /* If this is a SOLICITATION request only */ 873 if (ll_changed) 874 nce_update(nce, ND_STALE, hw_addr); 875 mutex_exit(&nce->nce_lock); 876 return; 877 } 878 if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 879 /* If in any other state than REACHABLE, ignore */ 880 if (nce->nce_state == ND_REACHABLE) { 881 nce_update(nce, ND_STALE, NULL); 882 } 883 mutex_exit(&nce->nce_lock); 884 return; 885 } else { 886 if (ll_changed) { 887 nce_update(nce, ND_UNCHANGED, hw_addr); 888 ll_updated = B_TRUE; 889 } 890 if (flag & ND_NA_FLAG_SOLICITED) { 891 nce_update(nce, ND_REACHABLE, NULL); 892 } else { 893 if (ll_updated) { 894 nce_update(nce, ND_STALE, NULL); 895 } 896 } 897 mutex_exit(&nce->nce_lock); 898 if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 899 NCE_F_ISROUTER)) { 900 ire_t *ire; 901 902 /* 903 * Router turned to host. We need to remove the 904 * entry as well as any default route that may be 905 * using this as a next hop. This is required by 906 * section 7.2.5 of RFC 2461. 907 */ 908 ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 909 &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 910 nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 911 MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 912 MATCH_IRE_DEFAULT); 913 if (ire != NULL) { 914 ip_rts_rtmsg(RTM_DELETE, ire, 0); 915 ire_delete(ire); 916 ire_refrele(ire); 917 } 918 ndp_delete(nce); 919 } 920 } 921 } 922 923 /* 924 * Pass arg1 to the pfi supplied, along with each nce in existence. 925 * ndp_walk() places a REFHOLD on the nce and drops the lock when 926 * walking the hash list. 927 */ 928 void 929 ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 930 boolean_t trace) 931 { 932 933 nce_t *nce; 934 nce_t *nce1; 935 nce_t **ncep; 936 nce_t *free_nce_list = NULL; 937 938 mutex_enter(&ndp->ndp_g_lock); 939 /* Prevent ndp_delete from unlink and free of NCE */ 940 ndp->ndp_g_walker++; 941 mutex_exit(&ndp->ndp_g_lock); 942 for (ncep = ndp->nce_hash_tbl; 943 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 944 for (nce = *ncep; nce != NULL; nce = nce1) { 945 nce1 = nce->nce_next; 946 if (ill == NULL || nce->nce_ill == ill) { 947 if (trace) { 948 NCE_REFHOLD(nce); 949 (*pfi)(nce, arg1); 950 NCE_REFRELE(nce); 951 } else { 952 NCE_REFHOLD_NOTR(nce); 953 (*pfi)(nce, arg1); 954 NCE_REFRELE_NOTR(nce); 955 } 956 } 957 } 958 } 959 for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { 960 nce1 = nce->nce_next; 961 if (ill == NULL || nce->nce_ill == ill) { 962 if (trace) { 963 NCE_REFHOLD(nce); 964 (*pfi)(nce, arg1); 965 NCE_REFRELE(nce); 966 } else { 967 NCE_REFHOLD_NOTR(nce); 968 (*pfi)(nce, arg1); 969 NCE_REFRELE_NOTR(nce); 970 } 971 } 972 } 973 mutex_enter(&ndp->ndp_g_lock); 974 ndp->ndp_g_walker--; 975 /* 976 * While NCE's are removed from global list they are placed 977 * in a private list, to be passed to nce_ire_delete_list(). 978 * The reason is, there may be ires pointing to this nce 979 * which needs to cleaned up. 980 */ 981 if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { 982 /* Time to delete condemned entries */ 983 for (ncep = ndp->nce_hash_tbl; 984 ncep < A_END(ndp->nce_hash_tbl); ncep++) { 985 nce = *ncep; 986 if (nce != NULL) { 987 nce_remove(ndp, nce, &free_nce_list); 988 } 989 } 990 nce = ndp->nce_mask_entries; 991 if (nce != NULL) { 992 nce_remove(ndp, nce, &free_nce_list); 993 } 994 ndp->ndp_g_walker_cleanup = B_FALSE; 995 } 996 mutex_exit(&ndp->ndp_g_lock); 997 998 if (free_nce_list != NULL) { 999 nce_ire_delete_list(free_nce_list); 1000 } 1001 } 1002 1003 void 1004 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1) 1005 { 1006 ndp_walk_common(&ndp4, ill, pfi, arg1, B_TRUE); 1007 ndp_walk_common(&ndp6, ill, pfi, arg1, B_TRUE); 1008 } 1009 1010 /* 1011 * Prepend the zoneid using an ipsec_out_t for later use by functions like 1012 * ip_rput_v6() after neighbor discovery has taken place. If the message 1013 * block already has a M_CTL at the front of it, then simply set the zoneid 1014 * appropriately. 1015 */ 1016 static mblk_t * 1017 ndp_prepend_zone(mblk_t *mp, zoneid_t zoneid) 1018 { 1019 mblk_t *first_mp; 1020 ipsec_out_t *io; 1021 1022 ASSERT(zoneid != ALL_ZONES); 1023 if (mp->b_datap->db_type == M_CTL) { 1024 io = (ipsec_out_t *)mp->b_rptr; 1025 ASSERT(io->ipsec_out_type == IPSEC_OUT); 1026 io->ipsec_out_zoneid = zoneid; 1027 return (mp); 1028 } 1029 1030 first_mp = ipsec_alloc_ipsec_out(); 1031 if (first_mp == NULL) 1032 return (NULL); 1033 io = (ipsec_out_t *)first_mp->b_rptr; 1034 /* This is not a secure packet */ 1035 io->ipsec_out_secure = B_FALSE; 1036 io->ipsec_out_zoneid = zoneid; 1037 first_mp->b_cont = mp; 1038 return (first_mp); 1039 } 1040 1041 /* 1042 * Process resolve requests. Handles both mapped entries 1043 * as well as cases that needs to be send out on the wire. 1044 * Lookup a NCE for a given IRE. Regardless of whether one exists 1045 * or one is created, we defer making ire point to nce until the 1046 * ire is actually added at which point the nce_refcnt on the nce is 1047 * incremented. This is done primarily to have symmetry between ire_add() 1048 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1049 */ 1050 int 1051 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 1052 { 1053 nce_t *nce; 1054 int err = 0; 1055 uint32_t ms; 1056 mblk_t *mp_nce = NULL; 1057 1058 ASSERT(ill != NULL); 1059 ASSERT(ill->ill_isv6); 1060 if (IN6_IS_ADDR_MULTICAST(dst)) { 1061 err = nce_set_multicast(ill, dst); 1062 return (err); 1063 } 1064 err = ndp_lookup_then_add(ill, 1065 NULL, /* No hardware address */ 1066 dst, 1067 &ipv6_all_ones, 1068 &ipv6_all_zeros, 1069 0, 1070 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1071 ND_INCOMPLETE, 1072 &nce, 1073 NULL, /* let ndp_add figure out fastpath mp and dlureq_mp for v6 */ 1074 NULL); 1075 1076 switch (err) { 1077 case 0: 1078 /* 1079 * New cache entry was created. Make sure that the state 1080 * is not ND_INCOMPLETE. It can be in some other state 1081 * even before we send out the solicitation as we could 1082 * get un-solicited advertisements. 1083 * 1084 * If this is an XRESOLV interface, simply return 0, 1085 * since we don't want to solicit just yet. 1086 */ 1087 if (ill->ill_flags & ILLF_XRESOLV) { 1088 NCE_REFRELE(nce); 1089 return (0); 1090 } 1091 rw_enter(&ill_g_lock, RW_READER); 1092 mutex_enter(&nce->nce_lock); 1093 if (nce->nce_state != ND_INCOMPLETE) { 1094 mutex_exit(&nce->nce_lock); 1095 rw_exit(&ill_g_lock); 1096 NCE_REFRELE(nce); 1097 return (0); 1098 } 1099 mp_nce = ndp_prepend_zone(mp, zoneid); 1100 if (mp_nce == NULL) { 1101 /* The caller will free mp */ 1102 mutex_exit(&nce->nce_lock); 1103 rw_exit(&ill_g_lock); 1104 ndp_delete(nce); 1105 NCE_REFRELE(nce); 1106 return (ENOMEM); 1107 } 1108 ms = nce_solicit(nce, mp_nce); 1109 rw_exit(&ill_g_lock); 1110 if (ms == 0) { 1111 /* The caller will free mp */ 1112 if (mp_nce != mp) 1113 freeb(mp_nce); 1114 mutex_exit(&nce->nce_lock); 1115 ndp_delete(nce); 1116 NCE_REFRELE(nce); 1117 return (EBUSY); 1118 } 1119 mutex_exit(&nce->nce_lock); 1120 NDP_RESTART_TIMER(nce, (clock_t)ms); 1121 NCE_REFRELE(nce); 1122 return (EINPROGRESS); 1123 case EEXIST: 1124 /* Resolution in progress just queue the packet */ 1125 mutex_enter(&nce->nce_lock); 1126 if (nce->nce_state == ND_INCOMPLETE) { 1127 mp_nce = ndp_prepend_zone(mp, zoneid); 1128 if (mp_nce == NULL) { 1129 err = ENOMEM; 1130 } else { 1131 nce_queue_mp(nce, mp_nce); 1132 err = EINPROGRESS; 1133 } 1134 } else { 1135 /* 1136 * Any other state implies we have 1137 * a nce but IRE needs to be added ... 1138 * ire_add_v6() will take care of the 1139 * the case when the nce becomes CONDEMNED 1140 * before the ire is added to the table. 1141 */ 1142 err = 0; 1143 } 1144 mutex_exit(&nce->nce_lock); 1145 NCE_REFRELE(nce); 1146 break; 1147 default: 1148 ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); 1149 break; 1150 } 1151 return (err); 1152 } 1153 1154 /* 1155 * When there is no resolver, the link layer template is passed in 1156 * the IRE. 1157 * Lookup a NCE for a given IRE. Regardless of whether one exists 1158 * or one is created, we defer making ire point to nce until the 1159 * ire is actually added at which point the nce_refcnt on the nce is 1160 * incremented. This is done primarily to have symmetry between ire_add() 1161 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 1162 */ 1163 int 1164 ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 1165 { 1166 nce_t *nce; 1167 int err = 0; 1168 1169 ASSERT(ill != NULL); 1170 ASSERT(ill->ill_isv6); 1171 if (IN6_IS_ADDR_MULTICAST(dst)) { 1172 err = nce_set_multicast(ill, dst); 1173 return (err); 1174 } 1175 1176 err = ndp_lookup_then_add(ill, 1177 NULL, /* hardware address */ 1178 dst, 1179 &ipv6_all_ones, 1180 &ipv6_all_zeros, 1181 0, 1182 (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 1183 ND_REACHABLE, 1184 &nce, 1185 NULL, /* let ndp_add figure out fp_mp/dlureq_mp for v6 */ 1186 NULL); 1187 1188 switch (err) { 1189 case 0: 1190 /* 1191 * Cache entry with a proper resolver cookie was 1192 * created. 1193 */ 1194 NCE_REFRELE(nce); 1195 break; 1196 case EEXIST: 1197 err = 0; 1198 NCE_REFRELE(nce); 1199 break; 1200 default: 1201 ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 1202 break; 1203 } 1204 return (err); 1205 } 1206 1207 /* 1208 * For each interface an entry is added for the unspecified multicast group. 1209 * Here that mapping is used to form the multicast cache entry for a particular 1210 * multicast destination. 1211 */ 1212 static int 1213 nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 1214 { 1215 nce_t *mnce; /* Multicast mapping entry */ 1216 nce_t *nce; 1217 uchar_t *hw_addr = NULL; 1218 int err = 0; 1219 1220 ASSERT(ill != NULL); 1221 ASSERT(ill->ill_isv6); 1222 ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 1223 1224 mutex_enter(&ndp6.ndp_g_lock); 1225 nce = *((nce_t **)NCE_HASH_PTR_V6(*dst)); 1226 nce = nce_lookup_addr(ill, dst, nce); 1227 if (nce != NULL) { 1228 mutex_exit(&ndp6.ndp_g_lock); 1229 NCE_REFRELE(nce); 1230 return (0); 1231 } 1232 /* No entry, now lookup for a mapping this should never fail */ 1233 mnce = nce_lookup_mapping(ill, dst); 1234 if (mnce == NULL) { 1235 /* Something broken for the interface. */ 1236 mutex_exit(&ndp6.ndp_g_lock); 1237 return (ESRCH); 1238 } 1239 ASSERT(mnce->nce_flags & NCE_F_MAPPING); 1240 if (ill->ill_net_type == IRE_IF_RESOLVER) { 1241 /* 1242 * For IRE_IF_RESOLVER a hardware mapping can be 1243 * generated, for IRE_IF_NORESOLVER, resolution cookie 1244 * in the ill is copied in ndp_add(). 1245 */ 1246 hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 1247 if (hw_addr == NULL) { 1248 mutex_exit(&ndp6.ndp_g_lock); 1249 NCE_REFRELE(mnce); 1250 return (ENOMEM); 1251 } 1252 nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 1253 } 1254 NCE_REFRELE(mnce); 1255 /* 1256 * IRE_IF_NORESOLVER type simply copies the resolution 1257 * cookie passed in. So no hw_addr is needed. 1258 */ 1259 err = ndp_add(ill, 1260 hw_addr, 1261 dst, 1262 &ipv6_all_ones, 1263 &ipv6_all_zeros, 1264 0, 1265 NCE_F_NONUD, 1266 ND_REACHABLE, 1267 &nce, 1268 NULL, 1269 NULL); 1270 mutex_exit(&ndp6.ndp_g_lock); 1271 if (hw_addr != NULL) 1272 kmem_free(hw_addr, ill->ill_nd_lla_len); 1273 if (err != 0) { 1274 ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 1275 return (err); 1276 } 1277 NCE_REFRELE(nce); 1278 return (0); 1279 } 1280 1281 /* 1282 * Return the link layer address, and any flags of a nce. 1283 */ 1284 int 1285 ndp_query(ill_t *ill, struct lif_nd_req *lnr) 1286 { 1287 nce_t *nce; 1288 in6_addr_t *addr; 1289 sin6_t *sin6; 1290 dl_unitdata_req_t *dl; 1291 1292 ASSERT(ill != NULL && ill->ill_isv6); 1293 sin6 = (sin6_t *)&lnr->lnr_addr; 1294 addr = &sin6->sin6_addr; 1295 1296 nce = ndp_lookup_v6(ill, addr, B_FALSE); 1297 if (nce == NULL) 1298 return (ESRCH); 1299 /* If in INCOMPLETE state, no link layer address is available yet */ 1300 if (nce->nce_state == ND_INCOMPLETE) 1301 goto done; 1302 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 1303 if (ill->ill_flags & ILLF_XRESOLV) 1304 lnr->lnr_hdw_len = dl->dl_dest_addr_length; 1305 else 1306 lnr->lnr_hdw_len = ill->ill_nd_lla_len; 1307 ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 1308 sizeof (lnr->lnr_hdw_addr)); 1309 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 1310 (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 1311 if (nce->nce_flags & NCE_F_ISROUTER) 1312 lnr->lnr_flags = NDF_ISROUTER_ON; 1313 if (nce->nce_flags & NCE_F_PROXY) 1314 lnr->lnr_flags |= NDF_PROXY_ON; 1315 if (nce->nce_flags & NCE_F_ANYCAST) 1316 lnr->lnr_flags |= NDF_ANYCAST_ON; 1317 done: 1318 NCE_REFRELE(nce); 1319 return (0); 1320 } 1321 1322 /* 1323 * Send Enable/Disable multicast reqs to driver. 1324 */ 1325 int 1326 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 1327 uint32_t hw_addr_offset, mblk_t *mp) 1328 { 1329 nce_t *nce; 1330 uchar_t *hw_addr; 1331 1332 ASSERT(ill != NULL && ill->ill_isv6); 1333 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 1334 hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 1335 if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 1336 freemsg(mp); 1337 return (EINVAL); 1338 } 1339 mutex_enter(&ndp6.ndp_g_lock); 1340 nce = nce_lookup_mapping(ill, addr); 1341 if (nce == NULL) { 1342 mutex_exit(&ndp6.ndp_g_lock); 1343 freemsg(mp); 1344 return (ESRCH); 1345 } 1346 mutex_exit(&ndp6.ndp_g_lock); 1347 /* 1348 * Update dl_addr_length and dl_addr_offset for primitives that 1349 * have physical addresses as opposed to full saps 1350 */ 1351 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1352 case DL_ENABMULTI_REQ: 1353 /* Track the state if this is the first enabmulti */ 1354 if (ill->ill_dlpi_multicast_state == IDMS_UNKNOWN) 1355 ill->ill_dlpi_multicast_state = IDMS_INPROGRESS; 1356 ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 1357 break; 1358 case DL_DISABMULTI_REQ: 1359 ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 1360 break; 1361 default: 1362 NCE_REFRELE(nce); 1363 ip1dbg(("ndp_mcastreq: default\n")); 1364 return (EINVAL); 1365 } 1366 nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 1367 NCE_REFRELE(nce); 1368 putnext(ill->ill_wq, mp); 1369 return (0); 1370 } 1371 1372 /* 1373 * Send a neighbor solicitation. 1374 * Returns number of milliseconds after which we should either rexmit or abort. 1375 * Return of zero means we should abort. 1376 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 1377 * 1378 * NOTE: This routine drops nce_lock (and later reacquires it) when sending 1379 * the packet. 1380 * NOTE: This routine does not consume mp. 1381 */ 1382 uint32_t 1383 nce_solicit(nce_t *nce, mblk_t *mp) 1384 { 1385 ill_t *ill; 1386 ill_t *src_ill; 1387 ip6_t *ip6h; 1388 in6_addr_t src; 1389 in6_addr_t dst; 1390 ipif_t *ipif; 1391 ip6i_t *ip6i; 1392 boolean_t dropped = B_FALSE; 1393 1394 ASSERT(RW_READ_HELD(&ill_g_lock)); 1395 ASSERT(MUTEX_HELD(&nce->nce_lock)); 1396 ill = nce->nce_ill; 1397 ASSERT(ill != NULL); 1398 1399 if (nce->nce_rcnt == 0) { 1400 return (0); 1401 } 1402 1403 if (mp == NULL) { 1404 ASSERT(nce->nce_qd_mp != NULL); 1405 mp = nce->nce_qd_mp; 1406 } else { 1407 nce_queue_mp(nce, mp); 1408 } 1409 1410 /* Handle ip_newroute_v6 giving us IPSEC packets */ 1411 if (mp->b_datap->db_type == M_CTL) 1412 mp = mp->b_cont; 1413 1414 ip6h = (ip6_t *)mp->b_rptr; 1415 if (ip6h->ip6_nxt == IPPROTO_RAW) { 1416 /* 1417 * This message should have been pulled up already in 1418 * ip_wput_v6. We can't do pullups here because the message 1419 * could be from the nce_qd_mp which could have b_next/b_prev 1420 * non-NULL. 1421 */ 1422 ip6i = (ip6i_t *)ip6h; 1423 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 1424 sizeof (ip6i_t) + IPV6_HDR_LEN); 1425 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 1426 } 1427 src = ip6h->ip6_src; 1428 /* 1429 * If the src of outgoing packet is one of the assigned interface 1430 * addresses use it, otherwise we will pick the source address below. 1431 */ 1432 src_ill = ill; 1433 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1434 if (ill->ill_group != NULL) 1435 src_ill = ill->ill_group->illgrp_ill; 1436 for (; src_ill != NULL; src_ill = src_ill->ill_group_next) { 1437 for (ipif = src_ill->ill_ipif; ipif != NULL; 1438 ipif = ipif->ipif_next) { 1439 if (IN6_ARE_ADDR_EQUAL(&src, 1440 &ipif->ipif_v6lcl_addr)) { 1441 break; 1442 } 1443 } 1444 if (ipif != NULL) 1445 break; 1446 } 1447 /* 1448 * If no relevant ipif can be found, then it's not one of our 1449 * addresses. Reset to :: and let nce_xmit. If an ipif can be 1450 * found, but it's not yet done with DAD verification, then 1451 * just postpone this transmission until later. 1452 */ 1453 if (src_ill == NULL) 1454 src = ipv6_all_zeros; 1455 else if (!ipif->ipif_addr_ready) 1456 return (ill->ill_reachable_retrans_time); 1457 } 1458 dst = nce->nce_addr; 1459 /* 1460 * If source address is unspecified, nce_xmit will choose 1461 * one for us and initialize the hardware address also 1462 * appropriately. 1463 */ 1464 if (IN6_IS_ADDR_UNSPECIFIED(&src)) 1465 src_ill = NULL; 1466 nce->nce_rcnt--; 1467 mutex_exit(&nce->nce_lock); 1468 rw_exit(&ill_g_lock); 1469 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src, 1470 &dst, 0); 1471 rw_enter(&ill_g_lock, RW_READER); 1472 mutex_enter(&nce->nce_lock); 1473 if (dropped) 1474 nce->nce_rcnt++; 1475 return (ill->ill_reachable_retrans_time); 1476 } 1477 1478 /* 1479 * Attempt to recover an address on an interface that's been marked as a 1480 * duplicate. Because NCEs are destroyed when the interface goes down, there's 1481 * no easy way to just probe the address and have the right thing happen if 1482 * it's no longer in use. Instead, we just bring it up normally and allow the 1483 * regular interface start-up logic to probe for a remaining duplicate and take 1484 * us back down if necessary. 1485 * Neither DHCP nor temporary addresses arrive here; they're excluded by 1486 * ip_ndp_excl. 1487 */ 1488 /* ARGSUSED */ 1489 static void 1490 ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1491 { 1492 ill_t *ill = rq->q_ptr; 1493 ipif_t *ipif; 1494 in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; 1495 1496 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1497 /* 1498 * We do not support recovery of proxy ARP'd interfaces, 1499 * because the system lacks a complete proxy ARP mechanism. 1500 */ 1501 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1502 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { 1503 continue; 1504 } 1505 1506 /* 1507 * If we have already recovered, then ignore. 1508 */ 1509 mutex_enter(&ill->ill_lock); 1510 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) { 1511 mutex_exit(&ill->ill_lock); 1512 continue; 1513 } 1514 1515 ipif->ipif_flags &= ~IPIF_DUPLICATE; 1516 ill->ill_ipif_dup_count--; 1517 mutex_exit(&ill->ill_lock); 1518 ipif->ipif_was_dup = B_TRUE; 1519 1520 if (ipif_ndp_up(ipif, addr, B_FALSE) != EINPROGRESS) 1521 (void) ipif_up_done_v6(ipif); 1522 } 1523 freeb(mp); 1524 } 1525 1526 /* 1527 * Attempt to recover an IPv6 interface that's been shut down as a duplicate. 1528 * As long as someone else holds the address, the interface will stay down. 1529 * When that conflict goes away, the interface is brought back up. This is 1530 * done so that accidental shutdowns of addresses aren't made permanent. Your 1531 * server will recover from a failure. 1532 * 1533 * For DHCP and temporary addresses, recovery is not done in the kernel. 1534 * Instead, it's handled by user space processes (dhcpagent and in.ndpd). 1535 * 1536 * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 1537 */ 1538 static void 1539 ipif6_dup_recovery(void *arg) 1540 { 1541 ipif_t *ipif = arg; 1542 1543 ipif->ipif_recovery_id = 0; 1544 if (!(ipif->ipif_flags & IPIF_DUPLICATE)) 1545 return; 1546 1547 /* If the link is down, we'll retry this later */ 1548 if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) 1549 return; 1550 1551 ndp_do_recovery(ipif); 1552 } 1553 1554 /* 1555 * Perform interface recovery by forcing the duplicate interfaces up and 1556 * allowing the system to determine which ones should stay up. 1557 * 1558 * Called both by recovery timer expiry and link-up notification. 1559 */ 1560 void 1561 ndp_do_recovery(ipif_t *ipif) 1562 { 1563 ill_t *ill = ipif->ipif_ill; 1564 mblk_t *mp; 1565 1566 mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); 1567 if (mp == NULL) { 1568 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1569 ipif, MSEC_TO_TICK(ip_dup_recovery)); 1570 } else { 1571 bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, 1572 sizeof (ipif->ipif_v6lcl_addr)); 1573 ill_refhold(ill); 1574 (void) qwriter_ip(NULL, ill, ill->ill_rq, mp, ip_ndp_recover, 1575 CUR_OP, B_FALSE); 1576 } 1577 } 1578 1579 /* 1580 * Find the solicitation in the given message, and extract printable details 1581 * (MAC and IP addresses) from it. 1582 */ 1583 static nd_neighbor_solicit_t * 1584 ip_ndp_find_solicitation(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, char *hbuf, 1585 size_t hlen, char *sbuf, size_t slen, uchar_t **haddr) 1586 { 1587 nd_neighbor_solicit_t *ns; 1588 ip6_t *ip6h; 1589 uchar_t *addr; 1590 int alen; 1591 1592 alen = 0; 1593 ip6h = (ip6_t *)mp->b_rptr; 1594 if (dl_mp == NULL) { 1595 nd_opt_hdr_t *opt; 1596 int nslen; 1597 1598 /* 1599 * If it's from the fast-path, then it can't be a probe 1600 * message, and thus must include the source linkaddr option. 1601 * Extract that here. 1602 */ 1603 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1604 nslen = mp->b_wptr - (uchar_t *)ns; 1605 if ((nslen -= sizeof (*ns)) > 0) { 1606 opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), nslen, 1607 ND_OPT_SOURCE_LINKADDR); 1608 if (opt != NULL && 1609 opt->nd_opt_len * 8 - sizeof (*opt) >= 1610 ill->ill_nd_lla_len) { 1611 addr = (uchar_t *)(opt + 1); 1612 alen = ill->ill_nd_lla_len; 1613 } 1614 } 1615 /* 1616 * We cheat a bit here for the sake of printing usable log 1617 * messages in the rare case where the reply we got was unicast 1618 * without a source linkaddr option, and the interface is in 1619 * fastpath mode. (Sigh.) 1620 */ 1621 if (alen == 0 && ill->ill_type == IFT_ETHER && 1622 MBLKHEAD(mp) >= sizeof (struct ether_header)) { 1623 struct ether_header *pether; 1624 1625 pether = (struct ether_header *)((char *)ip6h - 1626 sizeof (*pether)); 1627 addr = pether->ether_shost.ether_addr_octet; 1628 alen = ETHERADDRL; 1629 } 1630 } else { 1631 dl_unitdata_ind_t *dlu; 1632 1633 dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; 1634 alen = dlu->dl_src_addr_length; 1635 if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && 1636 dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { 1637 addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; 1638 if (ill->ill_sap_length < 0) { 1639 alen += ill->ill_sap_length; 1640 } else { 1641 addr += ill->ill_sap_length; 1642 alen -= ill->ill_sap_length; 1643 } 1644 } 1645 } 1646 if (alen > 0) { 1647 *haddr = addr; 1648 (void) mac_colon_addr(addr, alen, hbuf, hlen); 1649 } else { 1650 *haddr = NULL; 1651 (void) strcpy(hbuf, "?"); 1652 } 1653 ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN); 1654 (void) inet_ntop(AF_INET6, &ns->nd_ns_target, sbuf, slen); 1655 return (ns); 1656 } 1657 1658 /* 1659 * This is for exclusive changes due to NDP duplicate address detection 1660 * failure. 1661 */ 1662 /* ARGSUSED */ 1663 static void 1664 ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 1665 { 1666 ill_t *ill = rq->q_ptr; 1667 ipif_t *ipif; 1668 char ibuf[LIFNAMSIZ + 10]; /* 10 digits for logical i/f number */ 1669 char hbuf[MAC_STR_LEN]; 1670 char sbuf[INET6_ADDRSTRLEN]; 1671 nd_neighbor_solicit_t *ns; 1672 mblk_t *dl_mp = NULL; 1673 uchar_t *haddr; 1674 1675 if (DB_TYPE(mp) != M_DATA) { 1676 dl_mp = mp; 1677 mp = mp->b_cont; 1678 } 1679 ns = ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, sizeof (hbuf), sbuf, 1680 sizeof (sbuf), &haddr); 1681 if (haddr != NULL && 1682 bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) { 1683 /* 1684 * Ignore conflicts generated by misbehaving switches that just 1685 * reflect our own messages back to us. 1686 */ 1687 goto ignore_conflict; 1688 } 1689 (void) strlcpy(ibuf, ill->ill_name, sizeof (ibuf)); 1690 for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 1691 1692 if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 1693 !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 1694 &ns->nd_ns_target)) { 1695 continue; 1696 } 1697 1698 /* If it's already marked, then don't do anything. */ 1699 if (ipif->ipif_flags & IPIF_DUPLICATE) 1700 continue; 1701 1702 /* 1703 * If this is a failure during duplicate recovery, then don't 1704 * complain. It may take a long time to recover. 1705 */ 1706 if (!ipif->ipif_was_dup) { 1707 if (ipif->ipif_id != 0) { 1708 (void) snprintf(ibuf + ill->ill_name_length - 1, 1709 sizeof (ibuf) - ill->ill_name_length + 1, 1710 ":%d", ipif->ipif_id); 1711 } 1712 cmn_err(CE_WARN, "%s has duplicate address %s (in " 1713 "use by %s); disabled", ibuf, sbuf, hbuf); 1714 } 1715 mutex_enter(&ill->ill_lock); 1716 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 1717 ipif->ipif_flags |= IPIF_DUPLICATE; 1718 ill->ill_ipif_dup_count++; 1719 mutex_exit(&ill->ill_lock); 1720 (void) ipif_down(ipif, NULL, NULL); 1721 ipif_down_tail(ipif); 1722 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 1723 ill->ill_net_type == IRE_IF_RESOLVER && 1724 ip_dup_recovery > 0) 1725 ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1726 ipif, MSEC_TO_TICK(ip_dup_recovery)); 1727 } 1728 ignore_conflict: 1729 if (dl_mp != NULL) 1730 freeb(dl_mp); 1731 freemsg(mp); 1732 } 1733 1734 /* 1735 * Handle failure by tearing down the ipifs with the specified address. Note 1736 * that tearing down the ipif also means deleting the nce through ipif_down, so 1737 * it's not possible to do recovery by just restarting the nce timer. Instead, 1738 * we start a timer on the ipif. 1739 */ 1740 static void 1741 ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1742 { 1743 if ((mp = copymsg(mp)) != NULL) { 1744 if (dl_mp == NULL) 1745 dl_mp = mp; 1746 else if ((dl_mp = copyb(dl_mp)) != NULL) 1747 dl_mp->b_cont = mp; 1748 if (dl_mp == NULL) { 1749 freemsg(mp); 1750 } else { 1751 ill_refhold(ill); 1752 (void) qwriter_ip(NULL, ill, ill->ill_rq, dl_mp, 1753 ip_ndp_excl, CUR_OP, B_FALSE); 1754 } 1755 } 1756 ndp_delete(nce); 1757 } 1758 1759 /* 1760 * Handle a discovered conflict: some other system is advertising that it owns 1761 * one of our IP addresses. We need to defend ourselves, or just shut down the 1762 * interface. 1763 */ 1764 static void 1765 ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 1766 { 1767 ipif_t *ipif; 1768 uint32_t now; 1769 uint_t maxdefense; 1770 uint_t defs; 1771 1772 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL, 1773 NULL, NULL); 1774 if (ipif == NULL) 1775 return; 1776 /* 1777 * First, figure out if this address is disposable. 1778 */ 1779 if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) 1780 maxdefense = ip_max_temp_defend; 1781 else 1782 maxdefense = ip_max_defend; 1783 1784 /* 1785 * Now figure out how many times we've defended ourselves. Ignore 1786 * defenses that happened long in the past. 1787 */ 1788 now = gethrestime_sec(); 1789 mutex_enter(&nce->nce_lock); 1790 if ((defs = nce->nce_defense_count) > 0 && 1791 now - nce->nce_defense_time > ip_defend_interval) { 1792 nce->nce_defense_count = defs = 0; 1793 } 1794 nce->nce_defense_count++; 1795 nce->nce_defense_time = now; 1796 mutex_exit(&nce->nce_lock); 1797 ipif_refrele(ipif); 1798 1799 /* 1800 * If we've defended ourselves too many times already, then give up and 1801 * tear down the interface(s) using this address. Otherwise, defend by 1802 * sending out an unsolicited Neighbor Advertisement. 1803 */ 1804 if (defs >= maxdefense) { 1805 ip_ndp_failure(ill, mp, dl_mp, nce); 1806 } else { 1807 char hbuf[MAC_STR_LEN]; 1808 char sbuf[INET6_ADDRSTRLEN]; 1809 uchar_t *haddr; 1810 1811 (void) ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, 1812 sizeof (hbuf), sbuf, sizeof (sbuf), &haddr); 1813 cmn_err(CE_WARN, "node %s is using our IP address %s on %s", 1814 hbuf, sbuf, ill->ill_name); 1815 (void) nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, B_FALSE, 1816 &nce->nce_addr, &ipv6_all_hosts_mcast, NDP_ORIDE); 1817 } 1818 } 1819 1820 static void 1821 ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 1822 { 1823 nd_neighbor_solicit_t *ns; 1824 uint32_t hlen = ill->ill_nd_lla_len; 1825 uchar_t *haddr = NULL; 1826 icmp6_t *icmp_nd; 1827 ip6_t *ip6h; 1828 nce_t *our_nce = NULL; 1829 in6_addr_t target; 1830 in6_addr_t src; 1831 int len; 1832 int flag = 0; 1833 nd_opt_hdr_t *opt = NULL; 1834 boolean_t bad_solicit = B_FALSE; 1835 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 1836 1837 ip6h = (ip6_t *)mp->b_rptr; 1838 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1839 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 1840 src = ip6h->ip6_src; 1841 ns = (nd_neighbor_solicit_t *)icmp_nd; 1842 target = ns->nd_ns_target; 1843 if (IN6_IS_ADDR_MULTICAST(&target)) { 1844 if (ip_debug > 2) { 1845 /* ip1dbg */ 1846 pr_addr_dbg("ndp_input_solicit: Target is" 1847 " multicast! %s\n", AF_INET6, &target); 1848 } 1849 bad_solicit = B_TRUE; 1850 goto done; 1851 } 1852 if (len > sizeof (nd_neighbor_solicit_t)) { 1853 /* Options present */ 1854 opt = (nd_opt_hdr_t *)&ns[1]; 1855 len -= sizeof (nd_neighbor_solicit_t); 1856 if (!ndp_verify_optlen(opt, len)) { 1857 ip1dbg(("ndp_input_solicit: Bad opt len\n")); 1858 bad_solicit = B_TRUE; 1859 goto done; 1860 } 1861 } 1862 if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 1863 /* Check to see if this is a valid DAD solicitation */ 1864 if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 1865 if (ip_debug > 2) { 1866 /* ip1dbg */ 1867 pr_addr_dbg("ndp_input_solicit: IPv6 " 1868 "Destination is not solicited node " 1869 "multicast %s\n", AF_INET6, 1870 &ip6h->ip6_dst); 1871 } 1872 bad_solicit = B_TRUE; 1873 goto done; 1874 } 1875 } 1876 1877 our_nce = ndp_lookup_v6(ill, &target, B_FALSE); 1878 /* 1879 * If this is a valid Solicitation, a permanent 1880 * entry should exist in the cache 1881 */ 1882 if (our_nce == NULL || 1883 !(our_nce->nce_flags & NCE_F_PERMANENT)) { 1884 ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 1885 "ifname=%s ", ill->ill_name)); 1886 if (ip_debug > 2) { 1887 /* ip1dbg */ 1888 pr_addr_dbg(" dst %s\n", AF_INET6, &target); 1889 } 1890 bad_solicit = B_TRUE; 1891 goto done; 1892 } 1893 1894 /* At this point we should have a verified NS per spec */ 1895 if (opt != NULL) { 1896 opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 1897 if (opt != NULL) { 1898 haddr = (uchar_t *)&opt[1]; 1899 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 1900 hlen == 0) { 1901 ip1dbg(("ndp_input_advert: bad SLLA\n")); 1902 bad_solicit = B_TRUE; 1903 goto done; 1904 } 1905 } 1906 } 1907 1908 /* Set override flag, it will be reset later if need be. */ 1909 flag |= NDP_ORIDE; 1910 if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1911 flag |= NDP_UNICAST; 1912 } 1913 1914 /* 1915 * Create/update the entry for the soliciting node. 1916 * or respond to outstanding queries, don't if 1917 * the source is unspecified address. 1918 */ 1919 if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 1920 int err; 1921 nce_t *nnce; 1922 1923 ASSERT(ill->ill_isv6); 1924 /* 1925 * Regular solicitations *must* include the Source Link-Layer 1926 * Address option. Ignore messages that do not. 1927 */ 1928 if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 1929 ip1dbg(("ndp_input_solicit: source link-layer address " 1930 "option missing with a specified source.\n")); 1931 bad_solicit = B_TRUE; 1932 goto done; 1933 } 1934 1935 /* 1936 * This is a regular solicitation. If we're still in the 1937 * process of verifying the address, then don't respond at all 1938 * and don't keep track of the sender. 1939 */ 1940 if (our_nce->nce_state == ND_PROBE) 1941 goto done; 1942 1943 /* 1944 * If the solicitation doesn't have sender hardware address 1945 * (legal for unicast solicitation), then process without 1946 * installing the return NCE. Either we already know it, or 1947 * we'll be forced to look it up when (and if) we reply to the 1948 * packet. 1949 */ 1950 if (haddr == NULL) 1951 goto no_source; 1952 1953 err = ndp_lookup_then_add(ill, 1954 haddr, 1955 &src, /* Soliciting nodes address */ 1956 &ipv6_all_ones, 1957 &ipv6_all_zeros, 1958 0, 1959 0, 1960 ND_STALE, 1961 &nnce, 1962 NULL, 1963 NULL); 1964 switch (err) { 1965 case 0: 1966 /* done with this entry */ 1967 NCE_REFRELE(nnce); 1968 break; 1969 case EEXIST: 1970 /* 1971 * B_FALSE indicates this is not an 1972 * an advertisement. 1973 */ 1974 ndp_process(nnce, haddr, 0, B_FALSE); 1975 NCE_REFRELE(nnce); 1976 break; 1977 default: 1978 ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 1979 err)); 1980 goto done; 1981 } 1982 no_source: 1983 flag |= NDP_SOLICITED; 1984 } else { 1985 /* 1986 * No source link layer address option should be present in a 1987 * valid DAD request. 1988 */ 1989 if (haddr != NULL) { 1990 ip1dbg(("ndp_input_solicit: source link-layer address " 1991 "option present with an unspecified source.\n")); 1992 bad_solicit = B_TRUE; 1993 goto done; 1994 } 1995 if (our_nce->nce_state == ND_PROBE) { 1996 /* 1997 * Internally looped-back probes won't have DLPI 1998 * attached to them. External ones (which are sent by 1999 * multicast) always will. Just ignore our own 2000 * transmissions. 2001 */ 2002 if (dl_mp != NULL) { 2003 /* 2004 * If someone else is probing our address, then 2005 * we've crossed wires. Declare failure. 2006 */ 2007 ip_ndp_failure(ill, mp, dl_mp, our_nce); 2008 } 2009 goto done; 2010 } 2011 /* 2012 * This is a DAD probe. Multicast the advertisement to the 2013 * all-nodes address. 2014 */ 2015 src = ipv6_all_hosts_mcast; 2016 } 2017 if (our_nce->nce_flags & NCE_F_ISROUTER) 2018 flag |= NDP_ISROUTER; 2019 if (our_nce->nce_flags & NCE_F_PROXY) 2020 flag &= ~NDP_ORIDE; 2021 /* Response to a solicitation */ 2022 (void) nce_xmit(ill, 2023 ND_NEIGHBOR_ADVERT, 2024 ill, /* ill to be used for extracting ill_nd_lla */ 2025 B_TRUE, /* use ill_nd_lla */ 2026 &target, /* Source and target of the advertisement pkt */ 2027 &src, /* IP Destination (source of original pkt) */ 2028 flag); 2029 done: 2030 if (bad_solicit) 2031 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 2032 if (our_nce != NULL) 2033 NCE_REFRELE(our_nce); 2034 } 2035 2036 void 2037 ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2038 { 2039 nd_neighbor_advert_t *na; 2040 uint32_t hlen = ill->ill_nd_lla_len; 2041 uchar_t *haddr = NULL; 2042 icmp6_t *icmp_nd; 2043 ip6_t *ip6h; 2044 nce_t *dst_nce = NULL; 2045 in6_addr_t target; 2046 nd_opt_hdr_t *opt = NULL; 2047 int len; 2048 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2049 2050 ip6h = (ip6_t *)mp->b_rptr; 2051 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2052 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2053 na = (nd_neighbor_advert_t *)icmp_nd; 2054 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 2055 (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { 2056 ip1dbg(("ndp_input_advert: Target is multicast but the " 2057 "solicited flag is not zero\n")); 2058 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2059 return; 2060 } 2061 target = na->nd_na_target; 2062 if (IN6_IS_ADDR_MULTICAST(&target)) { 2063 ip1dbg(("ndp_input_advert: Target is multicast!\n")); 2064 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2065 return; 2066 } 2067 if (len > sizeof (nd_neighbor_advert_t)) { 2068 opt = (nd_opt_hdr_t *)&na[1]; 2069 if (!ndp_verify_optlen(opt, 2070 len - sizeof (nd_neighbor_advert_t))) { 2071 ip1dbg(("ndp_input_advert: cannot verify SLLA\n")); 2072 BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 2073 return; 2074 } 2075 /* At this point we have a verified NA per spec */ 2076 len -= sizeof (nd_neighbor_advert_t); 2077 opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 2078 if (opt != NULL) { 2079 haddr = (uchar_t *)&opt[1]; 2080 if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 2081 hlen == 0) { 2082 ip1dbg(("ndp_input_advert: bad SLLA\n")); 2083 BUMP_MIB(mib, 2084 ipv6IfIcmpInBadNeighborAdvertisements); 2085 return; 2086 } 2087 } 2088 } 2089 2090 /* 2091 * If this interface is part of the group look at all the 2092 * ills in the group. 2093 */ 2094 rw_enter(&ill_g_lock, RW_READER); 2095 if (ill->ill_group != NULL) 2096 ill = ill->ill_group->illgrp_ill; 2097 2098 for (; ill != NULL; ill = ill->ill_group_next) { 2099 mutex_enter(&ill->ill_lock); 2100 if (!ILL_CAN_LOOKUP(ill)) { 2101 mutex_exit(&ill->ill_lock); 2102 continue; 2103 } 2104 ill_refhold_locked(ill); 2105 mutex_exit(&ill->ill_lock); 2106 dst_nce = ndp_lookup_v6(ill, &target, B_FALSE); 2107 /* We have to drop the lock since ndp_process calls put* */ 2108 rw_exit(&ill_g_lock); 2109 if (dst_nce != NULL) { 2110 if ((dst_nce->nce_flags & NCE_F_PERMANENT) && 2111 dst_nce->nce_state == ND_PROBE) { 2112 /* 2113 * Someone else sent an advertisement for an 2114 * address that we're trying to configure. 2115 * Tear it down. Note that dl_mp might be NULL 2116 * if we're getting a unicast reply. This 2117 * isn't typically done (multicast is the norm 2118 * in response to a probe), but ip_ndp_failure 2119 * will handle the dl_mp == NULL case as well. 2120 */ 2121 ip_ndp_failure(ill, mp, dl_mp, dst_nce); 2122 } else if (dst_nce->nce_flags & NCE_F_PERMANENT) { 2123 /* 2124 * Someone just announced one of our local 2125 * addresses. If it wasn't us, then this is a 2126 * conflict. Defend the address or shut it 2127 * down. 2128 */ 2129 if (dl_mp != NULL && 2130 (haddr == NULL || 2131 nce_cmp_ll_addr(dst_nce, haddr, 2132 ill->ill_nd_lla_len))) { 2133 ip_ndp_conflict(ill, mp, dl_mp, 2134 dst_nce); 2135 } 2136 } else { 2137 if (na->nd_na_flags_reserved & 2138 ND_NA_FLAG_ROUTER) { 2139 dst_nce->nce_flags |= NCE_F_ISROUTER; 2140 } 2141 /* B_TRUE indicates this an advertisement */ 2142 ndp_process(dst_nce, haddr, 2143 na->nd_na_flags_reserved, B_TRUE); 2144 } 2145 NCE_REFRELE(dst_nce); 2146 } 2147 rw_enter(&ill_g_lock, RW_READER); 2148 ill_refrele(ill); 2149 } 2150 rw_exit(&ill_g_lock); 2151 } 2152 2153 /* 2154 * Process NDP neighbor solicitation/advertisement messages. 2155 * The checksum has already checked o.k before reaching here. 2156 */ 2157 void 2158 ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 2159 { 2160 icmp6_t *icmp_nd; 2161 ip6_t *ip6h; 2162 int len; 2163 mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 2164 2165 2166 if (!pullupmsg(mp, -1)) { 2167 ip1dbg(("ndp_input: pullupmsg failed\n")); 2168 BUMP_MIB(ill->ill_ip6_mib, ipv6InDiscards); 2169 goto done; 2170 } 2171 ip6h = (ip6_t *)mp->b_rptr; 2172 if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 2173 ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 2174 BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 2175 goto done; 2176 } 2177 /* 2178 * NDP does not accept any extension headers between the 2179 * IP header and the ICMP header since e.g. a routing 2180 * header could be dangerous. 2181 * This assumes that any AH or ESP headers are removed 2182 * by ip prior to passing the packet to ndp_input. 2183 */ 2184 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 2185 ip1dbg(("ndp_input: Wrong next header 0x%x\n", 2186 ip6h->ip6_nxt)); 2187 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2188 goto done; 2189 } 2190 icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 2191 ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 2192 icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 2193 if (icmp_nd->icmp6_code != 0) { 2194 ip1dbg(("ndp_input: icmp6 code != 0 \n")); 2195 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2196 goto done; 2197 } 2198 len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 2199 /* 2200 * Make sure packet length is large enough for either 2201 * a NS or a NA icmp packet. 2202 */ 2203 if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 2204 ip1dbg(("ndp_input: packet too short\n")); 2205 BUMP_MIB(mib, ipv6IfIcmpInErrors); 2206 goto done; 2207 } 2208 if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 2209 ndp_input_solicit(ill, mp, dl_mp); 2210 } else { 2211 ndp_input_advert(ill, mp, dl_mp); 2212 } 2213 done: 2214 freemsg(mp); 2215 } 2216 2217 /* 2218 * nce_xmit is called to form and transmit a ND solicitation or 2219 * advertisement ICMP packet. 2220 * 2221 * If the source address is unspecified and this isn't a probe (used for 2222 * duplicate address detection), an appropriate source address and link layer 2223 * address will be chosen here. The link layer address option is included if 2224 * the source is specified (i.e., all non-probe packets), and omitted (per the 2225 * specification) otherwise. 2226 * 2227 * It returns B_FALSE only if it does a successful put() to the 2228 * corresponding ill's ill_wq otherwise returns B_TRUE. 2229 */ 2230 static boolean_t 2231 nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, 2232 boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target, 2233 int flag) 2234 { 2235 uint32_t len; 2236 icmp6_t *icmp6; 2237 mblk_t *mp; 2238 ip6_t *ip6h; 2239 nd_opt_hdr_t *opt; 2240 uint_t plen; 2241 ip6i_t *ip6i; 2242 ipif_t *src_ipif = NULL; 2243 uint8_t *hw_addr; 2244 2245 /* 2246 * If we have a unspecified source(sender) address, select a 2247 * proper source address for the solicitation here itself so 2248 * that we can initialize the h/w address correctly. This is 2249 * needed for interface groups as source address can come from 2250 * the whole group and the h/w address initialized from ill will 2251 * be wrong if the source address comes from a different ill. 2252 * 2253 * Note that the NA never comes here with the unspecified source 2254 * address. The following asserts that whenever the source 2255 * address is specified, the haddr also should be specified. 2256 */ 2257 ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL)); 2258 2259 if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) { 2260 ASSERT(operation != ND_NEIGHBOR_ADVERT); 2261 /* 2262 * Pick a source address for this solicitation, but 2263 * restrict the selection to addresses assigned to the 2264 * output interface (or interface group). We do this 2265 * because the destination will create a neighbor cache 2266 * entry for the source address of this packet, so the 2267 * source address had better be a valid neighbor. 2268 */ 2269 src_ipif = ipif_select_source_v6(ill, target, RESTRICT_TO_ILL, 2270 IPV6_PREFER_SRC_DEFAULT, GLOBAL_ZONEID); 2271 if (src_ipif == NULL) { 2272 char buf[INET6_ADDRSTRLEN]; 2273 2274 ip1dbg(("nce_xmit: No source ipif for dst %s\n", 2275 inet_ntop(AF_INET6, (char *)target, buf, 2276 sizeof (buf)))); 2277 return (B_TRUE); 2278 } 2279 sender = &src_ipif->ipif_v6src_addr; 2280 hwaddr_ill = src_ipif->ipif_ill; 2281 } 2282 2283 /* 2284 * Always make sure that the NS/NA packets don't get load 2285 * spread. This is needed so that the probe packets sent 2286 * by the in.mpathd daemon can really go out on the desired 2287 * interface. Probe packets are made to go out on a desired 2288 * interface by including a ip6i with ATTACH_IF flag. As these 2289 * packets indirectly end up sending/receiving NS/NA packets 2290 * (neighbor doing NUD), we have to make sure that NA 2291 * also go out on the same interface. 2292 */ 2293 plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7) / 8; 2294 len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 2295 plen * 8; 2296 mp = allocb(len, BPRI_LO); 2297 if (mp == NULL) { 2298 if (src_ipif != NULL) 2299 ipif_refrele(src_ipif); 2300 return (B_TRUE); 2301 } 2302 bzero((char *)mp->b_rptr, len); 2303 mp->b_wptr = mp->b_rptr + len; 2304 2305 ip6i = (ip6i_t *)mp->b_rptr; 2306 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2307 ip6i->ip6i_nxt = IPPROTO_RAW; 2308 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 2309 if (flag & NDP_PROBE) 2310 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 2311 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2312 2313 ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 2314 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2315 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2316 ip6h->ip6_nxt = IPPROTO_ICMPV6; 2317 ip6h->ip6_hops = IPV6_MAX_HOPS; 2318 ip6h->ip6_dst = *target; 2319 icmp6 = (icmp6_t *)&ip6h[1]; 2320 2321 opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 2322 sizeof (nd_neighbor_advert_t)); 2323 2324 if (operation == ND_NEIGHBOR_SOLICIT) { 2325 nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 2326 2327 if (!(flag & NDP_PROBE)) 2328 opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 2329 ip6h->ip6_src = *sender; 2330 ns->nd_ns_target = *target; 2331 if (!(flag & NDP_UNICAST)) { 2332 /* Form multicast address of the target */ 2333 ip6h->ip6_dst = ipv6_solicited_node_mcast; 2334 ip6h->ip6_dst.s6_addr32[3] |= 2335 ns->nd_ns_target.s6_addr32[3]; 2336 } 2337 } else { 2338 nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 2339 2340 ASSERT(!(flag & NDP_PROBE)); 2341 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 2342 ip6h->ip6_src = *sender; 2343 na->nd_na_target = *sender; 2344 if (flag & NDP_ISROUTER) 2345 na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 2346 if (flag & NDP_SOLICITED) 2347 na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 2348 if (flag & NDP_ORIDE) 2349 na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 2350 } 2351 2352 hw_addr = NULL; 2353 if (!(flag & NDP_PROBE)) { 2354 mutex_enter(&hwaddr_ill->ill_lock); 2355 hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla : 2356 hwaddr_ill->ill_phys_addr; 2357 if (hw_addr != NULL) { 2358 /* Fill in link layer address and option len */ 2359 opt->nd_opt_len = (uint8_t)plen; 2360 bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len); 2361 } 2362 mutex_exit(&hwaddr_ill->ill_lock); 2363 } 2364 if (hw_addr == NULL) { 2365 /* If there's no link layer address option, then strip it. */ 2366 len -= plen * 8; 2367 mp->b_wptr = mp->b_rptr + len; 2368 ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2369 } 2370 2371 icmp6->icmp6_type = (uint8_t)operation; 2372 icmp6->icmp6_code = 0; 2373 /* 2374 * Prepare for checksum by putting icmp length in the icmp 2375 * checksum field. The checksum is calculated in ip_wput_v6. 2376 */ 2377 icmp6->icmp6_cksum = ip6h->ip6_plen; 2378 2379 if (src_ipif != NULL) 2380 ipif_refrele(src_ipif); 2381 if (canput(ill->ill_wq)) { 2382 put(ill->ill_wq, mp); 2383 return (B_FALSE); 2384 } 2385 freemsg(mp); 2386 return (B_TRUE); 2387 } 2388 2389 /* 2390 * Make a link layer address (does not include the SAP) from an nce. 2391 * To form the link layer address, use the last four bytes of ipv6 2392 * address passed in and the fixed offset stored in nce. 2393 */ 2394 static void 2395 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 2396 { 2397 uchar_t *mask, *to; 2398 ill_t *ill = nce->nce_ill; 2399 int len; 2400 2401 if (ill->ill_net_type == IRE_IF_NORESOLVER) 2402 return; 2403 ASSERT(nce->nce_res_mp != NULL); 2404 ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 2405 ASSERT(nce->nce_flags & NCE_F_MAPPING); 2406 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 2407 ASSERT(addr != NULL); 2408 bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 2409 addrpos, ill->ill_nd_lla_len); 2410 len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 2411 IPV6_ADDR_LEN); 2412 mask = (uchar_t *)&nce->nce_extract_mask; 2413 mask += (IPV6_ADDR_LEN - len); 2414 addr += (IPV6_ADDR_LEN - len); 2415 to = addrpos + nce->nce_ll_extract_start; 2416 while (len-- > 0) 2417 *to++ |= *mask++ & *addr++; 2418 } 2419 2420 /* 2421 * Pass a cache report back out via NDD. 2422 */ 2423 /* ARGSUSED */ 2424 int 2425 ndp_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr) 2426 { 2427 (void) mi_mpprintf(mp, "ifname hardware addr flags" 2428 " proto addr/mask"); 2429 ndp_walk(NULL, (pfi_t)nce_report1, (uchar_t *)mp); 2430 return (0); 2431 } 2432 2433 /* 2434 * Add a single line to the NDP Cache Entry Report. 2435 */ 2436 static void 2437 nce_report1(nce_t *nce, uchar_t *mp_arg) 2438 { 2439 ill_t *ill = nce->nce_ill; 2440 char local_buf[INET6_ADDRSTRLEN]; 2441 uchar_t flags_buf[10]; 2442 uint32_t flags = nce->nce_flags; 2443 mblk_t *mp = (mblk_t *)mp_arg; 2444 uchar_t *h; 2445 uchar_t *m = flags_buf; 2446 in6_addr_t v6addr; 2447 2448 /* 2449 * Lock the nce to protect nce_res_mp from being changed 2450 * if an external resolver address resolution completes 2451 * while nce_res_mp is being accessed here. 2452 * 2453 * Deal with all address formats, not just Ethernet-specific 2454 * In addition, make sure that the mblk has enough space 2455 * before writing to it. If is doesn't, allocate a new one. 2456 */ 2457 if (nce->nce_ipversion == IPV4_VERSION) 2458 /* Don't include v4 nce_ts in NDP cache entry report */ 2459 return; 2460 2461 ASSERT(ill != NULL); 2462 v6addr = nce->nce_mask; 2463 if (flags & NCE_F_PERMANENT) 2464 *m++ = 'P'; 2465 if (flags & NCE_F_ISROUTER) 2466 *m++ = 'R'; 2467 if (flags & NCE_F_MAPPING) 2468 *m++ = 'M'; 2469 *m = '\0'; 2470 2471 if (ill->ill_net_type == IRE_IF_RESOLVER) { 2472 size_t addrlen; 2473 char *addr_buf; 2474 dl_unitdata_req_t *dl; 2475 2476 mutex_enter(&nce->nce_lock); 2477 h = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2478 dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 2479 if (ill->ill_flags & ILLF_XRESOLV) 2480 addrlen = (3 * (dl->dl_dest_addr_length)); 2481 else 2482 addrlen = (3 * (ill->ill_nd_lla_len)); 2483 if (addrlen <= 0) { 2484 mutex_exit(&nce->nce_lock); 2485 (void) mi_mpprintf(mp, 2486 "%8s %9s %5s %s/%d", 2487 ill->ill_name, 2488 "None", 2489 (uchar_t *)&flags_buf, 2490 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2491 (char *)local_buf, sizeof (local_buf)), 2492 ip_mask_to_plen_v6(&v6addr)); 2493 } else { 2494 /* 2495 * Convert the hardware/lla address to ascii 2496 */ 2497 addr_buf = kmem_zalloc(addrlen, KM_NOSLEEP); 2498 if (addr_buf == NULL) { 2499 mutex_exit(&nce->nce_lock); 2500 return; 2501 } 2502 (void) mac_colon_addr((uint8_t *)h, 2503 (ill->ill_flags & ILLF_XRESOLV) ? 2504 dl->dl_dest_addr_length : ill->ill_nd_lla_len, 2505 addr_buf, addrlen); 2506 mutex_exit(&nce->nce_lock); 2507 (void) mi_mpprintf(mp, "%8s %17s %5s %s/%d", 2508 ill->ill_name, addr_buf, (uchar_t *)&flags_buf, 2509 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2510 (char *)local_buf, sizeof (local_buf)), 2511 ip_mask_to_plen_v6(&v6addr)); 2512 kmem_free(addr_buf, addrlen); 2513 } 2514 } else { 2515 (void) mi_mpprintf(mp, 2516 "%8s %9s %5s %s/%d", 2517 ill->ill_name, 2518 "None", 2519 (uchar_t *)&flags_buf, 2520 inet_ntop(AF_INET6, (char *)&nce->nce_addr, 2521 (char *)local_buf, sizeof (local_buf)), 2522 ip_mask_to_plen_v6(&v6addr)); 2523 } 2524 } 2525 2526 mblk_t * 2527 nce_udreq_alloc(ill_t *ill) 2528 { 2529 mblk_t *template_mp = NULL; 2530 dl_unitdata_req_t *dlur; 2531 int sap_length; 2532 2533 ASSERT(ill->ill_isv6); 2534 2535 sap_length = ill->ill_sap_length; 2536 template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 2537 ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 2538 if (template_mp == NULL) 2539 return (NULL); 2540 2541 dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 2542 dlur->dl_priority.dl_min = 0; 2543 dlur->dl_priority.dl_max = 0; 2544 dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 2545 dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); 2546 2547 /* Copy in the SAP value. */ 2548 NCE_LL_SAP_COPY(ill, template_mp); 2549 2550 return (template_mp); 2551 } 2552 2553 /* 2554 * NDP retransmit timer. 2555 * This timer goes off when: 2556 * a. It is time to retransmit NS for resolver. 2557 * b. It is time to send reachability probes. 2558 */ 2559 void 2560 ndp_timer(void *arg) 2561 { 2562 nce_t *nce = arg; 2563 ill_t *ill = nce->nce_ill; 2564 uint32_t ms; 2565 char addrbuf[INET6_ADDRSTRLEN]; 2566 mblk_t *mp; 2567 boolean_t dropped = B_FALSE; 2568 2569 /* 2570 * The timer has to be cancelled by ndp_delete before doing the final 2571 * refrele. So the NCE is guaranteed to exist when the timer runs 2572 * until it clears the timeout_id. Before clearing the timeout_id 2573 * bump up the refcnt so that we can continue to use the nce 2574 */ 2575 ASSERT(nce != NULL); 2576 2577 /* 2578 * Grab the ill_g_lock now itself to avoid lock order problems. 2579 * nce_solicit needs ill_g_lock to be able to traverse ills 2580 */ 2581 rw_enter(&ill_g_lock, RW_READER); 2582 mutex_enter(&nce->nce_lock); 2583 NCE_REFHOLD_LOCKED(nce); 2584 nce->nce_timeout_id = 0; 2585 2586 /* 2587 * Check the reachability state first. 2588 */ 2589 switch (nce->nce_state) { 2590 case ND_DELAY: 2591 rw_exit(&ill_g_lock); 2592 nce->nce_state = ND_PROBE; 2593 mutex_exit(&nce->nce_lock); 2594 (void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE, 2595 &ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST); 2596 if (ip_debug > 3) { 2597 /* ip2dbg */ 2598 pr_addr_dbg("ndp_timer: state for %s changed " 2599 "to PROBE\n", AF_INET6, &nce->nce_addr); 2600 } 2601 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2602 NCE_REFRELE(nce); 2603 return; 2604 case ND_PROBE: 2605 /* must be retransmit timer */ 2606 rw_exit(&ill_g_lock); 2607 nce->nce_pcnt--; 2608 ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 2609 nce->nce_pcnt >= -1); 2610 if (nce->nce_pcnt > 0) { 2611 /* 2612 * As per RFC2461, the nce gets deleted after 2613 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 2614 * Note that the first unicast solicitation is sent 2615 * during the DELAY state. 2616 */ 2617 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2618 nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr, 2619 addrbuf, sizeof (addrbuf)))); 2620 mutex_exit(&nce->nce_lock); 2621 dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, 2622 B_FALSE, &ipv6_all_zeros, &nce->nce_addr, 2623 (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE : 2624 NDP_UNICAST); 2625 if (dropped) { 2626 mutex_enter(&nce->nce_lock); 2627 nce->nce_pcnt++; 2628 mutex_exit(&nce->nce_lock); 2629 } 2630 NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 2631 } else if (nce->nce_pcnt < 0) { 2632 /* No hope, delete the nce */ 2633 nce->nce_state = ND_UNREACHABLE; 2634 mutex_exit(&nce->nce_lock); 2635 if (ip_debug > 2) { 2636 /* ip1dbg */ 2637 pr_addr_dbg("ndp_timer: Delete IRE for" 2638 " dst %s\n", AF_INET6, &nce->nce_addr); 2639 } 2640 ndp_delete(nce); 2641 } else if (!(nce->nce_flags & NCE_F_PERMANENT)) { 2642 /* Wait RetransTimer, before deleting the entry */ 2643 ip2dbg(("ndp_timer: pcount=%x dst %s\n", 2644 nce->nce_pcnt, inet_ntop(AF_INET6, 2645 &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 2646 mutex_exit(&nce->nce_lock); 2647 /* Wait one interval before killing */ 2648 NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 2649 } else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) { 2650 ipif_t *ipif; 2651 2652 /* 2653 * We're done probing, and we can now declare this 2654 * address to be usable. Let IP know that it's ok to 2655 * use. 2656 */ 2657 nce->nce_state = ND_REACHABLE; 2658 mutex_exit(&nce->nce_lock); 2659 ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, 2660 ALL_ZONES, NULL, NULL, NULL, NULL); 2661 if (ipif != NULL) { 2662 if (ipif->ipif_was_dup) { 2663 char ibuf[LIFNAMSIZ + 10]; 2664 char sbuf[INET6_ADDRSTRLEN]; 2665 2666 ipif->ipif_was_dup = B_FALSE; 2667 (void) strlcpy(ibuf, ill->ill_name, 2668 sizeof (ibuf)); 2669 (void) inet_ntop(AF_INET6, 2670 &ipif->ipif_v6lcl_addr, 2671 sbuf, sizeof (sbuf)); 2672 if (ipif->ipif_id != 0) { 2673 (void) snprintf(ibuf + 2674 ill->ill_name_length - 1, 2675 sizeof (ibuf) - 2676 ill->ill_name_length + 1, 2677 ":%d", ipif->ipif_id); 2678 } 2679 cmn_err(CE_NOTE, "recovered address " 2680 "%s on %s", sbuf, ibuf); 2681 } 2682 if ((ipif->ipif_flags & IPIF_UP) && 2683 !ipif->ipif_addr_ready) { 2684 ip_rts_ifmsg(ipif); 2685 ip_rts_newaddrmsg(RTM_ADD, 0, ipif); 2686 sctp_update_ipif(ipif, SCTP_IPIF_UP); 2687 } 2688 ipif->ipif_addr_ready = 1; 2689 ipif_refrele(ipif); 2690 } 2691 /* Begin defending our new address */ 2692 nce->nce_unsolicit_count = 0; 2693 dropped = nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, 2694 B_FALSE, &nce->nce_addr, &ipv6_all_hosts_mcast, 2695 NDP_ORIDE); 2696 if (dropped) { 2697 nce->nce_unsolicit_count = 1; 2698 NDP_RESTART_TIMER(nce, 2699 ip_ndp_unsolicit_interval); 2700 } else if (ip_ndp_defense_interval != 0) { 2701 NDP_RESTART_TIMER(nce, ip_ndp_defense_interval); 2702 } 2703 } else { 2704 /* 2705 * This is an address we're probing to be our own, but 2706 * the ill is down. Wait until it comes back before 2707 * doing anything, but switch to reachable state so 2708 * that the restart will work. 2709 */ 2710 nce->nce_state = ND_REACHABLE; 2711 mutex_exit(&nce->nce_lock); 2712 } 2713 NCE_REFRELE(nce); 2714 return; 2715 case ND_INCOMPLETE: 2716 /* 2717 * Must be resolvers retransmit timer. 2718 */ 2719 for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) { 2720 ip6i_t *ip6i; 2721 ip6_t *ip6h; 2722 mblk_t *data_mp; 2723 2724 /* 2725 * Walk the list of packets queued, and see if there 2726 * are any multipathing probe packets. Such packets 2727 * are always queued at the head. Since this is a 2728 * retransmit timer firing, mark such packets as 2729 * delayed in ND resolution. This info will be used 2730 * in ip_wput_v6(). Multipathing probe packets will 2731 * always have an ip6i_t. Once we hit a packet without 2732 * it, we can break out of this loop. 2733 */ 2734 if (mp->b_datap->db_type == M_CTL) 2735 data_mp = mp->b_cont; 2736 else 2737 data_mp = mp; 2738 2739 ip6h = (ip6_t *)data_mp->b_rptr; 2740 if (ip6h->ip6_nxt != IPPROTO_RAW) 2741 break; 2742 2743 /* 2744 * This message should have been pulled up already in 2745 * ip_wput_v6. We can't do pullups here because the 2746 * b_next/b_prev is non-NULL. 2747 */ 2748 ip6i = (ip6i_t *)ip6h; 2749 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2750 sizeof (ip6i_t) + IPV6_HDR_LEN); 2751 2752 /* Mark this packet as delayed due to ND resolution */ 2753 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 2754 ip6i->ip6i_flags |= IP6I_ND_DELAYED; 2755 } 2756 if (nce->nce_qd_mp != NULL) { 2757 ms = nce_solicit(nce, NULL); 2758 rw_exit(&ill_g_lock); 2759 if (ms == 0) { 2760 if (nce->nce_state != ND_REACHABLE) { 2761 mutex_exit(&nce->nce_lock); 2762 nce_resolv_failed(nce); 2763 ndp_delete(nce); 2764 } else { 2765 mutex_exit(&nce->nce_lock); 2766 } 2767 } else { 2768 mutex_exit(&nce->nce_lock); 2769 NDP_RESTART_TIMER(nce, (clock_t)ms); 2770 } 2771 NCE_REFRELE(nce); 2772 return; 2773 } 2774 mutex_exit(&nce->nce_lock); 2775 rw_exit(&ill_g_lock); 2776 NCE_REFRELE(nce); 2777 break; 2778 case ND_REACHABLE : 2779 rw_exit(&ill_g_lock); 2780 if (((nce->nce_flags & NCE_F_UNSOL_ADV) && 2781 nce->nce_unsolicit_count != 0) || 2782 ((nce->nce_flags & NCE_F_PERMANENT) && 2783 ip_ndp_defense_interval != 0)) { 2784 if (nce->nce_unsolicit_count > 0) 2785 nce->nce_unsolicit_count--; 2786 mutex_exit(&nce->nce_lock); 2787 dropped = nce_xmit(ill, 2788 ND_NEIGHBOR_ADVERT, 2789 ill, /* ill to be used for hw addr */ 2790 B_FALSE, /* use ill_phys_addr */ 2791 &nce->nce_addr, 2792 &ipv6_all_hosts_mcast, 2793 NDP_ORIDE); 2794 if (dropped) { 2795 mutex_enter(&nce->nce_lock); 2796 nce->nce_unsolicit_count++; 2797 mutex_exit(&nce->nce_lock); 2798 } 2799 if (nce->nce_unsolicit_count != 0) { 2800 NDP_RESTART_TIMER(nce, 2801 ip_ndp_unsolicit_interval); 2802 } else { 2803 NDP_RESTART_TIMER(nce, 2804 ip_ndp_defense_interval); 2805 } 2806 } else { 2807 mutex_exit(&nce->nce_lock); 2808 } 2809 NCE_REFRELE(nce); 2810 break; 2811 default: 2812 rw_exit(&ill_g_lock); 2813 mutex_exit(&nce->nce_lock); 2814 NCE_REFRELE(nce); 2815 break; 2816 } 2817 } 2818 2819 /* 2820 * Set a link layer address from the ll_addr passed in. 2821 * Copy SAP from ill. 2822 */ 2823 static void 2824 nce_set_ll(nce_t *nce, uchar_t *ll_addr) 2825 { 2826 ill_t *ill = nce->nce_ill; 2827 uchar_t *woffset; 2828 2829 ASSERT(ll_addr != NULL); 2830 /* Always called before fast_path_probe */ 2831 ASSERT(nce->nce_fp_mp == NULL); 2832 if (ill->ill_sap_length != 0) { 2833 /* 2834 * Copy the SAP type specified in the 2835 * request into the xmit template. 2836 */ 2837 NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 2838 } 2839 if (ill->ill_phys_addr_length > 0) { 2840 /* 2841 * The bcopy() below used to be called for the physical address 2842 * length rather than the link layer address length. For 2843 * ethernet and many other media, the phys_addr and lla are 2844 * identical. 2845 * However, with xresolv interfaces being introduced, the 2846 * phys_addr and lla are no longer the same, and the physical 2847 * address may not have any useful meaning, so we use the lla 2848 * for IPv6 address resolution and destination addressing. 2849 * 2850 * For PPP or other interfaces with a zero length 2851 * physical address, don't do anything here. 2852 * The bcopy() with a zero phys_addr length was previously 2853 * a no-op for interfaces with a zero-length physical address. 2854 * Using the lla for them would change the way they operate. 2855 * Doing nothing in such cases preserves expected behavior. 2856 */ 2857 woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2858 bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 2859 } 2860 } 2861 2862 static boolean_t 2863 nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len) 2864 { 2865 ill_t *ill = nce->nce_ill; 2866 uchar_t *ll_offset; 2867 2868 ASSERT(nce->nce_res_mp != NULL); 2869 if (ll_addr == NULL) 2870 return (B_FALSE); 2871 ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 2872 if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0) 2873 return (B_TRUE); 2874 return (B_FALSE); 2875 } 2876 2877 /* 2878 * Updates the link layer address or the reachability state of 2879 * a cache entry. Reset probe counter if needed. 2880 */ 2881 static void 2882 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 2883 { 2884 ill_t *ill = nce->nce_ill; 2885 boolean_t need_stop_timer = B_FALSE; 2886 boolean_t need_fastpath_update = B_FALSE; 2887 2888 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2889 ASSERT(nce->nce_ipversion == IPV6_VERSION); 2890 /* 2891 * If this interface does not do NUD, there is no point 2892 * in allowing an update to the cache entry. Although 2893 * we will respond to NS. 2894 * The only time we accept an update for a resolver when 2895 * NUD is turned off is when it has just been created. 2896 * Non-Resolvers will always be created as REACHABLE. 2897 */ 2898 if (new_state != ND_UNCHANGED) { 2899 if ((nce->nce_flags & NCE_F_NONUD) && 2900 (nce->nce_state != ND_INCOMPLETE)) 2901 return; 2902 ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 2903 ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 2904 need_stop_timer = B_TRUE; 2905 if (new_state == ND_REACHABLE) 2906 nce->nce_last = TICK_TO_MSEC(lbolt64); 2907 else { 2908 /* We force NUD in this case */ 2909 nce->nce_last = 0; 2910 } 2911 nce->nce_state = new_state; 2912 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2913 } 2914 /* 2915 * In case of fast path we need to free the the fastpath 2916 * M_DATA and do another probe. Otherwise we can just 2917 * overwrite the DL_UNITDATA_REQ data, noting we'll lose 2918 * whatever packets that happens to be transmitting at the time. 2919 */ 2920 if (new_ll_addr != NULL) { 2921 ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 2922 ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 2923 bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 2924 NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 2925 if (nce->nce_fp_mp != NULL) { 2926 freemsg(nce->nce_fp_mp); 2927 nce->nce_fp_mp = NULL; 2928 } 2929 need_fastpath_update = B_TRUE; 2930 } 2931 mutex_exit(&nce->nce_lock); 2932 if (need_stop_timer) { 2933 (void) untimeout(nce->nce_timeout_id); 2934 nce->nce_timeout_id = 0; 2935 } 2936 if (need_fastpath_update) 2937 nce_fastpath(nce); 2938 mutex_enter(&nce->nce_lock); 2939 } 2940 2941 void 2942 nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert) 2943 { 2944 uint_t count = 0; 2945 mblk_t **mpp; 2946 2947 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2948 2949 for (mpp = &nce->nce_qd_mp; *mpp != NULL; 2950 mpp = &(*mpp)->b_next) { 2951 if (++count > 2952 nce->nce_ill->ill_max_buf) { 2953 mblk_t *tmp = nce->nce_qd_mp->b_next; 2954 2955 nce->nce_qd_mp->b_next = NULL; 2956 nce->nce_qd_mp->b_prev = NULL; 2957 freemsg(nce->nce_qd_mp); 2958 nce->nce_qd_mp = tmp; 2959 } 2960 } 2961 /* put this on the list */ 2962 if (head_insert) { 2963 mp->b_next = nce->nce_qd_mp; 2964 nce->nce_qd_mp = mp; 2965 } else { 2966 *mpp = mp; 2967 } 2968 } 2969 2970 static void 2971 nce_queue_mp(nce_t *nce, mblk_t *mp) 2972 { 2973 boolean_t head_insert = B_FALSE; 2974 ip6_t *ip6h; 2975 ip6i_t *ip6i; 2976 mblk_t *data_mp; 2977 2978 ASSERT(MUTEX_HELD(&nce->nce_lock)); 2979 2980 if (mp->b_datap->db_type == M_CTL) 2981 data_mp = mp->b_cont; 2982 else 2983 data_mp = mp; 2984 ip6h = (ip6_t *)data_mp->b_rptr; 2985 if (ip6h->ip6_nxt == IPPROTO_RAW) { 2986 /* 2987 * This message should have been pulled up already in 2988 * ip_wput_v6. We can't do pullups here because the message 2989 * could be from the nce_qd_mp which could have b_next/b_prev 2990 * non-NULL. 2991 */ 2992 ip6i = (ip6i_t *)ip6h; 2993 ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >= 2994 sizeof (ip6i_t) + IPV6_HDR_LEN); 2995 /* 2996 * Multipathing probe packets have IP6I_DROP_IFDELAYED set. 2997 * This has 2 aspects mentioned below. 2998 * 1. Perform head insertion in the nce_qd_mp for these packets. 2999 * This ensures that next retransmit of ND solicitation 3000 * will use the interface specified by the probe packet, 3001 * for both NS and NA. This corresponds to the src address 3002 * in the IPv6 packet. If we insert at tail, we will be 3003 * depending on the packet at the head for successful 3004 * ND resolution. This is not reliable, because the interface 3005 * on which the NA arrives could be different from the interface 3006 * on which the NS was sent, and if the receiving interface is 3007 * failed, it will appear that the sending interface is also 3008 * failed, causing in.mpathd to misdiagnose this as link 3009 * failure. 3010 * 2. Drop the original packet, if the ND resolution did not 3011 * succeed in the first attempt. However we will create the 3012 * nce and the ire, as soon as the ND resolution succeeds. 3013 * We don't gain anything by queueing multiple probe packets 3014 * and sending them back-to-back once resolution succeeds. 3015 * It is sufficient to send just 1 packet after ND resolution 3016 * succeeds. Since mpathd is sending down probe packets at a 3017 * constant rate, we don't need to send the queued packet. We 3018 * need to queue it only for NDP resolution. The benefit of 3019 * dropping the probe packets that were delayed in ND 3020 * resolution, is that in.mpathd will not see inflated 3021 * RTT. If the ND resolution does not succeed within 3022 * in.mpathd's failure detection time, mpathd may detect 3023 * a failure, and it does not matter whether the packet 3024 * was queued or dropped. 3025 */ 3026 if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED) 3027 head_insert = B_TRUE; 3028 } 3029 3030 nce_queue_mp_common(nce, mp, head_insert); 3031 } 3032 3033 /* 3034 * Called when address resolution failed due to a timeout. 3035 * Send an ICMP unreachable in response to all queued packets. 3036 */ 3037 void 3038 nce_resolv_failed(nce_t *nce) 3039 { 3040 mblk_t *mp, *nxt_mp, *first_mp; 3041 char buf[INET6_ADDRSTRLEN]; 3042 ip6_t *ip6h; 3043 zoneid_t zoneid = GLOBAL_ZONEID; 3044 3045 ip1dbg(("nce_resolv_failed: dst %s\n", 3046 inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 3047 mutex_enter(&nce->nce_lock); 3048 mp = nce->nce_qd_mp; 3049 nce->nce_qd_mp = NULL; 3050 mutex_exit(&nce->nce_lock); 3051 while (mp != NULL) { 3052 nxt_mp = mp->b_next; 3053 mp->b_next = NULL; 3054 mp->b_prev = NULL; 3055 3056 first_mp = mp; 3057 if (mp->b_datap->db_type == M_CTL) { 3058 ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 3059 ASSERT(io->ipsec_out_type == IPSEC_OUT); 3060 zoneid = io->ipsec_out_zoneid; 3061 ASSERT(zoneid != ALL_ZONES); 3062 mp = mp->b_cont; 3063 } 3064 3065 ip6h = (ip6_t *)mp->b_rptr; 3066 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3067 ip6i_t *ip6i; 3068 /* 3069 * This message should have been pulled up already 3070 * in ip_wput_v6. ip_hdr_complete_v6 assumes that 3071 * the header is pulled up. 3072 */ 3073 ip6i = (ip6i_t *)ip6h; 3074 ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 3075 sizeof (ip6i_t) + IPV6_HDR_LEN); 3076 mp->b_rptr += sizeof (ip6i_t); 3077 } 3078 /* 3079 * Ignore failure since icmp_unreachable_v6 will silently 3080 * drop packets with an unspecified source address. 3081 */ 3082 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid); 3083 icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 3084 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE); 3085 mp = nxt_mp; 3086 } 3087 } 3088 3089 /* 3090 * Called by SIOCSNDP* ioctl to add/change an nce entry 3091 * and the corresponding attributes. 3092 * Disallow states other than ND_REACHABLE or ND_STALE. 3093 */ 3094 int 3095 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 3096 { 3097 sin6_t *sin6; 3098 in6_addr_t *addr; 3099 nce_t *nce; 3100 int err; 3101 uint16_t new_flags = 0; 3102 uint16_t old_flags = 0; 3103 int inflags = lnr->lnr_flags; 3104 3105 ASSERT(ill->ill_isv6); 3106 if ((lnr->lnr_state_create != ND_REACHABLE) && 3107 (lnr->lnr_state_create != ND_STALE)) 3108 return (EINVAL); 3109 3110 sin6 = (sin6_t *)&lnr->lnr_addr; 3111 addr = &sin6->sin6_addr; 3112 3113 mutex_enter(&ndp6.ndp_g_lock); 3114 /* We know it can not be mapping so just look in the hash table */ 3115 nce = *((nce_t **)NCE_HASH_PTR_V6(*addr)); 3116 nce = nce_lookup_addr(ill, addr, nce); 3117 if (nce != NULL) 3118 new_flags = nce->nce_flags; 3119 3120 switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 3121 case NDF_ISROUTER_ON: 3122 new_flags |= NCE_F_ISROUTER; 3123 break; 3124 case NDF_ISROUTER_OFF: 3125 new_flags &= ~NCE_F_ISROUTER; 3126 break; 3127 case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 3128 mutex_exit(&ndp6.ndp_g_lock); 3129 if (nce != NULL) 3130 NCE_REFRELE(nce); 3131 return (EINVAL); 3132 } 3133 3134 switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 3135 case NDF_ANYCAST_ON: 3136 new_flags |= NCE_F_ANYCAST; 3137 break; 3138 case NDF_ANYCAST_OFF: 3139 new_flags &= ~NCE_F_ANYCAST; 3140 break; 3141 case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 3142 mutex_exit(&ndp6.ndp_g_lock); 3143 if (nce != NULL) 3144 NCE_REFRELE(nce); 3145 return (EINVAL); 3146 } 3147 3148 switch (inflags & (NDF_PROXY_ON|NDF_PROXY_OFF)) { 3149 case NDF_PROXY_ON: 3150 new_flags |= NCE_F_PROXY; 3151 break; 3152 case NDF_PROXY_OFF: 3153 new_flags &= ~NCE_F_PROXY; 3154 break; 3155 case (NDF_PROXY_OFF|NDF_PROXY_ON): 3156 mutex_exit(&ndp6.ndp_g_lock); 3157 if (nce != NULL) 3158 NCE_REFRELE(nce); 3159 return (EINVAL); 3160 } 3161 3162 if (nce == NULL) { 3163 err = ndp_add(ill, 3164 (uchar_t *)lnr->lnr_hdw_addr, 3165 addr, 3166 &ipv6_all_ones, 3167 &ipv6_all_zeros, 3168 0, 3169 new_flags, 3170 lnr->lnr_state_create, 3171 &nce, 3172 NULL, 3173 NULL); 3174 if (err != 0) { 3175 mutex_exit(&ndp6.ndp_g_lock); 3176 ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 3177 return (err); 3178 } 3179 } 3180 old_flags = nce->nce_flags; 3181 if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 3182 /* 3183 * Router turned to host, delete all ires. 3184 * XXX Just delete the entry, but we need to add too. 3185 */ 3186 nce->nce_flags &= ~NCE_F_ISROUTER; 3187 mutex_exit(&ndp6.ndp_g_lock); 3188 ndp_delete(nce); 3189 NCE_REFRELE(nce); 3190 return (0); 3191 } 3192 mutex_exit(&ndp6.ndp_g_lock); 3193 3194 mutex_enter(&nce->nce_lock); 3195 nce->nce_flags = new_flags; 3196 mutex_exit(&nce->nce_lock); 3197 /* 3198 * Note that we ignore the state at this point, which 3199 * should be either STALE or REACHABLE. Instead we let 3200 * the link layer address passed in to determine the state 3201 * much like incoming packets. 3202 */ 3203 ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 3204 NCE_REFRELE(nce); 3205 return (0); 3206 } 3207 3208 /* 3209 * If the device driver supports it, we make nce_fp_mp to have 3210 * an M_DATA prepend. Otherwise nce_fp_mp will be null. 3211 * The caller insures there is hold on nce for this function. 3212 * Note that since ill_fastpath_probe() copies the mblk there is 3213 * no need for the hold beyond this function. 3214 */ 3215 static void 3216 nce_fastpath(nce_t *nce) 3217 { 3218 ill_t *ill = nce->nce_ill; 3219 int res; 3220 3221 ASSERT(ill != NULL); 3222 if (nce->nce_fp_mp != NULL) { 3223 /* Already contains fastpath info */ 3224 return; 3225 } 3226 if (nce->nce_res_mp != NULL) { 3227 nce_fastpath_list_add(nce); 3228 res = ill_fastpath_probe(ill, nce->nce_res_mp); 3229 /* 3230 * EAGAIN is an indication of a transient error 3231 * i.e. allocation failure etc. leave the nce in the list it 3232 * will be updated when another probe happens for another ire 3233 * if not it will be taken out of the list when the ire is 3234 * deleted. 3235 */ 3236 3237 if (res != 0 && res != EAGAIN) 3238 nce_fastpath_list_delete(nce); 3239 } 3240 } 3241 3242 /* 3243 * Drain the list of nce's waiting for fastpath response. 3244 */ 3245 void 3246 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 3247 void *arg) 3248 { 3249 3250 nce_t *next_nce; 3251 nce_t *current_nce; 3252 nce_t *first_nce; 3253 nce_t *prev_nce = NULL; 3254 3255 ASSERT(ill != NULL && ill->ill_isv6); 3256 3257 mutex_enter(&ill->ill_lock); 3258 first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 3259 while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 3260 next_nce = current_nce->nce_fastpath; 3261 /* 3262 * Take it off the list if we're flushing, or if the callback 3263 * routine tells us to do so. Otherwise, leave the nce in the 3264 * fastpath list to handle any pending response from the lower 3265 * layer. We can't drain the list when the callback routine 3266 * comparison failed, because the response is asynchronous in 3267 * nature, and may not arrive in the same order as the list 3268 * insertion. 3269 */ 3270 if (func == NULL || func(current_nce, arg)) { 3271 current_nce->nce_fastpath = NULL; 3272 if (current_nce == first_nce) 3273 ill->ill_fastpath_list = first_nce = next_nce; 3274 else 3275 prev_nce->nce_fastpath = next_nce; 3276 } else { 3277 /* previous element that is still in the list */ 3278 prev_nce = current_nce; 3279 } 3280 current_nce = next_nce; 3281 } 3282 mutex_exit(&ill->ill_lock); 3283 } 3284 3285 /* 3286 * Add nce to the nce fastpath list. 3287 */ 3288 void 3289 nce_fastpath_list_add(nce_t *nce) 3290 { 3291 ill_t *ill; 3292 3293 ill = nce->nce_ill; 3294 ASSERT(ill != NULL && ill->ill_isv6); 3295 3296 mutex_enter(&ill->ill_lock); 3297 mutex_enter(&nce->nce_lock); 3298 3299 /* 3300 * if nce has not been deleted and 3301 * is not already in the list add it. 3302 */ 3303 if (!(nce->nce_flags & NCE_F_CONDEMNED) && 3304 (nce->nce_fastpath == NULL)) { 3305 nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 3306 ill->ill_fastpath_list = nce; 3307 } 3308 3309 mutex_exit(&nce->nce_lock); 3310 mutex_exit(&ill->ill_lock); 3311 } 3312 3313 /* 3314 * remove nce from the nce fastpath list. 3315 */ 3316 void 3317 nce_fastpath_list_delete(nce_t *nce) 3318 { 3319 nce_t *nce_ptr; 3320 3321 ill_t *ill; 3322 3323 ill = nce->nce_ill; 3324 ASSERT(ill != NULL); 3325 if (!ill->ill_isv6) { 3326 /* 3327 * v4 nce_t's do not have nce_fastpath set. 3328 */ 3329 return; 3330 } 3331 3332 mutex_enter(&ill->ill_lock); 3333 if (nce->nce_fastpath == NULL) 3334 goto done; 3335 3336 ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 3337 3338 if (ill->ill_fastpath_list == nce) { 3339 ill->ill_fastpath_list = nce->nce_fastpath; 3340 } else { 3341 nce_ptr = ill->ill_fastpath_list; 3342 while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 3343 if (nce_ptr->nce_fastpath == nce) { 3344 nce_ptr->nce_fastpath = nce->nce_fastpath; 3345 break; 3346 } 3347 nce_ptr = nce_ptr->nce_fastpath; 3348 } 3349 } 3350 3351 nce->nce_fastpath = NULL; 3352 done: 3353 mutex_exit(&ill->ill_lock); 3354 } 3355 3356 /* 3357 * Update all NCE's that are not in fastpath mode and 3358 * have an nce_fp_mp that matches mp. mp->b_cont contains 3359 * the fastpath header. 3360 * 3361 * Returns TRUE if entry should be dequeued, or FALSE otherwise. 3362 */ 3363 boolean_t 3364 ndp_fastpath_update(nce_t *nce, void *arg) 3365 { 3366 mblk_t *mp, *fp_mp; 3367 uchar_t *mp_rptr, *ud_mp_rptr; 3368 mblk_t *ud_mp = nce->nce_res_mp; 3369 ptrdiff_t cmplen; 3370 3371 if (nce->nce_flags & NCE_F_MAPPING) 3372 return (B_TRUE); 3373 if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 3374 return (B_TRUE); 3375 3376 ip2dbg(("ndp_fastpath_update: trying\n")); 3377 mp = (mblk_t *)arg; 3378 mp_rptr = mp->b_rptr; 3379 cmplen = mp->b_wptr - mp_rptr; 3380 ASSERT(cmplen >= 0); 3381 ud_mp_rptr = ud_mp->b_rptr; 3382 /* 3383 * The nce is locked here to prevent any other threads 3384 * from accessing and changing nce_res_mp when the IPv6 address 3385 * becomes resolved to an lla while we're in the middle 3386 * of looking at and comparing the hardware address (lla). 3387 * It is also locked to prevent multiple threads in nce_fastpath_update 3388 * from examining nce_res_mp atthe same time. 3389 */ 3390 mutex_enter(&nce->nce_lock); 3391 if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 3392 bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 3393 mutex_exit(&nce->nce_lock); 3394 /* 3395 * Don't take the ire off the fastpath list yet, 3396 * since the response may come later. 3397 */ 3398 return (B_FALSE); 3399 } 3400 /* Matched - install mp as the fastpath mp */ 3401 ip1dbg(("ndp_fastpath_update: match\n")); 3402 fp_mp = dupb(mp->b_cont); 3403 if (fp_mp != NULL) { 3404 nce->nce_fp_mp = fp_mp; 3405 } 3406 mutex_exit(&nce->nce_lock); 3407 return (B_TRUE); 3408 } 3409 3410 /* 3411 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 3412 * driver. Note that it assumes IP is exclusive... 3413 */ 3414 /* ARGSUSED */ 3415 void 3416 ndp_fastpath_flush(nce_t *nce, char *arg) 3417 { 3418 if (nce->nce_flags & NCE_F_MAPPING) 3419 return; 3420 /* No fastpath info? */ 3421 if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 3422 return; 3423 3424 /* Just delete the NCE... */ 3425 ndp_delete(nce); 3426 } 3427 3428 /* 3429 * Return a pointer to a given option in the packet. 3430 * Assumes that option part of the packet have already been validated. 3431 */ 3432 nd_opt_hdr_t * 3433 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 3434 { 3435 while (optlen > 0) { 3436 if (opt->nd_opt_type == opt_type) 3437 return (opt); 3438 optlen -= 8 * opt->nd_opt_len; 3439 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3440 } 3441 return (NULL); 3442 } 3443 3444 /* 3445 * Verify all option lengths present are > 0, also check to see 3446 * if the option lengths and packet length are consistent. 3447 */ 3448 boolean_t 3449 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 3450 { 3451 ASSERT(opt != NULL); 3452 while (optlen > 0) { 3453 if (opt->nd_opt_len == 0) 3454 return (B_FALSE); 3455 optlen -= 8 * opt->nd_opt_len; 3456 if (optlen < 0) 3457 return (B_FALSE); 3458 opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 3459 } 3460 return (B_TRUE); 3461 } 3462 3463 /* 3464 * ndp_walk function. 3465 * Free a fraction of the NCE cache entries. 3466 * A fraction of zero means to not free any in that category. 3467 */ 3468 void 3469 ndp_cache_reclaim(nce_t *nce, char *arg) 3470 { 3471 nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 3472 uint_t rand; 3473 3474 if (nce->nce_flags & NCE_F_PERMANENT) 3475 return; 3476 3477 rand = (uint_t)lbolt + 3478 NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 3479 if (ncr->ncr_host != 0 && 3480 (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 3481 ndp_delete(nce); 3482 return; 3483 } 3484 } 3485 3486 /* 3487 * ndp_walk function. 3488 * Count the number of NCEs that can be deleted. 3489 * These would be hosts but not routers. 3490 */ 3491 void 3492 ndp_cache_count(nce_t *nce, char *arg) 3493 { 3494 ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 3495 3496 if (nce->nce_flags & NCE_F_PERMANENT) 3497 return; 3498 3499 ncc->ncc_total++; 3500 if (!(nce->nce_flags & NCE_F_ISROUTER)) 3501 ncc->ncc_host++; 3502 } 3503 3504 #ifdef NCE_DEBUG 3505 th_trace_t * 3506 th_trace_nce_lookup(nce_t *nce) 3507 { 3508 int bucket_id; 3509 th_trace_t *th_trace; 3510 3511 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3512 3513 bucket_id = IP_TR_HASH(curthread); 3514 ASSERT(bucket_id < IP_TR_HASH_MAX); 3515 3516 for (th_trace = nce->nce_trace[bucket_id]; th_trace != NULL; 3517 th_trace = th_trace->th_next) { 3518 if (th_trace->th_id == curthread) 3519 return (th_trace); 3520 } 3521 return (NULL); 3522 } 3523 3524 void 3525 nce_trace_ref(nce_t *nce) 3526 { 3527 int bucket_id; 3528 th_trace_t *th_trace; 3529 3530 /* 3531 * Attempt to locate the trace buffer for the curthread. 3532 * If it does not exist, then allocate a new trace buffer 3533 * and link it in list of trace bufs for this ipif, at the head 3534 */ 3535 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3536 3537 if (nce->nce_trace_disable == B_TRUE) 3538 return; 3539 3540 th_trace = th_trace_nce_lookup(nce); 3541 if (th_trace == NULL) { 3542 bucket_id = IP_TR_HASH(curthread); 3543 th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t), 3544 KM_NOSLEEP); 3545 if (th_trace == NULL) { 3546 nce->nce_trace_disable = B_TRUE; 3547 nce_trace_inactive(nce); 3548 return; 3549 } 3550 th_trace->th_id = curthread; 3551 th_trace->th_next = nce->nce_trace[bucket_id]; 3552 th_trace->th_prev = &nce->nce_trace[bucket_id]; 3553 if (th_trace->th_next != NULL) 3554 th_trace->th_next->th_prev = &th_trace->th_next; 3555 nce->nce_trace[bucket_id] = th_trace; 3556 } 3557 ASSERT(th_trace->th_refcnt < TR_BUF_MAX - 1); 3558 th_trace->th_refcnt++; 3559 th_trace_rrecord(th_trace); 3560 } 3561 3562 void 3563 nce_untrace_ref(nce_t *nce) 3564 { 3565 th_trace_t *th_trace; 3566 3567 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3568 3569 if (nce->nce_trace_disable == B_TRUE) 3570 return; 3571 3572 th_trace = th_trace_nce_lookup(nce); 3573 ASSERT(th_trace != NULL && th_trace->th_refcnt > 0); 3574 3575 th_trace_rrecord(th_trace); 3576 th_trace->th_refcnt--; 3577 } 3578 3579 void 3580 nce_trace_inactive(nce_t *nce) 3581 { 3582 th_trace_t *th_trace; 3583 int i; 3584 3585 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3586 3587 for (i = 0; i < IP_TR_HASH_MAX; i++) { 3588 while (nce->nce_trace[i] != NULL) { 3589 th_trace = nce->nce_trace[i]; 3590 3591 /* unlink th_trace and free it */ 3592 nce->nce_trace[i] = th_trace->th_next; 3593 if (th_trace->th_next != NULL) 3594 th_trace->th_next->th_prev = 3595 &nce->nce_trace[i]; 3596 3597 th_trace->th_next = NULL; 3598 th_trace->th_prev = NULL; 3599 kmem_free(th_trace, sizeof (th_trace_t)); 3600 } 3601 } 3602 3603 } 3604 3605 /* ARGSUSED */ 3606 int 3607 nce_thread_exit(nce_t *nce, caddr_t arg) 3608 { 3609 th_trace_t *th_trace; 3610 3611 mutex_enter(&nce->nce_lock); 3612 th_trace = th_trace_nce_lookup(nce); 3613 3614 if (th_trace == NULL) { 3615 mutex_exit(&nce->nce_lock); 3616 return (0); 3617 } 3618 3619 ASSERT(th_trace->th_refcnt == 0); 3620 3621 /* unlink th_trace and free it */ 3622 *th_trace->th_prev = th_trace->th_next; 3623 if (th_trace->th_next != NULL) 3624 th_trace->th_next->th_prev = th_trace->th_prev; 3625 th_trace->th_next = NULL; 3626 th_trace->th_prev = NULL; 3627 kmem_free(th_trace, sizeof (th_trace_t)); 3628 mutex_exit(&nce->nce_lock); 3629 return (0); 3630 } 3631 #endif 3632 3633 /* 3634 * Called when address resolution fails due to a timeout. 3635 * Send an ICMP unreachable in response to all queued packets. 3636 */ 3637 void 3638 arp_resolv_failed(nce_t *nce) 3639 { 3640 mblk_t *mp, *nxt_mp, *first_mp; 3641 char buf[INET6_ADDRSTRLEN]; 3642 zoneid_t zoneid = GLOBAL_ZONEID; 3643 struct in_addr ipv4addr; 3644 3645 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); 3646 ip3dbg(("arp_resolv_failed: dst %s\n", 3647 inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf)))); 3648 mutex_enter(&nce->nce_lock); 3649 mp = nce->nce_qd_mp; 3650 nce->nce_qd_mp = NULL; 3651 mutex_exit(&nce->nce_lock); 3652 3653 while (mp != NULL) { 3654 nxt_mp = mp->b_next; 3655 mp->b_next = NULL; 3656 mp->b_prev = NULL; 3657 3658 first_mp = mp; 3659 /* 3660 * Send icmp unreachable messages 3661 * to the hosts. 3662 */ 3663 (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid); 3664 ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); 3665 icmp_unreachable(nce->nce_ill->ill_wq, first_mp, 3666 ICMP_HOST_UNREACHABLE); 3667 mp = nxt_mp; 3668 } 3669 } 3670 3671 static int 3672 ndp_lookup_then_add_v4(ill_t *ill, uchar_t *hw_addr, const in_addr_t *addr, 3673 const in_addr_t *mask, const in_addr_t *extract_mask, 3674 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 3675 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 3676 { 3677 int err = 0; 3678 nce_t *nce; 3679 in6_addr_t addr6; 3680 3681 mutex_enter(&ndp4.ndp_g_lock); 3682 nce = *((nce_t **)NCE_HASH_PTR_V4(*addr)); 3683 IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3684 nce = nce_lookup_addr(ill, &addr6, nce); 3685 if (nce == NULL) { 3686 err = ndp_add_v4(ill, 3687 hw_addr, 3688 addr, 3689 mask, 3690 extract_mask, 3691 hw_extract_start, 3692 flags, 3693 state, 3694 newnce, 3695 fp_mp, 3696 res_mp); 3697 } else { 3698 *newnce = nce; 3699 err = EEXIST; 3700 } 3701 mutex_exit(&ndp4.ndp_g_lock); 3702 return (err); 3703 } 3704 3705 /* 3706 * NDP Cache Entry creation routine for IPv4. 3707 * Mapped entries are handled in arp. 3708 * This routine must always be called with ndp4.ndp_g_lock held. 3709 * Prior to return, nce_refcnt is incremented. 3710 */ 3711 static int 3712 ndp_add_v4(ill_t *ill, uchar_t *hw_addr, const in_addr_t *addr, 3713 const in_addr_t *mask, const in_addr_t *extract_mask, 3714 uint32_t hw_extract_start, uint16_t flags, uint16_t state, 3715 nce_t **newnce, mblk_t *fp_mp, mblk_t *res_mp) 3716 { 3717 static nce_t nce_nil; 3718 nce_t *nce; 3719 mblk_t *mp; 3720 mblk_t *template; 3721 nce_t **ncep; 3722 3723 ASSERT(MUTEX_HELD(&ndp4.ndp_g_lock)); 3724 ASSERT(ill != NULL); 3725 if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 3726 return (EINVAL); 3727 } 3728 ASSERT((flags & NCE_F_MAPPING) == 0); 3729 ASSERT(extract_mask == NULL); 3730 /* 3731 * Allocate the mblk to hold the nce. 3732 */ 3733 mp = allocb(sizeof (nce_t), BPRI_MED); 3734 if (mp == NULL) 3735 return (ENOMEM); 3736 3737 nce = (nce_t *)mp->b_rptr; 3738 mp->b_wptr = (uchar_t *)&nce[1]; 3739 *nce = nce_nil; 3740 3741 /* 3742 * This one holds link layer address; if res_mp has been provided 3743 * by the caller, accept it without any further checks. Otherwise, 3744 * for V4, we fill it up with ill_resolver_mp here, then in 3745 * in ire_arpresolve(), we fill it up with the ARP query 3746 * once its formulated. 3747 */ 3748 if (res_mp != NULL) { 3749 template = res_mp; 3750 } else { 3751 template = copyb(ill->ill_resolver_mp); 3752 } 3753 if (template == NULL) { 3754 freeb(mp); 3755 return (ENOMEM); 3756 } 3757 nce->nce_ill = ill; 3758 nce->nce_ipversion = IPV4_VERSION; 3759 nce->nce_flags = flags; 3760 nce->nce_state = state; 3761 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 3762 nce->nce_rcnt = ill->ill_xmit_count; 3763 IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr); 3764 if (*mask == IP_HOST_MASK) { 3765 nce->nce_mask = ipv6_all_ones; 3766 } else { 3767 IN6_IPADDR_TO_V4MAPPED(*mask, &nce->nce_mask); 3768 } 3769 nce->nce_extract_mask = ipv6_all_zeros; 3770 nce->nce_ll_extract_start = hw_extract_start; 3771 nce->nce_fp_mp = (fp_mp? fp_mp : NULL); 3772 nce->nce_res_mp = template; 3773 if (state == ND_REACHABLE) 3774 nce->nce_last = TICK_TO_MSEC(lbolt64); 3775 else 3776 nce->nce_last = 0; 3777 nce->nce_qd_mp = NULL; 3778 nce->nce_mp = mp; 3779 if (hw_addr != NULL) 3780 nce_set_ll(nce, hw_addr); 3781 /* This one is for nce getting created */ 3782 nce->nce_refcnt = 1; 3783 mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 3784 ncep = ((nce_t **)NCE_HASH_PTR_V4(*addr)); 3785 3786 #ifdef NCE_DEBUG 3787 bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); 3788 #endif 3789 /* 3790 * Atomically ensure that the ill is not CONDEMNED, before 3791 * adding the NCE. 3792 */ 3793 mutex_enter(&ill->ill_lock); 3794 if (ill->ill_state_flags & ILL_CONDEMNED) { 3795 mutex_exit(&ill->ill_lock); 3796 freeb(mp); 3797 if (res_mp == NULL) { 3798 /* 3799 * template was locally allocated. need to free it. 3800 */ 3801 freeb(template); 3802 } 3803 return (EINVAL); 3804 } 3805 if ((nce->nce_next = *ncep) != NULL) 3806 nce->nce_next->nce_ptpn = &nce->nce_next; 3807 *ncep = nce; 3808 nce->nce_ptpn = ncep; 3809 *newnce = nce; 3810 /* This one is for nce being used by an active thread */ 3811 NCE_REFHOLD(*newnce); 3812 3813 /* Bump up the number of nce's referencing this ill */ 3814 ill->ill_nce_cnt++; 3815 mutex_exit(&ill->ill_lock); 3816 return (0); 3817 } 3818 3819 void 3820 ndp_flush_qd_mp(nce_t *nce) 3821 { 3822 mblk_t *qd_mp, *qd_next; 3823 3824 ASSERT(MUTEX_HELD(&nce->nce_lock)); 3825 qd_mp = nce->nce_qd_mp; 3826 nce->nce_qd_mp = NULL; 3827 while (qd_mp != NULL) { 3828 qd_next = qd_mp->b_next; 3829 qd_mp->b_next = NULL; 3830 qd_mp->b_prev = NULL; 3831 freemsg(qd_mp); 3832 qd_mp = qd_next; 3833 } 3834 } 3835 3836 nce_t * 3837 nce_reinit(nce_t *nce) 3838 { 3839 nce_t *newnce = NULL; 3840 in_addr_t nce_addr, nce_mask; 3841 3842 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3843 IN6_V4MAPPED_TO_IPADDR(&nce->nce_mask, nce_mask); 3844 /* 3845 * delete the old one. this will get rid of any ire's pointing 3846 * at this nce. 3847 */ 3848 ndp_delete(nce); 3849 /* 3850 * create a new nce with the same addr and mask. 3851 */ 3852 mutex_enter(&ndp4.ndp_g_lock); 3853 (void) ndp_add_v4(nce->nce_ill, NULL, &nce_addr, &nce_mask, NULL, 0, 0, 3854 ND_INITIAL, &newnce, NULL, NULL); 3855 mutex_exit(&ndp4.ndp_g_lock); 3856 /* 3857 * refrele the old nce. 3858 */ 3859 NCE_REFRELE(nce); 3860 return (newnce); 3861 } 3862 3863 /* 3864 * ndp_walk routine to delete all entries that have a given destination or 3865 * gateway address and cached link layer (MAC) address. This is used when ARP 3866 * informs us that a network-to-link-layer mapping may have changed. 3867 */ 3868 void 3869 nce_delete_hw_changed(nce_t *nce, void *arg) 3870 { 3871 nce_hw_map_t *hwm = arg; 3872 mblk_t *mp; 3873 dl_unitdata_req_t *dlu; 3874 uchar_t *macaddr; 3875 ill_t *ill; 3876 int saplen; 3877 ipaddr_t nce_addr; 3878 3879 if (nce->nce_state != ND_REACHABLE) 3880 return; 3881 3882 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 3883 if (nce_addr != hwm->hwm_addr) 3884 return; 3885 3886 mutex_enter(&nce->nce_lock); 3887 if ((mp = nce->nce_res_mp) == NULL) { 3888 mutex_exit(&nce->nce_lock); 3889 return; 3890 } 3891 dlu = (dl_unitdata_req_t *)mp->b_rptr; 3892 macaddr = (uchar_t *)(dlu + 1); 3893 ill = nce->nce_ill; 3894 if ((saplen = ill->ill_sap_length) > 0) 3895 macaddr += saplen; 3896 else 3897 saplen = -saplen; 3898 3899 /* 3900 * If the hardware address is unchanged, then leave this one alone. 3901 * Note that saplen == abs(saplen) now. 3902 */ 3903 if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen && 3904 bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) { 3905 mutex_exit(&nce->nce_lock); 3906 return; 3907 } 3908 mutex_exit(&nce->nce_lock); 3909 3910 DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce); 3911 ndp_delete(nce); 3912 } 3913 3914 /* 3915 * This function verifies whether a given IPv4 address is potentially known to 3916 * the NCE subsystem. If so, then ARP must not delete the corresponding ace_t, 3917 * so that it can continue to look for hardware changes on that address. 3918 */ 3919 boolean_t 3920 ndp_lookup_ipaddr(in_addr_t addr) 3921 { 3922 nce_t *nce; 3923 struct in_addr nceaddr; 3924 3925 if (addr == INADDR_ANY) 3926 return (B_FALSE); 3927 3928 mutex_enter(&ndp4.ndp_g_lock); 3929 nce = *(nce_t **)NCE_HASH_PTR_V4(addr); 3930 for (; nce != NULL; nce = nce->nce_next) { 3931 /* Note that only v4 mapped entries are in the table. */ 3932 IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr); 3933 if (addr == nceaddr.s_addr && 3934 IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 3935 /* Single flag check; no lock needed */ 3936 if (!(nce->nce_flags & NCE_F_CONDEMNED)) 3937 break; 3938 } 3939 } 3940 mutex_exit(&ndp4.ndp_g_lock); 3941 return (nce != NULL); 3942 } 3943