/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Function names with nce_ prefix are static while function * names with ndp_ prefix are used by rest of the IP. * * Lock ordering: * * ndp_g_lock -> ill_lock -> nce_lock * * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and * nce_next. Nce_lock protects the contents of the NCE (particularly * nce_refcnt). 
*/ static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, uint32_t ll_addr_len); static void nce_ire_delete(nce_t *nce); static void nce_ire_delete1(ire_t *ire, char *nce_arg); static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); static nce_t *nce_lookup_addr(ill_t *, boolean_t, const in6_addr_t *, nce_t *); static nce_t *nce_lookup_mapping(ill_t *, const in6_addr_t *); static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr); static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); static void nce_queue_mp(nce_t *nce, mblk_t *mp); static mblk_t *nce_udreq_alloc(ill_t *ill); static void nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr); static uint32_t nce_solicit(nce_t *nce, in6_addr_t src); static boolean_t nce_xmit(ill_t *ill, uint8_t type, boolean_t use_lla_addr, const in6_addr_t *sender, const in6_addr_t *target, int flag); static boolean_t nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *target, uint_t flags); static boolean_t nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *src, uint_t flags); static int ndp_add_v4(ill_t *, const in_addr_t *, uint16_t, nce_t **, nce_t *); static ipif_t *ip_ndp_lookup_addr_v6(const in6_addr_t *v6addrp, ill_t *ill); #ifdef DEBUG static void nce_trace_cleanup(const nce_t *); #endif #define NCE_HASH_PTR_V4(ipst, addr) \ (&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) #define NCE_HASH_PTR_V6(ipst, addr) \ (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \ NCE_TABLE_SIZE)])) /* Non-tunable probe interval, based on link capabilities */ #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) /* * NDP Cache Entry creation routine. * Mapped entries will never do NUD . * This routine must always be called with ndp6->ndp_g_lock held. * Prior to return, nce_refcnt is incremented. 
 */
int
ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr,
    const in6_addr_t *mask, const in6_addr_t *extract_mask,
    uint32_t hw_extract_start, uint16_t flags, uint16_t state,
    nce_t **newnce)
{
	static nce_t	nce_nil;	/* all-zero template for initializing */
	nce_t		*nce;
	mblk_t		*mp;
	mblk_t		*template;
	nce_t		**ncep;
	int		err;
	boolean_t	dropped = B_FALSE;
	ip_stack_t	*ipst = ill->ill_ipst;

	ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock));
	ASSERT(ill != NULL && ill->ill_isv6);

	/* Reject the unspecified address, invalid flags, and a zero */
	/* extract mask on a mapping entry. */
	if (IN6_IS_ADDR_UNSPECIFIED(addr)) {
		ip0dbg(("ndp_add_v6: no addr\n"));
		return (EINVAL);
	}
	if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) {
		ip0dbg(("ndp_add_v6: flags = %x\n", (int)flags));
		return (EINVAL);
	}
	if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) &&
	    (flags & NCE_F_MAPPING)) {
		ip0dbg(("ndp_add_v6: extract mask zero for mapping"));
		return (EINVAL);
	}
	/*
	 * Allocate the mblk to hold the nce.
	 *
	 * XXX This can come out of a separate cache - nce_cache.
	 * We don't need the mp anymore as there are no more
	 * "qwriter"s
	 */
	mp = allocb(sizeof (nce_t), BPRI_MED);
	if (mp == NULL)
		return (ENOMEM);

	nce = (nce_t *)mp->b_rptr;
	mp->b_wptr = (uchar_t *)&nce[1];
	*nce = nce_nil;

	/*
	 * This one holds link layer address: a resolver ill gets a
	 * fresh DL_UNITDATA_REQ template, a noresolver ill copies the
	 * ill's pre-built resolution cookie.
	 */
	if (ill->ill_net_type == IRE_IF_RESOLVER) {
		template = nce_udreq_alloc(ill);
	} else {
		if (ill->ill_resolver_mp == NULL) {
			freeb(mp);
			return (EINVAL);
		}
		ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER));
		template = copyb(ill->ill_resolver_mp);
	}
	if (template == NULL) {
		freeb(mp);
		return (ENOMEM);
	}

	/* Initialize the cache entry fields from the arguments. */
	nce->nce_ill = ill;
	nce->nce_ipversion = IPV6_VERSION;
	nce->nce_flags = flags;
	nce->nce_state = state;
	nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
	nce->nce_rcnt = ill->ill_xmit_count;
	nce->nce_addr = *addr;
	nce->nce_mask = *mask;
	nce->nce_extract_mask = *extract_mask;
	nce->nce_ll_extract_start = hw_extract_start;
	nce->nce_fp_mp = NULL;
	nce->nce_res_mp = template;
	/* A REACHABLE entry starts with a fresh reachability timestamp */
	if (state == ND_REACHABLE)
		nce->nce_last = TICK_TO_MSEC(lbolt64);
	else
		nce->nce_last = 0;
	nce->nce_qd_mp = NULL;
	nce->nce_mp = mp;
	if (hw_addr != NULL)
		nce_set_ll(nce, hw_addr);
	/* This one is for nce getting created */
	nce->nce_refcnt = 1;
	mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * Mapping entries (multicast) go on the private mask-entry list;
	 * everything else is hashed into the v6 NCE hash table.
	 */
	if (nce->nce_flags & NCE_F_MAPPING) {
		ASSERT(IN6_IS_ADDR_MULTICAST(addr));
		ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask));
		ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask));
		ncep = &ipst->ips_ndp6->nce_mask_entries;
	} else {
		ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr));
	}
	nce->nce_trace_disable = B_FALSE;
	list_create(&nce->nce_cb, sizeof (nce_cb_t),
	    offsetof(nce_cb_t, nce_cb_node));
	/*
	 * Atomically ensure that the ill is not CONDEMNED, before
	 * adding the NCE.
	 */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_state_flags & ILL_CONDEMNED) {
		mutex_exit(&ill->ill_lock);
		freeb(mp);
		freeb(template);
		return (EINVAL);
	}
	/* Link at the head of the hash chain, fixing up back-pointers. */
	if ((nce->nce_next = *ncep) != NULL)
		nce->nce_next->nce_ptpn = &nce->nce_next;
	*ncep = nce;
	nce->nce_ptpn = ncep;
	*newnce = nce;
	/* This one is for nce being used by an active thread */
	NCE_REFHOLD(*newnce);
	/* Bump up the number of nce's referencing this ill */
	DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
	    (char *), "nce", (void *), nce);
	ill->ill_nce_cnt++;
	mutex_exit(&ill->ill_lock);

	err = 0;
	if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) {
		/*
		 * DAD probe for a permanent entry.  ndp_g_lock is dropped
		 * across the wire transmit and re-taken before return, so
		 * the caller's lock-held contract is preserved.
		 */
		mutex_enter(&nce->nce_lock);
		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
		nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
		mutex_exit(&nce->nce_lock);
		dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE);
		if (dropped) {
			/* Transmit failed; give back the consumed probe. */
			mutex_enter(&nce->nce_lock);
			nce->nce_pcnt++;
			mutex_exit(&nce->nce_lock);
		}
		NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill));
		mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
		err = EINPROGRESS;
	} else if (flags & NCE_F_UNSOL_ADV) {
		/*
		 * We account for the transmit below by assigning one
		 * less than the ndd variable. Subsequent decrements
		 * are done in ndp_timer.
		 */
		mutex_enter(&nce->nce_lock);
		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
		nce->nce_unsolicit_count =
		    ipst->ips_ip_ndp_unsolicit_count - 1;
		mutex_exit(&nce->nce_lock);
		dropped = nce_xmit_advert(nce, B_TRUE,
		    &ipv6_all_hosts_mcast, 0);
		mutex_enter(&nce->nce_lock);
		if (dropped)
			nce->nce_unsolicit_count++;
		if (nce->nce_unsolicit_count != 0) {
			ASSERT(nce->nce_timeout_id == 0);
			nce->nce_timeout_id = timeout(ndp_timer, nce,
			    MSEC_TO_TICK(
			    ipst->ips_ip_ndp_unsolicit_interval));
		}
		mutex_exit(&nce->nce_lock);
		mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
	}

	/*
	 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then
	 * we call nce_fastpath as soon as the nce is resolved in ndp_process.
	 * We call nce_fastpath from nce_update if the link layer address of
	 * the peer changes from nce_update
	 */
	if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER)
		nce_fastpath(nce);
	return (err);
}

/*
 * Look up an existing v6 NCE for addr; if none, create one via ndp_add_v6().
 * Returns 0 (or EINPROGRESS from ndp_add_v6) on creation, EEXIST when an
 * entry was found.  In both cases *newnce holds a referenced entry.
 */
int
ndp_lookup_then_add_v6(ill_t *ill, boolean_t match_illgrp, uchar_t *hw_addr,
    const in6_addr_t *addr, const in6_addr_t *mask,
    const in6_addr_t *extract_mask, uint32_t hw_extract_start,
    uint16_t flags, uint16_t state, nce_t **newnce)
{
	int		err = 0;
	nce_t		*nce;
	ip_stack_t	*ipst = ill->ill_ipst;

	ASSERT(ill->ill_isv6);
	mutex_enter(&ipst->ips_ndp6->ndp_g_lock);

	/* Get head of v6 hash table */
	nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr));
	nce = nce_lookup_addr(ill, match_illgrp, addr, nce);
	if (nce == NULL) {
		err = ndp_add_v6(ill,
		    hw_addr,
		    addr,
		    mask,
		    extract_mask,
		    hw_extract_start,
		    flags,
		    state,
		    newnce);
	} else {
		/* nce_lookup_addr already took a reference for us. */
		*newnce = nce;
		err = EEXIST;
	}
	mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
	return (err);
}

/*
 * Remove all the CONDEMNED nces from the appropriate hash table.
 * We create a private list of NCEs, these may have ires pointing
 * to them, so the list will be passed through to clean up dependent
 * ires and only then we can do NCE_REFRELE which can make NCE inactive.
 */
static void
nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list)
{
	nce_t	*nce1;
	nce_t	**ptpn;

	ASSERT(MUTEX_HELD(&ndp->ndp_g_lock));
	/* Only the last walker may run the condemned-entry sweep. */
	ASSERT(ndp->ndp_g_walker == 0);
	for (; nce; nce = nce1) {
		nce1 = nce->nce_next;
		mutex_enter(&nce->nce_lock);
		if (nce->nce_flags & NCE_F_CONDEMNED) {
			/* Unlink from the hash chain ... */
			ptpn = nce->nce_ptpn;
			nce1 = nce->nce_next;
			if (nce1 != NULL)
				nce1->nce_ptpn = ptpn;
			*ptpn = nce1;
			nce->nce_ptpn = NULL;
			nce->nce_next = NULL;
			/* ... and push onto the caller's private free list. */
			nce->nce_next = *free_nce_list;
			*free_nce_list = nce;
		}
		mutex_exit(&nce->nce_lock);
	}
}

/*
 * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup()
 *    will return this NCE. Also no new IREs will be created that
 *    point to this NCE (See ire_add_v6). Also no new timeouts will
 *    be started (See NDP_RESTART_TIMER).
 * 2. Cancel any currently running timeouts.
 * 3. If there is an ndp walker, return. The walker will do the cleanup.
 *    This ensures that walkers see a consistent list of NCEs while walking.
 * 4. Otherwise remove the NCE from the list of NCEs
 * 5. Delete all IREs pointing to this NCE.
 */
void
ndp_delete(nce_t *nce)
{
	nce_t	**ptpn;
	nce_t	*nce1;
	int	ipversion = nce->nce_ipversion;
	ndp_g_t	*ndp;
	ip_stack_t *ipst = nce->nce_ill->ill_ipst;

	if (ipversion == IPV4_VERSION)
		ndp = ipst->ips_ndp4;
	else
		ndp = ipst->ips_ndp6;

	/* Serialize deletes */
	mutex_enter(&nce->nce_lock);
	if (nce->nce_flags & NCE_F_CONDEMNED) {
		/* Some other thread is doing the delete */
		mutex_exit(&nce->nce_lock);
		return;
	}
	/*
	 * Caller has a refhold. Also 1 ref for being in the list. Thus
	 * refcnt has to be >= 2
	 */
	ASSERT(nce->nce_refcnt >= 2);
	nce->nce_flags |= NCE_F_CONDEMNED;
	mutex_exit(&nce->nce_lock);

	nce_fastpath_list_delete(nce);

	/* Complete any waiting callbacks */
	nce_cb_dispatch(nce);

	/*
	 * Cancel any running timer. Timeout can't be restarted
	 * since CONDEMNED is set. Can't hold nce_lock across untimeout.
	 * Passing invalid timeout id is fine.
	 */
	if (nce->nce_timeout_id != 0) {
		(void) untimeout(nce->nce_timeout_id);
		nce->nce_timeout_id = 0;
	}

	mutex_enter(&ndp->ndp_g_lock);
	if (nce->nce_ptpn == NULL) {
		/*
		 * The last ndp walker has already removed this nce from
		 * the list after we marked the nce CONDEMNED and before
		 * we grabbed the global lock.
		 */
		mutex_exit(&ndp->ndp_g_lock);
		return;
	}
	if (ndp->ndp_g_walker > 0) {
		/*
		 * Can't unlink. The walker will clean up
		 */
		ndp->ndp_g_walker_cleanup = B_TRUE;
		mutex_exit(&ndp->ndp_g_lock);
		return;
	}

	/*
	 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart
	 * the timer since it is marked CONDEMNED.
	 */
	ptpn = nce->nce_ptpn;
	nce1 = nce->nce_next;
	if (nce1 != NULL)
		nce1->nce_ptpn = ptpn;
	*ptpn = nce1;
	nce->nce_ptpn = NULL;
	nce->nce_next = NULL;
	mutex_exit(&ndp->ndp_g_lock);

	/* Drops the list reference via NCE_REFRELE_NOTR when done. */
	nce_ire_delete(nce);
}

/*
 * Final teardown of an nce once its refcnt has reached zero: free all
 * queued mblks, destroy the callback list (v6 only), drop the ill's
 * nce count (possibly waking an ill-down waiter), and free the nce mblk.
 * Called with nce_lock held (see ASSERTs below).
 */
void
ndp_inactive(nce_t *nce)
{
	mblk_t	**mpp;
	ill_t	*ill;

	ASSERT(nce->nce_refcnt == 0);
	ASSERT(MUTEX_HELD(&nce->nce_lock));
	ASSERT(nce->nce_fastpath == NULL);

	/* Free all nce allocated messages */
	mpp = &nce->nce_first_mp_to_free;
	do {
		while (*mpp != NULL) {
			mblk_t  *mp;

			mp = *mpp;
			*mpp = mp->b_next;
			inet_freemsg(mp);
		}
	} while (mpp++ != &nce->nce_last_mp_to_free);

	if (nce->nce_ipversion == IPV6_VERSION) {
		/*
		 * must have been cleaned up in nce_delete
		 */
		ASSERT(list_is_empty(&nce->nce_cb));
		list_destroy(&nce->nce_cb);
	}

#ifdef DEBUG
	nce_trace_cleanup(nce);
#endif

	ill = nce->nce_ill;
	mutex_enter(&ill->ill_lock);
	DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
	    (char *), "nce", (void *), nce);
	ill->ill_nce_cnt--;
	/*
	 * If the number of nce's associated with this ill have dropped
	 * to zero, check whether we need to restart any operation that
	 * is waiting for this to happen.
	 */
	if (ILL_DOWN_OK(ill)) {
		/* ipif_ill_refrele_tail drops the ill_lock */
		ipif_ill_refrele_tail(ill);
	} else {
		mutex_exit(&ill->ill_lock);
	}
	mutex_destroy(&nce->nce_lock);
	if (nce->nce_mp != NULL)
		inet_freemsg(nce->nce_mp);
}

/*
 * ndp_walk routine.
 * Delete the nce if it is associated with the ill
 * that is going away.  Always called as a writer.
 */
void
ndp_delete_per_ill(nce_t *nce, uchar_t *arg)
{
	if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) {
		ndp_delete(nce);
	}
}

/*
 * Walk a list of to be inactive NCEs and blow away all the ires.
 * Each entry's NOTR "in the list" reference is released at the end,
 * which may make the NCE inactive.
 */
static void
nce_ire_delete_list(nce_t *nce)
{
	nce_t *nce_next;

	ASSERT(nce != NULL);
	while (nce != NULL) {
		nce_next = nce->nce_next;
		nce->nce_next = NULL;

		/*
		 * It is possible for the last ndp walker (this thread)
		 * to come here after ndp_delete has marked the nce CONDEMNED
		 * and before it has removed the nce from the fastpath list
		 * or called untimeout. So we need to do it here. It is safe
		 * for both ndp_delete and this thread to do it twice or
		 * even simultaneously since each of the threads has a
		 * reference on the nce.
		 */
		nce_fastpath_list_delete(nce);
		/*
		 * Cancel any running timer. Timeout can't be restarted
		 * since CONDEMNED is set. Can't hold nce_lock across untimeout.
		 * Passing invalid timeout id is fine.
		 */
		if (nce->nce_timeout_id != 0) {
			(void) untimeout(nce->nce_timeout_id);
			nce->nce_timeout_id = 0;
		}
		/*
		 * We might hit this func thus in the v4 case:
		 *	ipif_down->ipif_ndp_down->ndp_walk
		 */
		if (nce->nce_ipversion == IPV4_VERSION) {
			ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE,
			    IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill);
		} else {
			ASSERT(nce->nce_ipversion == IPV6_VERSION);
			ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE,
			    IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill);
		}
		NCE_REFRELE_NOTR(nce);
		nce = nce_next;
	}
}

/*
 * Delete an ire when the nce goes away.
 */
/* ARGSUSED */
static void
nce_ire_delete(nce_t *nce)
{
	/* Walk all cache IREs on the nce's ill and delete those sharing it, */
	/* then drop the nce's "in the list" (NOTR) reference. */
	if (nce->nce_ipversion == IPV6_VERSION) {
		ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE,
		    nce_ire_delete1, (char *)nce, nce->nce_ill);
		NCE_REFRELE_NOTR(nce);
	} else {
		ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE,
		    nce_ire_delete1, (char *)nce, nce->nce_ill);
		NCE_REFRELE_NOTR(nce);
	}
}

/*
 * ire_walk routine used to delete every IRE that shares this nce
 */
static void
nce_ire_delete1(ire_t *ire, char *nce_arg)
{
	nce_t	*nce = (nce_t *)nce_arg;

	ASSERT(ire->ire_type == IRE_CACHE);

	if (ire->ire_nce == nce) {
		ASSERT(ire->ire_ipversion == nce->nce_ipversion);
		ire_delete(ire);
	}
}

/*
 * Restart DAD on given NCE.  Returns B_TRUE if DAD has been restarted.
 * Only ND_PROBE (already probing) and ND_REACHABLE entries qualify;
 * a REACHABLE entry is pushed back into ND_PROBE and a probe is sent.
 */
boolean_t
ndp_restart_dad(nce_t *nce)
{
	boolean_t	started;
	boolean_t	dropped;

	if (nce == NULL)
		return (B_FALSE);
	mutex_enter(&nce->nce_lock);
	if (nce->nce_state == ND_PROBE) {
		/* DAD already in progress; nothing to do. */
		mutex_exit(&nce->nce_lock);
		started = B_TRUE;
	} else if (nce->nce_state == ND_REACHABLE) {
		nce->nce_state = ND_PROBE;
		/* One probe is consumed by the transmit below. */
		nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1;
		mutex_exit(&nce->nce_lock);
		dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE);
		if (dropped) {
			/* Transmit failed; restore the probe count. */
			mutex_enter(&nce->nce_lock);
			nce->nce_pcnt++;
			mutex_exit(&nce->nce_lock);
		}
		NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill));
		started = B_TRUE;
	} else {
		mutex_exit(&nce->nce_lock);
		started = B_FALSE;
	}
	return (started);
}

/*
 * IPv6 Cache entry lookup.  Try to find an nce matching the parameters passed.
 * If one is found, the refcnt on the nce will be incremented.
 */
nce_t *
ndp_lookup_v6(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr,
    boolean_t caller_holds_lock)
{
	nce_t	*nce;
	ip_stack_t *ipst = ill->ill_ipst;

	ASSERT(ill->ill_isv6);
	if (!caller_holds_lock)
		mutex_enter(&ipst->ips_ndp6->ndp_g_lock);

	/* Get head of v6 hash table */
	nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr));
	nce = nce_lookup_addr(ill, match_illgrp, addr, nce);
	/* No exact match: fall back to a multicast mapping entry. */
	if (nce == NULL)
		nce = nce_lookup_mapping(ill, addr);
	if (!caller_holds_lock)
		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
	return (nce);
}

/*
 * IPv4 Cache entry lookup.  Try to find an nce matching the parameters passed.
 * If one is found, the refcnt on the nce will be incremented.
 * Since multicast mappings are handled in arp, there are no nce_mcast_entries
 * so we skip the nce_lookup_mapping call.
 * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL
 */
nce_t *
ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock)
{
	nce_t	*nce;
	in6_addr_t addr6;
	ip_stack_t *ipst = ill->ill_ipst;

	if (!caller_holds_lock)
		mutex_enter(&ipst->ips_ndp4->ndp_g_lock);

	/* Get head of v4 hash table */
	nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr));
	/* The shared lookup code works on v4-mapped IPv6 addresses. */
	IN6_IPADDR_TO_V4MAPPED(*addr, &addr6);
	/*
	 * NOTE: IPv4 never matches across the illgrp since the NCE's we're
	 * looking up have fastpath headers that are inherently per-ill.
	 */
	nce = nce_lookup_addr(ill, B_FALSE, &addr6, nce);
	if (!caller_holds_lock)
		mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
	return (nce);
}

/*
 * Cache entry lookup.  Try to find an nce matching the parameters passed.
 * Look only for exact entries (no mappings).  If an nce is found, increment
 * the hold count on that nce.  The caller passes in the start of the
 * appropriate hash table, and must be holding the appropriate global
 * lock (ndp_g_lock).
 */
static nce_t *
nce_lookup_addr(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr,
    nce_t *nce)
{
	ndp_g_t		*ndp;
	ip_stack_t	*ipst = ill->ill_ipst;

	if (ill->ill_isv6)
		ndp = ipst->ips_ndp6;
	else
		ndp = ipst->ips_ndp4;

	ASSERT(MUTEX_HELD(&ndp->ndp_g_lock));
	if (IN6_IS_ADDR_UNSPECIFIED(addr))
		return (NULL);
	for (; nce != NULL; nce = nce->nce_next) {
		/* Match this ill, or optionally any ill in its IPMP group. */
		if (nce->nce_ill == ill ||
		    match_illgrp && IS_IN_SAME_ILLGRP(ill, nce->nce_ill)) {
			/* Exact entries only: mask must be all-ones. */
			if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) &&
			    IN6_ARE_ADDR_EQUAL(&nce->nce_mask,
			    &ipv6_all_ones)) {
				mutex_enter(&nce->nce_lock);
				if (!(nce->nce_flags & NCE_F_CONDEMNED)) {
					NCE_REFHOLD_LOCKED(nce);
					mutex_exit(&nce->nce_lock);
					break;
				}
				/* CONDEMNED entries are never returned. */
				mutex_exit(&nce->nce_lock);
			}
		}
	}
	return (nce);
}

/*
 * Cache entry lookup.  Try to find an nce matching the parameters passed.
 * Look only for mappings.
 */
static nce_t *
nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr)
{
	nce_t	*nce;
	ip_stack_t *ipst = ill->ill_ipst;

	ASSERT(ill != NULL && ill->ill_isv6);
	ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock));
	if (!IN6_IS_ADDR_MULTICAST(addr))
		return (NULL);
	nce = ipst->ips_ndp6->nce_mask_entries;
	for (; nce != NULL; nce = nce->nce_next)
		if (nce->nce_ill == ill &&
		    (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) {
			mutex_enter(&nce->nce_lock);
			if (!(nce->nce_flags & NCE_F_CONDEMNED)) {
				NCE_REFHOLD_LOCKED(nce);
				mutex_exit(&nce->nce_lock);
				break;
			}
			mutex_exit(&nce->nce_lock);
		}
	return (nce);
}

/*
 * Process passed in parameters either from an incoming packet or via
 * user ioctl.  Implements the ND state-machine transitions for a single
 * nce: resolve an INCOMPLETE entry (draining its queued packets), or
 * update the link-layer address / reachability state of a resolved one.
 */
static void
nce_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv)
{
	ill_t	*ill = nce->nce_ill;
	uint32_t hw_addr_len = ill->ill_nd_lla_len;
	mblk_t	*mp;
	boolean_t ll_updated = B_FALSE;
	boolean_t ll_changed;
	ip_stack_t *ipst = ill->ill_ipst;

	ASSERT(nce->nce_ipversion == IPV6_VERSION);
	/*
	 * No updates of link layer address or the neighbor state is
	 * allowed, when the cache is in NONUD state.  This still
	 * allows for responding to reachability solicitation.
	 */
	mutex_enter(&nce->nce_lock);
	if (nce->nce_state == ND_INCOMPLETE) {
		if (hw_addr == NULL) {
			/* Nothing to resolve with; leave entry INCOMPLETE. */
			mutex_exit(&nce->nce_lock);
			return;
		}
		nce_set_ll(nce, hw_addr);
		/*
		 * Update nce state and send the queued packets
		 * back to ip this time ire will be added.
		 */
		if (flag & ND_NA_FLAG_SOLICITED) {
			nce_update(nce, ND_REACHABLE, NULL);
		} else {
			nce_update(nce, ND_STALE, NULL);
		}
		mutex_exit(&nce->nce_lock);
		nce_fastpath(nce);
		nce_cb_dispatch(nce);	/* complete callbacks */
		/* Detach the queued-packet chain under the lock, then drain. */
		mutex_enter(&nce->nce_lock);
		mp = nce->nce_qd_mp;
		nce->nce_qd_mp = NULL;
		mutex_exit(&nce->nce_lock);
		while (mp != NULL) {
			mblk_t *nxt_mp, *data_mp;

			nxt_mp = mp->b_next;
			mp->b_next = NULL;
			if (mp->b_datap->db_type == M_CTL)
				data_mp = mp->b_cont;
			else
				data_mp = mp;
			if (data_mp->b_prev != NULL) {
				ill_t   *inbound_ill;
				queue_t *fwdq = NULL;
				uint_t ifindex;

				/* b_prev carries the inbound ifindex */
				/* (set in ip_rput_data_v6). */
				ifindex = (uint_t)(uintptr_t)data_mp->b_prev;
				inbound_ill = ill_lookup_on_ifindex(ifindex,
				    B_TRUE, NULL, NULL, NULL, NULL, ipst);
				if (inbound_ill == NULL) {
					/*
					 * NOTE(review): this early return
					 * appears to drop the remaining
					 * queued chain (nxt_mp onward)
					 * without freeing it — confirm
					 * whether that leak is intentional.
					 */
					data_mp->b_prev = NULL;
					freemsg(mp);
					return;
				} else {
					fwdq = inbound_ill->ill_rq;
				}
				data_mp->b_prev = NULL;
				/*
				 * Send a forwarded packet back into ip_rput_v6
				 * just as in ire_send_v6().
				 * Extract the queue from b_prev (set in
				 * ip_rput_data_v6).
				 */
				if (fwdq != NULL) {
					/*
					 * Forwarded packets hop count will
					 * get decremented in ip_rput_data_v6
					 */
					if (data_mp != mp)
						freeb(mp);
					put(fwdq, data_mp);
				} else {
					/*
					 * Send locally originated packets back
					 * into ip_wput_v6.
					 */
					put(ill->ill_wq, mp);
				}
				ill_refrele(inbound_ill);
			} else {
				put(ill->ill_wq, mp);
			}
			mp = nxt_mp;
		}
		return;
	}
	/* Entry already resolved: decide whether the lladdr changed. */
	ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len);
	if (!is_adv) {
		/* If this is a SOLICITATION request only */
		if (ll_changed)
			nce_update(nce, ND_STALE, hw_addr);
		mutex_exit(&nce->nce_lock);
		nce_cb_dispatch(nce);
		return;
	}
	if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) {
		/* If in any other state than REACHABLE, ignore */
		if (nce->nce_state == ND_REACHABLE) {
			nce_update(nce, ND_STALE, NULL);
		}
		mutex_exit(&nce->nce_lock);
		nce_cb_dispatch(nce);
		return;
	} else {
		if (ll_changed) {
			nce_update(nce, ND_UNCHANGED, hw_addr);
			ll_updated = B_TRUE;
		}
		if (flag & ND_NA_FLAG_SOLICITED) {
			nce_update(nce, ND_REACHABLE, NULL);
		} else {
			if (ll_updated) {
				nce_update(nce, ND_STALE, NULL);
			}
		}
		mutex_exit(&nce->nce_lock);
		if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags &
		    NCE_F_ISROUTER)) {
			ire_t *ire;

			/*
			 * Router turned to host.  We need to remove the
			 * entry as well as any default route that may be
			 * using this as a next hop.  This is required by
			 * section 7.2.5 of RFC 2461.
			 */
			ire = ire_ftable_lookup_v6(&ipv6_all_zeros,
			    &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT,
			    nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL,
			    MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW |
			    MATCH_IRE_DEFAULT, ipst);
			if (ire != NULL) {
				ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst);
				ire_delete(ire);
				ire_refrele(ire);
			}
			ndp_delete(nce);	/* will do nce_cb_dispatch */
		} else {
			nce_cb_dispatch(nce);
		}
	}
}

/*
 * Walker state structure used by ndp_process() / ndp_process_entry().
 */
typedef struct ndp_process_data {
	ill_t		*np_ill;	/* ill/illgrp to match against */
	const in6_addr_t *np_addr;	/* IPv6 address to match */
	uchar_t		*np_hw_addr;	/* passed to nce_process() */
	uint32_t	np_flag;	/* passed to nce_process() */
	boolean_t	np_is_adv;	/* passed to nce_process() */
} ndp_process_data_t;

/*
 * Walker callback used by ndp_process() for IPMP groups: calls nce_process()
 * for each NCE with a matching address that's in the same IPMP group.
 */
static void
ndp_process_entry(nce_t *nce, void *arg)
{
	ndp_process_data_t *npp = arg;

	/* Exact (all-ones mask) entries in the same illgrp only. */
	if (IS_IN_SAME_ILLGRP(nce->nce_ill, npp->np_ill) &&
	    IN6_ARE_ADDR_EQUAL(&nce->nce_addr, npp->np_addr) &&
	    IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) {
		nce_process(nce, npp->np_hw_addr, npp->np_flag,
		    npp->np_is_adv);
	}
}

/*
 * Wrapper around nce_process() that handles IPMP.  In particular, for IPMP,
 * NCEs are per-underlying-ill (because of nce_fp_mp) and thus we may have
 * more than one NCE for a given IPv6 address to tend to.  In that case, we
 * need to walk all NCEs and callback nce_process() for each one.  Since this
 * is expensive, in the non-IPMP case we just directly call nce_process().
 * Ultimately, nce_fp_mp needs to be moved out of the nce_t so that all IP
 * interfaces in an IPMP group share the same NCEs -- at which point this
 * function can be removed entirely.
 */
void
ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv)
{
	ill_t	*ill = nce->nce_ill;
	struct ndp_g_s *ndp = ill->ill_ipst->ips_ndp6;
	ndp_process_data_t np;

	if (ill->ill_grp == NULL) {
		nce_process(nce, hw_addr, flag, is_adv);
		return;
	}

	/* IPMP case: walk all NCEs */
	np.np_ill = ill;
	np.np_addr = &nce->nce_addr;
	np.np_flag = flag;
	np.np_is_adv = is_adv;
	np.np_hw_addr = hw_addr;

	/*
	 * NOTE(review): the callback is cast to pfi_t and ALL_ZONES is
	 * passed where ndp_walk_common() declares a boolean_t `trace`
	 * parameter — presumably relying on ALL_ZONES being nonzero;
	 * confirm against ndp_walk_common()'s contract.
	 */
	ndp_walk_common(ndp, NULL, (pfi_t)ndp_process_entry, &np, ALL_ZONES);
}

/*
 * Pass arg1 to the pfi supplied, along with each nce in existence.
 * ndp_walk() places a REFHOLD on the nce and drops the lock when
 * walking the hash list.
 */
void
ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1,
    boolean_t trace)
{
	nce_t	*nce;
	nce_t	*nce1;
	nce_t	**ncep;
	nce_t	*free_nce_list = NULL;

	mutex_enter(&ndp->ndp_g_lock);
	/* Prevent ndp_delete from unlink and free of NCE */
	ndp->ndp_g_walker++;
	mutex_exit(&ndp->ndp_g_lock);

	/* Walk every hash bucket; nce1 preserves the successor in case */
	/* the callback condemns the current entry. */
	for (ncep = ndp->nce_hash_tbl;
	    ncep < A_END(ndp->nce_hash_tbl); ncep++) {
		for (nce = *ncep; nce != NULL; nce = nce1) {
			nce1 = nce->nce_next;
			if (ill == NULL || nce->nce_ill == ill) {
				if (trace) {
					NCE_REFHOLD(nce);
					(*pfi)(nce, arg1);
					NCE_REFRELE(nce);
				} else {
					NCE_REFHOLD_NOTR(nce);
					(*pfi)(nce, arg1);
					NCE_REFRELE_NOTR(nce);
				}
			}
		}
	}
	/* Also walk the multicast mapping entries. */
	for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) {
		nce1 = nce->nce_next;
		if (ill == NULL || nce->nce_ill == ill) {
			if (trace) {
				NCE_REFHOLD(nce);
				(*pfi)(nce, arg1);
				NCE_REFRELE(nce);
			} else {
				NCE_REFHOLD_NOTR(nce);
				(*pfi)(nce, arg1);
				NCE_REFRELE_NOTR(nce);
			}
		}
	}
	mutex_enter(&ndp->ndp_g_lock);
	ndp->ndp_g_walker--;
	/*
	 * While NCE's are removed from global list they are placed
	 * in a private list, to be passed to nce_ire_delete_list().
	 * The reason is, there may be ires pointing to this nce
	 * which needs to cleaned up.
	 */
	if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) {
		/* Time to delete condemned entries */
		for (ncep = ndp->nce_hash_tbl;
		    ncep < A_END(ndp->nce_hash_tbl); ncep++) {
			nce = *ncep;
			if (nce != NULL) {
				nce_remove(ndp, nce, &free_nce_list);
			}
		}
		nce = ndp->nce_mask_entries;
		if (nce != NULL) {
			nce_remove(ndp, nce, &free_nce_list);
		}
		ndp->ndp_g_walker_cleanup = B_FALSE;
	}
	mutex_exit(&ndp->ndp_g_lock);

	if (free_nce_list != NULL) {
		nce_ire_delete_list(free_nce_list);
	}
}

/*
 * Walk everything.
 * Note that ill can be NULL hence can't derive the ipst from it.
 */
void
ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst)
{
	ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE);
	ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE);
}

/*
 * Process resolve requests.
 * Handles both mapped entries
 * as well as cases that need to be sent out on the wire.
 * Lookup a NCE for a given IRE.  Regardless of whether one exists
 * or one is created, we defer making ire point to nce until the
 * ire is actually added at which point the nce_refcnt on the nce is
 * incremented.  This is done primarily to have symmetry between ire_add()
 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
 */
int
ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid)
{
	nce_t		*nce, *hw_nce = NULL;
	int		err;
	ill_t		*ipmp_ill;
	uint16_t	nce_flags;
	mblk_t		*mp_nce = NULL;
	ip_stack_t	*ipst = ill->ill_ipst;
	uchar_t		*hwaddr = NULL;

	ASSERT(ill->ill_isv6);

	/* Multicast destinations resolve via the mapping machinery. */
	if (IN6_IS_ADDR_MULTICAST(dst))
		return (nce_set_multicast(ill, dst));

	nce_flags = (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0;
	/*
	 * If `ill' is under IPMP, then first check to see if there's an NCE
	 * for `dst' on the IPMP meta-interface (e.g., because an application
	 * explicitly did an SIOCLIFSETND to tie a hardware address to `dst').
	 * If so, we use that hardware address when creating the NCE below.
	 * Note that we don't yet have a mechanism to remove these NCEs if the
	 * NCE for `dst' on the IPMP meta-interface is subsequently removed --
	 * but rather than build such a beast, we should fix NCEs so that they
	 * can be properly shared across an IPMP group.
	 */
	if (IS_UNDER_IPMP(ill)) {
		if ((ipmp_ill = ipmp_ill_hold_ipmp_ill(ill)) != NULL) {
			hw_nce = ndp_lookup_v6(ipmp_ill, B_FALSE, dst,
			    B_FALSE);
			if (hw_nce != NULL && hw_nce->nce_res_mp != NULL) {
				hwaddr = hw_nce->nce_res_mp->b_rptr +
				    NCE_LL_ADDR_OFFSET(ipmp_ill);
				nce_flags |= hw_nce->nce_flags;
			}
			ill_refrele(ipmp_ill);
		}
	}

	err = ndp_lookup_then_add_v6(ill,
	    B_FALSE,	/* NCE fastpath is per ill; don't match across group */
	    hwaddr,
	    dst,
	    &ipv6_all_ones,
	    &ipv6_all_zeros,
	    0,
	    nce_flags,
	    hwaddr != NULL ? ND_REACHABLE : ND_INCOMPLETE,
	    &nce);

	if (hw_nce != NULL)
		NCE_REFRELE(hw_nce);

	switch (err) {
	case 0:
		/*
		 * New cache entry was created.  Make sure that the state
		 * is not ND_INCOMPLETE.  It can be in some other state
		 * even before we send out the solicitation as we could
		 * get un-solicited advertisements.
		 *
		 * If this is an XRESOLV interface, simply return 0,
		 * since we don't want to solicit just yet.
		 */
		if (ill->ill_flags & ILLF_XRESOLV) {
			NCE_REFRELE(nce);
			return (0);
		}
		mutex_enter(&nce->nce_lock);
		if (nce->nce_state != ND_INCOMPLETE) {
			/* Already resolved; nothing to queue. */
			mutex_exit(&nce->nce_lock);
			NCE_REFRELE(nce);
			return (0);
		}
		if (nce->nce_rcnt == 0) {
			/* No retransmits allowed: give up immediately. */
			/* The caller will free mp */
			mutex_exit(&nce->nce_lock);
			ndp_delete(nce);
			NCE_REFRELE(nce);
			return (ESRCH);
		}
		mp_nce = ip_prepend_zoneid(mp, zoneid, ipst);
		if (mp_nce == NULL) {
			/* The caller will free mp */
			mutex_exit(&nce->nce_lock);
			ndp_delete(nce);
			NCE_REFRELE(nce);
			return (ENOMEM);
		}
		/* Queue the packet and kick off neighbor solicitation. */
		nce_queue_mp(nce, mp_nce);
		ip_ndp_resolve(nce);
		mutex_exit(&nce->nce_lock);
		NCE_REFRELE(nce);
		return (EINPROGRESS);
	case EEXIST:
		/* Resolution in progress just queue the packet */
		mutex_enter(&nce->nce_lock);
		if (nce->nce_state == ND_INCOMPLETE) {
			mp_nce = ip_prepend_zoneid(mp, zoneid, ipst);
			if (mp_nce == NULL) {
				err = ENOMEM;
			} else {
				nce_queue_mp(nce, mp_nce);
				err = EINPROGRESS;
			}
		} else {
			/*
			 * Any other state implies we have
			 * a nce but IRE needs to be added ...
			 * ire_add_v6() will take care of the
			 * the case when the nce becomes CONDEMNED
			 * before the ire is added to the table.
			 */
			err = 0;
		}
		mutex_exit(&nce->nce_lock);
		NCE_REFRELE(nce);
		break;
	default:
		ip1dbg(("ndp_resolver: Can't create NCE %d\n", err));
		break;
	}
	return (err);
}

/*
 * When there is no resolver, the link layer template is passed in
 * the IRE.
 * Lookup a NCE for a given IRE.  Regardless of whether one exists
 * or one is created, we defer making ire point to nce until the
 * ire is actually added at which point the nce_refcnt on the nce is
 * incremented.  This is done primarily to have symmetry between ire_add()
 * and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
*/ int ndp_noresolver(ill_t *ill, const in6_addr_t *dst) { nce_t *nce; int err = 0; ASSERT(ill != NULL); ASSERT(ill->ill_isv6); if (IN6_IS_ADDR_MULTICAST(dst)) { err = nce_set_multicast(ill, dst); return (err); } err = ndp_lookup_then_add_v6(ill, B_FALSE, /* NCE fastpath is per ill; don't match across group */ NULL, /* hardware address */ dst, &ipv6_all_ones, &ipv6_all_zeros, 0, (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, ND_REACHABLE, &nce); switch (err) { case 0: /* * Cache entry with a proper resolver cookie was * created. */ NCE_REFRELE(nce); break; case EEXIST: err = 0; NCE_REFRELE(nce); break; default: ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); break; } return (err); } /* * For each interface an entry is added for the unspecified multicast group. * Here that mapping is used to form the multicast cache entry for a particular * multicast destination. */ static int nce_set_multicast(ill_t *ill, const in6_addr_t *dst) { nce_t *mnce; /* Multicast mapping entry */ nce_t *nce; uchar_t *hw_addr = NULL; int err = 0; ip_stack_t *ipst = ill->ill_ipst; ASSERT(ill != NULL); ASSERT(ill->ill_isv6); ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); mutex_enter(&ipst->ips_ndp6->ndp_g_lock); nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *dst)); nce = nce_lookup_addr(ill, B_FALSE, dst, nce); if (nce != NULL) { mutex_exit(&ipst->ips_ndp6->ndp_g_lock); NCE_REFRELE(nce); return (0); } /* No entry, now lookup for a mapping this should never fail */ mnce = nce_lookup_mapping(ill, dst); if (mnce == NULL) { /* Something broken for the interface. */ mutex_exit(&ipst->ips_ndp6->ndp_g_lock); return (ESRCH); } ASSERT(mnce->nce_flags & NCE_F_MAPPING); if (ill->ill_net_type == IRE_IF_RESOLVER) { /* * For IRE_IF_RESOLVER a hardware mapping can be * generated, for IRE_IF_NORESOLVER, resolution cookie * in the ill is copied in ndp_add_v6(). 
		 */
		hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP);
		if (hw_addr == NULL) {
			mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
			NCE_REFRELE(mnce);
			return (ENOMEM);
		}
		/* Derive the multicast link-layer address from `dst' */
		nce_make_mapping(mnce, hw_addr, (uchar_t *)dst);
	}
	NCE_REFRELE(mnce);

	/*
	 * IRE_IF_NORESOLVER type simply copies the resolution
	 * cookie passed in.  So no hw_addr is needed.
	 */
	err = ndp_add_v6(ill, hw_addr, dst, &ipv6_all_ones, &ipv6_all_zeros,
	    0, NCE_F_NONUD, ND_REACHABLE, &nce);
	mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
	/* hw_addr is copied by ndp_add_v6(); free our temporary buffer */
	if (hw_addr != NULL)
		kmem_free(hw_addr, ill->ill_nd_lla_len);
	if (err != 0) {
		/*
		 * NOTE(review): adjacent literals produce
		 * "create failed%d" with no separating space -- the message
		 * is likely missing a space; confirm before changing.
		 */
		ip1dbg(("nce_set_multicast: create failed" "%d\n", err));
		return (err);
	}
	NCE_REFRELE(nce);
	return (0);
}

/*
 * Return the link layer address, and any flags of a nce.
 * Returns 0 on success; ESRCH if no reachable NCE exists for the
 * address in lnr->lnr_addr.
 */
int
ndp_query(ill_t *ill, struct lif_nd_req *lnr)
{
	nce_t		*nce;
	in6_addr_t	*addr;
	sin6_t		*sin6;
	dl_unitdata_req_t	*dl;

	ASSERT(ill != NULL && ill->ill_isv6);
	sin6 = (sin6_t *)&lnr->lnr_addr;
	addr = &sin6->sin6_addr;

	/*
	 * NOTE: if the ill is an IPMP interface, then match against the whole
	 * illgrp.  This e.g. allows in.ndpd to retrieve the link layer
	 * addresses for the data addresses on an IPMP interface even though
	 * ipif_ndp_up() created them with an nce_ill of ipif_bound_ill.
	 */
	nce = ndp_lookup_v6(ill, IS_IPMP(ill), addr, B_FALSE);
	if (nce == NULL)
		return (ESRCH);
	/* If in INCOMPLETE state, no link layer address is available yet */
	if (!NCE_ISREACHABLE(nce)) {
		NCE_REFRELE(nce);
		return (ESRCH);
	}
	dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr;
	if (ill->ill_flags & ILLF_XRESOLV)
		lnr->lnr_hdw_len = dl->dl_dest_addr_length;
	else
		lnr->lnr_hdw_len = ill->ill_nd_lla_len;
	ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <=
	    sizeof (lnr->lnr_hdw_addr));
	/* Copy the link-layer address out of the cached DL_UNITDATA_REQ */
	bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill),
	    (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len);
	if (nce->nce_flags & NCE_F_ISROUTER)
		lnr->lnr_flags = NDF_ISROUTER_ON;
	if (nce->nce_flags & NCE_F_ANYCAST)
		lnr->lnr_flags |= NDF_ANYCAST_ON;
	NCE_REFRELE(nce);
	return (0);
}

/*
 * Send Enable/Disable multicast reqs to driver.
 * On success (return 0), `mp' is consumed by ill_dlpi_send(); on the
 * EINVAL/ESRCH failure paths below, `mp' is freed here -- except the
 * unknown-primitive default case (see note there).
 */
int
ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len,
    uint32_t hw_addr_offset, mblk_t *mp)
{
	nce_t		*nce;
	uchar_t		*hw_addr;
	ip_stack_t	*ipst = ill->ill_ipst;

	ASSERT(ill != NULL && ill->ill_isv6);
	ASSERT(ill->ill_net_type == IRE_IF_RESOLVER);
	hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len);
	if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) {
		freemsg(mp);
		return (EINVAL);
	}
	mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
	nce = nce_lookup_mapping(ill, addr);
	if (nce == NULL) {
		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
		freemsg(mp);
		return (ESRCH);
	}
	mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
	/*
	 * Update dl_addr_length and dl_addr_offset for primitives that
	 * have physical addresses as opposed to full saps
	 */
	switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) {
	case DL_ENABMULTI_REQ:
		/* Track the state if this is the first enabmulti */
		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
		ip1dbg(("ndp_mcastreq: ENABMULTI\n"));
		break;
	case DL_DISABMULTI_REQ:
		ip1dbg(("ndp_mcastreq: DISABMULTI\n"));
		break;
	default:
		/*
		 * NOTE(review): unlike the earlier failure paths, this
		 * branch does not freemsg(mp) -- looks like a possible
		 * mblk leak for unexpected primitives; confirm against
		 * caller expectations before changing.
		 */
		NCE_REFRELE(nce);
		ip1dbg(("ndp_mcastreq: default\n"));
		return (EINVAL);
	}
	/* Overwrite the multicast hw address in place, then hand to driver */
	nce_make_mapping(nce, hw_addr, (uchar_t *)addr);
	NCE_REFRELE(nce);
	ill_dlpi_send(ill, mp);
	return (0);
}

/*
 * Send out a NS for resolving the ip address in nce.
 * Entered and exited with nce_lock held; the lock is dropped internally
 * around nce_solicit()/failure handling and reacquired before return.
 */
void
ip_ndp_resolve(nce_t *nce)
{
	in6_addr_t	sender6 = ipv6_all_zeros;
	uint32_t	ms;
	mblk_t		*mp;
	ip6_t		*ip6h;

	ASSERT(MUTEX_HELD(&nce->nce_lock));
	/*
	 * Pick the src from outgoing packet, if one is available.
	 * Otherwise let nce_xmit figure out the src.
	 */
	if ((mp = nce->nce_qd_mp) != NULL) {
		/* Handle ip_newroute_v6 giving us IPSEC packets */
		if (mp->b_datap->db_type == M_CTL)
			mp = mp->b_cont;
		ip6h = (ip6_t *)mp->b_rptr;
		if (ip6h->ip6_nxt == IPPROTO_RAW) {
			/*
			 * This message should have been pulled up already in
			 * ip_wput_v6.  We can't do pullups here because
			 * the message could be from the nce_qd_mp which could
			 * have b_next/b_prev non-NULL.
			 */
			ASSERT(MBLKL(mp) >= sizeof (ip6i_t) + IPV6_HDR_LEN);
			/* Skip the ip6i_t control header to the real ip6_t */
			ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t));
		}
		sender6 = ip6h->ip6_src;
	}
	/* ms == 0 means the retransmit count is exhausted; give up */
	ms = nce_solicit(nce, sender6);
	mutex_exit(&nce->nce_lock);
	if (ms == 0) {
		/*
		 * NOTE(review): nce_state is read here without nce_lock
		 * held -- presumably safe in context; verify.
		 */
		if (nce->nce_state != ND_REACHABLE) {
			nce_resolv_failed(nce);
			ndp_delete(nce);
		}
	} else {
		NDP_RESTART_TIMER(nce, (clock_t)ms);
	}
	mutex_enter(&nce->nce_lock);
}

/*
 * Send a neighbor solicitation.
 * Returns number of milliseconds after which we should either rexmit or abort.
 * Return of zero means we should abort.
 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt.
 *
 * NOTE: This routine drops nce_lock (and later reacquires it) when sending
 * the packet.
 */
uint32_t
nce_solicit(nce_t *nce, in6_addr_t sender)
{
	boolean_t	dropped;

	ASSERT(nce->nce_ipversion == IPV6_VERSION);
	ASSERT(MUTEX_HELD(&nce->nce_lock));

	if (nce->nce_rcnt == 0)
		return (0);

	nce->nce_rcnt--;
	mutex_exit(&nce->nce_lock);
	dropped = nce_xmit_solicit(nce, B_TRUE, &sender, 0);
	mutex_enter(&nce->nce_lock);
	/* The solicit was not sent; don't charge it against the rexmit count */
	if (dropped)
		nce->nce_rcnt++;
	return (nce->nce_ill->ill_reachable_retrans_time);
}

/*
 * Attempt to recover an address on an interface that's been marked as a
 * duplicate.
 * Because NCEs are destroyed when the interface goes down, there's
 * no easy way to just probe the address and have the right thing happen if
 * it's no longer in use.  Instead, we just bring it up normally and allow the
 * regular interface start-up logic to probe for a remaining duplicate and take
 * us back down if necessary.
 * Neither DHCP nor temporary addresses arrive here; they're excluded by
 * ip_ndp_excl.
 *
 * Runs as a qwriter_ip() callback (see ndp_do_recovery); mp carries the
 * in6_addr_t being recovered and is freed here.
 */
/* ARGSUSED */
static void
ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
{
	ill_t	*ill = rq->q_ptr;
	ipif_t	*ipif;
	in6_addr_t *addr = (in6_addr_t *)mp->b_rptr;

	/* Walk all ipifs on the ill looking for ones with this address */
	for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
		/*
		 * We do not support recovery of proxy ARP'd interfaces,
		 * because the system lacks a complete proxy ARP mechanism.
		 */
		if ((ipif->ipif_flags & IPIF_POINTOPOINT) ||
		    !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) {
			continue;
		}

		/*
		 * If we have already recovered or if the interface is going
		 * away, then ignore.
		 */
		mutex_enter(&ill->ill_lock);
		if (!(ipif->ipif_flags & IPIF_DUPLICATE) ||
		    (ipif->ipif_state_flags & IPIF_CONDEMNED)) {
			mutex_exit(&ill->ill_lock);
			continue;
		}

		ipif->ipif_flags &= ~IPIF_DUPLICATE;
		ill->ill_ipif_dup_count--;
		mutex_exit(&ill->ill_lock);
		ipif->ipif_was_dup = B_TRUE;

		/* Bring the ipif back up; start-up DAD re-checks for dups */
		VERIFY(ipif_ndp_up(ipif, B_TRUE) != EINPROGRESS);
		(void) ipif_up_done_v6(ipif);
	}
	freeb(mp);
}

/*
 * Attempt to recover an IPv6 interface that's been shut down as a duplicate.
 * As long as someone else holds the address, the interface will stay down.
 * When that conflict goes away, the interface is brought back up.  This is
 * done so that accidental shutdowns of addresses aren't made permanent.  Your
 * server will recover from a failure.
 *
 * For DHCP and temporary addresses, recovery is not done in the kernel.
 * Instead, it's handled by user space processes (dhcpagent and in.ndpd).
 *
 * This function is entered on a timer expiry; the ID is in ipif_recovery_id.
 */
static void
ipif6_dup_recovery(void *arg)
{
	ipif_t *ipif = arg;

	/* The timer has fired; clear the outstanding timeout id first */
	ipif->ipif_recovery_id = 0;
	if (!(ipif->ipif_flags & IPIF_DUPLICATE))
		return;

	/*
	 * No lock, because this is just an optimization.
	 */
	if (ipif->ipif_state_flags & IPIF_CONDEMNED)
		return;

	/* If the link is down, we'll retry this later */
	if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING))
		return;

	ndp_do_recovery(ipif);
}

/*
 * Perform interface recovery by forcing the duplicate interfaces up and
 * allowing the system to determine which ones should stay up.
 *
 * Called both by recovery timer expiry and link-up notification.
 * If the message allocation fails, recovery is simply re-armed via a
 * new timeout rather than being abandoned.
 */
void
ndp_do_recovery(ipif_t *ipif)
{
	ill_t *ill = ipif->ipif_ill;
	mblk_t *mp;
	ip_stack_t *ipst = ill->ill_ipst;

	mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED);
	if (mp == NULL) {
		/* Could not allocate; retry later via the recovery timer */
		mutex_enter(&ill->ill_lock);
		if (ipif->ipif_recovery_id == 0 &&
		    !(ipif->ipif_state_flags & IPIF_CONDEMNED)) {
			ipif->ipif_recovery_id = timeout(ipif6_dup_recovery,
			    ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery));
		}
		mutex_exit(&ill->ill_lock);
	} else {
		/*
		 * A recovery timer may still be running if we got here from
		 * ill_restart_dad(); cancel that timer.
		 */
		if (ipif->ipif_recovery_id != 0)
			(void) untimeout(ipif->ipif_recovery_id);
		ipif->ipif_recovery_id = 0;

		/* Hand the address to ip_ndp_recover() as an exclusive op */
		bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr,
		    sizeof (ipif->ipif_v6lcl_addr));
		ill_refhold(ill);
		qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_recover, NEW_OP,
		    B_FALSE);
	}
}

/*
 * Find the MAC and IP addresses in an NA/NS message.
 *
 * On return, *targp is the ND target address, and *haddr/*haddrlenp
 * describe the sender's hardware address (NULL/0 when none could be
 * extracted).  *haddr points into `mp' or `dl_mp'; it is not copied.
 */
static void
ip_ndp_find_addresses(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, in6_addr_t *targp,
    uchar_t **haddr, uint_t *haddrlenp)
{
	ip6_t	*ip6h = (ip6_t *)mp->b_rptr;
	icmp6_t	*icmp6 = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
	nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6;
	nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6;
	uchar_t *addr;
	int alen = 0;

	if (dl_mp == NULL) {
		nd_opt_hdr_t *opt = NULL;
		int len;

		/*
		 * If it's from the fast-path, then it can't be a probe
		 * message, and thus must include a linkaddr option.
		 * Extract that here.
		 */
		switch (icmp6->icmp6_type) {
		case ND_NEIGHBOR_SOLICIT:
			len = mp->b_wptr - (uchar_t *)ns;
			if ((len -= sizeof (*ns)) > 0) {
				opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1),
				    len, ND_OPT_SOURCE_LINKADDR);
			}
			break;
		case ND_NEIGHBOR_ADVERT:
			len = mp->b_wptr - (uchar_t *)na;
			if ((len -= sizeof (*na)) > 0) {
				opt = ndp_get_option((nd_opt_hdr_t *)(na + 1),
				    len, ND_OPT_TARGET_LINKADDR);
			}
			break;
		}

		/* Only accept an option long enough to hold a full lla */
		if (opt != NULL && opt->nd_opt_len * 8 - sizeof (*opt) >=
		    ill->ill_nd_lla_len) {
			addr = (uchar_t *)(opt + 1);
			alen = ill->ill_nd_lla_len;
		}

		/*
		 * We cheat a bit here for the sake of printing usable log
		 * messages in the rare case where the reply we got was unicast
		 * without a source linkaddr option, and the interface is in
		 * fastpath mode.  (Sigh.)
		 */
		if (alen == 0 && ill->ill_type == IFT_ETHER &&
		    MBLKHEAD(mp) >= sizeof (struct ether_header)) {
			struct ether_header *pether;

			/* Reach back into the (still-present) MAC header */
			pether = (struct ether_header *)((char *)ip6h -
			    sizeof (*pether));
			addr = pether->ether_shost.ether_addr_octet;
			alen = ETHERADDRL;
		}
	} else {
		dl_unitdata_ind_t *dlu;

		/* Pull the source address out of the DL_UNITDATA_IND */
		dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr;
		alen = dlu->dl_src_addr_length;
		/*
		 * NOTE(review): if alen > 0 but the offset bounds check
		 * below fails, `addr' is left unset while alen stays
		 * positive, and *haddr below would be uninitialized --
		 * looks like a latent bug; confirm whether the DLPI
		 * provider guarantees a valid offset here.
		 */
		if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) &&
		    dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) {
			addr = dl_mp->b_rptr + dlu->dl_src_addr_offset;
			/* Strip the SAP, whichever end of the address it's on */
			if (ill->ill_sap_length < 0) {
				alen += ill->ill_sap_length;
			} else {
				addr += ill->ill_sap_length;
				alen -= ill->ill_sap_length;
			}
		}
	}

	if (alen > 0) {
		*haddr = addr;
		*haddrlenp = alen;
	} else {
		*haddr = NULL;
		*haddrlenp = 0;
	}

	/* nd_ns_target and nd_na_target are at the same offset, so we cheat */
	*targp = ns->nd_ns_target;
}

/*
 * This is for exclusive changes due to NDP duplicate address detection
 * failure.  Runs as a qwriter_ip() callback queued by ip_ndp_failure();
 * takes the duplicate ipif down and arms the recovery timer.
 */
/* ARGSUSED */
static void
ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
{
	ill_t	*ill = rq->q_ptr;
	ipif_t	*ipif;
	mblk_t	*dl_mp = NULL;
	uchar_t	*haddr;
	uint_t	haddrlen;
	ip_stack_t *ipst = ill->ill_ipst;
	in6_addr_t targ;

	/* A leading non-M_DATA block is the DLPI info; the packet follows */
	if (DB_TYPE(mp) != M_DATA) {
		dl_mp = mp;
		mp = mp->b_cont;
	}

	ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr, &haddrlen);
	if (haddr != NULL && haddrlen == ill->ill_phys_addr_length) {
		/*
		 * Ignore conflicts generated by misbehaving switches that
		 * just reflect our own messages back to us.  For IPMP, we may
		 * see reflections across any ill in the illgrp.
		 */
		if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 ||
		    IS_UNDER_IPMP(ill) && ipmp_illgrp_find_ill(ill->ill_grp,
		    haddr, haddrlen) != NULL)
			goto ignore_conflict;
	}

	/*
	 * Look up the appropriate ipif.
	 */
	ipif = ipif_lookup_addr_v6(&targ, ill, ALL_ZONES, NULL, NULL, NULL,
	    NULL, ipst);
	if (ipif == NULL)
		goto ignore_conflict;

	/* Reload the ill to match the ipif */
	ill = ipif->ipif_ill;

	/*
	 * If it's already duplicate or ineligible, then don't do anything.
	 */
	if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) {
		ipif_refrele(ipif);
		goto ignore_conflict;
	}

	/*
	 * If this is a failure during duplicate recovery, then don't
	 * complain.  It may take a long time to recover.
	 */
	if (!ipif->ipif_was_dup) {
		char ibuf[LIFNAMSIZ];
		char hbuf[MAC_STR_LEN];
		char sbuf[INET6_ADDRSTRLEN];

		ipif_get_name(ipif, ibuf, sizeof (ibuf));
		cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);"
		    " disabled", ibuf,
		    inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)),
		    mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf)));
	}
	/* Mark the ipif duplicate and take it down */
	mutex_enter(&ill->ill_lock);
	ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE));
	ipif->ipif_flags |= IPIF_DUPLICATE;
	ill->ill_ipif_dup_count++;
	mutex_exit(&ill->ill_lock);
	(void) ipif_down(ipif, NULL, NULL);
	ipif_down_tail(ipif);
	mutex_enter(&ill->ill_lock);
	/*
	 * Arm the in-kernel recovery timer only for static addresses on
	 * resolver interfaces; DHCP/temporary addresses are recovered by
	 * user space instead.
	 */
	if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) &&
	    ill->ill_net_type == IRE_IF_RESOLVER &&
	    !(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
	    ipst->ips_ip_dup_recovery > 0) {
		ASSERT(ipif->ipif_recovery_id == 0);
		ipif->ipif_recovery_id = timeout(ipif6_dup_recovery,
		    ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery));
	}
	mutex_exit(&ill->ill_lock);
	ipif_refrele(ipif);
ignore_conflict:
	if (dl_mp != NULL)
		freeb(dl_mp);
	freemsg(mp);
}

/*
 * Handle failure by tearing down the ipifs with the specified address.  Note
 * that tearing down the ipif also means deleting the nce through ipif_down, so
 * it's not possible to do recovery by just restarting the nce timer.  Instead,
 * we start a timer on the ipif.
 *
 * The caller keeps ownership of `mp' and `dl_mp'; copies are queued to
 * ip_ndp_excl() via qwriter_ip().  Allocation failures are silently
 * dropped (best effort).
 */
static void
ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
{
	if ((mp = copymsg(mp)) != NULL) {
		if (dl_mp == NULL)
			dl_mp = mp;
		else if ((dl_mp = copyb(dl_mp)) != NULL)
			dl_mp->b_cont = mp;
		if (dl_mp == NULL) {
			freemsg(mp);
		} else {
			ill_refhold(ill);
			qwriter_ip(ill, ill->ill_rq, dl_mp, ip_ndp_excl, NEW_OP,
			    B_FALSE);
		}
	}
}

/*
 * Handle a discovered conflict: some other system is advertising that it owns
 * one of our IP addresses.
 * We need to defend ourselves, or just shut down the
 * interface.
 */
static void
ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce)
{
	ipif_t *ipif;
	uint32_t now;
	uint_t maxdefense;
	uint_t defs;
	ip_stack_t *ipst = ill->ill_ipst;

	ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL,
	    NULL, NULL, ipst);
	if (ipif == NULL)
		return;

	/*
	 * First, figure out if this address is disposable.
	 */
	if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY))
		maxdefense = ipst->ips_ip_max_temp_defend;
	else
		maxdefense = ipst->ips_ip_max_defend;

	/*
	 * Now figure out how many times we've defended ourselves.  Ignore
	 * defenses that happened long in the past.
	 */
	now = gethrestime_sec();
	mutex_enter(&nce->nce_lock);
	if ((defs = nce->nce_defense_count) > 0 &&
	    now - nce->nce_defense_time > ipst->ips_ip_defend_interval) {
		/* The last defense was long ago; restart the count */
		nce->nce_defense_count = defs = 0;
	}
	nce->nce_defense_count++;
	nce->nce_defense_time = now;
	mutex_exit(&nce->nce_lock);
	ipif_refrele(ipif);

	/*
	 * If we've defended ourselves too many times already, then give up and
	 * tear down the interface(s) using this address.  Otherwise, defend by
	 * sending out an unsolicited Neighbor Advertisement.
	 */
	if (defs >= maxdefense) {
		ip_ndp_failure(ill, mp, dl_mp);
	} else {
		char hbuf[MAC_STR_LEN];
		char sbuf[INET6_ADDRSTRLEN];
		uchar_t *haddr;
		uint_t haddrlen;
		in6_addr_t targ;

		ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr,
		    &haddrlen);
		cmn_err(CE_WARN, "node %s is using our IP address %s on %s",
		    mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf)),
		    inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)),
		    ill->ill_name);
		/* Defend: multicast an unsolicited NA claiming the address */
		(void) nce_xmit_advert(nce, B_FALSE, &ipv6_all_hosts_mcast, 0);
	}
}

/*
 * Process an incoming Neighbor Solicitation: validate it, create/update
 * the NCE for the soliciting node, handle DAD probes, and send an NA in
 * response where appropriate.  Bad solicitations bump the
 * ipv6IfIcmpInBadNeighborSolicitations MIB counter.
 */
static void
ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
{
	nd_neighbor_solicit_t *ns;
	uint32_t	hlen = ill->ill_nd_lla_len;
	uchar_t		*haddr = NULL;
	icmp6_t		*icmp_nd;
	ip6_t		*ip6h;
	nce_t		*our_nce = NULL;
	in6_addr_t	target;
	in6_addr_t	src;
	int		len;
	int		flag = 0;
	nd_opt_hdr_t	*opt = NULL;
	boolean_t	bad_solicit = B_FALSE;
	mib2_ipv6IfIcmpEntry_t	*mib = ill->ill_icmp6_mib;

	ip6h = (ip6_t *)mp->b_rptr;
	icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
	len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
	src = ip6h->ip6_src;
	ns = (nd_neighbor_solicit_t *)icmp_nd;
	target = ns->nd_ns_target;
	/* A multicast target is never valid in an NS */
	if (IN6_IS_ADDR_MULTICAST(&target)) {
		if (ip_debug > 2) {
			/* ip1dbg */
			pr_addr_dbg("ndp_input_solicit: Target is"
			    " multicast! %s\n", AF_INET6, &target);
		}
		bad_solicit = B_TRUE;
		goto done;
	}
	if (len > sizeof (nd_neighbor_solicit_t)) {
		/* Options present */
		opt = (nd_opt_hdr_t *)&ns[1];
		len -= sizeof (nd_neighbor_solicit_t);
		if (!ndp_verify_optlen(opt, len)) {
			ip1dbg(("ndp_input_solicit: Bad opt len\n"));
			bad_solicit = B_TRUE;
			goto done;
		}
	}
	if (IN6_IS_ADDR_UNSPECIFIED(&src)) {
		/* Check to see if this is a valid DAD solicitation */
		if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) {
			if (ip_debug > 2) {
				/* ip1dbg */
				pr_addr_dbg("ndp_input_solicit: IPv6 "
				    "Destination is not solicited node "
				    "multicast %s\n", AF_INET6,
				    &ip6h->ip6_dst);
			}
			bad_solicit = B_TRUE;
			goto done;
		}
	}

	/*
	 * NOTE: with IPMP, it's possible the nominated multicast ill (which
	 * received this packet if it's multicast) is not the ill tied to
	 * e.g. the IPMP ill's data link-local.  So we match across the illgrp
	 * to ensure we find the associated NCE.
	 */
	our_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE);
	/*
	 * If this is a valid Solicitation, a permanent
	 * entry should exist in the cache
	 */
	if (our_nce == NULL ||
	    !(our_nce->nce_flags & NCE_F_PERMANENT)) {
		ip1dbg(("ndp_input_solicit: Wrong target in NS?!"
		    "ifname=%s ", ill->ill_name));
		if (ip_debug > 2) {
			/* ip1dbg */
			pr_addr_dbg(" dst %s\n", AF_INET6, &target);
		}
		bad_solicit = B_TRUE;
		goto done;
	}

	/* At this point we should have a verified NS per spec */
	if (opt != NULL) {
		opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR);
		if (opt != NULL) {
			haddr = (uchar_t *)&opt[1];
			/* Reject an SLLA option too short for our lla */
			if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) ||
			    hlen == 0) {
				ip1dbg(("ndp_input_solicit: bad SLLA\n"));
				bad_solicit = B_TRUE;
				goto done;
			}
		}
	}

	/* If sending directly to peer, set the unicast flag */
	if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))
		flag |= NDP_UNICAST;

	/*
	 * Create/update the entry for the soliciting node,
	 * or respond to outstanding queries; don't if
	 * the source is the unspecified address.
	 */
	if (!IN6_IS_ADDR_UNSPECIFIED(&src)) {
		int	err;
		nce_t	*nnce;

		ASSERT(ill->ill_isv6);
		/*
		 * Regular solicitations *must* include the Source Link-Layer
		 * Address option.  Ignore messages that do not.
		 */
		if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
			ip1dbg(("ndp_input_solicit: source link-layer address "
			    "option missing with a specified source.\n"));
			bad_solicit = B_TRUE;
			goto done;
		}

		/*
		 * This is a regular solicitation.  If we're still in the
		 * process of verifying the address, then don't respond at all
		 * and don't keep track of the sender.
		 */
		if (our_nce->nce_state == ND_PROBE)
			goto done;

		/*
		 * If the solicitation doesn't have sender hardware address
		 * (legal for unicast solicitation), then process without
		 * installing the return NCE.  Either we already know it, or
		 * we'll be forced to look it up when (and if) we reply to the
		 * packet.
		 */
		if (haddr == NULL)
			goto no_source;

		err = ndp_lookup_then_add_v6(ill,
		    B_FALSE,
		    haddr,
		    &src,	/* Soliciting nodes address */
		    &ipv6_all_ones,
		    &ipv6_all_zeros,
		    0,
		    0,
		    ND_STALE,
		    &nnce);
		switch (err) {
		case 0:
			/* done with this entry */
			NCE_REFRELE(nnce);
			break;
		case EEXIST:
			/*
			 * B_FALSE indicates this is not an advertisement.
			 */
			ndp_process(nnce, haddr, 0, B_FALSE);
			NCE_REFRELE(nnce);
			break;
		default:
			ip1dbg(("ndp_input_solicit: Can't create NCE %d\n",
			    err));
			goto done;
		}
no_source:
		flag |= NDP_SOLICITED;
	} else {
		/*
		 * No source link layer address option should be present in a
		 * valid DAD request.
		 */
		if (haddr != NULL) {
			ip1dbg(("ndp_input_solicit: source link-layer address "
			    "option present with an unspecified source.\n"));
			bad_solicit = B_TRUE;
			goto done;
		}
		if (our_nce->nce_state == ND_PROBE) {
			/*
			 * Internally looped-back probes won't have DLPI
			 * attached to them.  External ones (which are sent by
			 * multicast) always will.  Just ignore our own
			 * transmissions.
			 */
			if (dl_mp != NULL) {
				/*
				 * If someone else is probing our address, then
				 * we've crossed wires.  Declare failure.
				 */
				ip_ndp_failure(ill, mp, dl_mp);
			}
			goto done;
		}

		/*
		 * This is a DAD probe.  Multicast the advertisement to the
		 * all-nodes address.
		 */
		src = ipv6_all_hosts_mcast;
	}

	/* Response to a solicitation */
	(void) nce_xmit_advert(our_nce, B_TRUE, &src, flag);
done:
	if (bad_solicit)
		BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations);
	if (our_nce != NULL)
		NCE_REFRELE(our_nce);
}

/*
 * Process an incoming Neighbor Advertisement: validate it, detect
 * conflicts against our own (permanent) NCEs, or pass the new
 * reachability information to ndp_process().
 */
void
ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
{
	nd_neighbor_advert_t *na;
	uint32_t	hlen = ill->ill_nd_lla_len;
	uchar_t		*haddr = NULL;
	icmp6_t		*icmp_nd;
	ip6_t		*ip6h;
	nce_t		*dst_nce = NULL;
	in6_addr_t	target;
	nd_opt_hdr_t	*opt = NULL;
	int		len;
	ip_stack_t	*ipst = ill->ill_ipst;
	mib2_ipv6IfIcmpEntry_t	*mib = ill->ill_icmp6_mib;

	ip6h = (ip6_t *)mp->b_rptr;
	icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
	len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
	na = (nd_neighbor_advert_t *)icmp_nd;

	/* A solicited NA must not be sent to a multicast destination */
	if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
	    (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) {
		ip1dbg(("ndp_input_advert: Target is multicast but the "
		    "solicited flag is not zero\n"));
		BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
		return;
	}
	target = na->nd_na_target;
	if (IN6_IS_ADDR_MULTICAST(&target)) {
		ip1dbg(("ndp_input_advert: Target is multicast!\n"));
		BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
		return;
	}
	if (len > sizeof (nd_neighbor_advert_t)) {
		opt = (nd_opt_hdr_t *)&na[1];
		if (!ndp_verify_optlen(opt,
		    len - sizeof (nd_neighbor_advert_t))) {
			ip1dbg(("ndp_input_advert: cannot verify SLLA\n"));
			BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
			return;
		}
		/* At this point we have a verified NA per spec */
		len -= sizeof (nd_neighbor_advert_t);
		opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR);
		if (opt != NULL) {
			haddr = (uchar_t *)&opt[1];
			/* Reject a TLLA option too short for our lla */
			if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) ||
			    hlen == 0) {
				ip1dbg(("ndp_input_advert: bad SLLA\n"));
				BUMP_MIB(mib,
				    ipv6IfIcmpInBadNeighborAdvertisements);
				return;
			}
		}
	}

	/*
	 * NOTE: we match across the illgrp since we need to do DAD for
	 * all of
	 * our local addresses, and those are spread across all the active
	 * ills in the group.
	 */
	if ((dst_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE)) == NULL)
		return;

	if (dst_nce->nce_flags & NCE_F_PERMANENT) {
		/*
		 * Someone just advertised one of our local addresses.  First,
		 * check if it was us -- if so, we can safely ignore it.
		 */
		if (haddr != NULL) {
			if (!nce_cmp_ll_addr(dst_nce, haddr, hlen))
				goto out;	/* from us -- no conflict */

			/*
			 * If we're in an IPMP group, check if this is an echo
			 * from another ill in the group.  Use the double-
			 * checked locking pattern to avoid grabbing
			 * ill_g_lock in the non-IPMP case.
			 */
			if (IS_UNDER_IPMP(ill)) {
				rw_enter(&ipst->ips_ill_g_lock, RW_READER);
				if (IS_UNDER_IPMP(ill) && ipmp_illgrp_find_ill(
				    ill->ill_grp, haddr, hlen) != NULL) {
					rw_exit(&ipst->ips_ill_g_lock);
					goto out;
				}
				rw_exit(&ipst->ips_ill_g_lock);
			}
		}

		/*
		 * Our own (looped-back) unsolicited neighbor advertisements
		 * will get here with dl_mp == NULL.  (These will usually be
		 * filtered by the `haddr' checks above, but point-to-point
		 * links have no hardware address and thus make it here.)
		 */
		if (dl_mp == NULL && dst_nce->nce_state != ND_PROBE)
			goto out;

		/*
		 * This appears to be a real conflict.  If we're trying to
		 * configure this NCE (ND_PROBE), then shut it down.
		 * Otherwise, handle the discovered conflict.
		 *
		 * In the ND_PROBE case, dl_mp might be NULL if we're getting
		 * a unicast reply.  This isn't typically done (multicast is
		 * the norm in response to a probe), but we can handle it.
		 */
		if (dst_nce->nce_state == ND_PROBE)
			ip_ndp_failure(ill, mp, dl_mp);
		else
			ip_ndp_conflict(ill, mp, dl_mp, dst_nce);
	} else {
		if (na->nd_na_flags_reserved & ND_NA_FLAG_ROUTER)
			dst_nce->nce_flags |= NCE_F_ISROUTER;

		/* B_TRUE indicates this is an advertisement */
		ndp_process(dst_nce, haddr, na->nd_na_flags_reserved, B_TRUE);
	}
out:
	NCE_REFRELE(dst_nce);
}

/*
 * Process NDP neighbor solicitation/advertisement messages.
 * The checksum has already checked o.k before reaching here.
 *
 * Validates hop limit, next header, ICMP code and minimum length per
 * RFC 4861 before dispatching to ndp_input_solicit()/ndp_input_advert().
 * Consumes `mp' in all cases; `dl_mp' ownership remains with the caller.
 */
void
ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
{
	icmp6_t		*icmp_nd;
	ip6_t		*ip6h;
	int		len;
	mib2_ipv6IfIcmpEntry_t	*mib = ill->ill_icmp6_mib;

	/* Flatten the message so headers can be walked linearly */
	if (!pullupmsg(mp, -1)) {
		ip1dbg(("ndp_input: pullupmsg failed\n"));
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
		goto done;
	}
	ip6h = (ip6_t *)mp->b_rptr;
	/* Hop limit 255 proves the packet was not forwarded (RFC 4861) */
	if (ip6h->ip6_hops != IPV6_MAX_HOPS) {
		ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n"));
		BUMP_MIB(mib, ipv6IfIcmpBadHoplimit);
		goto done;
	}
	/*
	 * NDP does not accept any extension headers between the
	 * IP header and the ICMP header since e.g. a routing
	 * header could be dangerous.
	 * This assumes that any AH or ESP headers are removed
	 * by ip prior to passing the packet to ndp_input.
	 */
	if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
		ip1dbg(("ndp_input: Wrong next header 0x%x\n",
		    ip6h->ip6_nxt));
		BUMP_MIB(mib, ipv6IfIcmpInErrors);
		goto done;
	}
	icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
	ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT ||
	    icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT);
	if (icmp_nd->icmp6_code != 0) {
		ip1dbg(("ndp_input: icmp6 code != 0 \n"));
		BUMP_MIB(mib, ipv6IfIcmpInErrors);
		goto done;
	}
	len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
	/*
	 * Make sure packet length is large enough for either
	 * a NS or a NA icmp packet.
	 */
	if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) {
		ip1dbg(("ndp_input: packet too short\n"));
		BUMP_MIB(mib, ipv6IfIcmpInErrors);
		goto done;
	}
	if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) {
		ndp_input_solicit(ill, mp, dl_mp);
	} else {
		ndp_input_advert(ill, mp, dl_mp);
	}
done:
	freemsg(mp);
}

/*
 * Utility routine to send an advertisement.  Assumes that the NCE cannot
 * go away (e.g., because it's refheld).
 * Returns the nce_xmit() result: B_TRUE if the packet was dropped.
 */
static boolean_t
nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *target,
    uint_t flags)
{
	/* Probes are solicitations only; never an advertisement */
	ASSERT((flags & NDP_PROBE) == 0);

	if (nce->nce_flags & NCE_F_ISROUTER)
		flags |= NDP_ISROUTER;
	/* Anycast NCEs must not set the Override flag (ambiguous owner) */
	if (!(nce->nce_flags & NCE_F_ANYCAST))
		flags |= NDP_ORIDE;

	return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_ADVERT, use_nd_lla,
	    &nce->nce_addr, target, flags));
}

/*
 * Utility routine to send a solicitation.  Assumes that the NCE cannot
 * go away (e.g., because it's refheld).
 * Returns the nce_xmit() result: B_TRUE if the packet was dropped.
 */
static boolean_t
nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *sender,
    uint_t flags)
{
	/* DAD probes always use the unspecified source address */
	if (flags & NDP_PROBE)
		sender = &ipv6_all_zeros;

	return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, use_nd_lla,
	    sender, &nce->nce_addr, flags));
}

/*
 * nce_xmit is called to form and transmit a ND solicitation or
 * advertisement ICMP packet.
 *
 * If the source address is unspecified and this isn't a probe (used for
 * duplicate address detection), an appropriate source address and link layer
 * address will be chosen here.  The link layer address option is included if
 * the source is specified (i.e., all non-probe packets), and omitted (per the
 * specification) otherwise.
 *
 * It returns B_FALSE only if it does a successful put() to the
 * corresponding ill's ill_wq otherwise returns B_TRUE.
 */
static boolean_t
nce_xmit(ill_t *ill, uint8_t type, boolean_t use_nd_lla,
    const in6_addr_t *sender, const in6_addr_t *target, int flag)
{
	ill_t		*hwaddr_ill;
	uint32_t	len;
	icmp6_t		*icmp6;
	mblk_t		*mp;
	ip6_t		*ip6h;
	nd_opt_hdr_t	*opt;
	uint_t		plen, maxplen;
	ip6i_t		*ip6i;
	ipif_t		*src_ipif = NULL;
	uint8_t		*hw_addr;
	zoneid_t	zoneid = GLOBAL_ZONEID;
	char		buf[INET6_ADDRSTRLEN];

	ASSERT(!IS_IPMP(ill));

	/*
	 * Check that the sender is actually a usable address on `ill', and if
	 * so, track that as the src_ipif.  If not, for solicitations, set the
	 * sender to :: so that a new one will be picked below; for adverts,
	 * drop the packet since we expect nce_xmit_advert() to always provide
	 * a valid sender.
	 */
	if (!IN6_IS_ADDR_UNSPECIFIED(sender)) {
		if ((src_ipif = ip_ndp_lookup_addr_v6(sender, ill)) == NULL ||
		    !src_ipif->ipif_addr_ready) {
			if (src_ipif != NULL) {
				ipif_refrele(src_ipif);
				src_ipif = NULL;
			}
			if (type == ND_NEIGHBOR_ADVERT) {
				ip1dbg(("nce_xmit: No source ipif for src %s\n",
				    inet_ntop(AF_INET6, sender, buf,
				    sizeof (buf))));
				return (B_TRUE);
			}
			sender = &ipv6_all_zeros;
		}
	}

	/*
	 * If we still have an unspecified source (sender) address and this
	 * isn't a probe, select a source address from `ill'.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) {
		ASSERT(type != ND_NEIGHBOR_ADVERT);
		/*
		 * Pick a source address for this solicitation, but restrict
		 * the selection to addresses assigned to the output
		 * interface.  We do this because the destination will create
		 * a neighbor cache entry for the source address of this
		 * packet, so the source address needs to be a valid neighbor.
		 */
		src_ipif = ipif_select_source_v6(ill, target, B_TRUE,
		    IPV6_PREFER_SRC_DEFAULT, ALL_ZONES);
		if (src_ipif == NULL) {
			ip1dbg(("nce_xmit: No source ipif for dst %s\n",
			    inet_ntop(AF_INET6, target, buf, sizeof (buf))));
			return (B_TRUE);
		}
		sender = &src_ipif->ipif_v6src_addr;
	}

	/*
	 * We're either sending a probe or we have a source address.
	 */
	ASSERT((flag & NDP_PROBE) || src_ipif != NULL);

	/* Allocate for the worst-case linkaddr option; trimmed below */
	maxplen = roundup(sizeof (nd_opt_hdr_t) + ND_MAX_HDW_LEN, 8);
	len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) +
	    maxplen;
	mp = allocb(len, BPRI_LO);
	if (mp == NULL) {
		if (src_ipif != NULL)
			ipif_refrele(src_ipif);
		return (B_TRUE);
	}
	bzero((char *)mp->b_rptr, len);
	mp->b_wptr = mp->b_rptr + len;

	/* The leading ip6i_t carries out-of-band instructions for ip_wput_v6 */
	ip6i = (ip6i_t *)mp->b_rptr;
	ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6i->ip6i_nxt = IPPROTO_RAW;
	ip6i->ip6i_flags = IP6I_HOPLIMIT;
	if (flag & NDP_PROBE)
		ip6i->ip6i_flags |= IP6I_UNSPEC_SRC;

	ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t));
	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t));
	ip6h->ip6_nxt = IPPROTO_ICMPV6;
	ip6h->ip6_hops = IPV6_MAX_HOPS;
	ip6h->ip6_src = *sender;
	ip6h->ip6_dst = *target;
	icmp6 = (icmp6_t *)&ip6h[1];

	/* NS and NA bodies are the same size, so the option offset is shared */
	opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN +
	    sizeof (nd_neighbor_advert_t));

	if (type == ND_NEIGHBOR_SOLICIT) {
		nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6;

		/* Probes omit the SLLA option per the specification */
		if (!(flag & NDP_PROBE))
			opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR;
		ns->nd_ns_target = *target;
		if (!(flag & NDP_UNICAST)) {
			/* Form multicast address of the target */
			ip6h->ip6_dst = ipv6_solicited_node_mcast;
			ip6h->ip6_dst.s6_addr32[3] |=
			    ns->nd_ns_target.s6_addr32[3];
		}
	} else {
		nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6;

		ASSERT(!(flag & NDP_PROBE));
		opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
		na->nd_na_target = *sender;
		if (flag & NDP_ISROUTER)
			na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER;
		if (flag & NDP_SOLICITED)
			na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED;
		if (flag & NDP_ORIDE)
			na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE;
	}

	hw_addr = NULL;
	if (!(flag & NDP_PROBE)) {
		/*
		 * Use our source address to find the hardware address to put
		 * in the packet, so that the hardware address and IP address
		 * will match up -- even if that hardware address doesn't
		 * match the ill we actually transmit the packet through.
		 */
		if (IS_IPMP(src_ipif->ipif_ill)) {
			hwaddr_ill = ipmp_ipif_hold_bound_ill(src_ipif);
			if (hwaddr_ill == NULL) {
				ip1dbg(("nce_xmit: no bound ill!\n"));
				ipif_refrele(src_ipif);
				freemsg(mp);
				return (B_TRUE);
			}
		} else {
			hwaddr_ill = src_ipif->ipif_ill;
			ill_refhold(hwaddr_ill);	/* for symmetry */
		}

		plen = roundup(sizeof (nd_opt_hdr_t) +
		    hwaddr_ill->ill_nd_lla_len, 8);

		hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla :
		    hwaddr_ill->ill_phys_addr;
		if (hw_addr != NULL) {
			/* Fill in link layer address and option len */
			opt->nd_opt_len = (uint8_t)(plen / 8);
			bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len);
		}

		ill_refrele(hwaddr_ill);
	}

	if (hw_addr == NULL)
		plen = 0;

	/* Fix up the length of the packet now that plen is known */
	len -= (maxplen - plen);
	mp->b_wptr = mp->b_rptr + len;
	ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t));

	icmp6->icmp6_type = type;
	icmp6->icmp6_code = 0;
	/*
	 * Prepare for checksum by putting icmp length in the icmp
	 * checksum field.  The checksum is calculated in ip_wput_v6.
	 */
	icmp6->icmp6_cksum = ip6h->ip6_plen;

	/*
	 * Before we toss the src_ipif, look up the zoneid to pass to
	 * ip_output_v6().  This is to ensure unicast ND_NEIGHBOR_ADVERT
	 * packets to be routed correctly by IP (we cannot guarantee that the
	 * global zone has an interface route to the destination).
	 */
	if (src_ipif != NULL) {
		if ((zoneid = src_ipif->ipif_zoneid) == ALL_ZONES)
			zoneid = GLOBAL_ZONEID;
		ipif_refrele(src_ipif);
	}

	ip_output_v6((void *)(uintptr_t)zoneid, mp, ill->ill_wq, IP_WPUT);
	return (B_FALSE);
}

/*
 * Make a link layer address (does not include the SAP) from an nce.
 * To form the link layer address, use the last four bytes of ipv6
 * address passed in and the fixed offset stored in nce.
 */
/*
 * Build a link-layer (hardware) address for a mapping nce (e.g. multicast)
 * into addrpos.  Starts from the template hardware address stored in
 * nce_res_mp and ORs in the bytes of the IPv6 address selected by
 * nce_extract_mask, at offset nce_ll_extract_start.  No-op on
 * NORESOLVER links, which carry no link-layer address.
 */
static void
nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr)
{
	uchar_t	*mask, *to;
	ill_t	*ill = nce->nce_ill;
	int	len;

	if (ill->ill_net_type == IRE_IF_NORESOLVER)
		return;
	ASSERT(nce->nce_res_mp != NULL);
	ASSERT(ill->ill_net_type == IRE_IF_RESOLVER);
	ASSERT(nce->nce_flags & NCE_F_MAPPING);
	ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask));
	ASSERT(addr != NULL);

	/* Start from the template hardware address held in the nce. */
	bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill),
	    addrpos, ill->ill_nd_lla_len);

	/*
	 * Merge in the masked low-order bytes of the IPv6 address;
	 * mask and address are aligned at their tails.
	 */
	len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start,
	    IPV6_ADDR_LEN);
	mask = (uchar_t *)&nce->nce_extract_mask;
	mask += (IPV6_ADDR_LEN - len);
	addr += (IPV6_ADDR_LEN - len);
	to = addrpos + nce->nce_ll_extract_start;
	while (len-- > 0)
		*to++ |= *mask++ & *addr++;
}

/*
 * Allocate a DL_UNITDATA_REQ template mblk for an IPv6 ill, sized to hold
 * the link-layer address plus the SAP, with the SAP value pre-filled.
 * Returns NULL on allocation failure; the caller owns the returned mblk.
 */
mblk_t *
nce_udreq_alloc(ill_t *ill)
{
	mblk_t	*template_mp = NULL;
	dl_unitdata_req_t *dlur;
	int	sap_length;

	ASSERT(ill->ill_isv6);

	sap_length = ill->ill_sap_length;
	template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) +
	    ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ);
	if (template_mp == NULL)
		return (NULL);

	dlur = (dl_unitdata_req_t *)template_mp->b_rptr;
	dlur->dl_priority.dl_min = 0;
	dlur->dl_priority.dl_max = 0;
	dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len;
	dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t);

	/* Copy in the SAP value. */
	NCE_LL_SAP_COPY(ill, template_mp);

	return (template_mp);
}

/*
 * NDP retransmit timer.
 * This timer goes off when:
 * a. It is time to retransmit NS for resolver.
 * b. It is time to send reachability probes.
 */
void
ndp_timer(void *arg)
{
	nce_t		*nce = arg;
	ill_t		*ill = nce->nce_ill;
	char		addrbuf[INET6_ADDRSTRLEN];
	boolean_t	dropped = B_FALSE;
	ip_stack_t	*ipst = ill->ill_ipst;

	/*
	 * The timer has to be cancelled by ndp_delete before doing the final
	 * refrele.  So the NCE is guaranteed to exist when the timer runs
	 * until it clears the timeout_id.  Before clearing the timeout_id
	 * bump up the refcnt so that we can continue to use the nce.
	 */
	ASSERT(nce != NULL);
	mutex_enter(&nce->nce_lock);
	NCE_REFHOLD_LOCKED(nce);
	nce->nce_timeout_id = 0;

	/*
	 * Check the reachability state first.
	 */
	switch (nce->nce_state) {
	case ND_DELAY:
		/* Delay expired with no confirmation; start unicast NUD. */
		nce->nce_state = ND_PROBE;
		mutex_exit(&nce->nce_lock);
		(void) nce_xmit_solicit(nce, B_FALSE, &ipv6_all_zeros,
		    NDP_UNICAST);
		if (ip_debug > 3) {
			/* ip2dbg */
			pr_addr_dbg("ndp_timer: state for %s changed "
			    "to PROBE\n", AF_INET6, &nce->nce_addr);
		}
		NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time);
		NCE_REFRELE(nce);
		return;
	case ND_PROBE:
		/* must be retransmit timer */
		nce->nce_pcnt--;
		ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT &&
		    nce->nce_pcnt >= -1);
		if (nce->nce_pcnt > 0) {
			/*
			 * As per RFC2461, the nce gets deleted after
			 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions.
			 * Note that the first unicast solicitation is sent
			 * during the DELAY state.
			 */
			ip2dbg(("ndp_timer: pcount=%x dst %s\n",
			    nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr,
			    addrbuf, sizeof (addrbuf))));
			mutex_exit(&nce->nce_lock);
			dropped = nce_xmit_solicit(nce, B_FALSE,
			    &ipv6_all_zeros,
			    (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE :
			    NDP_UNICAST);
			if (dropped) {
				/* Transmit failed; don't consume a probe. */
				mutex_enter(&nce->nce_lock);
				nce->nce_pcnt++;
				mutex_exit(&nce->nce_lock);
			}
			NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill));
		} else if (nce->nce_pcnt < 0) {
			/* No hope, delete the nce */
			nce->nce_state = ND_UNREACHABLE;
			mutex_exit(&nce->nce_lock);
			if (ip_debug > 2) {
				/* ip1dbg */
				pr_addr_dbg("ndp_timer: Delete IRE for"
				    " dst %s\n", AF_INET6, &nce->nce_addr);
			}
			ndp_delete(nce);
		} else if (!(nce->nce_flags & NCE_F_PERMANENT)) {
			/* Wait RetransTimer, before deleting the entry */
			ip2dbg(("ndp_timer: pcount=%x dst %s\n",
			    nce->nce_pcnt, inet_ntop(AF_INET6,
			    &nce->nce_addr, addrbuf, sizeof (addrbuf))));
			mutex_exit(&nce->nce_lock);
			/* Wait one interval before killing */
			NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time);
		} else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) {
			ipif_t *ipif;

			/*
			 * We're done probing, and we can now declare this
			 * address to be usable.  Let IP know that it's ok to
			 * use.
			 */
			nce->nce_state = ND_REACHABLE;
			mutex_exit(&nce->nce_lock);
			ipif = ip_ndp_lookup_addr_v6(&nce->nce_addr,
			    nce->nce_ill);
			if (ipif != NULL) {
				if (ipif->ipif_was_dup) {
					char ibuf[LIFNAMSIZ + 10];
					char sbuf[INET6_ADDRSTRLEN];

					ipif->ipif_was_dup = B_FALSE;
					(void) inet_ntop(AF_INET6,
					    &ipif->ipif_v6lcl_addr,
					    sbuf, sizeof (sbuf));
					ipif_get_name(ipif, ibuf,
					    sizeof (ibuf));
					cmn_err(CE_NOTE, "recovered address "
					    "%s on %s", sbuf, ibuf);
				}
				if ((ipif->ipif_flags & IPIF_UP) &&
				    !ipif->ipif_addr_ready)
					ipif_up_notify(ipif);
				ipif->ipif_addr_ready = 1;
				ipif_refrele(ipif);
			}
			/* Begin defending our new address */
			nce->nce_unsolicit_count = 0;
			dropped = nce_xmit_advert(nce, B_FALSE,
			    &ipv6_all_hosts_mcast, 0);
			if (dropped) {
				nce->nce_unsolicit_count = 1;
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_unsolicit_interval);
			} else if (ipst->ips_ip_ndp_defense_interval != 0) {
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_defense_interval);
			}
		} else {
			/*
			 * This is an address we're probing to be our own, but
			 * the ill is down.  Wait until it comes back before
			 * doing anything, but switch to reachable state so
			 * that the restart will work.
			 */
			nce->nce_state = ND_REACHABLE;
			mutex_exit(&nce->nce_lock);
		}
		NCE_REFRELE(nce);
		return;
	case ND_INCOMPLETE: {
		ip6_t	*ip6h;
		ip6i_t	*ip6i;
		mblk_t	*mp, *datamp, *nextmp, **prevmpp;

		/*
		 * Per case (2) in the nce_queue_mp() comments, scan nce_qd_mp
		 * for any IPMP probe packets, and toss 'em.  IPMP probe
		 * packets will always be at the head of nce_qd_mp and always
		 * have an ip6i_t header, so we can stop at the first queued
		 * ND packet without an ip6i_t.
		 */
		prevmpp = &nce->nce_qd_mp;
		for (mp = nce->nce_qd_mp; mp != NULL; mp = nextmp) {
			nextmp = mp->b_next;
			datamp = (DB_TYPE(mp) == M_CTL) ? mp->b_cont : mp;
			ip6h = (ip6_t *)datamp->b_rptr;
			if (ip6h->ip6_nxt != IPPROTO_RAW)
				break;

			ip6i = (ip6i_t *)ip6h;
			if (ip6i->ip6i_flags & IP6I_IPMP_PROBE) {
				inet_freemsg(mp);
				*prevmpp = nextmp;
			} else {
				prevmpp = &mp->b_next;
			}
		}
		/* Retransmit the NS (or give up); called with nce_lock held */
		ip_ndp_resolve(nce);
		mutex_exit(&nce->nce_lock);
		NCE_REFRELE(nce);
		break;
	}
	case ND_REACHABLE:
		/*
		 * Periodic unsolicited advertisement, either to finish a
		 * requested burst (NCE_F_UNSOL_ADV) or to defend a
		 * permanent (local) address.
		 */
		if (((nce->nce_flags & NCE_F_UNSOL_ADV) &&
		    nce->nce_unsolicit_count != 0) ||
		    ((nce->nce_flags & NCE_F_PERMANENT) &&
		    ipst->ips_ip_ndp_defense_interval != 0)) {
			if (nce->nce_unsolicit_count > 0)
				nce->nce_unsolicit_count--;
			mutex_exit(&nce->nce_lock);
			dropped = nce_xmit_advert(nce, B_FALSE,
			    &ipv6_all_hosts_mcast, 0);
			if (dropped) {
				/* Couldn't send; retry this advertisement. */
				mutex_enter(&nce->nce_lock);
				nce->nce_unsolicit_count++;
				mutex_exit(&nce->nce_lock);
			}
			if (nce->nce_unsolicit_count != 0) {
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_unsolicit_interval);
			} else {
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_defense_interval);
			}
		} else {
			mutex_exit(&nce->nce_lock);
		}
		NCE_REFRELE(nce);
		break;
	default:
		mutex_exit(&nce->nce_lock);
		NCE_REFRELE(nce);
		break;
	}
}

/*
 * Set a link layer address from the ll_addr passed in.
 * Copy SAP from ill.
 */
static void
nce_set_ll(nce_t *nce, uchar_t *ll_addr)
{
	ill_t	*ill = nce->nce_ill;
	uchar_t	*woffset;

	ASSERT(ll_addr != NULL);
	/* Always called before fast_path_probe */
	ASSERT(nce->nce_fp_mp == NULL);

	if (ill->ill_sap_length != 0) {
		/*
		 * Copy the SAP type specified in the
		 * request into the xmit template.
		 */
		NCE_LL_SAP_COPY(ill, nce->nce_res_mp);
	}

	if (ill->ill_phys_addr_length > 0) {
		/*
		 * The bcopy() below used to be called for the physical address
		 * length rather than the link layer address length.  For
		 * ethernet and many other media, the phys_addr and lla are
		 * identical.
		 * However, with xresolv interfaces being introduced, the
		 * phys_addr and lla are no longer the same, and the physical
		 * address may not have any useful meaning, so we use the lla
		 * for IPv6 address resolution and destination addressing.
		 *
		 * For PPP or other interfaces with a zero length
		 * physical address, don't do anything here.
		 * The bcopy() with a zero phys_addr length was previously
		 * a no-op for interfaces with a zero-length physical address.
		 * Using the lla for them would change the way they operate.
		 * Doing nothing in such cases preserves expected behavior.
		 */
		woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
		bcopy(ll_addr, woffset, ill->ill_nd_lla_len);
	}
}

/*
 * Compare ll_addr against the link-layer address currently stored in
 * nce_res_mp.  Returns B_TRUE when the addresses differ; B_FALSE when
 * they match or when ll_addr is NULL.
 */
static boolean_t
nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr,
    uint32_t ll_addr_len)
{
	ill_t	*ill = nce->nce_ill;
	uchar_t	*ll_offset;

	ASSERT(nce->nce_res_mp != NULL);
	if (ll_addr == NULL)
		return (B_FALSE);
	ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
	if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0)
		return (B_TRUE);
	return (B_FALSE);
}

/*
 * Updates the link layer address or the reachability state of
 * a cache entry.  Reset probe counter if needed.
 */
static void
nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr)
{
	ill_t		*ill = nce->nce_ill;
	boolean_t	need_stop_timer = B_FALSE;
	boolean_t	need_fastpath_update = B_FALSE;

	ASSERT(MUTEX_HELD(&nce->nce_lock));
	ASSERT(nce->nce_ipversion == IPV6_VERSION);
	/*
	 * If this interface does not do NUD, there is no point
	 * in allowing an update to the cache entry.  Although
	 * we will respond to NS.
	 * The only time we accept an update for a resolver when
	 * NUD is turned off is when it has just been created.
	 * Non-Resolvers will always be created as REACHABLE.
	 */
	if (new_state != ND_UNCHANGED) {
		if ((nce->nce_flags & NCE_F_NONUD) &&
		    (nce->nce_state != ND_INCOMPLETE))
			return;
		ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN);
		ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX);
		need_stop_timer = B_TRUE;
		if (new_state == ND_REACHABLE)
			nce->nce_last = TICK_TO_MSEC(lbolt64);
		else {
			/* We force NUD in this case */
			nce->nce_last = 0;
		}
		nce->nce_state = new_state;
		nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
	}
	/*
	 * In case of fast path we need to free the fastpath
	 * M_DATA and do another probe.  Otherwise we can just
	 * overwrite the DL_UNITDATA_REQ data, noting we'll lose
	 * whatever packets that happens to be transmitting at the time.
	 */
	if (new_ll_addr != NULL) {
		ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) +
		    ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr);
		bcopy(new_ll_addr, nce->nce_res_mp->b_rptr +
		    NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len);
		if (nce->nce_fp_mp != NULL) {
			freemsg(nce->nce_fp_mp);
			nce->nce_fp_mp = NULL;
		}
		need_fastpath_update = B_TRUE;
	}
	/*
	 * nce_lock is dropped across untimeout()/nce_fastpath() (which may
	 * block) and reacquired before return, since the caller still holds
	 * it by contract.
	 */
	mutex_exit(&nce->nce_lock);
	if (need_stop_timer) {
		(void) untimeout(nce->nce_timeout_id);
		nce->nce_timeout_id = 0;
	}
	if (need_fastpath_update)
		nce_fastpath(nce);
	mutex_enter(&nce->nce_lock);
}

/*
 * Append mp to the nce's queue of packets awaiting resolution (or prepend
 * when head_insert is set), dropping packets from the head once the queue
 * exceeds ill_max_buf entries.  Caller holds nce_lock.
 */
void
nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert)
{
	uint_t	count = 0;
	mblk_t	**mpp, *tmp;

	ASSERT(MUTEX_HELD(&nce->nce_lock));

	for (mpp = &nce->nce_qd_mp; *mpp != NULL; mpp = &(*mpp)->b_next) {
		if (++count > nce->nce_ill->ill_max_buf) {
			/* Queue full: drop the oldest packet at the head. */
			tmp = nce->nce_qd_mp->b_next;
			nce->nce_qd_mp->b_next = NULL;
			nce->nce_qd_mp->b_prev = NULL;
			freemsg(nce->nce_qd_mp);
			nce->nce_qd_mp = tmp;
		}
	}
	if (head_insert) {
		mp->b_next = nce->nce_qd_mp;
		nce->nce_qd_mp = mp;
	} else {
		*mpp = mp;
	}
}

/*
 * Queue a packet on an unresolved nce; IPMP probe packets are inserted at
 * the head of the queue (see comment below).  Caller holds nce_lock.
 */
static void
nce_queue_mp(nce_t *nce, mblk_t *mp)
{
	boolean_t	head_insert = B_FALSE;
	ip6_t	*ip6h;
	ip6i_t	*ip6i;
	mblk_t	*data_mp;

	ASSERT(MUTEX_HELD(&nce->nce_lock));

	if (mp->b_datap->db_type == M_CTL)
		data_mp = mp->b_cont;
	else
		data_mp = mp;
	ip6h = (ip6_t *)data_mp->b_rptr;
	if (ip6h->ip6_nxt == IPPROTO_RAW) {
		/*
		 * This message should have been pulled up already in
		 * ip_wput_v6.  We can't do pullups here because the message
		 * could be from the nce_qd_mp which could have b_next/b_prev
		 * non-NULL.
		 */
		ip6i = (ip6i_t *)ip6h;
		ASSERT(MBLKL(data_mp) >= sizeof (ip6i_t) + IPV6_HDR_LEN);
		/*
		 * If this packet is marked IP6I_IPMP_PROBE, then we need to:
		 *
		 * 1. Insert it at the head of the nce_qd_mp list.  Consider
		 *    the normal (non-probe) load-speading case where the
		 *    source address of the ND packet is not tied to nce_ill.
		 *    If the ill bound to the source address cannot receive,
		 *    the response to the ND packet will not be received.
		 *    However, if ND packets for nce_ill's probes are queued
		 *    behind that ND packet, those probes will also fail to
		 *    be sent, and thus in.mpathd will erroneously conclude
		 *    that nce_ill has also failed.
		 *
		 * 2. Drop the probe packet in ndp_timer() if the ND did
		 *    not succeed on the first attempt.  This ensures that
		 *    ND problems do not manifest as probe RTT spikes.
		 */
		if (ip6i->ip6i_flags & IP6I_IPMP_PROBE)
			head_insert = B_TRUE;
	}
	nce_queue_mp_common(nce, mp, head_insert);
}

/*
 * Called when address resolution failed due to a timeout.
 * Send an ICMP unreachable in response to all queued packets.
 */
void
nce_resolv_failed(nce_t *nce)
{
	mblk_t	*mp, *nxt_mp, *first_mp;
	char	buf[INET6_ADDRSTRLEN];
	ip6_t	*ip6h;
	zoneid_t zoneid = GLOBAL_ZONEID;
	ip_stack_t *ipst = nce->nce_ill->ill_ipst;

	ip1dbg(("nce_resolv_failed: dst %s\n",
	    inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf))));
	/* Detach the queued packets so the list can be walked unlocked. */
	mutex_enter(&nce->nce_lock);
	mp = nce->nce_qd_mp;
	nce->nce_qd_mp = NULL;
	mutex_exit(&nce->nce_lock);
	while (mp != NULL) {
		nxt_mp = mp->b_next;
		mp->b_next = NULL;
		mp->b_prev = NULL;

		first_mp = mp;
		if (mp->b_datap->db_type == M_CTL) {
			/* M_CTL carries IPsec state; data follows in b_cont */
			ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr;
			ASSERT(io->ipsec_out_type == IPSEC_OUT);
			zoneid = io->ipsec_out_zoneid;
			ASSERT(zoneid != ALL_ZONES);
			mp = mp->b_cont;
			mp->b_next = NULL;
			mp->b_prev = NULL;
		}

		ip6h = (ip6_t *)mp->b_rptr;
		if (ip6h->ip6_nxt == IPPROTO_RAW) {
			ip6i_t	*ip6i;
			/*
			 * This message should have been pulled up already
			 * in ip_wput_v6.  ip_hdr_complete_v6 assumes that
			 * the header is pulled up.
			 */
			ip6i = (ip6i_t *)ip6h;
			ASSERT((mp->b_wptr - (uchar_t *)ip6i) >=
			    sizeof (ip6i_t) + IPV6_HDR_LEN);
			mp->b_rptr += sizeof (ip6i_t);
		}
		/*
		 * Ignore failure since icmp_unreachable_v6 will silently
		 * drop packets with an unspecified source address.
		 */
		(void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid, ipst);
		icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp,
		    ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid, ipst);
		mp = nxt_mp;
	}
	nce_cb_dispatch(nce);
}

/*
 * Called by SIOCSNDP* ioctl to add/change an nce entry
 * and the corresponding attributes.
 * Disallow states other than ND_REACHABLE or ND_STALE.
 */
int
ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr)
{
	sin6_t		*sin6;
	in6_addr_t	*addr;
	nce_t		*nce;
	int		err;
	uint16_t	new_flags = 0;
	uint16_t	old_flags = 0;
	int		inflags = lnr->lnr_flags;
	ip_stack_t	*ipst = ill->ill_ipst;

	ASSERT(ill->ill_isv6);
	if ((lnr->lnr_state_create != ND_REACHABLE) &&
	    (lnr->lnr_state_create != ND_STALE))
		return (EINVAL);

	if (lnr->lnr_hdw_len > ND_MAX_HDW_LEN)
		return (EINVAL);

	sin6 = (sin6_t *)&lnr->lnr_addr;
	addr = &sin6->sin6_addr;

	mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
	/* We know it can not be mapping so just look in the hash table */
	nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr));
	/* See comment in ndp_query() regarding IS_IPMP(ill) usage */
	nce = nce_lookup_addr(ill, IS_IPMP(ill), addr, nce);
	if (nce != NULL)
		new_flags = nce->nce_flags;

	switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) {
	case NDF_ISROUTER_ON:
		new_flags |= NCE_F_ISROUTER;
		break;
	case NDF_ISROUTER_OFF:
		new_flags &= ~NCE_F_ISROUTER;
		break;
	case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON):
		/* Both on and off is contradictory; reject. */
		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
		if (nce != NULL)
			NCE_REFRELE(nce);
		return (EINVAL);
	}

	switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) {
	case NDF_ANYCAST_ON:
		new_flags |= NCE_F_ANYCAST;
		break;
	case NDF_ANYCAST_OFF:
		new_flags &= ~NCE_F_ANYCAST;
		break;
	case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON):
		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
		if (nce != NULL)
			NCE_REFRELE(nce);
		return (EINVAL);
	}

	if (nce == NULL) {
		err = ndp_add_v6(ill,
		    (uchar_t *)lnr->lnr_hdw_addr,
		    addr,
		    &ipv6_all_ones,
		    &ipv6_all_zeros,
		    0,
		    new_flags,
		    lnr->lnr_state_create,
		    &nce);
		if (err != 0) {
			mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
			ip1dbg(("ndp_sioc_update: Can't create NCE %d\n",
			    err));
			return (err);
		}
	}
	old_flags = nce->nce_flags;
	if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) {
		/*
		 * Router turned to host, delete all ires.
		 * XXX Just delete the entry, but we need to add too.
		 */
		nce->nce_flags &= ~NCE_F_ISROUTER;
		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
		ndp_delete(nce);
		NCE_REFRELE(nce);
		return (0);
	}
	mutex_exit(&ipst->ips_ndp6->ndp_g_lock);

	mutex_enter(&nce->nce_lock);
	nce->nce_flags = new_flags;
	mutex_exit(&nce->nce_lock);
	/*
	 * Note that we ignore the state at this point, which
	 * should be either STALE or REACHABLE.  Instead we let
	 * the link layer address passed in to determine the state
	 * much like incoming packets.
	 */
	nce_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE);
	NCE_REFRELE(nce);
	return (0);
}

/*
 * If the device driver supports it, we make nce_fp_mp to have
 * an M_DATA prepend.  Otherwise nce_fp_mp will be null.
 * The caller ensures there is hold on nce for this function.
 * Note that since ill_fastpath_probe() copies the mblk there is
 * no need for the hold beyond this function.
 */
void
nce_fastpath(nce_t *nce)
{
	ill_t	*ill = nce->nce_ill;
	int	res;

	ASSERT(ill != NULL);
	ASSERT(nce->nce_state != ND_INITIAL &&
	    nce->nce_state != ND_INCOMPLETE);

	if (nce->nce_fp_mp != NULL) {
		/* Already contains fastpath info */
		return;
	}
	if (nce->nce_res_mp != NULL) {
		nce_fastpath_list_add(nce);
		res = ill_fastpath_probe(ill, nce->nce_res_mp);
		/*
		 * EAGAIN is an indication of a transient error
		 * i.e. allocation failure etc.  leave the nce in the list it
		 * will be updated when another probe happens for another ire
		 * if not it will be taken out of the list when the ire is
		 * deleted.
		 */
		if (res != 0 && res != EAGAIN)
			nce_fastpath_list_delete(nce);
	}
}

/*
 * Drain the list of nce's waiting for fastpath response.
 */
void
nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *),
    void *arg)
{
	nce_t	*next_nce;
	nce_t	*current_nce;
	nce_t	*first_nce;
	nce_t	*prev_nce = NULL;

	/*
	 * The list is singly linked through nce_fastpath and terminated by
	 * the address of the list head itself (a sentinel), all protected
	 * by ill_lock.
	 */
	mutex_enter(&ill->ill_lock);
	first_nce = current_nce = (nce_t *)ill->ill_fastpath_list;
	while (current_nce != (nce_t *)&ill->ill_fastpath_list) {
		next_nce = current_nce->nce_fastpath;
		/*
		 * Take it off the list if we're flushing, or if the callback
		 * routine tells us to do so.  Otherwise, leave the nce in the
		 * fastpath list to handle any pending response from the lower
		 * layer.  We can't drain the list when the callback routine
		 * comparison failed, because the response is asynchronous in
		 * nature, and may not arrive in the same order as the list
		 * insertion.
		 */
		if (func == NULL || func(current_nce, arg)) {
			current_nce->nce_fastpath = NULL;
			if (current_nce == first_nce)
				ill->ill_fastpath_list = first_nce = next_nce;
			else
				prev_nce->nce_fastpath = next_nce;
		} else {
			/* previous element that is still in the list */
			prev_nce = current_nce;
		}
		current_nce = next_nce;
	}
	mutex_exit(&ill->ill_lock);
}

/*
 * Add nce to the nce fastpath list.
 */
void
nce_fastpath_list_add(nce_t *nce)
{
	ill_t	*ill;

	ill = nce->nce_ill;

	/* Lock order: ill_lock before nce_lock (see file header). */
	mutex_enter(&ill->ill_lock);
	mutex_enter(&nce->nce_lock);

	/*
	 * if nce has not been deleted and
	 * is not already in the list add it.
	 */
	if (!(nce->nce_flags & NCE_F_CONDEMNED) &&
	    (nce->nce_fastpath == NULL)) {
		nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list;
		ill->ill_fastpath_list = nce;
	}

	mutex_exit(&nce->nce_lock);
	mutex_exit(&ill->ill_lock);
}

/*
 * remove nce from the nce fastpath list.
 */
void
nce_fastpath_list_delete(nce_t *nce)
{
	nce_t	*nce_ptr;
	ill_t	*ill;

	ill = nce->nce_ill;
	ASSERT(ill != NULL);

	mutex_enter(&ill->ill_lock);
	/* nce_fastpath == NULL means the nce is not on the list. */
	if (nce->nce_fastpath == NULL)
		goto done;

	ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list);

	if (ill->ill_fastpath_list == nce) {
		ill->ill_fastpath_list = nce->nce_fastpath;
	} else {
		/* Walk the sentinel-terminated list to unlink nce. */
		nce_ptr = ill->ill_fastpath_list;
		while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) {
			if (nce_ptr->nce_fastpath == nce) {
				nce_ptr->nce_fastpath = nce->nce_fastpath;
				break;
			}
			nce_ptr = nce_ptr->nce_fastpath;
		}
	}

	nce->nce_fastpath = NULL;
done:
	mutex_exit(&ill->ill_lock);
}

/*
 * Update all NCE's that are not in fastpath mode and
 * have an nce_fp_mp that matches mp.  mp->b_cont contains
 * the fastpath header.
 *
 * Returns TRUE if entry should be dequeued, or FALSE otherwise.
 */
boolean_t
ndp_fastpath_update(nce_t *nce, void *arg)
{
	mblk_t	*mp, *fp_mp;
	uchar_t	*mp_rptr, *ud_mp_rptr;
	mblk_t	*ud_mp = nce->nce_res_mp;
	ptrdiff_t	cmplen;

	if (nce->nce_flags & NCE_F_MAPPING)
		return (B_TRUE);
	if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL))
		return (B_TRUE);

	ip2dbg(("ndp_fastpath_update: trying\n"));
	mp = (mblk_t *)arg;
	mp_rptr = mp->b_rptr;
	cmplen = mp->b_wptr - mp_rptr;
	ASSERT(cmplen >= 0);
	ud_mp_rptr = ud_mp->b_rptr;
	/*
	 * The nce is locked here to prevent any other threads
	 * from accessing and changing nce_res_mp when the IPv6 address
	 * becomes resolved to an lla while we're in the middle
	 * of looking at and comparing the hardware address (lla).
	 * It is also locked to prevent multiple threads in
	 * nce_fastpath_update from examining nce_res_mp at the same time.
	 */
	mutex_enter(&nce->nce_lock);
	if (ud_mp->b_wptr - ud_mp_rptr != cmplen ||
	    bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) {
		mutex_exit(&nce->nce_lock);
		/*
		 * Don't take the ire off the fastpath list yet,
		 * since the response may come later.
		 */
		return (B_FALSE);
	}
	/* Matched - install mp as the fastpath mp */
	ip1dbg(("ndp_fastpath_update: match\n"));
	fp_mp = dupb(mp->b_cont);
	if (fp_mp != NULL) {
		nce->nce_fp_mp = fp_mp;
	}
	mutex_exit(&nce->nce_lock);
	return (B_TRUE);
}

/*
 * This function handles the DL_NOTE_FASTPATH_FLUSH notification from
 * driver.  Note that it assumes IP is exclusive...
 */
/* ARGSUSED */
void
ndp_fastpath_flush(nce_t *nce, char *arg)
{
	if (nce->nce_flags & NCE_F_MAPPING)
		return;
	/* No fastpath info? */
	if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL)
		return;

	if (nce->nce_ipversion == IPV4_VERSION &&
	    nce->nce_flags & NCE_F_BCAST) {
		/*
		 * IPv4 BROADCAST entries:
		 * We can't delete the nce since it is difficult to
		 * recreate these without going through the
		 * ipif down/up dance.
		 *
		 * All access to nce->nce_fp_mp in the case of these
		 * is protected by nce_lock.
		 */
		mutex_enter(&nce->nce_lock);
		if (nce->nce_fp_mp != NULL) {
			freeb(nce->nce_fp_mp);
			nce->nce_fp_mp = NULL;
			mutex_exit(&nce->nce_lock);
			/* Re-probe to rebuild the fastpath header. */
			nce_fastpath(nce);
		} else {
			mutex_exit(&nce->nce_lock);
		}
	} else {
		/* Just delete the NCE... */
		ndp_delete(nce);
	}
}

/*
 * Return a pointer to a given option in the packet.
 * Assumes that option part of the packet have already been validated.
 */
nd_opt_hdr_t *
ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type)
{
	/* nd_opt_len is in units of 8 octets (RFC 2461). */
	while (optlen > 0) {
		if (opt->nd_opt_type == opt_type)
			return (opt);
		optlen -= 8 * opt->nd_opt_len;
		opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len);
	}
	return (NULL);
}

/*
 * Verify all option lengths present are > 0, also check to see
 * if the option lengths and packet length are consistent.
 */
boolean_t
ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen)
{
	ASSERT(opt != NULL);
	while (optlen > 0) {
		/* A zero-length option would make the walk loop forever. */
		if (opt->nd_opt_len == 0)
			return (B_FALSE);
		optlen -= 8 * opt->nd_opt_len;
		if (optlen < 0)
			return (B_FALSE);
		opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len);
	}
	return (B_TRUE);
}

/*
 * ndp_walk function.
 * Free a fraction of the NCE cache entries.
 * A fraction of zero means to not free any in that category.
 */
void
ndp_cache_reclaim(nce_t *nce, char *arg)
{
	nce_cache_reclaim_t	*ncr = (nce_cache_reclaim_t *)arg;
	uint_t	rand;

	if (nce->nce_flags & NCE_F_PERMANENT)
		return;

	/* Pseudo-random selection: delete roughly 1 of every ncr_host. */
	rand = (uint_t)lbolt +
	    NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE);
	if (ncr->ncr_host != 0 &&
	    (rand/ncr->ncr_host)*ncr->ncr_host == rand) {
		ndp_delete(nce);
		return;
	}
}

/*
 * ndp_walk function.
 * Count the number of NCEs that can be deleted.
 * These would be hosts but not routers.
 */
void
ndp_cache_count(nce_t *nce, char *arg)
{
	ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg;

	if (nce->nce_flags & NCE_F_PERMANENT)
		return;

	ncc->ncc_total++;
	if (!(nce->nce_flags & NCE_F_ISROUTER))
		ncc->ncc_host++;
}

#ifdef DEBUG
/*
 * Record a reference-count trace for the nce; disables tracing (and frees
 * prior traces) if trace records cannot be allocated.  Caller holds nce_lock.
 */
void
nce_trace_ref(nce_t *nce)
{
	ASSERT(MUTEX_HELD(&nce->nce_lock));

	if (nce->nce_trace_disable)
		return;

	if (!th_trace_ref(nce, nce->nce_ill->ill_ipst)) {
		nce->nce_trace_disable = B_TRUE;
		nce_trace_cleanup(nce);
	}
}

/* Record a refrele trace for the nce.  Caller holds nce_lock. */
void
nce_untrace_ref(nce_t *nce)
{
	ASSERT(MUTEX_HELD(&nce->nce_lock));

	if (!nce->nce_trace_disable)
		th_trace_unref(nce);
}

/* Free any accumulated trace records for the nce. */
static void
nce_trace_cleanup(const nce_t *nce)
{
	th_trace_cleanup(nce, nce->nce_trace_disable);
}
#endif

/*
 * Called when address resolution fails due to a timeout.
 * Send an ICMP unreachable in response to all queued packets.
 */
void
arp_resolv_failed(nce_t *nce)
{
	mblk_t	*mp, *nxt_mp, *first_mp;
	char	buf[INET6_ADDRSTRLEN];
	zoneid_t zoneid = GLOBAL_ZONEID;
	struct in_addr ipv4addr;
	ip_stack_t *ipst = nce->nce_ill->ill_ipst;

	IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr);
	ip3dbg(("arp_resolv_failed: dst %s\n",
	    inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf))));
	/* Detach the queued packets so the list can be walked unlocked. */
	mutex_enter(&nce->nce_lock);
	mp = nce->nce_qd_mp;
	nce->nce_qd_mp = NULL;
	mutex_exit(&nce->nce_lock);

	while (mp != NULL) {
		nxt_mp = mp->b_next;
		mp->b_next = NULL;
		mp->b_prev = NULL;

		first_mp = mp;
		/*
		 * Send icmp unreachable messages
		 * to the hosts.
		 */
		(void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst);
		ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n"));
		icmp_unreachable(nce->nce_ill->ill_wq, first_mp,
		    ICMP_HOST_UNREACHABLE, zoneid, ipst);
		mp = nxt_mp;
	}
}

/*
 * Lookup the IPv4 nce for addr on ill; create one via ndp_add_v4() if it
 * does not exist.  Returns 0 on creation, EEXIST (with *newnce held) if
 * the entry was already present.
 */
int
ndp_lookup_then_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags,
    nce_t **newnce, nce_t *src_nce)
{
	int	err;
	nce_t	*nce;
	in6_addr_t addr6;
	ip_stack_t *ipst = ill->ill_ipst;

	mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
	nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr));
	IN6_IPADDR_TO_V4MAPPED(*addr, &addr6);
	/*
	 * NOTE: IPv4 never matches across the illgrp since the NCE's we're
	 * looking up have fastpath headers that are inherently per-ill.
	 */
	nce = nce_lookup_addr(ill, B_FALSE, &addr6, nce);
	if (nce == NULL) {
		err = ndp_add_v4(ill, addr, flags, newnce, src_nce);
	} else {
		*newnce = nce;
		err = EEXIST;
	}
	mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
	return (err);
}

/*
 * NDP Cache Entry creation routine for IPv4.
 * Mapped entries are handled in arp.
 * This routine must always be called with ndp4->ndp_g_lock held.
 * Prior to return, nce_refcnt is incremented.
 */
static int
ndp_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags,
    nce_t **newnce, nce_t *src_nce)
{
	static	nce_t		nce_nil;
	nce_t		*nce;
	mblk_t		*mp;
	mblk_t		*template = NULL;
	nce_t		**ncep;
	ip_stack_t	*ipst = ill->ill_ipst;
	uint16_t	state = ND_INITIAL;
	int		err;

	ASSERT(MUTEX_HELD(&ipst->ips_ndp4->ndp_g_lock));
	ASSERT(!ill->ill_isv6);
	ASSERT((flags & NCE_F_MAPPING) == 0);

	if (ill->ill_resolver_mp == NULL)
		return (EINVAL);
	/*
	 * Allocate the mblk to hold the nce.
	 */
	mp = allocb(sizeof (nce_t), BPRI_MED);
	if (mp == NULL)
		return (ENOMEM);

	nce = (nce_t *)mp->b_rptr;
	mp->b_wptr = (uchar_t *)&nce[1];
	*nce = nce_nil;
	nce->nce_ill = ill;
	nce->nce_ipversion = IPV4_VERSION;
	nce->nce_flags = flags;
	nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
	nce->nce_rcnt = ill->ill_xmit_count;
	IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr);
	nce->nce_mask = ipv6_all_ones;
	nce->nce_extract_mask = ipv6_all_zeros;
	nce->nce_ll_extract_start = 0;
	nce->nce_qd_mp = NULL;
	nce->nce_mp = mp;
	/* This one is for nce getting created */
	nce->nce_refcnt = 1;
	mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL);
	ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr));

	nce->nce_trace_disable = B_FALSE;

	if (src_nce != NULL) {
		/*
		 * src_nce has been provided by the caller.  The only
		 * caller who provides a non-null, non-broadcast
		 * src_nce is from ip_newroute() which must pass in
		 * a ND_REACHABLE src_nce (this condition is verified
		 * via an ASSERT for the save_ire->ire_nce in ip_newroute())
		 */
		mutex_enter(&src_nce->nce_lock);
		state = src_nce->nce_state;
		if ((src_nce->nce_flags & NCE_F_CONDEMNED) ||
		    (ipst->ips_ndp4->ndp_g_hw_change > 0)) {
			/*
			 * src_nce has been deleted, or
			 * ip_arp_news is in the middle of
			 * flushing entries in the nce.
			 * Fail the add, since we don't know
			 * if it is safe to copy the contents of
			 * src_nce
			 */
			DTRACE_PROBE2(nce__bad__src__nce,
			    nce_t *, src_nce, ill_t *, ill);
			mutex_exit(&src_nce->nce_lock);
			err = EINVAL;
			goto err_ret;
		}
		template = copyb(src_nce->nce_res_mp);
		mutex_exit(&src_nce->nce_lock);
		if (template == NULL) {
			err = ENOMEM;
			goto err_ret;
		}
	} else if (flags & NCE_F_BCAST) {
		/*
		 * broadcast nce.
		 */
		template = copyb(ill->ill_bcast_mp);
		if (template == NULL) {
			err = ENOMEM;
			goto err_ret;
		}
		state = ND_REACHABLE;
	} else if (ill->ill_net_type == IRE_IF_NORESOLVER) {
		/*
		 * NORESOLVER entries are always created in the REACHABLE
		 * state.  We create a nce_res_mp with the IP nexthop address
		 * in the destination address in the DLPI hdr if the
		 * physical length is exactly 4 bytes.
		 *
		 * XXX not clear which drivers set ill_phys_addr_length to
		 * IP_ADDR_LEN.
		 */
		if (ill->ill_phys_addr_length == IP_ADDR_LEN) {
			template = ill_dlur_gen((uchar_t *)addr,
			    ill->ill_phys_addr_length,
			    ill->ill_sap, ill->ill_sap_length);
		} else {
			template = copyb(ill->ill_resolver_mp);
		}
		if (template == NULL) {
			err = ENOMEM;
			goto err_ret;
		}
		state = ND_REACHABLE;
	}
	nce->nce_fp_mp = NULL;
	nce->nce_res_mp = template;
	nce->nce_state = state;
	if (state == ND_REACHABLE) {
		nce->nce_last = TICK_TO_MSEC(lbolt64);
		nce->nce_init_time = TICK_TO_MSEC(lbolt64);
	} else {
		nce->nce_last = 0;
		if (state == ND_INITIAL)
			nce->nce_init_time = TICK_TO_MSEC(lbolt64);
	}
	ASSERT((nce->nce_res_mp == NULL && nce->nce_state == ND_INITIAL) ||
	    (nce->nce_res_mp != NULL && nce->nce_state == ND_REACHABLE));
	/*
	 * Atomically ensure that the ill is not CONDEMNED, before
	 * adding the NCE.
	 */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_state_flags & ILL_CONDEMNED) {
		mutex_exit(&ill->ill_lock);
		err = EINVAL;
		goto err_ret;
	}
	/* Link the nce at the head of its hash bucket. */
	if ((nce->nce_next = *ncep) != NULL)
		nce->nce_next->nce_ptpn = &nce->nce_next;
	*ncep = nce;
	nce->nce_ptpn = ncep;
	*newnce = nce;
	/* This one is for nce being used by an active thread */
	NCE_REFHOLD(*newnce);

	/* Bump up the number of nce's referencing this ill */
	DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
	    (char *), "nce", (void *), nce);
	ill->ill_nce_cnt++;
	mutex_exit(&ill->ill_lock);
	DTRACE_PROBE1(ndp__add__v4, nce_t *, nce);
	return (0);
err_ret:
	/* freemsg(NULL) is a no-op, so an unset template is safe here. */
	freeb(mp);
	freemsg(template);
	return (err);
}

/*
 * ndp_walk routine to delete all entries that have a given destination or
 * gateway address and cached link layer (MAC) address.  This is used when ARP
 * informs us that a network-to-link-layer mapping may have changed.
 */
void
nce_delete_hw_changed(nce_t *nce, void *arg)
{
	nce_hw_map_t *hwm = arg;
	mblk_t	*mp;
	dl_unitdata_req_t *dlu;
	uchar_t	*macaddr;
	ill_t	*ill;
	int	saplen;
	ipaddr_t nce_addr;

	if (nce->nce_state != ND_REACHABLE)
		return;

	IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr);
	if (nce_addr != hwm->hwm_addr)
		return;

	mutex_enter(&nce->nce_lock);
	if ((mp = nce->nce_res_mp) == NULL) {
		mutex_exit(&nce->nce_lock);
		return;
	}
	/* Locate the MAC address inside the DL_UNITDATA_REQ template. */
	dlu = (dl_unitdata_req_t *)mp->b_rptr;
	macaddr = (uchar_t *)(dlu + 1);
	ill = nce->nce_ill;
	if ((saplen = ill->ill_sap_length) > 0)
		macaddr += saplen;
	else
		saplen = -saplen;

	/*
	 * If the hardware address is unchanged, then leave this one alone.
	 * Note that saplen == abs(saplen) now.
	 */
	if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen &&
	    bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) {
		mutex_exit(&nce->nce_lock);
		return;
	}
	mutex_exit(&nce->nce_lock);

	DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce);
	ndp_delete(nce);
}

/*
 * This function verifies whether a given IPv4 address is potentially known to
 * the NCE subsystem.  If so, then ARP must not delete the corresponding ace_t,
 * so that it can continue to look for hardware changes on that address.
 */
boolean_t
ndp_lookup_ipaddr(in_addr_t addr, netstack_t *ns)
{
	nce_t		*nce;
	struct in_addr	nceaddr;
	ip_stack_t	*ipst = ns->netstack_ip;

	if (addr == INADDR_ANY)
		return (B_FALSE);

	mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
	nce = *(nce_t **)NCE_HASH_PTR_V4(ipst, addr);
	for (; nce != NULL; nce = nce->nce_next) {
		/* Note that only v4 mapped entries are in the table. */
		IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr);
		if (addr == nceaddr.s_addr &&
		    IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) {
			/* Single flag check; no lock needed */
			if (!(nce->nce_flags & NCE_F_CONDEMNED))
				break;
		}
	}
	mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
	return (nce != NULL);
}

/*
 * Wrapper around ipif_lookup_addr_exact_v6() that allows ND to work properly
 * with IPMP.
Specifically, since neighbor discovery is always done on * underlying interfaces (even for addresses owned by an IPMP interface), we * need to check for `v6addrp' on both `ill' and on the IPMP meta-interface * associated with `ill' (if it exists). */ static ipif_t * ip_ndp_lookup_addr_v6(const in6_addr_t *v6addrp, ill_t *ill) { ipif_t *ipif; ip_stack_t *ipst = ill->ill_ipst; ipif = ipif_lookup_addr_exact_v6(v6addrp, ill, ipst); if (ipif == NULL && IS_UNDER_IPMP(ill)) { if ((ill = ipmp_ill_hold_ipmp_ill(ill)) != NULL) { ipif = ipif_lookup_addr_exact_v6(v6addrp, ill, ipst); ill_refrele(ill); } } return (ipif); }