/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Functions to implement IP address -> link layer address (PSARC 2006/482) */ #include #include #include #include #include #include /* * dispatch pending callbacks. */ void ncec_cb_dispatch(ncec_t *ncec) { ncec_cb_t *ncec_cb; ip2mac_t ip2m; mutex_enter(&ncec->ncec_lock); if (list_is_empty(&ncec->ncec_cb)) { mutex_exit(&ncec->ncec_lock); return; } ncec_ip2mac_response(&ip2m, ncec); ncec_cb_refhold_locked(ncec); /* * IP does not hold internal locks like nce_lock across calls to * other subsystems for fear of recursive lock entry and lock * hierarchy violation. The caller may be holding locks across * the call to IP. (It would be ideal if no subsystem holds locks * across calls into another subsystem, especially if calls can * happen in either direction). */ ncec_cb = list_head(&ncec->ncec_cb); for (; ncec_cb != NULL; ncec_cb = list_next(&ncec->ncec_cb, ncec_cb)) { if (ncec_cb->ncec_cb_flags & NCE_CB_DISPATCHED) continue; ncec_cb->ncec_cb_flags |= NCE_CB_DISPATCHED; mutex_exit(&ncec->ncec_lock); (*ncec_cb->ncec_cb_func)(&ip2m, ncec_cb->ncec_cb_arg); mutex_enter(&ncec->ncec_lock); } ncec_cb_refrele(ncec); mutex_exit(&ncec->ncec_lock); } /* * fill up the ip2m response fields with inforamation from the nce. */ void ncec_ip2mac_response(ip2mac_t *ip2m, ncec_t *ncec) { boolean_t isv6 = (ncec->ncec_ipversion == IPV6_VERSION); sin_t *sin; sin6_t *sin6; struct sockaddr_dl *sdl; ASSERT(MUTEX_HELD(&ncec->ncec_lock)); bzero(ip2m, sizeof (*ip2m)); if (NCE_ISREACHABLE(ncec) && !NCE_ISCONDEMNED(ncec)) ip2m->ip2mac_err = 0; else ip2m->ip2mac_err = ESRCH; if (isv6) { sin6 = (sin6_t *)&ip2m->ip2mac_pa; sin6->sin6_family = AF_INET6; sin6->sin6_addr = ncec->ncec_addr; } else { sin = (sin_t *)&ip2m->ip2mac_pa; sin->sin_family = AF_INET; IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &sin->sin_addr); } if (ip2m->ip2mac_err == 0) { sdl = &ip2m->ip2mac_ha; sdl->sdl_family = AF_LINK; sdl->sdl_type = ncec->ncec_ill->ill_type; /* * should we put ncec_ill->ill_name in there? why? * likewise for the sdl_index */ sdl->sdl_nlen = 0; sdl->sdl_alen = ncec->ncec_ill->ill_phys_addr_length; if (ncec->ncec_lladdr != NULL) bcopy(ncec->ncec_lladdr, LLADDR(sdl), sdl->sdl_alen); } } void ncec_cb_refhold_locked(ncec_t *ncec) { ASSERT(MUTEX_HELD(&ncec->ncec_lock)); ncec->ncec_cb_walker_cnt++; } void ncec_cb_refrele(ncec_t *ncec) { ncec_cb_t *ncec_cb, *ncec_cb_next = NULL; ASSERT(MUTEX_HELD(&ncec->ncec_lock)); if (--ncec->ncec_cb_walker_cnt == 0) { for (ncec_cb = list_head(&ncec->ncec_cb); ncec_cb != NULL; ncec_cb = ncec_cb_next) { ncec_cb_next = list_next(&ncec->ncec_cb, ncec_cb); if ((ncec_cb->ncec_cb_flags & NCE_CB_DISPATCHED) == 0) continue; list_remove(&ncec->ncec_cb, ncec_cb); kmem_free(ncec_cb, sizeof (*ncec_cb)); } } } /* * add a callback to the nce, so that the callback can be invoked * after address resolution succeeds/fails. */ static ip2mac_id_t ncec_add_cb(ncec_t *ncec, ip2mac_callback_t *cb, void *cbarg) { ncec_cb_t *nce_cb; ip2mac_id_t ip2mid = NULL; ASSERT(MUTEX_HELD(&ncec->ncec_lock)); if ((nce_cb = kmem_zalloc(sizeof (*nce_cb), KM_NOSLEEP)) == NULL) return (ip2mid); nce_cb->ncec_cb_func = cb; nce_cb->ncec_cb_arg = cbarg; /* * We identify the ncec_cb_t during cancellation by the address * of the nce_cb_t itself, and, as a short-cut for eliminating * clear mismatches, only look in the callback list of ncec's * whose address is equal to the nce_cb_id. */ nce_cb->ncec_cb_id = ncec; /* no refs! just an address */ list_insert_tail(&ncec->ncec_cb, nce_cb); ip2mid = ncec; /* this is the id to be used in ip2mac_cancel */ return (nce_cb); } /* * Resolve an IP address to a link-layer address using the data-structures * defined in PSARC 2006/482. If the current link-layer address for the * IP address is not known, the state-machine for resolving the resolution * will be triggered, and the callback function (*cb) will be invoked after * the resolution completes. */ ip2mac_id_t ip2mac(uint_t op, ip2mac_t *ip2m, ip2mac_callback_t *cb, void *cbarg, zoneid_t zoneid) { ncec_t *ncec; nce_t *nce = NULL; boolean_t isv6; ill_t *ill; netstack_t *ns; ip_stack_t *ipst; ip2mac_id_t ip2mid = NULL; sin_t *sin; sin6_t *sin6; int err; uint64_t delta; boolean_t need_resolve = B_FALSE; isv6 = (ip2m->ip2mac_pa.ss_family == AF_INET6); ns = netstack_find_by_zoneid(zoneid); if (ns == NULL) { ip2m->ip2mac_err = EINVAL; return (NULL); } /* * For exclusive stacks we reset the zoneid to zero * since IP uses the global zoneid in the exclusive stacks. */ if (ns->netstack_stackid != GLOBAL_NETSTACKID) zoneid = GLOBAL_ZONEID; ipst = ns->netstack_ip; /* * find the ill from the ip2m->ip2mac_ifindex */ ill = ill_lookup_on_ifindex(ip2m->ip2mac_ifindex, isv6, ipst); if (ill == NULL) { ip2m->ip2mac_err = ENXIO; netstack_rele(ns); return (NULL); } if (isv6) { sin6 = (sin6_t *)&ip2m->ip2mac_pa; if (op == IP2MAC_LOOKUP) { nce = nce_lookup_v6(ill, &sin6->sin6_addr); } else { err = nce_lookup_then_add_v6(ill, NULL, ill->ill_phys_addr_length, &sin6->sin6_addr, 0, ND_UNCHANGED, &nce); } } else { sin = (sin_t *)&ip2m->ip2mac_pa; if (op == IP2MAC_LOOKUP) { nce = nce_lookup_v4(ill, &sin->sin_addr.s_addr); } else { err = nce_lookup_then_add_v4(ill, NULL, ill->ill_phys_addr_length, &sin->sin_addr.s_addr, 0, ND_UNCHANGED, &nce); } } if (op == IP2MAC_LOOKUP) { if (nce == NULL) { ip2m->ip2mac_err = ESRCH; goto done; } ncec = nce->nce_common; delta = TICK_TO_MSEC(ddi_get_lbolt64()) - ncec->ncec_last; mutex_enter(&ncec->ncec_lock); if (NCE_ISREACHABLE(ncec) && delta < (uint64_t)ill->ill_reachable_time) { ncec_ip2mac_response(ip2m, ncec); ip2m->ip2mac_err = 0; } else { ip2m->ip2mac_err = ESRCH; } mutex_exit(&ncec->ncec_lock); goto done; } else { if (err != 0 && err != EEXIST) { ip2m->ip2mac_err = err; goto done; } } ncec = nce->nce_common; delta = TICK_TO_MSEC(ddi_get_lbolt64()) - ncec->ncec_last; mutex_enter(&ncec->ncec_lock); if (NCE_ISCONDEMNED(ncec)) { ip2m->ip2mac_err = ESRCH; } else { if (NCE_ISREACHABLE(ncec)) { if (NCE_MYADDR(ncec) || delta < (uint64_t)ill->ill_reachable_time) { ncec_ip2mac_response(ip2m, ncec); ip2m->ip2mac_err = 0; mutex_exit(&ncec->ncec_lock); goto done; } /* * Since we do not control the packet output * path for ip2mac() callers, we need to verify * if the existing information in the nce is * very old, and retrigger resolution if necessary. * We will not return the existing stale * information until it is verified through a * resolver request/response exchange. * * In the future, we may want to support extensions * that do additional callbacks on link-layer updates, * so that we can return the stale information but * also update the caller if the lladdr changes. */ ncec->ncec_rcnt = ill->ill_xmit_count; ncec->ncec_state = ND_PROBE; need_resolve = B_TRUE; /* reachable but very old nce */ } else if (ncec->ncec_state == ND_INITIAL) { need_resolve = B_TRUE; /* ND_INITIAL nce */ ncec->ncec_state = ND_INCOMPLETE; } /* * NCE not known to be reachable in the recent past. We must * reconfirm the information before returning it to the caller */ if (ncec->ncec_rcnt > 0) { /* * Still resolving this ncec, so we can queue the * callback information in ncec->ncec_cb */ ip2mid = ncec_add_cb(ncec, cb, cbarg); ip2m->ip2mac_err = EINPROGRESS; } else { /* * No more retransmits allowed -- resolution failed. */ ip2m->ip2mac_err = ESRCH; } } mutex_exit(&ncec->ncec_lock); done: /* * if NCE_ISREACHABLE(ncec) but very old, or if it is ND_INITIAL, * trigger resolve. */ if (need_resolve) ip_ndp_resolve(ncec); if (nce != NULL) nce_refrele(nce); netstack_rele(ns); ill_refrele(ill); return (ip2mid); } /* * data passed to ncec_walk for canceling outstanding callbacks. */ typedef struct ip2mac_cancel_data_s { ip2mac_id_t ip2m_cancel_id; int ip2m_cancel_err; } ip2mac_cancel_data_t; /* * callback invoked for each active ncec. If the ip2mac_id_t corresponds * to an active nce_cb_t in the ncec's callback list, we want to remove * the callback (if there are no walkers) or return EBUSY to the caller */ static int ip2mac_cancel_callback(ncec_t *ncec, void *arg) { ip2mac_cancel_data_t *ip2m_wdata = arg; ncec_cb_t *ip2m_nce_cb = ip2m_wdata->ip2m_cancel_id; ncec_cb_t *ncec_cb; if (ip2m_nce_cb->ncec_cb_id != ncec) return (0); mutex_enter(&ncec->ncec_lock); if (list_is_empty(&ncec->ncec_cb)) { mutex_exit(&ncec->ncec_lock); return (0); } /* * IP does not hold internal locks like nce_lock across calls to * other subsystems for fear of recursive lock entry and lock * hierarchy violation. The caller may be holding locks across * the call to IP. (It would be ideal if no subsystem holds locks * across calls into another subsystem, especially if calls can * happen in either direction). */ ncec_cb = list_head(&ncec->ncec_cb); for (; ncec_cb != NULL; ncec_cb = list_next(&ncec->ncec_cb, ncec_cb)) { if (ncec_cb != ip2m_nce_cb) continue; /* * If there are no walkers we can remove the nce_cb. * Otherwise the exiting walker will clean up. */ if (ncec->ncec_cb_walker_cnt == 0) { list_remove(&ncec->ncec_cb, ncec_cb); } else { ip2m_wdata->ip2m_cancel_err = EBUSY; } break; } mutex_exit(&ncec->ncec_lock); return (0); } /* * cancel an outstanding timeout set up via ip2mac */ int ip2mac_cancel(ip2mac_id_t ip2mid, zoneid_t zoneid) { netstack_t *ns; ip_stack_t *ipst; ip2mac_cancel_data_t ip2m_wdata; ns = netstack_find_by_zoneid(zoneid); if (ns == NULL) { ip2m_wdata.ip2m_cancel_err = EINVAL; return (ip2m_wdata.ip2m_cancel_err); } /* * For exclusive stacks we reset the zoneid to zero * since IP uses the global zoneid in the exclusive stacks. */ if (ns->netstack_stackid != GLOBAL_NETSTACKID) zoneid = GLOBAL_ZONEID; ipst = ns->netstack_ip; ip2m_wdata.ip2m_cancel_id = ip2mid; ip2m_wdata.ip2m_cancel_err = 0; ncec_walk(NULL, ip2mac_cancel_callback, &ip2m_wdata, ipst); /* * We may return EBUSY if a walk to dispatch callbacks is * in progress, in which case the caller needs to synchronize * with the registered callback function to make sure the * module does not exit when there is a callback pending. */ netstack_rele(ns); return (ip2m_wdata.ip2m_cancel_err); }