1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Functions to implement IP address -> link layer address (PSARC 2006/482) 28 */ 29 #include <inet/ip2mac.h> 30 #include <inet/ip2mac_impl.h> 31 #include <sys/zone.h> 32 #include <sys/dlpi.h> 33 #include <inet/ip_ndp.h> 34 #include <inet/ip_if.h> 35 #include <inet/ip6.h> 36 37 /* 38 * dispatch pending callbacks. 39 */ 40 void 41 nce_cb_dispatch(nce_t *nce) 42 { 43 nce_cb_t *nce_cb = list_head(&nce->nce_cb); 44 ip2mac_t ip2m; 45 46 mutex_enter(&nce->nce_lock); 47 if (list_is_empty(&nce->nce_cb)) { 48 mutex_exit(&nce->nce_lock); 49 return; 50 } 51 nce_ip2mac_response(&ip2m, nce); 52 nce_cb_refhold_locked(nce); 53 /* 54 * IP does not hold internal locks like nce_lock across calls to 55 * other subsystems for fear of recursive lock entry and lock 56 * hierarchy violation. The caller may be holding locks across 57 * the call to IP. (It would be ideal if no subsystem holds locks 58 * across calls into another subsystem, especially if calls can 59 * happen in either direction). 60 */ 61 nce_cb = list_head(&nce->nce_cb); 62 for (; nce_cb != NULL; nce_cb = list_next(&nce->nce_cb, nce_cb)) { 63 if (nce_cb->nce_cb_flags & NCE_CB_DISPATCHED) 64 continue; 65 nce_cb->nce_cb_flags |= NCE_CB_DISPATCHED; 66 mutex_exit(&nce->nce_lock); 67 (*nce_cb->nce_cb_func)(&ip2m, nce_cb->nce_cb_arg); 68 mutex_enter(&nce->nce_lock); 69 } 70 nce_cb_refrele(nce); 71 mutex_exit(&nce->nce_lock); 72 } 73 74 /* 75 * fill up the ip2m response fields with inforamation from the nce. 76 */ 77 void 78 nce_ip2mac_response(ip2mac_t *ip2m, nce_t *nce) 79 { 80 boolean_t isv6 = (nce->nce_ipversion == IPV6_VERSION); 81 sin6_t *sin6; 82 struct sockaddr_dl *sdl; 83 uchar_t *nce_lladdr; 84 85 ASSERT(MUTEX_HELD(&nce->nce_lock)); 86 bzero(ip2m, sizeof (*ip2m)); 87 if (NCE_ISREACHABLE(nce) && (nce->nce_flags & NCE_F_CONDEMNED) == 0) 88 ip2m->ip2mac_err = 0; 89 else 90 ip2m->ip2mac_err = ESRCH; 91 if (isv6) { 92 sin6 = (sin6_t *)&ip2m->ip2mac_pa; 93 sin6->sin6_family = AF_INET6; 94 sin6->sin6_addr = nce->nce_addr; 95 } 96 if (ip2m->ip2mac_err == 0) { 97 sdl = &ip2m->ip2mac_ha; 98 sdl->sdl_family = AF_LINK; 99 sdl->sdl_type = nce->nce_ill->ill_type; 100 sdl->sdl_nlen = 0; 101 sdl->sdl_alen = nce->nce_ill->ill_phys_addr_length; 102 nce_lladdr = nce->nce_res_mp->b_rptr + 103 NCE_LL_ADDR_OFFSET(nce->nce_ill); 104 bcopy(nce_lladdr, LLADDR(sdl), sdl->sdl_alen); 105 } 106 } 107 108 void 109 nce_cb_refhold_locked(nce_t *nce) 110 { 111 ASSERT(MUTEX_HELD(&nce->nce_lock)); 112 nce->nce_cb_walker_cnt++; 113 } 114 115 void 116 nce_cb_refrele(nce_t *nce) 117 { 118 nce_cb_t *nce_cb, *nce_cb_next = NULL; 119 120 ASSERT(MUTEX_HELD(&nce->nce_lock)); 121 if (--nce->nce_cb_walker_cnt == 0) { 122 for (nce_cb = list_head(&nce->nce_cb); nce_cb != NULL; 123 nce_cb = nce_cb_next) { 124 125 nce_cb_next = list_next(&nce->nce_cb, nce_cb); 126 if ((nce_cb->nce_cb_flags & NCE_CB_DISPATCHED) == 0) 127 continue; 128 list_remove(&nce->nce_cb, nce_cb); 129 kmem_free(nce_cb, sizeof (*nce_cb)); 130 } 131 } 132 } 133 134 /* 135 * add a callback to the nce, so that the callback can be invoked 136 * after address resolution succeeds/fails. 137 */ 138 static ip2mac_id_t 139 nce_add_cb(nce_t *nce, ip2mac_callback_t *cb, void *cbarg) 140 { 141 nce_cb_t *nce_cb; 142 ip2mac_id_t ip2mid = NULL; 143 144 ASSERT(MUTEX_HELD(&nce->nce_lock)); 145 if ((nce_cb = kmem_zalloc(sizeof (*nce_cb), KM_NOSLEEP)) == NULL) 146 return (ip2mid); 147 nce_cb->nce_cb_func = cb; 148 nce_cb->nce_cb_arg = cbarg; 149 /* 150 * We identify the nce_cb_t during cancellation by the address 151 * of the nce_cb_t itself, and, as a short-cut for eliminating 152 * clear mismatches, only look in the callback list of nce's 153 * whose address is equal to the nce_cb_id. 154 */ 155 nce_cb->nce_cb_id = nce; /* no refs! just an address */ 156 list_insert_tail(&nce->nce_cb, nce_cb); 157 ip2mid = nce; /* this is the id to be used in ip2mac_cancel */ 158 159 return (nce_cb); 160 } 161 162 /* 163 * Resolve an IP address to a link-layer address using the data-structures 164 * defined in PSARC 2006/482. If the current link-layer address for the 165 * IP address is not known, the state-machine for resolving the resolution 166 * will be triggered, and the callback function (*cb) will be invoked after 167 * the resolution completes. 168 */ 169 ip2mac_id_t 170 ip2mac(uint_t flags, ip2mac_t *ip2m, ip2mac_callback_t *cb, void *cbarg, 171 zoneid_t zoneid) 172 { 173 nce_t *nce; 174 boolean_t isv6; 175 ill_t *ill; 176 netstack_t *ns; 177 ip_stack_t *ipst; 178 ip2mac_id_t ip2mid = NULL; 179 sin6_t *sin6; 180 int err; 181 uint64_t delta; 182 183 isv6 = (ip2m->ip2mac_pa.ss_family == AF_INET6); 184 185 if (!isv6) { 186 /* 187 * IPv4 is not currently supported. 188 */ 189 ip2m->ip2mac_err = ENOTSUP; 190 return (NULL); 191 } 192 193 ns = netstack_find_by_zoneid(zoneid); 194 if (ns == NULL) { 195 ip2m->ip2mac_err = EINVAL; 196 return (NULL); 197 } 198 /* 199 * For exclusive stacks we reset the zoneid to zero 200 * since IP uses the global zoneid in the exclusive stacks. 201 */ 202 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 203 zoneid = GLOBAL_ZONEID; 204 ipst = ns->netstack_ip; 205 /* 206 * find the ill from the ip2m->ip2mac_ifindex 207 */ 208 ill = ill_lookup_on_ifindex(ip2m->ip2mac_ifindex, isv6, NULL, 209 NULL, NULL, NULL, ipst); 210 if (ill == NULL) { 211 ip2m->ip2mac_err = ENXIO; 212 netstack_rele(ns); 213 return (NULL); 214 } 215 if (isv6) { 216 sin6 = (sin6_t *)&ip2m->ip2mac_pa; 217 if (flags == IP2MAC_LOOKUP) { 218 nce = ndp_lookup_v6(ill, B_FALSE, &sin6->sin6_addr, 219 B_FALSE); 220 } else { 221 err = ndp_lookup_then_add_v6(ill, B_FALSE, NULL, 222 &sin6->sin6_addr, &ipv6_all_ones, &ipv6_all_zeros, 223 0, 0, ND_INCOMPLETE, &nce); 224 } 225 } else { 226 ip2m->ip2mac_err = ENOTSUP; /* yet. */ 227 goto done; 228 } 229 if (flags == IP2MAC_LOOKUP) { 230 if (nce == NULL) { 231 ip2m->ip2mac_err = ESRCH; 232 goto done; 233 } 234 mutex_enter(&nce->nce_lock); 235 if (NCE_ISREACHABLE(nce)) { 236 nce_ip2mac_response(ip2m, nce); 237 ip2m->ip2mac_err = 0; 238 } else { 239 ip2m->ip2mac_err = ESRCH; 240 } 241 mutex_exit(&nce->nce_lock); 242 NCE_REFRELE(nce); 243 goto done; 244 } else { 245 if (err != 0 && err != EEXIST) { 246 ip2m->ip2mac_err = err; 247 goto done; 248 } 249 } 250 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 251 mutex_enter(&nce->nce_lock); 252 if (nce->nce_flags & NCE_F_CONDEMNED) { 253 ip2m->ip2mac_err = ESRCH; 254 } else if (!NCE_ISREACHABLE(nce) || 255 delta > (uint64_t)ill->ill_reachable_time) { 256 if (NCE_ISREACHABLE(nce)) { 257 /* 258 * Since we do not control the packet output 259 * path for ip2mac() callers, we need to verify 260 * if the existing information in the nce is 261 * very old, and retrigger resolution if necessary. 262 * We will not return the existing stale 263 * information until it is verified through a 264 * resolver request/response exchange. 265 * 266 * In the future, we may want to support extensions 267 * that do additional callbacks on link-layer updates, 268 * so that we can return the stale information but 269 * also update the caller if the lladdr changes. 270 */ 271 nce->nce_rcnt = ill->ill_xmit_count; 272 nce->nce_state = ND_PROBE; 273 err = 0; /* treat this nce as a new one */ 274 } 275 if (nce->nce_rcnt > 0) { 276 /* 277 * Still resolving this nce, so we can 278 * queue the callback information in nce->nce_cb 279 */ 280 ip2mid = nce_add_cb(nce, cb, cbarg); 281 ip2m->ip2mac_err = EINPROGRESS; 282 } else { 283 /* 284 * Resolution failed. 285 */ 286 ip2m->ip2mac_err = ESRCH; 287 } 288 } else { 289 nce_ip2mac_response(ip2m, nce); 290 ip2m->ip2mac_err = 0; 291 } 292 if (ip2m->ip2mac_err == EINPROGRESS && err != EEXIST) 293 ip_ndp_resolve(nce); 294 mutex_exit(&nce->nce_lock); 295 NCE_REFRELE(nce); 296 done: 297 netstack_rele(ns); 298 ill_refrele(ill); 299 return (ip2mid); 300 } 301 302 /* 303 * data passed to nce_walk for canceling outstanding callbacks. 304 */ 305 typedef struct ip2mac_cancel_data_s { 306 ip2mac_id_t ip2m_cancel_id; 307 int ip2m_cancel_err; 308 } ip2mac_cancel_data_t; 309 310 /* 311 * callback invoked for each active nce. If the ip2mac_id_t corresponds 312 * to an active nce_cb_t in the nce's callback list, we want to remove 313 * the callback (if there are no walkers) or return EBUSY to the caller 314 */ 315 static int 316 ip2mac_cancel_callback(nce_t *nce, void *arg) 317 { 318 ip2mac_cancel_data_t *ip2m_wdata = arg; 319 nce_cb_t *ip2m_nce_cb = ip2m_wdata->ip2m_cancel_id; 320 nce_cb_t *nce_cb; 321 322 if (ip2m_nce_cb->nce_cb_id != nce) 323 return (0); 324 325 mutex_enter(&nce->nce_lock); 326 if (list_is_empty(&nce->nce_cb)) { 327 mutex_exit(&nce->nce_lock); 328 return (0); 329 } 330 /* 331 * IP does not hold internal locks like nce_lock across calls to 332 * other subsystems for fear of recursive lock entry and lock 333 * hierarchy violation. The caller may be holding locks across 334 * the call to IP. (It would be ideal if no subsystem holds locks 335 * across calls into another subsystem, especially if calls can 336 * happen in either direction). 337 */ 338 nce_cb = list_head(&nce->nce_cb); 339 for (; nce_cb != NULL; nce_cb = list_next(&nce->nce_cb, nce_cb)) { 340 if (nce_cb != ip2m_nce_cb) 341 continue; 342 /* 343 * If there are no walkers we can remove the nce_cb. 344 * Otherwise the exiting walker will clean up. 345 */ 346 if (nce->nce_cb_walker_cnt == 0) { 347 list_remove(&nce->nce_cb, nce_cb); 348 } else { 349 ip2m_wdata->ip2m_cancel_err = EBUSY; 350 } 351 break; 352 } 353 mutex_exit(&nce->nce_lock); 354 return (0); 355 } 356 357 /* 358 * cancel an outstanding timeout set up via ip2mac 359 */ 360 int 361 ip2mac_cancel(ip2mac_id_t ip2mid, zoneid_t zoneid) 362 { 363 netstack_t *ns; 364 ip_stack_t *ipst; 365 ip2mac_cancel_data_t ip2m_wdata; 366 367 ns = netstack_find_by_zoneid(zoneid); 368 if (ns == NULL) { 369 ip2m_wdata.ip2m_cancel_err = EINVAL; 370 return (ip2m_wdata.ip2m_cancel_err); 371 } 372 /* 373 * For exclusive stacks we reset the zoneid to zero 374 * since IP uses the global zoneid in the exclusive stacks. 375 */ 376 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 377 zoneid = GLOBAL_ZONEID; 378 ipst = ns->netstack_ip; 379 380 ip2m_wdata.ip2m_cancel_id = ip2mid; 381 ip2m_wdata.ip2m_cancel_err = 0; 382 ndp_walk(NULL, ip2mac_cancel_callback, &ip2m_wdata, ipst); 383 /* 384 * We may return EBUSY if a walk to dispatch callbacks is 385 * in progress, in which case the caller needs to synchronize 386 * with the registered callback function to make sure the 387 * module does not exit when there is a callback pending. 388 */ 389 netstack_rele(ns); 390 return (ip2m_wdata.ip2m_cancel_err); 391 } 392