1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Functions to implement IP address -> link layer address (PSARC 2006/482) 29 */ 30 #include <inet/ip2mac.h> 31 #include <inet/ip2mac_impl.h> 32 #include <sys/zone.h> 33 #include <inet/ip_ndp.h> 34 #include <inet/ip_if.h> 35 #include <inet/ip6.h> 36 37 /* 38 * dispatch pending callbacks. 39 */ 40 void 41 ncec_cb_dispatch(ncec_t *ncec) 42 { 43 ncec_cb_t *ncec_cb; 44 ip2mac_t ip2m; 45 46 mutex_enter(&ncec->ncec_lock); 47 if (list_is_empty(&ncec->ncec_cb)) { 48 mutex_exit(&ncec->ncec_lock); 49 return; 50 } 51 ncec_ip2mac_response(&ip2m, ncec); 52 ncec_cb_refhold_locked(ncec); 53 /* 54 * IP does not hold internal locks like nce_lock across calls to 55 * other subsystems for fear of recursive lock entry and lock 56 * hierarchy violation. The caller may be holding locks across 57 * the call to IP. (It would be ideal if no subsystem holds locks 58 * across calls into another subsystem, especially if calls can 59 * happen in either direction). 60 */ 61 ncec_cb = list_head(&ncec->ncec_cb); 62 for (; ncec_cb != NULL; ncec_cb = list_next(&ncec->ncec_cb, ncec_cb)) { 63 if (ncec_cb->ncec_cb_flags & NCE_CB_DISPATCHED) 64 continue; 65 ncec_cb->ncec_cb_flags |= NCE_CB_DISPATCHED; 66 mutex_exit(&ncec->ncec_lock); 67 (*ncec_cb->ncec_cb_func)(&ip2m, ncec_cb->ncec_cb_arg); 68 mutex_enter(&ncec->ncec_lock); 69 } 70 ncec_cb_refrele(ncec); 71 mutex_exit(&ncec->ncec_lock); 72 } 73 74 /* 75 * fill up the ip2m response fields with inforamation from the nce. 76 */ 77 void 78 ncec_ip2mac_response(ip2mac_t *ip2m, ncec_t *ncec) 79 { 80 boolean_t isv6 = (ncec->ncec_ipversion == IPV6_VERSION); 81 sin_t *sin; 82 sin6_t *sin6; 83 struct sockaddr_dl *sdl; 84 85 ASSERT(MUTEX_HELD(&ncec->ncec_lock)); 86 bzero(ip2m, sizeof (*ip2m)); 87 if (NCE_ISREACHABLE(ncec) && !NCE_ISCONDEMNED(ncec)) 88 ip2m->ip2mac_err = 0; 89 else 90 ip2m->ip2mac_err = ESRCH; 91 if (isv6) { 92 sin6 = (sin6_t *)&ip2m->ip2mac_pa; 93 sin6->sin6_family = AF_INET6; 94 sin6->sin6_addr = ncec->ncec_addr; 95 } else { 96 sin = (sin_t *)&ip2m->ip2mac_pa; 97 sin->sin_family = AF_INET; 98 IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &sin->sin_addr); 99 } 100 if (ip2m->ip2mac_err == 0) { 101 sdl = &ip2m->ip2mac_ha; 102 sdl->sdl_family = AF_LINK; 103 sdl->sdl_type = ncec->ncec_ill->ill_type; 104 /* 105 * should we put ncec_ill->ill_name in there? why? 106 * likewise for the sdl_index 107 */ 108 sdl->sdl_nlen = 0; 109 sdl->sdl_alen = ncec->ncec_ill->ill_phys_addr_length; 110 if (ncec->ncec_lladdr != NULL) 111 bcopy(ncec->ncec_lladdr, LLADDR(sdl), sdl->sdl_alen); 112 } 113 } 114 115 void 116 ncec_cb_refhold_locked(ncec_t *ncec) 117 { 118 ASSERT(MUTEX_HELD(&ncec->ncec_lock)); 119 ncec->ncec_cb_walker_cnt++; 120 } 121 122 void 123 ncec_cb_refrele(ncec_t *ncec) 124 { 125 ncec_cb_t *ncec_cb, *ncec_cb_next = NULL; 126 127 ASSERT(MUTEX_HELD(&ncec->ncec_lock)); 128 if (--ncec->ncec_cb_walker_cnt == 0) { 129 for (ncec_cb = list_head(&ncec->ncec_cb); ncec_cb != NULL; 130 ncec_cb = ncec_cb_next) { 131 132 ncec_cb_next = list_next(&ncec->ncec_cb, ncec_cb); 133 if ((ncec_cb->ncec_cb_flags & NCE_CB_DISPATCHED) == 0) 134 continue; 135 list_remove(&ncec->ncec_cb, ncec_cb); 136 kmem_free(ncec_cb, sizeof (*ncec_cb)); 137 } 138 } 139 } 140 141 /* 142 * add a callback to the nce, so that the callback can be invoked 143 * after address resolution succeeds/fails. 144 */ 145 static ip2mac_id_t 146 ncec_add_cb(ncec_t *ncec, ip2mac_callback_t *cb, void *cbarg) 147 { 148 ncec_cb_t *nce_cb; 149 ip2mac_id_t ip2mid = NULL; 150 151 ASSERT(MUTEX_HELD(&ncec->ncec_lock)); 152 if ((nce_cb = kmem_zalloc(sizeof (*nce_cb), KM_NOSLEEP)) == NULL) 153 return (ip2mid); 154 nce_cb->ncec_cb_func = cb; 155 nce_cb->ncec_cb_arg = cbarg; 156 /* 157 * We identify the ncec_cb_t during cancellation by the address 158 * of the nce_cb_t itself, and, as a short-cut for eliminating 159 * clear mismatches, only look in the callback list of ncec's 160 * whose address is equal to the nce_cb_id. 161 */ 162 nce_cb->ncec_cb_id = ncec; /* no refs! just an address */ 163 list_insert_tail(&ncec->ncec_cb, nce_cb); 164 ip2mid = ncec; /* this is the id to be used in ip2mac_cancel */ 165 166 return (nce_cb); 167 } 168 169 /* 170 * Resolve an IP address to a link-layer address using the data-structures 171 * defined in PSARC 2006/482. If the current link-layer address for the 172 * IP address is not known, the state-machine for resolving the resolution 173 * will be triggered, and the callback function (*cb) will be invoked after 174 * the resolution completes. 175 */ 176 ip2mac_id_t 177 ip2mac(uint_t op, ip2mac_t *ip2m, ip2mac_callback_t *cb, void *cbarg, 178 zoneid_t zoneid) 179 { 180 ncec_t *ncec; 181 nce_t *nce = NULL; 182 boolean_t isv6; 183 ill_t *ill; 184 netstack_t *ns; 185 ip_stack_t *ipst; 186 ip2mac_id_t ip2mid = NULL; 187 sin_t *sin; 188 sin6_t *sin6; 189 int err; 190 uint64_t delta; 191 boolean_t need_resolve = B_FALSE; 192 193 isv6 = (ip2m->ip2mac_pa.ss_family == AF_INET6); 194 195 ns = netstack_find_by_zoneid(zoneid); 196 if (ns == NULL) { 197 ip2m->ip2mac_err = EINVAL; 198 return (NULL); 199 } 200 /* 201 * For exclusive stacks we reset the zoneid to zero 202 * since IP uses the global zoneid in the exclusive stacks. 203 */ 204 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 205 zoneid = GLOBAL_ZONEID; 206 ipst = ns->netstack_ip; 207 /* 208 * find the ill from the ip2m->ip2mac_ifindex 209 */ 210 ill = ill_lookup_on_ifindex(ip2m->ip2mac_ifindex, isv6, ipst); 211 if (ill == NULL) { 212 ip2m->ip2mac_err = ENXIO; 213 netstack_rele(ns); 214 return (NULL); 215 } 216 if (isv6) { 217 sin6 = (sin6_t *)&ip2m->ip2mac_pa; 218 if (op == IP2MAC_LOOKUP) { 219 nce = nce_lookup_v6(ill, &sin6->sin6_addr); 220 } else { 221 err = nce_lookup_then_add_v6(ill, NULL, 222 ill->ill_phys_addr_length, 223 &sin6->sin6_addr, 0, ND_UNCHANGED, &nce); 224 } 225 } else { 226 sin = (sin_t *)&ip2m->ip2mac_pa; 227 if (op == IP2MAC_LOOKUP) { 228 nce = nce_lookup_v4(ill, &sin->sin_addr.s_addr); 229 } else { 230 err = nce_lookup_then_add_v4(ill, NULL, 231 ill->ill_phys_addr_length, 232 &sin->sin_addr.s_addr, 0, ND_UNCHANGED, &nce); 233 } 234 } 235 if (op == IP2MAC_LOOKUP) { 236 if (nce == NULL) { 237 ip2m->ip2mac_err = ESRCH; 238 goto done; 239 } 240 ncec = nce->nce_common; 241 delta = TICK_TO_MSEC(ddi_get_lbolt64()) - ncec->ncec_last; 242 mutex_enter(&ncec->ncec_lock); 243 if (NCE_ISREACHABLE(ncec) && 244 delta < (uint64_t)ill->ill_reachable_time) { 245 ncec_ip2mac_response(ip2m, ncec); 246 ip2m->ip2mac_err = 0; 247 } else { 248 ip2m->ip2mac_err = ESRCH; 249 } 250 mutex_exit(&ncec->ncec_lock); 251 goto done; 252 } else { 253 if (err != 0 && err != EEXIST) { 254 ip2m->ip2mac_err = err; 255 goto done; 256 } 257 } 258 ncec = nce->nce_common; 259 delta = TICK_TO_MSEC(ddi_get_lbolt64()) - ncec->ncec_last; 260 mutex_enter(&ncec->ncec_lock); 261 if (NCE_ISCONDEMNED(ncec)) { 262 ip2m->ip2mac_err = ESRCH; 263 } else { 264 if (NCE_ISREACHABLE(ncec)) { 265 if (NCE_MYADDR(ncec) || 266 delta < (uint64_t)ill->ill_reachable_time) { 267 ncec_ip2mac_response(ip2m, ncec); 268 ip2m->ip2mac_err = 0; 269 mutex_exit(&ncec->ncec_lock); 270 goto done; 271 } 272 /* 273 * Since we do not control the packet output 274 * path for ip2mac() callers, we need to verify 275 * if the existing information in the nce is 276 * very old, and retrigger resolution if necessary. 277 * We will not return the existing stale 278 * information until it is verified through a 279 * resolver request/response exchange. 280 * 281 * In the future, we may want to support extensions 282 * that do additional callbacks on link-layer updates, 283 * so that we can return the stale information but 284 * also update the caller if the lladdr changes. 285 */ 286 ncec->ncec_rcnt = ill->ill_xmit_count; 287 ncec->ncec_state = ND_PROBE; 288 need_resolve = B_TRUE; /* reachable but very old nce */ 289 } else if (ncec->ncec_state == ND_INITIAL) { 290 need_resolve = B_TRUE; /* ND_INITIAL nce */ 291 ncec->ncec_state = ND_INCOMPLETE; 292 } 293 /* 294 * NCE not known to be reachable in the recent past. We must 295 * reconfirm the information before returning it to the caller 296 */ 297 if (ncec->ncec_rcnt > 0) { 298 /* 299 * Still resolving this ncec, so we can queue the 300 * callback information in ncec->ncec_cb 301 */ 302 ip2mid = ncec_add_cb(ncec, cb, cbarg); 303 ip2m->ip2mac_err = EINPROGRESS; 304 } else { 305 /* 306 * No more retransmits allowed -- resolution failed. 307 */ 308 ip2m->ip2mac_err = ESRCH; 309 } 310 } 311 mutex_exit(&ncec->ncec_lock); 312 done: 313 /* 314 * if NCE_ISREACHABLE(ncec) but very old, or if it is ND_INITIAL, 315 * trigger resolve. 316 */ 317 if (need_resolve) 318 ip_ndp_resolve(ncec); 319 if (nce != NULL) 320 nce_refrele(nce); 321 netstack_rele(ns); 322 ill_refrele(ill); 323 return (ip2mid); 324 } 325 326 /* 327 * data passed to ncec_walk for canceling outstanding callbacks. 328 */ 329 typedef struct ip2mac_cancel_data_s { 330 ip2mac_id_t ip2m_cancel_id; 331 int ip2m_cancel_err; 332 } ip2mac_cancel_data_t; 333 334 /* 335 * callback invoked for each active ncec. If the ip2mac_id_t corresponds 336 * to an active nce_cb_t in the ncec's callback list, we want to remove 337 * the callback (if there are no walkers) or return EBUSY to the caller 338 */ 339 static int 340 ip2mac_cancel_callback(ncec_t *ncec, void *arg) 341 { 342 ip2mac_cancel_data_t *ip2m_wdata = arg; 343 ncec_cb_t *ip2m_nce_cb = ip2m_wdata->ip2m_cancel_id; 344 ncec_cb_t *ncec_cb; 345 346 if (ip2m_nce_cb->ncec_cb_id != ncec) 347 return (0); 348 349 mutex_enter(&ncec->ncec_lock); 350 if (list_is_empty(&ncec->ncec_cb)) { 351 mutex_exit(&ncec->ncec_lock); 352 return (0); 353 } 354 /* 355 * IP does not hold internal locks like nce_lock across calls to 356 * other subsystems for fear of recursive lock entry and lock 357 * hierarchy violation. The caller may be holding locks across 358 * the call to IP. (It would be ideal if no subsystem holds locks 359 * across calls into another subsystem, especially if calls can 360 * happen in either direction). 361 */ 362 ncec_cb = list_head(&ncec->ncec_cb); 363 for (; ncec_cb != NULL; ncec_cb = list_next(&ncec->ncec_cb, ncec_cb)) { 364 if (ncec_cb != ip2m_nce_cb) 365 continue; 366 /* 367 * If there are no walkers we can remove the nce_cb. 368 * Otherwise the exiting walker will clean up. 369 */ 370 if (ncec->ncec_cb_walker_cnt == 0) { 371 list_remove(&ncec->ncec_cb, ncec_cb); 372 } else { 373 ip2m_wdata->ip2m_cancel_err = EBUSY; 374 } 375 break; 376 } 377 mutex_exit(&ncec->ncec_lock); 378 return (0); 379 } 380 381 /* 382 * cancel an outstanding timeout set up via ip2mac 383 */ 384 int 385 ip2mac_cancel(ip2mac_id_t ip2mid, zoneid_t zoneid) 386 { 387 netstack_t *ns; 388 ip_stack_t *ipst; 389 ip2mac_cancel_data_t ip2m_wdata; 390 391 ns = netstack_find_by_zoneid(zoneid); 392 if (ns == NULL) { 393 ip2m_wdata.ip2m_cancel_err = EINVAL; 394 return (ip2m_wdata.ip2m_cancel_err); 395 } 396 /* 397 * For exclusive stacks we reset the zoneid to zero 398 * since IP uses the global zoneid in the exclusive stacks. 399 */ 400 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 401 zoneid = GLOBAL_ZONEID; 402 ipst = ns->netstack_ip; 403 404 ip2m_wdata.ip2m_cancel_id = ip2mid; 405 ip2m_wdata.ip2m_cancel_err = 0; 406 ncec_walk(NULL, ip2mac_cancel_callback, &ip2m_wdata, ipst); 407 /* 408 * We may return EBUSY if a walk to dispatch callbacks is 409 * in progress, in which case the caller needs to synchronize 410 * with the registered callback function to make sure the 411 * module does not exit when there is a callback pending. 412 */ 413 netstack_rele(ns); 414 return (ip2m_wdata.ip2m_cancel_err); 415 } 416