1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * Functions to implement IP address -> link layer address (PSARC 2006/482)
29 */
30 #include <inet/ip2mac.h>
31 #include <inet/ip2mac_impl.h>
32 #include <sys/zone.h>
33 #include <inet/ip_ndp.h>
34 #include <inet/ip_if.h>
35 #include <inet/ip6.h>
36
37 /*
38 * dispatch pending callbacks.
39 */
40 void
ncec_cb_dispatch(ncec_t * ncec)41 ncec_cb_dispatch(ncec_t *ncec)
42 {
43 ncec_cb_t *ncec_cb;
44 ip2mac_t ip2m;
45
46 mutex_enter(&ncec->ncec_lock);
47 if (list_is_empty(&ncec->ncec_cb)) {
48 mutex_exit(&ncec->ncec_lock);
49 return;
50 }
51 ncec_ip2mac_response(&ip2m, ncec);
52 ncec_cb_refhold_locked(ncec);
53 /*
54 * IP does not hold internal locks like nce_lock across calls to
55 * other subsystems for fear of recursive lock entry and lock
56 * hierarchy violation. The caller may be holding locks across
57 * the call to IP. (It would be ideal if no subsystem holds locks
58 * across calls into another subsystem, especially if calls can
59 * happen in either direction).
60 */
61 ncec_cb = list_head(&ncec->ncec_cb);
62 for (; ncec_cb != NULL; ncec_cb = list_next(&ncec->ncec_cb, ncec_cb)) {
63 if (ncec_cb->ncec_cb_flags & NCE_CB_DISPATCHED)
64 continue;
65 ncec_cb->ncec_cb_flags |= NCE_CB_DISPATCHED;
66 mutex_exit(&ncec->ncec_lock);
67 (*ncec_cb->ncec_cb_func)(&ip2m, ncec_cb->ncec_cb_arg);
68 mutex_enter(&ncec->ncec_lock);
69 }
70 ncec_cb_refrele(ncec);
71 mutex_exit(&ncec->ncec_lock);
72 }
73
74 /*
75 * fill up the ip2m response fields with inforamation from the nce.
76 */
77 void
ncec_ip2mac_response(ip2mac_t * ip2m,ncec_t * ncec)78 ncec_ip2mac_response(ip2mac_t *ip2m, ncec_t *ncec)
79 {
80 boolean_t isv6 = (ncec->ncec_ipversion == IPV6_VERSION);
81 sin_t *sin;
82 sin6_t *sin6;
83 struct sockaddr_dl *sdl;
84
85 ASSERT(MUTEX_HELD(&ncec->ncec_lock));
86 bzero(ip2m, sizeof (*ip2m));
87 if (NCE_ISREACHABLE(ncec) && !NCE_ISCONDEMNED(ncec))
88 ip2m->ip2mac_err = 0;
89 else
90 ip2m->ip2mac_err = ESRCH;
91 if (isv6) {
92 sin6 = (sin6_t *)&ip2m->ip2mac_pa;
93 sin6->sin6_family = AF_INET6;
94 sin6->sin6_addr = ncec->ncec_addr;
95 } else {
96 sin = (sin_t *)&ip2m->ip2mac_pa;
97 sin->sin_family = AF_INET;
98 IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &sin->sin_addr);
99 }
100 if (ip2m->ip2mac_err == 0) {
101 sdl = &ip2m->ip2mac_ha;
102 sdl->sdl_family = AF_LINK;
103 sdl->sdl_type = ncec->ncec_ill->ill_type;
104 /*
105 * should we put ncec_ill->ill_name in there? why?
106 * likewise for the sdl_index
107 */
108 sdl->sdl_nlen = 0;
109 sdl->sdl_alen = ncec->ncec_ill->ill_phys_addr_length;
110 if (ncec->ncec_lladdr != NULL)
111 bcopy(ncec->ncec_lladdr, LLADDR(sdl), sdl->sdl_alen);
112 }
113 }
114
115 void
ncec_cb_refhold_locked(ncec_t * ncec)116 ncec_cb_refhold_locked(ncec_t *ncec)
117 {
118 ASSERT(MUTEX_HELD(&ncec->ncec_lock));
119 ncec->ncec_cb_walker_cnt++;
120 }
121
122 void
ncec_cb_refrele(ncec_t * ncec)123 ncec_cb_refrele(ncec_t *ncec)
124 {
125 ncec_cb_t *ncec_cb, *ncec_cb_next = NULL;
126
127 ASSERT(MUTEX_HELD(&ncec->ncec_lock));
128 if (--ncec->ncec_cb_walker_cnt == 0) {
129 for (ncec_cb = list_head(&ncec->ncec_cb); ncec_cb != NULL;
130 ncec_cb = ncec_cb_next) {
131
132 ncec_cb_next = list_next(&ncec->ncec_cb, ncec_cb);
133 if ((ncec_cb->ncec_cb_flags & NCE_CB_DISPATCHED) == 0)
134 continue;
135 list_remove(&ncec->ncec_cb, ncec_cb);
136 kmem_free(ncec_cb, sizeof (*ncec_cb));
137 }
138 }
139 }
140
141 /*
142 * add a callback to the nce, so that the callback can be invoked
143 * after address resolution succeeds/fails.
144 */
145 static ip2mac_id_t
ncec_add_cb(ncec_t * ncec,ip2mac_callback_t * cb,void * cbarg)146 ncec_add_cb(ncec_t *ncec, ip2mac_callback_t *cb, void *cbarg)
147 {
148 ncec_cb_t *nce_cb;
149 ip2mac_id_t ip2mid = NULL;
150
151 ASSERT(MUTEX_HELD(&ncec->ncec_lock));
152 if ((nce_cb = kmem_zalloc(sizeof (*nce_cb), KM_NOSLEEP)) == NULL)
153 return (ip2mid);
154 nce_cb->ncec_cb_func = cb;
155 nce_cb->ncec_cb_arg = cbarg;
156 /*
157 * We identify the ncec_cb_t during cancellation by the address
158 * of the nce_cb_t itself, and, as a short-cut for eliminating
159 * clear mismatches, only look in the callback list of ncec's
160 * whose address is equal to the nce_cb_id.
161 */
162 nce_cb->ncec_cb_id = ncec; /* no refs! just an address */
163 list_insert_tail(&ncec->ncec_cb, nce_cb);
164 ip2mid = ncec; /* this is the id to be used in ip2mac_cancel */
165
166 return (nce_cb);
167 }
168
169 /*
170 * Resolve an IP address to a link-layer address using the data-structures
171 * defined in PSARC 2006/482. If the current link-layer address for the
172 * IP address is not known, the state-machine for resolving the resolution
173 * will be triggered, and the callback function (*cb) will be invoked after
174 * the resolution completes.
175 */
176 ip2mac_id_t
ip2mac(uint_t op,ip2mac_t * ip2m,ip2mac_callback_t * cb,void * cbarg,zoneid_t zoneid)177 ip2mac(uint_t op, ip2mac_t *ip2m, ip2mac_callback_t *cb, void *cbarg,
178 zoneid_t zoneid)
179 {
180 ncec_t *ncec;
181 nce_t *nce = NULL;
182 boolean_t isv6;
183 ill_t *ill;
184 netstack_t *ns;
185 ip_stack_t *ipst;
186 ip2mac_id_t ip2mid = NULL;
187 sin_t *sin;
188 sin6_t *sin6;
189 int err;
190 uint64_t delta;
191 boolean_t need_resolve = B_FALSE;
192
193 isv6 = (ip2m->ip2mac_pa.ss_family == AF_INET6);
194
195 ns = netstack_find_by_zoneid(zoneid);
196 if (ns == NULL) {
197 ip2m->ip2mac_err = EINVAL;
198 return (NULL);
199 }
200 /*
201 * For exclusive stacks we reset the zoneid to zero
202 * since IP uses the global zoneid in the exclusive stacks.
203 */
204 if (ns->netstack_stackid != GLOBAL_NETSTACKID)
205 zoneid = GLOBAL_ZONEID;
206 ipst = ns->netstack_ip;
207 /*
208 * find the ill from the ip2m->ip2mac_ifindex
209 */
210 ill = ill_lookup_on_ifindex(ip2m->ip2mac_ifindex, isv6, ipst);
211 if (ill == NULL) {
212 ip2m->ip2mac_err = ENXIO;
213 netstack_rele(ns);
214 return (NULL);
215 }
216 if (isv6) {
217 sin6 = (sin6_t *)&ip2m->ip2mac_pa;
218 if (op == IP2MAC_LOOKUP) {
219 nce = nce_lookup_v6(ill, &sin6->sin6_addr);
220 } else {
221 err = nce_lookup_then_add_v6(ill, NULL,
222 ill->ill_phys_addr_length,
223 &sin6->sin6_addr, 0, ND_UNCHANGED, &nce);
224 }
225 } else {
226 sin = (sin_t *)&ip2m->ip2mac_pa;
227 if (op == IP2MAC_LOOKUP) {
228 nce = nce_lookup_v4(ill, &sin->sin_addr.s_addr);
229 } else {
230 err = nce_lookup_then_add_v4(ill, NULL,
231 ill->ill_phys_addr_length,
232 &sin->sin_addr.s_addr, 0, ND_UNCHANGED, &nce);
233 }
234 }
235 if (op == IP2MAC_LOOKUP) {
236 if (nce == NULL) {
237 ip2m->ip2mac_err = ESRCH;
238 goto done;
239 }
240 ncec = nce->nce_common;
241 delta = TICK_TO_MSEC(ddi_get_lbolt64()) - ncec->ncec_last;
242 mutex_enter(&ncec->ncec_lock);
243 if (NCE_ISREACHABLE(ncec) &&
244 delta < (uint64_t)ill->ill_reachable_time) {
245 ncec_ip2mac_response(ip2m, ncec);
246 ip2m->ip2mac_err = 0;
247 } else {
248 ip2m->ip2mac_err = ESRCH;
249 }
250 mutex_exit(&ncec->ncec_lock);
251 goto done;
252 } else {
253 if (err != 0 && err != EEXIST) {
254 ip2m->ip2mac_err = err;
255 goto done;
256 }
257 }
258 ncec = nce->nce_common;
259 delta = TICK_TO_MSEC(ddi_get_lbolt64()) - ncec->ncec_last;
260 mutex_enter(&ncec->ncec_lock);
261 if (NCE_ISCONDEMNED(ncec)) {
262 ip2m->ip2mac_err = ESRCH;
263 } else {
264 if (NCE_ISREACHABLE(ncec)) {
265 if (NCE_MYADDR(ncec) ||
266 delta < (uint64_t)ill->ill_reachable_time) {
267 ncec_ip2mac_response(ip2m, ncec);
268 ip2m->ip2mac_err = 0;
269 mutex_exit(&ncec->ncec_lock);
270 goto done;
271 }
272 /*
273 * Since we do not control the packet output
274 * path for ip2mac() callers, we need to verify
275 * if the existing information in the nce is
276 * very old, and retrigger resolution if necessary.
277 * We will not return the existing stale
278 * information until it is verified through a
279 * resolver request/response exchange.
280 *
281 * In the future, we may want to support extensions
282 * that do additional callbacks on link-layer updates,
283 * so that we can return the stale information but
284 * also update the caller if the lladdr changes.
285 */
286 ncec->ncec_rcnt = ill->ill_xmit_count;
287 ncec->ncec_state = ND_PROBE;
288 need_resolve = B_TRUE; /* reachable but very old nce */
289 } else if (ncec->ncec_state == ND_INITIAL) {
290 need_resolve = B_TRUE; /* ND_INITIAL nce */
291 ncec->ncec_state = ND_INCOMPLETE;
292 }
293 /*
294 * NCE not known to be reachable in the recent past. We must
295 * reconfirm the information before returning it to the caller
296 */
297 if (ncec->ncec_rcnt > 0) {
298 /*
299 * Still resolving this ncec, so we can queue the
300 * callback information in ncec->ncec_cb
301 */
302 ip2mid = ncec_add_cb(ncec, cb, cbarg);
303 ip2m->ip2mac_err = EINPROGRESS;
304 } else {
305 /*
306 * No more retransmits allowed -- resolution failed.
307 */
308 ip2m->ip2mac_err = ESRCH;
309 }
310 }
311 mutex_exit(&ncec->ncec_lock);
312 done:
313 /*
314 * if NCE_ISREACHABLE(ncec) but very old, or if it is ND_INITIAL,
315 * trigger resolve.
316 */
317 if (need_resolve)
318 ip_ndp_resolve(ncec);
319 if (nce != NULL)
320 nce_refrele(nce);
321 netstack_rele(ns);
322 ill_refrele(ill);
323 return (ip2mid);
324 }
325
326 /*
327 * data passed to ncec_walk for canceling outstanding callbacks.
328 */
329 typedef struct ip2mac_cancel_data_s {
330 ip2mac_id_t ip2m_cancel_id;
331 int ip2m_cancel_err;
332 } ip2mac_cancel_data_t;
333
334 /*
335 * callback invoked for each active ncec. If the ip2mac_id_t corresponds
336 * to an active nce_cb_t in the ncec's callback list, we want to remove
337 * the callback (if there are no walkers) or return EBUSY to the caller
338 */
339 static void
ip2mac_cancel_callback(ncec_t * ncec,void * arg)340 ip2mac_cancel_callback(ncec_t *ncec, void *arg)
341 {
342 ip2mac_cancel_data_t *ip2m_wdata = arg;
343 ncec_cb_t *ip2m_nce_cb = ip2m_wdata->ip2m_cancel_id;
344 ncec_cb_t *ncec_cb;
345
346 if (ip2m_nce_cb->ncec_cb_id != ncec)
347 return;
348
349 mutex_enter(&ncec->ncec_lock);
350 if (list_is_empty(&ncec->ncec_cb)) {
351 mutex_exit(&ncec->ncec_lock);
352 return;
353 }
354 /*
355 * IP does not hold internal locks like nce_lock across calls to
356 * other subsystems for fear of recursive lock entry and lock
357 * hierarchy violation. The caller may be holding locks across
358 * the call to IP. (It would be ideal if no subsystem holds locks
359 * across calls into another subsystem, especially if calls can
360 * happen in either direction).
361 */
362 ncec_cb = list_head(&ncec->ncec_cb);
363 for (; ncec_cb != NULL; ncec_cb = list_next(&ncec->ncec_cb, ncec_cb)) {
364 if (ncec_cb != ip2m_nce_cb)
365 continue;
366 /*
367 * If there are no walkers we can remove the nce_cb.
368 * Otherwise the exiting walker will clean up.
369 */
370 if (ncec->ncec_cb_walker_cnt == 0) {
371 list_remove(&ncec->ncec_cb, ncec_cb);
372 } else {
373 ip2m_wdata->ip2m_cancel_err = EBUSY;
374 }
375 break;
376 }
377 mutex_exit(&ncec->ncec_lock);
378 }
379
380 /*
381 * cancel an outstanding timeout set up via ip2mac
382 */
383 int
ip2mac_cancel(ip2mac_id_t ip2mid,zoneid_t zoneid)384 ip2mac_cancel(ip2mac_id_t ip2mid, zoneid_t zoneid)
385 {
386 netstack_t *ns;
387 ip_stack_t *ipst;
388 ip2mac_cancel_data_t ip2m_wdata;
389
390 ns = netstack_find_by_zoneid(zoneid);
391 if (ns == NULL) {
392 ip2m_wdata.ip2m_cancel_err = EINVAL;
393 return (ip2m_wdata.ip2m_cancel_err);
394 }
395 /*
396 * For exclusive stacks we reset the zoneid to zero
397 * since IP uses the global zoneid in the exclusive stacks.
398 */
399 if (ns->netstack_stackid != GLOBAL_NETSTACKID)
400 zoneid = GLOBAL_ZONEID;
401 ipst = ns->netstack_ip;
402
403 ip2m_wdata.ip2m_cancel_id = ip2mid;
404 ip2m_wdata.ip2m_cancel_err = 0;
405 ncec_walk(NULL, ip2mac_cancel_callback, &ip2m_wdata, ipst);
406 /*
407 * We may return EBUSY if a walk to dispatch callbacks is
408 * in progress, in which case the caller needs to synchronize
409 * with the registered callback function to make sure the
410 * module does not exit when there is a callback pending.
411 */
412 netstack_rele(ns);
413 return (ip2m_wdata.ip2m_cancel_err);
414 }
415