1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/types.h>
27 #include <net/if.h>
28 #include <net/if_types.h>
29 #include <inet/ip.h>
30 #include <inet/ip_ire.h>
31 #include <inet/ip_if.h>
32 #include <sys/ethernet.h>
33 #include <sys/ib/mgt/ibcm/ibcm_arp.h>
34
35 extern char cmlog[];
36
37 _NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_arp_streams_t))
38
39 static void ibcm_resolver_ack(ip2mac_t *, void *);
40 static int ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zid);
41
42 /*
43 * delete a wait queue node from the list.
44 * assumes mutex is acquired
45 */
46 void
ibcm_arp_delete_prwqn(ibcm_arp_prwqn_t * wqnp)47 ibcm_arp_delete_prwqn(ibcm_arp_prwqn_t *wqnp)
48 {
49 ibcm_arp_streams_t *ib_s;
50
51 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_delete_prwqn(%p)", wqnp);
52
53 ib_s = wqnp->ib_str;
54 ib_s->wqnp = NULL;
55 kmem_free(wqnp, sizeof (ibcm_arp_prwqn_t));
56 }
57
58 /*
59 * allocate a wait queue node, and insert it in the list
60 */
61 static ibcm_arp_prwqn_t *
ibcm_arp_create_prwqn(ibcm_arp_streams_t * ib_s,ibt_ip_addr_t * dst_addr,ibt_ip_addr_t * src_addr)62 ibcm_arp_create_prwqn(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr,
63 ibt_ip_addr_t *src_addr)
64 {
65 ibcm_arp_prwqn_t *wqnp;
66
67 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn(ib_s: 0x%p)", ib_s);
68
69 if (dst_addr == NULL) {
70 return (NULL);
71 }
72 if ((wqnp = kmem_zalloc(sizeof (ibcm_arp_prwqn_t), KM_NOSLEEP)) ==
73 NULL) {
74 return (NULL);
75 }
76 wqnp->dst_addr = *dst_addr;
77
78 if (src_addr) {
79 wqnp->usrc_addr = *src_addr;
80 }
81 wqnp->ib_str = ib_s;
82 wqnp->ifproto = (dst_addr->family == AF_INET) ?
83 ETHERTYPE_IP : ETHERTYPE_IPV6;
84
85 ib_s->wqnp = wqnp;
86
87 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn: Return wqnp: %p", wqnp);
88
89 return (wqnp);
90 }
91
92
93 /*
94 * Check if the interface is loopback or IB.
95 */
96 static int
ibcm_arp_check_interface(ill_t * ill)97 ibcm_arp_check_interface(ill_t *ill)
98 {
99 if (IS_LOOPBACK(ill) || ill->ill_type == IFT_IB)
100 return (0);
101
102 return (ETIMEDOUT);
103 }
104
105 int
ibcm_resolver_pr_lookup(ibcm_arp_streams_t * ib_s,ibt_ip_addr_t * dst_addr,ibt_ip_addr_t * src_addr,zoneid_t myzoneid)106 ibcm_resolver_pr_lookup(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr,
107 ibt_ip_addr_t *src_addr, zoneid_t myzoneid)
108 {
109 ibcm_arp_prwqn_t *wqnp;
110 ire_t *ire = NULL;
111 ipif_t *ipif = NULL;
112 ill_t *ill = NULL;
113 ill_t *hwaddr_ill = NULL;
114 ip_stack_t *ipst;
115 ipaddr_t setsrcv4;
116 in6_addr_t setsrcv6;
117
118 IBCM_PRINT_IP("ibcm_arp_pr_lookup: SRC", src_addr);
119 IBCM_PRINT_IP("ibcm_arp_pr_lookup: DST", dst_addr);
120
121 if ((wqnp = ibcm_arp_create_prwqn(ib_s, dst_addr, src_addr)) == NULL) {
122 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
123 "ibcm_arp_create_prwqn failed");
124 ib_s->status = ENOMEM;
125 return (1);
126 }
127
128 ipst = netstack_find_by_zoneid(myzoneid)->netstack_ip;
129 if (dst_addr->family == AF_INET) {
130 /*
131 * get an ire for the destination adress.
132 * Note that we can't use MATCH_IRE_ILL since that would
133 * require that the first ill we find have ire_ill set.
134 */
135 setsrcv4 = INADDR_ANY;
136 ire = ire_route_recursive_v4(dst_addr->un.ip4addr, 0, NULL,
137 myzoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst,
138 &setsrcv4, NULL, NULL);
139
140 ASSERT(ire != NULL);
141 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
142 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
143 "ire_route_recursive_v4 failed");
144 ib_s->status = EFAULT;
145 goto fail;
146 }
147 ill = ire_nexthop_ill(ire);
148 if (ill == NULL) {
149 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
150 "ire_nexthop_ill failed");
151 ib_s->status = EFAULT;
152 goto fail;
153 }
154
155 /* Pick a source address */
156 if (ip_select_source_v4(ill, setsrcv4, dst_addr->un.ip4addr,
157 INADDR_ANY, myzoneid, ipst, &wqnp->src_addr.un.ip4addr,
158 NULL, NULL) != 0) {
159 ib_s->status = EADDRNOTAVAIL;
160 goto fail;
161 }
162
163 wqnp->gateway.un.ip4addr = ire->ire_gateway_addr;
164 wqnp->netmask.un.ip4addr = ire->ire_mask;
165 wqnp->src_addr.family = wqnp->gateway.family =
166 wqnp->netmask.family = AF_INET;
167
168 } else if (dst_addr->family == AF_INET6) {
169 /*
170 * get an ire for the destination adress.
171 * Note that we can't use MATCH_IRE_ILL since that would
172 * require that the first ill we find have ire_ill set. Thus
173 * we compare ire_ill against ipif_ill after the lookup.
174 */
175 setsrcv6 = ipv6_all_zeros;
176 ire = ire_route_recursive_v6(&dst_addr->un.ip6addr, 0, NULL,
177 myzoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst,
178 &setsrcv6, NULL, NULL);
179
180 ASSERT(ire != NULL);
181 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
182 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
183 "ire_route_recursive_v6 failed");
184 ib_s->status = EFAULT;
185 goto fail;
186 }
187 ill = ire_nexthop_ill(ire);
188 if (ill == NULL) {
189 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
190 "ire_nexthop_ill failed");
191 ib_s->status = EFAULT;
192 goto fail;
193 }
194
195 /* Pick a source address */
196 if (ip_select_source_v6(ill, &setsrcv6, &dst_addr->un.ip6addr,
197 myzoneid, ipst, B_FALSE, IPV6_PREFER_SRC_DEFAULT,
198 &wqnp->src_addr.un.ip6addr, NULL, NULL) != 0) {
199 ib_s->status = EADDRNOTAVAIL;
200 goto fail;
201 }
202
203 wqnp->gateway.un.ip6addr = ire->ire_gateway_addr_v6;
204 wqnp->netmask.un.ip6addr = ire->ire_mask_v6;
205 wqnp->src_addr.family = wqnp->gateway.family =
206 wqnp->netmask.family = AF_INET6;
207 }
208
209 (void) strlcpy(wqnp->ifname, ill->ill_name, sizeof (wqnp->ifname));
210
211 /*
212 * For IPMP data addresses, we need to use the hardware address of the
213 * interface bound to the given address.
214 */
215 if (IS_IPMP(ill)) {
216 if (wqnp->src_addr.family == AF_INET) {
217 ipif = ipif_lookup_addr(wqnp->src_addr.un.ip4addr, ill,
218 myzoneid, ipst);
219 } else {
220 ipif = ipif_lookup_addr_v6(&wqnp->src_addr.un.ip6addr,
221 ill, myzoneid, ipst);
222 }
223 if (ipif == NULL) {
224 ib_s->status = ENETUNREACH;
225 goto fail;
226 }
227
228 if ((hwaddr_ill = ipmp_ipif_hold_bound_ill(ipif)) == NULL) {
229 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
230 "no bound ill for IPMP interface %s",
231 ill->ill_name);
232 ib_s->status = EFAULT;
233 goto fail;
234 }
235 } else {
236 hwaddr_ill = ill;
237 ill_refhold(hwaddr_ill); /* for symmetry */
238 }
239
240 if ((ib_s->status = ibcm_arp_check_interface(hwaddr_ill)) != 0) {
241 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
242 "ibcm_arp_check_interface failed");
243 goto fail;
244 }
245
246 bcopy(hwaddr_ill->ill_phys_addr, &wqnp->src_mac,
247 hwaddr_ill->ill_phys_addr_length);
248
249 IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: outgoing if:%s",
250 wqnp->ifname);
251
252 /*
253 * at this stage, we have the source address and the IB
254 * interface, now get the destination mac address from
255 * arp or ipv6 drivers
256 */
257 ib_s->status = ibcm_nce_lookup(wqnp, ill, myzoneid);
258 if (ib_s->status != 0) {
259 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
260 "ibcm_nce_lookup failed: %d", ib_s->status);
261 goto fail;
262 }
263
264 ill_refrele(hwaddr_ill);
265 ill_refrele(ill);
266 ire_refrele(ire);
267 if (ipif != NULL)
268 ipif_refrele(ipif);
269 netstack_rele(ipst->ips_netstack);
270
271 IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: Return: 0x%p", wqnp);
272 return (0);
273 fail:
274 if (hwaddr_ill != NULL)
275 ill_refrele(hwaddr_ill);
276 if (ill != NULL)
277 ill_refrele(ill);
278 if (ire != NULL)
279 ire_refrele(ire);
280 if (ipif != NULL)
281 ipif_refrele(ipif);
282 ibcm_arp_delete_prwqn(wqnp);
283 netstack_rele(ipst->ips_netstack);
284 return (1);
285 }
286
287 /*
288 * Query the neighbor cache for IPv4/IPv6 to mac address mapping.
289 */
290 static int
ibcm_nce_lookup(ibcm_arp_prwqn_t * wqnp,ill_t * ill,zoneid_t zoneid)291 ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zoneid)
292 {
293 ip2mac_t ip2m;
294 sin_t *sin;
295 sin6_t *sin6;
296 ip2mac_id_t ip2mid;
297 int err;
298
299 if (wqnp->src_addr.family != wqnp->dst_addr.family) {
300 IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Mis-match SRC_ADDR "
301 "Family: %d, DST_ADDR Family %d", wqnp->src_addr.family,
302 wqnp->dst_addr.family);
303 return (1);
304 }
305 bzero(&ip2m, sizeof (ip2m));
306
307 if (wqnp->dst_addr.family == AF_INET) {
308 sin = (sin_t *)&ip2m.ip2mac_pa;
309 sin->sin_family = AF_INET;
310 sin->sin_addr.s_addr = wqnp->dst_addr.un.ip4addr;
311 } else if (wqnp->dst_addr.family == AF_INET6) {
312 sin6 = (sin6_t *)&ip2m.ip2mac_pa;
313 sin6->sin6_family = AF_INET6;
314 sin6->sin6_addr = wqnp->dst_addr.un.ip6addr;
315 } else {
316 IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Invalid DST_ADDR "
317 "Family: %d", wqnp->dst_addr.family);
318 return (1);
319 }
320
321 ip2m.ip2mac_ifindex = ill->ill_phyint->phyint_ifindex;
322
323 wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING;
324
325 /*
326 * issue the request to IP for Neighbor Discovery
327 */
328 ip2mid = ip2mac(IP2MAC_RESOLVE, &ip2m, ibcm_resolver_ack, wqnp,
329 zoneid);
330 err = ip2m.ip2mac_err;
331 if (err == EINPROGRESS) {
332 wqnp->ip2mac_id = ip2mid;
333 wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING;
334 err = 0;
335 } else if (err == 0) {
336 ibcm_resolver_ack(&ip2m, wqnp);
337 }
338 return (err);
339 }
340
341 /*
342 * do sanity checks on the link-level sockaddr
343 */
344 static boolean_t
ibcm_check_sockdl(struct sockaddr_dl * sdl)345 ibcm_check_sockdl(struct sockaddr_dl *sdl)
346 {
347
348 if (sdl->sdl_type != IFT_IB || sdl->sdl_alen != IPOIB_ADDRL)
349 return (B_FALSE);
350
351 return (B_TRUE);
352 }
353
354 /*
355 * callback for resolver lookups, both for success and failure.
356 * If Address resolution was succesful: return GID info.
357 */
358 static void
ibcm_resolver_ack(ip2mac_t * ip2macp,void * arg)359 ibcm_resolver_ack(ip2mac_t *ip2macp, void *arg)
360 {
361 ibcm_arp_prwqn_t *wqnp = (ibcm_arp_prwqn_t *)arg;
362 ibcm_arp_streams_t *ib_s;
363 uchar_t *cp;
364 int err = 0;
365
366 IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_ack(%p, %p)", ip2macp, wqnp);
367
368 ib_s = wqnp->ib_str;
369 mutex_enter(&ib_s->lock);
370
371 if (ip2macp->ip2mac_err != 0) {
372 wqnp->flags &= ~IBCM_ARP_PR_RESOLVE_PENDING;
373 cv_broadcast(&ib_s->cv);
374 err = EHOSTUNREACH;
375 goto user_callback;
376 }
377
378 if (!ibcm_check_sockdl(&ip2macp->ip2mac_ha)) {
379 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_ack: Error: "
380 "interface %s is not IB\n", wqnp->ifname);
381 err = EHOSTUNREACH;
382 goto user_callback;
383 }
384
385 cp = (uchar_t *)LLADDR(&ip2macp->ip2mac_ha);
386 bcopy(cp, &wqnp->dst_mac, IPOIB_ADDRL);
387
388 /*
389 * at this point we have src/dst gid's derived from the mac addresses
390 * now get the hca, port
391 */
392 bcopy(&wqnp->src_mac.ipoib_gidpref, &wqnp->sgid, sizeof (ib_gid_t));
393 bcopy(&wqnp->dst_mac.ipoib_gidpref, &wqnp->dgid, sizeof (ib_gid_t));
394
395 IBCM_H2N_GID(wqnp->sgid);
396 IBCM_H2N_GID(wqnp->dgid);
397
398 user_callback:
399
400 ib_s->status = err;
401 ib_s->done = B_TRUE;
402
403 /* lock is held by the caller. */
404 cv_signal(&ib_s->cv);
405 mutex_exit(&ib_s->lock);
406 }
407