1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <net/if.h> 28 #include <net/if_types.h> 29 #include <inet/ip.h> 30 #include <inet/ip_ire.h> 31 #include <inet/ip_if.h> 32 #include <sys/ib/mgt/ibcm/ibcm_arp.h> 33 34 extern char cmlog[]; 35 36 _NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_arp_streams_t)) 37 38 static void ibcm_resolver_ack(ip2mac_t *, void *); 39 static int ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zid); 40 41 /* 42 * delete a wait queue node from the list. 43 * assumes mutex is acquired 44 */ 45 void 46 ibcm_arp_delete_prwqn(ibcm_arp_prwqn_t *wqnp) 47 { 48 ibcm_arp_streams_t *ib_s; 49 50 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_delete_prwqn(%p)", wqnp); 51 52 ib_s = wqnp->ib_str; 53 ib_s->wqnp = NULL; 54 kmem_free(wqnp, sizeof (ibcm_arp_prwqn_t)); 55 } 56 57 /* 58 * allocate a wait queue node, and insert it in the list 59 */ 60 static ibcm_arp_prwqn_t * 61 ibcm_arp_create_prwqn(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr, 62 ibt_ip_addr_t *src_addr) 63 { 64 ibcm_arp_prwqn_t *wqnp; 65 66 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn(ib_s: 0x%p)", ib_s); 67 68 if (dst_addr == NULL) { 69 return (NULL); 70 } 71 if ((wqnp = kmem_zalloc(sizeof (ibcm_arp_prwqn_t), KM_NOSLEEP)) == 72 NULL) { 73 return (NULL); 74 } 75 wqnp->dst_addr = *dst_addr; 76 77 if (src_addr) { 78 wqnp->usrc_addr = *src_addr; 79 } 80 wqnp->ib_str = ib_s; 81 wqnp->ifproto = (dst_addr->family == AF_INET) ? 82 ETHERTYPE_IP : ETHERTYPE_IPV6; 83 84 ib_s->wqnp = wqnp; 85 86 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn: Return wqnp: %p", wqnp); 87 88 return (wqnp); 89 } 90 91 92 /* 93 * Check if the interface is loopback or IB. 94 */ 95 static int 96 ibcm_arp_check_interface(ill_t *ill) 97 { 98 if (IS_LOOPBACK(ill) || ill->ill_type == IFT_IB) 99 return (0); 100 101 return (ETIMEDOUT); 102 } 103 104 int 105 ibcm_resolver_pr_lookup(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr, 106 ibt_ip_addr_t *src_addr) 107 { 108 ibcm_arp_prwqn_t *wqnp; 109 ire_t *ire = NULL; 110 ipif_t *ipif = NULL; 111 ill_t *ill = NULL; 112 ill_t *hwaddr_ill = NULL; 113 ip_stack_t *ipst; 114 int len; 115 ipaddr_t setsrcv4; 116 in6_addr_t setsrcv6; 117 118 IBCM_PRINT_IP("ibcm_arp_pr_lookup: SRC", src_addr); 119 IBCM_PRINT_IP("ibcm_arp_pr_lookup: DST", dst_addr); 120 121 if ((wqnp = ibcm_arp_create_prwqn(ib_s, dst_addr, src_addr)) == NULL) { 122 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 123 "ibcm_arp_create_prwqn failed"); 124 ib_s->status = ENOMEM; 125 return (1); 126 } 127 128 ipst = netstack_find_by_zoneid(GLOBAL_ZONEID)->netstack_ip; 129 if (dst_addr->family == AF_INET) { 130 /* 131 * A local address is always specified, and it is used 132 * to find the zoneid. 133 */ 134 ipif = ipif_lookup_addr(src_addr->un.ip4addr, NULL, ALL_ZONES, 135 ipst); 136 if (ipif == NULL) { 137 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 138 "ipif_lookup_addr failed"); 139 ib_s->status = EFAULT; 140 goto fail; 141 } 142 143 /* 144 * get an ire for the destination adress. 145 * Note that we can't use MATCH_IRE_ILL since that would 146 * require that the first ill we find have ire_ill set. Thus 147 * we compare ire_ill against ipif_ill after the lookup. 148 */ 149 setsrcv4 = INADDR_ANY; 150 ire = ire_route_recursive_v4(dst_addr->un.ip4addr, 0, NULL, 151 ipif->ipif_zoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst, 152 &setsrcv4, NULL, NULL); 153 154 ASSERT(ire != NULL); 155 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 156 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 157 "ire_route_recursive_v4 failed"); 158 ib_s->status = EFAULT; 159 goto fail; 160 } 161 ill = ire_nexthop_ill(ire); 162 if (ill == NULL) { 163 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 164 "ire_nexthop_ill failed"); 165 ib_s->status = EFAULT; 166 goto fail; 167 } 168 if (ill != ipif->ipif_ill) { 169 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 170 "wrong ill"); 171 ib_s->status = EFAULT; 172 goto fail; 173 } 174 175 wqnp->gateway.un.ip4addr = ire->ire_gateway_addr; 176 wqnp->netmask.un.ip4addr = ire->ire_mask; 177 wqnp->src_addr.un.ip4addr = src_addr->un.ip4addr; 178 wqnp->src_addr.family = wqnp->gateway.family = 179 wqnp->netmask.family = AF_INET; 180 181 } else if (dst_addr->family == AF_INET6) { 182 /* 183 * A local address is always specified, and it is used 184 * to find the zoneid. 185 * We should really match on scopeid for link locals here. 186 */ 187 ipif = ipif_lookup_addr_v6(&src_addr->un.ip6addr, NULL, 188 ALL_ZONES, ipst); 189 if (ipif == NULL) { 190 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 191 "ipif_lookup_addr_v6 failed"); 192 ib_s->status = EFAULT; 193 goto fail; 194 } 195 196 /* 197 * get an ire for the destination adress. 198 * Note that we can't use MATCH_IRE_ILL since that would 199 * require that the first ill we find have ire_ill set. Thus 200 * we compare ire_ill against ipif_ill after the lookup. 201 */ 202 setsrcv6 = ipv6_all_zeros; 203 ire = ire_route_recursive_v6(&dst_addr->un.ip6addr, 0, NULL, 204 ipif->ipif_zoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst, 205 &setsrcv6, NULL, NULL); 206 207 ASSERT(ire != NULL); 208 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 209 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 210 "ire_route_recursive_v6 failed"); 211 ib_s->status = EFAULT; 212 goto fail; 213 } 214 ill = ire_nexthop_ill(ire); 215 if (ill == NULL) { 216 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 217 "ire_nexthop_ill failed"); 218 ib_s->status = EFAULT; 219 goto fail; 220 } 221 222 if (ill != ipif->ipif_ill) { 223 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 224 "wrong ill"); 225 ib_s->status = EFAULT; 226 goto fail; 227 } 228 229 wqnp->gateway.un.ip6addr = ire->ire_gateway_addr_v6; 230 wqnp->netmask.un.ip6addr = ire->ire_mask_v6; 231 wqnp->src_addr.un.ip6addr = src_addr->un.ip6addr; 232 wqnp->src_addr.family = wqnp->gateway.family = 233 wqnp->netmask.family = AF_INET6; 234 } 235 236 (void) strlcpy(wqnp->ifname, ill->ill_name, sizeof (wqnp->ifname)); 237 238 /* 239 * For IPMP data addresses, we need to use the hardware address of the 240 * interface bound to the given address. 241 */ 242 if (IS_IPMP(ill)) { 243 if ((hwaddr_ill = ipmp_ipif_hold_bound_ill(ipif)) == NULL) { 244 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 245 "no bound ill for IPMP interface %s", 246 ill->ill_name); 247 ib_s->status = EFAULT; 248 goto fail; 249 } 250 } else { 251 hwaddr_ill = ill; 252 ill_refhold(hwaddr_ill); /* for symmetry */ 253 } 254 255 if ((ib_s->status = ibcm_arp_check_interface(hwaddr_ill)) != 0) { 256 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 257 "ibcm_arp_check_interface failed"); 258 goto fail; 259 } 260 261 bcopy(hwaddr_ill->ill_phys_addr, &wqnp->src_mac, 262 hwaddr_ill->ill_phys_addr_length); 263 264 IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: outgoing if:%s", 265 wqnp->ifname); 266 267 /* 268 * if the user supplied a address, then verify rts returned 269 * the same address 270 */ 271 if (wqnp->usrc_addr.family) { 272 len = (wqnp->usrc_addr.family == AF_INET) ? 273 IP_ADDR_LEN : sizeof (in6_addr_t); 274 if (bcmp(&wqnp->usrc_addr.un, &wqnp->src_addr.un, len)) { 275 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 276 "srcaddr mismatch:%d", ENETUNREACH); 277 goto fail; 278 } 279 } 280 281 /* 282 * at this stage, we have the source address and the IB 283 * interface, now get the destination mac address from 284 * arp or ipv6 drivers 285 */ 286 ib_s->status = ibcm_nce_lookup(wqnp, ill, getzoneid()); 287 if (ib_s->status != 0) { 288 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 289 "ibcm_nce_lookup failed: %d", ib_s->status); 290 goto fail; 291 } 292 293 ill_refrele(hwaddr_ill); 294 ill_refrele(ill); 295 ire_refrele(ire); 296 ipif_refrele(ipif); 297 netstack_rele(ipst->ips_netstack); 298 299 IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: Return: 0x%p", wqnp); 300 return (0); 301 fail: 302 if (hwaddr_ill != NULL) 303 ill_refrele(hwaddr_ill); 304 if (ill != NULL) 305 ill_refrele(ill); 306 if (ire != NULL) 307 ire_refrele(ire); 308 if (ipif != NULL) 309 ipif_refrele(ipif); 310 ibcm_arp_delete_prwqn(wqnp); 311 netstack_rele(ipst->ips_netstack); 312 return (1); 313 } 314 315 /* 316 * Query the neighbor cache for IPv4/IPv6 to mac address mapping. 317 */ 318 static int 319 ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zoneid) 320 { 321 ip2mac_t ip2m; 322 sin_t *sin; 323 sin6_t *sin6; 324 ip2mac_id_t ip2mid; 325 int err; 326 327 if (wqnp->src_addr.family != wqnp->dst_addr.family) { 328 IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Mis-match SRC_ADDR " 329 "Family: %d, DST_ADDR Family %d", wqnp->src_addr.family, 330 wqnp->dst_addr.family); 331 return (1); 332 } 333 bzero(&ip2m, sizeof (ip2m)); 334 335 if (wqnp->dst_addr.family == AF_INET) { 336 sin = (sin_t *)&ip2m.ip2mac_pa; 337 sin->sin_family = AF_INET; 338 sin->sin_addr.s_addr = wqnp->dst_addr.un.ip4addr; 339 } else if (wqnp->dst_addr.family == AF_INET6) { 340 sin6 = (sin6_t *)&ip2m.ip2mac_pa; 341 sin6->sin6_family = AF_INET6; 342 sin6->sin6_addr = wqnp->dst_addr.un.ip6addr; 343 } else { 344 IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Invalid DST_ADDR " 345 "Family: %d", wqnp->dst_addr.family); 346 return (1); 347 } 348 349 ip2m.ip2mac_ifindex = ill->ill_phyint->phyint_ifindex; 350 351 wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING; 352 353 /* 354 * issue the request to IP for Neighbor Discovery 355 */ 356 ip2mid = ip2mac(IP2MAC_RESOLVE, &ip2m, ibcm_resolver_ack, wqnp, 357 zoneid); 358 err = ip2m.ip2mac_err; 359 if (err == EINPROGRESS) { 360 wqnp->ip2mac_id = ip2mid; 361 wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING; 362 err = 0; 363 } else if (err == 0) { 364 ibcm_resolver_ack(&ip2m, wqnp); 365 } 366 return (err); 367 } 368 369 /* 370 * do sanity checks on the link-level sockaddr 371 */ 372 static boolean_t 373 ibcm_check_sockdl(struct sockaddr_dl *sdl) 374 { 375 376 if (sdl->sdl_type != IFT_IB || sdl->sdl_alen != IPOIB_ADDRL) 377 return (B_FALSE); 378 379 return (B_TRUE); 380 } 381 382 /* 383 * callback for resolver lookups, both for success and failure. 384 * If Address resolution was succesful: return GID info. 385 */ 386 static void 387 ibcm_resolver_ack(ip2mac_t *ip2macp, void *arg) 388 { 389 ibcm_arp_prwqn_t *wqnp = (ibcm_arp_prwqn_t *)arg; 390 ibcm_arp_streams_t *ib_s; 391 uchar_t *cp; 392 int err = 0; 393 394 IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_ack(%p, %p)", ip2macp, wqnp); 395 396 ib_s = wqnp->ib_str; 397 mutex_enter(&ib_s->lock); 398 399 if (ip2macp->ip2mac_err != 0) { 400 wqnp->flags &= ~IBCM_ARP_PR_RESOLVE_PENDING; 401 cv_broadcast(&ib_s->cv); 402 err = EHOSTUNREACH; 403 goto user_callback; 404 } 405 406 if (!ibcm_check_sockdl(&ip2macp->ip2mac_ha)) { 407 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_ack: Error: " 408 "interface %s is not IB\n", wqnp->ifname); 409 err = EHOSTUNREACH; 410 goto user_callback; 411 } 412 413 cp = (uchar_t *)LLADDR(&ip2macp->ip2mac_ha); 414 bcopy(cp, &wqnp->dst_mac, IPOIB_ADDRL); 415 416 /* 417 * at this point we have src/dst gid's derived from the mac addresses 418 * now get the hca, port 419 */ 420 bcopy(&wqnp->src_mac.ipoib_gidpref, &wqnp->sgid, sizeof (ib_gid_t)); 421 bcopy(&wqnp->dst_mac.ipoib_gidpref, &wqnp->dgid, sizeof (ib_gid_t)); 422 423 IBCM_H2N_GID(wqnp->sgid); 424 IBCM_H2N_GID(wqnp->dgid); 425 426 user_callback: 427 428 ib_s->status = err; 429 ib_s->done = B_TRUE; 430 431 /* lock is held by the caller. */ 432 cv_signal(&ib_s->cv); 433 mutex_exit(&ib_s->lock); 434 } 435