1 /* 2 * Copyright (c) 2005 Voltaire Inc. All rights reserved. 3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. 5 * Copyright (c) 2005 Intel Corporation. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 */ 35 36 #include <linux/mutex.h> 37 #include <linux/inetdevice.h> 38 #include <linux/slab.h> 39 #include <linux/workqueue.h> 40 #include <net/arp.h> 41 #include <net/neighbour.h> 42 #include <net/route.h> 43 #include <net/netevent.h> 44 #include <net/ipv6_stubs.h> 45 #include <net/ip6_route.h> 46 #include <rdma/ib_addr.h> 47 #include <rdma/ib_cache.h> 48 #include <rdma/ib_sa.h> 49 #include <rdma/ib.h> 50 #include <rdma/rdma_netlink.h> 51 #include <net/netlink.h> 52 53 #include "core_priv.h" 54 55 struct addr_req { 56 struct list_head list; 57 struct sockaddr_storage src_addr; 58 struct sockaddr_storage dst_addr; 59 struct rdma_dev_addr *addr; 60 void *context; 61 void (*callback)(int status, struct sockaddr *src_addr, 62 struct rdma_dev_addr *addr, void *context); 63 unsigned long timeout; 64 struct delayed_work work; 65 bool resolve_by_gid_attr; /* Consider gid attr in resolve phase */ 66 int status; 67 u32 seq; 68 }; 69 70 static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0); 71 72 static DEFINE_SPINLOCK(lock); 73 static LIST_HEAD(req_list); 74 static struct workqueue_struct *addr_wq; 75 76 static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = { 77 [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY, 78 .len = sizeof(struct rdma_nla_ls_gid), 79 .validation_type = NLA_VALIDATE_MIN, 80 .min = sizeof(struct rdma_nla_ls_gid)}, 81 }; 82 83 static void ib_nl_process_ip_rsep(const struct nlmsghdr *nlh) 84 { 85 struct nlattr *tb[LS_NLA_TYPE_MAX] = {}; 86 union ib_gid gid; 87 struct addr_req *req; 88 int found = 0; 89 int ret; 90 91 if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR) 92 return; 93 94 ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), 95 nlmsg_len(nlh), ib_nl_addr_policy, NULL); 96 if (ret) 97 return; 98 99 if (!tb[LS_NLA_TYPE_DGID]) 100 return; 101 memcpy(&gid, nla_data(tb[LS_NLA_TYPE_DGID]), sizeof(gid)); 102 103 spin_lock_bh(&lock); 104 list_for_each_entry(req, &req_list, list) { 105 if (nlh->nlmsg_seq != req->seq) 106 continue; 107 /* We set the DGID part, the rest was set earlier */ 108 rdma_addr_set_dgid(req->addr, &gid); 109 req->status = 0; 110 found = 1; 111 break; 112 } 113 spin_unlock_bh(&lock); 114 115 if (!found) 116 pr_info("Couldn't find request waiting for DGID: %pI6\n", 117 &gid); 118 } 119 120 int ib_nl_handle_ip_res_resp(struct sk_buff *skb, 121 struct nlmsghdr *nlh, 122 struct netlink_ext_ack *extack) 123 { 124 if ((nlh->nlmsg_flags & NLM_F_REQUEST) || 125 !(NETLINK_CB(skb).sk)) 126 return -EPERM; 127 128 ib_nl_process_ip_rsep(nlh); 129 130 return 0; 131 } 132 133 static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, 134 const void *daddr, 135 u32 seq, u16 family) 136 { 137 struct sk_buff *skb = NULL; 138 struct nlmsghdr *nlh; 139 struct rdma_ls_ip_resolve_header *header; 140 void *data; 141 size_t size; 142 int attrtype; 143 int len; 144 145 if (family == AF_INET) { 146 size = sizeof(struct in_addr); 147 attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV4; 148 } else { 149 size = sizeof(struct in6_addr); 150 attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6; 151 } 152 153 len = nla_total_size(sizeof(size)); 154 len += NLMSG_ALIGN(sizeof(*header)); 155 156 skb = nlmsg_new(len, GFP_KERNEL); 157 if (!skb) 158 return -ENOMEM; 159 160 data = ibnl_put_msg(skb, &nlh, seq, 0, RDMA_NL_LS, 161 RDMA_NL_LS_OP_IP_RESOLVE, NLM_F_REQUEST); 162 if (!data) { 163 nlmsg_free(skb); 164 return -ENODATA; 165 } 166 167 /* Construct the family header first */ 168 header = skb_put(skb, NLMSG_ALIGN(sizeof(*header))); 169 header->ifindex = dev_addr->bound_dev_if; 170 nla_put(skb, attrtype, size, daddr); 171 172 /* Repair the nlmsg header length */ 173 nlmsg_end(skb, nlh); 174 rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, GFP_KERNEL); 175 176 /* Make the request retry, so when we get the response from userspace 177 * we will have something. 178 */ 179 return -ENODATA; 180 } 181 182 int rdma_addr_size(const struct sockaddr *addr) 183 { 184 switch (addr->sa_family) { 185 case AF_INET: 186 return sizeof(struct sockaddr_in); 187 case AF_INET6: 188 return sizeof(struct sockaddr_in6); 189 case AF_IB: 190 return sizeof(struct sockaddr_ib); 191 default: 192 return 0; 193 } 194 } 195 EXPORT_SYMBOL(rdma_addr_size); 196 197 int rdma_addr_size_in6(struct sockaddr_in6 *addr) 198 { 199 int ret = rdma_addr_size((struct sockaddr *) addr); 200 201 return ret <= sizeof(*addr) ? ret : 0; 202 } 203 EXPORT_SYMBOL(rdma_addr_size_in6); 204 205 int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr) 206 { 207 int ret = rdma_addr_size((struct sockaddr *) addr); 208 209 return ret <= sizeof(*addr) ? ret : 0; 210 } 211 EXPORT_SYMBOL(rdma_addr_size_kss); 212 213 /** 214 * rdma_copy_src_l2_addr - Copy netdevice source addresses 215 * @dev_addr: Destination address pointer where to copy the addresses 216 * @dev: Netdevice whose source addresses to copy 217 * 218 * rdma_copy_src_l2_addr() copies source addresses from the specified netdevice. 219 * This includes unicast address, broadcast address, device type and 220 * interface index. 221 */ 222 void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr, 223 const struct net_device *dev) 224 { 225 dev_addr->dev_type = dev->type; 226 memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 227 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); 228 dev_addr->bound_dev_if = dev->ifindex; 229 } 230 EXPORT_SYMBOL(rdma_copy_src_l2_addr); 231 232 static struct net_device * 233 rdma_find_ndev_for_src_ip_rcu(struct net *net, const struct sockaddr *src_in) 234 { 235 struct net_device *dev = NULL; 236 int ret = -EADDRNOTAVAIL; 237 238 switch (src_in->sa_family) { 239 case AF_INET: 240 dev = __ip_dev_find(net, 241 ((const struct sockaddr_in *)src_in)->sin_addr.s_addr, 242 false); 243 if (dev) 244 ret = 0; 245 break; 246 #if IS_ENABLED(CONFIG_IPV6) 247 case AF_INET6: 248 for_each_netdev_rcu(net, dev) { 249 if (ipv6_chk_addr(net, 250 &((const struct sockaddr_in6 *)src_in)->sin6_addr, 251 dev, 1)) { 252 ret = 0; 253 break; 254 } 255 } 256 break; 257 #endif 258 } 259 return ret ? ERR_PTR(ret) : dev; 260 } 261 262 int rdma_translate_ip(const struct sockaddr *addr, 263 struct rdma_dev_addr *dev_addr) 264 { 265 struct net_device *dev; 266 267 if (dev_addr->bound_dev_if) { 268 dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); 269 if (!dev) 270 return -ENODEV; 271 rdma_copy_src_l2_addr(dev_addr, dev); 272 dev_put(dev); 273 return 0; 274 } 275 276 rcu_read_lock(); 277 dev = rdma_find_ndev_for_src_ip_rcu(dev_addr->net, addr); 278 if (!IS_ERR(dev)) 279 rdma_copy_src_l2_addr(dev_addr, dev); 280 rcu_read_unlock(); 281 return PTR_ERR_OR_ZERO(dev); 282 } 283 EXPORT_SYMBOL(rdma_translate_ip); 284 285 static void set_timeout(struct addr_req *req, unsigned long time) 286 { 287 unsigned long delay; 288 289 delay = time - jiffies; 290 if ((long)delay < 0) 291 delay = 0; 292 293 mod_delayed_work(addr_wq, &req->work, delay); 294 } 295 296 static void queue_req(struct addr_req *req) 297 { 298 spin_lock_bh(&lock); 299 list_add_tail(&req->list, &req_list); 300 set_timeout(req, req->timeout); 301 spin_unlock_bh(&lock); 302 } 303 304 static int ib_nl_fetch_ha(struct rdma_dev_addr *dev_addr, 305 const void *daddr, u32 seq, u16 family) 306 { 307 if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) 308 return -EADDRNOTAVAIL; 309 310 return ib_nl_ip_send_msg(dev_addr, daddr, seq, family); 311 } 312 313 static int dst_fetch_ha(const struct dst_entry *dst, 314 struct rdma_dev_addr *dev_addr, 315 const void *daddr) 316 { 317 struct neighbour *n; 318 int ret = 0; 319 320 n = dst_neigh_lookup(dst, daddr); 321 if (!n) 322 return -ENODATA; 323 324 if (!(n->nud_state & NUD_VALID)) { 325 neigh_event_send(n, NULL); 326 ret = -ENODATA; 327 } else { 328 neigh_ha_snapshot(dev_addr->dst_dev_addr, n, dst->dev); 329 } 330 331 neigh_release(n); 332 333 return ret; 334 } 335 336 static bool has_gateway(const struct dst_entry *dst, sa_family_t family) 337 { 338 if (family == AF_INET) 339 return dst_rtable(dst)->rt_uses_gateway; 340 341 return dst_rt6_info(dst)->rt6i_flags & RTF_GATEWAY; 342 } 343 344 static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr, 345 const struct sockaddr *dst_in, u32 seq) 346 { 347 const struct sockaddr_in *dst_in4 = 348 (const struct sockaddr_in *)dst_in; 349 const struct sockaddr_in6 *dst_in6 = 350 (const struct sockaddr_in6 *)dst_in; 351 const void *daddr = (dst_in->sa_family == AF_INET) ? 352 (const void *)&dst_in4->sin_addr.s_addr : 353 (const void *)&dst_in6->sin6_addr; 354 sa_family_t family = dst_in->sa_family; 355 356 might_sleep(); 357 358 /* If we have a gateway in IB mode then it must be an IB network */ 359 if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB) 360 return ib_nl_fetch_ha(dev_addr, daddr, seq, family); 361 else 362 return dst_fetch_ha(dst, dev_addr, daddr); 363 } 364 365 static int addr4_resolve(struct sockaddr *src_sock, 366 const struct sockaddr *dst_sock, 367 struct rdma_dev_addr *addr, 368 struct rtable **prt) 369 { 370 struct sockaddr_in *src_in = (struct sockaddr_in *)src_sock; 371 const struct sockaddr_in *dst_in = 372 (const struct sockaddr_in *)dst_sock; 373 374 __be32 src_ip = src_in->sin_addr.s_addr; 375 __be32 dst_ip = dst_in->sin_addr.s_addr; 376 struct rtable *rt; 377 struct flowi4 fl4; 378 int ret; 379 380 memset(&fl4, 0, sizeof(fl4)); 381 fl4.daddr = dst_ip; 382 fl4.saddr = src_ip; 383 fl4.flowi4_oif = addr->bound_dev_if; 384 rt = ip_route_output_key(addr->net, &fl4); 385 ret = PTR_ERR_OR_ZERO(rt); 386 if (ret) 387 return ret; 388 389 src_in->sin_addr.s_addr = fl4.saddr; 390 391 addr->hoplimit = ip4_dst_hoplimit(&rt->dst); 392 393 *prt = rt; 394 return 0; 395 } 396 397 #if IS_ENABLED(CONFIG_IPV6) 398 static int addr6_resolve(struct sockaddr *src_sock, 399 const struct sockaddr *dst_sock, 400 struct rdma_dev_addr *addr, 401 struct dst_entry **pdst) 402 { 403 struct sockaddr_in6 *src_in = (struct sockaddr_in6 *)src_sock; 404 const struct sockaddr_in6 *dst_in = 405 (const struct sockaddr_in6 *)dst_sock; 406 struct flowi6 fl6; 407 struct dst_entry *dst; 408 409 memset(&fl6, 0, sizeof fl6); 410 fl6.daddr = dst_in->sin6_addr; 411 fl6.saddr = src_in->sin6_addr; 412 fl6.flowi6_oif = addr->bound_dev_if; 413 414 dst = ipv6_stub->ipv6_dst_lookup_flow(addr->net, NULL, &fl6, NULL); 415 if (IS_ERR(dst)) 416 return PTR_ERR(dst); 417 418 if (ipv6_addr_any(&src_in->sin6_addr)) 419 src_in->sin6_addr = fl6.saddr; 420 421 addr->hoplimit = ip6_dst_hoplimit(dst); 422 423 *pdst = dst; 424 return 0; 425 } 426 #else 427 static int addr6_resolve(struct sockaddr *src_sock, 428 const struct sockaddr *dst_sock, 429 struct rdma_dev_addr *addr, 430 struct dst_entry **pdst) 431 { 432 return -EADDRNOTAVAIL; 433 } 434 #endif 435 436 static bool is_dst_local(const struct dst_entry *dst) 437 { 438 if (dst->ops->family == AF_INET) 439 return !!(dst_rtable(dst)->rt_type & RTN_LOCAL); 440 else if (dst->ops->family == AF_INET6) 441 return !!(dst_rt6_info(dst)->rt6i_flags & RTF_LOCAL); 442 else 443 return false; 444 } 445 446 static int addr_resolve_neigh(const struct dst_entry *dst, 447 const struct sockaddr *dst_in, 448 struct rdma_dev_addr *addr, 449 u32 seq) 450 { 451 if (is_dst_local(dst)) { 452 /* When the destination is local entry, source and destination 453 * are same. Skip the neighbour lookup. 454 */ 455 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); 456 return 0; 457 } 458 459 return fetch_ha(dst, addr, dst_in, seq); 460 } 461 462 static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr, 463 const struct sockaddr *dst_in, 464 const struct dst_entry *dst) 465 { 466 struct net_device *ndev = READ_ONCE(dst->dev); 467 468 /* A physical device must be the RDMA device to use */ 469 if (is_dst_local(dst)) { 470 int ret; 471 /* 472 * RDMA (IB/RoCE, iWarp) doesn't run on lo interface or 473 * loopback IP address. So if route is resolved to loopback 474 * interface, translate that to a real ndev based on non 475 * loopback IP address. 476 */ 477 ndev = rdma_find_ndev_for_src_ip_rcu(dev_net(ndev), dst_in); 478 if (IS_ERR(ndev)) 479 return -ENODEV; 480 ret = rdma_translate_ip(dst_in, dev_addr); 481 if (ret) 482 return ret; 483 } else { 484 rdma_copy_src_l2_addr(dev_addr, dst->dev); 485 } 486 487 /* 488 * If there's a gateway and type of device not ARPHRD_INFINIBAND, 489 * we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the 490 * network type accordingly. 491 */ 492 if (has_gateway(dst, dst_in->sa_family) && 493 ndev->type != ARPHRD_INFINIBAND) 494 dev_addr->network = dst_in->sa_family == AF_INET ? 495 RDMA_NETWORK_IPV4 : 496 RDMA_NETWORK_IPV6; 497 else 498 dev_addr->network = RDMA_NETWORK_IB; 499 500 return 0; 501 } 502 503 static int set_addr_netns_by_gid_rcu(struct rdma_dev_addr *addr) 504 { 505 struct net_device *ndev; 506 507 ndev = rdma_read_gid_attr_ndev_rcu(addr->sgid_attr); 508 if (IS_ERR(ndev)) 509 return PTR_ERR(ndev); 510 511 /* 512 * Since we are holding the rcu, reading net and ifindex 513 * are safe without any additional reference; because 514 * change_net_namespace() in net/core/dev.c does rcu sync 515 * after it changes the state to IFF_DOWN and before 516 * updating netdev fields {net, ifindex}. 517 */ 518 addr->net = dev_net(ndev); 519 addr->bound_dev_if = ndev->ifindex; 520 return 0; 521 } 522 523 static void rdma_addr_set_net_defaults(struct rdma_dev_addr *addr) 524 { 525 addr->net = &init_net; 526 addr->bound_dev_if = 0; 527 } 528 529 static int addr_resolve(struct sockaddr *src_in, 530 const struct sockaddr *dst_in, 531 struct rdma_dev_addr *addr, 532 bool resolve_neigh, 533 bool resolve_by_gid_attr, 534 u32 seq) 535 { 536 struct dst_entry *dst = NULL; 537 struct rtable *rt = NULL; 538 int ret; 539 540 if (!addr->net) { 541 pr_warn_ratelimited("%s: missing namespace\n", __func__); 542 return -EINVAL; 543 } 544 545 rcu_read_lock(); 546 if (resolve_by_gid_attr) { 547 if (!addr->sgid_attr) { 548 rcu_read_unlock(); 549 pr_warn_ratelimited("%s: missing gid_attr\n", __func__); 550 return -EINVAL; 551 } 552 /* 553 * If the request is for a specific gid attribute of the 554 * rdma_dev_addr, derive net from the netdevice of the 555 * GID attribute. 556 */ 557 ret = set_addr_netns_by_gid_rcu(addr); 558 if (ret) { 559 rcu_read_unlock(); 560 return ret; 561 } 562 } 563 if (src_in->sa_family == AF_INET) { 564 ret = addr4_resolve(src_in, dst_in, addr, &rt); 565 dst = &rt->dst; 566 } else { 567 ret = addr6_resolve(src_in, dst_in, addr, &dst); 568 } 569 if (ret) { 570 rcu_read_unlock(); 571 goto done; 572 } 573 ret = rdma_set_src_addr_rcu(addr, dst_in, dst); 574 rcu_read_unlock(); 575 576 /* 577 * Resolve neighbor destination address if requested and 578 * only if src addr translation didn't fail. 579 */ 580 if (!ret && resolve_neigh) 581 ret = addr_resolve_neigh(dst, dst_in, addr, seq); 582 583 if (src_in->sa_family == AF_INET) 584 ip_rt_put(rt); 585 else 586 dst_release(dst); 587 done: 588 /* 589 * Clear the addr net to go back to its original state, only if it was 590 * derived from GID attribute in this context. 591 */ 592 if (resolve_by_gid_attr) 593 rdma_addr_set_net_defaults(addr); 594 return ret; 595 } 596 597 static void process_one_req(struct work_struct *_work) 598 { 599 struct addr_req *req; 600 struct sockaddr *src_in, *dst_in; 601 602 req = container_of(_work, struct addr_req, work.work); 603 604 if (req->status == -ENODATA) { 605 src_in = (struct sockaddr *)&req->src_addr; 606 dst_in = (struct sockaddr *)&req->dst_addr; 607 req->status = addr_resolve(src_in, dst_in, req->addr, 608 true, req->resolve_by_gid_attr, 609 req->seq); 610 if (req->status && time_after_eq(jiffies, req->timeout)) { 611 req->status = -ETIMEDOUT; 612 } else if (req->status == -ENODATA) { 613 /* requeue the work for retrying again */ 614 spin_lock_bh(&lock); 615 if (!list_empty(&req->list)) 616 set_timeout(req, req->timeout); 617 spin_unlock_bh(&lock); 618 return; 619 } 620 } 621 622 req->callback(req->status, (struct sockaddr *)&req->src_addr, 623 req->addr, req->context); 624 req->callback = NULL; 625 626 spin_lock_bh(&lock); 627 /* 628 * Although the work will normally have been canceled by the workqueue, 629 * it can still be requeued as long as it is on the req_list. 630 */ 631 cancel_delayed_work(&req->work); 632 if (!list_empty(&req->list)) { 633 list_del_init(&req->list); 634 kfree(req); 635 } 636 spin_unlock_bh(&lock); 637 } 638 639 int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr, 640 struct rdma_dev_addr *addr, unsigned long timeout_ms, 641 void (*callback)(int status, struct sockaddr *src_addr, 642 struct rdma_dev_addr *addr, void *context), 643 bool resolve_by_gid_attr, void *context) 644 { 645 struct sockaddr *src_in, *dst_in; 646 struct addr_req *req; 647 int ret = 0; 648 649 req = kzalloc(sizeof *req, GFP_KERNEL); 650 if (!req) 651 return -ENOMEM; 652 653 src_in = (struct sockaddr *) &req->src_addr; 654 dst_in = (struct sockaddr *) &req->dst_addr; 655 656 if (src_addr) { 657 if (src_addr->sa_family != dst_addr->sa_family) { 658 ret = -EINVAL; 659 goto err; 660 } 661 662 memcpy(src_in, src_addr, rdma_addr_size(src_addr)); 663 } else { 664 src_in->sa_family = dst_addr->sa_family; 665 } 666 667 memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr)); 668 req->addr = addr; 669 req->callback = callback; 670 req->context = context; 671 req->resolve_by_gid_attr = resolve_by_gid_attr; 672 INIT_DELAYED_WORK(&req->work, process_one_req); 673 req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq); 674 675 req->status = addr_resolve(src_in, dst_in, addr, true, 676 req->resolve_by_gid_attr, req->seq); 677 switch (req->status) { 678 case 0: 679 req->timeout = jiffies; 680 queue_req(req); 681 break; 682 case -ENODATA: 683 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; 684 queue_req(req); 685 break; 686 default: 687 ret = req->status; 688 goto err; 689 } 690 return ret; 691 err: 692 kfree(req); 693 return ret; 694 } 695 EXPORT_SYMBOL(rdma_resolve_ip); 696 697 int roce_resolve_route_from_path(struct sa_path_rec *rec, 698 const struct ib_gid_attr *attr) 699 { 700 union { 701 struct sockaddr _sockaddr; 702 struct sockaddr_in _sockaddr_in; 703 struct sockaddr_in6 _sockaddr_in6; 704 } sgid, dgid; 705 struct rdma_dev_addr dev_addr = {}; 706 int ret; 707 708 might_sleep(); 709 710 if (rec->roce.route_resolved) 711 return 0; 712 713 rdma_gid2ip((struct sockaddr *)&sgid, &rec->sgid); 714 rdma_gid2ip((struct sockaddr *)&dgid, &rec->dgid); 715 716 if (sgid._sockaddr.sa_family != dgid._sockaddr.sa_family) 717 return -EINVAL; 718 719 if (!attr || !attr->ndev) 720 return -EINVAL; 721 722 dev_addr.net = &init_net; 723 dev_addr.sgid_attr = attr; 724 725 ret = addr_resolve((struct sockaddr *)&sgid, (struct sockaddr *)&dgid, 726 &dev_addr, false, true, 0); 727 if (ret) 728 return ret; 729 730 if ((dev_addr.network == RDMA_NETWORK_IPV4 || 731 dev_addr.network == RDMA_NETWORK_IPV6) && 732 rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2) 733 return -EINVAL; 734 735 rec->roce.route_resolved = true; 736 return 0; 737 } 738 739 /** 740 * rdma_addr_cancel - Cancel resolve ip request 741 * @addr: Pointer to address structure given previously 742 * during rdma_resolve_ip(). 743 * rdma_addr_cancel() is synchronous function which cancels any pending 744 * request if there is any. 745 */ 746 void rdma_addr_cancel(struct rdma_dev_addr *addr) 747 { 748 struct addr_req *req, *temp_req; 749 struct addr_req *found = NULL; 750 751 spin_lock_bh(&lock); 752 list_for_each_entry_safe(req, temp_req, &req_list, list) { 753 if (req->addr == addr) { 754 /* 755 * Removing from the list means we take ownership of 756 * the req 757 */ 758 list_del_init(&req->list); 759 found = req; 760 break; 761 } 762 } 763 spin_unlock_bh(&lock); 764 765 if (!found) 766 return; 767 768 /* 769 * sync canceling the work after removing it from the req_list 770 * guarentees no work is running and none will be started. 771 */ 772 cancel_delayed_work_sync(&found->work); 773 kfree(found); 774 } 775 EXPORT_SYMBOL(rdma_addr_cancel); 776 777 struct resolve_cb_context { 778 struct completion comp; 779 int status; 780 }; 781 782 static void resolve_cb(int status, struct sockaddr *src_addr, 783 struct rdma_dev_addr *addr, void *context) 784 { 785 ((struct resolve_cb_context *)context)->status = status; 786 complete(&((struct resolve_cb_context *)context)->comp); 787 } 788 789 int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, 790 const union ib_gid *dgid, 791 u8 *dmac, const struct ib_gid_attr *sgid_attr, 792 int *hoplimit) 793 { 794 struct rdma_dev_addr dev_addr; 795 struct resolve_cb_context ctx; 796 union { 797 struct sockaddr_in _sockaddr_in; 798 struct sockaddr_in6 _sockaddr_in6; 799 } sgid_addr, dgid_addr; 800 int ret; 801 802 rdma_gid2ip((struct sockaddr *)&sgid_addr, sgid); 803 rdma_gid2ip((struct sockaddr *)&dgid_addr, dgid); 804 805 memset(&dev_addr, 0, sizeof(dev_addr)); 806 dev_addr.net = &init_net; 807 dev_addr.sgid_attr = sgid_attr; 808 809 init_completion(&ctx.comp); 810 ret = rdma_resolve_ip((struct sockaddr *)&sgid_addr, 811 (struct sockaddr *)&dgid_addr, &dev_addr, 1000, 812 resolve_cb, true, &ctx); 813 if (ret) 814 return ret; 815 816 wait_for_completion(&ctx.comp); 817 818 ret = ctx.status; 819 if (ret) 820 return ret; 821 822 memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN); 823 *hoplimit = dev_addr.hoplimit; 824 return 0; 825 } 826 827 static int netevent_callback(struct notifier_block *self, unsigned long event, 828 void *ctx) 829 { 830 struct addr_req *req; 831 832 if (event == NETEVENT_NEIGH_UPDATE) { 833 struct neighbour *neigh = ctx; 834 835 if (neigh->nud_state & NUD_VALID) { 836 spin_lock_bh(&lock); 837 list_for_each_entry(req, &req_list, list) 838 set_timeout(req, jiffies); 839 spin_unlock_bh(&lock); 840 } 841 } 842 return 0; 843 } 844 845 static struct notifier_block nb = { 846 .notifier_call = netevent_callback 847 }; 848 849 int addr_init(void) 850 { 851 addr_wq = alloc_ordered_workqueue("ib_addr", 0); 852 if (!addr_wq) 853 return -ENOMEM; 854 855 register_netevent_notifier(&nb); 856 857 return 0; 858 } 859 860 void addr_cleanup(void) 861 { 862 unregister_netevent_notifier(&nb); 863 destroy_workqueue(addr_wq); 864 WARN_ON(!list_empty(&req_list)); 865 } 866