/*
 * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
 * Copyright (c) 2004 Intel Corporation. All rights reserved.
 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/errno.h>
#include <linux/err.h>
#include <linux/export.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <net/addrconf.h>

#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>

#include "core_priv.h"

static const char * const ib_events[] = {
        [IB_EVENT_CQ_ERR]               = "CQ error",
        [IB_EVENT_QP_FATAL]             = "QP fatal error",
        [IB_EVENT_QP_REQ_ERR]           = "QP request error",
        [IB_EVENT_QP_ACCESS_ERR]        = "QP access error",
        [IB_EVENT_COMM_EST]             = "communication established",
        [IB_EVENT_SQ_DRAINED]           = "send queue drained",
        [IB_EVENT_PATH_MIG]             = "path migration successful",
        [IB_EVENT_PATH_MIG_ERR]         = "path migration error",
        [IB_EVENT_DEVICE_FATAL]         = "device fatal error",
        [IB_EVENT_PORT_ACTIVE]          = "port active",
        [IB_EVENT_PORT_ERR]             = "port error",
        [IB_EVENT_LID_CHANGE]           = "LID change",
        [IB_EVENT_PKEY_CHANGE]          = "P_key change",
        [IB_EVENT_SM_CHANGE]            = "SM change",
        [IB_EVENT_SRQ_ERR]              = "SRQ error",
        [IB_EVENT_SRQ_LIMIT_REACHED]    = "SRQ limit reached",
        [IB_EVENT_QP_LAST_WQE_REACHED]  = "last WQE reached",
        [IB_EVENT_CLIENT_REREGISTER]    = "client reregister",
        [IB_EVENT_GID_CHANGE]           = "GID changed",
};

const char *__attribute_const__ ib_event_msg(enum ib_event_type event)
{
        size_t index = event;

        return (index < ARRAY_SIZE(ib_events) && ib_events[index]) ?
                        ib_events[index] : "unrecognized event";
}
EXPORT_SYMBOL(ib_event_msg);

static const char * const wc_statuses[] = {
        [IB_WC_SUCCESS]                 = "success",
        [IB_WC_LOC_LEN_ERR]             = "local length error",
        [IB_WC_LOC_QP_OP_ERR]           = "local QP operation error",
        [IB_WC_LOC_EEC_OP_ERR]          = "local EE context operation error",
        [IB_WC_LOC_PROT_ERR]            = "local protection error",
        [IB_WC_WR_FLUSH_ERR]            = "WR flushed",
        [IB_WC_MW_BIND_ERR]             = "memory management operation error",
        [IB_WC_BAD_RESP_ERR]            = "bad response error",
        [IB_WC_LOC_ACCESS_ERR]          = "local access error",
        [IB_WC_REM_INV_REQ_ERR]         = "invalid request error",
        [IB_WC_REM_ACCESS_ERR]          = "remote access error",
        [IB_WC_REM_OP_ERR]              = "remote operation error",
        [IB_WC_RETRY_EXC_ERR]           = "transport retry counter exceeded",
        [IB_WC_RNR_RETRY_EXC_ERR]       = "RNR retry counter exceeded",
        [IB_WC_LOC_RDD_VIOL_ERR]        = "local RDD violation error",
        [IB_WC_REM_INV_RD_REQ_ERR]      = "remote invalid RD request",
        [IB_WC_REM_ABORT_ERR]           = "operation aborted",
        [IB_WC_INV_EECN_ERR]            = "invalid EE context number",
        [IB_WC_INV_EEC_STATE_ERR]       = "invalid EE context state",
        [IB_WC_FATAL_ERR]               = "fatal error",
        [IB_WC_RESP_TIMEOUT_ERR]        = "response timeout error",
        [IB_WC_GENERAL_ERR]             = "general error",
};

const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status)
{
        size_t index = status;

        return (index < ARRAY_SIZE(wc_statuses) && wc_statuses[index]) ?
                        wc_statuses[index] : "unrecognized status";
}
EXPORT_SYMBOL(ib_wc_status_msg);

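/*
 * Example usage (an illustrative sketch, not part of this file): a consumer
 * polling a CQ might use ib_wc_status_msg() to log failed completions.
 * "my_cq" here is a hypothetical caller-owned CQ.
 *
 *      struct ib_wc wc;
 *
 *      while (ib_poll_cq(my_cq, 1, &wc) > 0) {
 *              if (wc.status != IB_WC_SUCCESS)
 *                      pr_err("completion failed: %s\n",
 *                             ib_wc_status_msg(wc.status));
 *      }
 */
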
__attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
{
        switch (rate) {
        case IB_RATE_2_5_GBPS: return  1;
        case IB_RATE_5_GBPS:   return  2;
        case IB_RATE_10_GBPS:  return  4;
        case IB_RATE_20_GBPS:  return  8;
        case IB_RATE_30_GBPS:  return 12;
        case IB_RATE_40_GBPS:  return 16;
        case IB_RATE_60_GBPS:  return 24;
        case IB_RATE_80_GBPS:  return 32;
        case IB_RATE_120_GBPS: return 48;
        default:               return -1;
        }
}
EXPORT_SYMBOL(ib_rate_to_mult);

__attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
{
        switch (mult) {
        case 1:  return IB_RATE_2_5_GBPS;
        case 2:  return IB_RATE_5_GBPS;
        case 4:  return IB_RATE_10_GBPS;
        case 8:  return IB_RATE_20_GBPS;
        case 12: return IB_RATE_30_GBPS;
        case 16: return IB_RATE_40_GBPS;
        case 24: return IB_RATE_60_GBPS;
        case 32: return IB_RATE_80_GBPS;
        case 48: return IB_RATE_120_GBPS;
        default: return IB_RATE_PORT_CURRENT;
        }
}
EXPORT_SYMBOL(mult_to_ib_rate);

__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
{
        switch (rate) {
        case IB_RATE_2_5_GBPS: return 2500;
        case IB_RATE_5_GBPS:   return 5000;
        case IB_RATE_10_GBPS:  return 10000;
        case IB_RATE_20_GBPS:  return 20000;
        case IB_RATE_30_GBPS:  return 30000;
        case IB_RATE_40_GBPS:  return 40000;
        case IB_RATE_60_GBPS:  return 60000;
        case IB_RATE_80_GBPS:  return 80000;
        case IB_RATE_120_GBPS: return 120000;
        case IB_RATE_14_GBPS:  return 14062;
        case IB_RATE_56_GBPS:  return 56250;
        case IB_RATE_112_GBPS: return 112500;
        case IB_RATE_168_GBPS: return 168750;
        case IB_RATE_25_GBPS:  return 25781;
        case IB_RATE_100_GBPS: return 103125;
        case IB_RATE_200_GBPS: return 206250;
        case IB_RATE_300_GBPS: return 309375;
        default:               return -1;
        }
}
EXPORT_SYMBOL(ib_rate_to_mbps);

__attribute_const__ enum rdma_transport_type
rdma_node_get_transport(enum rdma_node_type node_type)
{
        switch (node_type) {
        case RDMA_NODE_IB_CA:
        case RDMA_NODE_IB_SWITCH:
        case RDMA_NODE_IB_ROUTER:
                return RDMA_TRANSPORT_IB;
        case RDMA_NODE_RNIC:
                return RDMA_TRANSPORT_IWARP;
        case RDMA_NODE_USNIC:
                return RDMA_TRANSPORT_USNIC;
        case RDMA_NODE_USNIC_UDP:
                return RDMA_TRANSPORT_USNIC_UDP;
        default:
                BUG();
                return 0;
        }
}
EXPORT_SYMBOL(rdma_node_get_transport);

enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num)
{
        if (device->get_link_layer)
                return device->get_link_layer(device, port_num);

        switch (rdma_node_get_transport(device->node_type)) {
        case RDMA_TRANSPORT_IB:
                return IB_LINK_LAYER_INFINIBAND;
        case RDMA_TRANSPORT_IWARP:
        case RDMA_TRANSPORT_USNIC:
        case RDMA_TRANSPORT_USNIC_UDP:
                return IB_LINK_LAYER_ETHERNET;
        default:
                return IB_LINK_LAYER_UNSPECIFIED;
        }
}
EXPORT_SYMBOL(rdma_port_get_link_layer);

/* Protection domains */

/**
 * ib_alloc_pd - Allocates an unused protection domain.
 * @device: The device on which to allocate the protection domain.
 *
 * A protection domain object provides an association between QPs, shared
 * receive queues, address handles, memory regions, and memory windows.
 *
 * Every PD has a local_dma_lkey which can be used as the lkey value for local
 * memory operations.
 */
struct ib_pd *ib_alloc_pd(struct ib_device *device)
{
        struct ib_pd *pd;

        pd = device->alloc_pd(device, NULL, NULL);
        if (IS_ERR(pd))
                return pd;

        pd->device = device;
        pd->uobject = NULL;
        pd->local_mr = NULL;
        atomic_set(&pd->usecnt, 0);

        if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
                pd->local_dma_lkey = device->local_dma_lkey;
        else {
                struct ib_mr *mr;

                mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
                if (IS_ERR(mr)) {
                        ib_dealloc_pd(pd);
                        return (struct ib_pd *)mr;
                }

                pd->local_mr = mr;
                pd->local_dma_lkey = pd->local_mr->lkey;
        }
        return pd;
}
EXPORT_SYMBOL(ib_alloc_pd);

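/*
 * Example usage (an illustrative sketch, not part of this file): a kernel
 * client typically allocates one PD per device and uses its local_dma_lkey
 * for local SGEs. "dev" and "sge" are hypothetical caller state.
 *
 *      struct ib_pd *pd = ib_alloc_pd(dev);
 *
 *      if (IS_ERR(pd))
 *              return PTR_ERR(pd);
 *      sge.lkey = pd->local_dma_lkey;
 *      ...
 *      ib_dealloc_pd(pd);
 */
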
/**
 * ib_dealloc_pd - Deallocates a protection domain.
 * @pd: The protection domain to deallocate.
 *
 * It is an error to call this function while any resources in the pd still
 * exist. The caller is responsible to synchronously destroy them and
 * guarantee no new allocations will happen.
 */
void ib_dealloc_pd(struct ib_pd *pd)
{
        int ret;

        if (pd->local_mr) {
                ret = ib_dereg_mr(pd->local_mr);
                WARN_ON(ret);
                pd->local_mr = NULL;
        }

        /* uverbs manipulates usecnt with proper locking, while the kabi
         * requires the caller to guarantee we can't race here.
         */
        WARN_ON(atomic_read(&pd->usecnt));

        /* Making dealloc_pd a void return is a WIP, no driver should return
         * an error here.
         */
        ret = pd->device->dealloc_pd(pd);
        WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
}
EXPORT_SYMBOL(ib_dealloc_pd);

/* Address handles */

struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
{
        struct ib_ah *ah;

        ah = pd->device->create_ah(pd, ah_attr);

        if (!IS_ERR(ah)) {
                ah->device  = pd->device;
                ah->pd      = pd;
                ah->uobject = NULL;
                atomic_inc(&pd->usecnt);
        }

        return ah;
}
EXPORT_SYMBOL(ib_create_ah);

static int ib_get_header_version(const union rdma_network_hdr *hdr)
{
        const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh;
        struct iphdr ip4h_checked;
        const struct ipv6hdr *ip6h = (struct ipv6hdr *)&hdr->ibgrh;

        /* If it's IPv6, the version must be 6, otherwise, the first
         * 20 bytes (before the IPv4 header) are garbled.
         */
        if (ip6h->version != 6)
                return (ip4h->version == 4) ? 4 : 0;
        /* version may be 6 or 4 because the first 20 bytes could be garbled */

        /* RoCE v2 requires no options, thus header length
         * must be 5 words
         */
        if (ip4h->ihl != 5)
                return 6;

        /* Verify checksum.
         * We can't write on scattered buffers so we need to copy to
         * temp buffer.
         */
        memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked));
        ip4h_checked.check = 0;
        ip4h_checked.check = ip_fast_csum((u8 *)&ip4h_checked, 5);
        /* if IPv4 header checksum is OK, believe it */
        if (ip4h->check == ip4h_checked.check)
                return 4;
        return 6;
}

static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
                                                     u8 port_num,
                                                     const struct ib_grh *grh)
{
        int grh_version;

        if (rdma_protocol_ib(device, port_num))
                return RDMA_NETWORK_IB;

        grh_version = ib_get_header_version((union rdma_network_hdr *)grh);

        if (grh_version == 4)
                return RDMA_NETWORK_IPV4;

        if (grh->next_hdr == IPPROTO_UDP)
                return RDMA_NETWORK_IPV6;

        return RDMA_NETWORK_ROCE_V1;
}

struct find_gid_index_context {
        u16 vlan_id;
        enum ib_gid_type gid_type;
};

static bool find_gid_index(const union ib_gid *gid,
                           const struct ib_gid_attr *gid_attr,
                           void *context)
{
        struct find_gid_index_context *ctx =
                (struct find_gid_index_context *)context;

        if (ctx->gid_type != gid_attr->gid_type)
                return false;

        if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
            (is_vlan_dev(gid_attr->ndev) &&
             vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
                return false;

        return true;
}

static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
                                   u16 vlan_id, const union ib_gid *sgid,
                                   enum ib_gid_type gid_type,
                                   u16 *gid_index)
{
        struct find_gid_index_context context = {.vlan_id = vlan_id,
                                                 .gid_type = gid_type};

        return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
                                     &context, gid_index);
}

static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr,
                                  enum rdma_network_type net_type,
                                  union ib_gid *sgid, union ib_gid *dgid)
{
        struct sockaddr_in src_in;
        struct sockaddr_in dst_in;
        __be32 src_saddr, dst_saddr;

        if (!sgid || !dgid)
                return -EINVAL;

        if (net_type == RDMA_NETWORK_IPV4) {
                memcpy(&src_in.sin_addr.s_addr,
                       &hdr->roce4grh.saddr, 4);
                memcpy(&dst_in.sin_addr.s_addr,
                       &hdr->roce4grh.daddr, 4);
                src_saddr = src_in.sin_addr.s_addr;
                dst_saddr = dst_in.sin_addr.s_addr;
                ipv6_addr_set_v4mapped(src_saddr,
                                       (struct in6_addr *)sgid);
                ipv6_addr_set_v4mapped(dst_saddr,
                                       (struct in6_addr *)dgid);
                return 0;
        } else if (net_type == RDMA_NETWORK_IPV6 ||
                   net_type == RDMA_NETWORK_IB) {
                *dgid = hdr->ibgrh.dgid;
                *sgid = hdr->ibgrh.sgid;
                return 0;
        } else {
                return -EINVAL;
        }
}

int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
                       const struct ib_wc *wc, const struct ib_grh *grh,
                       struct ib_ah_attr *ah_attr)
{
        u32 flow_class;
        u16 gid_index;
        int ret;
        enum rdma_network_type net_type = RDMA_NETWORK_IB;
        enum ib_gid_type gid_type = IB_GID_TYPE_IB;
        union ib_gid dgid;
        union ib_gid sgid;

        memset(ah_attr, 0, sizeof *ah_attr);
        if (rdma_cap_eth_ah(device, port_num)) {
                if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
                        net_type = wc->network_hdr_type;
                else
                        net_type = ib_get_net_type_by_grh(device, port_num, grh);
                gid_type = ib_network_to_gid_type(net_type);
        }
        ret = get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
                                     &sgid, &dgid);
        if (ret)
                return ret;

        if (rdma_protocol_roce(device, port_num)) {
                int if_index = 0;
                u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
                                wc->vlan_id : 0xffff;
                struct net_device *idev;
                struct net_device *resolved_dev;

                if (!(wc->wc_flags & IB_WC_GRH))
                        return -EPROTOTYPE;

                if (!device->get_netdev)
                        return -EOPNOTSUPP;

                idev = device->get_netdev(device, port_num);
                if (!idev)
                        return -ENODEV;

                ret = rdma_addr_find_dmac_by_grh(&dgid, &sgid,
                                                 ah_attr->dmac,
                                                 wc->wc_flags & IB_WC_WITH_VLAN ?
                                                 NULL : &vlan_id,
                                                 &if_index);
                if (ret) {
                        dev_put(idev);
                        return ret;
                }

                resolved_dev = dev_get_by_index(&init_net, if_index);
                if (!resolved_dev) {
                        /* dev_get_by_index() can fail; bail out rather than
                         * dereference a NULL pointer below.
                         */
                        dev_put(idev);
                        return -ENODEV;
                }
                if (resolved_dev->flags & IFF_LOOPBACK) {
                        dev_put(resolved_dev);
                        resolved_dev = idev;
                        dev_hold(resolved_dev);
                }
                rcu_read_lock();
                if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev,
                                                                   resolved_dev))
                        ret = -EHOSTUNREACH;
                rcu_read_unlock();
                dev_put(idev);
                dev_put(resolved_dev);
                if (ret)
                        return ret;

                ret = get_sgid_index_from_eth(device, port_num, vlan_id,
                                              &dgid, gid_type, &gid_index);
                if (ret)
                        return ret;
        }

        ah_attr->dlid = wc->slid;
        ah_attr->sl = wc->sl;
        ah_attr->src_path_bits = wc->dlid_path_bits;
        ah_attr->port_num = port_num;

        if (wc->wc_flags & IB_WC_GRH) {
                ah_attr->ah_flags = IB_AH_GRH;
                ah_attr->grh.dgid = sgid;

                if (!rdma_cap_eth_ah(device, port_num)) {
                        ret = ib_find_cached_gid_by_port(device, &dgid,
                                                         IB_GID_TYPE_IB,
                                                         port_num, NULL,
                                                         &gid_index);
                        if (ret)
                                return ret;
                }

                ah_attr->grh.sgid_index = (u8) gid_index;
                flow_class = be32_to_cpu(grh->version_tclass_flow);
                ah_attr->grh.flow_label = flow_class & 0xFFFFF;
                ah_attr->grh.hop_limit = 0xFF;
                ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
        }
        return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_wc);

struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
                                   const struct ib_grh *grh, u8 port_num)
{
        struct ib_ah_attr ah_attr;
        int ret;

        ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr);
        if (ret)
                return ERR_PTR(ret);

        return ib_create_ah(pd, &ah_attr);
}
EXPORT_SYMBOL(ib_create_ah_from_wc);

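/*
 * Example usage (an illustrative sketch, not part of this file): a UD
 * responder can reply to the sender of a received datagram by building an
 * AH directly from the completion. "pd", "wc", "recv_buf" and "port" are
 * hypothetical caller state; when IB_WC_GRH is set, the GRH occupies the
 * first 40 bytes of the receive buffer.
 *
 *      struct ib_ah *ah = ib_create_ah_from_wc(pd, wc,
 *                                              (struct ib_grh *)recv_buf,
 *                                              port);
 *      if (IS_ERR(ah))
 *              return PTR_ERR(ah);
 *      ... post sends using ah, wc->src_qp and the peer's Q_Key ...
 *      ib_destroy_ah(ah);
 */
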
int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
{
        return ah->device->modify_ah ?
                ah->device->modify_ah(ah, ah_attr) :
                -ENOSYS;
}
EXPORT_SYMBOL(ib_modify_ah);

int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
{
        return ah->device->query_ah ?
                ah->device->query_ah(ah, ah_attr) :
                -ENOSYS;
}
EXPORT_SYMBOL(ib_query_ah);

int ib_destroy_ah(struct ib_ah *ah)
{
        struct ib_pd *pd;
        int ret;

        pd = ah->pd;
        ret = ah->device->destroy_ah(ah);
        if (!ret)
                atomic_dec(&pd->usecnt);

        return ret;
}
EXPORT_SYMBOL(ib_destroy_ah);

/* Shared receive queues */

struct ib_srq *ib_create_srq(struct ib_pd *pd,
                             struct ib_srq_init_attr *srq_init_attr)
{
        struct ib_srq *srq;

        if (!pd->device->create_srq)
                return ERR_PTR(-ENOSYS);

        srq = pd->device->create_srq(pd, srq_init_attr, NULL);

        if (!IS_ERR(srq)) {
                srq->device = pd->device;
                srq->pd = pd;
                srq->uobject = NULL;
                srq->event_handler = srq_init_attr->event_handler;
                srq->srq_context = srq_init_attr->srq_context;
                srq->srq_type = srq_init_attr->srq_type;
                if (srq->srq_type == IB_SRQT_XRC) {
                        srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd;
                        srq->ext.xrc.cq = srq_init_attr->ext.xrc.cq;
                        atomic_inc(&srq->ext.xrc.xrcd->usecnt);
                        atomic_inc(&srq->ext.xrc.cq->usecnt);
                }
                atomic_inc(&pd->usecnt);
                atomic_set(&srq->usecnt, 0);
        }

        return srq;
}
EXPORT_SYMBOL(ib_create_srq);

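/*
 * Example usage (an illustrative sketch, not part of this file): creating a
 * basic (non-XRC) SRQ and arming its limit event afterwards with
 * ib_modify_srq(). "my_srq_event_handler" is a hypothetical callback.
 *
 *      struct ib_srq_init_attr init_attr = {
 *              .event_handler = my_srq_event_handler,
 *              .srq_type = IB_SRQT_BASIC,
 *              .attr = { .max_wr = 256, .max_sge = 1 },
 *      };
 *      struct ib_srq *srq = ib_create_srq(pd, &init_attr);
 *      struct ib_srq_attr attr = { .srq_limit = 16 };
 *
 *      if (IS_ERR(srq))
 *              return PTR_ERR(srq);
 *      ib_modify_srq(srq, &attr, IB_SRQ_LIMIT);
 */
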
int ib_modify_srq(struct ib_srq *srq,
                  struct ib_srq_attr *srq_attr,
                  enum ib_srq_attr_mask srq_attr_mask)
{
        return srq->device->modify_srq ?
                srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL) :
                -ENOSYS;
}
EXPORT_SYMBOL(ib_modify_srq);

int ib_query_srq(struct ib_srq *srq,
                 struct ib_srq_attr *srq_attr)
{
        return srq->device->query_srq ?
                srq->device->query_srq(srq, srq_attr) : -ENOSYS;
}
EXPORT_SYMBOL(ib_query_srq);

int ib_destroy_srq(struct ib_srq *srq)
{
        struct ib_pd *pd;
        enum ib_srq_type srq_type;
        struct ib_xrcd *uninitialized_var(xrcd);
        struct ib_cq *uninitialized_var(cq);
        int ret;

        if (atomic_read(&srq->usecnt))
                return -EBUSY;

        pd = srq->pd;
        srq_type = srq->srq_type;
        if (srq_type == IB_SRQT_XRC) {
                xrcd = srq->ext.xrc.xrcd;
                cq = srq->ext.xrc.cq;
        }

        ret = srq->device->destroy_srq(srq);
        if (!ret) {
                atomic_dec(&pd->usecnt);
                if (srq_type == IB_SRQT_XRC) {
                        atomic_dec(&xrcd->usecnt);
                        atomic_dec(&cq->usecnt);
                }
        }

        return ret;
}
EXPORT_SYMBOL(ib_destroy_srq);

/* Queue pairs */

static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
{
        struct ib_qp *qp = context;
        unsigned long flags;

        spin_lock_irqsave(&qp->device->event_handler_lock, flags);
        list_for_each_entry(event->element.qp, &qp->open_list, open_list)
                if (event->element.qp->event_handler)
                        event->element.qp->event_handler(event, event->element.qp->qp_context);
        spin_unlock_irqrestore(&qp->device->event_handler_lock, flags);
}

static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp)
{
        mutex_lock(&xrcd->tgt_qp_mutex);
        list_add(&qp->xrcd_list, &xrcd->tgt_qp_list);
        mutex_unlock(&xrcd->tgt_qp_mutex);
}

static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
                                  void (*event_handler)(struct ib_event *, void *),
                                  void *qp_context)
{
        struct ib_qp *qp;
        unsigned long flags;

        qp = kzalloc(sizeof *qp, GFP_KERNEL);
        if (!qp)
                return ERR_PTR(-ENOMEM);

        qp->real_qp = real_qp;
        atomic_inc(&real_qp->usecnt);
        qp->device = real_qp->device;
        qp->event_handler = event_handler;
        qp->qp_context = qp_context;
        qp->qp_num = real_qp->qp_num;
        qp->qp_type = real_qp->qp_type;

        spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
        list_add(&qp->open_list, &real_qp->open_list);
        spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);

        return qp;
}

struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
                         struct ib_qp_open_attr *qp_open_attr)
{
        struct ib_qp *qp, *real_qp;

        if (qp_open_attr->qp_type != IB_QPT_XRC_TGT)
                return ERR_PTR(-EINVAL);

        qp = ERR_PTR(-EINVAL);
        mutex_lock(&xrcd->tgt_qp_mutex);
        list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) {
                if (real_qp->qp_num == qp_open_attr->qp_num) {
                        qp = __ib_open_qp(real_qp, qp_open_attr->event_handler,
                                          qp_open_attr->qp_context);
                        break;
                }
        }
        mutex_unlock(&xrcd->tgt_qp_mutex);
        return qp;
}
EXPORT_SYMBOL(ib_open_qp);

struct ib_qp *ib_create_qp(struct ib_pd *pd,
                           struct ib_qp_init_attr *qp_init_attr)
{
        struct ib_qp *qp, *real_qp;
        struct ib_device *device;

        device = pd ? pd->device : qp_init_attr->xrcd->device;
        qp = device->create_qp(pd, qp_init_attr, NULL);

        if (!IS_ERR(qp)) {
                qp->device = device;
                qp->real_qp = qp;
                qp->uobject = NULL;
                qp->qp_type = qp_init_attr->qp_type;

                atomic_set(&qp->usecnt, 0);
                if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) {
                        qp->event_handler = __ib_shared_qp_event_handler;
                        qp->qp_context = qp;
                        qp->pd = NULL;
                        qp->send_cq = qp->recv_cq = NULL;
                        qp->srq = NULL;
                        qp->xrcd = qp_init_attr->xrcd;
                        atomic_inc(&qp_init_attr->xrcd->usecnt);
                        INIT_LIST_HEAD(&qp->open_list);

                        real_qp = qp;
                        qp = __ib_open_qp(real_qp, qp_init_attr->event_handler,
                                          qp_init_attr->qp_context);
                        if (!IS_ERR(qp))
                                __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
                        else
                                real_qp->device->destroy_qp(real_qp);
                } else {
                        qp->event_handler = qp_init_attr->event_handler;
                        qp->qp_context = qp_init_attr->qp_context;
                        if (qp_init_attr->qp_type == IB_QPT_XRC_INI) {
                                qp->recv_cq = NULL;
                                qp->srq = NULL;
                        } else {
                                qp->recv_cq = qp_init_attr->recv_cq;
                                atomic_inc(&qp_init_attr->recv_cq->usecnt);
                                qp->srq = qp_init_attr->srq;
                                if (qp->srq)
                                        atomic_inc(&qp_init_attr->srq->usecnt);
                        }

                        qp->pd = pd;
                        qp->send_cq = qp_init_attr->send_cq;
                        qp->xrcd = NULL;

                        atomic_inc(&pd->usecnt);
                        atomic_inc(&qp_init_attr->send_cq->usecnt);
                }
        }

        return qp;
}
EXPORT_SYMBOL(ib_create_qp);

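/*
 * Example usage (an illustrative sketch, not part of this file): minimal RC
 * QP creation against pre-existing CQs. "send_cq", "recv_cq" and
 * "my_qp_event_handler" are hypothetical caller state.
 *
 *      struct ib_qp_init_attr init_attr = {
 *              .event_handler = my_qp_event_handler,
 *              .send_cq = send_cq,
 *              .recv_cq = recv_cq,
 *              .qp_type = IB_QPT_RC,
 *              .cap = { .max_send_wr = 64, .max_recv_wr = 64,
 *                       .max_send_sge = 1, .max_recv_sge = 1 },
 *      };
 *      struct ib_qp *qp = ib_create_qp(pd, &init_attr);
 *
 *      if (IS_ERR(qp))
 *              return PTR_ERR(qp);
 */
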
static const struct {
        int                     valid;
        enum ib_qp_attr_mask    req_param[IB_QPT_MAX];
        enum ib_qp_attr_mask    opt_param[IB_QPT_MAX];
} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
        [IB_QPS_RESET] = {
                [IB_QPS_RESET] = { .valid = 1 },
                [IB_QPS_INIT]  = {
                        .valid = 1,
                        .req_param = {
                                [IB_QPT_UD]  = (IB_QP_PKEY_INDEX |
                                                IB_QP_PORT |
                                                IB_QP_QKEY),
                                [IB_QPT_RAW_PACKET] = IB_QP_PORT,
                                [IB_QPT_UC]  = (IB_QP_PKEY_INDEX |
                                                IB_QP_PORT |
                                                IB_QP_ACCESS_FLAGS),
                                [IB_QPT_RC]  = (IB_QP_PKEY_INDEX |
                                                IB_QP_PORT |
                                                IB_QP_ACCESS_FLAGS),
                                [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX |
                                                IB_QP_PORT |
                                                IB_QP_ACCESS_FLAGS),
                                [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX |
                                                IB_QP_PORT |
                                                IB_QP_ACCESS_FLAGS),
                                [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
                                                IB_QP_QKEY),
                                [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
                                                IB_QP_QKEY),
                        }
                },
        },
        [IB_QPS_INIT]  = {
                [IB_QPS_RESET] = { .valid = 1 },
                [IB_QPS_ERR]   = { .valid = 1 },
                [IB_QPS_INIT]  = {
                        .valid = 1,
                        .opt_param = {
                                [IB_QPT_UD]  = (IB_QP_PKEY_INDEX |
                                                IB_QP_PORT |
                                                IB_QP_QKEY),
                                [IB_QPT_UC]  = (IB_QP_PKEY_INDEX |
                                                IB_QP_PORT |
                                                IB_QP_ACCESS_FLAGS),
                                [IB_QPT_RC]  = (IB_QP_PKEY_INDEX |
                                                IB_QP_PORT |
                                                IB_QP_ACCESS_FLAGS),
                                [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX |
                                                IB_QP_PORT |
                                                IB_QP_ACCESS_FLAGS),
                                [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX |
                                                IB_QP_PORT |
                                                IB_QP_ACCESS_FLAGS),
                                [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
                                                IB_QP_QKEY),
                                [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
                                                IB_QP_QKEY),
                        }
                },
                [IB_QPS_RTR]   = {
                        .valid = 1,
                        .req_param = {
                                [IB_QPT_UC]  = (IB_QP_AV |
                                                IB_QP_PATH_MTU |
                                                IB_QP_DEST_QPN |
                                                IB_QP_RQ_PSN),
                                [IB_QPT_RC]  = (IB_QP_AV |
                                                IB_QP_PATH_MTU |
                                                IB_QP_DEST_QPN |
                                                IB_QP_RQ_PSN |
                                                IB_QP_MAX_DEST_RD_ATOMIC |
                                                IB_QP_MIN_RNR_TIMER),
                                [IB_QPT_XRC_INI] = (IB_QP_AV |
                                                IB_QP_PATH_MTU |
                                                IB_QP_DEST_QPN |
                                                IB_QP_RQ_PSN),
                                [IB_QPT_XRC_TGT] = (IB_QP_AV |
                                                IB_QP_PATH_MTU |
                                                IB_QP_DEST_QPN |
                                                IB_QP_RQ_PSN |
                                                IB_QP_MAX_DEST_RD_ATOMIC |
                                                IB_QP_MIN_RNR_TIMER),
                        },
                        .opt_param = {
                                [IB_QPT_UD]  = (IB_QP_PKEY_INDEX |
                                                IB_QP_QKEY),
                                [IB_QPT_UC]  = (IB_QP_ALT_PATH |
                                                IB_QP_ACCESS_FLAGS |
                                                IB_QP_PKEY_INDEX),
                                [IB_QPT_RC]  = (IB_QP_ALT_PATH |
                                                IB_QP_ACCESS_FLAGS |
                                                IB_QP_PKEY_INDEX),
                                [IB_QPT_XRC_INI] = (IB_QP_ALT_PATH |
                                                IB_QP_ACCESS_FLAGS |
                                                IB_QP_PKEY_INDEX),
                                [IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH |
                                                IB_QP_ACCESS_FLAGS |
                                                IB_QP_PKEY_INDEX),
                                [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
                                                IB_QP_QKEY),
                                [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
                                                IB_QP_QKEY),
                        },
                },
        },
        [IB_QPS_RTR]   = {
                [IB_QPS_RESET] = { .valid = 1 },
                [IB_QPS_ERR]   = { .valid = 1 },
                [IB_QPS_RTS]   = {
                        .valid = 1,
                        .req_param = {
                                [IB_QPT_UD]  = IB_QP_SQ_PSN,
                                [IB_QPT_UC]  = IB_QP_SQ_PSN,
                                [IB_QPT_RC]  = (IB_QP_TIMEOUT |
                                                IB_QP_RETRY_CNT |
                                                IB_QP_RNR_RETRY |
                                                IB_QP_SQ_PSN |
                                                IB_QP_MAX_QP_RD_ATOMIC),
                                [IB_QPT_XRC_INI] = (IB_QP_TIMEOUT |
                                                IB_QP_RETRY_CNT |
                                                IB_QP_RNR_RETRY |
                                                IB_QP_SQ_PSN |
                                                IB_QP_MAX_QP_RD_ATOMIC),
                                [IB_QPT_XRC_TGT] = (IB_QP_TIMEOUT |
                                                IB_QP_SQ_PSN),
                                [IB_QPT_SMI] = IB_QP_SQ_PSN,
                                [IB_QPT_GSI] = IB_QP_SQ_PSN,
                        },
                        .opt_param = {
                                [IB_QPT_UD]  = (IB_QP_CUR_STATE |
                                                IB_QP_QKEY),
                                [IB_QPT_UC]  = (IB_QP_CUR_STATE |
                                                IB_QP_ALT_PATH |
                                                IB_QP_ACCESS_FLAGS |
                                                IB_QP_PATH_MIG_STATE),
                                [IB_QPT_RC]  = (IB_QP_CUR_STATE |
                                                IB_QP_ALT_PATH |
                                                IB_QP_ACCESS_FLAGS |
                                                IB_QP_MIN_RNR_TIMER |
                                                IB_QP_PATH_MIG_STATE),
                                [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE |
                                                IB_QP_ALT_PATH |
                                                IB_QP_ACCESS_FLAGS |
                                                IB_QP_PATH_MIG_STATE),
                                [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE |
                                                IB_QP_ALT_PATH |
                                                IB_QP_ACCESS_FLAGS |
                                                IB_QP_MIN_RNR_TIMER |
                                                IB_QP_PATH_MIG_STATE),
                                [IB_QPT_SMI] = (IB_QP_CUR_STATE |
                                                IB_QP_QKEY),
                                [IB_QPT_GSI] = (IB_QP_CUR_STATE |
                                                IB_QP_QKEY),
                        }
                }
        },
        [IB_QPS_RTS]   = {
                [IB_QPS_RESET] = { .valid = 1 },
                [IB_QPS_ERR]   = { .valid = 1 },
                [IB_QPS_RTS]   = {
                        .valid = 1,
                        .opt_param = {
                                [IB_QPT_UD]  = (IB_QP_CUR_STATE |
                                                IB_QP_QKEY),
                                [IB_QPT_UC]  = (IB_QP_CUR_STATE |
                                                IB_QP_ACCESS_FLAGS |
                                                IB_QP_ALT_PATH |
                                                IB_QP_PATH_MIG_STATE),
                                [IB_QPT_RC]  = (IB_QP_CUR_STATE |
                                                IB_QP_ACCESS_FLAGS |
                                                IB_QP_ALT_PATH |
                                                IB_QP_PATH_MIG_STATE |
                                                IB_QP_MIN_RNR_TIMER),
                                [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE |
                                                IB_QP_ACCESS_FLAGS |
                                                IB_QP_ALT_PATH |
                                                IB_QP_PATH_MIG_STATE),
                                [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE |
                                                IB_QP_ACCESS_FLAGS |
                                                IB_QP_ALT_PATH |
                                                IB_QP_PATH_MIG_STATE |
                                                IB_QP_MIN_RNR_TIMER),
                                [IB_QPT_SMI] = (IB_QP_CUR_STATE |
                                                IB_QP_QKEY),
                                [IB_QPT_GSI] = (IB_QP_CUR_STATE |
                                                IB_QP_QKEY),
                        }
                },
                [IB_QPS_SQD]   = {
                        .valid = 1,
                        .opt_param = {
                                [IB_QPT_UD]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
                                [IB_QPT_UC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
                                [IB_QPT_RC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
                                [IB_QPT_XRC_INI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
                                [IB_QPT_XRC_TGT] = IB_QP_EN_SQD_ASYNC_NOTIFY, /* ??? */
                                [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
                                [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
                        }
                },
        },
        [IB_QPS_SQD]   = {
                [IB_QPS_RESET] = { .valid = 1 },
                [IB_QPS_ERR]   = { .valid = 1 },
                [IB_QPS_RTS]   = {
                        .valid = 1,
                        .opt_param = {
                                [IB_QPT_UD]  = (IB_QP_CUR_STATE |
                                                IB_QP_QKEY),
                                [IB_QPT_UC]  = (IB_QP_CUR_STATE |
                                                IB_QP_ALT_PATH |
                                                IB_QP_ACCESS_FLAGS |
                                                IB_QP_PATH_MIG_STATE),
                                [IB_QPT_RC]  = (IB_QP_CUR_STATE |
                                                IB_QP_ALT_PATH |
                                                IB_QP_ACCESS_FLAGS |
                                                IB_QP_MIN_RNR_TIMER |
                                                IB_QP_PATH_MIG_STATE),
                                [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE |
                                                IB_QP_ALT_PATH |
                                                IB_QP_ACCESS_FLAGS |
                                                IB_QP_PATH_MIG_STATE),
                                [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE |
                                                IB_QP_ALT_PATH |
                                                IB_QP_ACCESS_FLAGS |
                                                IB_QP_MIN_RNR_TIMER |
                                                IB_QP_PATH_MIG_STATE),
                                [IB_QPT_SMI] = (IB_QP_CUR_STATE |
                                                IB_QP_QKEY),
                                [IB_QPT_GSI] = (IB_QP_CUR_STATE |
                                                IB_QP_QKEY),
                        }
                },
                [IB_QPS_SQD]   = {
                        .valid = 1,
                        .opt_param = {
                                [IB_QPT_UD]  = (IB_QP_PKEY_INDEX |
                                                IB_QP_QKEY),
                                [IB_QPT_UC]  = (IB_QP_AV |
                                                IB_QP_ALT_PATH |
                                                IB_QP_ACCESS_FLAGS |
                                                IB_QP_PKEY_INDEX |
                                                IB_QP_PATH_MIG_STATE),
                                [IB_QPT_RC]  = (IB_QP_PORT |
                                                IB_QP_AV |
                                                IB_QP_TIMEOUT |
                                                IB_QP_RETRY_CNT |
                                                IB_QP_RNR_RETRY |
                                                IB_QP_MAX_QP_RD_ATOMIC |
                                                IB_QP_MAX_DEST_RD_ATOMIC |
                                                IB_QP_ALT_PATH |
                                                IB_QP_ACCESS_FLAGS |
                                                IB_QP_PKEY_INDEX |
                                                IB_QP_MIN_RNR_TIMER |
                                                IB_QP_PATH_MIG_STATE),
                                [IB_QPT_XRC_INI] = (IB_QP_PORT |
                                                IB_QP_AV |
                                                IB_QP_TIMEOUT |
                                                IB_QP_RETRY_CNT |
                                                IB_QP_RNR_RETRY |
                                                IB_QP_MAX_QP_RD_ATOMIC |
                                                IB_QP_ALT_PATH |
                                                IB_QP_ACCESS_FLAGS |
                                                IB_QP_PKEY_INDEX |
                                                IB_QP_PATH_MIG_STATE),
                                [IB_QPT_XRC_TGT] = (IB_QP_PORT |
                                                IB_QP_AV |
                                                IB_QP_TIMEOUT |
                                                IB_QP_MAX_DEST_RD_ATOMIC |
                                                IB_QP_ALT_PATH |
                                                IB_QP_ACCESS_FLAGS |
                                                IB_QP_PKEY_INDEX |
                                                IB_QP_MIN_RNR_TIMER |
                                                IB_QP_PATH_MIG_STATE),
                                [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
                                                IB_QP_QKEY),
                                [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
                                                IB_QP_QKEY),
                        }
                }
        },
        [IB_QPS_SQE]   = {
                [IB_QPS_RESET] = { .valid = 1 },
                [IB_QPS_ERR]   = { .valid = 1 },
                [IB_QPS_RTS]   = {
                        .valid = 1,
                        .opt_param = {
                                [IB_QPT_UD]  = (IB_QP_CUR_STATE |
                                                IB_QP_QKEY),
                                [IB_QPT_UC]  = (IB_QP_CUR_STATE |
                                                IB_QP_ACCESS_FLAGS),
                                [IB_QPT_SMI] = (IB_QP_CUR_STATE |
                                                IB_QP_QKEY),
                                [IB_QPT_GSI] = (IB_QP_CUR_STATE |
                                                IB_QP_QKEY),
                        }
                }
        },
        [IB_QPS_ERR] = {
                [IB_QPS_RESET] = { .valid = 1 },
                [IB_QPS_ERR]   = { .valid = 1 }
        }
};

int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
                       enum ib_qp_type type, enum ib_qp_attr_mask mask,
                       enum rdma_link_layer ll)
{
        enum ib_qp_attr_mask req_param, opt_param;

        if (cur_state  < 0 || cur_state  > IB_QPS_ERR ||
            next_state < 0 || next_state > IB_QPS_ERR)
                return 0;

        if (mask & IB_QP_CUR_STATE &&
            cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
            cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
                return 0;

        if (!qp_state_table[cur_state][next_state].valid)
                return 0;

        req_param = qp_state_table[cur_state][next_state].req_param[type];
        opt_param = qp_state_table[cur_state][next_state].opt_param[type];

        if ((mask & req_param) != req_param)
                return 0;

        if (mask & ~(req_param | opt_param | IB_QP_STATE))
                return 0;

        return 1;
}
EXPORT_SYMBOL(ib_modify_qp_is_ok);

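/*
 * Example usage (an illustrative sketch, not part of this file): validating
 * the attribute mask for a RESET -> INIT transition of an RC QP on an
 * InfiniBand link before issuing the actual modify. Per qp_state_table
 * above, this transition requires PKEY_INDEX, PORT and ACCESS_FLAGS.
 *
 *      int mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT |
 *                 IB_QP_ACCESS_FLAGS;
 *
 *      if (!ib_modify_qp_is_ok(IB_QPS_RESET, IB_QPS_INIT, IB_QPT_RC,
 *                              mask, IB_LINK_LAYER_INFINIBAND))
 *              return -EINVAL;
 */
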
int ib_resolve_eth_dmac(struct ib_qp *qp,
                        struct ib_qp_attr *qp_attr, int *qp_attr_mask)
{
        int ret = 0;

        if (*qp_attr_mask & IB_QP_AV) {
                if (qp_attr->ah_attr.port_num < rdma_start_port(qp->device) ||
                    qp_attr->ah_attr.port_num > rdma_end_port(qp->device))
                        return -EINVAL;

                if (!rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))
                        return 0;

                if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
                        rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw,
                                        qp_attr->ah_attr.dmac);
                } else {
                        union ib_gid            sgid;
                        struct ib_gid_attr      sgid_attr;
                        int                     ifindex;

                        ret = ib_query_gid(qp->device,
                                           qp_attr->ah_attr.port_num,
                                           qp_attr->ah_attr.grh.sgid_index,
                                           &sgid, &sgid_attr);

                        if (ret || !sgid_attr.ndev) {
                                if (!ret)
                                        ret = -ENXIO;
                                goto out;
                        }
                        if (sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
                                /* TODO: get the hoplimit from the inet/inet6
                                 * device
                                 */
                                qp_attr->ah_attr.grh.hop_limit =
                                                        IPV6_DEFAULT_HOPLIMIT;

                        ifindex = sgid_attr.ndev->ifindex;

                        ret = rdma_addr_find_dmac_by_grh(&sgid,
                                                         &qp_attr->ah_attr.grh.dgid,
                                                         qp_attr->ah_attr.dmac,
                                                         NULL, &ifindex);

                        dev_put(sgid_attr.ndev);
                }
        }
out:
        return ret;
}
EXPORT_SYMBOL(ib_resolve_eth_dmac);


int ib_modify_qp(struct ib_qp *qp,
                 struct ib_qp_attr *qp_attr,
                 int qp_attr_mask)
{
        int ret;

        ret = ib_resolve_eth_dmac(qp, qp_attr, &qp_attr_mask);
        if (ret)
                return ret;

        return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
}
EXPORT_SYMBOL(ib_modify_qp);

int ib_query_qp(struct ib_qp *qp,
                struct ib_qp_attr *qp_attr,
                int qp_attr_mask,
                struct ib_qp_init_attr *qp_init_attr)
{
        return qp->device->query_qp ?
                qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :
                -ENOSYS;
}
EXPORT_SYMBOL(ib_query_qp);

int ib_close_qp(struct ib_qp *qp)
{
        struct ib_qp *real_qp;
        unsigned long flags;

        real_qp = qp->real_qp;
        if (real_qp == qp)
                return -EINVAL;

        spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
        list_del(&qp->open_list);
        spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);

        atomic_dec(&real_qp->usecnt);
        kfree(qp);

        return 0;
}
EXPORT_SYMBOL(ib_close_qp);

static int __ib_destroy_shared_qp(struct ib_qp *qp)
{
        struct ib_xrcd *xrcd;
        struct ib_qp *real_qp;
        int ret;

        real_qp = qp->real_qp;
        xrcd = real_qp->xrcd;

        mutex_lock(&xrcd->tgt_qp_mutex);
        ib_close_qp(qp);
        if (atomic_read(&real_qp->usecnt) == 0)
                list_del(&real_qp->xrcd_list);
        else
                real_qp = NULL;
        mutex_unlock(&xrcd->tgt_qp_mutex);

        if (real_qp) {
                ret = ib_destroy_qp(real_qp);
                if (!ret)
                        atomic_dec(&xrcd->usecnt);
                else
                        __ib_insert_xrcd_qp(xrcd, real_qp);
        }

        return 0;
}

int ib_destroy_qp(struct ib_qp *qp)
{
        struct ib_pd *pd;
        struct ib_cq *scq, *rcq;
        struct ib_srq *srq;
        int ret;

        if (atomic_read(&qp->usecnt))
                return -EBUSY;

        if (qp->real_qp != qp)
                return __ib_destroy_shared_qp(qp);

        pd  = qp->pd;
        scq = qp->send_cq;
        rcq = qp->recv_cq;
        srq = qp->srq;

        ret = qp->device->destroy_qp(qp);
        if (!ret) {
                if (pd)
                        atomic_dec(&pd->usecnt);
                if (scq)
                        atomic_dec(&scq->usecnt);
                if (rcq)
                        atomic_dec(&rcq->usecnt);
                if (srq)
                        atomic_dec(&srq->usecnt);
        }

        return ret;
}
EXPORT_SYMBOL(ib_destroy_qp);

/* Completion queues */

struct ib_cq *ib_create_cq(struct ib_device *device,
                           ib_comp_handler comp_handler,
                           void (*event_handler)(struct ib_event *, void *),
                           void *cq_context,
                           const struct ib_cq_init_attr *cq_attr)
{
        struct ib_cq *cq;

        cq = device->create_cq(device, cq_attr, NULL, NULL);

        if (!IS_ERR(cq)) {
                cq->device        = device;
                cq->uobject       = NULL;
                cq->comp_handler  = comp_handler;
                cq->event_handler = event_handler;
                cq->cq_context    = cq_context;
                atomic_set(&cq->usecnt, 0);
        }

        return cq;
}
EXPORT_SYMBOL(ib_create_cq);

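/*
 * Example usage (an illustrative sketch, not part of this file): creating a
 * CQ with 256 entries on completion vector 0. "my_comp_handler" and "ctx"
 * are hypothetical caller state.
 *
 *      struct ib_cq_init_attr cq_attr = { .cqe = 256, .comp_vector = 0 };
 *      struct ib_cq *cq = ib_create_cq(device, my_comp_handler, NULL,
 *                                      ctx, &cq_attr);
 *
 *      if (IS_ERR(cq))
 *              return PTR_ERR(cq);
 */
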
int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
        return cq->device->modify_cq ?
                cq->device->modify_cq(cq, cq_count, cq_period) : -ENOSYS;
}
EXPORT_SYMBOL(ib_modify_cq);

int ib_destroy_cq(struct ib_cq *cq)
{
        if (atomic_read(&cq->usecnt))
                return -EBUSY;

        return cq->device->destroy_cq(cq);
}
EXPORT_SYMBOL(ib_destroy_cq);

int ib_resize_cq(struct ib_cq *cq, int cqe)
{
        return cq->device->resize_cq ?
                cq->device->resize_cq(cq, cqe, NULL) : -ENOSYS;
}
EXPORT_SYMBOL(ib_resize_cq);

/* Memory regions */

struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
{
        struct ib_mr *mr;
        int err;

        err = ib_check_mr_access(mr_access_flags);
        if (err)
                return ERR_PTR(err);

        mr = pd->device->get_dma_mr(pd, mr_access_flags);

        if (!IS_ERR(mr)) {
                mr->device  = pd->device;
                mr->pd      = pd;
                mr->uobject = NULL;
                atomic_inc(&pd->usecnt);
                atomic_set(&mr->usecnt, 0);
        }

        return mr;
}
EXPORT_SYMBOL(ib_get_dma_mr);

int ib_dereg_mr(struct ib_mr *mr)
{
        struct ib_pd *pd;
        int ret;

        if (atomic_read(&mr->usecnt))
                return -EBUSY;

        pd = mr->pd;
        ret = mr->device->dereg_mr(mr);
        if (!ret)
                atomic_dec(&pd->usecnt);

        return ret;
}
EXPORT_SYMBOL(ib_dereg_mr);

/**
 * ib_alloc_mr() - Allocates a memory region
 * @pd:            protection domain associated with the region
 * @mr_type:       memory region type
 * @max_num_sg:    maximum sg entries available for registration.
 *
 * Notes:
 * Memory registration page/sg lists must not exceed max_num_sg.
 * For mr_type IB_MR_TYPE_MEM_REG, the total length cannot exceed
 * max_num_sg * used_page_size.
 *
 */
struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
                          enum ib_mr_type mr_type,
                          u32 max_num_sg)
{
        struct ib_mr *mr;

        if (!pd->device->alloc_mr)
                return ERR_PTR(-ENOSYS);

        mr = pd->device->alloc_mr(pd, mr_type, max_num_sg);
        if (!IS_ERR(mr)) {
                mr->device  = pd->device;
                mr->pd      = pd;
                mr->uobject = NULL;
                atomic_inc(&pd->usecnt);
                atomic_set(&mr->usecnt, 0);
        }

        return mr;
}
EXPORT_SYMBOL(ib_alloc_mr);

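/*
 * Example usage (an illustrative sketch, not part of this file): allocating
 * a registration MR capable of mapping up to 32 pages, then mapping a
 * DMA-mapped scatterlist with ib_map_mr_sg() (defined later in this file).
 * "sg" and "nents" are hypothetical, already mapped via ib_dma_map_sg().
 *
 *      struct ib_mr *mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 32);
 *      int n;
 *
 *      if (IS_ERR(mr))
 *              return PTR_ERR(mr);
 *      n = ib_map_mr_sg(mr, sg, nents, PAGE_SIZE);
 *      if (n < nents)
 *              ... error, or only a prefix was mapped ...
 */
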
/* "Fast" memory regions */

struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
                            int mr_access_flags,
                            struct ib_fmr_attr *fmr_attr)
{
        struct ib_fmr *fmr;

        if (!pd->device->alloc_fmr)
                return ERR_PTR(-ENOSYS);

        fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr);
        if (!IS_ERR(fmr)) {
                fmr->device = pd->device;
                fmr->pd     = pd;
                atomic_inc(&pd->usecnt);
        }

        return fmr;
}
EXPORT_SYMBOL(ib_alloc_fmr);

int ib_unmap_fmr(struct list_head *fmr_list)
{
        struct ib_fmr *fmr;

        if (list_empty(fmr_list))
                return 0;

        fmr = list_entry(fmr_list->next, struct ib_fmr, list);
        return fmr->device->unmap_fmr(fmr_list);
}
EXPORT_SYMBOL(ib_unmap_fmr);

int ib_dealloc_fmr(struct ib_fmr *fmr)
{
        struct ib_pd *pd;
        int ret;

        pd = fmr->pd;
        ret = fmr->device->dealloc_fmr(fmr);
        if (!ret)
                atomic_dec(&pd->usecnt);

        return ret;
}
EXPORT_SYMBOL(ib_dealloc_fmr);

/* Multicast groups */

int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
        int ret;

        if (!qp->device->attach_mcast)
                return -ENOSYS;
        if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
                return -EINVAL;

        ret = qp->device->attach_mcast(qp, gid, lid);
        if (!ret)
                atomic_inc(&qp->usecnt);
        return ret;
}
EXPORT_SYMBOL(ib_attach_mcast);

int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
        int ret;

        if (!qp->device->detach_mcast)
                return -ENOSYS;
        if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
                return -EINVAL;

        ret = qp->device->detach_mcast(qp, gid, lid);
        if (!ret)
                atomic_dec(&qp->usecnt);
        return ret;
}
EXPORT_SYMBOL(ib_detach_mcast);

struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
{
        struct ib_xrcd *xrcd;

        if (!device->alloc_xrcd)
                return ERR_PTR(-ENOSYS);

        xrcd = device->alloc_xrcd(device, NULL, NULL);
        if (!IS_ERR(xrcd)) {
                xrcd->device = device;
                xrcd->inode = NULL;
                atomic_set(&xrcd->usecnt, 0);
                mutex_init(&xrcd->tgt_qp_mutex);
                INIT_LIST_HEAD(&xrcd->tgt_qp_list);
        }

        return xrcd;
}
EXPORT_SYMBOL(ib_alloc_xrcd);

int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
{
        struct ib_qp *qp;
        int ret;

        if (atomic_read(&xrcd->usecnt))
                return -EBUSY;

        while (!list_empty(&xrcd->tgt_qp_list)) {
                qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list);
                ret = ib_destroy_qp(qp);
                if (ret)
                        return ret;
        }

        return xrcd->device->dealloc_xrcd(xrcd);
}
EXPORT_SYMBOL(ib_dealloc_xrcd);

struct ib_flow *ib_create_flow(struct ib_qp *qp,
                               struct ib_flow_attr *flow_attr,
                               int domain)
{
        struct ib_flow *flow_id;

        if (!qp->device->create_flow)
                return ERR_PTR(-ENOSYS);

        flow_id = qp->device->create_flow(qp, flow_attr, domain);
        if (!IS_ERR(flow_id))
                atomic_inc(&qp->usecnt);
        return flow_id;
}
EXPORT_SYMBOL(ib_create_flow);

int ib_destroy_flow(struct ib_flow *flow_id)
{
        int err;
        struct ib_qp *qp = flow_id->qp;

        err = qp->device->destroy_flow(flow_id);
        if (!err)
                atomic_dec(&qp->usecnt);
        return err;
}
EXPORT_SYMBOL(ib_destroy_flow);

int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
                       struct ib_mr_status *mr_status)
{
        return mr->device->check_mr_status ?
                mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS;
}
EXPORT_SYMBOL(ib_check_mr_status);

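/*
 * Example usage (an illustrative sketch, not part of this file): attaching
 * a UD QP to a multicast group. "mgid" must be a multicast GID (raw[0] ==
 * 0xff) and "mlid" the LID assigned when the group was joined via the SA;
 * both are hypothetical here.
 *
 *      ret = ib_attach_mcast(qp, &mgid, mlid);
 *      if (ret)
 *              return ret;
 *      ...
 *      ib_detach_mcast(qp, &mgid, mlid);
 */
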
/**
 * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list
 *     and set it on the memory region.
 * @mr:            memory region
 * @sg:            dma mapped scatterlist
 * @sg_nents:      number of entries in sg
 * @page_size:     page vector desired page size
 *
 * Constraints:
 * - The first sg element is allowed to have an offset.
 * - Each sg element must be aligned to page_size (or physically
 *   contiguous to the previous element). In case an sg element has a
 *   non contiguous offset, the mapping prefix will not include it.
 * - The last sg element is allowed to have length less than page_size.
 * - If sg_nents total byte length exceeds the mr max_num_sg * page_size
 *   then only max_num_sg entries will be mapped.
 *
 * Returns the number of sg elements that were mapped to the memory region.
 *
 * After this completes successfully, the memory region
 * is ready for registration.
 */
int ib_map_mr_sg(struct ib_mr *mr,
                 struct scatterlist *sg,
                 int sg_nents,
                 unsigned int page_size)
{
        if (unlikely(!mr->device->map_mr_sg))
                return -ENOSYS;

        mr->page_size = page_size;

        return mr->device->map_mr_sg(mr, sg, sg_nents);
}
EXPORT_SYMBOL(ib_map_mr_sg);

/**
 * ib_sg_to_pages() - Convert the largest prefix of a sg list
 *     to a page vector
 * @mr:            memory region
 * @sgl:           dma mapped scatterlist
 * @sg_nents:      number of entries in sg
 * @set_page:      driver page assignment function pointer
 *
 * Core service helper for drivers to convert the largest
 * prefix of given sg list to a page vector. The sg list
 * prefix converted is the prefix that meets the requirements
 * of ib_map_mr_sg.
 *
 * Returns the number of sg elements that were assigned to
 * a page vector.
 */
int ib_sg_to_pages(struct ib_mr *mr,
                   struct scatterlist *sgl,
                   int sg_nents,
                   int (*set_page)(struct ib_mr *, u64))
{
        struct scatterlist *sg;
        u64 last_end_dma_addr = 0, last_page_addr = 0;
        unsigned int last_page_off = 0;
        u64 page_mask = ~((u64)mr->page_size - 1);
        int i, ret;

        mr->iova = sg_dma_address(&sgl[0]);
        mr->length = 0;

        for_each_sg(sgl, sg, sg_nents, i) {
                u64 dma_addr = sg_dma_address(sg);
                unsigned int dma_len = sg_dma_len(sg);
                u64 end_dma_addr = dma_addr + dma_len;
                u64 page_addr = dma_addr & page_mask;

                /*
                 * For the second and later elements, check whether either the
                 * end of element i-1 or the start of element i is not aligned
                 * on a page boundary.
                 */
                if (i && (last_page_off != 0 || page_addr != dma_addr)) {
                        /* Stop mapping if there is a gap. */
                        if (last_end_dma_addr != dma_addr)
                                break;

                        /*
                         * Coalesce this element with the last. If it is small
                         * enough just update mr->length. Otherwise start
                         * mapping from the next page.
                         */
                        goto next_page;
                }

                do {
                        ret = set_page(mr, page_addr);
                        if (unlikely(ret < 0))
                                return i ? : ret;
next_page:
                        page_addr += mr->page_size;
                } while (page_addr < end_dma_addr);

                mr->length += dma_len;
                last_end_dma_addr = end_dma_addr;
                last_page_addr = end_dma_addr & page_mask;
                last_page_off = end_dma_addr & ~page_mask;
        }

        return i;
}
EXPORT_SYMBOL(ib_sg_to_pages);
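
/*
 * Example (an illustrative sketch, not part of this file): the shape of a
 * driver's set_page callback for ib_sg_to_pages(). A real driver stores
 * each page address into its hardware-specific page list; "struct my_mr",
 * "to_my_mr" and the field names are hypothetical.
 *
 *      static int my_set_page(struct ib_mr *ibmr, u64 addr)
 *      {
 *              struct my_mr *mr = to_my_mr(ibmr);
 *
 *              if (unlikely(mr->npages == mr->max_pages))
 *                      return -ENOMEM;
 *
 *              mr->pages[mr->npages++] = addr;
 *              return 0;
 *      }
 *
 * A driver's map_mr_sg method can then simply do:
 *
 *      mr->npages = 0;
 *      return ib_sg_to_pages(ibmr, sg, sg_nents, my_set_page);
 */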