1 /* 2 * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. 3 * Copyright (c) 2004 Infinicon Corporation. All rights reserved. 4 * Copyright (c) 2004 Intel Corporation. All rights reserved. 5 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 6 * Copyright (c) 2004 Voltaire Corporation. All rights reserved. 7 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 8 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. 9 * 10 * This software is available to you under a choice of one of two 11 * licenses. You may choose to be licensed under the terms of the GNU 12 * General Public License (GPL) Version 2, available from the file 13 * COPYING in the main directory of this source tree, or the 14 * OpenIB.org BSD license below: 15 * 16 * Redistribution and use in source and binary forms, with or 17 * without modification, are permitted provided that the following 18 * conditions are met: 19 * 20 * - Redistributions of source code must retain the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer. 23 * 24 * - Redistributions in binary form must reproduce the above 25 * copyright notice, this list of conditions and the following 26 * disclaimer in the documentation and/or other materials 27 * provided with the distribution. 28 * 29 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 30 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 31 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 32 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 33 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 34 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 35 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 36 * SOFTWARE. 
37 */ 38 39 #include <linux/errno.h> 40 #include <linux/err.h> 41 #include <linux/export.h> 42 #include <linux/string.h> 43 #include <linux/slab.h> 44 #include <linux/in.h> 45 #include <linux/in6.h> 46 #include <net/addrconf.h> 47 #include <linux/security.h> 48 49 #include <rdma/ib_verbs.h> 50 #include <rdma/ib_cache.h> 51 #include <rdma/ib_addr.h> 52 #include <rdma/ib_umem.h> 53 #include <rdma/rw.h> 54 #include <rdma/lag.h> 55 56 #include "core_priv.h" 57 #include <trace/events/rdma_core.h> 58 59 static int ib_resolve_eth_dmac(struct ib_device *device, 60 struct rdma_ah_attr *ah_attr); 61 62 static const char * const ib_events[] = { 63 [IB_EVENT_CQ_ERR] = "CQ error", 64 [IB_EVENT_QP_FATAL] = "QP fatal error", 65 [IB_EVENT_QP_REQ_ERR] = "QP request error", 66 [IB_EVENT_QP_ACCESS_ERR] = "QP access error", 67 [IB_EVENT_COMM_EST] = "communication established", 68 [IB_EVENT_SQ_DRAINED] = "send queue drained", 69 [IB_EVENT_PATH_MIG] = "path migration successful", 70 [IB_EVENT_PATH_MIG_ERR] = "path migration error", 71 [IB_EVENT_DEVICE_FATAL] = "device fatal error", 72 [IB_EVENT_PORT_ACTIVE] = "port active", 73 [IB_EVENT_PORT_ERR] = "port error", 74 [IB_EVENT_LID_CHANGE] = "LID change", 75 [IB_EVENT_PKEY_CHANGE] = "P_key change", 76 [IB_EVENT_SM_CHANGE] = "SM change", 77 [IB_EVENT_SRQ_ERR] = "SRQ error", 78 [IB_EVENT_SRQ_LIMIT_REACHED] = "SRQ limit reached", 79 [IB_EVENT_QP_LAST_WQE_REACHED] = "last WQE reached", 80 [IB_EVENT_CLIENT_REREGISTER] = "client reregister", 81 [IB_EVENT_GID_CHANGE] = "GID changed", 82 [IB_EVENT_DEVICE_SPEED_CHANGE] = "device speed change" 83 }; 84 85 const char *__attribute_const__ ib_event_msg(enum ib_event_type event) 86 { 87 size_t index = event; 88 89 return (index < ARRAY_SIZE(ib_events) && ib_events[index]) ? 90 ib_events[index] : "unrecognized event"; 91 } 92 EXPORT_SYMBOL(ib_event_msg); 93 94 static const char * const wc_statuses[] = { 95 [IB_WC_SUCCESS] = "success", 96 [IB_WC_LOC_LEN_ERR] = "local length error", 97 [IB_WC_LOC_QP_OP_ERR] = "local QP operation error", 98 [IB_WC_LOC_EEC_OP_ERR] = "local EE context operation error", 99 [IB_WC_LOC_PROT_ERR] = "local protection error", 100 [IB_WC_WR_FLUSH_ERR] = "WR flushed", 101 [IB_WC_MW_BIND_ERR] = "memory bind operation error", 102 [IB_WC_BAD_RESP_ERR] = "bad response error", 103 [IB_WC_LOC_ACCESS_ERR] = "local access error", 104 [IB_WC_REM_INV_REQ_ERR] = "remote invalid request error", 105 [IB_WC_REM_ACCESS_ERR] = "remote access error", 106 [IB_WC_REM_OP_ERR] = "remote operation error", 107 [IB_WC_RETRY_EXC_ERR] = "transport retry counter exceeded", 108 [IB_WC_RNR_RETRY_EXC_ERR] = "RNR retry counter exceeded", 109 [IB_WC_LOC_RDD_VIOL_ERR] = "local RDD violation error", 110 [IB_WC_REM_INV_RD_REQ_ERR] = "remote invalid RD request", 111 [IB_WC_REM_ABORT_ERR] = "operation aborted", 112 [IB_WC_INV_EECN_ERR] = "invalid EE context number", 113 [IB_WC_INV_EEC_STATE_ERR] = "invalid EE context state", 114 [IB_WC_FATAL_ERR] = "fatal error", 115 [IB_WC_RESP_TIMEOUT_ERR] = "response timeout error", 116 [IB_WC_GENERAL_ERR] = "general error", 117 }; 118 119 const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status) 120 { 121 size_t index = status; 122 123 return (index < ARRAY_SIZE(wc_statuses) && wc_statuses[index]) ? 
124 wc_statuses[index] : "unrecognized status"; 125 } 126 EXPORT_SYMBOL(ib_wc_status_msg); 127 128 __attribute_const__ int ib_rate_to_mult(enum ib_rate rate) 129 { 130 switch (rate) { 131 case IB_RATE_2_5_GBPS: return 1; 132 case IB_RATE_5_GBPS: return 2; 133 case IB_RATE_10_GBPS: return 4; 134 case IB_RATE_20_GBPS: return 8; 135 case IB_RATE_30_GBPS: return 12; 136 case IB_RATE_40_GBPS: return 16; 137 case IB_RATE_60_GBPS: return 24; 138 case IB_RATE_80_GBPS: return 32; 139 case IB_RATE_120_GBPS: return 48; 140 case IB_RATE_14_GBPS: return 6; 141 case IB_RATE_56_GBPS: return 22; 142 case IB_RATE_112_GBPS: return 45; 143 case IB_RATE_168_GBPS: return 67; 144 case IB_RATE_25_GBPS: return 10; 145 case IB_RATE_100_GBPS: return 40; 146 case IB_RATE_200_GBPS: return 80; 147 case IB_RATE_300_GBPS: return 120; 148 case IB_RATE_28_GBPS: return 11; 149 case IB_RATE_50_GBPS: return 20; 150 case IB_RATE_400_GBPS: return 160; 151 case IB_RATE_600_GBPS: return 240; 152 case IB_RATE_800_GBPS: return 320; 153 case IB_RATE_1600_GBPS: return 640; 154 default: return -1; 155 } 156 } 157 EXPORT_SYMBOL(ib_rate_to_mult); 158 159 __attribute_const__ enum ib_rate mult_to_ib_rate(int mult) 160 { 161 switch (mult) { 162 case 1: return IB_RATE_2_5_GBPS; 163 case 2: return IB_RATE_5_GBPS; 164 case 4: return IB_RATE_10_GBPS; 165 case 8: return IB_RATE_20_GBPS; 166 case 12: return IB_RATE_30_GBPS; 167 case 16: return IB_RATE_40_GBPS; 168 case 24: return IB_RATE_60_GBPS; 169 case 32: return IB_RATE_80_GBPS; 170 case 48: return IB_RATE_120_GBPS; 171 case 6: return IB_RATE_14_GBPS; 172 case 22: return IB_RATE_56_GBPS; 173 case 45: return IB_RATE_112_GBPS; 174 case 67: return IB_RATE_168_GBPS; 175 case 10: return IB_RATE_25_GBPS; 176 case 40: return IB_RATE_100_GBPS; 177 case 80: return IB_RATE_200_GBPS; 178 case 120: return IB_RATE_300_GBPS; 179 case 11: return IB_RATE_28_GBPS; 180 case 20: return IB_RATE_50_GBPS; 181 case 160: return IB_RATE_400_GBPS; 182 case 240: return IB_RATE_600_GBPS; 183 case 320: return IB_RATE_800_GBPS; 184 case 640: return IB_RATE_1600_GBPS; 185 default: return IB_RATE_PORT_CURRENT; 186 } 187 } 188 EXPORT_SYMBOL(mult_to_ib_rate); 189 190 __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate) 191 { 192 switch (rate) { 193 case IB_RATE_2_5_GBPS: return 2500; 194 case IB_RATE_5_GBPS: return 5000; 195 case IB_RATE_10_GBPS: return 10000; 196 case IB_RATE_20_GBPS: return 20000; 197 case IB_RATE_30_GBPS: return 30000; 198 case IB_RATE_40_GBPS: return 40000; 199 case IB_RATE_60_GBPS: return 60000; 200 case IB_RATE_80_GBPS: return 80000; 201 case IB_RATE_120_GBPS: return 120000; 202 case IB_RATE_14_GBPS: return 14062; 203 case IB_RATE_56_GBPS: return 56250; 204 case IB_RATE_112_GBPS: return 112500; 205 case IB_RATE_168_GBPS: return 168750; 206 case IB_RATE_25_GBPS: return 25781; 207 case IB_RATE_100_GBPS: return 103125; 208 case IB_RATE_200_GBPS: return 206250; 209 case IB_RATE_300_GBPS: return 309375; 210 case IB_RATE_28_GBPS: return 28125; 211 case IB_RATE_50_GBPS: return 53125; 212 case IB_RATE_400_GBPS: return 425000; 213 case IB_RATE_600_GBPS: return 637500; 214 case IB_RATE_800_GBPS: return 850000; 215 case IB_RATE_1600_GBPS: return 1700000; 216 default: return -1; 217 } 218 } 219 EXPORT_SYMBOL(ib_rate_to_mbps); 220 221 struct ib_speed_attr { 222 const char *str; 223 int speed; 224 }; 225 226 #define IB_SPEED_ATTR(speed_type, _str, _speed) \ 227 [speed_type] = {.str = _str, .speed = _speed} 228 229 static const struct ib_speed_attr ib_speed_attrs[] = { 230 IB_SPEED_ATTR(IB_SPEED_SDR, " SDR", 
25), 231 IB_SPEED_ATTR(IB_SPEED_DDR, " DDR", 50), 232 IB_SPEED_ATTR(IB_SPEED_QDR, " QDR", 100), 233 IB_SPEED_ATTR(IB_SPEED_FDR10, " FDR10", 100), 234 IB_SPEED_ATTR(IB_SPEED_FDR, " FDR", 140), 235 IB_SPEED_ATTR(IB_SPEED_EDR, " EDR", 250), 236 IB_SPEED_ATTR(IB_SPEED_HDR, " HDR", 500), 237 IB_SPEED_ATTR(IB_SPEED_NDR, " NDR", 1000), 238 IB_SPEED_ATTR(IB_SPEED_XDR, " XDR", 2000), 239 }; 240 241 int ib_port_attr_to_speed_info(struct ib_port_attr *attr, 242 struct ib_port_speed_info *speed_info) 243 { 244 int speed_idx = attr->active_speed; 245 246 switch (attr->active_speed) { 247 case IB_SPEED_DDR: 248 case IB_SPEED_QDR: 249 case IB_SPEED_FDR10: 250 case IB_SPEED_FDR: 251 case IB_SPEED_EDR: 252 case IB_SPEED_HDR: 253 case IB_SPEED_NDR: 254 case IB_SPEED_XDR: 255 case IB_SPEED_SDR: 256 break; 257 default: 258 speed_idx = IB_SPEED_SDR; /* Default to SDR for invalid rates */ 259 break; 260 } 261 262 speed_info->str = ib_speed_attrs[speed_idx].str; 263 speed_info->rate = ib_speed_attrs[speed_idx].speed; 264 speed_info->rate *= ib_width_enum_to_int(attr->active_width); 265 if (speed_info->rate < 0) 266 return -EINVAL; 267 268 return 0; 269 } 270 EXPORT_SYMBOL(ib_port_attr_to_speed_info); 271 272 __attribute_const__ enum rdma_transport_type 273 rdma_node_get_transport(unsigned int node_type) 274 { 275 276 if (node_type == RDMA_NODE_USNIC) 277 return RDMA_TRANSPORT_USNIC; 278 if (node_type == RDMA_NODE_USNIC_UDP) 279 return RDMA_TRANSPORT_USNIC_UDP; 280 if (node_type == RDMA_NODE_RNIC) 281 return RDMA_TRANSPORT_IWARP; 282 if (node_type == RDMA_NODE_UNSPECIFIED) 283 return RDMA_TRANSPORT_UNSPECIFIED; 284 285 return RDMA_TRANSPORT_IB; 286 } 287 EXPORT_SYMBOL(rdma_node_get_transport); 288 289 enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, 290 u32 port_num) 291 { 292 enum rdma_transport_type lt; 293 if (device->ops.get_link_layer) 294 return device->ops.get_link_layer(device, port_num); 295 296 lt = rdma_node_get_transport(device->node_type); 297 if (lt == RDMA_TRANSPORT_IB) 298 return IB_LINK_LAYER_INFINIBAND; 299 300 return IB_LINK_LAYER_ETHERNET; 301 } 302 EXPORT_SYMBOL(rdma_port_get_link_layer); 303 304 /* Protection domains */ 305 306 /** 307 * __ib_alloc_pd - Allocates an unused protection domain. 308 * @device: The device on which to allocate the protection domain. 309 * @flags: protection domain flags 310 * @caller: caller's build-time module name 311 * 312 * A protection domain object provides an association between QPs, shared 313 * receive queues, address handles, memory regions, and memory windows. 314 * 315 * Every PD has a local_dma_lkey which can be used as the lkey value for local 316 * memory operations. 
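 *
 * Kernel clients normally reach this through the ib_alloc_pd() wrapper rather
 * than calling it directly. A minimal usage sketch (assumes a valid ib_device
 * obtained from an ib_client add callback; error handling trimmed):
 *
 *	struct ib_pd *pd = ib_alloc_pd(device, 0);
 *
 *	if (IS_ERR(pd))
 *		return PTR_ERR(pd);
 *	...
 *	ib_dealloc_pd(pd);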
317 */ 318 struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, 319 const char *caller) 320 { 321 struct ib_pd *pd; 322 int mr_access_flags = 0; 323 int ret; 324 325 pd = rdma_zalloc_drv_obj(device, ib_pd); 326 if (!pd) 327 return ERR_PTR(-ENOMEM); 328 329 pd->device = device; 330 pd->flags = flags; 331 332 rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD); 333 rdma_restrack_set_name(&pd->res, caller); 334 335 ret = device->ops.alloc_pd(pd, NULL); 336 if (ret) { 337 rdma_restrack_put(&pd->res); 338 kfree(pd); 339 return ERR_PTR(ret); 340 } 341 rdma_restrack_add(&pd->res); 342 343 if (device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY) 344 pd->local_dma_lkey = device->local_dma_lkey; 345 else 346 mr_access_flags |= IB_ACCESS_LOCAL_WRITE; 347 348 if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) { 349 pr_warn("%s: enabling unsafe global rkey\n", caller); 350 mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; 351 } 352 353 if (mr_access_flags) { 354 struct ib_mr *mr; 355 356 mr = pd->device->ops.get_dma_mr(pd, mr_access_flags); 357 if (IS_ERR(mr)) { 358 ib_dealloc_pd(pd); 359 return ERR_CAST(mr); 360 } 361 362 mr->device = pd->device; 363 mr->pd = pd; 364 mr->type = IB_MR_TYPE_DMA; 365 mr->uobject = NULL; 366 mr->need_inval = false; 367 368 pd->__internal_mr = mr; 369 370 if (!(device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY)) 371 pd->local_dma_lkey = pd->__internal_mr->lkey; 372 373 if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) 374 pd->unsafe_global_rkey = pd->__internal_mr->rkey; 375 } 376 377 return pd; 378 } 379 EXPORT_SYMBOL(__ib_alloc_pd); 380 381 /** 382 * ib_dealloc_pd_user - Deallocates a protection domain. 383 * @pd: The protection domain to deallocate. 384 * @udata: Valid user data or NULL for kernel object 385 * 386 * It is an error to call this function while any resources in the pd still 387 * exist. The caller is responsible to synchronously destroy them and 388 * guarantee no new allocations will happen. 389 */ 390 int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata) 391 { 392 int ret; 393 394 if (pd->__internal_mr) { 395 ret = pd->device->ops.dereg_mr(pd->__internal_mr, NULL); 396 WARN_ON(ret); 397 pd->__internal_mr = NULL; 398 } 399 400 ret = pd->device->ops.dealloc_pd(pd, udata); 401 if (ret) 402 return ret; 403 404 rdma_restrack_del(&pd->res); 405 kfree(pd); 406 return ret; 407 } 408 EXPORT_SYMBOL(ib_dealloc_pd_user); 409 410 /* Address handles */ 411 412 /** 413 * rdma_copy_ah_attr - Copy rdma ah attribute from source to destination. 414 * @dest: Pointer to destination ah_attr. Contents of the destination 415 * pointer is assumed to be invalid and attribute are overwritten. 416 * @src: Pointer to source ah_attr. 417 */ 418 void rdma_copy_ah_attr(struct rdma_ah_attr *dest, 419 const struct rdma_ah_attr *src) 420 { 421 *dest = *src; 422 if (dest->grh.sgid_attr) 423 rdma_hold_gid_attr(dest->grh.sgid_attr); 424 } 425 EXPORT_SYMBOL(rdma_copy_ah_attr); 426 427 /** 428 * rdma_replace_ah_attr - Replace valid ah_attr with new one. 429 * @old: Pointer to existing ah_attr which needs to be replaced. 430 * old is assumed to be valid or zero'd 431 * @new: Pointer to the new ah_attr. 432 * 433 * rdma_replace_ah_attr() first releases any reference in the old ah_attr if 434 * old the ah_attr is valid; after that it copies the new attribute and holds 435 * the reference to the replaced ah_attr. 
436 */ 437 void rdma_replace_ah_attr(struct rdma_ah_attr *old, 438 const struct rdma_ah_attr *new) 439 { 440 rdma_destroy_ah_attr(old); 441 *old = *new; 442 if (old->grh.sgid_attr) 443 rdma_hold_gid_attr(old->grh.sgid_attr); 444 } 445 EXPORT_SYMBOL(rdma_replace_ah_attr); 446 447 /** 448 * rdma_move_ah_attr - Move ah_attr pointed by source to destination. 449 * @dest: Pointer to destination ah_attr to copy to. 450 * dest is assumed to be valid or zero'd 451 * @src: Pointer to the new ah_attr. 452 * 453 * rdma_move_ah_attr() first releases any reference in the destination ah_attr 454 * if it is valid. This also transfers ownership of internal references from 455 * src to dest, making src invalid in the process. No new reference of the src 456 * ah_attr is taken. 457 */ 458 void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src) 459 { 460 rdma_destroy_ah_attr(dest); 461 *dest = *src; 462 src->grh.sgid_attr = NULL; 463 } 464 EXPORT_SYMBOL(rdma_move_ah_attr); 465 466 /* 467 * Validate that the rdma_ah_attr is valid for the device before passing it 468 * off to the driver. 469 */ 470 static int rdma_check_ah_attr(struct ib_device *device, 471 struct rdma_ah_attr *ah_attr) 472 { 473 if (!rdma_is_port_valid(device, ah_attr->port_num)) 474 return -EINVAL; 475 476 if ((rdma_is_grh_required(device, ah_attr->port_num) || 477 ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) && 478 !(ah_attr->ah_flags & IB_AH_GRH)) 479 return -EINVAL; 480 481 if (ah_attr->grh.sgid_attr) { 482 /* 483 * Make sure the passed sgid_attr is consistent with the 484 * parameters 485 */ 486 if (ah_attr->grh.sgid_attr->index != ah_attr->grh.sgid_index || 487 ah_attr->grh.sgid_attr->port_num != ah_attr->port_num) 488 return -EINVAL; 489 } 490 return 0; 491 } 492 493 /* 494 * If the ah requires a GRH then ensure that sgid_attr pointer is filled in. 495 * On success the caller is responsible to call rdma_unfill_sgid_attr(). 496 */ 497 static int rdma_fill_sgid_attr(struct ib_device *device, 498 struct rdma_ah_attr *ah_attr, 499 const struct ib_gid_attr **old_sgid_attr) 500 { 501 const struct ib_gid_attr *sgid_attr; 502 struct ib_global_route *grh; 503 int ret; 504 505 *old_sgid_attr = ah_attr->grh.sgid_attr; 506 507 ret = rdma_check_ah_attr(device, ah_attr); 508 if (ret) 509 return ret; 510 511 if (!(ah_attr->ah_flags & IB_AH_GRH)) 512 return 0; 513 514 grh = rdma_ah_retrieve_grh(ah_attr); 515 if (grh->sgid_attr) 516 return 0; 517 518 sgid_attr = 519 rdma_get_gid_attr(device, ah_attr->port_num, grh->sgid_index); 520 if (IS_ERR(sgid_attr)) 521 return PTR_ERR(sgid_attr); 522 523 /* Move ownerhip of the kref into the ah_attr */ 524 grh->sgid_attr = sgid_attr; 525 return 0; 526 } 527 528 static void rdma_unfill_sgid_attr(struct rdma_ah_attr *ah_attr, 529 const struct ib_gid_attr *old_sgid_attr) 530 { 531 /* 532 * Fill didn't change anything, the caller retains ownership of 533 * whatever it passed 534 */ 535 if (ah_attr->grh.sgid_attr == old_sgid_attr) 536 return; 537 538 /* 539 * Otherwise, we need to undo what rdma_fill_sgid_attr so the caller 540 * doesn't see any change in the rdma_ah_attr. If we get here 541 * old_sgid_attr is NULL. 
 */
	rdma_destroy_ah_attr(ah_attr);
}

static const struct ib_gid_attr *
rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr,
		      const struct ib_gid_attr *old_attr)
{
	if (old_attr)
		rdma_put_gid_attr(old_attr);
	if (ah_attr->ah_flags & IB_AH_GRH) {
		rdma_hold_gid_attr(ah_attr->grh.sgid_attr);
		return ah_attr->grh.sgid_attr;
	}
	return NULL;
}

static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
				     struct rdma_ah_attr *ah_attr,
				     u32 flags,
				     struct ib_udata *udata,
				     struct net_device *xmit_slave)
{
	struct rdma_ah_init_attr init_attr = {};
	struct ib_device *device = pd->device;
	struct ib_ah *ah;
	int ret;

	might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE);

	if (!udata && !device->ops.create_ah)
		return ERR_PTR(-EOPNOTSUPP);

	ah = rdma_zalloc_drv_obj_gfp(
		device, ib_ah,
		(flags & RDMA_CREATE_AH_SLEEPABLE) ? GFP_KERNEL : GFP_ATOMIC);
	if (!ah)
		return ERR_PTR(-ENOMEM);

	ah->device = device;
	ah->pd = pd;
	ah->type = ah_attr->type;
	ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL);
	init_attr.ah_attr = ah_attr;
	init_attr.flags = flags;
	init_attr.xmit_slave = xmit_slave;

	if (udata)
		ret = device->ops.create_user_ah(ah, &init_attr, udata);
	else
		ret = device->ops.create_ah(ah, &init_attr, NULL);
	if (ret) {
		if (ah->sgid_attr)
			rdma_put_gid_attr(ah->sgid_attr);
		kfree(ah);
		return ERR_PTR(ret);
	}

	atomic_inc(&pd->usecnt);
	return ah;
}

/**
 * rdma_create_ah - Creates an address handle for the
 * given address vector.
 * @pd: The protection domain associated with the address handle.
 * @ah_attr: The attributes of the address vector.
 * @flags: Create address handle flags (see enum rdma_create_ah_flags).
 *
 * It returns the newly created address handle on success and an ERR_PTR with
 * an appropriate error code on error.
 * The address handle is used to reference a local or global destination
 * in all UD QP post sends.
 */
struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
			     u32 flags)
{
	const struct ib_gid_attr *old_sgid_attr;
	struct net_device *slave;
	struct ib_ah *ah;
	int ret;

	ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
	if (ret)
		return ERR_PTR(ret);
	slave = rdma_lag_get_ah_roce_slave(pd->device, ah_attr,
					   (flags & RDMA_CREATE_AH_SLEEPABLE) ?
					   GFP_KERNEL : GFP_ATOMIC);
	if (IS_ERR(slave)) {
		rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
		return ERR_CAST(slave);
	}
	ah = _rdma_create_ah(pd, ah_attr, flags, NULL, slave);
	rdma_lag_put_ah_roce_slave(slave);
	rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
	return ah;
}
EXPORT_SYMBOL(rdma_create_ah);

/**
 * rdma_create_user_ah - Creates an address handle for the
 * given address vector.
 * It resolves the destination mac address for an ah attribute of RoCE type.
 * @pd: The protection domain associated with the address handle.
 * @ah_attr: The attributes of the address vector.
 * @udata: pointer to user's input output buffer information needed by
 *   the provider driver.
 *
 * It returns the newly created address handle on success and an ERR_PTR with
 * an appropriate error code on error.
 * The address handle is used to reference a local or global destination
 * in all UD QP post sends.
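 *
 * In-kernel consumers use rdma_create_ah() above instead; a rough sketch with
 * a hypothetical, partially filled address vector (dlid and port_num are
 * placeholder values, error handling trimmed):
 *
 *	struct rdma_ah_attr attr = {};
 *	struct ib_ah *ah;
 *
 *	attr.type = rdma_ah_find_type(pd->device, port_num);
 *	rdma_ah_set_dlid(&attr, dlid);
 *	rdma_ah_set_port_num(&attr, port_num);
 *	ah = rdma_create_ah(pd, &attr, RDMA_CREATE_AH_SLEEPABLE);
 *	if (IS_ERR(ah))
 *		return PTR_ERR(ah);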
652 */ 653 struct ib_ah *rdma_create_user_ah(struct ib_pd *pd, 654 struct rdma_ah_attr *ah_attr, 655 struct ib_udata *udata) 656 { 657 const struct ib_gid_attr *old_sgid_attr; 658 struct ib_ah *ah; 659 int err; 660 661 err = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr); 662 if (err) 663 return ERR_PTR(err); 664 665 if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { 666 err = ib_resolve_eth_dmac(pd->device, ah_attr); 667 if (err) { 668 ah = ERR_PTR(err); 669 goto out; 670 } 671 } 672 673 ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, 674 udata, NULL); 675 676 out: 677 rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); 678 return ah; 679 } 680 EXPORT_SYMBOL(rdma_create_user_ah); 681 682 int ib_get_rdma_header_version(const union rdma_network_hdr *hdr) 683 { 684 const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh; 685 struct iphdr ip4h_checked; 686 const struct ipv6hdr *ip6h = (struct ipv6hdr *)&hdr->ibgrh; 687 688 /* If it's IPv6, the version must be 6, otherwise, the first 689 * 20 bytes (before the IPv4 header) are garbled. 690 */ 691 if (ip6h->version != 6) 692 return (ip4h->version == 4) ? 4 : 0; 693 /* version may be 6 or 4 because the first 20 bytes could be garbled */ 694 695 /* RoCE v2 requires no options, thus header length 696 * must be 5 words 697 */ 698 if (ip4h->ihl != 5) 699 return 6; 700 701 /* Verify checksum. 702 * We can't write on scattered buffers so we need to copy to 703 * temp buffer. 704 */ 705 memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked)); 706 ip4h_checked.check = 0; 707 ip4h_checked.check = ip_fast_csum((u8 *)&ip4h_checked, 5); 708 /* if IPv4 header checksum is OK, believe it */ 709 if (ip4h->check == ip4h_checked.check) 710 return 4; 711 return 6; 712 } 713 EXPORT_SYMBOL(ib_get_rdma_header_version); 714 715 static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device, 716 u32 port_num, 717 const struct ib_grh *grh) 718 { 719 int grh_version; 720 721 if (rdma_protocol_ib(device, port_num)) 722 return RDMA_NETWORK_IB; 723 724 grh_version = ib_get_rdma_header_version((union rdma_network_hdr *)grh); 725 726 if (grh_version == 4) 727 return RDMA_NETWORK_IPV4; 728 729 if (grh->next_hdr == IPPROTO_UDP) 730 return RDMA_NETWORK_IPV6; 731 732 return RDMA_NETWORK_ROCE_V1; 733 } 734 735 struct find_gid_index_context { 736 u16 vlan_id; 737 enum ib_gid_type gid_type; 738 }; 739 740 static bool find_gid_index(const union ib_gid *gid, 741 const struct ib_gid_attr *gid_attr, 742 void *context) 743 { 744 struct find_gid_index_context *ctx = context; 745 u16 vlan_id = 0xffff; 746 int ret; 747 748 if (ctx->gid_type != gid_attr->gid_type) 749 return false; 750 751 ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); 752 if (ret) 753 return false; 754 755 return ctx->vlan_id == vlan_id; 756 } 757 758 static const struct ib_gid_attr * 759 get_sgid_attr_from_eth(struct ib_device *device, u32 port_num, 760 u16 vlan_id, const union ib_gid *sgid, 761 enum ib_gid_type gid_type) 762 { 763 struct find_gid_index_context context = {.vlan_id = vlan_id, 764 .gid_type = gid_type}; 765 766 return rdma_find_gid_by_filter(device, sgid, port_num, find_gid_index, 767 &context); 768 } 769 770 int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr, 771 enum rdma_network_type net_type, 772 union ib_gid *sgid, union ib_gid *dgid) 773 { 774 struct sockaddr_in src_in; 775 struct sockaddr_in dst_in; 776 __be32 src_saddr, dst_saddr; 777 778 if (!sgid || !dgid) 779 return -EINVAL; 780 781 if (net_type == RDMA_NETWORK_IPV4) { 782 
memcpy(&src_in.sin_addr.s_addr, 783 &hdr->roce4grh.saddr, 4); 784 memcpy(&dst_in.sin_addr.s_addr, 785 &hdr->roce4grh.daddr, 4); 786 src_saddr = src_in.sin_addr.s_addr; 787 dst_saddr = dst_in.sin_addr.s_addr; 788 ipv6_addr_set_v4mapped(src_saddr, 789 (struct in6_addr *)sgid); 790 ipv6_addr_set_v4mapped(dst_saddr, 791 (struct in6_addr *)dgid); 792 return 0; 793 } else if (net_type == RDMA_NETWORK_IPV6 || 794 net_type == RDMA_NETWORK_IB || net_type == RDMA_NETWORK_ROCE_V1) { 795 *dgid = hdr->ibgrh.dgid; 796 *sgid = hdr->ibgrh.sgid; 797 return 0; 798 } else { 799 return -EINVAL; 800 } 801 } 802 EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr); 803 804 /* Resolve destination mac address and hop limit for unicast destination 805 * GID entry, considering the source GID entry as well. 806 * ah_attribute must have valid port_num, sgid_index. 807 */ 808 static int ib_resolve_unicast_gid_dmac(struct ib_device *device, 809 struct rdma_ah_attr *ah_attr) 810 { 811 struct ib_global_route *grh = rdma_ah_retrieve_grh(ah_attr); 812 const struct ib_gid_attr *sgid_attr = grh->sgid_attr; 813 int hop_limit = 0xff; 814 int ret = 0; 815 816 /* If destination is link local and source GID is RoCEv1, 817 * IP stack is not used. 818 */ 819 if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) && 820 sgid_attr->gid_type == IB_GID_TYPE_ROCE) { 821 rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw, 822 ah_attr->roce.dmac); 823 return ret; 824 } 825 826 ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid, 827 ah_attr->roce.dmac, 828 sgid_attr, &hop_limit); 829 830 grh->hop_limit = hop_limit; 831 return ret; 832 } 833 834 /* 835 * This function initializes address handle attributes from the incoming packet. 836 * Incoming packet has dgid of the receiver node on which this code is 837 * getting executed and, sgid contains the GID of the sender. 838 * 839 * When resolving mac address of destination, the arrived dgid is used 840 * as sgid and, sgid is used as dgid because sgid contains destinations 841 * GID whom to respond to. 842 * 843 * On success the caller is responsible to call rdma_destroy_ah_attr on the 844 * attr. 845 */ 846 int ib_init_ah_attr_from_wc(struct ib_device *device, u32 port_num, 847 const struct ib_wc *wc, const struct ib_grh *grh, 848 struct rdma_ah_attr *ah_attr) 849 { 850 u32 flow_class; 851 int ret; 852 enum rdma_network_type net_type = RDMA_NETWORK_IB; 853 enum ib_gid_type gid_type = IB_GID_TYPE_IB; 854 const struct ib_gid_attr *sgid_attr; 855 int hoplimit = 0xff; 856 union ib_gid dgid; 857 union ib_gid sgid; 858 859 might_sleep(); 860 861 memset(ah_attr, 0, sizeof *ah_attr); 862 ah_attr->type = rdma_ah_find_type(device, port_num); 863 if (rdma_cap_eth_ah(device, port_num)) { 864 if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE) 865 net_type = wc->network_hdr_type; 866 else 867 net_type = ib_get_net_type_by_grh(device, port_num, grh); 868 gid_type = ib_network_to_gid_type(net_type); 869 } 870 ret = ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type, 871 &sgid, &dgid); 872 if (ret) 873 return ret; 874 875 rdma_ah_set_sl(ah_attr, wc->sl); 876 rdma_ah_set_port_num(ah_attr, port_num); 877 878 if (rdma_protocol_roce(device, port_num)) { 879 u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ? 
880 wc->vlan_id : 0xffff; 881 882 if (!(wc->wc_flags & IB_WC_GRH)) 883 return -EPROTOTYPE; 884 885 sgid_attr = get_sgid_attr_from_eth(device, port_num, 886 vlan_id, &dgid, 887 gid_type); 888 if (IS_ERR(sgid_attr)) 889 return PTR_ERR(sgid_attr); 890 891 flow_class = be32_to_cpu(grh->version_tclass_flow); 892 rdma_move_grh_sgid_attr(ah_attr, 893 &sgid, 894 flow_class & 0xFFFFF, 895 hoplimit, 896 (flow_class >> 20) & 0xFF, 897 sgid_attr); 898 899 ret = ib_resolve_unicast_gid_dmac(device, ah_attr); 900 if (ret) 901 rdma_destroy_ah_attr(ah_attr); 902 903 return ret; 904 } else { 905 rdma_ah_set_dlid(ah_attr, wc->slid); 906 rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits); 907 908 if ((wc->wc_flags & IB_WC_GRH) == 0) 909 return 0; 910 911 if (dgid.global.interface_id != 912 cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) { 913 sgid_attr = rdma_find_gid_by_port( 914 device, &dgid, IB_GID_TYPE_IB, port_num, NULL); 915 } else 916 sgid_attr = rdma_get_gid_attr(device, port_num, 0); 917 918 if (IS_ERR(sgid_attr)) 919 return PTR_ERR(sgid_attr); 920 flow_class = be32_to_cpu(grh->version_tclass_flow); 921 rdma_move_grh_sgid_attr(ah_attr, 922 &sgid, 923 flow_class & 0xFFFFF, 924 hoplimit, 925 (flow_class >> 20) & 0xFF, 926 sgid_attr); 927 928 return 0; 929 } 930 } 931 EXPORT_SYMBOL(ib_init_ah_attr_from_wc); 932 933 /** 934 * rdma_move_grh_sgid_attr - Sets the sgid attribute of GRH, taking ownership 935 * of the reference 936 * 937 * @attr: Pointer to AH attribute structure 938 * @dgid: Destination GID 939 * @flow_label: Flow label 940 * @hop_limit: Hop limit 941 * @traffic_class: traffic class 942 * @sgid_attr: Pointer to SGID attribute 943 * 944 * This takes ownership of the sgid_attr reference. The caller must ensure 945 * rdma_destroy_ah_attr() is called before destroying the rdma_ah_attr after 946 * calling this function. 947 */ 948 void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid, 949 u32 flow_label, u8 hop_limit, u8 traffic_class, 950 const struct ib_gid_attr *sgid_attr) 951 { 952 rdma_ah_set_grh(attr, dgid, flow_label, sgid_attr->index, hop_limit, 953 traffic_class); 954 attr->grh.sgid_attr = sgid_attr; 955 } 956 EXPORT_SYMBOL(rdma_move_grh_sgid_attr); 957 958 /** 959 * rdma_destroy_ah_attr - Release reference to SGID attribute of 960 * ah attribute. 961 * @ah_attr: Pointer to ah attribute 962 * 963 * Release reference to the SGID attribute of the ah attribute if it is 964 * non NULL. It is safe to call this multiple times, and safe to call it on 965 * a zero initialized ah_attr. 
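 *
 * A typical pairing with ib_init_ah_attr_from_wc() looks roughly like this
 * (sketch only, error handling trimmed):
 *
 *	struct rdma_ah_attr attr;
 *
 *	if (!ib_init_ah_attr_from_wc(device, port_num, wc, grh, &attr)) {
 *		... use attr, e.g. with rdma_create_ah() ...
 *		rdma_destroy_ah_attr(&attr);
 *	}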
966 */ 967 void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr) 968 { 969 if (ah_attr->grh.sgid_attr) { 970 rdma_put_gid_attr(ah_attr->grh.sgid_attr); 971 ah_attr->grh.sgid_attr = NULL; 972 } 973 } 974 EXPORT_SYMBOL(rdma_destroy_ah_attr); 975 976 struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, 977 const struct ib_grh *grh, u32 port_num) 978 { 979 struct rdma_ah_attr ah_attr; 980 struct ib_ah *ah; 981 int ret; 982 983 ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr); 984 if (ret) 985 return ERR_PTR(ret); 986 987 ah = rdma_create_ah(pd, &ah_attr, RDMA_CREATE_AH_SLEEPABLE); 988 989 rdma_destroy_ah_attr(&ah_attr); 990 return ah; 991 } 992 EXPORT_SYMBOL(ib_create_ah_from_wc); 993 994 int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) 995 { 996 const struct ib_gid_attr *old_sgid_attr; 997 int ret; 998 999 if (ah->type != ah_attr->type) 1000 return -EINVAL; 1001 1002 ret = rdma_fill_sgid_attr(ah->device, ah_attr, &old_sgid_attr); 1003 if (ret) 1004 return ret; 1005 1006 ret = ah->device->ops.modify_ah ? 1007 ah->device->ops.modify_ah(ah, ah_attr) : 1008 -EOPNOTSUPP; 1009 1010 ah->sgid_attr = rdma_update_sgid_attr(ah_attr, ah->sgid_attr); 1011 rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); 1012 return ret; 1013 } 1014 EXPORT_SYMBOL(rdma_modify_ah); 1015 1016 int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) 1017 { 1018 ah_attr->grh.sgid_attr = NULL; 1019 1020 return ah->device->ops.query_ah ? 1021 ah->device->ops.query_ah(ah, ah_attr) : 1022 -EOPNOTSUPP; 1023 } 1024 EXPORT_SYMBOL(rdma_query_ah); 1025 1026 int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata) 1027 { 1028 const struct ib_gid_attr *sgid_attr = ah->sgid_attr; 1029 struct ib_pd *pd; 1030 int ret; 1031 1032 might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE); 1033 1034 pd = ah->pd; 1035 1036 ret = ah->device->ops.destroy_ah(ah, flags); 1037 if (ret) 1038 return ret; 1039 1040 atomic_dec(&pd->usecnt); 1041 if (sgid_attr) 1042 rdma_put_gid_attr(sgid_attr); 1043 1044 kfree(ah); 1045 return ret; 1046 } 1047 EXPORT_SYMBOL(rdma_destroy_ah_user); 1048 1049 /* Shared receive queues */ 1050 1051 /** 1052 * ib_create_srq_user - Creates a SRQ associated with the specified protection 1053 * domain. 1054 * @pd: The protection domain associated with the SRQ. 1055 * @srq_init_attr: A list of initial attributes required to create the 1056 * SRQ. If SRQ creation succeeds, then the attributes are updated to 1057 * the actual capabilities of the created SRQ. 1058 * @uobject: uobject pointer if this is not a kernel SRQ 1059 * @udata: udata pointer if this is not a kernel SRQ 1060 * 1061 * srq_attr->max_wr and srq_attr->max_sge are read the determine the 1062 * requested size of the SRQ, and set to the actual values allocated 1063 * on return. If ib_create_srq() succeeds, then max_wr and max_sge 1064 * will always be at least as large as the requested values. 
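 *
 * Kernel callers normally use the ib_create_srq() wrapper; a minimal sketch
 * with hypothetical sizes (error handling trimmed):
 *
 *	struct ib_srq_init_attr init = {
 *		.attr = { .max_wr = 256, .max_sge = 1 },
 *	};
 *	struct ib_srq *srq = ib_create_srq(pd, &init);
 *
 *	if (IS_ERR(srq))
 *		return PTR_ERR(srq);
 *	...
 *	ib_destroy_srq(srq);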
1065 */ 1066 struct ib_srq *ib_create_srq_user(struct ib_pd *pd, 1067 struct ib_srq_init_attr *srq_init_attr, 1068 struct ib_usrq_object *uobject, 1069 struct ib_udata *udata) 1070 { 1071 struct ib_srq *srq; 1072 int ret; 1073 1074 srq = rdma_zalloc_drv_obj(pd->device, ib_srq); 1075 if (!srq) 1076 return ERR_PTR(-ENOMEM); 1077 1078 srq->device = pd->device; 1079 srq->pd = pd; 1080 srq->event_handler = srq_init_attr->event_handler; 1081 srq->srq_context = srq_init_attr->srq_context; 1082 srq->srq_type = srq_init_attr->srq_type; 1083 srq->uobject = uobject; 1084 1085 if (ib_srq_has_cq(srq->srq_type)) { 1086 srq->ext.cq = srq_init_attr->ext.cq; 1087 atomic_inc(&srq->ext.cq->usecnt); 1088 } 1089 if (srq->srq_type == IB_SRQT_XRC) { 1090 srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd; 1091 if (srq->ext.xrc.xrcd) 1092 atomic_inc(&srq->ext.xrc.xrcd->usecnt); 1093 } 1094 atomic_inc(&pd->usecnt); 1095 1096 rdma_restrack_new(&srq->res, RDMA_RESTRACK_SRQ); 1097 rdma_restrack_parent_name(&srq->res, &pd->res); 1098 1099 ret = pd->device->ops.create_srq(srq, srq_init_attr, udata); 1100 if (ret) { 1101 rdma_restrack_put(&srq->res); 1102 atomic_dec(&pd->usecnt); 1103 if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd) 1104 atomic_dec(&srq->ext.xrc.xrcd->usecnt); 1105 if (ib_srq_has_cq(srq->srq_type)) 1106 atomic_dec(&srq->ext.cq->usecnt); 1107 kfree(srq); 1108 return ERR_PTR(ret); 1109 } 1110 1111 rdma_restrack_add(&srq->res); 1112 1113 return srq; 1114 } 1115 EXPORT_SYMBOL(ib_create_srq_user); 1116 1117 int ib_modify_srq(struct ib_srq *srq, 1118 struct ib_srq_attr *srq_attr, 1119 enum ib_srq_attr_mask srq_attr_mask) 1120 { 1121 return srq->device->ops.modify_srq ? 1122 srq->device->ops.modify_srq(srq, srq_attr, srq_attr_mask, 1123 NULL) : -EOPNOTSUPP; 1124 } 1125 EXPORT_SYMBOL(ib_modify_srq); 1126 1127 int ib_query_srq(struct ib_srq *srq, 1128 struct ib_srq_attr *srq_attr) 1129 { 1130 return srq->device->ops.query_srq ? 
		srq->device->ops.query_srq(srq, srq_attr) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_query_srq);

int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata)
{
	int ret;

	if (atomic_read(&srq->usecnt))
		return -EBUSY;

	ret = srq->device->ops.destroy_srq(srq, udata);
	if (ret)
		return ret;

	atomic_dec(&srq->pd->usecnt);
	if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd)
		atomic_dec(&srq->ext.xrc.xrcd->usecnt);
	if (ib_srq_has_cq(srq->srq_type))
		atomic_dec(&srq->ext.cq->usecnt);
	rdma_restrack_del(&srq->res);
	kfree(srq);

	return ret;
}
EXPORT_SYMBOL(ib_destroy_srq_user);

/* Queue pairs */

static void __ib_qp_event_handler(struct ib_event *event, void *context)
{
	struct ib_qp *qp = event->element.qp;

	if (event->event == IB_EVENT_QP_LAST_WQE_REACHED)
		complete(&qp->srq_completion);
	if (qp->registered_event_handler)
		qp->registered_event_handler(event, qp->qp_context);
}

static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
{
	struct ib_qp *qp = context;
	unsigned long flags;

	spin_lock_irqsave(&qp->device->qp_open_list_lock, flags);
	list_for_each_entry(event->element.qp, &qp->open_list, open_list)
		if (event->element.qp->event_handler)
			event->element.qp->event_handler(event,
						event->element.qp->qp_context);
	spin_unlock_irqrestore(&qp->device->qp_open_list_lock, flags);
}

static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
				  void (*event_handler)(struct ib_event *, void *),
				  void *qp_context)
{
	struct ib_qp *qp;
	unsigned long flags;
	int err;

	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
	if (!qp)
		return ERR_PTR(-ENOMEM);

	qp->real_qp = real_qp;
	err = ib_open_shared_qp_security(qp, real_qp->device);
	if (err) {
		kfree(qp);
		return ERR_PTR(err);
	}

	qp->real_qp = real_qp;
	atomic_inc(&real_qp->usecnt);
	qp->device = real_qp->device;
	qp->event_handler = event_handler;
	qp->qp_context = qp_context;
	qp->qp_num = real_qp->qp_num;
	qp->qp_type = real_qp->qp_type;

	spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags);
	list_add(&qp->open_list, &real_qp->open_list);
	spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags);

	return qp;
}

struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
			 struct ib_qp_open_attr *qp_open_attr)
{
	struct ib_qp *qp, *real_qp;

	if (qp_open_attr->qp_type != IB_QPT_XRC_TGT)
		return ERR_PTR(-EINVAL);

	down_read(&xrcd->tgt_qps_rwsem);
	real_qp = xa_load(&xrcd->tgt_qps, qp_open_attr->qp_num);
	if (!real_qp) {
		up_read(&xrcd->tgt_qps_rwsem);
		return ERR_PTR(-EINVAL);
	}
	qp = __ib_open_qp(real_qp, qp_open_attr->event_handler,
			  qp_open_attr->qp_context);
	up_read(&xrcd->tgt_qps_rwsem);
	return qp;
}
EXPORT_SYMBOL(ib_open_qp);

static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp,
					struct ib_qp_init_attr *qp_init_attr)
{
	struct ib_qp *real_qp = qp;
	int err;

	qp->event_handler = __ib_shared_qp_event_handler;
	qp->qp_context = qp;
	qp->pd = NULL;
	qp->send_cq = qp->recv_cq = NULL;
	qp->srq = NULL;
	qp->xrcd = qp_init_attr->xrcd;
	atomic_inc(&qp_init_attr->xrcd->usecnt);
	INIT_LIST_HEAD(&qp->open_list);

	qp =
__ib_open_qp(real_qp, qp_init_attr->event_handler, 1253 qp_init_attr->qp_context); 1254 if (IS_ERR(qp)) 1255 return qp; 1256 1257 err = xa_err(xa_store(&qp_init_attr->xrcd->tgt_qps, real_qp->qp_num, 1258 real_qp, GFP_KERNEL)); 1259 if (err) { 1260 ib_close_qp(qp); 1261 return ERR_PTR(err); 1262 } 1263 return qp; 1264 } 1265 1266 static struct ib_qp *create_qp(struct ib_device *dev, struct ib_pd *pd, 1267 struct ib_qp_init_attr *attr, 1268 struct ib_udata *udata, 1269 struct ib_uqp_object *uobj, const char *caller) 1270 { 1271 struct ib_udata dummy = {}; 1272 struct ib_qp *qp; 1273 int ret; 1274 1275 if (!dev->ops.create_qp) 1276 return ERR_PTR(-EOPNOTSUPP); 1277 1278 qp = rdma_zalloc_drv_obj_numa(dev, ib_qp); 1279 if (!qp) 1280 return ERR_PTR(-ENOMEM); 1281 1282 qp->device = dev; 1283 qp->pd = pd; 1284 qp->uobject = uobj; 1285 qp->real_qp = qp; 1286 1287 qp->qp_type = attr->qp_type; 1288 qp->rwq_ind_tbl = attr->rwq_ind_tbl; 1289 qp->srq = attr->srq; 1290 qp->event_handler = __ib_qp_event_handler; 1291 qp->registered_event_handler = attr->event_handler; 1292 qp->port = attr->port_num; 1293 qp->qp_context = attr->qp_context; 1294 1295 spin_lock_init(&qp->mr_lock); 1296 INIT_LIST_HEAD(&qp->rdma_mrs); 1297 INIT_LIST_HEAD(&qp->sig_mrs); 1298 init_completion(&qp->srq_completion); 1299 1300 qp->send_cq = attr->send_cq; 1301 qp->recv_cq = attr->recv_cq; 1302 1303 rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP); 1304 WARN_ONCE(!udata && !caller, "Missing kernel QP owner"); 1305 rdma_restrack_set_name(&qp->res, udata ? NULL : caller); 1306 ret = dev->ops.create_qp(qp, attr, udata); 1307 if (ret) 1308 goto err_create; 1309 1310 /* 1311 * TODO: The mlx4 internally overwrites send_cq and recv_cq. 1312 * Unfortunately, it is not an easy task to fix that driver. 1313 */ 1314 qp->send_cq = attr->send_cq; 1315 qp->recv_cq = attr->recv_cq; 1316 1317 ret = ib_create_qp_security(qp, dev); 1318 if (ret) 1319 goto err_security; 1320 1321 rdma_restrack_add(&qp->res); 1322 return qp; 1323 1324 err_security: 1325 qp->device->ops.destroy_qp(qp, udata ? &dummy : NULL); 1326 err_create: 1327 rdma_restrack_put(&qp->res); 1328 kfree(qp); 1329 return ERR_PTR(ret); 1330 1331 } 1332 1333 /** 1334 * ib_create_qp_user - Creates a QP associated with the specified protection 1335 * domain. 1336 * @dev: IB device 1337 * @pd: The protection domain associated with the QP. 1338 * @attr: A list of initial attributes required to create the 1339 * QP. If QP creation succeeds, then the attributes are updated to 1340 * the actual capabilities of the created QP. 
 * @udata: User data
 * @uobj: uverbs object
 * @caller: caller's build-time module name
 */
struct ib_qp *ib_create_qp_user(struct ib_device *dev, struct ib_pd *pd,
				struct ib_qp_init_attr *attr,
				struct ib_udata *udata,
				struct ib_uqp_object *uobj, const char *caller)
{
	struct ib_qp *qp, *xrc_qp;

	if (attr->qp_type == IB_QPT_XRC_TGT)
		qp = create_qp(dev, pd, attr, NULL, NULL, caller);
	else
		qp = create_qp(dev, pd, attr, udata, uobj, NULL);
	if (attr->qp_type != IB_QPT_XRC_TGT || IS_ERR(qp))
		return qp;

	xrc_qp = create_xrc_qp_user(qp, attr);
	if (IS_ERR(xrc_qp)) {
		ib_destroy_qp(qp);
		return xrc_qp;
	}

	xrc_qp->uobject = uobj;
	return xrc_qp;
}
EXPORT_SYMBOL(ib_create_qp_user);

void ib_qp_usecnt_inc(struct ib_qp *qp)
{
	if (qp->pd)
		atomic_inc(&qp->pd->usecnt);
	if (qp->send_cq)
		atomic_inc(&qp->send_cq->usecnt);
	if (qp->recv_cq)
		atomic_inc(&qp->recv_cq->usecnt);
	if (qp->srq)
		atomic_inc(&qp->srq->usecnt);
	if (qp->rwq_ind_tbl)
		atomic_inc(&qp->rwq_ind_tbl->usecnt);
}
EXPORT_SYMBOL(ib_qp_usecnt_inc);

void ib_qp_usecnt_dec(struct ib_qp *qp)
{
	if (qp->rwq_ind_tbl)
		atomic_dec(&qp->rwq_ind_tbl->usecnt);
	if (qp->srq)
		atomic_dec(&qp->srq->usecnt);
	if (qp->recv_cq)
		atomic_dec(&qp->recv_cq->usecnt);
	if (qp->send_cq)
		atomic_dec(&qp->send_cq->usecnt);
	if (qp->pd)
		atomic_dec(&qp->pd->usecnt);
}
EXPORT_SYMBOL(ib_qp_usecnt_dec);

struct ib_qp *ib_create_qp_kernel(struct ib_pd *pd,
				  struct ib_qp_init_attr *qp_init_attr,
				  const char *caller)
{
	struct ib_device *device = pd->device;
	struct ib_qp *qp;
	int ret;

	/*
	 * If the caller is using the RDMA API, calculate the resources
	 * needed for the RDMA READ/WRITE operations.
	 *
	 * Note that these callers need to pass in a port number.
	 */
	if (qp_init_attr->cap.max_rdma_ctxs)
		rdma_rw_init_qp(device, qp_init_attr);

	qp = create_qp(device, pd, qp_init_attr, NULL, NULL, caller);
	if (IS_ERR(qp))
		return qp;

	ib_qp_usecnt_inc(qp);

	if (qp_init_attr->cap.max_rdma_ctxs) {
		ret = rdma_rw_init_mrs(qp, qp_init_attr);
		if (ret)
			goto err;
	}

	/*
	 * Note: all hw drivers guarantee that max_send_sge is lower than
	 * the device RDMA WRITE SGE limit but not all hw drivers ensure that
	 * max_send_sge <= max_sge_rd.
1433 */ 1434 qp->max_write_sge = qp_init_attr->cap.max_send_sge; 1435 qp->max_read_sge = min_t(u32, qp_init_attr->cap.max_send_sge, 1436 device->attrs.max_sge_rd); 1437 if (qp_init_attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) 1438 qp->integrity_en = true; 1439 1440 return qp; 1441 1442 err: 1443 ib_destroy_qp(qp); 1444 return ERR_PTR(ret); 1445 1446 } 1447 EXPORT_SYMBOL(ib_create_qp_kernel); 1448 1449 static const struct { 1450 int valid; 1451 enum ib_qp_attr_mask req_param[IB_QPT_MAX]; 1452 enum ib_qp_attr_mask opt_param[IB_QPT_MAX]; 1453 } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { 1454 [IB_QPS_RESET] = { 1455 [IB_QPS_RESET] = { .valid = 1 }, 1456 [IB_QPS_INIT] = { 1457 .valid = 1, 1458 .req_param = { 1459 [IB_QPT_UD] = (IB_QP_PKEY_INDEX | 1460 IB_QP_PORT | 1461 IB_QP_QKEY), 1462 [IB_QPT_RAW_PACKET] = IB_QP_PORT, 1463 [IB_QPT_UC] = (IB_QP_PKEY_INDEX | 1464 IB_QP_PORT | 1465 IB_QP_ACCESS_FLAGS), 1466 [IB_QPT_RC] = (IB_QP_PKEY_INDEX | 1467 IB_QP_PORT | 1468 IB_QP_ACCESS_FLAGS), 1469 [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX | 1470 IB_QP_PORT | 1471 IB_QP_ACCESS_FLAGS), 1472 [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX | 1473 IB_QP_PORT | 1474 IB_QP_ACCESS_FLAGS), 1475 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | 1476 IB_QP_QKEY), 1477 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | 1478 IB_QP_QKEY), 1479 } 1480 }, 1481 }, 1482 [IB_QPS_INIT] = { 1483 [IB_QPS_RESET] = { .valid = 1 }, 1484 [IB_QPS_ERR] = { .valid = 1 }, 1485 [IB_QPS_INIT] = { 1486 .valid = 1, 1487 .opt_param = { 1488 [IB_QPT_UD] = (IB_QP_PKEY_INDEX | 1489 IB_QP_PORT | 1490 IB_QP_QKEY), 1491 [IB_QPT_UC] = (IB_QP_PKEY_INDEX | 1492 IB_QP_PORT | 1493 IB_QP_ACCESS_FLAGS), 1494 [IB_QPT_RC] = (IB_QP_PKEY_INDEX | 1495 IB_QP_PORT | 1496 IB_QP_ACCESS_FLAGS), 1497 [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX | 1498 IB_QP_PORT | 1499 IB_QP_ACCESS_FLAGS), 1500 [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX | 1501 IB_QP_PORT | 1502 IB_QP_ACCESS_FLAGS), 1503 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | 1504 IB_QP_QKEY), 1505 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | 1506 IB_QP_QKEY), 1507 } 1508 }, 1509 [IB_QPS_RTR] = { 1510 .valid = 1, 1511 .req_param = { 1512 [IB_QPT_UC] = (IB_QP_AV | 1513 IB_QP_PATH_MTU | 1514 IB_QP_DEST_QPN | 1515 IB_QP_RQ_PSN), 1516 [IB_QPT_RC] = (IB_QP_AV | 1517 IB_QP_PATH_MTU | 1518 IB_QP_DEST_QPN | 1519 IB_QP_RQ_PSN | 1520 IB_QP_MAX_DEST_RD_ATOMIC | 1521 IB_QP_MIN_RNR_TIMER), 1522 [IB_QPT_XRC_INI] = (IB_QP_AV | 1523 IB_QP_PATH_MTU | 1524 IB_QP_DEST_QPN | 1525 IB_QP_RQ_PSN), 1526 [IB_QPT_XRC_TGT] = (IB_QP_AV | 1527 IB_QP_PATH_MTU | 1528 IB_QP_DEST_QPN | 1529 IB_QP_RQ_PSN | 1530 IB_QP_MAX_DEST_RD_ATOMIC | 1531 IB_QP_MIN_RNR_TIMER), 1532 }, 1533 .opt_param = { 1534 [IB_QPT_UD] = (IB_QP_PKEY_INDEX | 1535 IB_QP_QKEY), 1536 [IB_QPT_UC] = (IB_QP_ALT_PATH | 1537 IB_QP_ACCESS_FLAGS | 1538 IB_QP_PKEY_INDEX), 1539 [IB_QPT_RC] = (IB_QP_ALT_PATH | 1540 IB_QP_ACCESS_FLAGS | 1541 IB_QP_PKEY_INDEX | 1542 IB_QP_RATE_LIMIT), 1543 [IB_QPT_XRC_INI] = (IB_QP_ALT_PATH | 1544 IB_QP_ACCESS_FLAGS | 1545 IB_QP_PKEY_INDEX), 1546 [IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH | 1547 IB_QP_ACCESS_FLAGS | 1548 IB_QP_PKEY_INDEX), 1549 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | 1550 IB_QP_QKEY), 1551 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | 1552 IB_QP_QKEY), 1553 }, 1554 }, 1555 }, 1556 [IB_QPS_RTR] = { 1557 [IB_QPS_RESET] = { .valid = 1 }, 1558 [IB_QPS_ERR] = { .valid = 1 }, 1559 [IB_QPS_RTS] = { 1560 .valid = 1, 1561 .req_param = { 1562 [IB_QPT_UD] = IB_QP_SQ_PSN, 1563 [IB_QPT_UC] = IB_QP_SQ_PSN, 1564 [IB_QPT_RC] = (IB_QP_TIMEOUT | 1565 IB_QP_RETRY_CNT | 1566 IB_QP_RNR_RETRY | 1567 IB_QP_SQ_PSN | 1568 IB_QP_MAX_QP_RD_ATOMIC), 
1569 [IB_QPT_XRC_INI] = (IB_QP_TIMEOUT | 1570 IB_QP_RETRY_CNT | 1571 IB_QP_RNR_RETRY | 1572 IB_QP_SQ_PSN | 1573 IB_QP_MAX_QP_RD_ATOMIC), 1574 [IB_QPT_XRC_TGT] = (IB_QP_TIMEOUT | 1575 IB_QP_SQ_PSN), 1576 [IB_QPT_SMI] = IB_QP_SQ_PSN, 1577 [IB_QPT_GSI] = IB_QP_SQ_PSN, 1578 }, 1579 .opt_param = { 1580 [IB_QPT_UD] = (IB_QP_CUR_STATE | 1581 IB_QP_QKEY), 1582 [IB_QPT_UC] = (IB_QP_CUR_STATE | 1583 IB_QP_ALT_PATH | 1584 IB_QP_ACCESS_FLAGS | 1585 IB_QP_PATH_MIG_STATE), 1586 [IB_QPT_RC] = (IB_QP_CUR_STATE | 1587 IB_QP_ALT_PATH | 1588 IB_QP_ACCESS_FLAGS | 1589 IB_QP_MIN_RNR_TIMER | 1590 IB_QP_PATH_MIG_STATE | 1591 IB_QP_RATE_LIMIT), 1592 [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | 1593 IB_QP_ALT_PATH | 1594 IB_QP_ACCESS_FLAGS | 1595 IB_QP_PATH_MIG_STATE), 1596 [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | 1597 IB_QP_ALT_PATH | 1598 IB_QP_ACCESS_FLAGS | 1599 IB_QP_MIN_RNR_TIMER | 1600 IB_QP_PATH_MIG_STATE), 1601 [IB_QPT_SMI] = (IB_QP_CUR_STATE | 1602 IB_QP_QKEY), 1603 [IB_QPT_GSI] = (IB_QP_CUR_STATE | 1604 IB_QP_QKEY), 1605 [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT, 1606 } 1607 } 1608 }, 1609 [IB_QPS_RTS] = { 1610 [IB_QPS_RESET] = { .valid = 1 }, 1611 [IB_QPS_ERR] = { .valid = 1 }, 1612 [IB_QPS_RTS] = { 1613 .valid = 1, 1614 .opt_param = { 1615 [IB_QPT_UD] = (IB_QP_CUR_STATE | 1616 IB_QP_QKEY), 1617 [IB_QPT_UC] = (IB_QP_CUR_STATE | 1618 IB_QP_ACCESS_FLAGS | 1619 IB_QP_ALT_PATH | 1620 IB_QP_PATH_MIG_STATE), 1621 [IB_QPT_RC] = (IB_QP_CUR_STATE | 1622 IB_QP_ACCESS_FLAGS | 1623 IB_QP_ALT_PATH | 1624 IB_QP_PATH_MIG_STATE | 1625 IB_QP_MIN_RNR_TIMER | 1626 IB_QP_RATE_LIMIT), 1627 [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | 1628 IB_QP_ACCESS_FLAGS | 1629 IB_QP_ALT_PATH | 1630 IB_QP_PATH_MIG_STATE), 1631 [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | 1632 IB_QP_ACCESS_FLAGS | 1633 IB_QP_ALT_PATH | 1634 IB_QP_PATH_MIG_STATE | 1635 IB_QP_MIN_RNR_TIMER), 1636 [IB_QPT_SMI] = (IB_QP_CUR_STATE | 1637 IB_QP_QKEY), 1638 [IB_QPT_GSI] = (IB_QP_CUR_STATE | 1639 IB_QP_QKEY), 1640 [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT, 1641 } 1642 }, 1643 [IB_QPS_SQD] = { 1644 .valid = 1, 1645 .opt_param = { 1646 [IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY, 1647 [IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY, 1648 [IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY, 1649 [IB_QPT_XRC_INI] = IB_QP_EN_SQD_ASYNC_NOTIFY, 1650 [IB_QPT_XRC_TGT] = IB_QP_EN_SQD_ASYNC_NOTIFY, /* ??? 
*/ 1651 [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY, 1652 [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY 1653 } 1654 }, 1655 }, 1656 [IB_QPS_SQD] = { 1657 [IB_QPS_RESET] = { .valid = 1 }, 1658 [IB_QPS_ERR] = { .valid = 1 }, 1659 [IB_QPS_RTS] = { 1660 .valid = 1, 1661 .opt_param = { 1662 [IB_QPT_UD] = (IB_QP_CUR_STATE | 1663 IB_QP_QKEY), 1664 [IB_QPT_UC] = (IB_QP_CUR_STATE | 1665 IB_QP_ALT_PATH | 1666 IB_QP_ACCESS_FLAGS | 1667 IB_QP_PATH_MIG_STATE), 1668 [IB_QPT_RC] = (IB_QP_CUR_STATE | 1669 IB_QP_ALT_PATH | 1670 IB_QP_ACCESS_FLAGS | 1671 IB_QP_MIN_RNR_TIMER | 1672 IB_QP_PATH_MIG_STATE), 1673 [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | 1674 IB_QP_ALT_PATH | 1675 IB_QP_ACCESS_FLAGS | 1676 IB_QP_PATH_MIG_STATE), 1677 [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | 1678 IB_QP_ALT_PATH | 1679 IB_QP_ACCESS_FLAGS | 1680 IB_QP_MIN_RNR_TIMER | 1681 IB_QP_PATH_MIG_STATE), 1682 [IB_QPT_SMI] = (IB_QP_CUR_STATE | 1683 IB_QP_QKEY), 1684 [IB_QPT_GSI] = (IB_QP_CUR_STATE | 1685 IB_QP_QKEY), 1686 } 1687 }, 1688 [IB_QPS_SQD] = { 1689 .valid = 1, 1690 .opt_param = { 1691 [IB_QPT_UD] = (IB_QP_PKEY_INDEX | 1692 IB_QP_QKEY), 1693 [IB_QPT_UC] = (IB_QP_AV | 1694 IB_QP_ALT_PATH | 1695 IB_QP_ACCESS_FLAGS | 1696 IB_QP_PKEY_INDEX | 1697 IB_QP_PATH_MIG_STATE), 1698 [IB_QPT_RC] = (IB_QP_PORT | 1699 IB_QP_AV | 1700 IB_QP_TIMEOUT | 1701 IB_QP_RETRY_CNT | 1702 IB_QP_RNR_RETRY | 1703 IB_QP_MAX_QP_RD_ATOMIC | 1704 IB_QP_MAX_DEST_RD_ATOMIC | 1705 IB_QP_ALT_PATH | 1706 IB_QP_ACCESS_FLAGS | 1707 IB_QP_PKEY_INDEX | 1708 IB_QP_MIN_RNR_TIMER | 1709 IB_QP_PATH_MIG_STATE), 1710 [IB_QPT_XRC_INI] = (IB_QP_PORT | 1711 IB_QP_AV | 1712 IB_QP_TIMEOUT | 1713 IB_QP_RETRY_CNT | 1714 IB_QP_RNR_RETRY | 1715 IB_QP_MAX_QP_RD_ATOMIC | 1716 IB_QP_ALT_PATH | 1717 IB_QP_ACCESS_FLAGS | 1718 IB_QP_PKEY_INDEX | 1719 IB_QP_PATH_MIG_STATE), 1720 [IB_QPT_XRC_TGT] = (IB_QP_PORT | 1721 IB_QP_AV | 1722 IB_QP_TIMEOUT | 1723 IB_QP_MAX_DEST_RD_ATOMIC | 1724 IB_QP_ALT_PATH | 1725 IB_QP_ACCESS_FLAGS | 1726 IB_QP_PKEY_INDEX | 1727 IB_QP_MIN_RNR_TIMER | 1728 IB_QP_PATH_MIG_STATE), 1729 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | 1730 IB_QP_QKEY), 1731 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | 1732 IB_QP_QKEY), 1733 } 1734 } 1735 }, 1736 [IB_QPS_SQE] = { 1737 [IB_QPS_RESET] = { .valid = 1 }, 1738 [IB_QPS_ERR] = { .valid = 1 }, 1739 [IB_QPS_RTS] = { 1740 .valid = 1, 1741 .opt_param = { 1742 [IB_QPT_UD] = (IB_QP_CUR_STATE | 1743 IB_QP_QKEY), 1744 [IB_QPT_UC] = (IB_QP_CUR_STATE | 1745 IB_QP_ACCESS_FLAGS), 1746 [IB_QPT_SMI] = (IB_QP_CUR_STATE | 1747 IB_QP_QKEY), 1748 [IB_QPT_GSI] = (IB_QP_CUR_STATE | 1749 IB_QP_QKEY), 1750 } 1751 } 1752 }, 1753 [IB_QPS_ERR] = { 1754 [IB_QPS_RESET] = { .valid = 1 }, 1755 [IB_QPS_ERR] = { .valid = 1 } 1756 } 1757 }; 1758 1759 bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, 1760 enum ib_qp_type type, enum ib_qp_attr_mask mask) 1761 { 1762 enum ib_qp_attr_mask req_param, opt_param; 1763 1764 if (mask & IB_QP_CUR_STATE && 1765 cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS && 1766 cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE) 1767 return false; 1768 1769 if (!qp_state_table[cur_state][next_state].valid) 1770 return false; 1771 1772 req_param = qp_state_table[cur_state][next_state].req_param[type]; 1773 opt_param = qp_state_table[cur_state][next_state].opt_param[type]; 1774 1775 if ((mask & req_param) != req_param) 1776 return false; 1777 1778 if (mask & ~(req_param | opt_param | IB_QP_STATE)) 1779 return false; 1780 1781 return true; 1782 } 1783 EXPORT_SYMBOL(ib_modify_qp_is_ok); 1784 1785 /** 1786 * ib_resolve_eth_dmac - Resolve 
 *   destination mac address
 * @device: Device to consider
 * @ah_attr: address handle attribute which describes the
 *   source and destination parameters
 *
 * ib_resolve_eth_dmac() resolves the destination mac address and the L3 hop
 * limit. It returns 0 on success or an appropriate error code. It initializes
 * the necessary ah_attr fields when the call is successful.
 */
static int ib_resolve_eth_dmac(struct ib_device *device,
			       struct rdma_ah_attr *ah_attr)
{
	int ret = 0;

	if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
		if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) {
			__be32 addr = 0;

			memcpy(&addr, ah_attr->grh.dgid.raw + 12, 4);
			ip_eth_mc_map(addr, (char *)ah_attr->roce.dmac);
		} else {
			ipv6_eth_mc_map((struct in6_addr *)ah_attr->grh.dgid.raw,
					(char *)ah_attr->roce.dmac);
		}
	} else {
		ret = ib_resolve_unicast_gid_dmac(device, ah_attr);
	}
	return ret;
}

static bool is_qp_type_connected(const struct ib_qp *qp)
{
	return (qp->qp_type == IB_QPT_UC ||
		qp->qp_type == IB_QPT_RC ||
		qp->qp_type == IB_QPT_XRC_INI ||
		qp->qp_type == IB_QPT_XRC_TGT);
}

/*
 * IB core internal function to perform QP attributes modification.
 */
static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
			 int attr_mask, struct ib_udata *udata)
{
	u32 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
	const struct ib_gid_attr *old_sgid_attr_av;
	const struct ib_gid_attr *old_sgid_attr_alt_av;
	int ret;

	attr->xmit_slave = NULL;
	if (attr_mask & IB_QP_AV) {
		ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr,
					  &old_sgid_attr_av);
		if (ret)
			return ret;

		if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
		    is_qp_type_connected(qp)) {
			struct net_device *slave;

			/*
			 * If the user provided the qp_attr then we have to
			 * resolve it. Kernel users have to provide already
			 * resolved rdma_ah_attr's.
			 */
			if (udata) {
				ret = ib_resolve_eth_dmac(qp->device,
							  &attr->ah_attr);
				if (ret)
					goto out_av;
			}
			slave = rdma_lag_get_ah_roce_slave(qp->device,
							   &attr->ah_attr,
							   GFP_KERNEL);
			if (IS_ERR(slave)) {
				ret = PTR_ERR(slave);
				goto out_av;
			}
			attr->xmit_slave = slave;
		}
	}
	if (attr_mask & IB_QP_ALT_PATH) {
		/*
		 * FIXME: This does not track the migration state, so if the
		 * user loads a new alternate path after the HW has migrated
		 * from primary->alternate we will keep the wrong
		 * references. This is OK for IB because the reference
		 * counting does not serve any functional purpose.
		 */
		ret = rdma_fill_sgid_attr(qp->device, &attr->alt_ah_attr,
					  &old_sgid_attr_alt_av);
		if (ret)
			goto out_av;

		/*
		 * Today the core code can only handle alternate paths and APM
		 * for IB. Ban them in roce mode.
		 */
		if (!(rdma_protocol_ib(qp->device,
				       attr->alt_ah_attr.port_num) &&
		      rdma_protocol_ib(qp->device, port))) {
			ret = -EINVAL;
			goto out;
		}
	}

	if (rdma_ib_or_roce(qp->device, port)) {
		if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
			dev_warn(&qp->device->dev,
				 "%s rq_psn overflow, masking to 24 bits\n",
				 __func__);
			attr->rq_psn &= 0xffffff;
		}

		if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) {
			dev_warn(&qp->device->dev,
				 "%s sq_psn overflow, masking to 24 bits\n",
				 __func__);
			attr->sq_psn &= 0xffffff;
		}
	}

	/*
	 * Bind this qp to a counter automatically based on the rdma counter
	 * rules. This is only set in the RST2INIT transition when a port is
	 * specified.
	 */
	if (!qp->counter && (attr_mask & IB_QP_PORT) &&
	    ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT))
		rdma_counter_bind_qp_auto(qp, attr->port_num);

	ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
	if (ret)
		goto out;

	if (attr_mask & IB_QP_PORT)
		qp->port = attr->port_num;
	if (attr_mask & IB_QP_AV)
		qp->av_sgid_attr =
			rdma_update_sgid_attr(&attr->ah_attr, qp->av_sgid_attr);
	if (attr_mask & IB_QP_ALT_PATH)
		qp->alt_path_sgid_attr = rdma_update_sgid_attr(
			&attr->alt_ah_attr, qp->alt_path_sgid_attr);

out:
	if (attr_mask & IB_QP_ALT_PATH)
		rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av);
out_av:
	if (attr_mask & IB_QP_AV) {
		rdma_lag_put_ah_roce_slave(attr->xmit_slave);
		rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av);
	}
	return ret;
}

/**
 * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
 * @ib_qp: The QP to modify.
 * @attr: On input, specifies the QP attributes to modify.  On output,
 *   the current values of selected QP attributes are returned.
 * @attr_mask: A bit-mask used to specify which attributes of the QP
 *   are being modified.
 * @udata: pointer to user's input/output buffer information.
 *
 * It returns 0 on success and returns an appropriate error code on error.
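 *
 * A rough sketch of a RESET->INIT transition driven through this helper
 * (hypothetical attribute values, error handling trimmed):
 *
 *	struct ib_qp_attr qp_attr = {
 *		.qp_state = IB_QPS_INIT,
 *		.pkey_index = 0,
 *		.port_num = 1,
 *		.qp_access_flags = IB_ACCESS_REMOTE_WRITE,
 *	};
 *
 *	ret = ib_modify_qp_with_udata(qp, &qp_attr,
 *				      IB_QP_STATE | IB_QP_PKEY_INDEX |
 *				      IB_QP_PORT | IB_QP_ACCESS_FLAGS, udata);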
1949 */ 1950 int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr, 1951 int attr_mask, struct ib_udata *udata) 1952 { 1953 return _ib_modify_qp(ib_qp->real_qp, attr, attr_mask, udata); 1954 } 1955 EXPORT_SYMBOL(ib_modify_qp_with_udata); 1956 1957 static void ib_get_width_and_speed(u32 netdev_speed, u32 lanes, 1958 u16 *speed, u8 *width) 1959 { 1960 if (!lanes) { 1961 if (netdev_speed <= SPEED_1000) { 1962 *width = IB_WIDTH_1X; 1963 *speed = IB_SPEED_SDR; 1964 } else if (netdev_speed <= SPEED_10000) { 1965 *width = IB_WIDTH_1X; 1966 *speed = IB_SPEED_FDR10; 1967 } else if (netdev_speed <= SPEED_20000) { 1968 *width = IB_WIDTH_4X; 1969 *speed = IB_SPEED_DDR; 1970 } else if (netdev_speed <= SPEED_25000) { 1971 *width = IB_WIDTH_1X; 1972 *speed = IB_SPEED_EDR; 1973 } else if (netdev_speed <= SPEED_40000) { 1974 *width = IB_WIDTH_4X; 1975 *speed = IB_SPEED_FDR10; 1976 } else if (netdev_speed <= SPEED_50000) { 1977 *width = IB_WIDTH_2X; 1978 *speed = IB_SPEED_EDR; 1979 } else if (netdev_speed <= SPEED_100000) { 1980 *width = IB_WIDTH_4X; 1981 *speed = IB_SPEED_EDR; 1982 } else if (netdev_speed <= SPEED_200000) { 1983 *width = IB_WIDTH_4X; 1984 *speed = IB_SPEED_HDR; 1985 } else { 1986 *width = IB_WIDTH_4X; 1987 *speed = IB_SPEED_NDR; 1988 } 1989 1990 return; 1991 } 1992 1993 switch (lanes) { 1994 case 1: 1995 *width = IB_WIDTH_1X; 1996 break; 1997 case 2: 1998 *width = IB_WIDTH_2X; 1999 break; 2000 case 4: 2001 *width = IB_WIDTH_4X; 2002 break; 2003 case 8: 2004 *width = IB_WIDTH_8X; 2005 break; 2006 case 12: 2007 *width = IB_WIDTH_12X; 2008 break; 2009 default: 2010 *width = IB_WIDTH_1X; 2011 } 2012 2013 switch (netdev_speed / lanes) { 2014 case SPEED_2500: 2015 *speed = IB_SPEED_SDR; 2016 break; 2017 case SPEED_5000: 2018 *speed = IB_SPEED_DDR; 2019 break; 2020 case SPEED_10000: 2021 *speed = IB_SPEED_FDR10; 2022 break; 2023 case SPEED_14000: 2024 *speed = IB_SPEED_FDR; 2025 break; 2026 case SPEED_25000: 2027 *speed = IB_SPEED_EDR; 2028 break; 2029 case SPEED_50000: 2030 *speed = IB_SPEED_HDR; 2031 break; 2032 case SPEED_100000: 2033 *speed = IB_SPEED_NDR; 2034 break; 2035 default: 2036 *speed = IB_SPEED_SDR; 2037 } 2038 } 2039 2040 int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width) 2041 { 2042 int rc; 2043 u32 netdev_speed; 2044 struct net_device *netdev; 2045 struct ethtool_link_ksettings lksettings = {}; 2046 2047 if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET) 2048 return -EINVAL; 2049 2050 netdev = ib_device_get_netdev(dev, port_num); 2051 if (!netdev) 2052 return -ENODEV; 2053 2054 rtnl_lock(); 2055 rc = __ethtool_get_link_ksettings(netdev, &lksettings); 2056 rtnl_unlock(); 2057 2058 dev_put(netdev); 2059 2060 if (!rc && lksettings.base.speed != (u32)SPEED_UNKNOWN) { 2061 netdev_speed = lksettings.base.speed; 2062 } else { 2063 netdev_speed = SPEED_1000; 2064 if (rc) 2065 pr_warn("%s speed is unknown, defaulting to %u\n", 2066 netdev->name, netdev_speed); 2067 } 2068 2069 ib_get_width_and_speed(netdev_speed, lksettings.lanes, 2070 speed, width); 2071 2072 return 0; 2073 } 2074 EXPORT_SYMBOL(ib_get_eth_speed); 2075 2076 int ib_modify_qp(struct ib_qp *qp, 2077 struct ib_qp_attr *qp_attr, 2078 int qp_attr_mask) 2079 { 2080 return _ib_modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL); 2081 } 2082 EXPORT_SYMBOL(ib_modify_qp); 2083 2084 int ib_query_qp(struct ib_qp *qp, 2085 struct ib_qp_attr *qp_attr, 2086 int qp_attr_mask, 2087 struct ib_qp_init_attr *qp_init_attr) 2088 { 2089 qp_attr->ah_attr.grh.sgid_attr = NULL; 
2090 qp_attr->alt_ah_attr.grh.sgid_attr = NULL; 2091 2092 return qp->device->ops.query_qp ? 2093 qp->device->ops.query_qp(qp->real_qp, qp_attr, qp_attr_mask, 2094 qp_init_attr) : -EOPNOTSUPP; 2095 } 2096 EXPORT_SYMBOL(ib_query_qp); 2097 2098 int ib_close_qp(struct ib_qp *qp) 2099 { 2100 struct ib_qp *real_qp; 2101 unsigned long flags; 2102 2103 real_qp = qp->real_qp; 2104 if (real_qp == qp) 2105 return -EINVAL; 2106 2107 spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags); 2108 list_del(&qp->open_list); 2109 spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags); 2110 2111 atomic_dec(&real_qp->usecnt); 2112 if (qp->qp_sec) 2113 ib_close_shared_qp_security(qp->qp_sec); 2114 kfree(qp); 2115 2116 return 0; 2117 } 2118 EXPORT_SYMBOL(ib_close_qp); 2119 2120 static int __ib_destroy_shared_qp(struct ib_qp *qp) 2121 { 2122 struct ib_xrcd *xrcd; 2123 struct ib_qp *real_qp; 2124 int ret; 2125 2126 real_qp = qp->real_qp; 2127 xrcd = real_qp->xrcd; 2128 down_write(&xrcd->tgt_qps_rwsem); 2129 ib_close_qp(qp); 2130 if (atomic_read(&real_qp->usecnt) == 0) 2131 xa_erase(&xrcd->tgt_qps, real_qp->qp_num); 2132 else 2133 real_qp = NULL; 2134 up_write(&xrcd->tgt_qps_rwsem); 2135 2136 if (real_qp) { 2137 ret = ib_destroy_qp(real_qp); 2138 if (!ret) 2139 atomic_dec(&xrcd->usecnt); 2140 } 2141 2142 return 0; 2143 } 2144 2145 int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata) 2146 { 2147 const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr; 2148 const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr; 2149 struct ib_qp_security *sec; 2150 int ret; 2151 2152 WARN_ON_ONCE(qp->mrs_used > 0); 2153 2154 if (atomic_read(&qp->usecnt)) 2155 return -EBUSY; 2156 2157 if (qp->real_qp != qp) 2158 return __ib_destroy_shared_qp(qp); 2159 2160 sec = qp->qp_sec; 2161 if (sec) 2162 ib_destroy_qp_security_begin(sec); 2163 2164 if (!qp->uobject) 2165 rdma_rw_cleanup_mrs(qp); 2166 2167 rdma_counter_unbind_qp(qp, qp->port, true); 2168 ret = qp->device->ops.destroy_qp(qp, udata); 2169 if (ret) { 2170 if (sec) 2171 ib_destroy_qp_security_abort(sec); 2172 return ret; 2173 } 2174 2175 if (alt_path_sgid_attr) 2176 rdma_put_gid_attr(alt_path_sgid_attr); 2177 if (av_sgid_attr) 2178 rdma_put_gid_attr(av_sgid_attr); 2179 2180 ib_qp_usecnt_dec(qp); 2181 if (sec) 2182 ib_destroy_qp_security_end(sec); 2183 2184 rdma_restrack_del(&qp->res); 2185 kfree(qp); 2186 return ret; 2187 } 2188 EXPORT_SYMBOL(ib_destroy_qp_user); 2189 2190 /* Completion queues */ 2191 2192 struct ib_cq *__ib_create_cq(struct ib_device *device, 2193 ib_comp_handler comp_handler, 2194 void (*event_handler)(struct ib_event *, void *), 2195 void *cq_context, 2196 const struct ib_cq_init_attr *cq_attr, 2197 const char *caller) 2198 { 2199 struct ib_cq *cq; 2200 int ret; 2201 2202 cq = rdma_zalloc_drv_obj(device, ib_cq); 2203 if (!cq) 2204 return ERR_PTR(-ENOMEM); 2205 2206 if (WARN_ON_ONCE(!cq_attr->cqe)) 2207 return ERR_PTR(-EINVAL); 2208 2209 cq->device = device; 2210 cq->comp_handler = comp_handler; 2211 cq->event_handler = event_handler; 2212 cq->cq_context = cq_context; 2213 atomic_set(&cq->usecnt, 0); 2214 2215 rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ); 2216 rdma_restrack_set_name(&cq->res, caller); 2217 2218 ret = device->ops.create_cq(cq, cq_attr, NULL); 2219 if (ret) { 2220 rdma_restrack_put(&cq->res); 2221 kfree(cq); 2222 return ERR_PTR(ret); 2223 } 2224 /* 2225 * We are in kernel verbs flow and drivers are not allowed 2226 * to set umem pointer, it needs to stay NULL. 
2227 */ 2228 WARN_ON_ONCE(cq->umem); 2229 2230 rdma_restrack_add(&cq->res); 2231 return cq; 2232 } 2233 EXPORT_SYMBOL(__ib_create_cq); 2234 2235 int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period) 2236 { 2237 if (cq->shared) 2238 return -EOPNOTSUPP; 2239 2240 return cq->device->ops.modify_cq ? 2241 cq->device->ops.modify_cq(cq, cq_count, 2242 cq_period) : -EOPNOTSUPP; 2243 } 2244 EXPORT_SYMBOL(rdma_set_cq_moderation); 2245 2246 int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata) 2247 { 2248 int ret; 2249 2250 if (WARN_ON_ONCE(cq->shared)) 2251 return -EOPNOTSUPP; 2252 2253 if (atomic_read(&cq->usecnt)) 2254 return -EBUSY; 2255 2256 ret = cq->device->ops.destroy_cq(cq, udata); 2257 if (ret) 2258 return ret; 2259 2260 ib_umem_release(cq->umem); 2261 rdma_restrack_del(&cq->res); 2262 kfree(cq); 2263 return ret; 2264 } 2265 EXPORT_SYMBOL(ib_destroy_cq_user); 2266 2267 /* Memory regions */ 2268 2269 struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 2270 u64 virt_addr, int access_flags) 2271 { 2272 struct ib_mr *mr; 2273 2274 if (access_flags & IB_ACCESS_ON_DEMAND) { 2275 if (!(pd->device->attrs.kernel_cap_flags & 2276 IBK_ON_DEMAND_PAGING)) { 2277 pr_debug("ODP support not available\n"); 2278 return ERR_PTR(-EINVAL); 2279 } 2280 } 2281 2282 mr = pd->device->ops.reg_user_mr(pd, start, length, virt_addr, 2283 access_flags, NULL, NULL); 2284 2285 if (IS_ERR(mr)) 2286 return mr; 2287 2288 mr->device = pd->device; 2289 mr->type = IB_MR_TYPE_USER; 2290 mr->pd = pd; 2291 mr->dm = NULL; 2292 atomic_inc(&pd->usecnt); 2293 mr->iova = virt_addr; 2294 mr->length = length; 2295 2296 rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); 2297 rdma_restrack_parent_name(&mr->res, &pd->res); 2298 rdma_restrack_add(&mr->res); 2299 2300 return mr; 2301 } 2302 EXPORT_SYMBOL(ib_reg_user_mr); 2303 2304 int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, 2305 u32 flags, struct ib_sge *sg_list, u32 num_sge) 2306 { 2307 if (!pd->device->ops.advise_mr) 2308 return -EOPNOTSUPP; 2309 2310 if (!num_sge) 2311 return 0; 2312 2313 return pd->device->ops.advise_mr(pd, advice, flags, sg_list, num_sge, 2314 NULL); 2315 } 2316 EXPORT_SYMBOL(ib_advise_mr); 2317 2318 int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) 2319 { 2320 struct ib_pd *pd = mr->pd; 2321 struct ib_dm *dm = mr->dm; 2322 struct ib_dmah *dmah = mr->dmah; 2323 struct ib_sig_attrs *sig_attrs = mr->sig_attrs; 2324 int ret; 2325 2326 trace_mr_dereg(mr); 2327 rdma_restrack_del(&mr->res); 2328 ret = mr->device->ops.dereg_mr(mr, udata); 2329 if (!ret) { 2330 atomic_dec(&pd->usecnt); 2331 if (dm) 2332 atomic_dec(&dm->usecnt); 2333 if (dmah) 2334 atomic_dec(&dmah->usecnt); 2335 kfree(sig_attrs); 2336 } 2337 2338 return ret; 2339 } 2340 EXPORT_SYMBOL(ib_dereg_mr_user); 2341 2342 /** 2343 * ib_alloc_mr() - Allocates a memory region 2344 * @pd: protection domain associated with the region 2345 * @mr_type: memory region type 2346 * @max_num_sg: maximum sg entries available for registration. 2347 * 2348 * Notes: 2349 * Memory registration page/sg lists must not exceed max_num_sg. 2350 * For mr_type IB_MR_TYPE_MEM_REG, the total length cannot exceed 2351 * max_num_sg * used_page_size.
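 *
 * A minimal fast-registration sketch (illustrative only; "pd" and the DMA
 * mapped scatterlist "sgl"/"sg_nents" are assumed to exist already):
 *
 *	struct ib_mr *mr;
 *	int n;
 *
 *	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 32);
 *	if (IS_ERR(mr))
 *		return PTR_ERR(mr);
 *	n = ib_map_mr_sg(mr, sgl, sg_nents, NULL, PAGE_SIZE);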
2352 * 2353 */ 2354 struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 2355 u32 max_num_sg) 2356 { 2357 struct ib_mr *mr; 2358 2359 if (!pd->device->ops.alloc_mr) { 2360 mr = ERR_PTR(-EOPNOTSUPP); 2361 goto out; 2362 } 2363 2364 if (mr_type == IB_MR_TYPE_INTEGRITY) { 2365 WARN_ON_ONCE(1); 2366 mr = ERR_PTR(-EINVAL); 2367 goto out; 2368 } 2369 2370 mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg); 2371 if (IS_ERR(mr)) 2372 goto out; 2373 2374 mr->device = pd->device; 2375 mr->pd = pd; 2376 mr->dm = NULL; 2377 mr->uobject = NULL; 2378 atomic_inc(&pd->usecnt); 2379 mr->need_inval = false; 2380 mr->type = mr_type; 2381 mr->sig_attrs = NULL; 2382 2383 rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); 2384 rdma_restrack_parent_name(&mr->res, &pd->res); 2385 rdma_restrack_add(&mr->res); 2386 out: 2387 trace_mr_alloc(pd, mr_type, max_num_sg, mr); 2388 return mr; 2389 } 2390 EXPORT_SYMBOL(ib_alloc_mr); 2391 2392 /** 2393 * ib_alloc_mr_integrity() - Allocates an integrity memory region 2394 * @pd: protection domain associated with the region 2395 * @max_num_data_sg: maximum data sg entries available for registration 2396 * @max_num_meta_sg: maximum metadata sg entries available for 2397 * registration 2398 * 2399 * Notes: 2400 * Memory registration page/sg lists must not exceed max_num_data_sg, 2401 * also the integrity page/sg lists must not exceed max_num_meta_sg. 2402 * 2403 */ 2404 struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, 2405 u32 max_num_data_sg, 2406 u32 max_num_meta_sg) 2407 { 2408 struct ib_mr *mr; 2409 struct ib_sig_attrs *sig_attrs; 2410 2411 if (!pd->device->ops.alloc_mr_integrity || 2412 !pd->device->ops.map_mr_sg_pi) { 2413 mr = ERR_PTR(-EOPNOTSUPP); 2414 goto out; 2415 } 2416 2417 if (!max_num_meta_sg) { 2418 mr = ERR_PTR(-EINVAL); 2419 goto out; 2420 } 2421 2422 sig_attrs = kzalloc(sizeof(*sig_attrs), GFP_KERNEL); 2423 if (!sig_attrs) { 2424 mr = ERR_PTR(-ENOMEM); 2425 goto out; 2426 } 2427 2428 mr = pd->device->ops.alloc_mr_integrity(pd, max_num_data_sg, 2429 max_num_meta_sg); 2430 if (IS_ERR(mr)) { 2431 kfree(sig_attrs); 2432 goto out; 2433 } 2434 2435 mr->device = pd->device; 2436 mr->pd = pd; 2437 mr->dm = NULL; 2438 mr->uobject = NULL; 2439 atomic_inc(&pd->usecnt); 2440 mr->need_inval = false; 2441 mr->type = IB_MR_TYPE_INTEGRITY; 2442 mr->sig_attrs = sig_attrs; 2443 2444 rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); 2445 rdma_restrack_parent_name(&mr->res, &pd->res); 2446 rdma_restrack_add(&mr->res); 2447 out: 2448 trace_mr_integ_alloc(pd, max_num_data_sg, max_num_meta_sg, mr); 2449 return mr; 2450 } 2451 EXPORT_SYMBOL(ib_alloc_mr_integrity); 2452 2453 /* Multicast groups */ 2454 2455 static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid) 2456 { 2457 struct ib_qp_init_attr init_attr = {}; 2458 struct ib_qp_attr attr = {}; 2459 int num_eth_ports = 0; 2460 unsigned int port; 2461 2462 /* If QP state >= init, it is assigned to a port and we can check this 2463 * port only.
2464 */ 2465 if (!ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PORT, &init_attr)) { 2466 if (attr.qp_state >= IB_QPS_INIT) { 2467 if (rdma_port_get_link_layer(qp->device, attr.port_num) != 2468 IB_LINK_LAYER_INFINIBAND) 2469 return true; 2470 goto lid_check; 2471 } 2472 } 2473 2474 /* Can't get a quick answer, iterate over all ports */ 2475 rdma_for_each_port(qp->device, port) 2476 if (rdma_port_get_link_layer(qp->device, port) != 2477 IB_LINK_LAYER_INFINIBAND) 2478 num_eth_ports++; 2479 2480 /* If we have at least one Ethernet port, RoCE annex declares that 2481 * multicast LID should be ignored. We can't tell at this step if the 2482 * QP belongs to an IB or Ethernet port. 2483 */ 2484 if (num_eth_ports) 2485 return true; 2486 2487 /* If all the ports are IB, we can check according to IB spec. */ 2488 lid_check: 2489 return !(lid < be16_to_cpu(IB_MULTICAST_LID_BASE) || 2490 lid == be16_to_cpu(IB_LID_PERMISSIVE)); 2491 } 2492 2493 int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) 2494 { 2495 int ret; 2496 2497 if (!qp->device->ops.attach_mcast) 2498 return -EOPNOTSUPP; 2499 2500 if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) || 2501 qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid)) 2502 return -EINVAL; 2503 2504 ret = qp->device->ops.attach_mcast(qp, gid, lid); 2505 if (!ret) 2506 atomic_inc(&qp->usecnt); 2507 return ret; 2508 } 2509 EXPORT_SYMBOL(ib_attach_mcast); 2510 2511 int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) 2512 { 2513 int ret; 2514 2515 if (!qp->device->ops.detach_mcast) 2516 return -EOPNOTSUPP; 2517 2518 if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) || 2519 qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid)) 2520 return -EINVAL; 2521 2522 ret = qp->device->ops.detach_mcast(qp, gid, lid); 2523 if (!ret) 2524 atomic_dec(&qp->usecnt); 2525 return ret; 2526 } 2527 EXPORT_SYMBOL(ib_detach_mcast); 2528 2529 /** 2530 * ib_alloc_xrcd_user - Allocates an XRC domain. 2531 * @device: The device on which to allocate the XRC domain. 2532 * @inode: inode to connect XRCD 2533 * @udata: Valid user data or NULL for kernel object 2534 */ 2535 struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device, 2536 struct inode *inode, struct ib_udata *udata) 2537 { 2538 struct ib_xrcd *xrcd; 2539 int ret; 2540 2541 if (!device->ops.alloc_xrcd) 2542 return ERR_PTR(-EOPNOTSUPP); 2543 2544 xrcd = rdma_zalloc_drv_obj(device, ib_xrcd); 2545 if (!xrcd) 2546 return ERR_PTR(-ENOMEM); 2547 2548 xrcd->device = device; 2549 xrcd->inode = inode; 2550 atomic_set(&xrcd->usecnt, 0); 2551 init_rwsem(&xrcd->tgt_qps_rwsem); 2552 xa_init(&xrcd->tgt_qps); 2553 2554 ret = device->ops.alloc_xrcd(xrcd, udata); 2555 if (ret) 2556 goto err; 2557 return xrcd; 2558 err: 2559 kfree(xrcd); 2560 return ERR_PTR(ret); 2561 } 2562 EXPORT_SYMBOL(ib_alloc_xrcd_user); 2563 2564 /** 2565 * ib_dealloc_xrcd_user - Deallocates an XRC domain. 2566 * @xrcd: The XRC domain to deallocate. 2567 * @udata: Valid user data or NULL for kernel object 2568 */ 2569 int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata) 2570 { 2571 int ret; 2572 2573 if (atomic_read(&xrcd->usecnt)) 2574 return -EBUSY; 2575 2576 WARN_ON(!xa_empty(&xrcd->tgt_qps)); 2577 ret = xrcd->device->ops.dealloc_xrcd(xrcd, udata); 2578 if (ret) 2579 return ret; 2580 kfree(xrcd); 2581 return ret; 2582 } 2583 EXPORT_SYMBOL(ib_dealloc_xrcd_user); 2584 2585 /** 2586 * ib_create_wq - Creates a WQ associated with the specified protection 2587 * domain.
2588 * @pd: The protection domain associated with the WQ. 2589 * @wq_attr: A list of initial attributes required to create the 2590 * WQ. If WQ creation succeeds, then the attributes are updated to 2591 * the actual capabilities of the created WQ. 2592 * 2593 * wq_attr->max_wr and wq_attr->max_sge determine 2594 * the requested size of the WQ, and set to the actual values allocated 2595 * on return. 2596 * If ib_create_wq() succeeds, then max_wr and max_sge will always be 2597 * at least as large as the requested values. 2598 */ 2599 struct ib_wq *ib_create_wq(struct ib_pd *pd, 2600 struct ib_wq_init_attr *wq_attr) 2601 { 2602 struct ib_wq *wq; 2603 2604 if (!pd->device->ops.create_wq) 2605 return ERR_PTR(-EOPNOTSUPP); 2606 2607 wq = pd->device->ops.create_wq(pd, wq_attr, NULL); 2608 if (!IS_ERR(wq)) { 2609 wq->event_handler = wq_attr->event_handler; 2610 wq->wq_context = wq_attr->wq_context; 2611 wq->wq_type = wq_attr->wq_type; 2612 wq->cq = wq_attr->cq; 2613 wq->device = pd->device; 2614 wq->pd = pd; 2615 wq->uobject = NULL; 2616 atomic_inc(&pd->usecnt); 2617 atomic_inc(&wq_attr->cq->usecnt); 2618 atomic_set(&wq->usecnt, 0); 2619 } 2620 return wq; 2621 } 2622 EXPORT_SYMBOL(ib_create_wq); 2623 2624 /** 2625 * ib_destroy_wq_user - Destroys the specified user WQ. 2626 * @wq: The WQ to destroy. 2627 * @udata: Valid user data 2628 */ 2629 int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata) 2630 { 2631 struct ib_cq *cq = wq->cq; 2632 struct ib_pd *pd = wq->pd; 2633 int ret; 2634 2635 if (atomic_read(&wq->usecnt)) 2636 return -EBUSY; 2637 2638 ret = wq->device->ops.destroy_wq(wq, udata); 2639 if (ret) 2640 return ret; 2641 2642 atomic_dec(&pd->usecnt); 2643 atomic_dec(&cq->usecnt); 2644 return ret; 2645 } 2646 EXPORT_SYMBOL(ib_destroy_wq_user); 2647 2648 int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, 2649 struct ib_mr_status *mr_status) 2650 { 2651 if (!mr->device->ops.check_mr_status) 2652 return -EOPNOTSUPP; 2653 2654 return mr->device->ops.check_mr_status(mr, check_mask, mr_status); 2655 } 2656 EXPORT_SYMBOL(ib_check_mr_status); 2657 2658 int ib_set_vf_link_state(struct ib_device *device, int vf, u32 port, 2659 int state) 2660 { 2661 if (!device->ops.set_vf_link_state) 2662 return -EOPNOTSUPP; 2663 2664 return device->ops.set_vf_link_state(device, vf, port, state); 2665 } 2666 EXPORT_SYMBOL(ib_set_vf_link_state); 2667 2668 int ib_get_vf_config(struct ib_device *device, int vf, u32 port, 2669 struct ifla_vf_info *info) 2670 { 2671 if (!device->ops.get_vf_config) 2672 return -EOPNOTSUPP; 2673 2674 return device->ops.get_vf_config(device, vf, port, info); 2675 } 2676 EXPORT_SYMBOL(ib_get_vf_config); 2677 2678 int ib_get_vf_stats(struct ib_device *device, int vf, u32 port, 2679 struct ifla_vf_stats *stats) 2680 { 2681 if (!device->ops.get_vf_stats) 2682 return -EOPNOTSUPP; 2683 2684 return device->ops.get_vf_stats(device, vf, port, stats); 2685 } 2686 EXPORT_SYMBOL(ib_get_vf_stats); 2687 2688 int ib_set_vf_guid(struct ib_device *device, int vf, u32 port, u64 guid, 2689 int type) 2690 { 2691 if (!device->ops.set_vf_guid) 2692 return -EOPNOTSUPP; 2693 2694 return device->ops.set_vf_guid(device, vf, port, guid, type); 2695 } 2696 EXPORT_SYMBOL(ib_set_vf_guid); 2697 2698 int ib_get_vf_guid(struct ib_device *device, int vf, u32 port, 2699 struct ifla_vf_guid *node_guid, 2700 struct ifla_vf_guid *port_guid) 2701 { 2702 if (!device->ops.get_vf_guid) 2703 return -EOPNOTSUPP; 2704 2705 return device->ops.get_vf_guid(device, vf, port, node_guid, port_guid); 2706 } 2707 
EXPORT_SYMBOL(ib_get_vf_guid); 2708 /** 2709 * ib_map_mr_sg_pi() - Map the dma mapped SG lists for PI (protection 2710 * information) and set an appropriate memory region for registration. 2711 * @mr: memory region 2712 * @data_sg: dma mapped scatterlist for data 2713 * @data_sg_nents: number of entries in data_sg 2714 * @data_sg_offset: offset in bytes into data_sg 2715 * @meta_sg: dma mapped scatterlist for metadata 2716 * @meta_sg_nents: number of entries in meta_sg 2717 * @meta_sg_offset: offset in bytes into meta_sg 2718 * @page_size: page vector desired page size 2719 * 2720 * Constraints: 2721 * - The MR must be allocated with type IB_MR_TYPE_INTEGRITY. 2722 * 2723 * Return: 0 on success. 2724 * 2725 * After this completes successfully, the memory region 2726 * is ready for registration. 2727 */ 2728 int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg, 2729 int data_sg_nents, unsigned int *data_sg_offset, 2730 struct scatterlist *meta_sg, int meta_sg_nents, 2731 unsigned int *meta_sg_offset, unsigned int page_size) 2732 { 2733 if (unlikely(!mr->device->ops.map_mr_sg_pi || 2734 WARN_ON_ONCE(mr->type != IB_MR_TYPE_INTEGRITY))) 2735 return -EOPNOTSUPP; 2736 2737 mr->page_size = page_size; 2738 2739 return mr->device->ops.map_mr_sg_pi(mr, data_sg, data_sg_nents, 2740 data_sg_offset, meta_sg, 2741 meta_sg_nents, meta_sg_offset); 2742 } 2743 EXPORT_SYMBOL(ib_map_mr_sg_pi); 2744 2745 /** 2746 * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list 2747 * and set it the memory region. 2748 * @mr: memory region 2749 * @sg: dma mapped scatterlist 2750 * @sg_nents: number of entries in sg 2751 * @sg_offset: offset in bytes into sg 2752 * @page_size: page vector desired page size 2753 * 2754 * Constraints: 2755 * 2756 * - The first sg element is allowed to have an offset. 2757 * - Each sg element must either be aligned to page_size or virtually 2758 * contiguous to the previous element. In case an sg element has a 2759 * non-contiguous offset, the mapping prefix will not include it. 2760 * - The last sg element is allowed to have length less than page_size. 2761 * - If sg_nents total byte length exceeds the mr max_num_sge * page_size 2762 * then only max_num_sg entries will be mapped. 2763 * - If the MR was allocated with type IB_MR_TYPE_SG_GAPS, none of these 2764 * constraints holds and the page_size argument is ignored. 2765 * 2766 * Returns the number of sg elements that were mapped to the memory region. 2767 * 2768 * After this completes successfully, the memory region 2769 * is ready for registration. 2770 */ 2771 int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, 2772 unsigned int *sg_offset, unsigned int page_size) 2773 { 2774 if (unlikely(!mr->device->ops.map_mr_sg)) 2775 return -EOPNOTSUPP; 2776 2777 mr->page_size = page_size; 2778 2779 return mr->device->ops.map_mr_sg(mr, sg, sg_nents, sg_offset); 2780 } 2781 EXPORT_SYMBOL(ib_map_mr_sg); 2782 2783 /** 2784 * ib_sg_to_pages() - Convert the largest prefix of a sg list 2785 * to a page vector 2786 * @mr: memory region 2787 * @sgl: dma mapped scatterlist 2788 * @sg_nents: number of entries in sg 2789 * @sg_offset_p: ==== ======================================================= 2790 * IN start offset in bytes into sg 2791 * OUT offset in bytes for element n of the sg of the first 2792 * byte that has not been processed where n is the return 2793 * value of this function. 
2794 * ==== ======================================================= 2795 * @set_page: driver page assignment function pointer 2796 * 2797 * Core service helper for drivers to convert the largest 2798 * prefix of given sg list to a page vector. The sg list 2799 * prefix converted is the prefix that meet the requirements 2800 * of ib_map_mr_sg. 2801 * 2802 * Returns the number of sg elements that were assigned to 2803 * a page vector. 2804 */ 2805 int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents, 2806 unsigned int *sg_offset_p, int (*set_page)(struct ib_mr *, u64)) 2807 { 2808 struct scatterlist *sg; 2809 u64 last_end_dma_addr = 0; 2810 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; 2811 unsigned int last_page_off = 0; 2812 u64 page_mask = ~((u64)mr->page_size - 1); 2813 int i, ret; 2814 2815 if (unlikely(sg_nents <= 0 || sg_offset > sg_dma_len(&sgl[0]))) 2816 return -EINVAL; 2817 2818 mr->iova = sg_dma_address(&sgl[0]) + sg_offset; 2819 mr->length = 0; 2820 2821 for_each_sg(sgl, sg, sg_nents, i) { 2822 u64 dma_addr = sg_dma_address(sg) + sg_offset; 2823 u64 prev_addr = dma_addr; 2824 unsigned int dma_len = sg_dma_len(sg) - sg_offset; 2825 u64 end_dma_addr = dma_addr + dma_len; 2826 u64 page_addr = dma_addr & page_mask; 2827 2828 /* 2829 * For the second and later elements, check whether either the 2830 * end of element i-1 or the start of element i is not aligned 2831 * on a page boundary. 2832 */ 2833 if (i && (last_page_off != 0 || page_addr != dma_addr)) { 2834 /* Stop mapping if there is a gap. */ 2835 if (last_end_dma_addr != dma_addr) 2836 break; 2837 2838 /* 2839 * Coalesce this element with the last. If it is small 2840 * enough just update mr->length. Otherwise start 2841 * mapping from the next page. 2842 */ 2843 goto next_page; 2844 } 2845 2846 do { 2847 ret = set_page(mr, page_addr); 2848 if (unlikely(ret < 0)) { 2849 sg_offset = prev_addr - sg_dma_address(sg); 2850 mr->length += prev_addr - dma_addr; 2851 if (sg_offset_p) 2852 *sg_offset_p = sg_offset; 2853 return i || sg_offset ? i : ret; 2854 } 2855 prev_addr = page_addr; 2856 next_page: 2857 page_addr += mr->page_size; 2858 } while (page_addr < end_dma_addr); 2859 2860 mr->length += dma_len; 2861 last_end_dma_addr = end_dma_addr; 2862 last_page_off = end_dma_addr & ~page_mask; 2863 2864 sg_offset = 0; 2865 } 2866 2867 if (sg_offset_p) 2868 *sg_offset_p = 0; 2869 return i; 2870 } 2871 EXPORT_SYMBOL(ib_sg_to_pages); 2872 2873 struct ib_drain_cqe { 2874 struct ib_cqe cqe; 2875 struct completion done; 2876 }; 2877 2878 static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc) 2879 { 2880 struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe, 2881 cqe); 2882 2883 complete(&cqe->done); 2884 } 2885 2886 /* 2887 * Post a WR and block until its completion is reaped for the SQ. 
2888 */ 2889 static void __ib_drain_sq(struct ib_qp *qp) 2890 { 2891 struct ib_cq *cq = qp->send_cq; 2892 struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; 2893 struct ib_drain_cqe sdrain; 2894 struct ib_rdma_wr swr = { 2895 .wr = { 2896 .next = NULL, 2897 { .wr_cqe = &sdrain.cqe, }, 2898 .opcode = IB_WR_RDMA_WRITE, 2899 }, 2900 }; 2901 int ret; 2902 2903 ret = ib_modify_qp(qp, &attr, IB_QP_STATE); 2904 if (ret) { 2905 WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); 2906 return; 2907 } 2908 2909 sdrain.cqe.done = ib_drain_qp_done; 2910 init_completion(&sdrain.done); 2911 2912 ret = ib_post_send(qp, &swr.wr, NULL); 2913 if (ret) { 2914 WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); 2915 return; 2916 } 2917 2918 if (cq->poll_ctx == IB_POLL_DIRECT) 2919 while (wait_for_completion_timeout(&sdrain.done, HZ / 10) <= 0) 2920 ib_process_cq_direct(cq, -1); 2921 else 2922 wait_for_completion(&sdrain.done); 2923 } 2924 2925 /* 2926 * Post a WR and block until its completion is reaped for the RQ. 2927 */ 2928 static void __ib_drain_rq(struct ib_qp *qp) 2929 { 2930 struct ib_cq *cq = qp->recv_cq; 2931 struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; 2932 struct ib_drain_cqe rdrain; 2933 struct ib_recv_wr rwr = {}; 2934 int ret; 2935 2936 ret = ib_modify_qp(qp, &attr, IB_QP_STATE); 2937 if (ret) { 2938 WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); 2939 return; 2940 } 2941 2942 rwr.wr_cqe = &rdrain.cqe; 2943 rdrain.cqe.done = ib_drain_qp_done; 2944 init_completion(&rdrain.done); 2945 2946 ret = ib_post_recv(qp, &rwr, NULL); 2947 if (ret) { 2948 WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); 2949 return; 2950 } 2951 2952 if (cq->poll_ctx == IB_POLL_DIRECT) 2953 while (wait_for_completion_timeout(&rdrain.done, HZ / 10) <= 0) 2954 ib_process_cq_direct(cq, -1); 2955 else 2956 wait_for_completion(&rdrain.done); 2957 } 2958 2959 /* 2960 * __ib_drain_srq() - Block until Last WQE Reached event arrives, or timeout 2961 * expires. 2962 * @qp: queue pair associated with SRQ to drain 2963 * 2964 * Quoting 10.3.1 Queue Pair and EE Context States: 2965 * 2966 * Note, for QPs that are associated with an SRQ, the Consumer should take the 2967 * QP through the Error State before invoking a Destroy QP or a Modify QP to the 2968 * Reset State. The Consumer may invoke the Destroy QP without first performing 2969 * a Modify QP to the Error State and waiting for the Affiliated Asynchronous 2970 * Last WQE Reached Event. However, if the Consumer does not wait for the 2971 * Affiliated Asynchronous Last WQE Reached Event, then WQE and Data Segment 2972 * leakage may occur. Therefore, it is good programming practice to tear down a 2973 * QP that is associated with an SRQ by using the following process: 2974 * 2975 * - Put the QP in the Error State 2976 * - Wait for the Affiliated Asynchronous Last WQE Reached Event; 2977 * - either: 2978 * drain the CQ by invoking the Poll CQ verb and either wait for CQ 2979 * to be empty or the number of Poll CQ operations has exceeded 2980 * CQ capacity size; 2981 * - or 2982 * post another WR that completes on the same CQ and wait for this 2983 * WR to return as a WC; 2984 * - and then invoke a Destroy QP or Reset QP. 2985 * 2986 * We use the first option. 
2987 */ 2988 static void __ib_drain_srq(struct ib_qp *qp) 2989 { 2990 struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; 2991 struct ib_cq *cq; 2992 int n, polled = 0; 2993 int ret; 2994 2995 if (!qp->srq) { 2996 WARN_ONCE(1, "QP 0x%p is not associated with SRQ\n", qp); 2997 return; 2998 } 2999 3000 ret = ib_modify_qp(qp, &attr, IB_QP_STATE); 3001 if (ret) { 3002 WARN_ONCE(ret, "failed to drain shared recv queue: %d\n", ret); 3003 return; 3004 } 3005 3006 if (ib_srq_has_cq(qp->srq->srq_type)) { 3007 cq = qp->srq->ext.cq; 3008 } else if (qp->recv_cq) { 3009 cq = qp->recv_cq; 3010 } else { 3011 WARN_ONCE(1, "QP 0x%p has no CQ associated with SRQ\n", qp); 3012 return; 3013 } 3014 3015 if (wait_for_completion_timeout(&qp->srq_completion, 60 * HZ) > 0) { 3016 while (polled != cq->cqe) { 3017 n = ib_process_cq_direct(cq, cq->cqe - polled); 3018 if (!n) 3019 return; 3020 polled += n; 3021 } 3022 } 3023 } 3024 3025 /** 3026 * ib_drain_sq() - Block until all SQ CQEs have been consumed by the 3027 * application. 3028 * @qp: queue pair to drain 3029 * 3030 * If the device has a provider-specific drain function, then 3031 * call that. Otherwise call the generic drain function 3032 * __ib_drain_sq(). 3033 * 3034 * The caller must: 3035 * 3036 * ensure there is room in the CQ and SQ for the drain work request and 3037 * completion. 3038 * 3039 * allocate the CQ using ib_alloc_cq(). 3040 * 3041 * ensure that there are no other contexts that are posting WRs concurrently. 3042 * Otherwise the drain is not guaranteed. 3043 */ 3044 void ib_drain_sq(struct ib_qp *qp) 3045 { 3046 if (qp->device->ops.drain_sq) 3047 qp->device->ops.drain_sq(qp); 3048 else 3049 __ib_drain_sq(qp); 3050 trace_cq_drain_complete(qp->send_cq); 3051 } 3052 EXPORT_SYMBOL(ib_drain_sq); 3053 3054 /** 3055 * ib_drain_rq() - Block until all RQ CQEs have been consumed by the 3056 * application. 3057 * @qp: queue pair to drain 3058 * 3059 * If the device has a provider-specific drain function, then 3060 * call that. Otherwise call the generic drain function 3061 * __ib_drain_rq(). 3062 * 3063 * The caller must: 3064 * 3065 * ensure there is room in the CQ and RQ for the drain work request and 3066 * completion. 3067 * 3068 * allocate the CQ using ib_alloc_cq(). 3069 * 3070 * ensure that there are no other contexts that are posting WRs concurrently. 3071 * Otherwise the drain is not guaranteed. 3072 */ 3073 void ib_drain_rq(struct ib_qp *qp) 3074 { 3075 if (qp->device->ops.drain_rq) 3076 qp->device->ops.drain_rq(qp); 3077 else 3078 __ib_drain_rq(qp); 3079 trace_cq_drain_complete(qp->recv_cq); 3080 } 3081 EXPORT_SYMBOL(ib_drain_rq); 3082 3083 /** 3084 * ib_drain_qp() - Block until all CQEs have been consumed by the 3085 * application on both the RQ and SQ. 3086 * @qp: queue pair to drain 3087 * 3088 * The caller must: 3089 * 3090 * ensure there is room in the CQ(s), SQ, and RQ for drain work requests 3091 * and completions. 3092 * 3093 * allocate the CQs using ib_alloc_cq(). 3094 * 3095 * ensure that there are no other contexts that are posting WRs concurrently. 3096 * Otherwise the drain is not guaranteed. 
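 *
 * A typical teardown sketch (illustrative only; "qp" is assumed to have been
 * created with CQs allocated via ib_alloc_cq()):
 *
 *	ib_drain_qp(qp);
 *	ib_destroy_qp(qp);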
3097 */ 3098 void ib_drain_qp(struct ib_qp *qp) 3099 { 3100 ib_drain_sq(qp); 3101 if (!qp->srq) 3102 ib_drain_rq(qp); 3103 else 3104 __ib_drain_srq(qp); 3105 } 3106 EXPORT_SYMBOL(ib_drain_qp); 3107 3108 struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num, 3109 enum rdma_netdev_t type, const char *name, 3110 unsigned char name_assign_type, 3111 void (*setup)(struct net_device *)) 3112 { 3113 struct rdma_netdev_alloc_params params; 3114 struct net_device *netdev; 3115 int rc; 3116 3117 if (!device->ops.rdma_netdev_get_params) 3118 return ERR_PTR(-EOPNOTSUPP); 3119 3120 rc = device->ops.rdma_netdev_get_params(device, port_num, type, 3121 &params); 3122 if (rc) 3123 return ERR_PTR(rc); 3124 3125 netdev = alloc_netdev_mqs(params.sizeof_priv, name, name_assign_type, 3126 setup, params.txqs, params.rxqs); 3127 if (!netdev) 3128 return ERR_PTR(-ENOMEM); 3129 3130 return netdev; 3131 } 3132 EXPORT_SYMBOL(rdma_alloc_netdev); 3133 3134 int rdma_init_netdev(struct ib_device *device, u32 port_num, 3135 enum rdma_netdev_t type, const char *name, 3136 unsigned char name_assign_type, 3137 void (*setup)(struct net_device *), 3138 struct net_device *netdev) 3139 { 3140 struct rdma_netdev_alloc_params params; 3141 int rc; 3142 3143 if (!device->ops.rdma_netdev_get_params) 3144 return -EOPNOTSUPP; 3145 3146 rc = device->ops.rdma_netdev_get_params(device, port_num, type, 3147 &params); 3148 if (rc) 3149 return rc; 3150 3151 return params.initialize_rdma_netdev(device, port_num, 3152 netdev, params.param); 3153 } 3154 EXPORT_SYMBOL(rdma_init_netdev); 3155 3156 /** 3157 * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct 3158 * for the drivers. 3159 * @descs: array of static descriptors 3160 * @num_counters: number of elements in array 3161 * @lifespan: milliseconds between updates 3162 */ 3163 struct rdma_hw_stats *rdma_alloc_hw_stats_struct( 3164 const struct rdma_stat_desc *descs, int num_counters, 3165 unsigned long lifespan) 3166 { 3167 struct rdma_hw_stats *stats; 3168 3169 stats = kzalloc(struct_size(stats, value, num_counters), GFP_KERNEL); 3170 if (!stats) 3171 return NULL; 3172 3173 stats->is_disabled = kcalloc(BITS_TO_LONGS(num_counters), 3174 sizeof(*stats->is_disabled), GFP_KERNEL); 3175 if (!stats->is_disabled) 3176 goto err; 3177 3178 stats->descs = descs; 3179 stats->num_counters = num_counters; 3180 stats->lifespan = msecs_to_jiffies(lifespan); 3181 mutex_init(&stats->lock); 3182 3183 return stats; 3184 3185 err: 3186 kfree(stats); 3187 return NULL; 3188 } 3189 EXPORT_SYMBOL(rdma_alloc_hw_stats_struct); 3190 3191 /** 3192 * rdma_free_hw_stats_struct - Helper function to release rdma_hw_stats 3193 * @stats: statistics to release 3194 */ 3195 void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats) 3196 { 3197 if (!stats) 3198 return; 3199 3200 kfree(stats->is_disabled); 3201 kfree(stats); 3202 } 3203 EXPORT_SYMBOL(rdma_free_hw_stats_struct); 3204
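/*
 * Example driver-side pairing for the hw_stats helpers (illustrative only;
 * "my_descs" is assumed to be a driver-owned static array of
 * struct rdma_stat_desc, and the 10 millisecond lifespan is arbitrary):
 *
 *	struct rdma_hw_stats *stats;
 *
 *	stats = rdma_alloc_hw_stats_struct(my_descs, ARRAY_SIZE(my_descs), 10);
 *	if (!stats)
 *		return -ENOMEM;
 *	...
 *	rdma_free_hw_stats_struct(stats);
 */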