/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
32 */ 33 34 #define _GNU_SOURCE 35 #include <config.h> 36 37 #include <infiniband/endian.h> 38 #include <stdio.h> 39 #include <unistd.h> 40 #include <stdlib.h> 41 #include <errno.h> 42 #include <string.h> 43 #include <dirent.h> 44 #include <netinet/in.h> 45 #include <netinet/ip.h> 46 #include <sys/socket.h> 47 48 #include "ibverbs.h" 49 #ifndef NRESOLVE_NEIGH 50 #include <net/if.h> 51 #include <net/if_arp.h> 52 #include "neigh.h" 53 #endif 54 55 /* Hack to avoid GCC's -Wmissing-prototypes and the similar error from sparse 56 with these prototypes. Symbol versionining requires the goofy names, the 57 prototype must match the version in verbs.h. 58 */ 59 int __ibv_query_device(struct ibv_context *context, 60 struct ibv_device_attr *device_attr); 61 int __ibv_query_port(struct ibv_context *context, uint8_t port_num, 62 struct ibv_port_attr *port_attr); 63 int __ibv_query_gid(struct ibv_context *context, uint8_t port_num, int index, 64 union ibv_gid *gid); 65 int __ibv_query_pkey(struct ibv_context *context, uint8_t port_num, int index, 66 __be16 *pkey); 67 struct ibv_pd *__ibv_alloc_pd(struct ibv_context *context); 68 int __ibv_dealloc_pd(struct ibv_pd *pd); 69 struct ibv_mr *__ibv_reg_mr(struct ibv_pd *pd, void *addr, size_t length, 70 int access); 71 int __ibv_rereg_mr(struct ibv_mr *mr, int flags, struct ibv_pd *pd, void *addr, 72 size_t length, int access); 73 int __ibv_dereg_mr(struct ibv_mr *mr); 74 struct ibv_cq *__ibv_create_cq(struct ibv_context *context, int cqe, 75 void *cq_context, 76 struct ibv_comp_channel *channel, 77 int comp_vector); 78 int __ibv_resize_cq(struct ibv_cq *cq, int cqe); 79 int __ibv_destroy_cq(struct ibv_cq *cq); 80 int __ibv_get_cq_event(struct ibv_comp_channel *channel, struct ibv_cq **cq, 81 void **cq_context); 82 void __ibv_ack_cq_events(struct ibv_cq *cq, unsigned int nevents); 83 struct ibv_srq *__ibv_create_srq(struct ibv_pd *pd, 84 struct ibv_srq_init_attr *srq_init_attr); 85 int __ibv_modify_srq(struct ibv_srq *srq, struct 
ibv_srq_attr *srq_attr, 86 int srq_attr_mask); 87 int __ibv_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr); 88 int __ibv_destroy_srq(struct ibv_srq *srq); 89 struct ibv_qp *__ibv_create_qp(struct ibv_pd *pd, 90 struct ibv_qp_init_attr *qp_init_attr); 91 int __ibv_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, 92 struct ibv_qp_init_attr *init_attr); 93 int __ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask); 94 int __ibv_destroy_qp(struct ibv_qp *qp); 95 struct ibv_ah *__ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr); 96 int __ibv_destroy_ah(struct ibv_ah *ah); 97 int __ibv_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, 98 uint16_t lid); 99 int __ibv_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, 100 uint16_t lid); 101 102 int __attribute__((const)) ibv_rate_to_mult(enum ibv_rate rate) 103 { 104 switch (rate) { 105 case IBV_RATE_2_5_GBPS: return 1; 106 case IBV_RATE_5_GBPS: return 2; 107 case IBV_RATE_10_GBPS: return 4; 108 case IBV_RATE_20_GBPS: return 8; 109 case IBV_RATE_30_GBPS: return 12; 110 case IBV_RATE_40_GBPS: return 16; 111 case IBV_RATE_60_GBPS: return 24; 112 case IBV_RATE_80_GBPS: return 32; 113 case IBV_RATE_120_GBPS: return 48; 114 case IBV_RATE_28_GBPS: return 11; 115 case IBV_RATE_50_GBPS: return 20; 116 case IBV_RATE_400_GBPS: return 160; 117 case IBV_RATE_600_GBPS: return 240; 118 case IBV_RATE_800_GBPS: return 320; 119 case IBV_RATE_1200_GBPS: return 480; 120 default: return -1; 121 } 122 } 123 124 enum ibv_rate __attribute__((const)) mult_to_ibv_rate(int mult) 125 { 126 switch (mult) { 127 case 1: return IBV_RATE_2_5_GBPS; 128 case 2: return IBV_RATE_5_GBPS; 129 case 4: return IBV_RATE_10_GBPS; 130 case 8: return IBV_RATE_20_GBPS; 131 case 12: return IBV_RATE_30_GBPS; 132 case 16: return IBV_RATE_40_GBPS; 133 case 24: return IBV_RATE_60_GBPS; 134 case 32: return IBV_RATE_80_GBPS; 135 case 48: return IBV_RATE_120_GBPS; 136 case 11: return 
IBV_RATE_28_GBPS; 137 case 20: return IBV_RATE_50_GBPS; 138 case 160: return IBV_RATE_400_GBPS; 139 case 240: return IBV_RATE_600_GBPS; 140 case 320: return IBV_RATE_800_GBPS; 141 case 480: return IBV_RATE_1200_GBPS; 142 default: return IBV_RATE_MAX; 143 } 144 } 145 146 int __attribute__((const)) ibv_rate_to_mbps(enum ibv_rate rate) 147 { 148 switch (rate) { 149 case IBV_RATE_2_5_GBPS: return 2500; 150 case IBV_RATE_5_GBPS: return 5000; 151 case IBV_RATE_10_GBPS: return 10000; 152 case IBV_RATE_20_GBPS: return 20000; 153 case IBV_RATE_30_GBPS: return 30000; 154 case IBV_RATE_40_GBPS: return 40000; 155 case IBV_RATE_60_GBPS: return 60000; 156 case IBV_RATE_80_GBPS: return 80000; 157 case IBV_RATE_120_GBPS: return 120000; 158 case IBV_RATE_14_GBPS: return 14062; 159 case IBV_RATE_56_GBPS: return 56250; 160 case IBV_RATE_112_GBPS: return 112500; 161 case IBV_RATE_168_GBPS: return 168750; 162 case IBV_RATE_25_GBPS: return 25781; 163 case IBV_RATE_100_GBPS: return 103125; 164 case IBV_RATE_200_GBPS: return 206250; 165 case IBV_RATE_300_GBPS: return 309375; 166 case IBV_RATE_28_GBPS: return 28125; 167 case IBV_RATE_50_GBPS: return 53125; 168 case IBV_RATE_400_GBPS: return 425000; 169 case IBV_RATE_600_GBPS: return 637500; 170 case IBV_RATE_800_GBPS: return 850000; 171 case IBV_RATE_1200_GBPS: return 1275000; 172 default: return -1; 173 } 174 } 175 176 enum ibv_rate __attribute__((const)) mbps_to_ibv_rate(int mbps) 177 { 178 switch (mbps) { 179 case 2500: return IBV_RATE_2_5_GBPS; 180 case 5000: return IBV_RATE_5_GBPS; 181 case 10000: return IBV_RATE_10_GBPS; 182 case 20000: return IBV_RATE_20_GBPS; 183 case 30000: return IBV_RATE_30_GBPS; 184 case 40000: return IBV_RATE_40_GBPS; 185 case 60000: return IBV_RATE_60_GBPS; 186 case 80000: return IBV_RATE_80_GBPS; 187 case 120000: return IBV_RATE_120_GBPS; 188 case 14062: return IBV_RATE_14_GBPS; 189 case 56250: return IBV_RATE_56_GBPS; 190 case 112500: return IBV_RATE_112_GBPS; 191 case 168750: return IBV_RATE_168_GBPS; 192 
case 25781: return IBV_RATE_25_GBPS; 193 case 103125: return IBV_RATE_100_GBPS; 194 case 206250: return IBV_RATE_200_GBPS; 195 case 309375: return IBV_RATE_300_GBPS; 196 case 28125: return IBV_RATE_28_GBPS; 197 case 53125: return IBV_RATE_50_GBPS; 198 case 425000: return IBV_RATE_400_GBPS; 199 case 637500: return IBV_RATE_600_GBPS; 200 case 850000: return IBV_RATE_800_GBPS; 201 case 1275000: return IBV_RATE_1200_GBPS; 202 default: return IBV_RATE_MAX; 203 } 204 } 205 206 int __ibv_query_device(struct ibv_context *context, 207 struct ibv_device_attr *device_attr) 208 { 209 return context->ops.query_device(context, device_attr); 210 } 211 default_symver(__ibv_query_device, ibv_query_device); 212 213 int __ibv_query_port(struct ibv_context *context, uint8_t port_num, 214 struct ibv_port_attr *port_attr) 215 { 216 return context->ops.query_port(context, port_num, port_attr); 217 } 218 default_symver(__ibv_query_port, ibv_query_port); 219 220 int __ibv_query_gid(struct ibv_context *context, uint8_t port_num, 221 int index, union ibv_gid *gid) 222 { 223 char name[24]; 224 char attr[41]; 225 uint16_t val; 226 int i; 227 228 snprintf(name, sizeof name, "ports/%d/gids/%d", port_num, index); 229 230 if (ibv_read_sysfs_file(context->device->ibdev_path, name, 231 attr, sizeof attr) < 0) 232 return -1; 233 234 for (i = 0; i < 8; ++i) { 235 if (sscanf(attr + i * 5, "%hx", &val) != 1) 236 return -1; 237 gid->raw[i * 2 ] = val >> 8; 238 gid->raw[i * 2 + 1] = val & 0xff; 239 } 240 241 return 0; 242 } 243 default_symver(__ibv_query_gid, ibv_query_gid); 244 245 int __ibv_query_pkey(struct ibv_context *context, uint8_t port_num, 246 int index, __be16 *pkey) 247 { 248 char name[24]; 249 char attr[8]; 250 uint16_t val; 251 252 snprintf(name, sizeof name, "ports/%d/pkeys/%d", port_num, index); 253 254 if (ibv_read_sysfs_file(context->device->ibdev_path, name, 255 attr, sizeof attr) < 0) 256 return -1; 257 258 if (sscanf(attr, "%hx", &val) != 1) 259 return -1; 260 261 *pkey = 
htobe16(val); 262 return 0; 263 } 264 default_symver(__ibv_query_pkey, ibv_query_pkey); 265 266 struct ibv_pd *__ibv_alloc_pd(struct ibv_context *context) 267 { 268 struct ibv_pd *pd; 269 270 pd = context->ops.alloc_pd(context); 271 if (pd) 272 pd->context = context; 273 274 return pd; 275 } 276 default_symver(__ibv_alloc_pd, ibv_alloc_pd); 277 278 int __ibv_dealloc_pd(struct ibv_pd *pd) 279 { 280 return pd->context->ops.dealloc_pd(pd); 281 } 282 default_symver(__ibv_dealloc_pd, ibv_dealloc_pd); 283 284 struct ibv_mr *__ibv_reg_mr(struct ibv_pd *pd, void *addr, 285 size_t length, int access) 286 { 287 struct ibv_mr *mr; 288 289 if (ibv_dontfork_range(addr, length)) 290 return NULL; 291 292 mr = pd->context->ops.reg_mr(pd, addr, length, access); 293 if (mr) { 294 mr->context = pd->context; 295 mr->pd = pd; 296 mr->addr = addr; 297 mr->length = length; 298 } else 299 ibv_dofork_range(addr, length); 300 301 return mr; 302 } 303 default_symver(__ibv_reg_mr, ibv_reg_mr); 304 305 int __ibv_rereg_mr(struct ibv_mr *mr, int flags, 306 struct ibv_pd *pd, void *addr, 307 size_t length, int access) 308 { 309 int dofork_onfail = 0; 310 int err; 311 void *old_addr; 312 size_t old_len; 313 314 if (flags & ~IBV_REREG_MR_FLAGS_SUPPORTED) { 315 errno = EINVAL; 316 return IBV_REREG_MR_ERR_INPUT; 317 } 318 319 if ((flags & IBV_REREG_MR_CHANGE_TRANSLATION) && 320 (!length || !addr)) { 321 errno = EINVAL; 322 return IBV_REREG_MR_ERR_INPUT; 323 } 324 325 if (access && !(flags & IBV_REREG_MR_CHANGE_ACCESS)) { 326 errno = EINVAL; 327 return IBV_REREG_MR_ERR_INPUT; 328 } 329 330 if (!mr->context->ops.rereg_mr) { 331 errno = ENOSYS; 332 return IBV_REREG_MR_ERR_INPUT; 333 } 334 335 if (flags & IBV_REREG_MR_CHANGE_TRANSLATION) { 336 err = ibv_dontfork_range(addr, length); 337 if (err) 338 return IBV_REREG_MR_ERR_DONT_FORK_NEW; 339 dofork_onfail = 1; 340 } 341 342 old_addr = mr->addr; 343 old_len = mr->length; 344 err = mr->context->ops.rereg_mr(mr, flags, pd, addr, length, access); 345 if 
(!err) { 346 if (flags & IBV_REREG_MR_CHANGE_PD) 347 mr->pd = pd; 348 if (flags & IBV_REREG_MR_CHANGE_TRANSLATION) { 349 mr->addr = addr; 350 mr->length = length; 351 err = ibv_dofork_range(old_addr, old_len); 352 if (err) 353 return IBV_REREG_MR_ERR_DO_FORK_OLD; 354 } 355 } else { 356 err = IBV_REREG_MR_ERR_CMD; 357 if (dofork_onfail) { 358 if (ibv_dofork_range(addr, length)) 359 err = IBV_REREG_MR_ERR_CMD_AND_DO_FORK_NEW; 360 } 361 } 362 363 return err; 364 } 365 default_symver(__ibv_rereg_mr, ibv_rereg_mr); 366 367 int __ibv_dereg_mr(struct ibv_mr *mr) 368 { 369 int ret; 370 void *addr = mr->addr; 371 size_t length = mr->length; 372 373 ret = mr->context->ops.dereg_mr(mr); 374 if (!ret) 375 ibv_dofork_range(addr, length); 376 377 return ret; 378 } 379 default_symver(__ibv_dereg_mr, ibv_dereg_mr); 380 381 static struct ibv_comp_channel *ibv_create_comp_channel_v2(struct ibv_context *context) 382 { 383 struct ibv_abi_compat_v2 *t = context->abi_compat; 384 static int warned; 385 386 if (!pthread_mutex_trylock(&t->in_use)) 387 return &t->channel; 388 389 if (!warned) { 390 fprintf(stderr, PFX "Warning: kernel's ABI version %d limits capacity.\n" 391 " Only one completion channel can be created per context.\n", 392 abi_ver); 393 ++warned; 394 } 395 396 return NULL; 397 } 398 399 struct ibv_comp_channel *ibv_create_comp_channel(struct ibv_context *context) 400 { 401 struct ibv_comp_channel *channel; 402 struct ibv_create_comp_channel cmd; 403 struct ibv_create_comp_channel_resp resp; 404 405 if (abi_ver <= 2) 406 return ibv_create_comp_channel_v2(context); 407 408 channel = malloc(sizeof *channel); 409 if (!channel) 410 return NULL; 411 412 IBV_INIT_CMD_RESP(&cmd, sizeof cmd, CREATE_COMP_CHANNEL, &resp, sizeof resp); 413 if (write(context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd) { 414 free(channel); 415 return NULL; 416 } 417 418 (void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp); 419 420 channel->context = context; 421 channel->fd = resp.fd; 422 channel->refcnt = 
0; 423 424 return channel; 425 } 426 427 static int ibv_destroy_comp_channel_v2(struct ibv_comp_channel *channel) 428 { 429 struct ibv_abi_compat_v2 *t = (struct ibv_abi_compat_v2 *) channel; 430 pthread_mutex_unlock(&t->in_use); 431 return 0; 432 } 433 434 int ibv_destroy_comp_channel(struct ibv_comp_channel *channel) 435 { 436 struct ibv_context *context; 437 int ret; 438 439 context = channel->context; 440 pthread_mutex_lock(&context->mutex); 441 442 if (channel->refcnt) { 443 ret = EBUSY; 444 goto out; 445 } 446 447 if (abi_ver <= 2) { 448 ret = ibv_destroy_comp_channel_v2(channel); 449 goto out; 450 } 451 452 close(channel->fd); 453 free(channel); 454 ret = 0; 455 456 out: 457 pthread_mutex_unlock(&context->mutex); 458 459 return ret; 460 } 461 462 struct ibv_cq *__ibv_create_cq(struct ibv_context *context, int cqe, void *cq_context, 463 struct ibv_comp_channel *channel, int comp_vector) 464 { 465 struct ibv_cq *cq; 466 int err = 0; 467 468 cq = context->ops.create_cq(context, cqe, channel, comp_vector); 469 470 if (!cq) 471 return NULL; 472 473 err = verbs_init_cq(cq, context, channel, cq_context); 474 if (err) 475 goto err; 476 477 return cq; 478 479 err: 480 context->ops.destroy_cq(cq); 481 482 return NULL; 483 } 484 default_symver(__ibv_create_cq, ibv_create_cq); 485 486 int __ibv_resize_cq(struct ibv_cq *cq, int cqe) 487 { 488 if (!cq->context->ops.resize_cq) 489 return ENOSYS; 490 491 return cq->context->ops.resize_cq(cq, cqe); 492 } 493 default_symver(__ibv_resize_cq, ibv_resize_cq); 494 495 int __ibv_destroy_cq(struct ibv_cq *cq) 496 { 497 struct ibv_comp_channel *channel = cq->channel; 498 int ret; 499 500 ret = cq->context->ops.destroy_cq(cq); 501 502 if (channel) { 503 if (!ret) { 504 pthread_mutex_lock(&channel->context->mutex); 505 --channel->refcnt; 506 pthread_mutex_unlock(&channel->context->mutex); 507 } 508 } 509 510 return ret; 511 } 512 default_symver(__ibv_destroy_cq, ibv_destroy_cq); 513 514 int __ibv_get_cq_event(struct ibv_comp_channel 
*channel, 515 struct ibv_cq **cq, void **cq_context) 516 { 517 struct ibv_comp_event ev; 518 519 if (read(channel->fd, &ev, sizeof ev) != sizeof ev) 520 return -1; 521 522 *cq = (struct ibv_cq *) (uintptr_t) ev.cq_handle; 523 *cq_context = (*cq)->cq_context; 524 525 if ((*cq)->context->ops.cq_event) 526 (*cq)->context->ops.cq_event(*cq); 527 528 return 0; 529 } 530 default_symver(__ibv_get_cq_event, ibv_get_cq_event); 531 532 void __ibv_ack_cq_events(struct ibv_cq *cq, unsigned int nevents) 533 { 534 pthread_mutex_lock(&cq->mutex); 535 cq->comp_events_completed += nevents; 536 pthread_cond_signal(&cq->cond); 537 pthread_mutex_unlock(&cq->mutex); 538 } 539 default_symver(__ibv_ack_cq_events, ibv_ack_cq_events); 540 541 struct ibv_srq *__ibv_create_srq(struct ibv_pd *pd, 542 struct ibv_srq_init_attr *srq_init_attr) 543 { 544 struct ibv_srq *srq; 545 546 if (!pd->context->ops.create_srq) 547 return NULL; 548 549 srq = pd->context->ops.create_srq(pd, srq_init_attr); 550 if (!srq) 551 return NULL; 552 553 srq->context = pd->context; 554 srq->srq_context = srq_init_attr->srq_context; 555 srq->pd = pd; 556 srq->events_completed = 0; 557 if (pthread_mutex_init(&srq->mutex, NULL)) 558 goto err; 559 if (pthread_cond_init(&srq->cond, NULL)) 560 goto err_mutex; 561 562 return srq; 563 564 err_mutex: 565 pthread_mutex_destroy(&srq->mutex); 566 err: 567 pd->context->ops.destroy_srq(srq); 568 569 return NULL; 570 } 571 default_symver(__ibv_create_srq, ibv_create_srq); 572 573 int __ibv_modify_srq(struct ibv_srq *srq, 574 struct ibv_srq_attr *srq_attr, 575 int srq_attr_mask) 576 { 577 return srq->context->ops.modify_srq(srq, srq_attr, srq_attr_mask); 578 } 579 default_symver(__ibv_modify_srq, ibv_modify_srq); 580 581 int __ibv_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr) 582 { 583 return srq->context->ops.query_srq(srq, srq_attr); 584 } 585 default_symver(__ibv_query_srq, ibv_query_srq); 586 587 int __ibv_destroy_srq(struct ibv_srq *srq) 588 { 589 
pthread_cond_destroy(&srq->cond); 590 pthread_mutex_destroy(&srq->mutex); 591 return srq->context->ops.destroy_srq(srq); 592 } 593 default_symver(__ibv_destroy_srq, ibv_destroy_srq); 594 595 struct ibv_qp *__ibv_create_qp(struct ibv_pd *pd, 596 struct ibv_qp_init_attr *qp_init_attr) 597 { 598 struct ibv_qp *qp = pd->context->ops.create_qp(pd, qp_init_attr); 599 600 if (qp) { 601 qp->context = pd->context; 602 qp->qp_context = qp_init_attr->qp_context; 603 qp->pd = pd; 604 qp->send_cq = qp_init_attr->send_cq; 605 qp->recv_cq = qp_init_attr->recv_cq; 606 qp->srq = qp_init_attr->srq; 607 qp->qp_type = qp_init_attr->qp_type; 608 qp->state = IBV_QPS_RESET; 609 qp->events_completed = 0; 610 pthread_mutex_init(&qp->mutex, NULL); 611 pthread_cond_init(&qp->cond, NULL); 612 } 613 614 return qp; 615 } 616 default_symver(__ibv_create_qp, ibv_create_qp); 617 618 int __ibv_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, 619 int attr_mask, 620 struct ibv_qp_init_attr *init_attr) 621 { 622 int ret; 623 624 ret = qp->context->ops.query_qp(qp, attr, attr_mask, init_attr); 625 if (ret) 626 return ret; 627 628 if (attr_mask & IBV_QP_STATE) 629 qp->state = attr->qp_state; 630 631 return 0; 632 } 633 default_symver(__ibv_query_qp, ibv_query_qp); 634 635 int __ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, 636 int attr_mask) 637 { 638 int ret; 639 640 ret = qp->context->ops.modify_qp(qp, attr, attr_mask); 641 if (ret) 642 return ret; 643 644 if (attr_mask & IBV_QP_STATE) 645 qp->state = attr->qp_state; 646 647 return 0; 648 } 649 default_symver(__ibv_modify_qp, ibv_modify_qp); 650 651 int __ibv_destroy_qp(struct ibv_qp *qp) 652 { 653 return qp->context->ops.destroy_qp(qp); 654 } 655 default_symver(__ibv_destroy_qp, ibv_destroy_qp); 656 657 struct ibv_ah *__ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) 658 { 659 struct ibv_ah *ah = pd->context->ops.create_ah(pd, attr); 660 661 if (ah) { 662 ah->context = pd->context; 663 ah->pd = pd; 664 } 665 666 return 
ah; 667 } 668 default_symver(__ibv_create_ah, ibv_create_ah); 669 670 /* GID types as appear in sysfs, no change is expected as of ABI 671 * compatibility. 672 */ 673 #define V1_TYPE "IB/RoCE v1" 674 #define V2_TYPE "RoCE v2" 675 int ibv_query_gid_type(struct ibv_context *context, uint8_t port_num, 676 unsigned int index, enum ibv_gid_type *type) 677 { 678 char name[32]; 679 char buff[11]; 680 681 snprintf(name, sizeof(name), "ports/%d/gid_attrs/types/%d", port_num, 682 index); 683 684 /* Reset errno so that we can rely on its value upon any error flow in 685 * ibv_read_sysfs_file. 686 */ 687 errno = 0; 688 if (ibv_read_sysfs_file(context->device->ibdev_path, name, buff, 689 sizeof(buff)) <= 0) { 690 char *dir_path; 691 DIR *dir; 692 693 if (errno == EINVAL) { 694 /* In IB, this file doesn't exist and the kernel sets 695 * errno to -EINVAL. 696 */ 697 *type = IBV_GID_TYPE_IB_ROCE_V1; 698 return 0; 699 } 700 if (asprintf(&dir_path, "%s/%s/%d/%s/", 701 context->device->ibdev_path, "ports", port_num, 702 "gid_attrs") < 0) 703 return -1; 704 dir = opendir(dir_path); 705 free(dir_path); 706 if (!dir) { 707 if (errno == ENOENT) 708 /* Assuming that if gid_attrs doesn't exist, 709 * we have an old kernel and all GIDs are 710 * IB/RoCE v1 711 */ 712 *type = IBV_GID_TYPE_IB_ROCE_V1; 713 else 714 return -1; 715 } else { 716 closedir(dir); 717 errno = EFAULT; 718 return -1; 719 } 720 } else { 721 if (!strcmp(buff, V1_TYPE)) { 722 *type = IBV_GID_TYPE_IB_ROCE_V1; 723 } else if (!strcmp(buff, V2_TYPE)) { 724 *type = IBV_GID_TYPE_ROCE_V2; 725 } else { 726 errno = ENOTSUP; 727 return -1; 728 } 729 } 730 731 return 0; 732 } 733 734 static int ibv_find_gid_index(struct ibv_context *context, uint8_t port_num, 735 union ibv_gid *gid, enum ibv_gid_type gid_type) 736 { 737 enum ibv_gid_type sgid_type = 0; 738 union ibv_gid sgid; 739 int i = 0, ret; 740 741 do { 742 ret = ibv_query_gid(context, port_num, i, &sgid); 743 if (!ret) { 744 ret = ibv_query_gid_type(context, port_num, i, 745 
&sgid_type); 746 } 747 i++; 748 } while (!ret && (memcmp(&sgid, gid, sizeof(*gid)) || 749 (gid_type != sgid_type))); 750 751 return ret ? ret : i - 1; 752 } 753 754 static inline void map_ipv4_addr_to_ipv6(__be32 ipv4, struct in6_addr *ipv6) 755 { 756 ipv6->s6_addr32[0] = 0; 757 ipv6->s6_addr32[1] = 0; 758 ipv6->s6_addr32[2] = htobe32(0x0000FFFF); 759 ipv6->s6_addr32[3] = ipv4; 760 } 761 762 static inline __sum16 ipv4_calc_hdr_csum(uint16_t *data, unsigned int num_hwords) 763 { 764 unsigned int i = 0; 765 uint32_t sum = 0; 766 767 for (i = 0; i < num_hwords; i++) 768 sum += *(data++); 769 770 sum = (sum & 0xffff) + (sum >> 16); 771 772 return (__sum16)~sum; 773 } 774 775 static inline int get_grh_header_version(struct ibv_grh *grh) 776 { 777 int ip6h_version = (be32toh(grh->version_tclass_flow) >> 28) & 0xf; 778 struct ip *ip4h = (struct ip *)((void *)grh + 20); 779 struct ip ip4h_checked; 780 781 if (ip6h_version != 6) { 782 if (ip4h->ip_v == 4) 783 return 4; 784 errno = EPROTONOSUPPORT; 785 return -1; 786 } 787 /* version may be 6 or 4 */ 788 if (ip4h->ip_hl != 5) /* IPv4 header length must be 5 for RoCE v2. */ 789 return 6; 790 /* 791 * Verify checksum. 792 * We can't write on scattered buffers so we have to copy to temp 793 * buffer. 794 */ 795 memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked)); 796 /* Need to set the checksum field (check) to 0 before re-calculating 797 * the checksum. 
798 */ 799 ip4h_checked.ip_sum = 0; 800 ip4h_checked.ip_sum = ipv4_calc_hdr_csum((uint16_t *)&ip4h_checked, 10); 801 /* if IPv4 header checksum is OK, believe it */ 802 if (ip4h->ip_sum == ip4h_checked.ip_sum) 803 return 4; 804 return 6; 805 } 806 807 static inline void set_ah_attr_generic_fields(struct ibv_ah_attr *ah_attr, 808 struct ibv_wc *wc, 809 struct ibv_grh *grh, 810 uint8_t port_num) 811 { 812 uint32_t flow_class; 813 814 flow_class = be32toh(grh->version_tclass_flow); 815 ah_attr->grh.flow_label = flow_class & 0xFFFFF; 816 ah_attr->dlid = wc->slid; 817 ah_attr->sl = wc->sl; 818 ah_attr->src_path_bits = wc->dlid_path_bits; 819 ah_attr->port_num = port_num; 820 } 821 822 static inline int set_ah_attr_by_ipv4(struct ibv_context *context, 823 struct ibv_ah_attr *ah_attr, 824 struct ip *ip4h, uint8_t port_num) 825 { 826 union ibv_gid sgid; 827 int ret; 828 829 /* No point searching multicast GIDs in GID table */ 830 if (IN_CLASSD(be32toh(ip4h->ip_dst.s_addr))) { 831 errno = EINVAL; 832 return -1; 833 } 834 835 map_ipv4_addr_to_ipv6(ip4h->ip_dst.s_addr, (struct in6_addr *)&sgid); 836 ret = ibv_find_gid_index(context, port_num, &sgid, 837 IBV_GID_TYPE_ROCE_V2); 838 if (ret < 0) 839 return ret; 840 841 map_ipv4_addr_to_ipv6(ip4h->ip_src.s_addr, 842 (struct in6_addr *)&ah_attr->grh.dgid); 843 ah_attr->grh.sgid_index = (uint8_t) ret; 844 ah_attr->grh.hop_limit = ip4h->ip_ttl; 845 ah_attr->grh.traffic_class = ip4h->ip_tos; 846 847 return 0; 848 } 849 850 #define IB_NEXT_HDR 0x1b 851 static inline int set_ah_attr_by_ipv6(struct ibv_context *context, 852 struct ibv_ah_attr *ah_attr, 853 struct ibv_grh *grh, uint8_t port_num) 854 { 855 uint32_t flow_class; 856 uint32_t sgid_type; 857 int ret; 858 859 /* No point searching multicast GIDs in GID table */ 860 if (grh->dgid.raw[0] == 0xFF) { 861 errno = EINVAL; 862 return -1; 863 } 864 865 ah_attr->grh.dgid = grh->sgid; 866 if (grh->next_hdr == IPPROTO_UDP) { 867 sgid_type = IBV_GID_TYPE_ROCE_V2; 868 } else if 
(grh->next_hdr == IB_NEXT_HDR) { 869 sgid_type = IBV_GID_TYPE_IB_ROCE_V1; 870 } else { 871 errno = EPROTONOSUPPORT; 872 return -1; 873 } 874 875 ret = ibv_find_gid_index(context, port_num, &grh->dgid, 876 sgid_type); 877 if (ret < 0) 878 return ret; 879 880 ah_attr->grh.sgid_index = (uint8_t) ret; 881 flow_class = be32toh(grh->version_tclass_flow); 882 ah_attr->grh.hop_limit = grh->hop_limit; 883 ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF; 884 885 return 0; 886 } 887 888 int ibv_init_ah_from_wc(struct ibv_context *context, uint8_t port_num, 889 struct ibv_wc *wc, struct ibv_grh *grh, 890 struct ibv_ah_attr *ah_attr) 891 { 892 int version; 893 int ret = 0; 894 895 memset(ah_attr, 0, sizeof *ah_attr); 896 set_ah_attr_generic_fields(ah_attr, wc, grh, port_num); 897 898 if (wc->wc_flags & IBV_WC_GRH) { 899 ah_attr->is_global = 1; 900 version = get_grh_header_version(grh); 901 902 if (version == 4) 903 ret = set_ah_attr_by_ipv4(context, ah_attr, 904 (struct ip *)((void *)grh + 20), 905 port_num); 906 else if (version == 6) 907 ret = set_ah_attr_by_ipv6(context, ah_attr, grh, 908 port_num); 909 else 910 ret = -1; 911 } 912 913 return ret; 914 } 915 916 struct ibv_ah *ibv_create_ah_from_wc(struct ibv_pd *pd, struct ibv_wc *wc, 917 struct ibv_grh *grh, uint8_t port_num) 918 { 919 struct ibv_ah_attr ah_attr; 920 int ret; 921 922 ret = ibv_init_ah_from_wc(pd->context, port_num, wc, grh, &ah_attr); 923 if (ret) 924 return NULL; 925 926 return ibv_create_ah(pd, &ah_attr); 927 } 928 929 int __ibv_destroy_ah(struct ibv_ah *ah) 930 { 931 return ah->context->ops.destroy_ah(ah); 932 } 933 default_symver(__ibv_destroy_ah, ibv_destroy_ah); 934 935 int __ibv_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid) 936 { 937 return qp->context->ops.attach_mcast(qp, gid, lid); 938 } 939 default_symver(__ibv_attach_mcast, ibv_attach_mcast); 940 941 int __ibv_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid) 942 { 943 return 
qp->context->ops.detach_mcast(qp, gid, lid); 944 } 945 default_symver(__ibv_detach_mcast, ibv_detach_mcast); 946 947 static inline int ipv6_addr_v4mapped(const struct in6_addr *a) 948 { 949 return IN6_IS_ADDR_V4MAPPED(a) || 950 /* IPv4 encoded multicast addresses */ 951 (a->s6_addr32[0] == htobe32(0xff0e0000) && 952 ((a->s6_addr32[1] | 953 (a->s6_addr32[2] ^ htobe32(0x0000ffff))) == 0UL)); 954 } 955 956 struct peer_address { 957 void *address; 958 uint32_t size; 959 }; 960 961 static inline int create_peer_from_gid(int family, void *raw_gid, 962 struct peer_address *peer_address) 963 { 964 switch (family) { 965 case AF_INET: 966 peer_address->address = raw_gid + 12; 967 peer_address->size = 4; 968 break; 969 case AF_INET6: 970 peer_address->address = raw_gid; 971 peer_address->size = 16; 972 break; 973 default: 974 return -1; 975 } 976 977 return 0; 978 } 979 980 #define NEIGH_GET_DEFAULT_TIMEOUT_MS 3000 981 int ibv_resolve_eth_l2_from_gid(struct ibv_context *context, 982 struct ibv_ah_attr *attr, 983 uint8_t eth_mac[ETHERNET_LL_SIZE], 984 uint16_t *vid) 985 { 986 #ifndef NRESOLVE_NEIGH 987 int dst_family; 988 int src_family; 989 int oif; 990 struct get_neigh_handler neigh_handler; 991 union ibv_gid sgid; 992 int ether_len; 993 struct peer_address src; 994 struct peer_address dst; 995 uint16_t ret_vid; 996 int ret = -EINVAL; 997 int err; 998 999 err = ibv_query_gid(context, attr->port_num, 1000 attr->grh.sgid_index, &sgid); 1001 1002 if (err) 1003 return err; 1004 1005 err = neigh_init_resources(&neigh_handler, 1006 NEIGH_GET_DEFAULT_TIMEOUT_MS); 1007 1008 if (err) 1009 return err; 1010 1011 dst_family = ipv6_addr_v4mapped((struct in6_addr *)attr->grh.dgid.raw) ? 1012 AF_INET : AF_INET6; 1013 src_family = ipv6_addr_v4mapped((struct in6_addr *)sgid.raw) ? 
1014 AF_INET : AF_INET6; 1015 1016 if (create_peer_from_gid(dst_family, attr->grh.dgid.raw, &dst)) 1017 goto free_resources; 1018 1019 if (create_peer_from_gid(src_family, &sgid.raw, &src)) 1020 goto free_resources; 1021 1022 if (neigh_set_dst(&neigh_handler, dst_family, dst.address, 1023 dst.size)) 1024 goto free_resources; 1025 1026 if (neigh_set_src(&neigh_handler, src_family, src.address, 1027 src.size)) 1028 goto free_resources; 1029 1030 oif = neigh_get_oif_from_src(&neigh_handler); 1031 1032 if (oif > 0) 1033 neigh_set_oif(&neigh_handler, oif); 1034 else 1035 goto free_resources; 1036 1037 ret = -EHOSTUNREACH; 1038 1039 /* blocking call */ 1040 if (process_get_neigh(&neigh_handler)) 1041 goto free_resources; 1042 1043 ret_vid = neigh_get_vlan_id_from_dev(&neigh_handler); 1044 1045 if (ret_vid <= 0xfff) 1046 neigh_set_vlan_id(&neigh_handler, ret_vid); 1047 1048 /* We are using only Ethernet here */ 1049 ether_len = neigh_get_ll(&neigh_handler, 1050 eth_mac, 1051 sizeof(uint8_t) * ETHERNET_LL_SIZE); 1052 1053 if (ether_len <= 0) 1054 goto free_resources; 1055 1056 *vid = ret_vid; 1057 1058 ret = 0; 1059 1060 free_resources: 1061 neigh_free_resources(&neigh_handler); 1062 1063 return ret; 1064 #else 1065 return -ENOSYS; 1066 #endif 1067 } 1068