/*
 * Copyright (c) 2007 Cisco, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <config.h>

#include <infiniband/endian.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <pthread.h>
#include <errno.h>

#include "mlx4.h"
#include "mlx4-abi.h"
#include "wqe.h"

int mlx4_query_device(struct ibv_context *context, struct ibv_device_attr *attr)
{
	struct ibv_query_device cmd;
	uint64_t raw_fw_ver;
	unsigned major, minor, sub_minor;
	int ret;

	ret = ibv_cmd_query_device(context, attr, &raw_fw_ver, &cmd, sizeof cmd);
	if (ret)
		return ret;

	major = (raw_fw_ver >> 32) & 0xffff;
	minor = (raw_fw_ver >> 16) & 0xffff;
	sub_minor = raw_fw_ver & 0xffff;

	snprintf(attr->fw_ver, sizeof attr->fw_ver,
		 "%d.%d.%03d", major, minor, sub_minor);

	return 0;
}

int mlx4_query_device_ex(struct ibv_context *context,
			 const struct ibv_query_device_ex_input *input,
			 struct ibv_device_attr_ex *attr,
			 size_t attr_size)
{
	struct mlx4_context *mctx = to_mctx(context);
	struct mlx4_query_device_ex_resp resp = {};
	struct mlx4_query_device_ex cmd = {};
	uint64_t raw_fw_ver;
	unsigned sub_minor;
	unsigned major;
	unsigned minor;
	int err;

	err = ibv_cmd_query_device_ex(context, input, attr, attr_size,
				      &raw_fw_ver,
				      &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd),
				      &resp.ibv_resp, sizeof(resp.ibv_resp),
				      sizeof(resp));
	if (err)
		return err;

	if (resp.comp_mask & MLX4_QUERY_DEV_RESP_MASK_CORE_CLOCK_OFFSET) {
		mctx->core_clock.offset = resp.hca_core_clock_offset;
		mctx->core_clock.offset_valid = 1;
	}

	major = (raw_fw_ver >> 32) & 0xffff;
	minor = (raw_fw_ver >> 16) & 0xffff;
	sub_minor = raw_fw_ver & 0xffff;

	snprintf(attr->orig_attr.fw_ver, sizeof attr->orig_attr.fw_ver,
		 "%d.%d.%03d", major, minor, sub_minor);

	return 0;
}
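/*
 * The HCA exposes a free-running core-clock counter through a
 * memory-mapped page (ctx->hca_core_clock).  mlx4_read_clock() below
 * reads it as two big-endian 32-bit words; sampling the high word,
 * then the low word, then the high word again (and retrying when the
 * high word changed) guards against the low word wrapping between
 * the two reads.
 */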
#define READL(ptr) (*((uint32_t *)(ptr)))
static int mlx4_read_clock(struct ibv_context *context, uint64_t *cycles)
{
	unsigned int clockhi, clocklo, clockhi1;
	int i;
	struct mlx4_context *ctx = to_mctx(context);

	if (!ctx->hca_core_clock)
		return -EOPNOTSUPP;

	/* Handle wraparound */
	for (i = 0; i < 2; i++) {
		clockhi = be32toh(READL(ctx->hca_core_clock));
		clocklo = be32toh(READL(ctx->hca_core_clock + 4));
		clockhi1 = be32toh(READL(ctx->hca_core_clock));
		if (clockhi == clockhi1)
			break;
	}

	*cycles = (uint64_t)clockhi << 32 | (uint64_t)clocklo;

	return 0;
}

int mlx4_query_rt_values(struct ibv_context *context,
			 struct ibv_values_ex *values)
{
	uint32_t comp_mask = 0;
	int err = 0;

	if (values->comp_mask & IBV_VALUES_MASK_RAW_CLOCK) {
		uint64_t cycles;

		err = mlx4_read_clock(context, &cycles);
		if (!err) {
			values->raw_clock.tv_sec = 0;
			values->raw_clock.tv_nsec = cycles;
			comp_mask |= IBV_VALUES_MASK_RAW_CLOCK;
		}
	}

	values->comp_mask = comp_mask;

	return err;
}

int mlx4_query_port(struct ibv_context *context, uint8_t port,
		    struct ibv_port_attr *attr)
{
	struct ibv_query_port cmd;
	int err;

	err = ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
	if (!err && port <= MLX4_PORTS_NUM && port > 0) {
		struct mlx4_context *mctx = to_mctx(context);

		if (!mctx->port_query_cache[port - 1].valid) {
			mctx->port_query_cache[port - 1].link_layer =
				attr->link_layer;
			mctx->port_query_cache[port - 1].caps =
				attr->port_cap_flags;
			mctx->port_query_cache[port - 1].valid = 1;
		}
	}

	return err;
}
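/*
 * The per-port cache above is filled lazily by the first successful
 * mlx4_query_port() call and is consumed by query_port_cache(), which
 * lets mlx4_create_ah() learn the port's link layer and capability
 * flags without an extra kernel round trip per address handle.
 */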
/* Only the fields in the port cache will be valid */
static int query_port_cache(struct ibv_context *context, uint8_t port_num,
			    struct ibv_port_attr *port_attr)
{
	struct mlx4_context *mctx = to_mctx(context);

	if (port_num <= 0 || port_num > MLX4_PORTS_NUM)
		return -EINVAL;
	if (mctx->port_query_cache[port_num - 1].valid) {
		port_attr->link_layer =
			mctx->port_query_cache[port_num - 1].link_layer;
		port_attr->port_cap_flags =
			mctx->port_query_cache[port_num - 1].caps;
		return 0;
	}
	return mlx4_query_port(context, port_num,
			       (struct ibv_port_attr *)port_attr);
}

struct ibv_pd *mlx4_alloc_pd(struct ibv_context *context)
{
	struct ibv_alloc_pd cmd;
	struct mlx4_alloc_pd_resp resp;
	struct mlx4_pd *pd;

	pd = malloc(sizeof *pd);
	if (!pd)
		return NULL;

	if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof cmd,
			     &resp.ibv_resp, sizeof resp)) {
		free(pd);
		return NULL;
	}

	pd->pdn = resp.pdn;

	return &pd->ibv_pd;
}

int mlx4_free_pd(struct ibv_pd *pd)
{
	int ret;

	ret = ibv_cmd_dealloc_pd(pd);
	if (ret)
		return ret;

	free(to_mpd(pd));
	return 0;
}

struct ibv_xrcd *mlx4_open_xrcd(struct ibv_context *context,
				struct ibv_xrcd_init_attr *attr)
{
	struct ibv_open_xrcd cmd;
	struct ibv_open_xrcd_resp resp;
	struct verbs_xrcd *xrcd;
	int ret;

	xrcd = calloc(1, sizeof *xrcd);
	if (!xrcd)
		return NULL;

	ret = ibv_cmd_open_xrcd(context, xrcd, sizeof(*xrcd), attr,
				&cmd, sizeof cmd, &resp, sizeof resp);
	if (ret)
		goto err;

	return &xrcd->xrcd;

err:
	free(xrcd);
	return NULL;
}

int mlx4_close_xrcd(struct ibv_xrcd *ib_xrcd)
{
	struct verbs_xrcd *xrcd = container_of(ib_xrcd, struct verbs_xrcd, xrcd);
	int ret;

	ret = ibv_cmd_close_xrcd(xrcd);
	if (!ret)
		free(xrcd);

	return ret;
}

struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
			   int access)
{
	struct ibv_mr *mr;
	struct ibv_reg_mr cmd;
	struct ibv_reg_mr_resp resp;
	int ret;

	mr = malloc(sizeof *mr);
	if (!mr)
		return NULL;

	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr,
			     access, mr, &cmd, sizeof cmd,
			     &resp, sizeof resp);
	if (ret) {
		free(mr);
		return NULL;
	}

	return mr;
}

int mlx4_rereg_mr(struct ibv_mr *mr,
		  int flags,
		  struct ibv_pd *pd, void *addr,
		  size_t length, int access)
{
	struct ibv_rereg_mr cmd;
	struct ibv_rereg_mr_resp resp;

	if (flags & IBV_REREG_MR_KEEP_VALID)
		return ENOTSUP;

	return ibv_cmd_rereg_mr(mr, flags, addr, length,
				(uintptr_t)addr,
				access, pd,
				&cmd, sizeof(cmd),
				&resp, sizeof(resp));
}

int mlx4_dereg_mr(struct ibv_mr *mr)
{
	int ret;

	ret = ibv_cmd_dereg_mr(mr);
	if (ret)
		return ret;

	free(mr);
	return 0;
}

struct ibv_mw *mlx4_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type)
{
	struct ibv_mw *mw;
	struct ibv_alloc_mw cmd;
	struct ibv_alloc_mw_resp resp;
	int ret;

	mw = calloc(1, sizeof(*mw));
	if (!mw)
		return NULL;

	ret = ibv_cmd_alloc_mw(pd, type, mw, &cmd, sizeof(cmd),
			       &resp, sizeof(resp));
	if (ret) {
		free(mw);
		return NULL;
	}

	return mw;
}

int mlx4_dealloc_mw(struct ibv_mw *mw)
{
	int ret;
	struct ibv_dealloc_mw cmd;

	ret = ibv_cmd_dealloc_mw(mw, &cmd, sizeof(cmd));
	if (ret)
		return ret;

	free(mw);
	return 0;
}
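/*
 * Memory window binding is implemented entirely in user space: the
 * bind is posted to the QP's send queue as an IBV_WR_BIND_MW work
 * request carrying a freshly incremented rkey, and the window's rkey
 * is updated only after the post succeeds.
 */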
int mlx4_bind_mw(struct ibv_qp *qp, struct ibv_mw *mw,
		 struct ibv_mw_bind *mw_bind)
{
	struct ibv_send_wr *bad_wr = NULL;
	struct ibv_send_wr wr = { };
	int ret;

	wr.opcode = IBV_WR_BIND_MW;
	wr.next = NULL;

	wr.wr_id = mw_bind->wr_id;
	wr.send_flags = mw_bind->send_flags;

	wr.bind_mw.mw = mw;
	wr.bind_mw.rkey = ibv_inc_rkey(mw->rkey);
	wr.bind_mw.bind_info = mw_bind->bind_info;

	ret = mlx4_post_send(qp, &wr, &bad_wr);
	if (ret)
		return ret;

	/* Update the MW with the latest rkey. */
	mw->rkey = wr.bind_mw.rkey;

	return 0;
}

int align_queue_size(int req)
{
	int nent;

	for (nent = 1; nent < req; nent <<= 1)
		; /* nothing */

	return nent;
}

enum {
	CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS |
				       IBV_WC_EX_WITH_COMPLETION_TIMESTAMP
};

enum {
	CREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS
};

enum {
	CREATE_CQ_SUPPORTED_FLAGS = IBV_CREATE_CQ_ATTR_SINGLE_THREADED
};

static int mlx4_cmd_create_cq(struct ibv_context *context,
			      struct ibv_cq_init_attr_ex *cq_attr,
			      struct mlx4_cq *cq)
{
	struct mlx4_create_cq cmd = {};
	struct mlx4_create_cq_resp resp = {};
	int ret;

	cmd.buf_addr = (uintptr_t) cq->buf.buf;
	cmd.db_addr = (uintptr_t) cq->set_ci_db;

	ret = ibv_cmd_create_cq(context, cq_attr->cqe, cq_attr->channel,
				cq_attr->comp_vector,
				ibv_cq_ex_to_cq(&cq->ibv_cq),
				&cmd.ibv_cmd, sizeof(cmd),
				&resp.ibv_resp, sizeof(resp));
	if (!ret)
		cq->cqn = resp.cqn;

	return ret;
}

static int mlx4_cmd_create_cq_ex(struct ibv_context *context,
				 struct ibv_cq_init_attr_ex *cq_attr,
				 struct mlx4_cq *cq)
{
	struct mlx4_create_cq_ex cmd = {};
	struct mlx4_create_cq_resp_ex resp = {};
	int ret;

	cmd.buf_addr = (uintptr_t) cq->buf.buf;
	cmd.db_addr = (uintptr_t) cq->set_ci_db;

	ret = ibv_cmd_create_cq_ex(context, cq_attr,
				   &cq->ibv_cq, &cmd.ibv_cmd,
				   sizeof(cmd.ibv_cmd),
				   sizeof(cmd),
				   &resp.ibv_resp,
				   sizeof(resp.ibv_resp),
				   sizeof(resp));
	if (!ret)
		cq->cqn = resp.cqn;

	return ret;
}
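/*
 * CQ sizing below works in powers of two: the requested CQE count is
 * bumped by one (one slot stays reserved), rounded up with
 * align_queue_size() (e.g. a request for 100 CQEs allocates a
 * 128-entry buffer), and the count handed to the kernel is the
 * rounded size minus one.
 */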
static struct ibv_cq_ex *create_cq(struct ibv_context *context,
				   struct ibv_cq_init_attr_ex *cq_attr,
				   int cq_alloc_flags)
{
	struct mlx4_cq *cq;
	int ret;
	struct mlx4_context *mctx = to_mctx(context);

	/* Sanity check CQ size before proceeding */
	if (cq_attr->cqe > 0x3fffff) {
		errno = EINVAL;
		return NULL;
	}

	if (cq_attr->comp_mask & ~CREATE_CQ_SUPPORTED_COMP_MASK) {
		errno = ENOTSUP;
		return NULL;
	}

	if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS &&
	    cq_attr->flags & ~CREATE_CQ_SUPPORTED_FLAGS) {
		errno = ENOTSUP;
		return NULL;
	}

	if (cq_attr->wc_flags & ~CREATE_CQ_SUPPORTED_WC_FLAGS)
		return NULL;

	/* mlx4 devices don't support slid and sl in cqe when completion
	 * timestamp is enabled in the CQ
	 */
	if ((cq_attr->wc_flags & (IBV_WC_EX_WITH_SLID | IBV_WC_EX_WITH_SL)) &&
	    (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP)) {
		errno = ENOTSUP;
		return NULL;
	}

	cq = malloc(sizeof *cq);
	if (!cq)
		return NULL;

	cq->cons_index = 0;

	if (pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE))
		goto err;

	cq_attr->cqe = align_queue_size(cq_attr->cqe + 1);

	if (mlx4_alloc_cq_buf(to_mdev(context->device), &cq->buf, cq_attr->cqe, mctx->cqe_size))
		goto err_spl;

	cq->cqe_size = mctx->cqe_size;
	cq->set_ci_db = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_CQ);
	if (!cq->set_ci_db)
		goto err_buf;

	cq->arm_db = cq->set_ci_db + 1;
	*cq->arm_db = 0;
	cq->arm_sn = 1;
	*cq->set_ci_db = 0;
	cq->flags = cq_alloc_flags;

	if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS &&
	    cq_attr->flags & IBV_CREATE_CQ_ATTR_SINGLE_THREADED)
		cq->flags |= MLX4_CQ_FLAGS_SINGLE_THREADED;

	--cq_attr->cqe;
	if (cq_alloc_flags & MLX4_CQ_FLAGS_EXTENDED)
		ret = mlx4_cmd_create_cq_ex(context, cq_attr, cq);
	else
		ret = mlx4_cmd_create_cq(context, cq_attr, cq);

	if (ret)
		goto err_db;

	if (cq_alloc_flags & MLX4_CQ_FLAGS_EXTENDED)
		mlx4_cq_fill_pfns(cq, cq_attr);

	return &cq->ibv_cq;

err_db:
	mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_CQ, cq->set_ci_db);

err_buf:
	mlx4_free_buf(&cq->buf);

err_spl:
	pthread_spin_destroy(&cq->lock);

err:
	free(cq);

	return NULL;
}

struct ibv_cq *mlx4_create_cq(struct ibv_context *context, int cqe,
			      struct ibv_comp_channel *channel,
			      int comp_vector)
{
	struct ibv_cq_ex *cq;
	struct ibv_cq_init_attr_ex cq_attr = {.cqe = cqe, .channel = channel,
					      .comp_vector = comp_vector,
					      .wc_flags = IBV_WC_STANDARD_FLAGS};

	cq = create_cq(context, &cq_attr, 0);
	return cq ? ibv_cq_ex_to_cq(cq) : NULL;
}

struct ibv_cq_ex *mlx4_create_cq_ex(struct ibv_context *context,
				    struct ibv_cq_init_attr_ex *cq_attr)
{
	/*
	 * Make local copy since some attributes might be adjusted
	 * for internal use.
	 */
	struct ibv_cq_init_attr_ex cq_attr_c = {.cqe = cq_attr->cqe,
						.channel = cq_attr->channel,
						.comp_vector = cq_attr->comp_vector,
						.wc_flags = cq_attr->wc_flags,
						.comp_mask = cq_attr->comp_mask,
						.flags = cq_attr->flags};

	return create_cq(context, &cq_attr_c, MLX4_CQ_FLAGS_EXTENDED);
}

int mlx4_resize_cq(struct ibv_cq *ibcq, int cqe)
{
	struct mlx4_cq *cq = to_mcq(ibcq);
	struct mlx4_resize_cq cmd;
	struct ibv_resize_cq_resp resp;
	struct mlx4_buf buf;
	int old_cqe, outst_cqe, ret;

	/* Sanity check CQ size before proceeding */
	if (cqe > 0x3fffff)
		return EINVAL;

	pthread_spin_lock(&cq->lock);

	cqe = align_queue_size(cqe + 1);
	if (cqe == ibcq->cqe + 1) {
		ret = 0;
		goto out;
	}

	/* Can't be smaller than the number of outstanding CQEs */
	outst_cqe = mlx4_get_outstanding_cqes(cq);
	if (cqe < outst_cqe + 1) {
		ret = EINVAL;
		goto out;
	}

	ret = mlx4_alloc_cq_buf(to_mdev(ibcq->context->device), &buf, cqe, cq->cqe_size);
	if (ret)
		goto out;

	old_cqe = ibcq->cqe;
	cmd.buf_addr = (uintptr_t) buf.buf;

	ret = ibv_cmd_resize_cq(ibcq, cqe - 1, &cmd.ibv_cmd, sizeof cmd,
				&resp, sizeof resp);
	if (ret) {
		mlx4_free_buf(&buf);
		goto out;
	}

	mlx4_cq_resize_copy_cqes(cq, buf.buf, old_cqe);

	mlx4_free_buf(&cq->buf);
	cq->buf = buf;
	mlx4_update_cons_index(cq);

out:
	pthread_spin_unlock(&cq->lock);
	return ret;
}

int mlx4_destroy_cq(struct ibv_cq *cq)
{
	int ret;

	ret = ibv_cmd_destroy_cq(cq);
	if (ret)
		return ret;

	verbs_cleanup_cq(cq);
	pthread_spin_destroy(&to_mcq(cq)->lock);
	mlx4_free_db(to_mctx(cq->context), MLX4_DB_TYPE_CQ, to_mcq(cq)->set_ci_db);
	mlx4_free_buf(&to_mcq(cq)->buf);
	free(to_mcq(cq));

	return 0;
}
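/*
 * SRQ creation mirrors the CQ path: max_wr is limited to 1 << 16 and
 * max_sge to 64 up front, the WQE ring is sized to the next power of
 * two above max_wr + 1, and a doorbell record is allocated and zeroed
 * before the create command is issued to the kernel.
 */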
struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
				struct ibv_srq_init_attr *attr)
{
	struct mlx4_create_srq cmd;
	struct mlx4_create_srq_resp resp;
	struct mlx4_srq *srq;
	int ret;

	/* Sanity check SRQ size before proceeding */
	if (attr->attr.max_wr > 1 << 16 || attr->attr.max_sge > 64)
		return NULL;

	srq = malloc(sizeof *srq);
	if (!srq)
		return NULL;

	if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
		goto err;

	srq->max = align_queue_size(attr->attr.max_wr + 1);
	srq->max_gs = attr->attr.max_sge;
	srq->counter = 0;
	srq->ext_srq = 0;

	if (mlx4_alloc_srq_buf(pd, &attr->attr, srq))
		goto err_spl;

	srq->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
	if (!srq->db)
		goto err_free;

	*srq->db = 0;

	cmd.buf_addr = (uintptr_t) srq->buf.buf;
	cmd.db_addr = (uintptr_t) srq->db;

	ret = ibv_cmd_create_srq(pd, &srq->verbs_srq.srq, attr,
				 &cmd.ibv_cmd, sizeof cmd,
				 &resp.ibv_resp, sizeof resp);
	if (ret)
		goto err_db;

	return &srq->verbs_srq.srq;

err_db:
	mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, srq->db);

err_free:
	free(srq->wrid);
	mlx4_free_buf(&srq->buf);

err_spl:
	pthread_spin_destroy(&srq->lock);

err:
	free(srq);

	return NULL;
}

struct ibv_srq *mlx4_create_srq_ex(struct ibv_context *context,
				   struct ibv_srq_init_attr_ex *attr_ex)
{
	if (!(attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) ||
	    (attr_ex->srq_type == IBV_SRQT_BASIC))
		return mlx4_create_srq(attr_ex->pd, (struct ibv_srq_init_attr *) attr_ex);
	else if (attr_ex->srq_type == IBV_SRQT_XRC)
		return mlx4_create_xrc_srq(context, attr_ex);

	return NULL;
}

int mlx4_modify_srq(struct ibv_srq *srq,
		    struct ibv_srq_attr *attr,
		    int attr_mask)
{
	struct ibv_modify_srq cmd;

	return ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof cmd);
}

int mlx4_query_srq(struct ibv_srq *srq,
		   struct ibv_srq_attr *attr)
{
	struct ibv_query_srq cmd;

	return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd);
}

int mlx4_destroy_srq(struct ibv_srq *srq)
{
	int ret;

	if (to_msrq(srq)->ext_srq)
		return mlx4_destroy_xrc_srq(srq);

	ret = ibv_cmd_destroy_srq(srq);
	if (ret)
		return ret;

	mlx4_free_db(to_mctx(srq->context), MLX4_DB_TYPE_RQ, to_msrq(srq)->db);
	mlx4_free_buf(&to_msrq(srq)->buf);
	free(to_msrq(srq)->wrid);
	pthread_spin_destroy(&to_msrq(srq)->lock);
	free(to_msrq(srq));

	return 0;
}

static int mlx4_cmd_create_qp_ex(struct ibv_context *context,
				 struct ibv_qp_init_attr_ex *attr,
				 struct mlx4_create_qp *cmd,
				 struct mlx4_qp *qp)
{
	struct mlx4_create_qp_ex cmd_ex;
	struct mlx4_create_qp_resp_ex resp;
	int ret;

	memset(&cmd_ex, 0, sizeof(cmd_ex));
	memcpy(&cmd_ex.ibv_cmd.base, &cmd->ibv_cmd.user_handle,
	       offsetof(typeof(cmd->ibv_cmd), is_srq) +
	       sizeof(cmd->ibv_cmd.is_srq) -
	       offsetof(typeof(cmd->ibv_cmd), user_handle));

	memcpy(&cmd_ex.drv_ex, &cmd->buf_addr,
	       offsetof(typeof(*cmd), sq_no_prefetch) +
	       sizeof(cmd->sq_no_prefetch) - sizeof(cmd->ibv_cmd));

	ret = ibv_cmd_create_qp_ex2(context, &qp->verbs_qp,
				    sizeof(qp->verbs_qp), attr,
				    &cmd_ex.ibv_cmd, sizeof(cmd_ex.ibv_cmd),
				    sizeof(cmd_ex), &resp.ibv_resp,
				    sizeof(resp.ibv_resp), sizeof(resp));
	return ret;
}

enum {
	MLX4_CREATE_QP_SUP_COMP_MASK = (IBV_QP_INIT_ATTR_PD |
					IBV_QP_INIT_ATTR_XRCD |
					IBV_QP_INIT_ATTR_CREATE_FLAGS),
};

enum {
	MLX4_CREATE_QP_EX2_COMP_MASK = (IBV_QP_INIT_ATTR_CREATE_FLAGS),
};
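/*
 * QP creation: work queue sizes are validated against the limits
 * reported by mlx4_query_device() when available (65536 WRs / 64 SGEs
 * otherwise), rounded up to powers of two, and the send queue gets
 * 2 KB + 1 WQE of spare headroom so the hardware can prefetch safely.
 */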
struct ibv_qp *mlx4_create_qp_ex(struct ibv_context *context,
				 struct ibv_qp_init_attr_ex *attr)
{
	struct mlx4_context *ctx = to_mctx(context);
	struct mlx4_create_qp cmd;
	struct ibv_create_qp_resp resp;
	struct mlx4_qp *qp;
	int ret;

	/* Sanity check QP size before proceeding */
	if (ctx->max_qp_wr) { /* mlx4_query_device succeeded */
		if (attr->cap.max_send_wr > ctx->max_qp_wr ||
		    attr->cap.max_recv_wr > ctx->max_qp_wr ||
		    attr->cap.max_send_sge > ctx->max_sge ||
		    attr->cap.max_recv_sge > ctx->max_sge)
			return NULL;
	} else {
		if (attr->cap.max_send_wr > 65536 ||
		    attr->cap.max_recv_wr > 65536 ||
		    attr->cap.max_send_sge > 64 ||
		    attr->cap.max_recv_sge > 64)
			return NULL;
	}
	if (attr->cap.max_inline_data > 1024)
		return NULL;

	if (attr->comp_mask & ~MLX4_CREATE_QP_SUP_COMP_MASK)
		return NULL;

	qp = calloc(1, sizeof *qp);
	if (!qp)
		return NULL;

	if (attr->qp_type == IBV_QPT_XRC_RECV) {
		attr->cap.max_send_wr = qp->sq.wqe_cnt = 0;
	} else {
		mlx4_calc_sq_wqe_size(&attr->cap, attr->qp_type, qp);
		/*
		 * We need to leave 2 KB + 1 WQE of headroom in the SQ to
		 * allow HW to prefetch.
		 */
		qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
		qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes);
	}

	if (attr->srq || attr->qp_type == IBV_QPT_XRC_SEND ||
	    attr->qp_type == IBV_QPT_XRC_RECV) {
		attr->cap.max_recv_wr = qp->rq.wqe_cnt = attr->cap.max_recv_sge = 0;
	} else {
		qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr);
		if (attr->cap.max_recv_sge < 1)
			attr->cap.max_recv_sge = 1;
		if (attr->cap.max_recv_wr < 1)
			attr->cap.max_recv_wr = 1;
	}

	if (mlx4_alloc_qp_buf(context, &attr->cap, attr->qp_type, qp))
		goto err;

	mlx4_init_qp_indices(qp);

	if (pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE))
		goto err_free;
	if (pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
		goto err_sq_spl;

	if (attr->cap.max_recv_sge) {
		qp->db = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_RQ);
		if (!qp->db)
			goto err_rq_spl;

		*qp->db = 0;
		cmd.db_addr = (uintptr_t) qp->db;
	} else {
		cmd.db_addr = 0;
	}

	cmd.buf_addr = (uintptr_t) qp->buf.buf;
	cmd.log_sq_stride = qp->sq.wqe_shift;
	for (cmd.log_sq_bb_count = 0;
	     qp->sq.wqe_cnt > 1 << cmd.log_sq_bb_count;
	     ++cmd.log_sq_bb_count)
		; /* nothing */
	cmd.sq_no_prefetch = 0;	/* OK for ABI 2: just a reserved field */
	memset(cmd.reserved, 0, sizeof cmd.reserved);
	pthread_mutex_lock(&to_mctx(context)->qp_table_mutex);

	if (attr->comp_mask & MLX4_CREATE_QP_EX2_COMP_MASK)
		ret = mlx4_cmd_create_qp_ex(context, attr, &cmd, qp);
	else
		ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp,
					   sizeof(qp->verbs_qp), attr,
					   &cmd.ibv_cmd, sizeof(cmd), &resp,
					   sizeof(resp));
	if (ret)
		goto err_rq_db;

	if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) {
		ret = mlx4_store_qp(to_mctx(context), qp->verbs_qp.qp.qp_num, qp);
		if (ret)
			goto err_destroy;
	}
	pthread_mutex_unlock(&to_mctx(context)->qp_table_mutex);

	qp->rq.wqe_cnt = qp->rq.max_post = attr->cap.max_recv_wr;
	qp->rq.max_gs = attr->cap.max_recv_sge;
	if (attr->qp_type != IBV_QPT_XRC_RECV)
		mlx4_set_sq_sizes(qp, &attr->cap, attr->qp_type);

	qp->doorbell_qpn = htobe32(qp->verbs_qp.qp.qp_num << 8);
	if (attr->sq_sig_all)
		qp->sq_signal_bits = htobe32(MLX4_WQE_CTRL_CQ_UPDATE);
	else
		qp->sq_signal_bits = 0;

	return &qp->verbs_qp.qp;

err_destroy:
	ibv_cmd_destroy_qp(&qp->verbs_qp.qp);

err_rq_db:
	pthread_mutex_unlock(&to_mctx(context)->qp_table_mutex);
	if (attr->cap.max_recv_sge)
		mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_RQ, qp->db);
err_rq_spl:
	pthread_spin_destroy(&qp->rq.lock);
err_sq_spl:
	pthread_spin_destroy(&qp->sq.lock);
err_free:
	free(qp->sq.wrid);
	if (qp->rq.wqe_cnt)
		free(qp->rq.wrid);
	mlx4_free_buf(&qp->buf);

err:
	free(qp);

	return NULL;
}

struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
{
	struct ibv_qp_init_attr_ex attr_ex;
	struct ibv_qp *qp;

	memcpy(&attr_ex, attr, sizeof *attr);
	attr_ex.comp_mask = IBV_QP_INIT_ATTR_PD;
	attr_ex.pd = pd;
	qp = mlx4_create_qp_ex(pd->context, &attr_ex);
	if (qp)
		memcpy(attr, &attr_ex, sizeof *attr);
	return qp;
}

struct ibv_qp *mlx4_open_qp(struct ibv_context *context, struct ibv_qp_open_attr *attr)
{
	struct ibv_open_qp cmd;
	struct ibv_create_qp_resp resp;
	struct mlx4_qp *qp;
	int ret;

	qp = calloc(1, sizeof *qp);
	if (!qp)
		return NULL;

	ret = ibv_cmd_open_qp(context, &qp->verbs_qp, sizeof(qp->verbs_qp), attr,
			      &cmd, sizeof cmd, &resp, sizeof resp);
	if (ret)
		goto err;

	return &qp->verbs_qp.qp;

err:
	free(qp);
	return NULL;
}

int mlx4_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
		  int attr_mask,
		  struct ibv_qp_init_attr *init_attr)
{
	struct ibv_query_qp cmd;
	struct mlx4_qp *qp = to_mqp(ibqp);
	int ret;

	ret = ibv_cmd_query_qp(ibqp, attr, attr_mask, init_attr, &cmd, sizeof cmd);
	if (ret)
		return ret;

	init_attr->cap.max_send_wr = qp->sq.max_post;
	init_attr->cap.max_send_sge = qp->sq.max_gs;
	init_attr->cap.max_inline_data = qp->max_inline_data;

	attr->cap = init_attr->cap;

	return 0;
}

int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
		   int attr_mask)
{
	struct ibv_modify_qp cmd = {};
	struct ibv_port_attr port_attr;
	struct mlx4_qp *mqp = to_mqp(qp);
	struct ibv_device_attr device_attr;
	int ret;

	memset(&device_attr, 0, sizeof(device_attr));
	if (attr_mask & IBV_QP_PORT) {
		ret = ibv_query_port(qp->context, attr->port_num,
				     &port_attr);
		if (ret)
			return ret;
		mqp->link_layer = port_attr.link_layer;

		ret = ibv_query_device(qp->context, &device_attr);
		if (ret)
			return ret;

		switch (qp->qp_type) {
		case IBV_QPT_UD:
			if ((mqp->link_layer == IBV_LINK_LAYER_INFINIBAND) &&
			    (device_attr.device_cap_flags & IBV_DEVICE_UD_IP_CSUM))
				mqp->qp_cap_cache |= MLX4_CSUM_SUPPORT_UD_OVER_IB |
						     MLX4_RX_CSUM_VALID;
			break;
		case IBV_QPT_RAW_PACKET:
			if ((mqp->link_layer == IBV_LINK_LAYER_ETHERNET) &&
			    (device_attr.device_cap_flags & IBV_DEVICE_RAW_IP_CSUM))
				mqp->qp_cap_cache |= MLX4_CSUM_SUPPORT_RAW_OVER_ETH |
						     MLX4_RX_CSUM_VALID;
			break;
		default:
			break;
		}
	}

	if (qp->state == IBV_QPS_RESET &&
	    attr_mask & IBV_QP_STATE &&
	    attr->qp_state == IBV_QPS_INIT) {
		mlx4_qp_init_sq_ownership(to_mqp(qp));
	}

	ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof cmd);

	if (!ret &&
	    (attr_mask & IBV_QP_STATE) &&
	    attr->qp_state == IBV_QPS_RESET) {
		if (qp->recv_cq)
			mlx4_cq_clean(to_mcq(qp->recv_cq), qp->qp_num,
				      qp->srq ? to_msrq(qp->srq) : NULL);
		if (qp->send_cq && qp->send_cq != qp->recv_cq)
			mlx4_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL);

		mlx4_init_qp_indices(to_mqp(qp));
		if (to_mqp(qp)->rq.wqe_cnt)
			*to_mqp(qp)->db = 0;
	}

	return ret;
}
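/*
 * When a QP is destroyed, both of its CQs must be locked.  To avoid
 * an ABBA deadlock against another thread destroying a QP that uses
 * the same pair of CQs, the locks are always taken in ascending CQN
 * order and released in the reverse order.
 */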
static void mlx4_lock_cqs(struct ibv_qp *qp)
{
	struct mlx4_cq *send_cq = to_mcq(qp->send_cq);
	struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq);

	if (!qp->send_cq || !qp->recv_cq) {
		if (qp->send_cq)
			pthread_spin_lock(&send_cq->lock);
		else if (qp->recv_cq)
			pthread_spin_lock(&recv_cq->lock);
	} else if (send_cq == recv_cq) {
		pthread_spin_lock(&send_cq->lock);
	} else if (send_cq->cqn < recv_cq->cqn) {
		pthread_spin_lock(&send_cq->lock);
		pthread_spin_lock(&recv_cq->lock);
	} else {
		pthread_spin_lock(&recv_cq->lock);
		pthread_spin_lock(&send_cq->lock);
	}
}

static void mlx4_unlock_cqs(struct ibv_qp *qp)
{
	struct mlx4_cq *send_cq = to_mcq(qp->send_cq);
	struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq);

	if (!qp->send_cq || !qp->recv_cq) {
		if (qp->send_cq)
			pthread_spin_unlock(&send_cq->lock);
		else if (qp->recv_cq)
			pthread_spin_unlock(&recv_cq->lock);
	} else if (send_cq == recv_cq) {
		pthread_spin_unlock(&send_cq->lock);
	} else if (send_cq->cqn < recv_cq->cqn) {
		pthread_spin_unlock(&recv_cq->lock);
		pthread_spin_unlock(&send_cq->lock);
	} else {
		pthread_spin_unlock(&send_cq->lock);
		pthread_spin_unlock(&recv_cq->lock);
	}
}

int mlx4_destroy_qp(struct ibv_qp *ibqp)
{
	struct mlx4_qp *qp = to_mqp(ibqp);
	int ret;

	pthread_mutex_lock(&to_mctx(ibqp->context)->qp_table_mutex);
	ret = ibv_cmd_destroy_qp(ibqp);
	if (ret) {
		pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex);
		return ret;
	}

	mlx4_lock_cqs(ibqp);

	if (ibqp->recv_cq)
		__mlx4_cq_clean(to_mcq(ibqp->recv_cq), ibqp->qp_num,
				ibqp->srq ? to_msrq(ibqp->srq) : NULL);
	if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq)
		__mlx4_cq_clean(to_mcq(ibqp->send_cq), ibqp->qp_num, NULL);

	if (qp->sq.wqe_cnt || qp->rq.wqe_cnt)
		mlx4_clear_qp(to_mctx(ibqp->context), ibqp->qp_num);

	mlx4_unlock_cqs(ibqp);
	pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex);

	pthread_spin_destroy(&qp->rq.lock);
	pthread_spin_destroy(&qp->sq.lock);

	if (qp->rq.wqe_cnt) {
		mlx4_free_db(to_mctx(ibqp->context), MLX4_DB_TYPE_RQ, qp->db);
		free(qp->rq.wrid);
	}
	if (qp->sq.wqe_cnt)
		free(qp->sq.wrid);
	mlx4_free_buf(&qp->buf);
	free(qp);

	return 0;
}

static int link_local_gid(const union ibv_gid *gid)
{
	uint32_t *tmp = (uint32_t *)gid->raw;
	uint32_t hi = tmp[0];
	uint32_t lo = tmp[1];

	if (hi == htobe32(0xfe800000) && lo == 0)
		return 1;

	return 0;
}

static int is_multicast_gid(const union ibv_gid *gid)
{
	return gid->raw[0] == 0xff;
}

static uint16_t get_vlan_id(union ibv_gid *gid)
{
	uint16_t vid;

	vid = gid->raw[11] << 8 | gid->raw[12];
	return vid < 0x1000 ? vid : 0xffff;
}
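/*
 * For RoCE, the destination L2 address is recovered from the GRH: a
 * link-local unicast GID carries the peer MAC as a modified EUI-64
 * (the "^= 2" below undoes the universal/local bit flip), a multicast
 * GID maps onto the 33:33:xx:xx:xx:xx Ethernet multicast range, and
 * GID bytes 11-12 carry the VLAN ID (0xffff meaning "untagged").
 */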
static int mlx4_resolve_grh_to_l2(struct ibv_pd *pd, struct mlx4_ah *ah,
				  struct ibv_ah_attr *attr)
{
	int err, i;
	uint16_t vid;
	union ibv_gid sgid;

	if (link_local_gid(&attr->grh.dgid)) {
		memcpy(ah->mac, &attr->grh.dgid.raw[8], 3);
		memcpy(ah->mac + 3, &attr->grh.dgid.raw[13], 3);
		ah->mac[0] ^= 2;

		vid = get_vlan_id(&attr->grh.dgid);
	} else if (is_multicast_gid(&attr->grh.dgid)) {
		ah->mac[0] = 0x33;
		ah->mac[1] = 0x33;
		for (i = 2; i < 6; ++i)
			ah->mac[i] = attr->grh.dgid.raw[i + 10];

		err = ibv_query_gid(pd->context, attr->port_num,
				    attr->grh.sgid_index, &sgid);
		if (err)
			return err;

		ah->av.dlid = htobe16(0xc000);
		ah->av.port_pd |= htobe32(1 << 31);

		vid = get_vlan_id(&sgid);
	} else
		return 1;

	if (vid != 0xffff) {
		ah->av.port_pd |= htobe32(1 << 29);
		ah->vlan = vid | ((attr->sl & 7) << 13);
	}

	return 0;
}

struct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
{
	struct mlx4_ah *ah;
	struct ibv_port_attr port_attr;

	if (query_port_cache(pd->context, attr->port_num, &port_attr))
		return NULL;

	ah = malloc(sizeof *ah);
	if (!ah)
		return NULL;

	memset(&ah->av, 0, sizeof ah->av);

	ah->av.port_pd = htobe32(to_mpd(pd)->pdn | (attr->port_num << 24));

	if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) {
		ah->av.g_slid = attr->src_path_bits;
		ah->av.dlid = htobe16(attr->dlid);
		ah->av.sl_tclass_flowlabel = htobe32(attr->sl << 28);
	} else
		ah->av.sl_tclass_flowlabel = htobe32(attr->sl << 29);

	if (attr->static_rate) {
		ah->av.stat_rate = attr->static_rate + MLX4_STAT_RATE_OFFSET;
		/* XXX check rate cap? */
	}
	if (attr->is_global) {
		ah->av.g_slid |= 0x80;
		ah->av.gid_index = attr->grh.sgid_index;
		ah->av.hop_limit = attr->grh.hop_limit;
		ah->av.sl_tclass_flowlabel |=
			htobe32((attr->grh.traffic_class << 20) |
				attr->grh.flow_label);
		memcpy(ah->av.dgid, attr->grh.dgid.raw, 16);
	}

	if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) {
		if (port_attr.port_cap_flags & IBV_PORT_IP_BASED_GIDS) {
			uint16_t vid;

			if (ibv_resolve_eth_l2_from_gid(pd->context, attr,
							ah->mac, &vid)) {
				free(ah);
				return NULL;
			}

			if (vid <= 0xfff) {
				ah->av.port_pd |= htobe32(1 << 29);
				ah->vlan = vid | ((attr->sl & 7) << 13);
			}
		} else {
			if (mlx4_resolve_grh_to_l2(pd, ah, attr)) {
				free(ah);
				return NULL;
			}
		}
	}

	return &ah->ibv_ah;
}

int mlx4_destroy_ah(struct ibv_ah *ah)
{
	free(to_mah(ah));

	return 0;
}