1 /* 2 * Copyright (c) 2005 Topspin Communications. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 #define _GNU_SOURCE 33 #include <config.h> 34 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <unistd.h> 38 #include <string.h> 39 #include <sys/types.h> 40 #include <sys/socket.h> 41 #include <sys/time.h> 42 #include <netdb.h> 43 #include <stdlib.h> 44 #include <getopt.h> 45 #include <arpa/inet.h> 46 #include <time.h> 47 #include <inttypes.h> 48 49 #include "pingpong.h" 50 51 #include <sys/param.h> 52 53 enum { 54 PINGPONG_RECV_WRID = 1, 55 PINGPONG_SEND_WRID = 2, 56 }; 57 58 static int page_size; 59 static int use_odp; 60 static int use_ts; 61 62 struct pingpong_context { 63 struct ibv_context *context; 64 struct ibv_comp_channel *channel; 65 struct ibv_pd *pd; 66 struct ibv_mr *mr; 67 union { 68 struct ibv_cq *cq; 69 struct ibv_cq_ex *cq_ex; 70 } cq_s; 71 struct ibv_qp *qp; 72 void *buf; 73 int size; 74 int send_flags; 75 int rx_depth; 76 int pending; 77 struct ibv_port_attr portinfo; 78 uint64_t completion_timestamp_mask; 79 }; 80 81 static struct ibv_cq *pp_cq(struct pingpong_context *ctx) 82 { 83 return use_ts ? ibv_cq_ex_to_cq(ctx->cq_s.cq_ex) : 84 ctx->cq_s.cq; 85 } 86 87 struct pingpong_dest { 88 int lid; 89 int qpn; 90 int psn; 91 union ibv_gid gid; 92 }; 93 94 static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn, 95 enum ibv_mtu mtu, int sl, 96 struct pingpong_dest *dest, int sgid_idx) 97 { 98 struct ibv_qp_attr attr = { 99 .qp_state = IBV_QPS_RTR, 100 .path_mtu = mtu, 101 .dest_qp_num = dest->qpn, 102 .rq_psn = dest->psn, 103 .max_dest_rd_atomic = 1, 104 .min_rnr_timer = 12, 105 .ah_attr = { 106 .is_global = 0, 107 .dlid = dest->lid, 108 .sl = sl, 109 .src_path_bits = 0, 110 .port_num = port 111 } 112 }; 113 114 if (dest->gid.global.interface_id) { 115 attr.ah_attr.is_global = 1; 116 attr.ah_attr.grh.hop_limit = 1; 117 attr.ah_attr.grh.dgid = dest->gid; 118 attr.ah_attr.grh.sgid_index = sgid_idx; 119 } 120 if (ibv_modify_qp(ctx->qp, &attr, 121 IBV_QP_STATE | 122 IBV_QP_AV | 123 IBV_QP_PATH_MTU | 124 IBV_QP_DEST_QPN | 125 IBV_QP_RQ_PSN | 126 IBV_QP_MAX_DEST_RD_ATOMIC | 127 IBV_QP_MIN_RNR_TIMER)) { 128 fprintf(stderr, "Failed to modify QP to RTR\n"); 129 return 1; 130 } 131 132 attr.qp_state = IBV_QPS_RTS; 133 attr.timeout = 14; 134 attr.retry_cnt = 7; 135 attr.rnr_retry = 7; 136 attr.sq_psn = my_psn; 137 attr.max_rd_atomic = 1; 138 if (ibv_modify_qp(ctx->qp, &attr, 139 IBV_QP_STATE | 140 IBV_QP_TIMEOUT | 141 IBV_QP_RETRY_CNT | 142 IBV_QP_RNR_RETRY | 143 IBV_QP_SQ_PSN | 144 IBV_QP_MAX_QP_RD_ATOMIC)) { 145 fprintf(stderr, "Failed to modify QP to RTS\n"); 146 return 1; 147 } 148 149 return 0; 150 } 151 152 static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port, 153 const struct pingpong_dest *my_dest) 154 { 155 struct addrinfo *res, *t; 156 struct addrinfo hints = { 157 .ai_family = AF_UNSPEC, 158 .ai_socktype = SOCK_STREAM 159 }; 160 char *service; 161 char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"]; 162 int n; 163 int sockfd = -1; 164 struct pingpong_dest *rem_dest = NULL; 165 char gid[33]; 166 167 if (asprintf(&service, "%d", port) < 0) 168 return NULL; 169 170 n = getaddrinfo(servername, service, &hints, &res); 171 172 if (n < 0) { 173 fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port); 174 free(service); 175 return NULL; 176 } 177 178 for (t = res; t; t = t->ai_next) { 179 sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); 180 if (sockfd >= 0) { 181 if (!connect(sockfd, t->ai_addr, t->ai_addrlen)) 182 break; 183 close(sockfd); 184 sockfd = -1; 185 } 186 } 187 188 freeaddrinfo_null(res); 189 free(service); 190 191 if (sockfd < 0) { 192 fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port); 193 return NULL; 194 } 195 196 gid_to_wire_gid(&my_dest->gid, gid); 197 sprintf(msg, "%04x:%06x:%06x:%s", my_dest->lid, my_dest->qpn, 198 my_dest->psn, gid); 199 if (write(sockfd, msg, sizeof msg) != sizeof msg) { 200 fprintf(stderr, "Couldn't send local address\n"); 201 goto out; 202 } 203 204 if (read(sockfd, msg, sizeof msg) != sizeof msg || 205 write(sockfd, "done", sizeof "done") != sizeof "done") { 206 perror("client read/write"); 207 fprintf(stderr, "Couldn't read/write remote address\n"); 208 goto out; 209 } 210 211 rem_dest = malloc(sizeof *rem_dest); 212 if (!rem_dest) 213 goto out; 214 215 sscanf(msg, "%x:%x:%x:%s", &rem_dest->lid, &rem_dest->qpn, 216 &rem_dest->psn, gid); 217 wire_gid_to_gid(gid, &rem_dest->gid); 218 219 out: 220 close(sockfd); 221 return rem_dest; 222 } 223 224 static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx, 225 int ib_port, enum ibv_mtu mtu, 226 int port, int sl, 227 const struct pingpong_dest *my_dest, 228 int sgid_idx) 229 { 230 struct addrinfo *res, *t; 231 struct addrinfo hints = { 232 .ai_flags = AI_PASSIVE, 233 .ai_family = AF_UNSPEC, 234 .ai_socktype = SOCK_STREAM 235 }; 236 char *service; 237 char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"]; 238 int n; 239 int sockfd = -1, connfd; 240 struct pingpong_dest *rem_dest = NULL; 241 char gid[33]; 242 243 if (asprintf(&service, "%d", port) < 0) 244 return NULL; 245 246 n = getaddrinfo(NULL, service, &hints, &res); 247 248 if (n < 0) { 249 fprintf(stderr, "%s for port %d\n", gai_strerror(n), port); 250 free(service); 251 return NULL; 252 } 253 254 for (t = res; t; t = t->ai_next) { 255 sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); 256 if (sockfd >= 0) { 257 n = 1; 258 259 setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n); 260 261 if (!bind(sockfd, t->ai_addr, t->ai_addrlen)) 262 break; 263 close(sockfd); 264 sockfd = -1; 265 } 266 } 267 268 freeaddrinfo_null(res); 269 free(service); 270 271 if (sockfd < 0) { 272 fprintf(stderr, "Couldn't listen to port %d\n", port); 273 return NULL; 274 } 275 276 if (listen(sockfd, 1) < 0) { 277 perror("listen() failed"); 278 close(sockfd); 279 return NULL; 280 } 281 connfd = accept(sockfd, NULL, NULL); 282 close(sockfd); 283 if (connfd < 0) { 284 fprintf(stderr, "accept() failed\n"); 285 return NULL; 286 } 287 288 n = read(connfd, msg, sizeof msg); 289 if (n != sizeof msg) { 290 perror("server read"); 291 fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg); 292 goto out; 293 } 294 295 rem_dest = malloc(sizeof *rem_dest); 296 if (!rem_dest) 297 goto out; 298 299 sscanf(msg, "%x:%x:%x:%s", &rem_dest->lid, &rem_dest->qpn, 300 &rem_dest->psn, gid); 301 wire_gid_to_gid(gid, &rem_dest->gid); 302 303 if (pp_connect_ctx(ctx, ib_port, my_dest->psn, mtu, sl, rem_dest, 304 sgid_idx)) { 305 fprintf(stderr, "Couldn't connect to remote QP\n"); 306 free(rem_dest); 307 rem_dest = NULL; 308 goto out; 309 } 310 311 312 gid_to_wire_gid(&my_dest->gid, gid); 313 sprintf(msg, "%04x:%06x:%06x:%s", my_dest->lid, my_dest->qpn, 314 my_dest->psn, gid); 315 if (write(connfd, msg, sizeof msg) != sizeof msg || 316 read(connfd, msg, sizeof msg) != sizeof "done") { 317 fprintf(stderr, "Couldn't send/recv local address\n"); 318 free(rem_dest); 319 rem_dest = NULL; 320 goto out; 321 } 322 323 324 out: 325 close(connfd); 326 return rem_dest; 327 } 328 329 static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, 330 int rx_depth, int port, 331 int use_event) 332 { 333 struct pingpong_context *ctx; 334 int access_flags = IBV_ACCESS_LOCAL_WRITE; 335 336 ctx = calloc(1, sizeof *ctx); 337 if (!ctx) 338 return NULL; 339 340 ctx->size = size; 341 ctx->send_flags = IBV_SEND_SIGNALED; 342 ctx->rx_depth = rx_depth; 343 344 ctx->buf = memalign(page_size, size); 345 if (!ctx->buf) { 346 fprintf(stderr, "Couldn't allocate work buf.\n"); 347 goto clean_ctx; 348 } 349 350 /* FIXME memset(ctx->buf, 0, size); */ 351 memset(ctx->buf, 0x7b, size); 352 353 ctx->context = ibv_open_device(ib_dev); 354 if (!ctx->context) { 355 fprintf(stderr, "Couldn't get context for %s\n", 356 ibv_get_device_name(ib_dev)); 357 goto clean_buffer; 358 } 359 360 if (use_event) { 361 ctx->channel = ibv_create_comp_channel(ctx->context); 362 if (!ctx->channel) { 363 fprintf(stderr, "Couldn't create completion channel\n"); 364 goto clean_device; 365 } 366 } else 367 ctx->channel = NULL; 368 369 ctx->pd = ibv_alloc_pd(ctx->context); 370 if (!ctx->pd) { 371 fprintf(stderr, "Couldn't allocate PD\n"); 372 goto clean_comp_channel; 373 } 374 375 if (use_odp || use_ts) { 376 const uint32_t rc_caps_mask = IBV_ODP_SUPPORT_SEND | 377 IBV_ODP_SUPPORT_RECV; 378 struct ibv_device_attr_ex attrx; 379 380 if (ibv_query_device_ex(ctx->context, NULL, &attrx)) { 381 fprintf(stderr, "Couldn't query device for its features\n"); 382 goto clean_comp_channel; 383 } 384 385 if (use_odp) { 386 if (!(attrx.odp_caps.general_caps & IBV_ODP_SUPPORT) || 387 (attrx.odp_caps.per_transport_caps.rc_odp_caps & rc_caps_mask) != rc_caps_mask) { 388 fprintf(stderr, "The device isn't ODP capable or does not support RC send and receive with ODP\n"); 389 goto clean_comp_channel; 390 } 391 access_flags |= IBV_ACCESS_ON_DEMAND; 392 } 393 394 if (use_ts) { 395 if (!attrx.completion_timestamp_mask) { 396 fprintf(stderr, "The device isn't completion timestamp capable\n"); 397 goto clean_comp_channel; 398 } 399 ctx->completion_timestamp_mask = attrx.completion_timestamp_mask; 400 } 401 } 402 ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, access_flags); 403 404 if (!ctx->mr) { 405 fprintf(stderr, "Couldn't register MR\n"); 406 goto clean_pd; 407 } 408 409 if (use_ts) { 410 struct ibv_cq_init_attr_ex attr_ex = { 411 .cqe = rx_depth + 1, 412 .cq_context = NULL, 413 .channel = ctx->channel, 414 .comp_vector = 0, 415 .wc_flags = IBV_WC_EX_WITH_COMPLETION_TIMESTAMP 416 }; 417 418 ctx->cq_s.cq_ex = ibv_create_cq_ex(ctx->context, &attr_ex); 419 } else { 420 ctx->cq_s.cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL, 421 ctx->channel, 0); 422 } 423 424 if (!pp_cq(ctx)) { 425 fprintf(stderr, "Couldn't create CQ\n"); 426 goto clean_mr; 427 } 428 429 { 430 struct ibv_qp_attr attr; 431 struct ibv_qp_init_attr init_attr = { 432 .send_cq = pp_cq(ctx), 433 .recv_cq = pp_cq(ctx), 434 .cap = { 435 .max_send_wr = 1, 436 .max_recv_wr = rx_depth, 437 .max_send_sge = 1, 438 .max_recv_sge = 1 439 }, 440 .qp_type = IBV_QPT_RC 441 }; 442 443 ctx->qp = ibv_create_qp(ctx->pd, &init_attr); 444 if (!ctx->qp) { 445 fprintf(stderr, "Couldn't create QP\n"); 446 goto clean_cq; 447 } 448 449 ibv_query_qp(ctx->qp, &attr, IBV_QP_CAP, &init_attr); 450 if (init_attr.cap.max_inline_data >= size) { 451 ctx->send_flags |= IBV_SEND_INLINE; 452 } 453 } 454 455 { 456 struct ibv_qp_attr attr = { 457 .qp_state = IBV_QPS_INIT, 458 .pkey_index = 0, 459 .port_num = port, 460 .qp_access_flags = 0 461 }; 462 463 if (ibv_modify_qp(ctx->qp, &attr, 464 IBV_QP_STATE | 465 IBV_QP_PKEY_INDEX | 466 IBV_QP_PORT | 467 IBV_QP_ACCESS_FLAGS)) { 468 fprintf(stderr, "Failed to modify QP to INIT\n"); 469 goto clean_qp; 470 } 471 } 472 473 return ctx; 474 475 clean_qp: 476 ibv_destroy_qp(ctx->qp); 477 478 clean_cq: 479 ibv_destroy_cq(pp_cq(ctx)); 480 481 clean_mr: 482 ibv_dereg_mr(ctx->mr); 483 484 clean_pd: 485 ibv_dealloc_pd(ctx->pd); 486 487 clean_comp_channel: 488 if (ctx->channel) 489 ibv_destroy_comp_channel(ctx->channel); 490 491 clean_device: 492 ibv_close_device(ctx->context); 493 494 clean_buffer: 495 free(ctx->buf); 496 497 clean_ctx: 498 free(ctx); 499 500 return NULL; 501 } 502 503 static int pp_close_ctx(struct pingpong_context *ctx) 504 { 505 if (ibv_destroy_qp(ctx->qp)) { 506 fprintf(stderr, "Couldn't destroy QP\n"); 507 return 1; 508 } 509 510 if (ibv_destroy_cq(pp_cq(ctx))) { 511 fprintf(stderr, "Couldn't destroy CQ\n"); 512 return 1; 513 } 514 515 if (ibv_dereg_mr(ctx->mr)) { 516 fprintf(stderr, "Couldn't deregister MR\n"); 517 return 1; 518 } 519 520 if (ibv_dealloc_pd(ctx->pd)) { 521 fprintf(stderr, "Couldn't deallocate PD\n"); 522 return 1; 523 } 524 525 if (ctx->channel) { 526 if (ibv_destroy_comp_channel(ctx->channel)) { 527 fprintf(stderr, "Couldn't destroy completion channel\n"); 528 return 1; 529 } 530 } 531 532 if (ibv_close_device(ctx->context)) { 533 fprintf(stderr, "Couldn't release context\n"); 534 return 1; 535 } 536 537 free(ctx->buf); 538 free(ctx); 539 540 return 0; 541 } 542 543 static int pp_post_recv(struct pingpong_context *ctx, int n) 544 { 545 struct ibv_sge list = { 546 .addr = (uintptr_t) ctx->buf, 547 .length = ctx->size, 548 .lkey = ctx->mr->lkey 549 }; 550 struct ibv_recv_wr wr = { 551 .wr_id = PINGPONG_RECV_WRID, 552 .sg_list = &list, 553 .num_sge = 1, 554 }; 555 struct ibv_recv_wr *bad_wr; 556 int i; 557 558 for (i = 0; i < n; ++i) 559 if (ibv_post_recv(ctx->qp, &wr, &bad_wr)) 560 break; 561 562 return i; 563 } 564 565 static int pp_post_send(struct pingpong_context *ctx) 566 { 567 struct ibv_sge list = { 568 .addr = (uintptr_t) ctx->buf, 569 .length = ctx->size, 570 .lkey = ctx->mr->lkey 571 }; 572 struct ibv_send_wr wr = { 573 .wr_id = PINGPONG_SEND_WRID, 574 .sg_list = &list, 575 .num_sge = 1, 576 .opcode = IBV_WR_SEND, 577 .send_flags = ctx->send_flags, 578 }; 579 struct ibv_send_wr *bad_wr; 580 581 return ibv_post_send(ctx->qp, &wr, &bad_wr); 582 } 583 584 struct ts_params { 585 uint64_t comp_recv_max_time_delta; 586 uint64_t comp_recv_min_time_delta; 587 uint64_t comp_recv_total_time_delta; 588 uint64_t comp_recv_prev_time; 589 int last_comp_with_ts; 590 unsigned int comp_with_time_iters; 591 }; 592 593 static inline int parse_single_wc(struct pingpong_context *ctx, int *scnt, 594 int *rcnt, int *routs, int iters, 595 uint64_t wr_id, enum ibv_wc_status status, 596 uint64_t completion_timestamp, 597 struct ts_params *ts) 598 { 599 if (status != IBV_WC_SUCCESS) { 600 fprintf(stderr, "Failed status %s (%d) for wr_id %d\n", 601 ibv_wc_status_str(status), 602 status, (int)wr_id); 603 return 1; 604 } 605 606 switch ((int)wr_id) { 607 case PINGPONG_SEND_WRID: 608 ++(*scnt); 609 break; 610 611 case PINGPONG_RECV_WRID: 612 if (--(*routs) <= 1) { 613 *routs += pp_post_recv(ctx, ctx->rx_depth - *routs); 614 if (*routs < ctx->rx_depth) { 615 fprintf(stderr, 616 "Couldn't post receive (%d)\n", 617 *routs); 618 return 1; 619 } 620 } 621 622 ++(*rcnt); 623 if (use_ts) { 624 if (ts->last_comp_with_ts) { 625 uint64_t delta; 626 627 /* checking whether the clock was wrapped around */ 628 if (completion_timestamp >= ts->comp_recv_prev_time) 629 delta = completion_timestamp - ts->comp_recv_prev_time; 630 else 631 delta = ctx->completion_timestamp_mask - ts->comp_recv_prev_time + 632 completion_timestamp + 1; 633 634 ts->comp_recv_max_time_delta = MAX(ts->comp_recv_max_time_delta, delta); 635 ts->comp_recv_min_time_delta = MIN(ts->comp_recv_min_time_delta, delta); 636 ts->comp_recv_total_time_delta += delta; 637 ts->comp_with_time_iters++; 638 } 639 640 ts->comp_recv_prev_time = completion_timestamp; 641 ts->last_comp_with_ts = 1; 642 } else { 643 ts->last_comp_with_ts = 0; 644 } 645 646 break; 647 648 default: 649 fprintf(stderr, "Completion for unknown wr_id %d\n", 650 (int)wr_id); 651 return 1; 652 } 653 654 ctx->pending &= ~(int)wr_id; 655 if (*scnt < iters && !ctx->pending) { 656 if (pp_post_send(ctx)) { 657 fprintf(stderr, "Couldn't post send\n"); 658 return 1; 659 } 660 ctx->pending = PINGPONG_RECV_WRID | 661 PINGPONG_SEND_WRID; 662 } 663 664 return 0; 665 } 666 667 static void usage(const char *argv0) 668 { 669 printf("Usage:\n"); 670 printf(" %s start a server and wait for connection\n", argv0); 671 printf(" %s <host> connect to server at <host>\n", argv0); 672 printf("\n"); 673 printf("Options:\n"); 674 printf(" -p, --port=<port> listen on/connect to port <port> (default 18515)\n"); 675 printf(" -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n"); 676 printf(" -i, --ib-port=<port> use port <port> of IB device (default 1)\n"); 677 printf(" -s, --size=<size> size of message to exchange (default 4096)\n"); 678 printf(" -m, --mtu=<size> path MTU (default 1024)\n"); 679 printf(" -r, --rx-depth=<dep> number of receives to post at a time (default 500)\n"); 680 printf(" -n, --iters=<iters> number of exchanges (default 1000)\n"); 681 printf(" -l, --sl=<sl> service level value\n"); 682 printf(" -e, --events sleep on CQ events (default poll)\n"); 683 printf(" -g, --gid-idx=<gid index> local port gid index\n"); 684 printf(" -o, --odp use on demand paging\n"); 685 printf(" -t, --ts get CQE with timestamp\n"); 686 } 687 688 int main(int argc, char *argv[]) 689 { 690 struct ibv_device **dev_list; 691 struct ibv_device *ib_dev; 692 struct pingpong_context *ctx; 693 struct pingpong_dest my_dest; 694 struct pingpong_dest *rem_dest; 695 struct timeval start, end; 696 char *ib_devname = NULL; 697 char *servername = NULL; 698 unsigned int port = 18515; 699 int ib_port = 1; 700 unsigned int size = 4096; 701 enum ibv_mtu mtu = IBV_MTU_1024; 702 unsigned int rx_depth = 500; 703 unsigned int iters = 1000; 704 int use_event = 0; 705 int routs; 706 int rcnt, scnt; 707 int num_cq_events = 0; 708 int sl = 0; 709 int gidx = -1; 710 char gid[33]; 711 struct ts_params ts; 712 713 srand48(getpid() * time(NULL)); 714 715 while (1) { 716 int c; 717 718 static struct option long_options[] = { 719 { .name = "port", .has_arg = 1, .val = 'p' }, 720 { .name = "ib-dev", .has_arg = 1, .val = 'd' }, 721 { .name = "ib-port", .has_arg = 1, .val = 'i' }, 722 { .name = "size", .has_arg = 1, .val = 's' }, 723 { .name = "mtu", .has_arg = 1, .val = 'm' }, 724 { .name = "rx-depth", .has_arg = 1, .val = 'r' }, 725 { .name = "iters", .has_arg = 1, .val = 'n' }, 726 { .name = "sl", .has_arg = 1, .val = 'l' }, 727 { .name = "events", .has_arg = 0, .val = 'e' }, 728 { .name = "gid-idx", .has_arg = 1, .val = 'g' }, 729 { .name = "odp", .has_arg = 0, .val = 'o' }, 730 { .name = "ts", .has_arg = 0, .val = 't' }, 731 {} 732 }; 733 734 c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:ot", 735 long_options, NULL); 736 737 if (c == -1) 738 break; 739 740 switch (c) { 741 case 'p': 742 port = strtoul(optarg, NULL, 0); 743 if (port > 65535) { 744 usage(argv[0]); 745 return 1; 746 } 747 break; 748 749 case 'd': 750 ib_devname = strdupa(optarg); 751 break; 752 753 case 'i': 754 ib_port = strtol(optarg, NULL, 0); 755 if (ib_port < 1) { 756 usage(argv[0]); 757 return 1; 758 } 759 break; 760 761 case 's': 762 size = strtoul(optarg, NULL, 0); 763 break; 764 765 case 'm': 766 mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0)); 767 if (mtu == 0) { 768 usage(argv[0]); 769 return 1; 770 } 771 break; 772 773 case 'r': 774 rx_depth = strtoul(optarg, NULL, 0); 775 break; 776 777 case 'n': 778 iters = strtoul(optarg, NULL, 0); 779 break; 780 781 case 'l': 782 sl = strtol(optarg, NULL, 0); 783 break; 784 785 case 'e': 786 ++use_event; 787 break; 788 789 case 'g': 790 gidx = strtol(optarg, NULL, 0); 791 break; 792 793 case 'o': 794 use_odp = 1; 795 break; 796 case 't': 797 use_ts = 1; 798 break; 799 800 default: 801 usage(argv[0]); 802 return 1; 803 } 804 } 805 806 if (optind == argc - 1) 807 servername = strdupa(argv[optind]); 808 else if (optind < argc) { 809 usage(argv[0]); 810 return 1; 811 } 812 813 if (use_ts) { 814 ts.comp_recv_max_time_delta = 0; 815 ts.comp_recv_min_time_delta = 0xffffffff; 816 ts.comp_recv_total_time_delta = 0; 817 ts.comp_recv_prev_time = 0; 818 ts.last_comp_with_ts = 0; 819 ts.comp_with_time_iters = 0; 820 } 821 822 page_size = sysconf(_SC_PAGESIZE); 823 824 dev_list = ibv_get_device_list(NULL); 825 if (!dev_list) { 826 perror("Failed to get IB devices list"); 827 return 1; 828 } 829 830 if (!ib_devname) { 831 ib_dev = *dev_list; 832 if (!ib_dev) { 833 fprintf(stderr, "No IB devices found\n"); 834 return 1; 835 } 836 } else { 837 int i; 838 for (i = 0; dev_list[i]; ++i) 839 if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname)) 840 break; 841 ib_dev = dev_list[i]; 842 if (!ib_dev) { 843 fprintf(stderr, "IB device %s not found\n", ib_devname); 844 return 1; 845 } 846 } 847 848 ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event); 849 if (!ctx) 850 return 1; 851 852 routs = pp_post_recv(ctx, ctx->rx_depth); 853 if (routs < ctx->rx_depth) { 854 fprintf(stderr, "Couldn't post receive (%d)\n", routs); 855 return 1; 856 } 857 858 if (use_event) 859 if (ibv_req_notify_cq(pp_cq(ctx), 0)) { 860 fprintf(stderr, "Couldn't request CQ notification\n"); 861 return 1; 862 } 863 864 865 if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) { 866 fprintf(stderr, "Couldn't get port info\n"); 867 return 1; 868 } 869 870 my_dest.lid = ctx->portinfo.lid; 871 if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET && 872 !my_dest.lid) { 873 fprintf(stderr, "Couldn't get local LID\n"); 874 return 1; 875 } 876 877 if (gidx >= 0) { 878 if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest.gid)) { 879 fprintf(stderr, "can't read sgid of index %d\n", gidx); 880 return 1; 881 } 882 } else 883 memset(&my_dest.gid, 0, sizeof my_dest.gid); 884 885 my_dest.qpn = ctx->qp->qp_num; 886 my_dest.psn = lrand48() & 0xffffff; 887 inet_ntop(AF_INET6, &my_dest.gid, gid, sizeof gid); 888 printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n", 889 my_dest.lid, my_dest.qpn, my_dest.psn, gid); 890 891 892 if (servername) 893 rem_dest = pp_client_exch_dest(servername, port, &my_dest); 894 else 895 rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl, 896 &my_dest, gidx); 897 898 if (!rem_dest) 899 return 1; 900 901 inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid); 902 printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n", 903 rem_dest->lid, rem_dest->qpn, rem_dest->psn, gid); 904 905 if (servername) 906 if (pp_connect_ctx(ctx, ib_port, my_dest.psn, mtu, sl, rem_dest, 907 gidx)) 908 return 1; 909 910 ctx->pending = PINGPONG_RECV_WRID; 911 912 if (servername) { 913 if (pp_post_send(ctx)) { 914 fprintf(stderr, "Couldn't post send\n"); 915 return 1; 916 } 917 ctx->pending |= PINGPONG_SEND_WRID; 918 } 919 920 if (gettimeofday(&start, NULL)) { 921 perror("gettimeofday"); 922 return 1; 923 } 924 925 rcnt = scnt = 0; 926 while (rcnt < iters || scnt < iters) { 927 int ret; 928 929 if (use_event) { 930 struct ibv_cq *ev_cq; 931 void *ev_ctx; 932 933 if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) { 934 fprintf(stderr, "Failed to get cq_event\n"); 935 return 1; 936 } 937 938 ++num_cq_events; 939 940 if (ev_cq != pp_cq(ctx)) { 941 fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); 942 return 1; 943 } 944 945 if (ibv_req_notify_cq(pp_cq(ctx), 0)) { 946 fprintf(stderr, "Couldn't request CQ notification\n"); 947 return 1; 948 } 949 } 950 951 if (use_ts) { 952 struct ibv_poll_cq_attr attr = {}; 953 954 do { 955 ret = ibv_start_poll(ctx->cq_s.cq_ex, &attr); 956 } while (!use_event && ret == ENOENT); 957 958 if (ret) { 959 fprintf(stderr, "poll CQ failed %d\n", ret); 960 return ret; 961 } 962 ret = parse_single_wc(ctx, &scnt, &rcnt, &routs, 963 iters, 964 ctx->cq_s.cq_ex->wr_id, 965 ctx->cq_s.cq_ex->status, 966 ibv_wc_read_completion_ts(ctx->cq_s.cq_ex), 967 &ts); 968 if (ret) { 969 ibv_end_poll(ctx->cq_s.cq_ex); 970 return ret; 971 } 972 ret = ibv_next_poll(ctx->cq_s.cq_ex); 973 if (!ret) 974 ret = parse_single_wc(ctx, &scnt, &rcnt, &routs, 975 iters, 976 ctx->cq_s.cq_ex->wr_id, 977 ctx->cq_s.cq_ex->status, 978 ibv_wc_read_completion_ts(ctx->cq_s.cq_ex), 979 &ts); 980 ibv_end_poll(ctx->cq_s.cq_ex); 981 if (ret && ret != ENOENT) { 982 fprintf(stderr, "poll CQ failed %d\n", ret); 983 return ret; 984 } 985 } else { 986 int ne, i; 987 struct ibv_wc wc[2]; 988 989 do { 990 ne = ibv_poll_cq(pp_cq(ctx), 2, wc); 991 if (ne < 0) { 992 fprintf(stderr, "poll CQ failed %d\n", ne); 993 return 1; 994 } 995 } while (!use_event && ne < 1); 996 997 for (i = 0; i < ne; ++i) { 998 ret = parse_single_wc(ctx, &scnt, &rcnt, &routs, 999 iters, 1000 wc[i].wr_id, 1001 wc[i].status, 1002 0, &ts); 1003 if (ret) { 1004 fprintf(stderr, "parse WC failed %d\n", ne); 1005 return 1; 1006 } 1007 } 1008 } 1009 } 1010 1011 if (gettimeofday(&end, NULL)) { 1012 perror("gettimeofday"); 1013 return 1; 1014 } 1015 1016 { 1017 float usec = (end.tv_sec - start.tv_sec) * 1000000 + 1018 (end.tv_usec - start.tv_usec); 1019 long long bytes = (long long) size * iters * 2; 1020 1021 printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n", 1022 bytes, usec / 1000000., bytes * 8. / usec); 1023 printf("%d iters in %.2f seconds = %.2f usec/iter\n", 1024 iters, usec / 1000000., usec / iters); 1025 1026 if (use_ts && ts.comp_with_time_iters) { 1027 printf("Max receive completion clock cycles = %" PRIu64 "\n", 1028 ts.comp_recv_max_time_delta); 1029 printf("Min receive completion clock cycles = %" PRIu64 "\n", 1030 ts.comp_recv_min_time_delta); 1031 printf("Average receive completion clock cycles = %f\n", 1032 (double)ts.comp_recv_total_time_delta / ts.comp_with_time_iters); 1033 } 1034 } 1035 1036 ibv_ack_cq_events(pp_cq(ctx), num_cq_events); 1037 1038 if (pp_close_ctx(ctx)) 1039 return 1; 1040 1041 ibv_free_device_list(dev_list); 1042 free(rem_dest); 1043 1044 return 0; 1045 } 1046