1 /* 2 * Copyright (c) 2005 Topspin Communications. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 #define _GNU_SOURCE 33 #include <config.h> 34 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <unistd.h> 38 #include <string.h> 39 #include <sys/types.h> 40 #include <sys/socket.h> 41 #include <sys/time.h> 42 #include <netdb.h> 43 #include <stdlib.h> 44 #include <getopt.h> 45 #include <arpa/inet.h> 46 #include <time.h> 47 #include <inttypes.h> 48 49 #include "pingpong.h" 50 51 #include <sys/param.h> 52 53 enum { 54 PINGPONG_RECV_WRID = 1, 55 PINGPONG_SEND_WRID = 2, 56 }; 57 58 static int page_size; 59 static int use_odp; 60 static int use_ts; 61 62 struct pingpong_context { 63 struct ibv_context *context; 64 struct ibv_comp_channel *channel; 65 struct ibv_pd *pd; 66 struct ibv_mr *mr; 67 union { 68 struct ibv_cq *cq; 69 struct ibv_cq_ex *cq_ex; 70 } cq_s; 71 struct ibv_qp *qp; 72 void *buf; 73 int size; 74 int send_flags; 75 int rx_depth; 76 int pending; 77 struct ibv_port_attr portinfo; 78 uint64_t completion_timestamp_mask; 79 }; 80 81 static struct ibv_cq *pp_cq(struct pingpong_context *ctx) 82 { 83 return use_ts ? ibv_cq_ex_to_cq(ctx->cq_s.cq_ex) : 84 ctx->cq_s.cq; 85 } 86 87 struct pingpong_dest { 88 int lid; 89 int qpn; 90 int psn; 91 union ibv_gid gid; 92 }; 93 94 static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn, 95 enum ibv_mtu mtu, int sl, 96 struct pingpong_dest *dest, int sgid_idx) 97 { 98 struct ibv_qp_attr attr = { 99 .qp_state = IBV_QPS_RTR, 100 .path_mtu = mtu, 101 .dest_qp_num = dest->qpn, 102 .rq_psn = dest->psn, 103 .max_dest_rd_atomic = 1, 104 .min_rnr_timer = 12, 105 .ah_attr = { 106 .is_global = 0, 107 .dlid = dest->lid, 108 .sl = sl, 109 .src_path_bits = 0, 110 .port_num = port 111 } 112 }; 113 114 if (dest->gid.global.interface_id) { 115 attr.ah_attr.is_global = 1; 116 attr.ah_attr.grh.hop_limit = 1; 117 attr.ah_attr.grh.dgid = dest->gid; 118 attr.ah_attr.grh.sgid_index = sgid_idx; 119 } 120 if (ibv_modify_qp(ctx->qp, &attr, 121 IBV_QP_STATE | 122 IBV_QP_AV | 123 IBV_QP_PATH_MTU | 124 IBV_QP_DEST_QPN | 125 IBV_QP_RQ_PSN | 126 IBV_QP_MAX_DEST_RD_ATOMIC | 127 IBV_QP_MIN_RNR_TIMER)) { 128 fprintf(stderr, "Failed to modify QP to RTR\n"); 129 return 1; 130 } 131 132 attr.qp_state = IBV_QPS_RTS; 133 attr.timeout = 14; 134 attr.retry_cnt = 7; 135 attr.rnr_retry = 7; 136 attr.sq_psn = my_psn; 137 attr.max_rd_atomic = 1; 138 if (ibv_modify_qp(ctx->qp, &attr, 139 IBV_QP_STATE | 140 IBV_QP_TIMEOUT | 141 IBV_QP_RETRY_CNT | 142 IBV_QP_RNR_RETRY | 143 IBV_QP_SQ_PSN | 144 IBV_QP_MAX_QP_RD_ATOMIC)) { 145 fprintf(stderr, "Failed to modify QP to RTS\n"); 146 return 1; 147 } 148 149 return 0; 150 } 151 152 static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port, 153 const struct pingpong_dest *my_dest) 154 { 155 struct addrinfo *res, *t; 156 struct addrinfo hints = { 157 .ai_family = AF_UNSPEC, 158 .ai_socktype = SOCK_STREAM 159 }; 160 char *service; 161 char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"]; 162 int n; 163 int sockfd = -1; 164 struct pingpong_dest *rem_dest = NULL; 165 char gid[33]; 166 167 if (asprintf(&service, "%d", port) < 0) 168 return NULL; 169 170 n = getaddrinfo(servername, service, &hints, &res); 171 172 if (n < 0) { 173 fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port); 174 free(service); 175 return NULL; 176 } 177 178 for (t = res; t; t = t->ai_next) { 179 sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); 180 if (sockfd >= 0) { 181 if (!connect(sockfd, t->ai_addr, t->ai_addrlen)) 182 break; 183 close(sockfd); 184 sockfd = -1; 185 } 186 } 187 188 freeaddrinfo_null(res); 189 free(service); 190 191 if (sockfd < 0) { 192 fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port); 193 return NULL; 194 } 195 196 gid_to_wire_gid(&my_dest->gid, gid); 197 sprintf(msg, "%04x:%06x:%06x:%s", my_dest->lid, my_dest->qpn, 198 my_dest->psn, gid); 199 if (write(sockfd, msg, sizeof msg) != sizeof msg) { 200 fprintf(stderr, "Couldn't send local address\n"); 201 goto out; 202 } 203 204 if (read(sockfd, msg, sizeof msg) != sizeof msg || 205 write(sockfd, "done", sizeof "done") != sizeof "done") { 206 perror("client read/write"); 207 fprintf(stderr, "Couldn't read/write remote address\n"); 208 goto out; 209 } 210 211 rem_dest = malloc(sizeof *rem_dest); 212 if (!rem_dest) 213 goto out; 214 215 sscanf(msg, "%x:%x:%x:%s", &rem_dest->lid, &rem_dest->qpn, 216 &rem_dest->psn, gid); 217 wire_gid_to_gid(gid, &rem_dest->gid); 218 219 out: 220 close(sockfd); 221 return rem_dest; 222 } 223 224 static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx, 225 int ib_port, enum ibv_mtu mtu, 226 int port, int sl, 227 const struct pingpong_dest *my_dest, 228 int sgid_idx) 229 { 230 struct addrinfo *res, *t; 231 struct addrinfo hints = { 232 .ai_flags = AI_PASSIVE, 233 .ai_family = AF_INET, 234 .ai_socktype = SOCK_STREAM 235 }; 236 char *service; 237 char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"]; 238 int n; 239 int sockfd = -1, connfd; 240 struct pingpong_dest *rem_dest = NULL; 241 char gid[33]; 242 243 if (asprintf(&service, "%d", port) < 0) 244 return NULL; 245 246 n = getaddrinfo(NULL, service, &hints, &res); 247 248 if (n < 0) { 249 fprintf(stderr, "%s for port %d\n", gai_strerror(n), port); 250 free(service); 251 return NULL; 252 } 253 254 for (t = res; t; t = t->ai_next) { 255 sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); 256 if (sockfd >= 0) { 257 n = 1; 258 259 setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n); 260 261 if (!bind(sockfd, t->ai_addr, t->ai_addrlen)) 262 break; 263 close(sockfd); 264 sockfd = -1; 265 } 266 } 267 268 freeaddrinfo_null(res); 269 free(service); 270 271 if (sockfd < 0) { 272 fprintf(stderr, "Couldn't listen to port %d\n", port); 273 return NULL; 274 } 275 276 listen(sockfd, 1); 277 connfd = accept(sockfd, NULL, NULL); 278 close(sockfd); 279 if (connfd < 0) { 280 fprintf(stderr, "accept() failed\n"); 281 return NULL; 282 } 283 284 n = read(connfd, msg, sizeof msg); 285 if (n != sizeof msg) { 286 perror("server read"); 287 fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg); 288 goto out; 289 } 290 291 rem_dest = malloc(sizeof *rem_dest); 292 if (!rem_dest) 293 goto out; 294 295 sscanf(msg, "%x:%x:%x:%s", &rem_dest->lid, &rem_dest->qpn, 296 &rem_dest->psn, gid); 297 wire_gid_to_gid(gid, &rem_dest->gid); 298 299 if (pp_connect_ctx(ctx, ib_port, my_dest->psn, mtu, sl, rem_dest, 300 sgid_idx)) { 301 fprintf(stderr, "Couldn't connect to remote QP\n"); 302 free(rem_dest); 303 rem_dest = NULL; 304 goto out; 305 } 306 307 308 gid_to_wire_gid(&my_dest->gid, gid); 309 sprintf(msg, "%04x:%06x:%06x:%s", my_dest->lid, my_dest->qpn, 310 my_dest->psn, gid); 311 if (write(connfd, msg, sizeof msg) != sizeof msg || 312 read(connfd, msg, sizeof msg) != sizeof "done") { 313 fprintf(stderr, "Couldn't send/recv local address\n"); 314 free(rem_dest); 315 rem_dest = NULL; 316 goto out; 317 } 318 319 320 out: 321 close(connfd); 322 return rem_dest; 323 } 324 325 static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, 326 int rx_depth, int port, 327 int use_event) 328 { 329 struct pingpong_context *ctx; 330 int access_flags = IBV_ACCESS_LOCAL_WRITE; 331 332 ctx = calloc(1, sizeof *ctx); 333 if (!ctx) 334 return NULL; 335 336 ctx->size = size; 337 ctx->send_flags = IBV_SEND_SIGNALED; 338 ctx->rx_depth = rx_depth; 339 340 ctx->buf = memalign(page_size, size); 341 if (!ctx->buf) { 342 fprintf(stderr, "Couldn't allocate work buf.\n"); 343 goto clean_ctx; 344 } 345 346 /* FIXME memset(ctx->buf, 0, size); */ 347 memset(ctx->buf, 0x7b, size); 348 349 ctx->context = ibv_open_device(ib_dev); 350 if (!ctx->context) { 351 fprintf(stderr, "Couldn't get context for %s\n", 352 ibv_get_device_name(ib_dev)); 353 goto clean_buffer; 354 } 355 356 if (use_event) { 357 ctx->channel = ibv_create_comp_channel(ctx->context); 358 if (!ctx->channel) { 359 fprintf(stderr, "Couldn't create completion channel\n"); 360 goto clean_device; 361 } 362 } else 363 ctx->channel = NULL; 364 365 ctx->pd = ibv_alloc_pd(ctx->context); 366 if (!ctx->pd) { 367 fprintf(stderr, "Couldn't allocate PD\n"); 368 goto clean_comp_channel; 369 } 370 371 if (use_odp || use_ts) { 372 const uint32_t rc_caps_mask = IBV_ODP_SUPPORT_SEND | 373 IBV_ODP_SUPPORT_RECV; 374 struct ibv_device_attr_ex attrx; 375 376 if (ibv_query_device_ex(ctx->context, NULL, &attrx)) { 377 fprintf(stderr, "Couldn't query device for its features\n"); 378 goto clean_comp_channel; 379 } 380 381 if (use_odp) { 382 if (!(attrx.odp_caps.general_caps & IBV_ODP_SUPPORT) || 383 (attrx.odp_caps.per_transport_caps.rc_odp_caps & rc_caps_mask) != rc_caps_mask) { 384 fprintf(stderr, "The device isn't ODP capable or does not support RC send and receive with ODP\n"); 385 goto clean_comp_channel; 386 } 387 access_flags |= IBV_ACCESS_ON_DEMAND; 388 } 389 390 if (use_ts) { 391 if (!attrx.completion_timestamp_mask) { 392 fprintf(stderr, "The device isn't completion timestamp capable\n"); 393 goto clean_comp_channel; 394 } 395 ctx->completion_timestamp_mask = attrx.completion_timestamp_mask; 396 } 397 } 398 ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, access_flags); 399 400 if (!ctx->mr) { 401 fprintf(stderr, "Couldn't register MR\n"); 402 goto clean_pd; 403 } 404 405 if (use_ts) { 406 struct ibv_cq_init_attr_ex attr_ex = { 407 .cqe = rx_depth + 1, 408 .cq_context = NULL, 409 .channel = ctx->channel, 410 .comp_vector = 0, 411 .wc_flags = IBV_WC_EX_WITH_COMPLETION_TIMESTAMP 412 }; 413 414 ctx->cq_s.cq_ex = ibv_create_cq_ex(ctx->context, &attr_ex); 415 } else { 416 ctx->cq_s.cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL, 417 ctx->channel, 0); 418 } 419 420 if (!pp_cq(ctx)) { 421 fprintf(stderr, "Couldn't create CQ\n"); 422 goto clean_mr; 423 } 424 425 { 426 struct ibv_qp_attr attr; 427 struct ibv_qp_init_attr init_attr = { 428 .send_cq = pp_cq(ctx), 429 .recv_cq = pp_cq(ctx), 430 .cap = { 431 .max_send_wr = 1, 432 .max_recv_wr = rx_depth, 433 .max_send_sge = 1, 434 .max_recv_sge = 1 435 }, 436 .qp_type = IBV_QPT_RC 437 }; 438 439 ctx->qp = ibv_create_qp(ctx->pd, &init_attr); 440 if (!ctx->qp) { 441 fprintf(stderr, "Couldn't create QP\n"); 442 goto clean_cq; 443 } 444 445 ibv_query_qp(ctx->qp, &attr, IBV_QP_CAP, &init_attr); 446 if (init_attr.cap.max_inline_data >= size) { 447 ctx->send_flags |= IBV_SEND_INLINE; 448 } 449 } 450 451 { 452 struct ibv_qp_attr attr = { 453 .qp_state = IBV_QPS_INIT, 454 .pkey_index = 0, 455 .port_num = port, 456 .qp_access_flags = 0 457 }; 458 459 if (ibv_modify_qp(ctx->qp, &attr, 460 IBV_QP_STATE | 461 IBV_QP_PKEY_INDEX | 462 IBV_QP_PORT | 463 IBV_QP_ACCESS_FLAGS)) { 464 fprintf(stderr, "Failed to modify QP to INIT\n"); 465 goto clean_qp; 466 } 467 } 468 469 return ctx; 470 471 clean_qp: 472 ibv_destroy_qp(ctx->qp); 473 474 clean_cq: 475 ibv_destroy_cq(pp_cq(ctx)); 476 477 clean_mr: 478 ibv_dereg_mr(ctx->mr); 479 480 clean_pd: 481 ibv_dealloc_pd(ctx->pd); 482 483 clean_comp_channel: 484 if (ctx->channel) 485 ibv_destroy_comp_channel(ctx->channel); 486 487 clean_device: 488 ibv_close_device(ctx->context); 489 490 clean_buffer: 491 free(ctx->buf); 492 493 clean_ctx: 494 free(ctx); 495 496 return NULL; 497 } 498 499 static int pp_close_ctx(struct pingpong_context *ctx) 500 { 501 if (ibv_destroy_qp(ctx->qp)) { 502 fprintf(stderr, "Couldn't destroy QP\n"); 503 return 1; 504 } 505 506 if (ibv_destroy_cq(pp_cq(ctx))) { 507 fprintf(stderr, "Couldn't destroy CQ\n"); 508 return 1; 509 } 510 511 if (ibv_dereg_mr(ctx->mr)) { 512 fprintf(stderr, "Couldn't deregister MR\n"); 513 return 1; 514 } 515 516 if (ibv_dealloc_pd(ctx->pd)) { 517 fprintf(stderr, "Couldn't deallocate PD\n"); 518 return 1; 519 } 520 521 if (ctx->channel) { 522 if (ibv_destroy_comp_channel(ctx->channel)) { 523 fprintf(stderr, "Couldn't destroy completion channel\n"); 524 return 1; 525 } 526 } 527 528 if (ibv_close_device(ctx->context)) { 529 fprintf(stderr, "Couldn't release context\n"); 530 return 1; 531 } 532 533 free(ctx->buf); 534 free(ctx); 535 536 return 0; 537 } 538 539 static int pp_post_recv(struct pingpong_context *ctx, int n) 540 { 541 struct ibv_sge list = { 542 .addr = (uintptr_t) ctx->buf, 543 .length = ctx->size, 544 .lkey = ctx->mr->lkey 545 }; 546 struct ibv_recv_wr wr = { 547 .wr_id = PINGPONG_RECV_WRID, 548 .sg_list = &list, 549 .num_sge = 1, 550 }; 551 struct ibv_recv_wr *bad_wr; 552 int i; 553 554 for (i = 0; i < n; ++i) 555 if (ibv_post_recv(ctx->qp, &wr, &bad_wr)) 556 break; 557 558 return i; 559 } 560 561 static int pp_post_send(struct pingpong_context *ctx) 562 { 563 struct ibv_sge list = { 564 .addr = (uintptr_t) ctx->buf, 565 .length = ctx->size, 566 .lkey = ctx->mr->lkey 567 }; 568 struct ibv_send_wr wr = { 569 .wr_id = PINGPONG_SEND_WRID, 570 .sg_list = &list, 571 .num_sge = 1, 572 .opcode = IBV_WR_SEND, 573 .send_flags = ctx->send_flags, 574 }; 575 struct ibv_send_wr *bad_wr; 576 577 return ibv_post_send(ctx->qp, &wr, &bad_wr); 578 } 579 580 struct ts_params { 581 uint64_t comp_recv_max_time_delta; 582 uint64_t comp_recv_min_time_delta; 583 uint64_t comp_recv_total_time_delta; 584 uint64_t comp_recv_prev_time; 585 int last_comp_with_ts; 586 unsigned int comp_with_time_iters; 587 }; 588 589 static inline int parse_single_wc(struct pingpong_context *ctx, int *scnt, 590 int *rcnt, int *routs, int iters, 591 uint64_t wr_id, enum ibv_wc_status status, 592 uint64_t completion_timestamp, 593 struct ts_params *ts) 594 { 595 if (status != IBV_WC_SUCCESS) { 596 fprintf(stderr, "Failed status %s (%d) for wr_id %d\n", 597 ibv_wc_status_str(status), 598 status, (int)wr_id); 599 return 1; 600 } 601 602 switch ((int)wr_id) { 603 case PINGPONG_SEND_WRID: 604 ++(*scnt); 605 break; 606 607 case PINGPONG_RECV_WRID: 608 if (--(*routs) <= 1) { 609 *routs += pp_post_recv(ctx, ctx->rx_depth - *routs); 610 if (*routs < ctx->rx_depth) { 611 fprintf(stderr, 612 "Couldn't post receive (%d)\n", 613 *routs); 614 return 1; 615 } 616 } 617 618 ++(*rcnt); 619 if (use_ts) { 620 if (ts->last_comp_with_ts) { 621 uint64_t delta; 622 623 /* checking whether the clock was wrapped around */ 624 if (completion_timestamp >= ts->comp_recv_prev_time) 625 delta = completion_timestamp - ts->comp_recv_prev_time; 626 else 627 delta = ctx->completion_timestamp_mask - ts->comp_recv_prev_time + 628 completion_timestamp + 1; 629 630 ts->comp_recv_max_time_delta = MAX(ts->comp_recv_max_time_delta, delta); 631 ts->comp_recv_min_time_delta = MIN(ts->comp_recv_min_time_delta, delta); 632 ts->comp_recv_total_time_delta += delta; 633 ts->comp_with_time_iters++; 634 } 635 636 ts->comp_recv_prev_time = completion_timestamp; 637 ts->last_comp_with_ts = 1; 638 } else { 639 ts->last_comp_with_ts = 0; 640 } 641 642 break; 643 644 default: 645 fprintf(stderr, "Completion for unknown wr_id %d\n", 646 (int)wr_id); 647 return 1; 648 } 649 650 ctx->pending &= ~(int)wr_id; 651 if (*scnt < iters && !ctx->pending) { 652 if (pp_post_send(ctx)) { 653 fprintf(stderr, "Couldn't post send\n"); 654 return 1; 655 } 656 ctx->pending = PINGPONG_RECV_WRID | 657 PINGPONG_SEND_WRID; 658 } 659 660 return 0; 661 } 662 663 static void usage(const char *argv0) 664 { 665 printf("Usage:\n"); 666 printf(" %s start a server and wait for connection\n", argv0); 667 printf(" %s <host> connect to server at <host>\n", argv0); 668 printf("\n"); 669 printf("Options:\n"); 670 printf(" -p, --port=<port> listen on/connect to port <port> (default 18515)\n"); 671 printf(" -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n"); 672 printf(" -i, --ib-port=<port> use port <port> of IB device (default 1)\n"); 673 printf(" -s, --size=<size> size of message to exchange (default 4096)\n"); 674 printf(" -m, --mtu=<size> path MTU (default 1024)\n"); 675 printf(" -r, --rx-depth=<dep> number of receives to post at a time (default 500)\n"); 676 printf(" -n, --iters=<iters> number of exchanges (default 1000)\n"); 677 printf(" -l, --sl=<sl> service level value\n"); 678 printf(" -e, --events sleep on CQ events (default poll)\n"); 679 printf(" -g, --gid-idx=<gid index> local port gid index\n"); 680 printf(" -o, --odp use on demand paging\n"); 681 printf(" -t, --ts get CQE with timestamp\n"); 682 } 683 684 int main(int argc, char *argv[]) 685 { 686 struct ibv_device **dev_list; 687 struct ibv_device *ib_dev; 688 struct pingpong_context *ctx; 689 struct pingpong_dest my_dest; 690 struct pingpong_dest *rem_dest; 691 struct timeval start, end; 692 char *ib_devname = NULL; 693 char *servername = NULL; 694 unsigned int port = 18515; 695 int ib_port = 1; 696 unsigned int size = 4096; 697 enum ibv_mtu mtu = IBV_MTU_1024; 698 unsigned int rx_depth = 500; 699 unsigned int iters = 1000; 700 int use_event = 0; 701 int routs; 702 int rcnt, scnt; 703 int num_cq_events = 0; 704 int sl = 0; 705 int gidx = -1; 706 char gid[33]; 707 struct ts_params ts; 708 709 srand48(getpid() * time(NULL)); 710 711 while (1) { 712 int c; 713 714 static struct option long_options[] = { 715 { .name = "port", .has_arg = 1, .val = 'p' }, 716 { .name = "ib-dev", .has_arg = 1, .val = 'd' }, 717 { .name = "ib-port", .has_arg = 1, .val = 'i' }, 718 { .name = "size", .has_arg = 1, .val = 's' }, 719 { .name = "mtu", .has_arg = 1, .val = 'm' }, 720 { .name = "rx-depth", .has_arg = 1, .val = 'r' }, 721 { .name = "iters", .has_arg = 1, .val = 'n' }, 722 { .name = "sl", .has_arg = 1, .val = 'l' }, 723 { .name = "events", .has_arg = 0, .val = 'e' }, 724 { .name = "gid-idx", .has_arg = 1, .val = 'g' }, 725 { .name = "odp", .has_arg = 0, .val = 'o' }, 726 { .name = "ts", .has_arg = 0, .val = 't' }, 727 {} 728 }; 729 730 c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:ot", 731 long_options, NULL); 732 733 if (c == -1) 734 break; 735 736 switch (c) { 737 case 'p': 738 port = strtoul(optarg, NULL, 0); 739 if (port > 65535) { 740 usage(argv[0]); 741 return 1; 742 } 743 break; 744 745 case 'd': 746 ib_devname = strdupa(optarg); 747 break; 748 749 case 'i': 750 ib_port = strtol(optarg, NULL, 0); 751 if (ib_port < 1) { 752 usage(argv[0]); 753 return 1; 754 } 755 break; 756 757 case 's': 758 size = strtoul(optarg, NULL, 0); 759 break; 760 761 case 'm': 762 mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0)); 763 if (mtu == 0) { 764 usage(argv[0]); 765 return 1; 766 } 767 break; 768 769 case 'r': 770 rx_depth = strtoul(optarg, NULL, 0); 771 break; 772 773 case 'n': 774 iters = strtoul(optarg, NULL, 0); 775 break; 776 777 case 'l': 778 sl = strtol(optarg, NULL, 0); 779 break; 780 781 case 'e': 782 ++use_event; 783 break; 784 785 case 'g': 786 gidx = strtol(optarg, NULL, 0); 787 break; 788 789 case 'o': 790 use_odp = 1; 791 break; 792 case 't': 793 use_ts = 1; 794 break; 795 796 default: 797 usage(argv[0]); 798 return 1; 799 } 800 } 801 802 if (optind == argc - 1) 803 servername = strdupa(argv[optind]); 804 else if (optind < argc) { 805 usage(argv[0]); 806 return 1; 807 } 808 809 if (use_ts) { 810 ts.comp_recv_max_time_delta = 0; 811 ts.comp_recv_min_time_delta = 0xffffffff; 812 ts.comp_recv_total_time_delta = 0; 813 ts.comp_recv_prev_time = 0; 814 ts.last_comp_with_ts = 0; 815 ts.comp_with_time_iters = 0; 816 } 817 818 page_size = sysconf(_SC_PAGESIZE); 819 820 dev_list = ibv_get_device_list(NULL); 821 if (!dev_list) { 822 perror("Failed to get IB devices list"); 823 return 1; 824 } 825 826 if (!ib_devname) { 827 ib_dev = *dev_list; 828 if (!ib_dev) { 829 fprintf(stderr, "No IB devices found\n"); 830 return 1; 831 } 832 } else { 833 int i; 834 for (i = 0; dev_list[i]; ++i) 835 if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname)) 836 break; 837 ib_dev = dev_list[i]; 838 if (!ib_dev) { 839 fprintf(stderr, "IB device %s not found\n", ib_devname); 840 return 1; 841 } 842 } 843 844 ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event); 845 if (!ctx) 846 return 1; 847 848 routs = pp_post_recv(ctx, ctx->rx_depth); 849 if (routs < ctx->rx_depth) { 850 fprintf(stderr, "Couldn't post receive (%d)\n", routs); 851 return 1; 852 } 853 854 if (use_event) 855 if (ibv_req_notify_cq(pp_cq(ctx), 0)) { 856 fprintf(stderr, "Couldn't request CQ notification\n"); 857 return 1; 858 } 859 860 861 if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) { 862 fprintf(stderr, "Couldn't get port info\n"); 863 return 1; 864 } 865 866 my_dest.lid = ctx->portinfo.lid; 867 if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET && 868 !my_dest.lid) { 869 fprintf(stderr, "Couldn't get local LID\n"); 870 return 1; 871 } 872 873 if (gidx >= 0) { 874 if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest.gid)) { 875 fprintf(stderr, "can't read sgid of index %d\n", gidx); 876 return 1; 877 } 878 } else 879 memset(&my_dest.gid, 0, sizeof my_dest.gid); 880 881 my_dest.qpn = ctx->qp->qp_num; 882 my_dest.psn = lrand48() & 0xffffff; 883 inet_ntop(AF_INET6, &my_dest.gid, gid, sizeof gid); 884 printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n", 885 my_dest.lid, my_dest.qpn, my_dest.psn, gid); 886 887 888 if (servername) 889 rem_dest = pp_client_exch_dest(servername, port, &my_dest); 890 else 891 rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl, 892 &my_dest, gidx); 893 894 if (!rem_dest) 895 return 1; 896 897 inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid); 898 printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n", 899 rem_dest->lid, rem_dest->qpn, rem_dest->psn, gid); 900 901 if (servername) 902 if (pp_connect_ctx(ctx, ib_port, my_dest.psn, mtu, sl, rem_dest, 903 gidx)) 904 return 1; 905 906 ctx->pending = PINGPONG_RECV_WRID; 907 908 if (servername) { 909 if (pp_post_send(ctx)) { 910 fprintf(stderr, "Couldn't post send\n"); 911 return 1; 912 } 913 ctx->pending |= PINGPONG_SEND_WRID; 914 } 915 916 if (gettimeofday(&start, NULL)) { 917 perror("gettimeofday"); 918 return 1; 919 } 920 921 rcnt = scnt = 0; 922 while (rcnt < iters || scnt < iters) { 923 int ret; 924 925 if (use_event) { 926 struct ibv_cq *ev_cq; 927 void *ev_ctx; 928 929 if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) { 930 fprintf(stderr, "Failed to get cq_event\n"); 931 return 1; 932 } 933 934 ++num_cq_events; 935 936 if (ev_cq != pp_cq(ctx)) { 937 fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); 938 return 1; 939 } 940 941 if (ibv_req_notify_cq(pp_cq(ctx), 0)) { 942 fprintf(stderr, "Couldn't request CQ notification\n"); 943 return 1; 944 } 945 } 946 947 if (use_ts) { 948 struct ibv_poll_cq_attr attr = {}; 949 950 do { 951 ret = ibv_start_poll(ctx->cq_s.cq_ex, &attr); 952 } while (!use_event && ret == ENOENT); 953 954 if (ret) { 955 fprintf(stderr, "poll CQ failed %d\n", ret); 956 return ret; 957 } 958 ret = parse_single_wc(ctx, &scnt, &rcnt, &routs, 959 iters, 960 ctx->cq_s.cq_ex->wr_id, 961 ctx->cq_s.cq_ex->status, 962 ibv_wc_read_completion_ts(ctx->cq_s.cq_ex), 963 &ts); 964 if (ret) { 965 ibv_end_poll(ctx->cq_s.cq_ex); 966 return ret; 967 } 968 ret = ibv_next_poll(ctx->cq_s.cq_ex); 969 if (!ret) 970 ret = parse_single_wc(ctx, &scnt, &rcnt, &routs, 971 iters, 972 ctx->cq_s.cq_ex->wr_id, 973 ctx->cq_s.cq_ex->status, 974 ibv_wc_read_completion_ts(ctx->cq_s.cq_ex), 975 &ts); 976 ibv_end_poll(ctx->cq_s.cq_ex); 977 if (ret && ret != ENOENT) { 978 fprintf(stderr, "poll CQ failed %d\n", ret); 979 return ret; 980 } 981 } else { 982 int ne, i; 983 struct ibv_wc wc[2]; 984 985 do { 986 ne = ibv_poll_cq(pp_cq(ctx), 2, wc); 987 if (ne < 0) { 988 fprintf(stderr, "poll CQ failed %d\n", ne); 989 return 1; 990 } 991 } while (!use_event && ne < 1); 992 993 for (i = 0; i < ne; ++i) { 994 ret = parse_single_wc(ctx, &scnt, &rcnt, &routs, 995 iters, 996 wc[i].wr_id, 997 wc[i].status, 998 0, &ts); 999 if (ret) { 1000 fprintf(stderr, "parse WC failed %d\n", ne); 1001 return 1; 1002 } 1003 } 1004 } 1005 } 1006 1007 if (gettimeofday(&end, NULL)) { 1008 perror("gettimeofday"); 1009 return 1; 1010 } 1011 1012 { 1013 float usec = (end.tv_sec - start.tv_sec) * 1000000 + 1014 (end.tv_usec - start.tv_usec); 1015 long long bytes = (long long) size * iters * 2; 1016 1017 printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n", 1018 bytes, usec / 1000000., bytes * 8. / usec); 1019 printf("%d iters in %.2f seconds = %.2f usec/iter\n", 1020 iters, usec / 1000000., usec / iters); 1021 1022 if (use_ts && ts.comp_with_time_iters) { 1023 printf("Max receive completion clock cycles = %" PRIu64 "\n", 1024 ts.comp_recv_max_time_delta); 1025 printf("Min receive completion clock cycles = %" PRIu64 "\n", 1026 ts.comp_recv_min_time_delta); 1027 printf("Average receive completion clock cycles = %f\n", 1028 (double)ts.comp_recv_total_time_delta / ts.comp_with_time_iters); 1029 } 1030 } 1031 1032 ibv_ack_cq_events(pp_cq(ctx), num_cq_events); 1033 1034 if (pp_close_ctx(ctx)) 1035 return 1; 1036 1037 ibv_free_device_list(dev_list); 1038 free(rem_dest); 1039 1040 return 0; 1041 } 1042