1 /*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32 #define _GNU_SOURCE
33 #include <config.h>
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <unistd.h>
38 #include <string.h>
39 #include <sys/types.h>
40 #include <sys/socket.h>
41 #include <sys/time.h>
42 #include <netdb.h>
43 #include <stdlib.h>
44 #include <getopt.h>
45 #include <arpa/inet.h>
46 #include <time.h>
47 #include <inttypes.h>
48
49 #include "pingpong.h"
50
51 #include <sys/param.h>
52
/*
 * Work-request IDs used for posted receives and sends.  Each value is a
 * distinct single bit because the IDs are also OR-ed into, and masked out
 * of, pingpong_context.pending.
 */
enum {
	PINGPONG_RECV_WRID = 1 << 0,
	PINGPONG_SEND_WRID = 1 << 1,
};
57
/* Alignment used for the work buffer; main() sets it from sysconf(). */
static int page_size = 0;
/* Non-zero when on-demand paging was requested on the command line (-o). */
static int use_odp = 0;
/* Non-zero when CQEs with completion timestamps were requested (-t). */
static int use_ts = 0;
61
62 struct pingpong_context {
63 struct ibv_context *context;
64 struct ibv_comp_channel *channel;
65 struct ibv_pd *pd;
66 struct ibv_mr *mr;
67 union {
68 struct ibv_cq *cq;
69 struct ibv_cq_ex *cq_ex;
70 } cq_s;
71 struct ibv_qp *qp;
72 void *buf;
73 int size;
74 int send_flags;
75 int rx_depth;
76 int pending;
77 struct ibv_port_attr portinfo;
78 uint64_t completion_timestamp_mask;
79 };
80
pp_cq(struct pingpong_context * ctx)81 static struct ibv_cq *pp_cq(struct pingpong_context *ctx)
82 {
83 return use_ts ? ibv_cq_ex_to_cq(ctx->cq_s.cq_ex) :
84 ctx->cq_s.cq;
85 }
86
87 struct pingpong_dest {
88 int lid;
89 int qpn;
90 int psn;
91 union ibv_gid gid;
92 };
93
pp_connect_ctx(struct pingpong_context * ctx,int port,int my_psn,enum ibv_mtu mtu,int sl,struct pingpong_dest * dest,int sgid_idx)94 static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
95 enum ibv_mtu mtu, int sl,
96 struct pingpong_dest *dest, int sgid_idx)
97 {
98 struct ibv_qp_attr attr = {
99 .qp_state = IBV_QPS_RTR,
100 .path_mtu = mtu,
101 .dest_qp_num = dest->qpn,
102 .rq_psn = dest->psn,
103 .max_dest_rd_atomic = 1,
104 .min_rnr_timer = 12,
105 .ah_attr = {
106 .is_global = 0,
107 .dlid = dest->lid,
108 .sl = sl,
109 .src_path_bits = 0,
110 .port_num = port
111 }
112 };
113
114 if (dest->gid.global.interface_id) {
115 attr.ah_attr.is_global = 1;
116 attr.ah_attr.grh.hop_limit = 1;
117 attr.ah_attr.grh.dgid = dest->gid;
118 attr.ah_attr.grh.sgid_index = sgid_idx;
119 }
120 if (ibv_modify_qp(ctx->qp, &attr,
121 IBV_QP_STATE |
122 IBV_QP_AV |
123 IBV_QP_PATH_MTU |
124 IBV_QP_DEST_QPN |
125 IBV_QP_RQ_PSN |
126 IBV_QP_MAX_DEST_RD_ATOMIC |
127 IBV_QP_MIN_RNR_TIMER)) {
128 fprintf(stderr, "Failed to modify QP to RTR\n");
129 return 1;
130 }
131
132 attr.qp_state = IBV_QPS_RTS;
133 attr.timeout = 14;
134 attr.retry_cnt = 7;
135 attr.rnr_retry = 7;
136 attr.sq_psn = my_psn;
137 attr.max_rd_atomic = 1;
138 if (ibv_modify_qp(ctx->qp, &attr,
139 IBV_QP_STATE |
140 IBV_QP_TIMEOUT |
141 IBV_QP_RETRY_CNT |
142 IBV_QP_RNR_RETRY |
143 IBV_QP_SQ_PSN |
144 IBV_QP_MAX_QP_RD_ATOMIC)) {
145 fprintf(stderr, "Failed to modify QP to RTS\n");
146 return 1;
147 }
148
149 return 0;
150 }
151
pp_client_exch_dest(const char * servername,int port,const struct pingpong_dest * my_dest)152 static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
153 const struct pingpong_dest *my_dest)
154 {
155 struct addrinfo *res, *t;
156 struct addrinfo hints = {
157 .ai_family = AF_UNSPEC,
158 .ai_socktype = SOCK_STREAM
159 };
160 char *service;
161 char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
162 int n;
163 int sockfd = -1;
164 struct pingpong_dest *rem_dest = NULL;
165 char gid[33];
166
167 if (asprintf(&service, "%d", port) < 0)
168 return NULL;
169
170 n = getaddrinfo(servername, service, &hints, &res);
171
172 if (n < 0) {
173 fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
174 free(service);
175 return NULL;
176 }
177
178 for (t = res; t; t = t->ai_next) {
179 sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
180 if (sockfd >= 0) {
181 if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
182 break;
183 close(sockfd);
184 sockfd = -1;
185 }
186 }
187
188 freeaddrinfo_null(res);
189 free(service);
190
191 if (sockfd < 0) {
192 fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
193 return NULL;
194 }
195
196 gid_to_wire_gid(&my_dest->gid, gid);
197 sprintf(msg, "%04x:%06x:%06x:%s", my_dest->lid, my_dest->qpn,
198 my_dest->psn, gid);
199 if (write(sockfd, msg, sizeof msg) != sizeof msg) {
200 fprintf(stderr, "Couldn't send local address\n");
201 goto out;
202 }
203
204 if (read(sockfd, msg, sizeof msg) != sizeof msg ||
205 write(sockfd, "done", sizeof "done") != sizeof "done") {
206 perror("client read/write");
207 fprintf(stderr, "Couldn't read/write remote address\n");
208 goto out;
209 }
210
211 rem_dest = malloc(sizeof *rem_dest);
212 if (!rem_dest)
213 goto out;
214
215 sscanf(msg, "%x:%x:%x:%s", &rem_dest->lid, &rem_dest->qpn,
216 &rem_dest->psn, gid);
217 wire_gid_to_gid(gid, &rem_dest->gid);
218
219 out:
220 close(sockfd);
221 return rem_dest;
222 }
223
pp_server_exch_dest(struct pingpong_context * ctx,int ib_port,enum ibv_mtu mtu,int port,int sl,const struct pingpong_dest * my_dest,int sgid_idx)224 static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
225 int ib_port, enum ibv_mtu mtu,
226 int port, int sl,
227 const struct pingpong_dest *my_dest,
228 int sgid_idx)
229 {
230 struct addrinfo *res, *t;
231 struct addrinfo hints = {
232 .ai_flags = AI_PASSIVE,
233 .ai_family = AF_UNSPEC,
234 .ai_socktype = SOCK_STREAM
235 };
236 char *service;
237 char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
238 int n;
239 int sockfd = -1, connfd;
240 struct pingpong_dest *rem_dest = NULL;
241 char gid[33];
242
243 if (asprintf(&service, "%d", port) < 0)
244 return NULL;
245
246 n = getaddrinfo(NULL, service, &hints, &res);
247
248 if (n < 0) {
249 fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
250 free(service);
251 return NULL;
252 }
253
254 for (t = res; t; t = t->ai_next) {
255 sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
256 if (sockfd >= 0) {
257 n = 1;
258
259 setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);
260
261 if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
262 break;
263 close(sockfd);
264 sockfd = -1;
265 }
266 }
267
268 freeaddrinfo_null(res);
269 free(service);
270
271 if (sockfd < 0) {
272 fprintf(stderr, "Couldn't listen to port %d\n", port);
273 return NULL;
274 }
275
276 if (listen(sockfd, 1) < 0) {
277 perror("listen() failed");
278 close(sockfd);
279 return NULL;
280 }
281 connfd = accept(sockfd, NULL, NULL);
282 close(sockfd);
283 if (connfd < 0) {
284 fprintf(stderr, "accept() failed\n");
285 return NULL;
286 }
287
288 n = read(connfd, msg, sizeof msg);
289 if (n != sizeof msg) {
290 perror("server read");
291 fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg);
292 goto out;
293 }
294
295 rem_dest = malloc(sizeof *rem_dest);
296 if (!rem_dest)
297 goto out;
298
299 sscanf(msg, "%x:%x:%x:%s", &rem_dest->lid, &rem_dest->qpn,
300 &rem_dest->psn, gid);
301 wire_gid_to_gid(gid, &rem_dest->gid);
302
303 if (pp_connect_ctx(ctx, ib_port, my_dest->psn, mtu, sl, rem_dest,
304 sgid_idx)) {
305 fprintf(stderr, "Couldn't connect to remote QP\n");
306 free(rem_dest);
307 rem_dest = NULL;
308 goto out;
309 }
310
311
312 gid_to_wire_gid(&my_dest->gid, gid);
313 sprintf(msg, "%04x:%06x:%06x:%s", my_dest->lid, my_dest->qpn,
314 my_dest->psn, gid);
315 if (write(connfd, msg, sizeof msg) != sizeof msg ||
316 read(connfd, msg, sizeof msg) != sizeof "done") {
317 fprintf(stderr, "Couldn't send/recv local address\n");
318 free(rem_dest);
319 rem_dest = NULL;
320 goto out;
321 }
322
323
324 out:
325 close(connfd);
326 return rem_dest;
327 }
328
pp_init_ctx(struct ibv_device * ib_dev,int size,int rx_depth,int port,int use_event)329 static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
330 int rx_depth, int port,
331 int use_event)
332 {
333 struct pingpong_context *ctx;
334 int access_flags = IBV_ACCESS_LOCAL_WRITE;
335
336 ctx = calloc(1, sizeof *ctx);
337 if (!ctx)
338 return NULL;
339
340 ctx->size = size;
341 ctx->send_flags = IBV_SEND_SIGNALED;
342 ctx->rx_depth = rx_depth;
343
344 ctx->buf = memalign(page_size, size);
345 if (!ctx->buf) {
346 fprintf(stderr, "Couldn't allocate work buf.\n");
347 goto clean_ctx;
348 }
349
350 /* FIXME memset(ctx->buf, 0, size); */
351 memset(ctx->buf, 0x7b, size);
352
353 ctx->context = ibv_open_device(ib_dev);
354 if (!ctx->context) {
355 fprintf(stderr, "Couldn't get context for %s\n",
356 ibv_get_device_name(ib_dev));
357 goto clean_buffer;
358 }
359
360 if (use_event) {
361 ctx->channel = ibv_create_comp_channel(ctx->context);
362 if (!ctx->channel) {
363 fprintf(stderr, "Couldn't create completion channel\n");
364 goto clean_device;
365 }
366 } else
367 ctx->channel = NULL;
368
369 ctx->pd = ibv_alloc_pd(ctx->context);
370 if (!ctx->pd) {
371 fprintf(stderr, "Couldn't allocate PD\n");
372 goto clean_comp_channel;
373 }
374
375 if (use_odp || use_ts) {
376 const uint32_t rc_caps_mask = IBV_ODP_SUPPORT_SEND |
377 IBV_ODP_SUPPORT_RECV;
378 struct ibv_device_attr_ex attrx;
379
380 if (ibv_query_device_ex(ctx->context, NULL, &attrx)) {
381 fprintf(stderr, "Couldn't query device for its features\n");
382 goto clean_comp_channel;
383 }
384
385 if (use_odp) {
386 if (!(attrx.odp_caps.general_caps & IBV_ODP_SUPPORT) ||
387 (attrx.odp_caps.per_transport_caps.rc_odp_caps & rc_caps_mask) != rc_caps_mask) {
388 fprintf(stderr, "The device isn't ODP capable or does not support RC send and receive with ODP\n");
389 goto clean_comp_channel;
390 }
391 access_flags |= IBV_ACCESS_ON_DEMAND;
392 }
393
394 if (use_ts) {
395 if (!attrx.completion_timestamp_mask) {
396 fprintf(stderr, "The device isn't completion timestamp capable\n");
397 goto clean_comp_channel;
398 }
399 ctx->completion_timestamp_mask = attrx.completion_timestamp_mask;
400 }
401 }
402 ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, access_flags);
403
404 if (!ctx->mr) {
405 fprintf(stderr, "Couldn't register MR\n");
406 goto clean_pd;
407 }
408
409 if (use_ts) {
410 struct ibv_cq_init_attr_ex attr_ex = {
411 .cqe = rx_depth + 1,
412 .cq_context = NULL,
413 .channel = ctx->channel,
414 .comp_vector = 0,
415 .wc_flags = IBV_WC_EX_WITH_COMPLETION_TIMESTAMP
416 };
417
418 ctx->cq_s.cq_ex = ibv_create_cq_ex(ctx->context, &attr_ex);
419 } else {
420 ctx->cq_s.cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL,
421 ctx->channel, 0);
422 }
423
424 if (!pp_cq(ctx)) {
425 fprintf(stderr, "Couldn't create CQ\n");
426 goto clean_mr;
427 }
428
429 {
430 struct ibv_qp_attr attr;
431 struct ibv_qp_init_attr init_attr = {
432 .send_cq = pp_cq(ctx),
433 .recv_cq = pp_cq(ctx),
434 .cap = {
435 .max_send_wr = 1,
436 .max_recv_wr = rx_depth,
437 .max_send_sge = 1,
438 .max_recv_sge = 1
439 },
440 .qp_type = IBV_QPT_RC
441 };
442
443 ctx->qp = ibv_create_qp(ctx->pd, &init_attr);
444 if (!ctx->qp) {
445 fprintf(stderr, "Couldn't create QP\n");
446 goto clean_cq;
447 }
448
449 ibv_query_qp(ctx->qp, &attr, IBV_QP_CAP, &init_attr);
450 if (init_attr.cap.max_inline_data >= size) {
451 ctx->send_flags |= IBV_SEND_INLINE;
452 }
453 }
454
455 {
456 struct ibv_qp_attr attr = {
457 .qp_state = IBV_QPS_INIT,
458 .pkey_index = 0,
459 .port_num = port,
460 .qp_access_flags = 0
461 };
462
463 if (ibv_modify_qp(ctx->qp, &attr,
464 IBV_QP_STATE |
465 IBV_QP_PKEY_INDEX |
466 IBV_QP_PORT |
467 IBV_QP_ACCESS_FLAGS)) {
468 fprintf(stderr, "Failed to modify QP to INIT\n");
469 goto clean_qp;
470 }
471 }
472
473 return ctx;
474
475 clean_qp:
476 ibv_destroy_qp(ctx->qp);
477
478 clean_cq:
479 ibv_destroy_cq(pp_cq(ctx));
480
481 clean_mr:
482 ibv_dereg_mr(ctx->mr);
483
484 clean_pd:
485 ibv_dealloc_pd(ctx->pd);
486
487 clean_comp_channel:
488 if (ctx->channel)
489 ibv_destroy_comp_channel(ctx->channel);
490
491 clean_device:
492 ibv_close_device(ctx->context);
493
494 clean_buffer:
495 free(ctx->buf);
496
497 clean_ctx:
498 free(ctx);
499
500 return NULL;
501 }
502
pp_close_ctx(struct pingpong_context * ctx)503 static int pp_close_ctx(struct pingpong_context *ctx)
504 {
505 if (ibv_destroy_qp(ctx->qp)) {
506 fprintf(stderr, "Couldn't destroy QP\n");
507 return 1;
508 }
509
510 if (ibv_destroy_cq(pp_cq(ctx))) {
511 fprintf(stderr, "Couldn't destroy CQ\n");
512 return 1;
513 }
514
515 if (ibv_dereg_mr(ctx->mr)) {
516 fprintf(stderr, "Couldn't deregister MR\n");
517 return 1;
518 }
519
520 if (ibv_dealloc_pd(ctx->pd)) {
521 fprintf(stderr, "Couldn't deallocate PD\n");
522 return 1;
523 }
524
525 if (ctx->channel) {
526 if (ibv_destroy_comp_channel(ctx->channel)) {
527 fprintf(stderr, "Couldn't destroy completion channel\n");
528 return 1;
529 }
530 }
531
532 if (ibv_close_device(ctx->context)) {
533 fprintf(stderr, "Couldn't release context\n");
534 return 1;
535 }
536
537 free(ctx->buf);
538 free(ctx);
539
540 return 0;
541 }
542
pp_post_recv(struct pingpong_context * ctx,int n)543 static int pp_post_recv(struct pingpong_context *ctx, int n)
544 {
545 struct ibv_sge list = {
546 .addr = (uintptr_t) ctx->buf,
547 .length = ctx->size,
548 .lkey = ctx->mr->lkey
549 };
550 struct ibv_recv_wr wr = {
551 .wr_id = PINGPONG_RECV_WRID,
552 .sg_list = &list,
553 .num_sge = 1,
554 };
555 struct ibv_recv_wr *bad_wr;
556 int i;
557
558 for (i = 0; i < n; ++i)
559 if (ibv_post_recv(ctx->qp, &wr, &bad_wr))
560 break;
561
562 return i;
563 }
564
pp_post_send(struct pingpong_context * ctx)565 static int pp_post_send(struct pingpong_context *ctx)
566 {
567 struct ibv_sge list = {
568 .addr = (uintptr_t) ctx->buf,
569 .length = ctx->size,
570 .lkey = ctx->mr->lkey
571 };
572 struct ibv_send_wr wr = {
573 .wr_id = PINGPONG_SEND_WRID,
574 .sg_list = &list,
575 .num_sge = 1,
576 .opcode = IBV_WR_SEND,
577 .send_flags = ctx->send_flags,
578 };
579 struct ibv_send_wr *bad_wr;
580
581 return ibv_post_send(ctx->qp, &wr, &bad_wr);
582 }
583
/*
 * Running statistics over the gaps between consecutive receive
 * completion timestamps.
 */
struct ts_params {
	uint64_t	comp_recv_max_time_delta;	/* largest gap seen */
	uint64_t	comp_recv_min_time_delta;	/* smallest gap seen */
	uint64_t	comp_recv_total_time_delta;	/* sum of all gaps */
	uint64_t	comp_recv_prev_time;		/* previous receive timestamp */
	int		last_comp_with_ts;		/* previous receive had a timestamp */
	unsigned int	comp_with_time_iters;		/* number of gaps accumulated */
};
592
parse_single_wc(struct pingpong_context * ctx,int * scnt,int * rcnt,int * routs,int iters,uint64_t wr_id,enum ibv_wc_status status,uint64_t completion_timestamp,struct ts_params * ts)593 static inline int parse_single_wc(struct pingpong_context *ctx, int *scnt,
594 int *rcnt, int *routs, int iters,
595 uint64_t wr_id, enum ibv_wc_status status,
596 uint64_t completion_timestamp,
597 struct ts_params *ts)
598 {
599 if (status != IBV_WC_SUCCESS) {
600 fprintf(stderr, "Failed status %s (%d) for wr_id %d\n",
601 ibv_wc_status_str(status),
602 status, (int)wr_id);
603 return 1;
604 }
605
606 switch ((int)wr_id) {
607 case PINGPONG_SEND_WRID:
608 ++(*scnt);
609 break;
610
611 case PINGPONG_RECV_WRID:
612 if (--(*routs) <= 1) {
613 *routs += pp_post_recv(ctx, ctx->rx_depth - *routs);
614 if (*routs < ctx->rx_depth) {
615 fprintf(stderr,
616 "Couldn't post receive (%d)\n",
617 *routs);
618 return 1;
619 }
620 }
621
622 ++(*rcnt);
623 if (use_ts) {
624 if (ts->last_comp_with_ts) {
625 uint64_t delta;
626
627 /* checking whether the clock was wrapped around */
628 if (completion_timestamp >= ts->comp_recv_prev_time)
629 delta = completion_timestamp - ts->comp_recv_prev_time;
630 else
631 delta = ctx->completion_timestamp_mask - ts->comp_recv_prev_time +
632 completion_timestamp + 1;
633
634 ts->comp_recv_max_time_delta = MAX(ts->comp_recv_max_time_delta, delta);
635 ts->comp_recv_min_time_delta = MIN(ts->comp_recv_min_time_delta, delta);
636 ts->comp_recv_total_time_delta += delta;
637 ts->comp_with_time_iters++;
638 }
639
640 ts->comp_recv_prev_time = completion_timestamp;
641 ts->last_comp_with_ts = 1;
642 } else {
643 ts->last_comp_with_ts = 0;
644 }
645
646 break;
647
648 default:
649 fprintf(stderr, "Completion for unknown wr_id %d\n",
650 (int)wr_id);
651 return 1;
652 }
653
654 ctx->pending &= ~(int)wr_id;
655 if (*scnt < iters && !ctx->pending) {
656 if (pp_post_send(ctx)) {
657 fprintf(stderr, "Couldn't post send\n");
658 return 1;
659 }
660 ctx->pending = PINGPONG_RECV_WRID |
661 PINGPONG_SEND_WRID;
662 }
663
664 return 0;
665 }
666
/*
 * Print the command-line synopsis and option list to stdout.
 * @argv0 is the program name shown in the synopsis lines.
 */
static void usage(const char *argv0)
{
	static const char *const option_lines[] = {
		" -p, --port=<port> listen on/connect to port <port> (default 18515)\n",
		" -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n",
		" -i, --ib-port=<port> use port <port> of IB device (default 1)\n",
		" -s, --size=<size> size of message to exchange (default 4096)\n",
		" -m, --mtu=<size> path MTU (default 1024)\n",
		" -r, --rx-depth=<dep> number of receives to post at a time (default 500)\n",
		" -n, --iters=<iters> number of exchanges (default 1000)\n",
		" -l, --sl=<sl> service level value\n",
		" -e, --events sleep on CQ events (default poll)\n",
		" -g, --gid-idx=<gid index> local port gid index\n",
		" -o, --odp use on demand paging\n",
		" -t, --ts get CQE with timestamp\n",
	};
	size_t k;

	fputs("Usage:\n", stdout);
	printf(" %s start a server and wait for connection\n", argv0);
	printf(" %s <host> connect to server at <host>\n", argv0);
	fputs("\n", stdout);
	fputs("Options:\n", stdout);

	for (k = 0; k < sizeof option_lines / sizeof option_lines[0]; ++k)
		fputs(option_lines[k], stdout);
}
687
main(int argc,char * argv[])688 int main(int argc, char *argv[])
689 {
690 struct ibv_device **dev_list;
691 struct ibv_device *ib_dev;
692 struct pingpong_context *ctx;
693 struct pingpong_dest my_dest;
694 struct pingpong_dest *rem_dest;
695 struct timeval start, end;
696 char *ib_devname = NULL;
697 char *servername = NULL;
698 unsigned int port = 18515;
699 int ib_port = 1;
700 unsigned int size = 4096;
701 enum ibv_mtu mtu = IBV_MTU_1024;
702 unsigned int rx_depth = 500;
703 unsigned int iters = 1000;
704 int use_event = 0;
705 int routs;
706 int rcnt, scnt;
707 int num_cq_events = 0;
708 int sl = 0;
709 int gidx = -1;
710 char gid[33];
711 struct ts_params ts;
712
713 srand48(getpid() * time(NULL));
714
715 while (1) {
716 int c;
717
718 static struct option long_options[] = {
719 { .name = "port", .has_arg = 1, .val = 'p' },
720 { .name = "ib-dev", .has_arg = 1, .val = 'd' },
721 { .name = "ib-port", .has_arg = 1, .val = 'i' },
722 { .name = "size", .has_arg = 1, .val = 's' },
723 { .name = "mtu", .has_arg = 1, .val = 'm' },
724 { .name = "rx-depth", .has_arg = 1, .val = 'r' },
725 { .name = "iters", .has_arg = 1, .val = 'n' },
726 { .name = "sl", .has_arg = 1, .val = 'l' },
727 { .name = "events", .has_arg = 0, .val = 'e' },
728 { .name = "gid-idx", .has_arg = 1, .val = 'g' },
729 { .name = "odp", .has_arg = 0, .val = 'o' },
730 { .name = "ts", .has_arg = 0, .val = 't' },
731 {}
732 };
733
734 c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:ot",
735 long_options, NULL);
736
737 if (c == -1)
738 break;
739
740 switch (c) {
741 case 'p':
742 port = strtoul(optarg, NULL, 0);
743 if (port > 65535) {
744 usage(argv[0]);
745 return 1;
746 }
747 break;
748
749 case 'd':
750 ib_devname = strdupa(optarg);
751 break;
752
753 case 'i':
754 ib_port = strtol(optarg, NULL, 0);
755 if (ib_port < 1) {
756 usage(argv[0]);
757 return 1;
758 }
759 break;
760
761 case 's':
762 size = strtoul(optarg, NULL, 0);
763 break;
764
765 case 'm':
766 mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
767 if (mtu == 0) {
768 usage(argv[0]);
769 return 1;
770 }
771 break;
772
773 case 'r':
774 rx_depth = strtoul(optarg, NULL, 0);
775 break;
776
777 case 'n':
778 iters = strtoul(optarg, NULL, 0);
779 break;
780
781 case 'l':
782 sl = strtol(optarg, NULL, 0);
783 break;
784
785 case 'e':
786 ++use_event;
787 break;
788
789 case 'g':
790 gidx = strtol(optarg, NULL, 0);
791 break;
792
793 case 'o':
794 use_odp = 1;
795 break;
796 case 't':
797 use_ts = 1;
798 break;
799
800 default:
801 usage(argv[0]);
802 return 1;
803 }
804 }
805
806 if (optind == argc - 1)
807 servername = strdupa(argv[optind]);
808 else if (optind < argc) {
809 usage(argv[0]);
810 return 1;
811 }
812
813 if (use_ts) {
814 ts.comp_recv_max_time_delta = 0;
815 ts.comp_recv_min_time_delta = 0xffffffff;
816 ts.comp_recv_total_time_delta = 0;
817 ts.comp_recv_prev_time = 0;
818 ts.last_comp_with_ts = 0;
819 ts.comp_with_time_iters = 0;
820 }
821
822 page_size = sysconf(_SC_PAGESIZE);
823
824 dev_list = ibv_get_device_list(NULL);
825 if (!dev_list) {
826 perror("Failed to get IB devices list");
827 return 1;
828 }
829
830 if (!ib_devname) {
831 ib_dev = *dev_list;
832 if (!ib_dev) {
833 fprintf(stderr, "No IB devices found\n");
834 return 1;
835 }
836 } else {
837 int i;
838 for (i = 0; dev_list[i]; ++i)
839 if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname))
840 break;
841 ib_dev = dev_list[i];
842 if (!ib_dev) {
843 fprintf(stderr, "IB device %s not found\n", ib_devname);
844 return 1;
845 }
846 }
847
848 ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event);
849 if (!ctx)
850 return 1;
851
852 routs = pp_post_recv(ctx, ctx->rx_depth);
853 if (routs < ctx->rx_depth) {
854 fprintf(stderr, "Couldn't post receive (%d)\n", routs);
855 return 1;
856 }
857
858 if (use_event)
859 if (ibv_req_notify_cq(pp_cq(ctx), 0)) {
860 fprintf(stderr, "Couldn't request CQ notification\n");
861 return 1;
862 }
863
864
865 if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
866 fprintf(stderr, "Couldn't get port info\n");
867 return 1;
868 }
869
870 my_dest.lid = ctx->portinfo.lid;
871 if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET &&
872 !my_dest.lid) {
873 fprintf(stderr, "Couldn't get local LID\n");
874 return 1;
875 }
876
877 if (gidx >= 0) {
878 if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest.gid)) {
879 fprintf(stderr, "can't read sgid of index %d\n", gidx);
880 return 1;
881 }
882 } else
883 memset(&my_dest.gid, 0, sizeof my_dest.gid);
884
885 my_dest.qpn = ctx->qp->qp_num;
886 my_dest.psn = lrand48() & 0xffffff;
887 inet_ntop(AF_INET6, &my_dest.gid, gid, sizeof gid);
888 printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n",
889 my_dest.lid, my_dest.qpn, my_dest.psn, gid);
890
891
892 if (servername)
893 rem_dest = pp_client_exch_dest(servername, port, &my_dest);
894 else
895 rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl,
896 &my_dest, gidx);
897
898 if (!rem_dest)
899 return 1;
900
901 inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid);
902 printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n",
903 rem_dest->lid, rem_dest->qpn, rem_dest->psn, gid);
904
905 if (servername)
906 if (pp_connect_ctx(ctx, ib_port, my_dest.psn, mtu, sl, rem_dest,
907 gidx))
908 return 1;
909
910 ctx->pending = PINGPONG_RECV_WRID;
911
912 if (servername) {
913 if (pp_post_send(ctx)) {
914 fprintf(stderr, "Couldn't post send\n");
915 return 1;
916 }
917 ctx->pending |= PINGPONG_SEND_WRID;
918 }
919
920 if (gettimeofday(&start, NULL)) {
921 perror("gettimeofday");
922 return 1;
923 }
924
925 rcnt = scnt = 0;
926 while (rcnt < iters || scnt < iters) {
927 int ret;
928
929 if (use_event) {
930 struct ibv_cq *ev_cq;
931 void *ev_ctx;
932
933 if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
934 fprintf(stderr, "Failed to get cq_event\n");
935 return 1;
936 }
937
938 ++num_cq_events;
939
940 if (ev_cq != pp_cq(ctx)) {
941 fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
942 return 1;
943 }
944
945 if (ibv_req_notify_cq(pp_cq(ctx), 0)) {
946 fprintf(stderr, "Couldn't request CQ notification\n");
947 return 1;
948 }
949 }
950
951 if (use_ts) {
952 struct ibv_poll_cq_attr attr = {};
953
954 do {
955 ret = ibv_start_poll(ctx->cq_s.cq_ex, &attr);
956 } while (!use_event && ret == ENOENT);
957
958 if (ret) {
959 fprintf(stderr, "poll CQ failed %d\n", ret);
960 return ret;
961 }
962 ret = parse_single_wc(ctx, &scnt, &rcnt, &routs,
963 iters,
964 ctx->cq_s.cq_ex->wr_id,
965 ctx->cq_s.cq_ex->status,
966 ibv_wc_read_completion_ts(ctx->cq_s.cq_ex),
967 &ts);
968 if (ret) {
969 ibv_end_poll(ctx->cq_s.cq_ex);
970 return ret;
971 }
972 ret = ibv_next_poll(ctx->cq_s.cq_ex);
973 if (!ret)
974 ret = parse_single_wc(ctx, &scnt, &rcnt, &routs,
975 iters,
976 ctx->cq_s.cq_ex->wr_id,
977 ctx->cq_s.cq_ex->status,
978 ibv_wc_read_completion_ts(ctx->cq_s.cq_ex),
979 &ts);
980 ibv_end_poll(ctx->cq_s.cq_ex);
981 if (ret && ret != ENOENT) {
982 fprintf(stderr, "poll CQ failed %d\n", ret);
983 return ret;
984 }
985 } else {
986 int ne, i;
987 struct ibv_wc wc[2];
988
989 do {
990 ne = ibv_poll_cq(pp_cq(ctx), 2, wc);
991 if (ne < 0) {
992 fprintf(stderr, "poll CQ failed %d\n", ne);
993 return 1;
994 }
995 } while (!use_event && ne < 1);
996
997 for (i = 0; i < ne; ++i) {
998 ret = parse_single_wc(ctx, &scnt, &rcnt, &routs,
999 iters,
1000 wc[i].wr_id,
1001 wc[i].status,
1002 0, &ts);
1003 if (ret) {
1004 fprintf(stderr, "parse WC failed %d\n", ne);
1005 return 1;
1006 }
1007 }
1008 }
1009 }
1010
1011 if (gettimeofday(&end, NULL)) {
1012 perror("gettimeofday");
1013 return 1;
1014 }
1015
1016 {
1017 float usec = (end.tv_sec - start.tv_sec) * 1000000 +
1018 (end.tv_usec - start.tv_usec);
1019 long long bytes = (long long) size * iters * 2;
1020
1021 printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
1022 bytes, usec / 1000000., bytes * 8. / usec);
1023 printf("%d iters in %.2f seconds = %.2f usec/iter\n",
1024 iters, usec / 1000000., usec / iters);
1025
1026 if (use_ts && ts.comp_with_time_iters) {
1027 printf("Max receive completion clock cycles = %" PRIu64 "\n",
1028 ts.comp_recv_max_time_delta);
1029 printf("Min receive completion clock cycles = %" PRIu64 "\n",
1030 ts.comp_recv_min_time_delta);
1031 printf("Average receive completion clock cycles = %f\n",
1032 (double)ts.comp_recv_total_time_delta / ts.comp_with_time_iters);
1033 }
1034 }
1035
1036 ibv_ack_cq_events(pp_cq(ctx), num_cq_events);
1037
1038 if (pp_close_ctx(ctx))
1039 return 1;
1040
1041 ibv_free_device_list(dev_list);
1042 free(rem_dest);
1043
1044 return 0;
1045 }
1046