xref: /freebsd/contrib/ofed/libibverbs/examples/srq_pingpong.c (revision 559af1ec16576f9f3e41318d66147f4df4fb8e87)
1 /*
2  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #define _GNU_SOURCE
33 #include <config.h>
34 
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <unistd.h>
38 #include <string.h>
39 #include <sys/types.h>
40 #include <sys/socket.h>
41 #include <sys/time.h>
42 #include <netdb.h>
43 #include <stdlib.h>
44 #include <getopt.h>
45 #include <arpa/inet.h>
46 #include <time.h>
47 
48 #include "pingpong.h"
49 
/*
 * Work-request identifiers.  main() also ORs them together and clears them
 * bit by bit in pingpong_context.pending[], so each must be a distinct bit.
 */
enum {
	PINGPONG_RECV_WRID = 1,
	PINGPONG_SEND_WRID = 2,
};

/* Capacity of the fixed-size per-QP arrays (QP handles, pending masks, addresses). */
enum {
	MAX_QP = 256,
};

/* System page size; main() sets it and pp_init_ctx() uses it to align the work buffer. */
static int page_size;
58 
59 struct pingpong_context {
60 	struct ibv_context	*context;
61 	struct ibv_comp_channel *channel;
62 	struct ibv_pd		*pd;
63 	struct ibv_mr		*mr;
64 	struct ibv_cq		*cq;
65 	struct ibv_srq		*srq;
66 	struct ibv_qp		*qp[MAX_QP];
67 	void			*buf;
68 	int			 size;
69 	int			 send_flags;
70 	int			 num_qp;
71 	int			 rx_depth;
72 	int			 pending[MAX_QP];
73 	struct ibv_port_attr	 portinfo;
74 };
75 
76 struct pingpong_dest {
77 	int lid;
78 	int qpn;
79 	int psn;
80 	union ibv_gid gid;
81 };
82 
83 static int pp_connect_ctx(struct pingpong_context *ctx, int port, enum ibv_mtu mtu,
84 			  int sl, const struct pingpong_dest *my_dest,
85 			  const struct pingpong_dest *dest, int sgid_idx)
86 {
87 	int i;
88 
89 	for (i = 0; i < ctx->num_qp; ++i) {
90 		struct ibv_qp_attr attr = {
91 			.qp_state		= IBV_QPS_RTR,
92 			.path_mtu		= mtu,
93 			.dest_qp_num		= dest[i].qpn,
94 			.rq_psn			= dest[i].psn,
95 			.max_dest_rd_atomic	= 1,
96 			.min_rnr_timer		= 12,
97 			.ah_attr		= {
98 				.is_global	= 0,
99 				.dlid		= dest[i].lid,
100 				.sl		= sl,
101 				.src_path_bits	= 0,
102 				.port_num	= port
103 			}
104 		};
105 
106 		if (dest->gid.global.interface_id) {
107 			attr.ah_attr.is_global = 1;
108 			attr.ah_attr.grh.hop_limit = 1;
109 			attr.ah_attr.grh.dgid = dest->gid;
110 			attr.ah_attr.grh.sgid_index = sgid_idx;
111 		}
112 		if (ibv_modify_qp(ctx->qp[i], &attr,
113 				  IBV_QP_STATE              |
114 				  IBV_QP_AV                 |
115 				  IBV_QP_PATH_MTU           |
116 				  IBV_QP_DEST_QPN           |
117 				  IBV_QP_RQ_PSN             |
118 				  IBV_QP_MAX_DEST_RD_ATOMIC |
119 				  IBV_QP_MIN_RNR_TIMER)) {
120 			fprintf(stderr, "Failed to modify QP[%d] to RTR\n", i);
121 			return 1;
122 		}
123 
124 		attr.qp_state	    = IBV_QPS_RTS;
125 		attr.timeout	    = 14;
126 		attr.retry_cnt	    = 7;
127 		attr.rnr_retry	    = 7;
128 		attr.sq_psn	    = my_dest[i].psn;
129 		attr.max_rd_atomic  = 1;
130 		if (ibv_modify_qp(ctx->qp[i], &attr,
131 				  IBV_QP_STATE              |
132 				  IBV_QP_TIMEOUT            |
133 				  IBV_QP_RETRY_CNT          |
134 				  IBV_QP_RNR_RETRY          |
135 				  IBV_QP_SQ_PSN             |
136 				  IBV_QP_MAX_QP_RD_ATOMIC)) {
137 			fprintf(stderr, "Failed to modify QP[%d] to RTS\n", i);
138 			return 1;
139 		}
140 	}
141 
142 	return 0;
143 }
144 
/*
 * Client side of the out-of-band address exchange.
 *
 * Connects to servername:port over TCP, sends the MAX_QP local addresses in
 * my_dest, reads MAX_QP remote addresses back and finally sends "done".
 *
 * servername  host to connect to
 * port        TCP port
 * my_dest     array of MAX_QP local addresses
 *
 * Returns a malloc()ed array of MAX_QP remote addresses that the caller
 * must free(), or NULL if any step fails.
 */
static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
						 const struct pingpong_dest *my_dest)
{
	struct addrinfo *res = NULL, *t;
	struct addrinfo hints = {
		.ai_family   = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int i;
	int sockfd = -1;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	n = getaddrinfo(servername, service, &hints, &res);

	/* getaddrinfo() reports failure with any non-zero value, not only negative ones. */
	if (n != 0) {
		fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
		free(service);
		return NULL;
	}

	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo_null(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
		return NULL;
	}

	for (i = 0; i < MAX_QP; ++i) {
		gid_to_wire_gid(&my_dest[i].gid, gid);
		/* Bounded formatting: an out-of-range field must not overrun msg. */
		n = snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s", my_dest[i].lid,
			     my_dest[i].qpn, my_dest[i].psn, gid);
		if (n < 0 || (size_t) n >= sizeof msg) {
			fprintf(stderr, "Local address [%d] does not fit the wire format\n", i);
			goto fail;
		}
		if (write(sockfd, msg, sizeof msg) != sizeof msg) {
			fprintf(stderr, "Couldn't send local address\n");
			goto fail;
		}
	}

	rem_dest = malloc(MAX_QP * sizeof *rem_dest);
	if (!rem_dest)
		goto fail;

	for (i = 0; i < MAX_QP; ++i) {
		unsigned int lid, qpn, psn;
		size_t got = 0;

		while (got < sizeof msg) {
			ssize_t r = read(sockfd, msg + got, sizeof msg - got);

			/* r == 0 means the peer closed the connection; stop instead of spinning. */
			if (r <= 0) {
				if (r < 0)
					perror("client read");
				fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n",
					(int) got, (int) sizeof msg, i);
				goto fail;
			}
			got += (size_t) r;
		}

		/* The record comes from the network: terminate it before parsing. */
		msg[sizeof msg - 1] = '\0';
		memset(gid, 0, sizeof gid);
		if (sscanf(msg, "%x:%x:%x:%32s", &lid, &qpn, &psn, gid) != 4) {
			fprintf(stderr, "Couldn't parse remote address [%d]\n", i);
			goto fail;
		}
		rem_dest[i].lid = lid;
		rem_dest[i].qpn = qpn;
		rem_dest[i].psn = psn;
		wire_gid_to_gid(gid, &rem_dest[i].gid);
	}

	if (write(sockfd, "done", sizeof "done") != sizeof "done") {
		perror("client write");
		goto fail;
	}

	goto out;

fail:
	/* Never hand a partially filled table back to the caller. */
	free(rem_dest);
	rem_dest = NULL;
out:
	close(sockfd);
	return rem_dest;
}
231 
/*
 * Server side of the out-of-band address exchange.
 *
 * Listens on TCP `port`, accepts one client, reads its MAX_QP addresses,
 * connects the local QPs to them with pp_connect_ctx(), sends the MAX_QP
 * local addresses back and waits for the client's "done" message.
 *
 * ctx       context whose QPs get connected
 * ib_port   local device port number
 * mtu       path MTU
 * port      TCP port to listen on
 * sl        service level
 * my_dest   array of MAX_QP local addresses
 * sgid_idx  local GID index
 *
 * Returns a malloc()ed array of MAX_QP remote addresses that the caller
 * must free(), or NULL if any step fails.
 */
static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
						 int ib_port, enum ibv_mtu mtu,
						 int port, int sl,
						 const struct pingpong_dest *my_dest,
						 int sgid_idx)
{
	struct addrinfo *res = NULL, *t;
	struct addrinfo hints = {
		.ai_flags    = AI_PASSIVE,
		.ai_family   = AF_INET,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int i;
	int sockfd = -1, connfd;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	n = getaddrinfo(NULL, service, &hints, &res);

	/* getaddrinfo() reports failure with any non-zero value, not only negative ones. */
	if (n != 0) {
		fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
		free(service);
		return NULL;
	}

	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			n = 1;

			setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);

			if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo_null(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't listen to port %d\n", port);
		return NULL;
	}

	if (listen(sockfd, 1)) {
		fprintf(stderr, "Couldn't listen to port %d\n", port);
		close(sockfd);
		return NULL;
	}
	connfd = accept(sockfd, NULL, NULL);
	close(sockfd);
	if (connfd < 0) {
		fprintf(stderr, "accept() failed\n");
		return NULL;
	}

	rem_dest = malloc(MAX_QP * sizeof *rem_dest);
	if (!rem_dest)
		goto fail;

	for (i = 0; i < MAX_QP; ++i) {
		unsigned int lid, qpn, psn;
		size_t got = 0;

		while (got < sizeof msg) {
			ssize_t r = read(connfd, msg + got, sizeof msg - got);

			/* r == 0 means the peer closed the connection; stop instead of spinning. */
			if (r <= 0) {
				if (r < 0)
					perror("server read");
				fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n",
					(int) got, (int) sizeof msg, i);
				goto fail;
			}
			got += (size_t) r;
		}

		/* The record comes from the network: terminate it before parsing. */
		msg[sizeof msg - 1] = '\0';
		memset(gid, 0, sizeof gid);
		if (sscanf(msg, "%x:%x:%x:%32s", &lid, &qpn, &psn, gid) != 4) {
			fprintf(stderr, "Couldn't parse remote address [%d]\n", i);
			goto fail;
		}
		rem_dest[i].lid = lid;
		rem_dest[i].qpn = qpn;
		rem_dest[i].psn = psn;
		wire_gid_to_gid(gid, &rem_dest[i].gid);
	}

	if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest,
								sgid_idx)) {
		fprintf(stderr, "Couldn't connect to remote QP\n");
		goto fail;
	}

	for (i = 0; i < MAX_QP; ++i) {
		gid_to_wire_gid(&my_dest[i].gid, gid);
		/* Bounded formatting: an out-of-range field must not overrun msg. */
		n = snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s", my_dest[i].lid,
			     my_dest[i].qpn, my_dest[i].psn, gid);
		if (n < 0 || (size_t) n >= sizeof msg) {
			fprintf(stderr, "Local address [%d] does not fit the wire format\n", i);
			goto fail;
		}
		if (write(connfd, msg, sizeof msg) != sizeof msg) {
			fprintf(stderr, "Couldn't send local address\n");
			goto fail;
		}
	}

	/* Wait for the client's "done" acknowledgement. */
	if (read(connfd, msg, sizeof msg) != sizeof "done") {
		perror("server read");
		goto fail;
	}

	goto out;

fail:
	/* Never hand a partially filled table back to the caller. */
	free(rem_dest);
	rem_dest = NULL;
out:
	close(connfd);
	return rem_dest;
}
347 
348 static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
349 					    int num_qp, int rx_depth, int port,
350 					    int use_event)
351 {
352 	struct pingpong_context *ctx;
353 	int i;
354 
355 	ctx = calloc(1, sizeof *ctx);
356 	if (!ctx)
357 		return NULL;
358 
359 	ctx->size       = size;
360 	ctx->send_flags = IBV_SEND_SIGNALED;
361 	ctx->num_qp     = num_qp;
362 	ctx->rx_depth   = rx_depth;
363 
364 	ctx->buf = memalign(page_size, size);
365 	if (!ctx->buf) {
366 		fprintf(stderr, "Couldn't allocate work buf.\n");
367 		goto clean_ctx;
368 	}
369 
370 	memset(ctx->buf, 0, size);
371 
372 	ctx->context = ibv_open_device(ib_dev);
373 	if (!ctx->context) {
374 		fprintf(stderr, "Couldn't get context for %s\n",
375 			ibv_get_device_name(ib_dev));
376 		goto clean_buffer;
377 	}
378 
379 	if (use_event) {
380 		ctx->channel = ibv_create_comp_channel(ctx->context);
381 		if (!ctx->channel) {
382 			fprintf(stderr, "Couldn't create completion channel\n");
383 			goto clean_device;
384 		}
385 	} else
386 		ctx->channel = NULL;
387 
388 	ctx->pd = ibv_alloc_pd(ctx->context);
389 	if (!ctx->pd) {
390 		fprintf(stderr, "Couldn't allocate PD\n");
391 		goto clean_comp_channel;
392 	}
393 
394 	ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE);
395 	if (!ctx->mr) {
396 		fprintf(stderr, "Couldn't register MR\n");
397 		goto clean_pd;
398 	}
399 
400 	ctx->cq = ibv_create_cq(ctx->context, rx_depth + num_qp, NULL,
401 				ctx->channel, 0);
402 	if (!ctx->cq) {
403 		fprintf(stderr, "Couldn't create CQ\n");
404 		goto clean_mr;
405 	}
406 
407 	{
408 		struct ibv_srq_init_attr attr = {
409 			.attr = {
410 				.max_wr  = rx_depth,
411 				.max_sge = 1
412 			}
413 		};
414 
415 		ctx->srq = ibv_create_srq(ctx->pd, &attr);
416 		if (!ctx->srq)  {
417 			fprintf(stderr, "Couldn't create SRQ\n");
418 			goto clean_cq;
419 		}
420 	}
421 
422 	for (i = 0; i < num_qp; ++i) {
423 		struct ibv_qp_attr attr;
424 		struct ibv_qp_init_attr init_attr = {
425 			.send_cq = ctx->cq,
426 			.recv_cq = ctx->cq,
427 			.srq     = ctx->srq,
428 			.cap     = {
429 				.max_send_wr  = 1,
430 				.max_send_sge = 1,
431 			},
432 			.qp_type = IBV_QPT_RC
433 		};
434 
435 		ctx->qp[i] = ibv_create_qp(ctx->pd, &init_attr);
436 		if (!ctx->qp[i])  {
437 			fprintf(stderr, "Couldn't create QP[%d]\n", i);
438 			goto clean_qps;
439 		}
440 		ibv_query_qp(ctx->qp[i], &attr, IBV_QP_CAP, &init_attr);
441 		if (init_attr.cap.max_inline_data >= size) {
442 			ctx->send_flags |= IBV_SEND_INLINE;
443 		}
444 	}
445 
446 	for (i = 0; i < num_qp; ++i) {
447 		struct ibv_qp_attr attr = {
448 			.qp_state        = IBV_QPS_INIT,
449 			.pkey_index      = 0,
450 			.port_num        = port,
451 			.qp_access_flags = 0
452 		};
453 
454 		if (ibv_modify_qp(ctx->qp[i], &attr,
455 				  IBV_QP_STATE              |
456 				  IBV_QP_PKEY_INDEX         |
457 				  IBV_QP_PORT               |
458 				  IBV_QP_ACCESS_FLAGS)) {
459 			fprintf(stderr, "Failed to modify QP[%d] to INIT\n", i);
460 			goto clean_qps_full;
461 		}
462 	}
463 
464 	return ctx;
465 
466 clean_qps_full:
467 	i = num_qp;
468 
469 clean_qps:
470 	for (--i; i >= 0; --i)
471 		ibv_destroy_qp(ctx->qp[i]);
472 
473 	ibv_destroy_srq(ctx->srq);
474 
475 clean_cq:
476 	ibv_destroy_cq(ctx->cq);
477 
478 clean_mr:
479 	ibv_dereg_mr(ctx->mr);
480 
481 clean_pd:
482 	ibv_dealloc_pd(ctx->pd);
483 
484 clean_comp_channel:
485 	if (ctx->channel)
486 		ibv_destroy_comp_channel(ctx->channel);
487 
488 clean_device:
489 	ibv_close_device(ctx->context);
490 
491 clean_buffer:
492 	free(ctx->buf);
493 
494 clean_ctx:
495 	free(ctx);
496 
497 	return NULL;
498 }
499 
500 static int pp_close_ctx(struct pingpong_context *ctx, int num_qp)
501 {
502 	int i;
503 
504 	for (i = 0; i < num_qp; ++i) {
505 		if (ibv_destroy_qp(ctx->qp[i])) {
506 			fprintf(stderr, "Couldn't destroy QP[%d]\n", i);
507 			return 1;
508 		}
509 	}
510 
511 	if (ibv_destroy_srq(ctx->srq)) {
512 		fprintf(stderr, "Couldn't destroy SRQ\n");
513 		return 1;
514 	}
515 
516 	if (ibv_destroy_cq(ctx->cq)) {
517 		fprintf(stderr, "Couldn't destroy CQ\n");
518 		return 1;
519 	}
520 
521 	if (ibv_dereg_mr(ctx->mr)) {
522 		fprintf(stderr, "Couldn't deregister MR\n");
523 		return 1;
524 	}
525 
526 	if (ibv_dealloc_pd(ctx->pd)) {
527 		fprintf(stderr, "Couldn't deallocate PD\n");
528 		return 1;
529 	}
530 
531 	if (ctx->channel) {
532 		if (ibv_destroy_comp_channel(ctx->channel)) {
533 			fprintf(stderr, "Couldn't destroy completion channel\n");
534 			return 1;
535 		}
536 	}
537 
538 	if (ibv_close_device(ctx->context)) {
539 		fprintf(stderr, "Couldn't release context\n");
540 		return 1;
541 	}
542 
543 	free(ctx->buf);
544 	free(ctx);
545 
546 	return 0;
547 }
548 
549 static int pp_post_recv(struct pingpong_context *ctx, int n)
550 {
551 	struct ibv_sge list = {
552 		.addr	= (uintptr_t) ctx->buf,
553 		.length = ctx->size,
554 		.lkey	= ctx->mr->lkey
555 	};
556 	struct ibv_recv_wr wr = {
557 		.wr_id	    = PINGPONG_RECV_WRID,
558 		.sg_list    = &list,
559 		.num_sge    = 1,
560 	};
561 	struct ibv_recv_wr *bad_wr;
562 	int i;
563 
564 	for (i = 0; i < n; ++i)
565 		if (ibv_post_srq_recv(ctx->srq, &wr, &bad_wr))
566 			break;
567 
568 	return i;
569 }
570 
571 static int pp_post_send(struct pingpong_context *ctx, int qp_index)
572 {
573 	struct ibv_sge list = {
574 		.addr	= (uintptr_t) ctx->buf,
575 		.length = ctx->size,
576 		.lkey	= ctx->mr->lkey
577 	};
578 	struct ibv_send_wr wr = {
579 		.wr_id	    = PINGPONG_SEND_WRID,
580 		.sg_list    = &list,
581 		.num_sge    = 1,
582 		.opcode     = IBV_WR_SEND,
583 		.send_flags = ctx->send_flags,
584 	};
585 	struct ibv_send_wr *bad_wr;
586 
587 	return ibv_post_send(ctx->qp[qp_index], &wr, &bad_wr);
588 }
589 
590 static int find_qp(int qpn, struct pingpong_context *ctx, int num_qp)
591 {
592 	int i;
593 
594 	for (i = 0; i < num_qp; ++i)
595 		if (ctx->qp[i]->qp_num == qpn)
596 			return i;
597 
598 	return -1;
599 }
600 
/*
 * Print the command-line synopsis and the option summary to stdout.
 * argv0 is the program name shown in the synopsis lines.
 */
static void usage(const char *argv0)
{
	/* One entry per option line, printed verbatim in this order. */
	static const char *const option_help[] = {
		"  -p, --port=<port>      listen on/connect to port <port> (default 18515)\n",
		"  -d, --ib-dev=<dev>     use IB device <dev> (default first device found)\n",
		"  -i, --ib-port=<port>   use port <port> of IB device (default 1)\n",
		"  -s, --size=<size>      size of message to exchange (default 4096)\n",
		"  -m, --mtu=<size>       path MTU (default 1024)\n",
		"  -q, --num-qp=<num>     number of QPs to use (default 16)\n",
		"  -r, --rx-depth=<dep>   number of receives to post at a time (default 500)\n",
		"  -n, --iters=<iters>    number of exchanges per QP(default 1000)\n",
		"  -l, --sl=<sl>          service level value\n",
		"  -e, --events           sleep on CQ events (default poll)\n",
		"  -g, --gid-idx=<gid index> local port gid index\n",
	};
	size_t k;

	fputs("Usage:\n", stdout);
	printf("  %s            start a server and wait for connection\n", argv0);
	printf("  %s <host>     connect to server at <host>\n", argv0);
	fputs("\n", stdout);
	fputs("Options:\n", stdout);

	for (k = 0; k < sizeof option_help / sizeof option_help[0]; ++k)
		fputs(option_help[k], stdout);
}
620 
621 int main(int argc, char *argv[])
622 {
623 	struct ibv_device      **dev_list;
624 	struct ibv_device	*ib_dev;
625 	struct ibv_wc		*wc;
626 	struct pingpong_context *ctx;
627 	struct pingpong_dest     my_dest[MAX_QP];
628 	struct pingpong_dest    *rem_dest;
629 	struct timeval           start, end;
630 	char                    *ib_devname = NULL;
631 	char                    *servername = NULL;
632 	unsigned int             port = 18515;
633 	int                      ib_port = 1;
634 	unsigned int             size = 4096;
635 	enum ibv_mtu		 mtu = IBV_MTU_1024;
636 	unsigned int             num_qp = 16;
637 	unsigned int             rx_depth = 500;
638 	unsigned int             iters = 1000;
639 	int                      use_event = 0;
640 	int                      routs;
641 	int                      rcnt, scnt;
642 	int			 num_wc;
643 	int                      i;
644 	int                      num_cq_events = 0;
645 	int                      sl = 0;
646 	int			 gidx = -1;
647 	char			 gid[33];
648 
649 	srand48(getpid() * time(NULL));
650 
651 	while (1) {
652 		int c;
653 
654 		static struct option long_options[] = {
655 			{ .name = "port",     .has_arg = 1, .val = 'p' },
656 			{ .name = "ib-dev",   .has_arg = 1, .val = 'd' },
657 			{ .name = "ib-port",  .has_arg = 1, .val = 'i' },
658 			{ .name = "size",     .has_arg = 1, .val = 's' },
659 			{ .name = "mtu",      .has_arg = 1, .val = 'm' },
660 			{ .name = "num-qp",   .has_arg = 1, .val = 'q' },
661 			{ .name = "rx-depth", .has_arg = 1, .val = 'r' },
662 			{ .name = "iters",    .has_arg = 1, .val = 'n' },
663 			{ .name = "sl",       .has_arg = 1, .val = 'l' },
664 			{ .name = "events",   .has_arg = 0, .val = 'e' },
665 			{ .name = "gid-idx",  .has_arg = 1, .val = 'g' },
666 			{}
667 		};
668 
669 		c = getopt_long(argc, argv, "p:d:i:s:m:q:r:n:l:eg:",
670 							long_options, NULL);
671 		if (c == -1)
672 			break;
673 
674 		switch (c) {
675 		case 'p':
676 			port = strtoul(optarg, NULL, 0);
677 			if (port > 65535) {
678 				usage(argv[0]);
679 				return 1;
680 			}
681 			break;
682 
683 		case 'd':
684 			ib_devname = strdupa(optarg);
685 			break;
686 
687 		case 'i':
688 			ib_port = strtol(optarg, NULL, 0);
689 			if (ib_port < 1) {
690 				usage(argv[0]);
691 				return 1;
692 			}
693 			break;
694 
695 		case 's':
696 			size = strtoul(optarg, NULL, 0);
697 			if (size < 1) {
698 				usage(argv[0]);
699 				return 1;
700 			}
701 			break;
702 
703 		case 'm':
704 			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
705 			if (mtu == 0) {
706 				usage(argv[0]);
707 				return 1;
708 			}
709 			break;
710 
711 		case 'q':
712 			num_qp = strtoul(optarg, NULL, 0);
713 			break;
714 
715 		case 'r':
716 			rx_depth = strtoul(optarg, NULL, 0);
717 			break;
718 
719 		case 'n':
720 			iters = strtoul(optarg, NULL, 0);
721 			break;
722 
723 		case 'l':
724 			sl = strtol(optarg, NULL, 0);
725 			break;
726 
727 		case 'e':
728 			++use_event;
729 			break;
730 
731 		case 'g':
732 			gidx = strtol(optarg, NULL, 0);
733 			break;
734 
735 		default:
736 			usage(argv[0]);
737 			return 1;
738 		}
739 	}
740 
741 	if (optind == argc - 1)
742 		servername = strdupa(argv[optind]);
743 	else if (optind < argc) {
744 		usage(argv[0]);
745 		return 1;
746 	}
747 
748 	if (num_qp > rx_depth) {
749 		fprintf(stderr, "rx_depth %d is too small for %d QPs -- "
750 			"must have at least one receive per QP.\n",
751 			rx_depth, num_qp);
752 		return 1;
753 	}
754 
755 	num_wc = num_qp + rx_depth;
756 	wc     = alloca(num_wc * sizeof *wc);
757 
758 	page_size = sysconf(_SC_PAGESIZE);
759 
760 	dev_list = ibv_get_device_list(NULL);
761 	if (!dev_list) {
762 		perror("Failed to get IB devices list");
763 		return 1;
764 	}
765 
766 	if (!ib_devname) {
767 		ib_dev = *dev_list;
768 		if (!ib_dev) {
769 			fprintf(stderr, "No IB devices found\n");
770 			return 1;
771 		}
772 	} else {
773 		for (i = 0; dev_list[i]; ++i)
774 			if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname))
775 				break;
776 		ib_dev = dev_list[i];
777 		if (!ib_dev) {
778 			fprintf(stderr, "IB device %s not found\n", ib_devname);
779 			return 1;
780 		}
781 	}
782 
783 	ctx = pp_init_ctx(ib_dev, size, num_qp, rx_depth, ib_port, use_event);
784 	if (!ctx)
785 		return 1;
786 
787 	routs = pp_post_recv(ctx, ctx->rx_depth);
788 	if (routs < ctx->rx_depth) {
789 		fprintf(stderr, "Couldn't post receive (%d)\n", routs);
790 		return 1;
791 	}
792 
793 	if (use_event)
794 		if (ibv_req_notify_cq(ctx->cq, 0)) {
795 			fprintf(stderr, "Couldn't request CQ notification\n");
796 			return 1;
797 		}
798 
799 	memset(my_dest, 0, sizeof my_dest);
800 
801 	if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
802 		fprintf(stderr, "Couldn't get port info\n");
803 		return 1;
804 	}
805 	for (i = 0; i < num_qp; ++i) {
806 		my_dest[i].qpn = ctx->qp[i]->qp_num;
807 		my_dest[i].psn = lrand48() & 0xffffff;
808 		my_dest[i].lid = ctx->portinfo.lid;
809 		if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET
810 							&& !my_dest[i].lid) {
811 			fprintf(stderr, "Couldn't get local LID\n");
812 			return 1;
813 		}
814 
815 		if (gidx >= 0) {
816 			if (ibv_query_gid(ctx->context, ib_port, gidx,
817 							&my_dest[i].gid)) {
818 				fprintf(stderr, "Could not get local gid for "
819 							"gid index %d\n", gidx);
820 				return 1;
821 			}
822 		} else
823 			memset(&my_dest[i].gid, 0, sizeof my_dest[i].gid);
824 
825 		inet_ntop(AF_INET6, &my_dest[i].gid, gid, sizeof gid);
826 		printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x, "
827 			"GID %s\n", my_dest[i].lid, my_dest[i].qpn,
828 			my_dest[i].psn, gid);
829 	}
830 
831 	if (servername)
832 		rem_dest = pp_client_exch_dest(servername, port, my_dest);
833 	else
834 		rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl,
835 								my_dest, gidx);
836 
837 	if (!rem_dest)
838 		return 1;
839 
840 	inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid);
841 
842 	for (i = 0; i < num_qp; ++i) {
843 		inet_ntop(AF_INET6, &rem_dest[i].gid, gid, sizeof gid);
844 		printf("  remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, "
845 			"GID %s\n", rem_dest[i].lid, rem_dest[i].qpn,
846 			rem_dest[i].psn, gid);
847 	}
848 
849 	if (servername)
850 		if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest,
851 									gidx))
852 			return 1;
853 
854 	if (servername)
855 		for (i = 0; i < num_qp; ++i) {
856 			if (pp_post_send(ctx, i)) {
857 				fprintf(stderr, "Couldn't post send\n");
858 				return 1;
859 			}
860 			ctx->pending[i] = PINGPONG_SEND_WRID | PINGPONG_RECV_WRID;
861 		}
862 	else
863 		for (i = 0; i < num_qp; ++i)
864 			ctx->pending[i] = PINGPONG_RECV_WRID;
865 
866 	if (gettimeofday(&start, NULL)) {
867 		perror("gettimeofday");
868 		return 1;
869 	}
870 
871 	rcnt = scnt = 0;
872 	while (rcnt < iters || scnt < iters) {
873 		if (use_event) {
874 			struct ibv_cq *ev_cq;
875 			void          *ev_ctx;
876 
877 			if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
878 				fprintf(stderr, "Failed to get cq_event\n");
879 				return 1;
880 			}
881 
882 			++num_cq_events;
883 
884 			if (ev_cq != ctx->cq) {
885 				fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
886 				return 1;
887 			}
888 
889 			if (ibv_req_notify_cq(ctx->cq, 0)) {
890 				fprintf(stderr, "Couldn't request CQ notification\n");
891 				return 1;
892 			}
893 		}
894 
895 		{
896 			int ne, qp_ind;
897 
898 			do {
899 				ne = ibv_poll_cq(ctx->cq, num_wc, wc);
900 				if (ne < 0) {
901 					fprintf(stderr, "poll CQ failed %d\n", ne);
902 					return 1;
903 				}
904 			} while (!use_event && ne < 1);
905 
906 			for (i = 0; i < ne; ++i) {
907 				if (wc[i].status != IBV_WC_SUCCESS) {
908 					fprintf(stderr, "Failed status %s (%d) for wr_id %d\n",
909 						ibv_wc_status_str(wc[i].status),
910 						wc[i].status, (int) wc[i].wr_id);
911 					return 1;
912 				}
913 
914 				qp_ind = find_qp(wc[i].qp_num, ctx, num_qp);
915 				if (qp_ind < 0) {
916 					fprintf(stderr, "Couldn't find QPN %06x\n",
917 						wc[i].qp_num);
918 					return 1;
919 				}
920 
921 				switch ((int) wc[i].wr_id) {
922 				case PINGPONG_SEND_WRID:
923 					++scnt;
924 					break;
925 
926 				case PINGPONG_RECV_WRID:
927 					if (--routs <= num_qp) {
928 						routs += pp_post_recv(ctx, ctx->rx_depth - routs);
929 						if (routs < ctx->rx_depth) {
930 							fprintf(stderr,
931 								"Couldn't post receive (%d)\n",
932 								routs);
933 							return 1;
934 						}
935 					}
936 
937 					++rcnt;
938 					break;
939 
940 				default:
941 					fprintf(stderr, "Completion for unknown wr_id %d\n",
942 						(int) wc[i].wr_id);
943 					return 1;
944 				}
945 
946 				ctx->pending[qp_ind] &= ~(int) wc[i].wr_id;
947 				if (scnt < iters && !ctx->pending[qp_ind]) {
948 					if (pp_post_send(ctx, qp_ind)) {
949 						fprintf(stderr, "Couldn't post send\n");
950 						return 1;
951 					}
952 					ctx->pending[qp_ind] = PINGPONG_RECV_WRID |
953 							       PINGPONG_SEND_WRID;
954 				}
955 
956 			}
957 		}
958 	}
959 
960 	if (gettimeofday(&end, NULL)) {
961 		perror("gettimeofday");
962 		return 1;
963 	}
964 
965 	{
966 		float usec = (end.tv_sec - start.tv_sec) * 1000000 +
967 			(end.tv_usec - start.tv_usec);
968 		long long bytes = (long long) size * iters * 2;
969 
970 		printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
971 		       bytes, usec / 1000000., bytes * 8. / usec);
972 		printf("%d iters in %.2f seconds = %.2f usec/iter\n",
973 		       iters, usec / 1000000., usec / iters);
974 	}
975 
976 	ibv_ack_cq_events(ctx->cq, num_cq_events);
977 
978 	if (pp_close_ctx(ctx, num_qp))
979 		return 1;
980 
981 	ibv_free_device_list(dev_list);
982 	free(rem_dest);
983 
984 	return 0;
985 }
986