xref: /freebsd/contrib/ofed/libibverbs/examples/srq_pingpong.c (revision 28f6c2f292806bf31230a959bc4b19d7081669a7)
1 /*
2  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #define _GNU_SOURCE
33 #include <config.h>
34 
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <unistd.h>
38 #include <string.h>
39 #include <sys/types.h>
40 #include <sys/socket.h>
41 #include <sys/time.h>
42 #include <netdb.h>
43 #include <stdlib.h>
44 #include <getopt.h>
45 #include <arpa/inet.h>
46 #include <time.h>
47 
48 #include "pingpong.h"
49 
/*
 * Work-request identifiers and the size of the per-QP tables.
 *
 * The two WRID values are distinct single bits: main() ORs them together in
 * pingpong_context.pending[] and clears each one when the completion that
 * carries it as wr_id is polled.
 */
enum {
	PINGPONG_RECV_WRID = 1 << 0,	/* wr_id used by pp_post_recv() */
	PINGPONG_SEND_WRID = 1 << 1,	/* wr_id used by pp_post_send() */

	MAX_QP             = 256,	/* capacity of the qp[]/pending[] arrays */
};
56 
/*
 * System page size.  main() fills it in from sysconf(_SC_PAGESIZE) and
 * pp_init_ctx() passes it to memalign() as the alignment of the work buffer.
 */
static int page_size;
58 
59 struct pingpong_context {
60 	struct ibv_context	*context;
61 	struct ibv_comp_channel *channel;
62 	struct ibv_pd		*pd;
63 	struct ibv_mr		*mr;
64 	struct ibv_cq		*cq;
65 	struct ibv_srq		*srq;
66 	struct ibv_qp		*qp[MAX_QP];
67 	void			*buf;
68 	int			 size;
69 	int			 send_flags;
70 	int			 num_qp;
71 	int			 rx_depth;
72 	int			 pending[MAX_QP];
73 	struct ibv_port_attr	 portinfo;
74 };
75 
76 struct pingpong_dest {
77 	int lid;
78 	int qpn;
79 	int psn;
80 	union ibv_gid gid;
81 };
82 
83 static int pp_connect_ctx(struct pingpong_context *ctx, int port, enum ibv_mtu mtu,
84 			  int sl, const struct pingpong_dest *my_dest,
85 			  const struct pingpong_dest *dest, int sgid_idx)
86 {
87 	int i;
88 
89 	for (i = 0; i < ctx->num_qp; ++i) {
90 		struct ibv_qp_attr attr = {
91 			.qp_state		= IBV_QPS_RTR,
92 			.path_mtu		= mtu,
93 			.dest_qp_num		= dest[i].qpn,
94 			.rq_psn			= dest[i].psn,
95 			.max_dest_rd_atomic	= 1,
96 			.min_rnr_timer		= 12,
97 			.ah_attr		= {
98 				.is_global	= 0,
99 				.dlid		= dest[i].lid,
100 				.sl		= sl,
101 				.src_path_bits	= 0,
102 				.port_num	= port
103 			}
104 		};
105 
106 		if (dest->gid.global.interface_id) {
107 			attr.ah_attr.is_global = 1;
108 			attr.ah_attr.grh.hop_limit = 1;
109 			attr.ah_attr.grh.dgid = dest->gid;
110 			attr.ah_attr.grh.sgid_index = sgid_idx;
111 		}
112 		if (ibv_modify_qp(ctx->qp[i], &attr,
113 				  IBV_QP_STATE              |
114 				  IBV_QP_AV                 |
115 				  IBV_QP_PATH_MTU           |
116 				  IBV_QP_DEST_QPN           |
117 				  IBV_QP_RQ_PSN             |
118 				  IBV_QP_MAX_DEST_RD_ATOMIC |
119 				  IBV_QP_MIN_RNR_TIMER)) {
120 			fprintf(stderr, "Failed to modify QP[%d] to RTR\n", i);
121 			return 1;
122 		}
123 
124 		attr.qp_state	    = IBV_QPS_RTS;
125 		attr.timeout	    = 14;
126 		attr.retry_cnt	    = 7;
127 		attr.rnr_retry	    = 7;
128 		attr.sq_psn	    = my_dest[i].psn;
129 		attr.max_rd_atomic  = 1;
130 		if (ibv_modify_qp(ctx->qp[i], &attr,
131 				  IBV_QP_STATE              |
132 				  IBV_QP_TIMEOUT            |
133 				  IBV_QP_RETRY_CNT          |
134 				  IBV_QP_RNR_RETRY          |
135 				  IBV_QP_SQ_PSN             |
136 				  IBV_QP_MAX_QP_RD_ATOMIC)) {
137 			fprintf(stderr, "Failed to modify QP[%d] to RTS\n", i);
138 			return 1;
139 		}
140 	}
141 
142 	return 0;
143 }
144 
/*
 * Client side of the out-of-band address exchange.
 *
 * Connects to servername:port over TCP, sends MAX_QP fixed-size address
 * records built from my_dest[], reads MAX_QP records back and finally sends
 * "done".
 *
 * servername  host to connect to
 * port        TCP port
 * my_dest     array of MAX_QP local addresses
 *
 * Returns a malloc()ed array of MAX_QP remote addresses which the caller must
 * free(), or NULL on any failure (nothing is leaked and no partially filled
 * array is ever returned).
 */
static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
						 const struct pingpong_dest *my_dest)
{
	struct addrinfo *res, *t;
	struct addrinfo hints = {
		.ai_family   = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int r;
	int i;
	int sockfd = -1;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	n = getaddrinfo(servername, service, &hints, &res);

	/*
	 * getaddrinfo() signals failure with any non-zero value; the sign of
	 * the EAI_* codes differs between platforms.
	 */
	if (n != 0) {
		fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
		free(service);
		return NULL;
	}

	/* Try each returned address until one connects. */
	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo_null(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
		return NULL;
	}

	/* Send one fixed-size record (including its NUL) per table slot. */
	for (i = 0; i < MAX_QP; ++i) {
		gid_to_wire_gid(&my_dest[i].gid, gid);
		snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s", my_dest[i].lid,
			 my_dest[i].qpn, my_dest[i].psn, gid);
		if (write(sockfd, msg, sizeof msg) != sizeof msg) {
			fprintf(stderr, "Couldn't send local address\n");
			goto err;
		}
	}

	rem_dest = malloc(MAX_QP * sizeof *rem_dest);
	if (!rem_dest)
		goto err;

	for (i = 0; i < MAX_QP; ++i) {
		n = 0;
		while (n < (int) sizeof msg) {
			r = read(sockfd, msg + n, sizeof msg - n);
			/* r == 0 is EOF: the peer went away mid-record. */
			if (r <= 0) {
				if (r < 0)
					perror("client read");
				fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n",
					n, (int) sizeof msg, i);
				goto err;
			}
			n += r;
		}

		/* Never trust the peer to have NUL-terminated the record. */
		msg[sizeof msg - 1] = '\0';
		if (sscanf(msg, "%x:%x:%x:%32s", &rem_dest[i].lid, &rem_dest[i].qpn,
			   &rem_dest[i].psn, gid) != 4) {
			fprintf(stderr, "Couldn't parse remote address [%d]\n", i);
			goto err;
		}
		wire_gid_to_gid(gid, &rem_dest[i].gid);
	}

	if (write(sockfd, "done", sizeof "done") != sizeof "done") {
		perror("client write");
		goto err;
	}

	close(sockfd);
	return rem_dest;

err:
	free(rem_dest);
	close(sockfd);
	return NULL;
}
231 
/*
 * Server side of the out-of-band address exchange.
 *
 * Listens on the given TCP port, accepts one connection, reads MAX_QP address
 * records from the client, connects the local QPs to them with
 * pp_connect_ctx(), sends the MAX_QP local records and then waits for the
 * client's "done" message.
 *
 * ctx       context whose QPs are connected
 * ib_port   local IB port number
 * mtu       path MTU
 * port      TCP port to listen on
 * sl        service level
 * my_dest   array of MAX_QP local addresses
 * sgid_idx  local GID index
 *
 * Returns a malloc()ed array of MAX_QP remote addresses which the caller must
 * free(), or NULL on any failure.
 */
static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
						 int ib_port, enum ibv_mtu mtu,
						 int port, int sl,
						 const struct pingpong_dest *my_dest,
						 int sgid_idx)
{
	struct addrinfo *res, *t;
	struct addrinfo hints = {
		.ai_flags    = AI_PASSIVE,
		.ai_family   = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int r;
	int i;
	int sockfd = -1, connfd;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	n = getaddrinfo(NULL, service, &hints, &res);

	/*
	 * getaddrinfo() signals failure with any non-zero value; the sign of
	 * the EAI_* codes differs between platforms.
	 */
	if (n != 0) {
		fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
		free(service);
		return NULL;
	}

	/* Bind to the first usable address. */
	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			n = 1;

			setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);

			if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo_null(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't listen to port %d\n", port);
		return NULL;
	}

	if (listen(sockfd, 1) < 0) {
		perror("listen() failed");
		close(sockfd);
		return NULL;
	}
	connfd = accept(sockfd, NULL, NULL);
	close(sockfd);
	if (connfd < 0) {
		fprintf(stderr, "accept() failed\n");
		return NULL;
	}

	rem_dest = malloc(MAX_QP * sizeof *rem_dest);
	if (!rem_dest)
		goto err;

	for (i = 0; i < MAX_QP; ++i) {
		n = 0;
		while (n < (int) sizeof msg) {
			r = read(connfd, msg + n, sizeof msg - n);
			/* r == 0 is EOF: the peer went away mid-record. */
			if (r <= 0) {
				if (r < 0)
					perror("server read");
				fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n",
					n, (int) sizeof msg, i);
				goto err;
			}
			n += r;
		}

		/* Never trust the peer to have NUL-terminated the record. */
		msg[sizeof msg - 1] = '\0';
		if (sscanf(msg, "%x:%x:%x:%32s", &rem_dest[i].lid, &rem_dest[i].qpn,
			   &rem_dest[i].psn, gid) != 4) {
			fprintf(stderr, "Couldn't parse remote address [%d]\n", i);
			goto err;
		}
		wire_gid_to_gid(gid, &rem_dest[i].gid);
	}

	/* QPs are connected before replying, so they are ready when the client starts sending. */
	if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest,
								sgid_idx)) {
		fprintf(stderr, "Couldn't connect to remote QP\n");
		goto err;
	}

	for (i = 0; i < MAX_QP; ++i) {
		gid_to_wire_gid(&my_dest[i].gid, gid);
		snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s", my_dest[i].lid,
			 my_dest[i].qpn, my_dest[i].psn, gid);
		if (write(connfd, msg, sizeof msg) != sizeof msg) {
			fprintf(stderr, "Couldn't send local address\n");
			goto err;
		}
	}

	/* Wait for the client's "done" (sent including its NUL). */
	if (read(connfd, msg, sizeof msg) != sizeof "done") {
		perror("server read");
		goto err;
	}

	close(connfd);
	return rem_dest;

err:
	free(rem_dest);
	close(connfd);
	return NULL;
}
351 
352 static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
353 					    int num_qp, int rx_depth, int port,
354 					    int use_event)
355 {
356 	struct pingpong_context *ctx;
357 	int i;
358 
359 	ctx = calloc(1, sizeof *ctx);
360 	if (!ctx)
361 		return NULL;
362 
363 	ctx->size       = size;
364 	ctx->send_flags = IBV_SEND_SIGNALED;
365 	ctx->num_qp     = num_qp;
366 	ctx->rx_depth   = rx_depth;
367 
368 	ctx->buf = memalign(page_size, size);
369 	if (!ctx->buf) {
370 		fprintf(stderr, "Couldn't allocate work buf.\n");
371 		goto clean_ctx;
372 	}
373 
374 	memset(ctx->buf, 0, size);
375 
376 	ctx->context = ibv_open_device(ib_dev);
377 	if (!ctx->context) {
378 		fprintf(stderr, "Couldn't get context for %s\n",
379 			ibv_get_device_name(ib_dev));
380 		goto clean_buffer;
381 	}
382 
383 	if (use_event) {
384 		ctx->channel = ibv_create_comp_channel(ctx->context);
385 		if (!ctx->channel) {
386 			fprintf(stderr, "Couldn't create completion channel\n");
387 			goto clean_device;
388 		}
389 	} else
390 		ctx->channel = NULL;
391 
392 	ctx->pd = ibv_alloc_pd(ctx->context);
393 	if (!ctx->pd) {
394 		fprintf(stderr, "Couldn't allocate PD\n");
395 		goto clean_comp_channel;
396 	}
397 
398 	ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE);
399 	if (!ctx->mr) {
400 		fprintf(stderr, "Couldn't register MR\n");
401 		goto clean_pd;
402 	}
403 
404 	ctx->cq = ibv_create_cq(ctx->context, rx_depth + num_qp, NULL,
405 				ctx->channel, 0);
406 	if (!ctx->cq) {
407 		fprintf(stderr, "Couldn't create CQ\n");
408 		goto clean_mr;
409 	}
410 
411 	{
412 		struct ibv_srq_init_attr attr = {
413 			.attr = {
414 				.max_wr  = rx_depth,
415 				.max_sge = 1
416 			}
417 		};
418 
419 		ctx->srq = ibv_create_srq(ctx->pd, &attr);
420 		if (!ctx->srq)  {
421 			fprintf(stderr, "Couldn't create SRQ\n");
422 			goto clean_cq;
423 		}
424 	}
425 
426 	for (i = 0; i < num_qp; ++i) {
427 		struct ibv_qp_attr attr;
428 		struct ibv_qp_init_attr init_attr = {
429 			.send_cq = ctx->cq,
430 			.recv_cq = ctx->cq,
431 			.srq     = ctx->srq,
432 			.cap     = {
433 				.max_send_wr  = 1,
434 				.max_send_sge = 1,
435 			},
436 			.qp_type = IBV_QPT_RC
437 		};
438 
439 		ctx->qp[i] = ibv_create_qp(ctx->pd, &init_attr);
440 		if (!ctx->qp[i])  {
441 			fprintf(stderr, "Couldn't create QP[%d]\n", i);
442 			goto clean_qps;
443 		}
444 		ibv_query_qp(ctx->qp[i], &attr, IBV_QP_CAP, &init_attr);
445 		if (init_attr.cap.max_inline_data >= size) {
446 			ctx->send_flags |= IBV_SEND_INLINE;
447 		}
448 	}
449 
450 	for (i = 0; i < num_qp; ++i) {
451 		struct ibv_qp_attr attr = {
452 			.qp_state        = IBV_QPS_INIT,
453 			.pkey_index      = 0,
454 			.port_num        = port,
455 			.qp_access_flags = 0
456 		};
457 
458 		if (ibv_modify_qp(ctx->qp[i], &attr,
459 				  IBV_QP_STATE              |
460 				  IBV_QP_PKEY_INDEX         |
461 				  IBV_QP_PORT               |
462 				  IBV_QP_ACCESS_FLAGS)) {
463 			fprintf(stderr, "Failed to modify QP[%d] to INIT\n", i);
464 			goto clean_qps_full;
465 		}
466 	}
467 
468 	return ctx;
469 
470 clean_qps_full:
471 	i = num_qp;
472 
473 clean_qps:
474 	for (--i; i >= 0; --i)
475 		ibv_destroy_qp(ctx->qp[i]);
476 
477 	ibv_destroy_srq(ctx->srq);
478 
479 clean_cq:
480 	ibv_destroy_cq(ctx->cq);
481 
482 clean_mr:
483 	ibv_dereg_mr(ctx->mr);
484 
485 clean_pd:
486 	ibv_dealloc_pd(ctx->pd);
487 
488 clean_comp_channel:
489 	if (ctx->channel)
490 		ibv_destroy_comp_channel(ctx->channel);
491 
492 clean_device:
493 	ibv_close_device(ctx->context);
494 
495 clean_buffer:
496 	free(ctx->buf);
497 
498 clean_ctx:
499 	free(ctx);
500 
501 	return NULL;
502 }
503 
504 static int pp_close_ctx(struct pingpong_context *ctx, int num_qp)
505 {
506 	int i;
507 
508 	for (i = 0; i < num_qp; ++i) {
509 		if (ibv_destroy_qp(ctx->qp[i])) {
510 			fprintf(stderr, "Couldn't destroy QP[%d]\n", i);
511 			return 1;
512 		}
513 	}
514 
515 	if (ibv_destroy_srq(ctx->srq)) {
516 		fprintf(stderr, "Couldn't destroy SRQ\n");
517 		return 1;
518 	}
519 
520 	if (ibv_destroy_cq(ctx->cq)) {
521 		fprintf(stderr, "Couldn't destroy CQ\n");
522 		return 1;
523 	}
524 
525 	if (ibv_dereg_mr(ctx->mr)) {
526 		fprintf(stderr, "Couldn't deregister MR\n");
527 		return 1;
528 	}
529 
530 	if (ibv_dealloc_pd(ctx->pd)) {
531 		fprintf(stderr, "Couldn't deallocate PD\n");
532 		return 1;
533 	}
534 
535 	if (ctx->channel) {
536 		if (ibv_destroy_comp_channel(ctx->channel)) {
537 			fprintf(stderr, "Couldn't destroy completion channel\n");
538 			return 1;
539 		}
540 	}
541 
542 	if (ibv_close_device(ctx->context)) {
543 		fprintf(stderr, "Couldn't release context\n");
544 		return 1;
545 	}
546 
547 	free(ctx->buf);
548 	free(ctx);
549 
550 	return 0;
551 }
552 
553 static int pp_post_recv(struct pingpong_context *ctx, int n)
554 {
555 	struct ibv_sge list = {
556 		.addr	= (uintptr_t) ctx->buf,
557 		.length = ctx->size,
558 		.lkey	= ctx->mr->lkey
559 	};
560 	struct ibv_recv_wr wr = {
561 		.wr_id	    = PINGPONG_RECV_WRID,
562 		.sg_list    = &list,
563 		.num_sge    = 1,
564 	};
565 	struct ibv_recv_wr *bad_wr;
566 	int i;
567 
568 	for (i = 0; i < n; ++i)
569 		if (ibv_post_srq_recv(ctx->srq, &wr, &bad_wr))
570 			break;
571 
572 	return i;
573 }
574 
575 static int pp_post_send(struct pingpong_context *ctx, int qp_index)
576 {
577 	struct ibv_sge list = {
578 		.addr	= (uintptr_t) ctx->buf,
579 		.length = ctx->size,
580 		.lkey	= ctx->mr->lkey
581 	};
582 	struct ibv_send_wr wr = {
583 		.wr_id	    = PINGPONG_SEND_WRID,
584 		.sg_list    = &list,
585 		.num_sge    = 1,
586 		.opcode     = IBV_WR_SEND,
587 		.send_flags = ctx->send_flags,
588 	};
589 	struct ibv_send_wr *bad_wr;
590 
591 	return ibv_post_send(ctx->qp[qp_index], &wr, &bad_wr);
592 }
593 
594 static int find_qp(int qpn, struct pingpong_context *ctx, int num_qp)
595 {
596 	int i;
597 
598 	for (i = 0; i < num_qp; ++i)
599 		if (ctx->qp[i]->qp_num == qpn)
600 			return i;
601 
602 	return -1;
603 }
604 
/*
 * Print the command-line synopsis and option summary to stdout.
 * argv0 is the program name shown in the synopsis lines.
 */
static void usage(const char *argv0)
{
	/* The two synopsis lines are the only ones that need formatting. */
	printf("Usage:\n"
	       "  %s            start a server and wait for connection\n"
	       "  %s <host>     connect to server at <host>\n",
	       argv0, argv0);

	fputs("\n"
	      "Options:\n"
	      "  -p, --port=<port>      listen on/connect to port <port> (default 18515)\n"
	      "  -d, --ib-dev=<dev>     use IB device <dev> (default first device found)\n"
	      "  -i, --ib-port=<port>   use port <port> of IB device (default 1)\n"
	      "  -s, --size=<size>      size of message to exchange (default 4096)\n"
	      "  -m, --mtu=<size>       path MTU (default 1024)\n"
	      "  -q, --num-qp=<num>     number of QPs to use (default 16)\n"
	      "  -r, --rx-depth=<dep>   number of receives to post at a time (default 500)\n"
	      "  -n, --iters=<iters>    number of exchanges per QP(default 1000)\n"
	      "  -l, --sl=<sl>          service level value\n"
	      "  -e, --events           sleep on CQ events (default poll)\n"
	      "  -g, --gid-idx=<gid index> local port gid index\n",
	      stdout);
}
624 
625 int main(int argc, char *argv[])
626 {
627 	struct ibv_device      **dev_list;
628 	struct ibv_device	*ib_dev;
629 	struct ibv_wc		*wc;
630 	struct pingpong_context *ctx;
631 	struct pingpong_dest     my_dest[MAX_QP];
632 	struct pingpong_dest    *rem_dest;
633 	struct timeval           start, end;
634 	char                    *ib_devname = NULL;
635 	char                    *servername = NULL;
636 	unsigned int             port = 18515;
637 	int                      ib_port = 1;
638 	unsigned int             size = 4096;
639 	enum ibv_mtu		 mtu = IBV_MTU_1024;
640 	unsigned int             num_qp = 16;
641 	unsigned int             rx_depth = 500;
642 	unsigned int             iters = 1000;
643 	int                      use_event = 0;
644 	int                      routs;
645 	int                      rcnt, scnt;
646 	int			 num_wc;
647 	int                      i;
648 	int                      num_cq_events = 0;
649 	int                      sl = 0;
650 	int			 gidx = -1;
651 	char			 gid[33];
652 
653 	srand48(getpid() * time(NULL));
654 
655 	while (1) {
656 		int c;
657 
658 		static struct option long_options[] = {
659 			{ .name = "port",     .has_arg = 1, .val = 'p' },
660 			{ .name = "ib-dev",   .has_arg = 1, .val = 'd' },
661 			{ .name = "ib-port",  .has_arg = 1, .val = 'i' },
662 			{ .name = "size",     .has_arg = 1, .val = 's' },
663 			{ .name = "mtu",      .has_arg = 1, .val = 'm' },
664 			{ .name = "num-qp",   .has_arg = 1, .val = 'q' },
665 			{ .name = "rx-depth", .has_arg = 1, .val = 'r' },
666 			{ .name = "iters",    .has_arg = 1, .val = 'n' },
667 			{ .name = "sl",       .has_arg = 1, .val = 'l' },
668 			{ .name = "events",   .has_arg = 0, .val = 'e' },
669 			{ .name = "gid-idx",  .has_arg = 1, .val = 'g' },
670 			{}
671 		};
672 
673 		c = getopt_long(argc, argv, "p:d:i:s:m:q:r:n:l:eg:",
674 							long_options, NULL);
675 		if (c == -1)
676 			break;
677 
678 		switch (c) {
679 		case 'p':
680 			port = strtoul(optarg, NULL, 0);
681 			if (port > 65535) {
682 				usage(argv[0]);
683 				return 1;
684 			}
685 			break;
686 
687 		case 'd':
688 			ib_devname = strdupa(optarg);
689 			break;
690 
691 		case 'i':
692 			ib_port = strtol(optarg, NULL, 0);
693 			if (ib_port < 1) {
694 				usage(argv[0]);
695 				return 1;
696 			}
697 			break;
698 
699 		case 's':
700 			size = strtoul(optarg, NULL, 0);
701 			if (size < 1) {
702 				usage(argv[0]);
703 				return 1;
704 			}
705 			break;
706 
707 		case 'm':
708 			mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
709 			if (mtu == 0) {
710 				usage(argv[0]);
711 				return 1;
712 			}
713 			break;
714 
715 		case 'q':
716 			num_qp = strtoul(optarg, NULL, 0);
717 			break;
718 
719 		case 'r':
720 			rx_depth = strtoul(optarg, NULL, 0);
721 			break;
722 
723 		case 'n':
724 			iters = strtoul(optarg, NULL, 0);
725 			break;
726 
727 		case 'l':
728 			sl = strtol(optarg, NULL, 0);
729 			break;
730 
731 		case 'e':
732 			++use_event;
733 			break;
734 
735 		case 'g':
736 			gidx = strtol(optarg, NULL, 0);
737 			break;
738 
739 		default:
740 			usage(argv[0]);
741 			return 1;
742 		}
743 	}
744 
745 	if (optind == argc - 1)
746 		servername = strdupa(argv[optind]);
747 	else if (optind < argc) {
748 		usage(argv[0]);
749 		return 1;
750 	}
751 
752 	if (num_qp > rx_depth) {
753 		fprintf(stderr, "rx_depth %d is too small for %d QPs -- "
754 			"must have at least one receive per QP.\n",
755 			rx_depth, num_qp);
756 		return 1;
757 	}
758 
759 	num_wc = num_qp + rx_depth;
760 	wc     = alloca(num_wc * sizeof *wc);
761 
762 	page_size = sysconf(_SC_PAGESIZE);
763 
764 	dev_list = ibv_get_device_list(NULL);
765 	if (!dev_list) {
766 		perror("Failed to get IB devices list");
767 		return 1;
768 	}
769 
770 	if (!ib_devname) {
771 		ib_dev = *dev_list;
772 		if (!ib_dev) {
773 			fprintf(stderr, "No IB devices found\n");
774 			return 1;
775 		}
776 	} else {
777 		for (i = 0; dev_list[i]; ++i)
778 			if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname))
779 				break;
780 		ib_dev = dev_list[i];
781 		if (!ib_dev) {
782 			fprintf(stderr, "IB device %s not found\n", ib_devname);
783 			return 1;
784 		}
785 	}
786 
787 	ctx = pp_init_ctx(ib_dev, size, num_qp, rx_depth, ib_port, use_event);
788 	if (!ctx)
789 		return 1;
790 
791 	routs = pp_post_recv(ctx, ctx->rx_depth);
792 	if (routs < ctx->rx_depth) {
793 		fprintf(stderr, "Couldn't post receive (%d)\n", routs);
794 		return 1;
795 	}
796 
797 	if (use_event)
798 		if (ibv_req_notify_cq(ctx->cq, 0)) {
799 			fprintf(stderr, "Couldn't request CQ notification\n");
800 			return 1;
801 		}
802 
803 	memset(my_dest, 0, sizeof my_dest);
804 
805 	if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
806 		fprintf(stderr, "Couldn't get port info\n");
807 		return 1;
808 	}
809 	for (i = 0; i < num_qp; ++i) {
810 		my_dest[i].qpn = ctx->qp[i]->qp_num;
811 		my_dest[i].psn = lrand48() & 0xffffff;
812 		my_dest[i].lid = ctx->portinfo.lid;
813 		if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET
814 							&& !my_dest[i].lid) {
815 			fprintf(stderr, "Couldn't get local LID\n");
816 			return 1;
817 		}
818 
819 		if (gidx >= 0) {
820 			if (ibv_query_gid(ctx->context, ib_port, gidx,
821 							&my_dest[i].gid)) {
822 				fprintf(stderr, "Could not get local gid for "
823 							"gid index %d\n", gidx);
824 				return 1;
825 			}
826 		} else
827 			memset(&my_dest[i].gid, 0, sizeof my_dest[i].gid);
828 
829 		inet_ntop(AF_INET6, &my_dest[i].gid, gid, sizeof gid);
830 		printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x, "
831 			"GID %s\n", my_dest[i].lid, my_dest[i].qpn,
832 			my_dest[i].psn, gid);
833 	}
834 
835 	if (servername)
836 		rem_dest = pp_client_exch_dest(servername, port, my_dest);
837 	else
838 		rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl,
839 								my_dest, gidx);
840 
841 	if (!rem_dest)
842 		return 1;
843 
844 	inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid);
845 
846 	for (i = 0; i < num_qp; ++i) {
847 		inet_ntop(AF_INET6, &rem_dest[i].gid, gid, sizeof gid);
848 		printf("  remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, "
849 			"GID %s\n", rem_dest[i].lid, rem_dest[i].qpn,
850 			rem_dest[i].psn, gid);
851 	}
852 
853 	if (servername)
854 		if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest,
855 									gidx))
856 			return 1;
857 
858 	if (servername)
859 		for (i = 0; i < num_qp; ++i) {
860 			if (pp_post_send(ctx, i)) {
861 				fprintf(stderr, "Couldn't post send\n");
862 				return 1;
863 			}
864 			ctx->pending[i] = PINGPONG_SEND_WRID | PINGPONG_RECV_WRID;
865 		}
866 	else
867 		for (i = 0; i < num_qp; ++i)
868 			ctx->pending[i] = PINGPONG_RECV_WRID;
869 
870 	if (gettimeofday(&start, NULL)) {
871 		perror("gettimeofday");
872 		return 1;
873 	}
874 
875 	rcnt = scnt = 0;
876 	while (rcnt < iters || scnt < iters) {
877 		if (use_event) {
878 			struct ibv_cq *ev_cq;
879 			void          *ev_ctx;
880 
881 			if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
882 				fprintf(stderr, "Failed to get cq_event\n");
883 				return 1;
884 			}
885 
886 			++num_cq_events;
887 
888 			if (ev_cq != ctx->cq) {
889 				fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
890 				return 1;
891 			}
892 
893 			if (ibv_req_notify_cq(ctx->cq, 0)) {
894 				fprintf(stderr, "Couldn't request CQ notification\n");
895 				return 1;
896 			}
897 		}
898 
899 		{
900 			int ne, qp_ind;
901 
902 			do {
903 				ne = ibv_poll_cq(ctx->cq, num_wc, wc);
904 				if (ne < 0) {
905 					fprintf(stderr, "poll CQ failed %d\n", ne);
906 					return 1;
907 				}
908 			} while (!use_event && ne < 1);
909 
910 			for (i = 0; i < ne; ++i) {
911 				if (wc[i].status != IBV_WC_SUCCESS) {
912 					fprintf(stderr, "Failed status %s (%d) for wr_id %d\n",
913 						ibv_wc_status_str(wc[i].status),
914 						wc[i].status, (int) wc[i].wr_id);
915 					return 1;
916 				}
917 
918 				qp_ind = find_qp(wc[i].qp_num, ctx, num_qp);
919 				if (qp_ind < 0) {
920 					fprintf(stderr, "Couldn't find QPN %06x\n",
921 						wc[i].qp_num);
922 					return 1;
923 				}
924 
925 				switch ((int) wc[i].wr_id) {
926 				case PINGPONG_SEND_WRID:
927 					++scnt;
928 					break;
929 
930 				case PINGPONG_RECV_WRID:
931 					if (--routs <= num_qp) {
932 						routs += pp_post_recv(ctx, ctx->rx_depth - routs);
933 						if (routs < ctx->rx_depth) {
934 							fprintf(stderr,
935 								"Couldn't post receive (%d)\n",
936 								routs);
937 							return 1;
938 						}
939 					}
940 
941 					++rcnt;
942 					break;
943 
944 				default:
945 					fprintf(stderr, "Completion for unknown wr_id %d\n",
946 						(int) wc[i].wr_id);
947 					return 1;
948 				}
949 
950 				ctx->pending[qp_ind] &= ~(int) wc[i].wr_id;
951 				if (scnt < iters && !ctx->pending[qp_ind]) {
952 					if (pp_post_send(ctx, qp_ind)) {
953 						fprintf(stderr, "Couldn't post send\n");
954 						return 1;
955 					}
956 					ctx->pending[qp_ind] = PINGPONG_RECV_WRID |
957 							       PINGPONG_SEND_WRID;
958 				}
959 
960 			}
961 		}
962 	}
963 
964 	if (gettimeofday(&end, NULL)) {
965 		perror("gettimeofday");
966 		return 1;
967 	}
968 
969 	{
970 		float usec = (end.tv_sec - start.tv_sec) * 1000000 +
971 			(end.tv_usec - start.tv_usec);
972 		long long bytes = (long long) size * iters * 2;
973 
974 		printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
975 		       bytes, usec / 1000000., bytes * 8. / usec);
976 		printf("%d iters in %.2f seconds = %.2f usec/iter\n",
977 		       iters, usec / 1000000., usec / iters);
978 	}
979 
980 	ibv_ack_cq_events(ctx->cq, num_cq_events);
981 
982 	if (pp_close_ctx(ctx, num_qp))
983 		return 1;
984 
985 	ibv_free_device_list(dev_list);
986 	free(rem_dest);
987 
988 	return 0;
989 }
990