xref: /freebsd/contrib/ofed/libibverbs/examples/xsrq_pingpong.c (revision a64729f5077d77e13b9497cb33ecb3c82e606ee8)
1 /*
2  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2011 Intel Corporation, Inc.  All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 #define _GNU_SOURCE
34 #include <config.h>
35 
36 #include <stdio.h>
37 #include <fcntl.h>
38 #include <errno.h>
39 #include <stdlib.h>
40 #include <unistd.h>
41 #include <string.h>
42 #include <sys/types.h>
43 #include <sys/socket.h>
44 #include <sys/time.h>
45 #include <netdb.h>
46 #include <malloc.h>
47 #include <getopt.h>
48 #include <arpa/inet.h>
49 #include <time.h>
50 
51 #include "pingpong.h"
52 
53 #define MSG_FORMAT "%04x:%06x:%06x:%06x:%06x:%32s"
54 #define MSG_SIZE   66
55 #define MSG_SSCAN  "%x:%x:%x:%x:%x:%s"
56 #define ADDR_FORMAT \
57 	"%8s: LID %04x, QPN RECV %06x SEND %06x, PSN %06x, SRQN %06x, GID %s\n"
58 #define TERMINATION_FORMAT "%s"
59 #define TERMINATION_MSG_SIZE 4
60 #define TERMINATION_MSG "END"
61 static int page_size;
62 
63 struct pingpong_dest {
64 	union ibv_gid gid;
65 	int lid;
66 	int recv_qpn;
67 	int send_qpn;
68 	int recv_psn;
69 	int send_psn;
70 	int srqn;
71 	int pp_cnt;
72 	int sockfd;
73 };
74 
75 struct pingpong_context {
76 	struct ibv_context	*context;
77 	struct ibv_comp_channel *channel;
78 	struct ibv_pd		*pd;
79 	struct ibv_mr		*mr;
80 	struct ibv_cq		*send_cq;
81 	struct ibv_cq		*recv_cq;
82 	struct ibv_srq		*srq;
83 	struct ibv_xrcd		*xrcd;
84 	struct ibv_qp		**recv_qp;
85 	struct ibv_qp		**send_qp;
86 	struct pingpong_dest	*rem_dest;
87 	void			*buf;
88 	int			 lid;
89 	int			 sl;
90 	enum ibv_mtu		 mtu;
91 	int			 ib_port;
92 	int			 fd;
93 	int			 size;
94 	int			 num_clients;
95 	int			 num_tests;
96 	int			 use_event;
97 	int			 gidx;
98 };
99 
100 static struct pingpong_context ctx;
101 
102 
103 static int open_device(char *ib_devname)
104 {
105 	struct ibv_device **dev_list;
106 	int i = 0;
107 
108 	dev_list = ibv_get_device_list(NULL);
109 	if (!dev_list) {
110 		fprintf(stderr, "Failed to get IB devices list");
111 		return -1;
112 	}
113 
114 	if (ib_devname) {
115 		for (; dev_list[i]; ++i) {
116 			if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname))
117 				break;
118 		}
119 	}
120 	if (!dev_list[i]) {
121 		fprintf(stderr, "IB device %s not found\n",
122 			ib_devname ? ib_devname : "");
123 		return -1;
124 	}
125 
126 	ctx.context = ibv_open_device(dev_list[i]);
127 	if (!ctx.context) {
128 		fprintf(stderr, "Couldn't get context for %s\n",
129 			ibv_get_device_name(dev_list[i]));
130 		return -1;
131 	}
132 
133 	ibv_free_device_list(dev_list);
134 	return 0;
135 }
136 
137 static int create_qps(void)
138 {
139 	struct ibv_qp_init_attr_ex init;
140 	struct ibv_qp_attr mod;
141 	int i;
142 
143 	for (i = 0; i < ctx.num_clients; ++i) {
144 
145 		memset(&init, 0, sizeof init);
146 		init.qp_type = IBV_QPT_XRC_RECV;
147 		init.comp_mask = IBV_QP_INIT_ATTR_XRCD;
148 		init.xrcd = ctx.xrcd;
149 
150 		ctx.recv_qp[i] = ibv_create_qp_ex(ctx.context, &init);
151 		if (!ctx.recv_qp[i])  {
152 			fprintf(stderr, "Couldn't create recv QP[%d] errno %d\n",
153 				i, errno);
154 			return 1;
155 		}
156 
157 		mod.qp_state        = IBV_QPS_INIT;
158 		mod.pkey_index      = 0;
159 		mod.port_num        = ctx.ib_port;
160 		mod.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ;
161 
162 		if (ibv_modify_qp(ctx.recv_qp[i], &mod,
163 				  IBV_QP_STATE | IBV_QP_PKEY_INDEX |
164 				  IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) {
165 			fprintf(stderr, "Failed to modify recv QP[%d] to INIT\n", i);
166 			return 1;
167 		}
168 
169 		memset(&init, 0, sizeof init);
170 		init.qp_type	      = IBV_QPT_XRC_SEND;
171 		init.send_cq	      = ctx.send_cq;
172 		init.cap.max_send_wr  = ctx.num_clients * ctx.num_tests;
173 		init.cap.max_send_sge = 1;
174 		init.comp_mask	      = IBV_QP_INIT_ATTR_PD;
175 		init.pd		      = ctx.pd;
176 
177 		ctx.send_qp[i] = ibv_create_qp_ex(ctx.context, &init);
178 		if (!ctx.send_qp[i])  {
179 			fprintf(stderr, "Couldn't create send QP[%d] errno %d\n",
180 				i, errno);
181 			return 1;
182 		}
183 
184 		mod.qp_state        = IBV_QPS_INIT;
185 		mod.pkey_index      = 0;
186 		mod.port_num        = ctx.ib_port;
187 		mod.qp_access_flags = 0;
188 
189 		if (ibv_modify_qp(ctx.send_qp[i], &mod,
190 				  IBV_QP_STATE | IBV_QP_PKEY_INDEX |
191 				  IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) {
192 			fprintf(stderr, "Failed to modify send QP[%d] to INIT\n", i);
193 			return 1;
194 		}
195 	}
196 
197 	return 0;
198 }
199 
200 static int pp_init_ctx(char *ib_devname)
201 {
202 	struct ibv_srq_init_attr_ex attr;
203 	struct ibv_xrcd_init_attr xrcd_attr;
204 	struct ibv_port_attr port_attr;
205 
206 	ctx.recv_qp = calloc(ctx.num_clients, sizeof *ctx.recv_qp);
207 	ctx.send_qp = calloc(ctx.num_clients, sizeof *ctx.send_qp);
208 	ctx.rem_dest = calloc(ctx.num_clients, sizeof *ctx.rem_dest);
209 	if (!ctx.recv_qp || !ctx.send_qp || !ctx.rem_dest)
210 		return 1;
211 
212 	if (open_device(ib_devname)) {
213 		fprintf(stderr, "Failed to open device\n");
214 		return 1;
215 	}
216 
217 	if (pp_get_port_info(ctx.context, ctx.ib_port, &port_attr)) {
218 		fprintf(stderr, "Failed to get port info\n");
219 		return 1;
220 	}
221 
222 	ctx.lid = port_attr.lid;
223 	if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET && !ctx.lid) {
224 		fprintf(stderr, "Couldn't get local LID\n");
225 		return 1;
226 	}
227 
228 	ctx.buf = memalign(page_size, ctx.size);
229 	if (!ctx.buf) {
230 		fprintf(stderr, "Couldn't allocate work buf.\n");
231 		return 1;
232 	}
233 
234 	memset(ctx.buf, 0, ctx.size);
235 
236 	if (ctx.use_event) {
237 		ctx.channel = ibv_create_comp_channel(ctx.context);
238 		if (!ctx.channel) {
239 			fprintf(stderr, "Couldn't create completion channel\n");
240 			return 1;
241 		}
242 	}
243 
244 	ctx.pd = ibv_alloc_pd(ctx.context);
245 	if (!ctx.pd) {
246 		fprintf(stderr, "Couldn't allocate PD\n");
247 		return 1;
248 	}
249 
250 	ctx.mr = ibv_reg_mr(ctx.pd, ctx.buf, ctx.size, IBV_ACCESS_LOCAL_WRITE);
251 	if (!ctx.mr) {
252 		fprintf(stderr, "Couldn't register MR\n");
253 		return 1;
254 	}
255 
256 	ctx.fd = open("/tmp/xrc_domain", O_RDONLY | O_CREAT, S_IRUSR | S_IRGRP);
257 	if (ctx.fd < 0) {
258 		fprintf(stderr,
259 			"Couldn't create the file for the XRC Domain "
260 			"but not stopping %d\n", errno);
261 		ctx.fd = -1;
262 	}
263 
264 	memset(&xrcd_attr, 0, sizeof xrcd_attr);
265 	xrcd_attr.comp_mask = IBV_XRCD_INIT_ATTR_FD | IBV_XRCD_INIT_ATTR_OFLAGS;
266 	xrcd_attr.fd = ctx.fd;
267 	xrcd_attr.oflags = O_CREAT;
268 	ctx.xrcd = ibv_open_xrcd(ctx.context, &xrcd_attr);
269 	if (!ctx.xrcd) {
270 		fprintf(stderr, "Couldn't Open the XRC Domain %d\n", errno);
271 		return 1;
272 	}
273 
274 	ctx.recv_cq = ibv_create_cq(ctx.context, ctx.num_clients, &ctx.recv_cq,
275 				    ctx.channel, 0);
276 	if (!ctx.recv_cq) {
277 		fprintf(stderr, "Couldn't create recv CQ\n");
278 		return 1;
279 	}
280 
281 	if (ctx.use_event) {
282 		if (ibv_req_notify_cq(ctx.recv_cq, 0)) {
283 			fprintf(stderr, "Couldn't request CQ notification\n");
284 			return 1;
285 		}
286 	}
287 
288 	ctx.send_cq = ibv_create_cq(ctx.context, ctx.num_clients, NULL, NULL, 0);
289 	if (!ctx.send_cq) {
290 		fprintf(stderr, "Couldn't create send CQ\n");
291 		return 1;
292 	}
293 
294 	memset(&attr, 0, sizeof attr);
295 	attr.attr.max_wr = ctx.num_clients;
296 	attr.attr.max_sge = 1;
297 	attr.comp_mask = IBV_SRQ_INIT_ATTR_TYPE | IBV_SRQ_INIT_ATTR_XRCD |
298 			 IBV_SRQ_INIT_ATTR_CQ | IBV_SRQ_INIT_ATTR_PD;
299 	attr.srq_type = IBV_SRQT_XRC;
300 	attr.xrcd = ctx.xrcd;
301 	attr.cq = ctx.recv_cq;
302 	attr.pd = ctx.pd;
303 
304 	ctx.srq = ibv_create_srq_ex(ctx.context, &attr);
305 	if (!ctx.srq)  {
306 		fprintf(stderr, "Couldn't create SRQ\n");
307 		return 1;
308 	}
309 
310 	if (create_qps())
311 		return 1;
312 
313 	return 0;
314 }
315 
316 static int recv_termination_ack(int index)
317 {
318 	char msg[TERMINATION_MSG_SIZE];
319 	int n = 0, r;
320 	int sockfd = ctx.rem_dest[index].sockfd;
321 
322 	while (n < TERMINATION_MSG_SIZE) {
323 		r = read(sockfd, msg + n, TERMINATION_MSG_SIZE - n);
324 		if (r < 0) {
325 			perror("client read");
326 			fprintf(stderr,
327 				"%d/%d: Couldn't read remote termination ack\n",
328 				n, TERMINATION_MSG_SIZE);
329 			return 1;
330 		}
331 		n += r;
332 	}
333 
334 	if (strcmp(msg, TERMINATION_MSG)) {
335 		fprintf(stderr, "Invalid termination ack was accepted\n");
336 		return 1;
337 	}
338 
339 	return 0;
340 }
341 
342 static int send_termination_ack(int index)
343 {
344 	char msg[TERMINATION_MSG_SIZE];
345 	int sockfd = ctx.rem_dest[index].sockfd;
346 
347 	sprintf(msg, TERMINATION_FORMAT, TERMINATION_MSG);
348 
349 	if (write(sockfd, msg, TERMINATION_MSG_SIZE) != TERMINATION_MSG_SIZE) {
350 		fprintf(stderr, "Couldn't send termination ack\n");
351 		return 1;
352 	}
353 
354 	return 0;
355 }
356 
357 static int pp_client_termination(void)
358 {
359 	if (send_termination_ack(0))
360 		return 1;
361 	if (recv_termination_ack(0))
362 		return 1;
363 
364 	return 0;
365 }
366 
367 static int pp_server_termination(void)
368 {
369 	int i;
370 
371 	for (i = 0; i < ctx.num_clients; i++) {
372 		if (recv_termination_ack(i))
373 			return 1;
374 	}
375 
376 	for (i = 0; i < ctx.num_clients; i++) {
377 		if (send_termination_ack(i))
378 			return 1;
379 	}
380 
381 	return 0;
382 }
383 
384 static int send_local_dest(int sockfd, int index)
385 {
386 	char msg[MSG_SIZE];
387 	char gid[33];
388 	uint32_t srq_num;
389 	union ibv_gid local_gid;
390 
391 	if (ctx.gidx >= 0) {
392 		if (ibv_query_gid(ctx.context, ctx.ib_port, ctx.gidx,
393 				  &local_gid)) {
394 			fprintf(stderr, "can't read sgid of index %d\n",
395 				ctx.gidx);
396 			return -1;
397 		}
398 	} else {
399 		memset(&local_gid, 0, sizeof(local_gid));
400 	}
401 
402 	ctx.rem_dest[index].recv_psn = lrand48() & 0xffffff;
403 	if (ibv_get_srq_num(ctx.srq, &srq_num)) {
404 		fprintf(stderr, "Couldn't get SRQ num\n");
405 		return -1;
406 	}
407 
408 	inet_ntop(AF_INET6, &local_gid, gid, sizeof(gid));
409 	printf(ADDR_FORMAT, "local", ctx.lid, ctx.recv_qp[index]->qp_num,
410 		ctx.send_qp[index]->qp_num, ctx.rem_dest[index].recv_psn,
411 		srq_num, gid);
412 
413 	gid_to_wire_gid(&local_gid, gid);
414 	sprintf(msg, MSG_FORMAT, ctx.lid, ctx.recv_qp[index]->qp_num,
415 		ctx.send_qp[index]->qp_num, ctx.rem_dest[index].recv_psn,
416 		srq_num, gid);
417 
418 	if (write(sockfd, msg, MSG_SIZE) != MSG_SIZE) {
419 		fprintf(stderr, "Couldn't send local address\n");
420 		return -1;
421 	}
422 
423 	return 0;
424 }
425 
426 static int recv_remote_dest(int sockfd, int index)
427 {
428 	struct pingpong_dest *rem_dest;
429 	char msg[MSG_SIZE];
430 	char gid[33];
431 	int n = 0, r;
432 
433 	while (n < MSG_SIZE) {
434 		r = read(sockfd, msg + n, MSG_SIZE - n);
435 		if (r < 0) {
436 			perror("client read");
437 			fprintf(stderr,
438 				"%d/%d: Couldn't read remote address [%d]\n",
439 				n, MSG_SIZE, index);
440 			return -1;
441 		}
442 		n += r;
443 	}
444 
445 	rem_dest = &ctx.rem_dest[index];
446 	sscanf(msg, MSG_SSCAN, &rem_dest->lid, &rem_dest->recv_qpn,
447 		&rem_dest->send_qpn, &rem_dest->send_psn, &rem_dest->srqn, gid);
448 
449 	wire_gid_to_gid(gid, &rem_dest->gid);
450 	inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof(gid));
451 	printf(ADDR_FORMAT, "remote", rem_dest->lid, rem_dest->recv_qpn,
452 		rem_dest->send_qpn, rem_dest->send_psn, rem_dest->srqn,
453 		gid);
454 
455 	rem_dest->sockfd = sockfd;
456 	return 0;
457 }
458 
459 static void set_ah_attr(struct ibv_ah_attr *attr, struct pingpong_context *myctx,
460 			int index)
461 {
462 	attr->is_global = 1;
463 	attr->grh.hop_limit = 5;
464 	attr->grh.dgid = myctx->rem_dest[index].gid;
465 	attr->grh.sgid_index = myctx->gidx;
466 }
467 
468 static int connect_qps(int index)
469 {
470 	struct ibv_qp_attr attr;
471 
472 	memset(&attr, 0, sizeof attr);
473 	attr.qp_state	      = IBV_QPS_RTR;
474 	attr.dest_qp_num      = ctx.rem_dest[index].send_qpn;
475 	attr.path_mtu	      = ctx.mtu;
476 	attr.rq_psn	      = ctx.rem_dest[index].send_psn;
477 	attr.min_rnr_timer    = 12;
478 	attr.ah_attr.dlid     = ctx.rem_dest[index].lid;
479 	attr.ah_attr.sl	      = ctx.sl;
480 	attr.ah_attr.port_num = ctx.ib_port;
481 
482 	if (ctx.rem_dest[index].gid.global.interface_id)
483 		set_ah_attr(&attr.ah_attr, &ctx, index);
484 
485 	if (ibv_modify_qp(ctx.recv_qp[index], &attr,
486 			  IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU |
487 			  IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
488 			  IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER)) {
489 		fprintf(stderr, "Failed to modify recv QP[%d] to RTR\n", index);
490 		return 1;
491 	}
492 
493 	memset(&attr, 0, sizeof attr);
494 	attr.qp_state = IBV_QPS_RTS;
495 	attr.timeout = 14;
496 	attr.sq_psn = ctx.rem_dest[index].recv_psn;
497 
498 	if (ibv_modify_qp(ctx.recv_qp[index], &attr,
499 			  IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_SQ_PSN)) {
500 		fprintf(stderr, "Failed to modify recv QP[%d] to RTS\n", index);
501 		return 1;
502 	}
503 
504 	memset(&attr, 0, sizeof attr);
505 	attr.qp_state	      = IBV_QPS_RTR;
506 	attr.dest_qp_num      = ctx.rem_dest[index].recv_qpn;
507 	attr.path_mtu	      = ctx.mtu;
508 	attr.rq_psn	      = ctx.rem_dest[index].send_psn;
509 	attr.ah_attr.dlid     = ctx.rem_dest[index].lid;
510 	attr.ah_attr.sl	      = ctx.sl;
511 	attr.ah_attr.port_num = ctx.ib_port;
512 
513 	if (ctx.rem_dest[index].gid.global.interface_id)
514 		set_ah_attr(&attr.ah_attr, &ctx, index);
515 
516 	if (ibv_modify_qp(ctx.send_qp[index], &attr,
517 			  IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU |
518 			  IBV_QP_DEST_QPN | IBV_QP_RQ_PSN)) {
519 		fprintf(stderr, "Failed to modify send QP[%d] to RTR\n", index);
520 		return 1;
521 	}
522 
523 	memset(&attr, 0, sizeof attr);
524 	attr.qp_state = IBV_QPS_RTS;
525 	attr.timeout = 14;
526 	attr.retry_cnt = 7;
527 	attr.rnr_retry = 7;
528 	attr.sq_psn = ctx.rem_dest[index].recv_psn;
529 
530 	if (ibv_modify_qp(ctx.send_qp[index], &attr,
531 			  IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_SQ_PSN |
532 			  IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC)) {
533 		fprintf(stderr, "Failed to modify send QP[%d] to RTS\n", index);
534 		return 1;
535 	}
536 
537 	return 0;
538 }
539 
540 static int pp_client_connect(const char *servername, int port)
541 {
542 	struct addrinfo *res, *t;
543 	char *service;
544 	int ret;
545 	int sockfd = -1;
546 	struct addrinfo hints = {
547 		.ai_family   = AF_UNSPEC,
548 		.ai_socktype = SOCK_STREAM
549 	};
550 
551 	if (asprintf(&service, "%d", port) < 0)
552 		return 1;
553 
554 	ret = getaddrinfo(servername, service, &hints, &res);
555 	if (ret < 0) {
556 		fprintf(stderr, "%s for %s:%d\n", gai_strerror(ret), servername, port);
557 		free(service);
558 		return 1;
559 	}
560 
561 	for (t = res; t; t = t->ai_next) {
562 		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
563 		if (sockfd >= 0) {
564 			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
565 				break;
566 			close(sockfd);
567 			sockfd = -1;
568 		}
569 	}
570 
571 	freeaddrinfo_null(res);
572 	free(service);
573 
574 	if (sockfd < 0) {
575 		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
576 		return 1;
577 	}
578 
579 	if (send_local_dest(sockfd, 0))
580 		return 1;
581 
582 	if (recv_remote_dest(sockfd, 0))
583 		return 1;
584 
585 	if (connect_qps(0))
586 		return 1;
587 
588 	return 0;
589 }
590 
591 static int pp_server_connect(int port)
592 {
593 	struct addrinfo *res, *t;
594 	char *service;
595 	int ret, i, n;
596 	int sockfd = -1, connfd;
597 	struct addrinfo hints = {
598 		.ai_flags    = AI_PASSIVE,
599 		.ai_family   = AF_UNSPEC,
600 		.ai_socktype = SOCK_STREAM
601 	};
602 
603 	if (asprintf(&service, "%d", port) < 0)
604 		return 1;
605 
606 	ret = getaddrinfo(NULL, service, &hints, &res);
607 	if (ret < 0) {
608 		fprintf(stderr, "%s for port %d\n", gai_strerror(ret), port);
609 		free(service);
610 		return 1;
611 	}
612 
613 	for (t = res; t; t = t->ai_next) {
614 		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
615 		if (sockfd >= 0) {
616 			n = 1;
617 			setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);
618 			if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
619 				break;
620 			close(sockfd);
621 			sockfd = -1;
622 		}
623 	}
624 
625 	freeaddrinfo_null(res);
626 	free(service);
627 
628 	if (sockfd < 0) {
629 		fprintf(stderr, "Couldn't listen to port %d\n", port);
630 		return 1;
631 	}
632 
633 	if (listen(sockfd, ctx.num_clients) < 0) {
634 		perror("listen() failed");
635 		close(sockfd);
636 		return 1;
637 	}
638 
639 	for (i = 0; i < ctx.num_clients; i++) {
640 		connfd = accept(sockfd, NULL, NULL);
641 		if (connfd < 0) {
642 			fprintf(stderr, "accept() failed for client %d\n", i);
643 			return 1;
644 		}
645 
646 		if (recv_remote_dest(connfd, i))
647 			return 1;
648 
649 		if (send_local_dest(connfd, i))
650 			return 1;
651 
652 		if (connect_qps(i))
653 			return 1;
654 	}
655 
656 	close(sockfd);
657 	return 0;
658 }
659 
660 
661 static int pp_close_ctx(void)
662 {
663 	int i;
664 
665 	for (i = 0; i < ctx.num_clients; ++i) {
666 
667 		if (ibv_destroy_qp(ctx.send_qp[i])) {
668 			fprintf(stderr, "Couldn't destroy INI QP[%d]\n", i);
669 			return 1;
670 		}
671 
672 		if (ibv_destroy_qp(ctx.recv_qp[i])) {
673 			fprintf(stderr, "Couldn't destroy TGT QP[%d]\n", i);
674 			return 1;
675 		}
676 
677 		if (ctx.rem_dest[i].sockfd)
678 			close(ctx.rem_dest[i].sockfd);
679 	}
680 
681 	if (ibv_destroy_srq(ctx.srq)) {
682 		fprintf(stderr, "Couldn't destroy SRQ\n");
683 		return 1;
684 	}
685 
686 	if (ctx.xrcd && ibv_close_xrcd(ctx.xrcd)) {
687 		fprintf(stderr, "Couldn't close the XRC Domain\n");
688 		return 1;
689 	}
690 	if (ctx.fd >= 0 && close(ctx.fd)) {
691 		fprintf(stderr, "Couldn't close the file for the XRC Domain\n");
692 		return 1;
693 	}
694 
695 	if (ibv_destroy_cq(ctx.send_cq)) {
696 		fprintf(stderr, "Couldn't destroy send CQ\n");
697 		return 1;
698 	}
699 
700 	if (ibv_destroy_cq(ctx.recv_cq)) {
701 		fprintf(stderr, "Couldn't destroy recv CQ\n");
702 		return 1;
703 	}
704 
705 	if (ibv_dereg_mr(ctx.mr)) {
706 		fprintf(stderr, "Couldn't deregister MR\n");
707 		return 1;
708 	}
709 
710 	if (ibv_dealloc_pd(ctx.pd)) {
711 		fprintf(stderr, "Couldn't deallocate PD\n");
712 		return 1;
713 	}
714 
715 	if (ctx.channel) {
716 		if (ibv_destroy_comp_channel(ctx.channel)) {
717 			fprintf(stderr,
718 				"Couldn't destroy completion channel\n");
719 			return 1;
720 		}
721 	}
722 
723 	if (ibv_close_device(ctx.context)) {
724 		fprintf(stderr, "Couldn't release context\n");
725 		return 1;
726 	}
727 
728 	free(ctx.buf);
729 	free(ctx.rem_dest);
730 	free(ctx.send_qp);
731 	free(ctx.recv_qp);
732 	return 0;
733 }
734 
735 static int pp_post_recv(int cnt)
736 {
737 	struct ibv_sge sge;
738 	struct ibv_recv_wr wr, *bad_wr;
739 
740 	sge.addr = (uintptr_t) ctx.buf;
741 	sge.length = ctx.size;
742 	sge.lkey = ctx.mr->lkey;
743 
744 	wr.next       = NULL;
745 	wr.wr_id      = (uintptr_t) &ctx;
746 	wr.sg_list    = &sge;
747 	wr.num_sge    = 1;
748 
749 	while (cnt--) {
750 		if (ibv_post_srq_recv(ctx.srq, &wr, &bad_wr)) {
751 			fprintf(stderr, "Failed to post receive to SRQ\n");
752 			return 1;
753 		}
754 	}
755 	return 0;
756 }
757 
758 /*
759  * Send to each client round robin on each set of xrc send/recv qp.
760  * Generate a completion on the last send.
761  */
762 static int pp_post_send(int index)
763 {
764 	struct ibv_sge sge;
765 	struct ibv_send_wr wr, *bad_wr;
766 	int qpi;
767 
768 	sge.addr = (uintptr_t) ctx.buf;
769 	sge.length = ctx.size;
770 	sge.lkey = ctx.mr->lkey;
771 
772 	wr.wr_id   = (uintptr_t) index;
773 	wr.next    = NULL;
774 	wr.sg_list = &sge;
775 	wr.num_sge = 1;
776 	wr.opcode  = IBV_WR_SEND;
777 	wr.qp_type.xrc.remote_srqn = ctx.rem_dest[index].srqn;
778 
779 	qpi = (index + ctx.rem_dest[index].pp_cnt) % ctx.num_clients;
780 	wr.send_flags = (++ctx.rem_dest[index].pp_cnt >= ctx.num_tests) ?
781 			IBV_SEND_SIGNALED : 0;
782 
783 	return ibv_post_send(ctx.send_qp[qpi], &wr, &bad_wr);
784 }
785 
786 static int find_qp(int qpn)
787 {
788 	int i;
789 
790 	if (ctx.num_clients == 1)
791 		return 0;
792 
793 	for (i = 0; i < ctx.num_clients; ++i)
794 		if (ctx.recv_qp[i]->qp_num == qpn)
795 			return i;
796 
797 	fprintf(stderr, "Unable to find qp %x\n", qpn);
798 	return 0;
799 }
800 
801 static int get_cq_event(void)
802 {
803 	struct ibv_cq *ev_cq;
804 	void          *ev_ctx;
805 
806 	if (ibv_get_cq_event(ctx.channel, &ev_cq, &ev_ctx)) {
807 		fprintf(stderr, "Failed to get cq_event\n");
808 		return 1;
809 	}
810 
811 	if (ev_cq != ctx.recv_cq) {
812 		fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
813 		return 1;
814 	}
815 
816 	if (ibv_req_notify_cq(ctx.recv_cq, 0)) {
817 		fprintf(stderr, "Couldn't request CQ notification\n");
818 		return 1;
819 	}
820 
821 	return 0;
822 }
823 
824 static void init(void)
825 {
826 	srand48(getpid() * time(NULL));
827 
828 	ctx.size = 4096;
829 	ctx.ib_port = 1;
830 	ctx.num_clients  = 1;
831 	ctx.num_tests = 5;
832 	ctx.mtu = IBV_MTU_2048;
833 	ctx.sl = 0;
834 	ctx.gidx = -1;
835 }
836 
837 static void usage(const char *argv0)
838 {
839 	printf("Usage:\n");
840 	printf("  %s            start a server and wait for connection\n", argv0);
841 	printf("  %s <host>     connect to server at <host>\n", argv0);
842 	printf("\n");
843 	printf("Options:\n");
844 	printf("  -p, --port=<port>      listen on/connect to port <port> (default 18515)\n");
845 	printf("  -d, --ib-dev=<dev>     use IB device <dev> (default first device found)\n");
846 	printf("  -i, --ib-port=<port>   use port <port> of IB device (default 1)\n");
847 	printf("  -s, --size=<size>      size of message to exchange (default 4096)\n");
848 	printf("  -m, --mtu=<size>       path MTU (default 2048)\n");
849 	printf("  -c, --clients=<n>      number of clients (on server only, default 1)\n");
850 	printf("  -n, --num_tests=<n>    number of tests per client (default 5)\n");
851 	printf("  -l, --sl=<sl>          service level value\n");
852 	printf("  -e, --events           sleep on CQ events (default poll)\n");
853 	printf("  -g, --gid-idx=<gid index> local port gid index\n");
854 }
855 
856 int main(int argc, char *argv[])
857 {
858 	char          *ib_devname = NULL;
859 	char          *servername = NULL;
860 	int           port = 18515;
861 	int           i, total, cnt = 0;
862 	int           ne, qpi, num_cq_events = 0;
863 	struct ibv_wc wc;
864 
865 	init();
866 	while (1) {
867 		int c;
868 
869 		static struct option long_options[] = {
870 			{ .name = "port",      .has_arg = 1, .val = 'p' },
871 			{ .name = "ib-dev",    .has_arg = 1, .val = 'd' },
872 			{ .name = "ib-port",   .has_arg = 1, .val = 'i' },
873 			{ .name = "size",      .has_arg = 1, .val = 's' },
874 			{ .name = "mtu",       .has_arg = 1, .val = 'm' },
875 			{ .name = "clients",   .has_arg = 1, .val = 'c' },
876 			{ .name = "num_tests", .has_arg = 1, .val = 'n' },
877 			{ .name = "sl",        .has_arg = 1, .val = 'l' },
878 			{ .name = "events",    .has_arg = 0, .val = 'e' },
879 			{ .name = "gid-idx",   .has_arg = 1, .val = 'g' },
880 			{}
881 		};
882 
883 		c = getopt_long(argc, argv, "p:d:i:s:m:c:n:l:eg:", long_options,
884 				NULL);
885 		if (c == -1)
886 			break;
887 
888 		switch (c) {
889 		case 'p':
890 			port = strtol(optarg, NULL, 0);
891 			if (port < 0 || port > 65535) {
892 				usage(argv[0]);
893 				return 1;
894 			}
895 			break;
896 		case 'd':
897 			ib_devname = strdupa(optarg);
898 			break;
899 		case 'i':
900 			ctx.ib_port = strtol(optarg, NULL, 0);
901 			if (ctx.ib_port < 0) {
902 				usage(argv[0]);
903 				return 1;
904 			}
905 			break;
906 		case 's':
907 			ctx.size = strtol(optarg, NULL, 0);
908 			break;
909 		case 'm':
910 			ctx.mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
911 			if (ctx.mtu == 0) {
912 				usage(argv[0]);
913 				return 1;
914 			}
915 			break;
916 		case 'c':
917 			ctx.num_clients = strtol(optarg, NULL, 0);
918 			break;
919 		case 'n':
920 			ctx.num_tests = strtol(optarg, NULL, 0);
921 			break;
922 		case 'l':
923 			ctx.sl = strtol(optarg, NULL, 0);
924 			break;
925 		case 'g':
926 			ctx.gidx = strtol(optarg, NULL, 0);
927 			break;
928 		case 'e':
929 			ctx.use_event = 1;
930 			break;
931 		default:
932 			usage(argv[0]);
933 			return 1;
934 		}
935 	}
936 
937 	if (optind == argc - 1) {
938 		servername = strdupa(argv[optind]);
939 		ctx.num_clients = 1;
940 	} else if (optind < argc) {
941 		usage(argv[0]);
942 		return 1;
943 	}
944 
945 	page_size = sysconf(_SC_PAGESIZE);
946 
947 	if (pp_init_ctx(ib_devname))
948 		return 1;
949 
950 	if (pp_post_recv(ctx.num_clients)) {
951 		fprintf(stderr, "Couldn't post receives\n");
952 		return 1;
953 	}
954 
955 	if (servername) {
956 		if (pp_client_connect(servername, port))
957 			return 1;
958 	} else {
959 		if (pp_server_connect(port))
960 			return 1;
961 
962 		for (i = 0; i < ctx.num_clients; i++)
963 			pp_post_send(i);
964 	}
965 
966 	total = ctx.num_clients * ctx.num_tests;
967 	while (cnt < total) {
968 		if (ctx.use_event) {
969 			if (get_cq_event())
970 				return 1;
971 
972 			++num_cq_events;
973 		}
974 
975 		do {
976 			ne = ibv_poll_cq(ctx.recv_cq, 1, &wc);
977 			if (ne < 0) {
978 				fprintf(stderr, "Error polling cq %d\n", ne);
979 				return 1;
980 			} else if (ne == 0) {
981 				break;
982 			}
983 
984 			if (wc.status) {
985 				fprintf(stderr, "Work completion error %d\n", wc.status);
986 				return 1;
987 			}
988 
989 			pp_post_recv(ne);
990 			qpi = find_qp(wc.qp_num);
991 			if (ctx.rem_dest[qpi].pp_cnt < ctx.num_tests)
992 				pp_post_send(qpi);
993 			cnt += ne;
994 		} while (ne > 0);
995 	}
996 
997 	for (cnt = 0; cnt < ctx.num_clients; cnt += ne) {
998 		ne = ibv_poll_cq(ctx.send_cq, 1, &wc);
999 		if (ne < 0) {
1000 			fprintf(stderr, "Error polling cq %d\n", ne);
1001 			return 1;
1002 		}
1003 	}
1004 
1005 	if (ctx.use_event)
1006 		ibv_ack_cq_events(ctx.recv_cq, num_cq_events);
1007 
1008 	/* Process should get an ack from the daemon to close its resources to
1009 	  * make sure latest daemon's response sent via its target QP destined
1010 	  * to an XSRQ created by another client won't be lost.
1011 	  * Failure to do so may cause the other client to wait for that sent
1012 	  * message forever. See comment on pp_post_send.
1013 	*/
1014 	if (servername) {
1015 		if (pp_client_termination())
1016 			return 1;
1017 	} else if (pp_server_termination()) {
1018 		return 1;
1019 	}
1020 
1021 	if (pp_close_ctx())
1022 		return 1;
1023 
1024 	printf("success\n");
1025 	return 0;
1026 }
1027