1 /*
2 * Copyright (c) 2013 Intel Corporation. All rights reserved.
3 *
4 * This software is available to you under the OpenIB.org BSD license
5 * below:
6 *
7 * Redistribution and use in source and binary forms, with or
8 * without modification, are permitted provided that the following
9 * conditions are met:
10 *
11 * - Redistributions of source code must retain the above
12 * copyright notice, this list of conditions and the following
13 * disclaimer.
14 *
15 * - Redistributions in binary form must reproduce the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer in the documentation and/or other materials
18 * provided with the distribution.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
24 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
25 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27 * SOFTWARE.
28 */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <strings.h>
34 #include <errno.h>
35 #include <getopt.h>
36 #include <sys/types.h>
37 #include <sys/socket.h>
38 #include <sys/time.h>
39 #include <sys/wait.h>
40 #include <netdb.h>
41 #include <fcntl.h>
42 #include <unistd.h>
43 #include <netinet/tcp.h>
44
45 #include <rdma/rdma_cma.h>
46 #include "common.h"
47
/* Address resolution hints and the resolved address info produced from them. */
static struct rdma_addrinfo hints, *rai;
/* Event channel used for every rdma_cm_id created in this file. */
static struct rdma_event_channel *channel;
/* Port/service string; overridden by -p. */
static const char *port = "7471";
/* Server address (-s); when set, main() takes the client path. */
static char *dst_addr;
/* Optional bind address (-b). */
static char *src_addr;
/* Timeout passed to rdma_resolve_addr()/rdma_resolve_route(); set by -t. */
static int timeout = 2000;
/* Per-node resolution retry budget and conn_param.retry_count; set by -r. */
static int retries = 2;
55
/* Phases of a connection's life cycle that are timed individually. */
enum step {
	STEP_CREATE_ID,
	STEP_BIND,
	STEP_RESOLVE_ADDR,
	STEP_RESOLVE_ROUTE,
	STEP_CREATE_QP,
	STEP_CONNECT,
	STEP_DISCONNECT,
	STEP_DESTROY,
	STEP_CNT	/* number of steps; not a step itself */
};

/* Report label for each step, indexed by enum step. */
static const char *step_str[] = {
	[STEP_CREATE_ID]	= "create id",
	[STEP_BIND]		= "bind addr",
	[STEP_RESOLVE_ADDR]	= "resolve addr",
	[STEP_RESOLVE_ROUTE]	= "resolve route",
	[STEP_CREATE_QP]	= "create qp",
	[STEP_CONNECT]		= "connect",
	[STEP_DISCONNECT]	= "disconnect",
	[STEP_DESTROY]		= "destroy"
};
78
/* Per-connection state used on the client path. */
struct node {
	struct rdma_cm_id *id;			/* id created in alloc_nodes() */
	struct timeval times[STEP_CNT][2];	/* [step][0] = start, [step][1] = end */
	int error;				/* set when a step fails; later steps skip the node */
	int retries;				/* remaining address/route resolution retries */
};
85
/*
 * Doubly linked list entry.  The same type serves as the list's sentinel
 * (embedded in struct work_list) and as a queued work item carrying an id.
 */
struct list_head {
	struct list_head *prev;
	struct list_head *next;
	struct rdma_cm_id *id;	/* id the queued work applies to */
};
91
/* Work queue handed from the event handler to a worker thread. */
struct work_list {
	pthread_mutex_t lock;	/* protects list */
	pthread_cond_t cond;	/* signalled by list_add_tail() when the list goes non-empty */
	struct list_head list;	/* sentinel; list.next is the oldest entry */
};
97
/* Point an entry at itself: an empty list, or an unlinked item. */
#define INIT_LIST(x) ((x)->prev = (x)->next = (x))
99
/* Queues of connect requests to accept and of ids to disconnect and destroy. */
static struct work_list req_work;
static struct work_list disc_work;
/* Array of `connections` nodes, allocated by alloc_nodes(). */
static struct node *nodes;
/* Whole-run start/end time of each step: [step][0] = start, [step][1] = end. */
static struct timeval times[STEP_CNT][2];
/* Number of connections to time; set by -c. */
static int connections = 100;
/*
 * Per-step count of operations issued by run_client() and of completions
 * recorded by the event handlers; run_client() spins until they match.
 */
static volatile int started[STEP_CNT];
static volatile int completed[STEP_CNT];
/* QP attributes used for every rdma_create_qp() call; filled in by main(). */
static struct ibv_qp_init_attr init_qp_attr;
/* Parameters passed to rdma_connect(); filled in by run_client(). */
static struct rdma_conn_param conn_param;
109
/*
 * Timing helpers.  start_perf()/end_perf() stamp the start/end slot of step
 * `s` in node `n`; start_time()/end_time() stamp the same slots in the
 * file-scope `times` table.
 */
#define start_perf(n, s)	gettimeofday(&((n)->times[s][0]), NULL)
#define end_perf(n, s)		gettimeofday(&((n)->times[s][1]), NULL)
#define start_time(s)		gettimeofday(&times[s][0], NULL)
#define end_time(s)		gettimeofday(&times[s][1], NULL)
114
__list_delete(struct list_head * list)115 static inline void __list_delete(struct list_head *list)
116 {
117 struct list_head *prev, *next;
118 prev = list->prev;
119 next = list->next;
120 prev->next = next;
121 next->prev = prev;
122 INIT_LIST(list);
123 }
124
__list_empty(struct work_list * list)125 static inline int __list_empty(struct work_list *list)
126 {
127 return list->list.next == &list->list;
128 }
129
__list_remove_head(struct work_list * work_list)130 static inline struct list_head *__list_remove_head(struct work_list *work_list)
131 {
132 struct list_head *list_item;
133
134 list_item = work_list->list.next;
135 __list_delete(list_item);
136 return list_item;
137 }
138
list_add_tail(struct work_list * work_list,struct list_head * req)139 static inline void list_add_tail(struct work_list *work_list, struct list_head *req)
140 {
141 int empty;
142 pthread_mutex_lock(&work_list->lock);
143 empty = __list_empty(work_list);
144 req->prev = work_list->list.prev;
145 req->next = &work_list->list;
146 req->prev->next = work_list->list.prev = req;
147 pthread_mutex_unlock(&work_list->lock);
148 if (empty)
149 pthread_cond_signal(&work_list->cond);
150 }
151
/* Return 1 when both fields of the timestamp are zero (never stamped), else 0. */
static int zero_time(struct timeval *t)
{
	return t->tv_sec == 0 && t->tv_usec == 0;
}
156
/* Elapsed time from `start` to `end` in microseconds. */
static float diff_us(struct timeval *end, struct timeval *start)
{
	double whole_secs_us = (end->tv_sec - start->tv_sec) * 1000000.;
	double frac_us = end->tv_usec - start->tv_usec;

	return whole_secs_us + frac_us;
}
161
show_perf(void)162 static void show_perf(void)
163 {
164 int c, i;
165 float us, max[STEP_CNT], min[STEP_CNT];
166
167 for (i = 0; i < STEP_CNT; i++) {
168 max[i] = 0;
169 min[i] = 999999999.;
170 for (c = 0; c < connections; c++) {
171 if (!zero_time(&nodes[c].times[i][0]) &&
172 !zero_time(&nodes[c].times[i][1])) {
173 us = diff_us(&nodes[c].times[i][1], &nodes[c].times[i][0]);
174 if (us > max[i])
175 max[i] = us;
176 if (us < min[i])
177 min[i] = us;
178 }
179 }
180 }
181
182 printf("step total ms max ms min us us / conn\n");
183 for (i = 0; i < STEP_CNT; i++) {
184 if (i == STEP_BIND && !src_addr)
185 continue;
186
187 us = diff_us(×[i][1], ×[i][0]);
188 printf("%-13s: %11.2f%11.2f%11.2f%11.2f\n", step_str[i], us / 1000.,
189 max[i] / 1000., min[i], us / connections);
190 }
191 }
192
addr_handler(struct node * n)193 static void addr_handler(struct node *n)
194 {
195 end_perf(n, STEP_RESOLVE_ADDR);
196 completed[STEP_RESOLVE_ADDR]++;
197 }
198
route_handler(struct node * n)199 static void route_handler(struct node *n)
200 {
201 end_perf(n, STEP_RESOLVE_ROUTE);
202 completed[STEP_RESOLVE_ROUTE]++;
203 }
204
conn_handler(struct node * n)205 static void conn_handler(struct node *n)
206 {
207 end_perf(n, STEP_CONNECT);
208 completed[STEP_CONNECT]++;
209 }
210
disc_handler(struct node * n)211 static void disc_handler(struct node *n)
212 {
213 end_perf(n, STEP_DISCONNECT);
214 completed[STEP_DISCONNECT]++;
215 }
216
__req_handler(struct rdma_cm_id * id)217 static void __req_handler(struct rdma_cm_id *id)
218 {
219 int ret;
220
221 ret = rdma_create_qp(id, NULL, &init_qp_attr);
222 if (ret) {
223 perror("failure creating qp");
224 goto err;
225 }
226
227 ret = rdma_accept(id, NULL);
228 if (ret) {
229 perror("failure accepting");
230 goto err;
231 }
232 return;
233
234 err:
235 printf("failing connection request\n");
236 rdma_reject(id, NULL, 0);
237 rdma_destroy_id(id);
238 return;
239 }
240
req_handler_thread(void * arg)241 static void *req_handler_thread(void *arg)
242 {
243 struct list_head *work;
244 do {
245 pthread_mutex_lock(&req_work.lock);
246 if (__list_empty(&req_work))
247 pthread_cond_wait(&req_work.cond, &req_work.lock);
248 work = __list_remove_head(&req_work);
249 pthread_mutex_unlock(&req_work.lock);
250 __req_handler(work->id);
251 free(work);
252 } while (1);
253 return NULL;
254 }
255
disc_handler_thread(void * arg)256 static void *disc_handler_thread(void *arg)
257 {
258 struct list_head *work;
259 do {
260 pthread_mutex_lock(&disc_work.lock);
261 if (__list_empty(&disc_work))
262 pthread_cond_wait(&disc_work.cond, &disc_work.lock);
263 work = __list_remove_head(&disc_work);
264 pthread_mutex_unlock(&disc_work.lock);
265 rdma_disconnect(work->id);
266 rdma_destroy_id(work->id);
267 free(work);
268 } while (1);
269 return NULL;
270 }
271
cma_handler(struct rdma_cm_id * id,struct rdma_cm_event * event)272 static void cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
273 {
274 struct node *n = id->context;
275 struct list_head *request;
276
277 switch (event->event) {
278 case RDMA_CM_EVENT_ADDR_RESOLVED:
279 addr_handler(n);
280 break;
281 case RDMA_CM_EVENT_ROUTE_RESOLVED:
282 route_handler(n);
283 break;
284 case RDMA_CM_EVENT_CONNECT_REQUEST:
285 request = malloc(sizeof *request);
286 if (!request) {
287 perror("out of memory accepting connect request");
288 rdma_reject(id, NULL, 0);
289 rdma_destroy_id(id);
290 } else {
291 INIT_LIST(request);
292 request->id = id;
293 list_add_tail(&req_work, request);
294 }
295 break;
296 case RDMA_CM_EVENT_ESTABLISHED:
297 if (n)
298 conn_handler(n);
299 break;
300 case RDMA_CM_EVENT_ADDR_ERROR:
301 if (n->retries--) {
302 if (!rdma_resolve_addr(n->id, rai->ai_src_addr,
303 rai->ai_dst_addr, timeout))
304 break;
305 }
306 printf("RDMA_CM_EVENT_ADDR_ERROR, error: %d\n", event->status);
307 addr_handler(n);
308 n->error = 1;
309 break;
310 case RDMA_CM_EVENT_ROUTE_ERROR:
311 if (n->retries--) {
312 if (!rdma_resolve_route(n->id, timeout))
313 break;
314 }
315 printf("RDMA_CM_EVENT_ROUTE_ERROR, error: %d\n", event->status);
316 route_handler(n);
317 n->error = 1;
318 break;
319 case RDMA_CM_EVENT_CONNECT_ERROR:
320 case RDMA_CM_EVENT_UNREACHABLE:
321 case RDMA_CM_EVENT_REJECTED:
322 printf("event: %s, error: %d\n",
323 rdma_event_str(event->event), event->status);
324 conn_handler(n);
325 n->error = 1;
326 break;
327 case RDMA_CM_EVENT_DISCONNECTED:
328 if (!n) {
329 request = malloc(sizeof *request);
330 if (!request) {
331 perror("out of memory queueing disconnect request, handling synchronously");
332 rdma_disconnect(id);
333 rdma_destroy_id(id);
334 } else {
335 INIT_LIST(request);
336 request->id = id;
337 list_add_tail(&disc_work, request);
338 }
339 } else
340 disc_handler(n);
341 break;
342 case RDMA_CM_EVENT_DEVICE_REMOVAL:
343 /* Cleanup will occur after test completes. */
344 break;
345 default:
346 break;
347 }
348 rdma_ack_cm_event(event);
349 }
350
alloc_nodes(void)351 static int alloc_nodes(void)
352 {
353 int ret, i;
354
355 nodes = calloc(sizeof *nodes, connections);
356 if (!nodes)
357 return -ENOMEM;
358
359 printf("creating id\n");
360 start_time(STEP_CREATE_ID);
361 for (i = 0; i < connections; i++) {
362 start_perf(&nodes[i], STEP_CREATE_ID);
363 if (dst_addr) {
364 ret = rdma_create_id(channel, &nodes[i].id, &nodes[i],
365 hints.ai_port_space);
366 if (ret)
367 goto err;
368 }
369 end_perf(&nodes[i], STEP_CREATE_ID);
370 }
371 end_time(STEP_CREATE_ID);
372 return 0;
373
374 err:
375 while (--i >= 0)
376 rdma_destroy_id(nodes[i].id);
377 free(nodes);
378 return ret;
379 }
380
cleanup_nodes(void)381 static void cleanup_nodes(void)
382 {
383 int i;
384
385 printf("destroying id\n");
386 start_time(STEP_DESTROY);
387 for (i = 0; i < connections; i++) {
388 start_perf(&nodes[i], STEP_DESTROY);
389 if (nodes[i].id)
390 rdma_destroy_id(nodes[i].id);
391 end_perf(&nodes[i], STEP_DESTROY);
392 }
393 end_time(STEP_DESTROY);
394 }
395
process_events(void * arg)396 static void *process_events(void *arg)
397 {
398 struct rdma_cm_event *event;
399 int ret = 0;
400
401 while (!ret) {
402 ret = rdma_get_cm_event(channel, &event);
403 if (!ret) {
404 cma_handler(event->id, event);
405 } else {
406 perror("failure in rdma_get_cm_event in process_server_events");
407 ret = errno;
408 }
409 }
410 return NULL;
411 }
412
run_server(void)413 static int run_server(void)
414 {
415 pthread_t req_thread, disc_thread;
416 struct rdma_cm_id *listen_id;
417 int ret;
418
419 INIT_LIST(&req_work.list);
420 INIT_LIST(&disc_work.list);
421 ret = pthread_mutex_init(&req_work.lock, NULL);
422 if (ret) {
423 perror("initializing mutex for req work");
424 return ret;
425 }
426
427 ret = pthread_mutex_init(&disc_work.lock, NULL);
428 if (ret) {
429 perror("initializing mutex for disc work");
430 return ret;
431 }
432
433 ret = pthread_cond_init(&req_work.cond, NULL);
434 if (ret) {
435 perror("initializing cond for req work");
436 return ret;
437 }
438
439 ret = pthread_cond_init(&disc_work.cond, NULL);
440 if (ret) {
441 perror("initializing cond for disc work");
442 return ret;
443 }
444
445 ret = pthread_create(&req_thread, NULL, req_handler_thread, NULL);
446 if (ret) {
447 perror("failed to create req handler thread");
448 return ret;
449 }
450
451 ret = pthread_create(&disc_thread, NULL, disc_handler_thread, NULL);
452 if (ret) {
453 perror("failed to create disconnect handler thread");
454 return ret;
455 }
456
457 ret = rdma_create_id(channel, &listen_id, NULL, hints.ai_port_space);
458 if (ret) {
459 perror("listen request failed");
460 return ret;
461 }
462
463 ret = get_rdma_addr(src_addr, dst_addr, port, &hints, &rai);
464 if (ret) {
465 printf("getrdmaaddr error: %s\n", gai_strerror(ret));
466 goto out;
467 }
468
469 ret = rdma_bind_addr(listen_id, rai->ai_src_addr);
470 if (ret) {
471 perror("bind address failed");
472 goto out;
473 }
474
475 ret = rdma_listen(listen_id, 0);
476 if (ret) {
477 perror("failure trying to listen");
478 goto out;
479 }
480
481 process_events(NULL);
482 out:
483 rdma_destroy_id(listen_id);
484 return ret;
485 }
486
/*
 * Client path: drive every node through bind (optional), address
 * resolution, route resolution, QP creation, connect and disconnect, timing
 * each step as a whole and per node.
 *
 * Asynchronous steps are issued for all nodes first; the function then spins
 * until the event thread has recorded as many completions as operations were
 * started.  A node that fails a step is flagged and skipped afterwards.
 *
 * Returns -ENOMEM when the node array does not exist, the address lookup or
 * thread creation error, or otherwise the result of the last rdma call made.
 */
static int run_client(void)
{
	pthread_t event_thread;
	int i, ret;

	if (!nodes)
		return -ENOMEM;

	ret = get_rdma_addr(src_addr, dst_addr, port, &hints, &rai);
	if (ret) {
		printf("getaddrinfo error: %s\n", gai_strerror(ret));
		return ret;
	}

	conn_param.responder_resources = 1;
	conn_param.initiator_depth = 1;
	conn_param.retry_count = retries;
	conn_param.private_data = rai->ai_connect;
	conn_param.private_data_len = rai->ai_connect_len;

	ret = pthread_create(&event_thread, NULL, process_events, NULL);
	if (ret) {
		/* pthread_create() returns the error number; errno is not set. */
		fprintf(stderr, "failure creating event thread: %s\n",
			strerror(ret));
		return ret;
	}

	if (src_addr) {
		printf("binding source address\n");
		start_time(STEP_BIND);
		for (i = 0; i < connections; i++) {
			start_perf(&nodes[i], STEP_BIND);
			ret = rdma_bind_addr(nodes[i].id, rai->ai_src_addr);
			if (ret) {
				perror("failure bind addr");
				nodes[i].error = 1;
				continue;
			}
			end_perf(&nodes[i], STEP_BIND);
		}
		end_time(STEP_BIND);
	}

	printf("resolving address\n");
	start_time(STEP_RESOLVE_ADDR);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		nodes[i].retries = retries;
		start_perf(&nodes[i], STEP_RESOLVE_ADDR);
		ret = rdma_resolve_addr(nodes[i].id, rai->ai_src_addr,
					rai->ai_dst_addr, timeout);
		if (ret) {
			perror("failure getting addr");
			nodes[i].error = 1;
			continue;
		}
		started[STEP_RESOLVE_ADDR]++;
	}
	/* The end stamps and `completed` are updated by the event thread. */
	while (started[STEP_RESOLVE_ADDR] != completed[STEP_RESOLVE_ADDR])
		sched_yield();
	end_time(STEP_RESOLVE_ADDR);

	printf("resolving route\n");
	start_time(STEP_RESOLVE_ROUTE);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		nodes[i].retries = retries;
		start_perf(&nodes[i], STEP_RESOLVE_ROUTE);
		ret = rdma_resolve_route(nodes[i].id, timeout);
		if (ret) {
			perror("failure resolving route");
			nodes[i].error = 1;
			continue;
		}
		started[STEP_RESOLVE_ROUTE]++;
	}
	while (started[STEP_RESOLVE_ROUTE] != completed[STEP_RESOLVE_ROUTE])
		sched_yield();
	end_time(STEP_RESOLVE_ROUTE);

	printf("creating qp\n");
	start_time(STEP_CREATE_QP);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		start_perf(&nodes[i], STEP_CREATE_QP);
		ret = rdma_create_qp(nodes[i].id, NULL, &init_qp_attr);
		if (ret) {
			perror("failure creating qp");
			nodes[i].error = 1;
			continue;
		}
		end_perf(&nodes[i], STEP_CREATE_QP);
	}
	end_time(STEP_CREATE_QP);

	printf("connecting\n");
	start_time(STEP_CONNECT);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		start_perf(&nodes[i], STEP_CONNECT);
		ret = rdma_connect(nodes[i].id, &conn_param);
		if (ret) {
			perror("failure rconnecting");
			nodes[i].error = 1;
			continue;
		}
		started[STEP_CONNECT]++;
	}
	while (started[STEP_CONNECT] != completed[STEP_CONNECT])
		sched_yield();
	end_time(STEP_CONNECT);

	printf("disconnecting\n");
	start_time(STEP_DISCONNECT);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		start_perf(&nodes[i], STEP_DISCONNECT);
		rdma_disconnect(nodes[i].id);
		started[STEP_DISCONNECT]++;
	}
	while (started[STEP_DISCONNECT] != completed[STEP_DISCONNECT])
		sched_yield();
	end_time(STEP_DISCONNECT);

	return ret;
}
610
main(int argc,char ** argv)611 int main(int argc, char **argv)
612 {
613 int op, ret;
614
615 hints.ai_port_space = RDMA_PS_TCP;
616 hints.ai_qp_type = IBV_QPT_RC;
617 while ((op = getopt(argc, argv, "s:b:c:p:r:t:")) != -1) {
618 switch (op) {
619 case 's':
620 dst_addr = optarg;
621 break;
622 case 'b':
623 src_addr = optarg;
624 break;
625 case 'c':
626 connections = atoi(optarg);
627 break;
628 case 'p':
629 port = optarg;
630 break;
631 case 'r':
632 retries = atoi(optarg);
633 break;
634 case 't':
635 timeout = atoi(optarg);
636 break;
637 default:
638 printf("usage: %s\n", argv[0]);
639 printf("\t[-s server_address]\n");
640 printf("\t[-b bind_address]\n");
641 printf("\t[-c connections]\n");
642 printf("\t[-p port_number]\n");
643 printf("\t[-r retries]\n");
644 printf("\t[-t timeout_ms]\n");
645 exit(1);
646 }
647 }
648
649 init_qp_attr.cap.max_send_wr = 1;
650 init_qp_attr.cap.max_recv_wr = 1;
651 init_qp_attr.cap.max_send_sge = 1;
652 init_qp_attr.cap.max_recv_sge = 1;
653 init_qp_attr.qp_type = IBV_QPT_RC;
654
655 channel = rdma_create_event_channel();
656 if (!channel) {
657 printf("failed to create event channel\n");
658 exit(1);
659 }
660
661 if (dst_addr) {
662 alloc_nodes();
663 ret = run_client();
664 } else {
665 hints.ai_flags |= RAI_PASSIVE;
666 ret = run_server();
667 }
668
669 cleanup_nodes();
670 rdma_destroy_event_channel(channel);
671 if (rai)
672 rdma_freeaddrinfo(rai);
673
674 show_perf();
675 free(nodes);
676 return ret;
677 }
678