/*
 * Copyright (c) 2013 Intel Corporation. All rights reserved.
 *
 * This software is available to you under the OpenIB.org BSD license
 * below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
28 */ 29 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #include <strings.h> 34 #include <errno.h> 35 #include <getopt.h> 36 #include <sys/types.h> 37 #include <sys/socket.h> 38 #include <sys/time.h> 39 #include <sys/wait.h> 40 #include <netdb.h> 41 #include <fcntl.h> 42 #include <unistd.h> 43 #include <netinet/tcp.h> 44 45 #include <rdma/rdma_cma.h> 46 #include "common.h" 47 48 static struct rdma_addrinfo hints, *rai; 49 static struct rdma_event_channel *channel; 50 static const char *port = "7471"; 51 static char *dst_addr; 52 static char *src_addr; 53 static int timeout = 2000; 54 static int retries = 2; 55 56 enum step { 57 STEP_CREATE_ID, 58 STEP_BIND, 59 STEP_RESOLVE_ADDR, 60 STEP_RESOLVE_ROUTE, 61 STEP_CREATE_QP, 62 STEP_CONNECT, 63 STEP_DISCONNECT, 64 STEP_DESTROY, 65 STEP_CNT 66 }; 67 68 static const char *step_str[] = { 69 "create id", 70 "bind addr", 71 "resolve addr", 72 "resolve route", 73 "create qp", 74 "connect", 75 "disconnect", 76 "destroy" 77 }; 78 79 struct node { 80 struct rdma_cm_id *id; 81 struct timeval times[STEP_CNT][2]; 82 int error; 83 int retries; 84 }; 85 86 struct list_head { 87 struct list_head *prev; 88 struct list_head *next; 89 struct rdma_cm_id *id; 90 }; 91 92 struct work_list { 93 pthread_mutex_t lock; 94 pthread_cond_t cond; 95 struct list_head list; 96 }; 97 98 #define INIT_LIST(x) ((x)->prev = (x)->next = (x)) 99 100 static struct work_list req_work; 101 static struct work_list disc_work; 102 static struct node *nodes; 103 static struct timeval times[STEP_CNT][2]; 104 static int connections = 100; 105 static volatile int started[STEP_CNT]; 106 static volatile int completed[STEP_CNT]; 107 static struct ibv_qp_init_attr init_qp_attr; 108 static struct rdma_conn_param conn_param; 109 110 #define start_perf(n, s) gettimeofday(&((n)->times[s][0]), NULL) 111 #define end_perf(n, s) gettimeofday(&((n)->times[s][1]), NULL) 112 #define start_time(s) gettimeofday(×[s][0], NULL) 113 #define end_time(s) 
gettimeofday(×[s][1], NULL) 114 115 static inline void __list_delete(struct list_head *list) 116 { 117 struct list_head *prev, *next; 118 prev = list->prev; 119 next = list->next; 120 prev->next = next; 121 next->prev = prev; 122 INIT_LIST(list); 123 } 124 125 static inline int __list_empty(struct work_list *list) 126 { 127 return list->list.next == &list->list; 128 } 129 130 static inline struct list_head *__list_remove_head(struct work_list *work_list) 131 { 132 struct list_head *list_item; 133 134 list_item = work_list->list.next; 135 __list_delete(list_item); 136 return list_item; 137 } 138 139 static inline void list_add_tail(struct work_list *work_list, struct list_head *req) 140 { 141 int empty; 142 pthread_mutex_lock(&work_list->lock); 143 empty = __list_empty(work_list); 144 req->prev = work_list->list.prev; 145 req->next = &work_list->list; 146 req->prev->next = work_list->list.prev = req; 147 pthread_mutex_unlock(&work_list->lock); 148 if (empty) 149 pthread_cond_signal(&work_list->cond); 150 } 151 152 static int zero_time(struct timeval *t) 153 { 154 return !(t->tv_sec || t->tv_usec); 155 } 156 157 static float diff_us(struct timeval *end, struct timeval *start) 158 { 159 return (end->tv_sec - start->tv_sec) * 1000000. 
+ (end->tv_usec - start->tv_usec); 160 } 161 162 static void show_perf(void) 163 { 164 int c, i; 165 float us, max[STEP_CNT], min[STEP_CNT]; 166 167 for (i = 0; i < STEP_CNT; i++) { 168 max[i] = 0; 169 min[i] = 999999999.; 170 for (c = 0; c < connections; c++) { 171 if (!zero_time(&nodes[c].times[i][0]) && 172 !zero_time(&nodes[c].times[i][1])) { 173 us = diff_us(&nodes[c].times[i][1], &nodes[c].times[i][0]); 174 if (us > max[i]) 175 max[i] = us; 176 if (us < min[i]) 177 min[i] = us; 178 } 179 } 180 } 181 182 printf("step total ms max ms min us us / conn\n"); 183 for (i = 0; i < STEP_CNT; i++) { 184 if (i == STEP_BIND && !src_addr) 185 continue; 186 187 us = diff_us(×[i][1], ×[i][0]); 188 printf("%-13s: %11.2f%11.2f%11.2f%11.2f\n", step_str[i], us / 1000., 189 max[i] / 1000., min[i], us / connections); 190 } 191 } 192 193 static void addr_handler(struct node *n) 194 { 195 end_perf(n, STEP_RESOLVE_ADDR); 196 completed[STEP_RESOLVE_ADDR]++; 197 } 198 199 static void route_handler(struct node *n) 200 { 201 end_perf(n, STEP_RESOLVE_ROUTE); 202 completed[STEP_RESOLVE_ROUTE]++; 203 } 204 205 static void conn_handler(struct node *n) 206 { 207 end_perf(n, STEP_CONNECT); 208 completed[STEP_CONNECT]++; 209 } 210 211 static void disc_handler(struct node *n) 212 { 213 end_perf(n, STEP_DISCONNECT); 214 completed[STEP_DISCONNECT]++; 215 } 216 217 static void __req_handler(struct rdma_cm_id *id) 218 { 219 int ret; 220 221 ret = rdma_create_qp(id, NULL, &init_qp_attr); 222 if (ret) { 223 perror("failure creating qp"); 224 goto err; 225 } 226 227 ret = rdma_accept(id, NULL); 228 if (ret) { 229 perror("failure accepting"); 230 goto err; 231 } 232 return; 233 234 err: 235 printf("failing connection request\n"); 236 rdma_reject(id, NULL, 0); 237 rdma_destroy_id(id); 238 return; 239 } 240 241 static void *req_handler_thread(void *arg) 242 { 243 struct list_head *work; 244 do { 245 pthread_mutex_lock(&req_work.lock); 246 if (__list_empty(&req_work)) 247 pthread_cond_wait(&req_work.cond, 
&req_work.lock); 248 work = __list_remove_head(&req_work); 249 pthread_mutex_unlock(&req_work.lock); 250 __req_handler(work->id); 251 free(work); 252 } while (1); 253 return NULL; 254 } 255 256 static void *disc_handler_thread(void *arg) 257 { 258 struct list_head *work; 259 do { 260 pthread_mutex_lock(&disc_work.lock); 261 if (__list_empty(&disc_work)) 262 pthread_cond_wait(&disc_work.cond, &disc_work.lock); 263 work = __list_remove_head(&disc_work); 264 pthread_mutex_unlock(&disc_work.lock); 265 rdma_disconnect(work->id); 266 rdma_destroy_id(work->id); 267 free(work); 268 } while (1); 269 return NULL; 270 } 271 272 static void cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) 273 { 274 struct node *n = id->context; 275 struct list_head *request; 276 277 switch (event->event) { 278 case RDMA_CM_EVENT_ADDR_RESOLVED: 279 addr_handler(n); 280 break; 281 case RDMA_CM_EVENT_ROUTE_RESOLVED: 282 route_handler(n); 283 break; 284 case RDMA_CM_EVENT_CONNECT_REQUEST: 285 request = malloc(sizeof *request); 286 if (!request) { 287 perror("out of memory accepting connect request"); 288 rdma_reject(id, NULL, 0); 289 rdma_destroy_id(id); 290 } else { 291 INIT_LIST(request); 292 request->id = id; 293 list_add_tail(&req_work, request); 294 } 295 break; 296 case RDMA_CM_EVENT_ESTABLISHED: 297 if (n) 298 conn_handler(n); 299 break; 300 case RDMA_CM_EVENT_ADDR_ERROR: 301 if (n->retries--) { 302 if (!rdma_resolve_addr(n->id, rai->ai_src_addr, 303 rai->ai_dst_addr, timeout)) 304 break; 305 } 306 printf("RDMA_CM_EVENT_ADDR_ERROR, error: %d\n", event->status); 307 addr_handler(n); 308 n->error = 1; 309 break; 310 case RDMA_CM_EVENT_ROUTE_ERROR: 311 if (n->retries--) { 312 if (!rdma_resolve_route(n->id, timeout)) 313 break; 314 } 315 printf("RDMA_CM_EVENT_ROUTE_ERROR, error: %d\n", event->status); 316 route_handler(n); 317 n->error = 1; 318 break; 319 case RDMA_CM_EVENT_CONNECT_ERROR: 320 case RDMA_CM_EVENT_UNREACHABLE: 321 case RDMA_CM_EVENT_REJECTED: 322 printf("event: %s, 
error: %d\n", 323 rdma_event_str(event->event), event->status); 324 conn_handler(n); 325 n->error = 1; 326 break; 327 case RDMA_CM_EVENT_DISCONNECTED: 328 if (!n) { 329 request = malloc(sizeof *request); 330 if (!request) { 331 perror("out of memory queueing disconnect request, handling synchronously"); 332 rdma_disconnect(id); 333 rdma_destroy_id(id); 334 } else { 335 INIT_LIST(request); 336 request->id = id; 337 list_add_tail(&disc_work, request); 338 } 339 } else 340 disc_handler(n); 341 break; 342 case RDMA_CM_EVENT_DEVICE_REMOVAL: 343 /* Cleanup will occur after test completes. */ 344 break; 345 default: 346 break; 347 } 348 rdma_ack_cm_event(event); 349 } 350 351 static int alloc_nodes(void) 352 { 353 int ret, i; 354 355 nodes = calloc(sizeof *nodes, connections); 356 if (!nodes) 357 return -ENOMEM; 358 359 printf("creating id\n"); 360 start_time(STEP_CREATE_ID); 361 for (i = 0; i < connections; i++) { 362 start_perf(&nodes[i], STEP_CREATE_ID); 363 if (dst_addr) { 364 ret = rdma_create_id(channel, &nodes[i].id, &nodes[i], 365 hints.ai_port_space); 366 if (ret) 367 goto err; 368 } 369 end_perf(&nodes[i], STEP_CREATE_ID); 370 } 371 end_time(STEP_CREATE_ID); 372 return 0; 373 374 err: 375 while (--i >= 0) 376 rdma_destroy_id(nodes[i].id); 377 free(nodes); 378 return ret; 379 } 380 381 static void cleanup_nodes(void) 382 { 383 int i; 384 385 printf("destroying id\n"); 386 start_time(STEP_DESTROY); 387 for (i = 0; i < connections; i++) { 388 start_perf(&nodes[i], STEP_DESTROY); 389 if (nodes[i].id) 390 rdma_destroy_id(nodes[i].id); 391 end_perf(&nodes[i], STEP_DESTROY); 392 } 393 end_time(STEP_DESTROY); 394 } 395 396 static void *process_events(void *arg) 397 { 398 struct rdma_cm_event *event; 399 int ret = 0; 400 401 while (!ret) { 402 ret = rdma_get_cm_event(channel, &event); 403 if (!ret) { 404 cma_handler(event->id, event); 405 } else { 406 perror("failure in rdma_get_cm_event in process_server_events"); 407 ret = errno; 408 } 409 } 410 return NULL; 411 } 412 
/*
 * Report a failure from a pthread_* call. Those functions return the error
 * number instead of setting errno, so perror() would print the wrong reason.
 */
static void print_errnum(const char *what, int errnum)
{
	fprintf(stderr, "%s: %s\n", what, strerror(errnum));
}

/*
 * Server mode: set up the two work queues and their worker threads, bind and
 * listen on the resolved source address, then process CM events on the
 * calling thread until the event loop fails.
 *
 * Returns 0 or the error from the step that failed.
 */
static int run_server(void)
{
	pthread_t req_thread, disc_thread;
	struct rdma_cm_id *listen_id;
	int ret;

	INIT_LIST(&req_work.list);
	INIT_LIST(&disc_work.list);
	ret = pthread_mutex_init(&req_work.lock, NULL);
	if (ret) {
		print_errnum("initializing mutex for req work", ret);
		return ret;
	}

	ret = pthread_mutex_init(&disc_work.lock, NULL);
	if (ret) {
		print_errnum("initializing mutex for disc work", ret);
		return ret;
	}

	ret = pthread_cond_init(&req_work.cond, NULL);
	if (ret) {
		print_errnum("initializing cond for req work", ret);
		return ret;
	}

	ret = pthread_cond_init(&disc_work.cond, NULL);
	if (ret) {
		print_errnum("initializing cond for disc work", ret);
		return ret;
	}

	ret = pthread_create(&req_thread, NULL, req_handler_thread, NULL);
	if (ret) {
		print_errnum("failed to create req handler thread", ret);
		return ret;
	}

	ret = pthread_create(&disc_thread, NULL, disc_handler_thread, NULL);
	if (ret) {
		print_errnum("failed to create disconnect handler thread", ret);
		return ret;
	}

	ret = rdma_create_id(channel, &listen_id, NULL, hints.ai_port_space);
	if (ret) {
		perror("listen request failed");
		return ret;
	}

	ret = get_rdma_addr(src_addr, dst_addr, port, &hints, &rai);
	if (ret) {
		printf("getrdmaaddr error: %s\n", gai_strerror(ret));
		goto out;
	}

	ret = rdma_bind_addr(listen_id, rai->ai_src_addr);
	if (ret) {
		perror("bind address failed");
		goto out;
	}

	ret = rdma_listen(listen_id, 0);
	if (ret) {
		perror("failure trying to listen");
		goto out;
	}

	process_events(NULL);
out:
	rdma_destroy_id(listen_id);
	return ret;
}

/*
 * Client mode: drive every node through bind (optional), address and route
 * resolution, QP creation, connect and disconnect, timing each step.
 *
 * Asynchronous steps are issued for all nodes and then the caller spins until
 * the event thread has completed as many operations as were started. A node
 * whose step fails is flagged and skipped by all later steps.
 *
 * Returns the result of the last call made (0 on a clean run).
 */
static int run_client(void)
{
	pthread_t event_thread;
	int i, ret;

	ret = get_rdma_addr(src_addr, dst_addr, port, &hints, &rai);
	if (ret) {
		printf("getaddrinfo error: %s\n", gai_strerror(ret));
		return ret;
	}

	conn_param.responder_resources = 1;
	conn_param.initiator_depth = 1;
	conn_param.retry_count = retries;
	conn_param.private_data = rai->ai_connect;
	conn_param.private_data_len = rai->ai_connect_len;

	ret = pthread_create(&event_thread, NULL, process_events, NULL);
	if (ret) {
		print_errnum("failure creating event thread", ret);
		return ret;
	}

	if (src_addr) {
		printf("binding source address\n");
		start_time(STEP_BIND);
		for (i = 0; i < connections; i++) {
			start_perf(&nodes[i], STEP_BIND);
			ret = rdma_bind_addr(nodes[i].id, rai->ai_src_addr);
			if (ret) {
				perror("failure bind addr");
				nodes[i].error = 1;
				continue;
			}
			end_perf(&nodes[i], STEP_BIND);
		}
		end_time(STEP_BIND);
	}

	printf("resolving address\n");
	start_time(STEP_RESOLVE_ADDR);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		nodes[i].retries = retries;
		start_perf(&nodes[i], STEP_RESOLVE_ADDR);
		ret = rdma_resolve_addr(nodes[i].id, rai->ai_src_addr,
					rai->ai_dst_addr, timeout);
		if (ret) {
			perror("failure getting addr");
			nodes[i].error = 1;
			continue;
		}
		started[STEP_RESOLVE_ADDR]++;
	}
	while (started[STEP_RESOLVE_ADDR] != completed[STEP_RESOLVE_ADDR])
		sched_yield();
	end_time(STEP_RESOLVE_ADDR);

	printf("resolving route\n");
	start_time(STEP_RESOLVE_ROUTE);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		nodes[i].retries = retries;
		start_perf(&nodes[i], STEP_RESOLVE_ROUTE);
		ret = rdma_resolve_route(nodes[i].id, timeout);
		if (ret) {
			perror("failure resolving route");
			nodes[i].error = 1;
			continue;
		}
		started[STEP_RESOLVE_ROUTE]++;
	}
	while (started[STEP_RESOLVE_ROUTE] != completed[STEP_RESOLVE_ROUTE])
		sched_yield();
	end_time(STEP_RESOLVE_ROUTE);

	printf("creating qp\n");
	start_time(STEP_CREATE_QP);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		start_perf(&nodes[i], STEP_CREATE_QP);
		ret = rdma_create_qp(nodes[i].id, NULL, &init_qp_attr);
		if (ret) {
			perror("failure creating qp");
			nodes[i].error = 1;
			continue;
		}
		end_perf(&nodes[i], STEP_CREATE_QP);
	}
	end_time(STEP_CREATE_QP);

	printf("connecting\n");
	start_time(STEP_CONNECT);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		start_perf(&nodes[i], STEP_CONNECT);
		ret = rdma_connect(nodes[i].id, &conn_param);
		if (ret) {
			perror("failure rconnecting");
			nodes[i].error = 1;
			continue;
		}
		started[STEP_CONNECT]++;
	}
	while (started[STEP_CONNECT] != completed[STEP_CONNECT])
		sched_yield();
	end_time(STEP_CONNECT);

	printf("disconnecting\n");
	start_time(STEP_DISCONNECT);
	for (i = 0; i < connections; i++) {
		if (nodes[i].error)
			continue;
		start_perf(&nodes[i], STEP_DISCONNECT);
		ret = rdma_disconnect(nodes[i].id);
		if (ret) {
			/*
			 * Only count disconnects that were actually issued;
			 * otherwise the wait below would never terminate.
			 */
			perror("failure disconnecting");
			nodes[i].error = 1;
			continue;
		}
		started[STEP_DISCONNECT]++;
	}
	while (started[STEP_DISCONNECT] != completed[STEP_DISCONNECT])
		sched_yield();
	end_time(STEP_DISCONNECT);

	return ret;
}

/*
 * Parse options, then run as a client when -s gives a server address and as
 * a server otherwise. Per-connection cleanup and the timing report only
 * apply when the node array exists (client mode).
 */
int main(int argc, char **argv)
{
	int op, ret;

	hints.ai_port_space = RDMA_PS_TCP;
	hints.ai_qp_type = IBV_QPT_RC;
	while ((op = getopt(argc, argv, "s:b:c:p:r:t:")) != -1) {
		switch (op) {
		case 's':
			dst_addr = optarg;
			break;
		case 'b':
			src_addr = optarg;
			break;
		case 'c':
			connections = atoi(optarg);
			break;
		case 'p':
			port = optarg;
			break;
		case 'r':
			retries = atoi(optarg);
			break;
		case 't':
			timeout = atoi(optarg);
			break;
		default:
			printf("usage: %s\n", argv[0]);
			printf("\t[-s server_address]\n");
			printf("\t[-b bind_address]\n");
			printf("\t[-c connections]\n");
			printf("\t[-p port_number]\n");
			printf("\t[-r retries]\n");
			printf("\t[-t timeout_ms]\n");
			exit(1);
		}
	}

	/*
	 * 'connections' sizes the node array and 'retries' is counted down to
	 * zero by the error handlers, so neither may be out of range.
	 */
	if (connections <= 0 || retries < 0) {
		printf("connections must be positive and retries non-negative\n");
		exit(1);
	}

	init_qp_attr.cap.max_send_wr = 1;
	init_qp_attr.cap.max_recv_wr = 1;
	init_qp_attr.cap.max_send_sge = 1;
	init_qp_attr.cap.max_recv_sge = 1;
	init_qp_attr.qp_type = IBV_QPT_RC;

	channel = rdma_create_event_channel();
	if (!channel) {
		printf("failed to create event channel\n");
		exit(1);
	}

	if (dst_addr) {
		ret = alloc_nodes();
		if (ret) {
			/*
			 * alloc_nodes() has already released whatever it
			 * created; 'nodes' must not be touched again here.
			 */
			printf("failed to allocate nodes: %d\n", ret);
			rdma_destroy_event_channel(channel);
			return ret;
		}
		ret = run_client();
	} else {
		hints.ai_flags |= RAI_PASSIVE;
		ret = run_server();
	}

	/* Server mode never allocates 'nodes'. */
	if (nodes)
		cleanup_nodes();
	rdma_destroy_event_channel(channel);
	if (rai)
		rdma_freeaddrinfo(rai);

	if (nodes)
		show_perf();
	free(nodes);
	return ret;
}