1 /*
2 * Copyright (c) 2011-2012 Intel Corporation. All rights reserved.
3 * Copyright (c) 2014-2015 Mellanox Technologies LTD. All rights reserved.
4 *
5 * This software is available to you under the OpenIB.org BSD license
6 * below:
7 *
8 * Redistribution and use in source and binary forms, with or
9 * without modification, are permitted provided that the following
10 * conditions are met:
11 *
12 * - Redistributions of source code must retain the above
13 * copyright notice, this list of conditions and the following
14 * disclaimer.
15 *
16 * - Redistributions in binary form must reproduce the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer in the documentation and/or other materials
19 * provided with the distribution.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
25 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
26 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
27 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 * SOFTWARE.
29 */
30
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <strings.h>
35 #include <errno.h>
36 #include <getopt.h>
37 #include <sys/types.h>
38 #include <sys/socket.h>
39 #include <sys/time.h>
40 #include <sys/wait.h>
41 #include <netdb.h>
42 #include <fcntl.h>
43 #include <unistd.h>
44 #include <netinet/tcp.h>
45
46 #include <rdma/rdma_cma.h>
47 #include <rdma/rsocket.h>
48 #include <util/compiler.h>
49 #include "common.h"
50
/*
 * One entry of the built-in size sweep: a transfer size in bytes plus the
 * level that decides whether run() includes it.
 */
struct test_size_param {
	int	size;	/* transfer size in bytes */
	int	option;	/* entry is skipped when option > size_option */
};
55
56 static struct test_size_param test_size[] = {
57 { 1 << 6, 0 },
58 { 1 << 7, 1 }, { (1 << 7) + (1 << 6), 1},
59 { 1 << 8, 1 }, { (1 << 8) + (1 << 7), 1},
60 { 1 << 9, 1 }, { (1 << 9) + (1 << 8), 1},
61 { 1 << 10, 1 }, { (1 << 10) + (1 << 9), 1},
62 { 1 << 11, 1 }, { (1 << 11) + (1 << 10), 1},
63 { 1 << 12, 0 }, { (1 << 12) + (1 << 11), 1},
64 { 1 << 13, 1 }, { (1 << 13) + (1 << 12), 1},
65 { 1 << 14, 1 }, { (1 << 14) + (1 << 13), 1},
66 { 1 << 15, 1 }, { (1 << 15) + (1 << 14), 1},
67 { 1 << 16, 0 }, { (1 << 16) + (1 << 15), 1},
68 { 1 << 17, 1 }, { (1 << 17) + (1 << 16), 1},
69 { 1 << 18, 1 }, { (1 << 18) + (1 << 17), 1},
70 { 1 << 19, 1 }, { (1 << 19) + (1 << 18), 1},
71 { 1 << 20, 0 }, { (1 << 20) + (1 << 19), 1},
72 { 1 << 21, 1 }, { (1 << 21) + (1 << 20), 1},
73 { 1 << 22, 1 }, { (1 << 22) + (1 << 21), 1},
74 };
75 #define TEST_CNT (sizeof test_size / sizeof test_size[0])
76
77 static int rs, lrs;
78 static int use_async;
79 static int use_rgai;
80 static int verify;
81 static int flags = MSG_DONTWAIT;
82 static int poll_timeout = 0;
83 static int custom;
84 static int use_fork;
85 static pid_t fork_pid;
86 static enum rs_optimization optimization;
87 static int size_option;
88 static int iterations = 1;
89 static int transfer_size = 1000;
90 static int transfer_count = 1000;
91 static int buffer_size, inline_size = 64;
92 static char test_name[10] = "custom";
93 static const char *port = "7471";
94 static int keepalive;
95 static char *dst_addr;
96 static char *src_addr;
97 static struct timeval start, end;
98 static void *buf;
99 static struct rdma_addrinfo rai_hints;
100 static struct addrinfo ai_hints;
101
show_perf(void)102 static void show_perf(void)
103 {
104 char str[32];
105 float usec;
106 long long bytes;
107
108 usec = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec);
109 bytes = (long long) iterations * transfer_count * transfer_size * 2;
110
111 /* name size transfers iterations bytes seconds Gb/sec usec/xfer */
112 printf("%-10s", test_name);
113 size_str(str, sizeof str, transfer_size);
114 printf("%-8s", str);
115 cnt_str(str, sizeof str, transfer_count);
116 printf("%-8s", str);
117 cnt_str(str, sizeof str, iterations);
118 printf("%-8s", str);
119 size_str(str, sizeof str, bytes);
120 printf("%-8s", str);
121 printf("%8.2fs%10.2f%11.2f\n",
122 usec / 1000000., (bytes * 8) / (1000. * usec),
123 (usec / iterations) / (transfer_count * 2));
124 }
125
init_latency_test(int size)126 static void init_latency_test(int size)
127 {
128 char sstr[5];
129
130 size_str(sstr, sizeof sstr, size);
131 snprintf(test_name, sizeof test_name, "%s_lat", sstr);
132 transfer_count = 1;
133 transfer_size = size;
134 iterations = size_to_count(transfer_size);
135 }
136
init_bandwidth_test(int size)137 static void init_bandwidth_test(int size)
138 {
139 char sstr[5];
140
141 size_str(sstr, sizeof sstr, size);
142 snprintf(test_name, sizeof test_name, "%s_bw", sstr);
143 iterations = 1;
144 transfer_size = size;
145 transfer_count = size_to_count(transfer_size);
146 }
147
send_xfer(int size)148 static int send_xfer(int size)
149 {
150 struct pollfd fds;
151 int offset, ret;
152
153 if (verify)
154 format_buf(buf, size);
155
156 if (use_async) {
157 fds.fd = rs;
158 fds.events = POLLOUT;
159 }
160
161 for (offset = 0; offset < size; ) {
162 if (use_async) {
163 ret = do_poll(&fds, poll_timeout);
164 if (ret)
165 return ret;
166 }
167
168 ret = rs_send(rs, buf + offset, size - offset, flags);
169 if (ret > 0) {
170 offset += ret;
171 } else if (errno != EWOULDBLOCK && errno != EAGAIN) {
172 perror("rsend");
173 return ret;
174 }
175 }
176
177 return 0;
178 }
179
recv_xfer(int size)180 static int recv_xfer(int size)
181 {
182 struct pollfd fds;
183 int offset, ret;
184
185 if (use_async) {
186 fds.fd = rs;
187 fds.events = POLLIN;
188 }
189
190 for (offset = 0; offset < size; ) {
191 if (use_async) {
192 ret = do_poll(&fds, poll_timeout);
193 if (ret)
194 return ret;
195 }
196
197 ret = rs_recv(rs, buf + offset, size - offset, flags);
198 if (ret > 0) {
199 offset += ret;
200 } else if (errno != EWOULDBLOCK && errno != EAGAIN) {
201 perror("rrecv");
202 return ret;
203 }
204 }
205
206 if (verify) {
207 ret = verify_buf(buf, size);
208 if (ret)
209 return ret;
210 }
211
212 return 0;
213 }
214
sync_test(void)215 static int sync_test(void)
216 {
217 int ret;
218
219 ret = dst_addr ? send_xfer(16) : recv_xfer(16);
220 if (ret)
221 return ret;
222
223 return dst_addr ? recv_xfer(16) : send_xfer(16);
224 }
225
run_test(void)226 static int run_test(void)
227 {
228 int ret, i, t;
229
230 ret = sync_test();
231 if (ret)
232 goto out;
233
234 gettimeofday(&start, NULL);
235 for (i = 0; i < iterations; i++) {
236 for (t = 0; t < transfer_count; t++) {
237 ret = dst_addr ? send_xfer(transfer_size) :
238 recv_xfer(transfer_size);
239 if (ret)
240 goto out;
241 }
242
243 for (t = 0; t < transfer_count; t++) {
244 ret = dst_addr ? recv_xfer(transfer_size) :
245 send_xfer(transfer_size);
246 if (ret)
247 goto out;
248 }
249 }
250 gettimeofday(&end, NULL);
251 show_perf();
252 ret = 0;
253
254 out:
255 return ret;
256 }
257
set_keepalive(int fd)258 static void set_keepalive(int fd)
259 {
260 int optval;
261 socklen_t optlen = sizeof(optlen);
262
263 optval = 1;
264 if (rs_setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &optval, optlen)) {
265 perror("rsetsockopt SO_KEEPALIVE");
266 return;
267 }
268
269 optval = keepalive;
270 if (rs_setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &optval, optlen))
271 perror("rsetsockopt TCP_KEEPIDLE");
272
273 if (!(rs_getsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &optval, &optlen)))
274 printf("Keepalive: %s\n", (optval ? "ON" : "OFF"));
275
276 if (!(rs_getsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &optval, &optlen)))
277 printf(" time: %i\n", optval);
278 }
279
set_options(int fd)280 static void set_options(int fd)
281 {
282 int val;
283
284 if (buffer_size) {
285 rs_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, (void *) &buffer_size,
286 sizeof buffer_size);
287 rs_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, (void *) &buffer_size,
288 sizeof buffer_size);
289 } else {
290 val = 1 << 19;
291 rs_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, (void *) &val, sizeof val);
292 rs_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, (void *) &val, sizeof val);
293 }
294
295 val = 1;
296 rs_setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (void *) &val, sizeof(val));
297
298 if (flags & MSG_DONTWAIT)
299 rs_fcntl(fd, F_SETFL, O_NONBLOCK);
300
301 if (use_rs) {
302 /* Inline size based on experimental data */
303 if (optimization == opt_latency) {
304 rs_setsockopt(fd, SOL_RDMA, RDMA_INLINE, &inline_size,
305 sizeof inline_size);
306 } else if (optimization == opt_bandwidth) {
307 val = 0;
308 rs_setsockopt(fd, SOL_RDMA, RDMA_INLINE, &val, sizeof val);
309 }
310 }
311
312 if (keepalive)
313 set_keepalive(fd);
314 }
315
/*
 * Create the listening socket lrs: resolve src_addr/port with passive
 * hints (via rdma_getaddrinfo() when use_rgai is set, getaddrinfo()
 * otherwise), create the socket, set SO_REUSEADDR, bind and listen with
 * a backlog of 1.
 *
 * Returns 0 on success.  On failure the error is reported, the socket
 * (if it was created) is closed, and a nonzero value is returned.  The
 * address list is released on every path past the lookup.
 */
static int server_listen(void)
{
	struct rdma_addrinfo *rdma_res = NULL;
	struct addrinfo *res;
	struct sockaddr *bind_addr;
	socklen_t bind_len;
	int family;
	int reuse = 1;
	int rc;

	if (use_rgai) {
		rai_hints.ai_flags |= RAI_PASSIVE;
		rc = rdma_getaddrinfo(src_addr, port, &rai_hints, &rdma_res);
	} else {
		ai_hints.ai_flags |= AI_PASSIVE;
		rc = getaddrinfo(src_addr, port, &ai_hints, &res);
	}
	if (rc) {
		printf("getaddrinfo: %s\n", gai_strerror(rc));
		return rc;
	}

	/* Pick family and bind address from whichever lookup was used. */
	if (rdma_res) {
		family = rdma_res->ai_family;
		bind_addr = rdma_res->ai_src_addr;
		bind_len = rdma_res->ai_src_len;
	} else {
		family = res->ai_family;
		bind_addr = res->ai_addr;
		bind_len = res->ai_addrlen;
	}

	lrs = rs_socket(family, SOCK_STREAM, 0);
	if (lrs < 0) {
		perror("rsocket");
		rc = lrs;
		goto out_free;
	}

	rc = rs_setsockopt(lrs, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof reuse);
	if (rc) {
		perror("rsetsockopt SO_REUSEADDR");
		goto out_close;
	}

	rc = rs_bind(lrs, bind_addr, bind_len);
	if (rc) {
		perror("rbind");
		goto out_close;
	}

	rc = rs_listen(lrs, 1);
	if (rc)
		perror("rlisten");

out_close:
	if (rc)
		rs_close(lrs);
out_free:
	if (rdma_res)
		rdma_freeaddrinfo(rdma_res);
	else
		freeaddrinfo(res);
	return rc;
}
370
server_connect(void)371 static int server_connect(void)
372 {
373 struct pollfd fds;
374 int ret = 0;
375
376 set_options(lrs);
377 do {
378 if (use_async) {
379 fds.fd = lrs;
380 fds.events = POLLIN;
381
382 ret = do_poll(&fds, poll_timeout);
383 if (ret) {
384 perror("rpoll");
385 return ret;
386 }
387 }
388
389 rs = rs_accept(lrs, NULL, NULL);
390 } while (rs < 0 && (errno == EAGAIN || errno == EWOULDBLOCK));
391 if (rs < 0) {
392 perror("raccept");
393 return rs;
394 }
395
396 if (use_fork)
397 fork_pid = fork();
398 if (!fork_pid)
399 set_options(rs);
400 return ret;
401 }
402
/*
 * Connect rs to dst_addr:port.
 *
 * The destination (and, when src_addr is set, the local bind address)
 * is resolved with rdma_getaddrinfo() when use_rgai is set and with
 * getaddrinfo() otherwise.  The socket is created, configured with
 * set_options(), optionally bound to the source address, given the
 * resolved RDMA route when one is available, and then connected.  A
 * connect that returns EINPROGRESS is completed by polling for POLLOUT
 * and checking SO_ERROR.
 *
 * Returns 0 on success.  On failure the error is reported, the socket
 * (if created) is closed and a nonzero value is returned.  All address
 * lists obtained here are released on every path.
 */
static int client_connect(void)
{
	struct rdma_addrinfo *rai = NULL, *rai_src = NULL;
	struct addrinfo *ai = NULL, *ai_src = NULL;
	struct rdma_addrinfo rai_src_hints;
	struct addrinfo ai_src_hints;
	struct pollfd fds;
	int ret, err;
	socklen_t len;

	ret = use_rgai ? rdma_getaddrinfo(dst_addr, port, &rai_hints, &rai) :
			 getaddrinfo(dst_addr, port, &ai_hints, &ai);
	if (ret) {
		printf("getaddrinfo: %s\n", gai_strerror(ret));
		return ret;
	}

	if (src_addr) {
		/*
		 * Resolve the bind address with a private copy of the hints.
		 * run() calls this function twice in the default sweep, so
		 * the passive flag must not stick to the shared hints and
		 * alter the next destination lookup.
		 */
		if (use_rgai) {
			rai_src_hints = rai_hints;
			rai_src_hints.ai_flags |= RAI_PASSIVE;
			ret = rdma_getaddrinfo(src_addr, port, &rai_src_hints, &rai_src);
		} else {
			ai_src_hints = ai_hints;
			ai_src_hints.ai_flags |= AI_PASSIVE;
			ret = getaddrinfo(src_addr, port, &ai_src_hints, &ai_src);
		}
		if (ret) {
			printf("getaddrinfo src_addr: %s\n", gai_strerror(ret));
			/* Release the destination lookup instead of leaking it. */
			goto out_free;
		}
	}

	rs = rai ? rs_socket(rai->ai_family, SOCK_STREAM, 0) :
		   rs_socket(ai->ai_family, SOCK_STREAM, 0);
	if (rs < 0) {
		perror("rsocket");
		ret = rs;
		goto out_free;
	}

	set_options(rs);

	if (src_addr) {
		ret = rai ? rs_bind(rs, rai_src->ai_src_addr, rai_src->ai_src_len) :
			    rs_bind(rs, ai_src->ai_addr, ai_src->ai_addrlen);
		if (ret) {
			perror("rbind");
			goto out_close;
		}
	}

	if (rai && rai->ai_route) {
		ret = rs_setsockopt(rs, SOL_RDMA, RDMA_ROUTE, rai->ai_route,
				    rai->ai_route_len);
		if (ret) {
			perror("rsetsockopt RDMA_ROUTE");
			goto out_close;
		}
	}

	ret = rai ? rs_connect(rs, rai->ai_dst_addr, rai->ai_dst_len) :
		    rs_connect(rs, ai->ai_addr, ai->ai_addrlen);
	if (ret && (errno != EINPROGRESS)) {
		perror("rconnect");
		goto out_close;
	}

	if (ret) {
		/* Nonblocking connect still in progress: wait for the result. */
		fds.fd = rs;
		fds.events = POLLOUT;
		ret = do_poll(&fds, poll_timeout);
		if (ret) {
			perror("rpoll");
			goto out_close;
		}

		len = sizeof err;
		ret = rs_getsockopt(rs, SOL_SOCKET, SO_ERROR, &err, &len);
		if (ret) {
			perror("rgetsockopt SO_ERROR");
			goto out_close;
		}
		if (err) {
			ret = -1;
			errno = err;
			perror("async rconnect");
		}
	}

out_close:
	if (ret)
		rs_close(rs);
out_free:
	if (rai_src)
		rdma_freeaddrinfo(rai_src);
	if (ai_src)
		freeaddrinfo(ai_src);
	if (rai)
		rdma_freeaddrinfo(rai);
	else if (ai)
		freeaddrinfo(ai);
	return ret;
}
498
run(void)499 static int run(void)
500 {
501 int i, ret = 0;
502
503 buf = malloc(!custom ? test_size[TEST_CNT - 1].size : transfer_size);
504 if (!buf) {
505 perror("malloc");
506 return -1;
507 }
508
509 if (!dst_addr) {
510 ret = server_listen();
511 if (ret)
512 goto free;
513 }
514
515 printf("%-10s%-8s%-8s%-8s%-8s%8s %10s%13s\n",
516 "name", "bytes", "xfers", "iters", "total", "time", "Gb/sec", "usec/xfer");
517 if (!custom) {
518 optimization = opt_latency;
519 ret = dst_addr ? client_connect() : server_connect();
520 if (ret)
521 goto free;
522
523 for (i = 0; i < TEST_CNT && !fork_pid; i++) {
524 if (test_size[i].option > size_option)
525 continue;
526 init_latency_test(test_size[i].size);
527 run_test();
528 }
529 if (fork_pid)
530 waitpid(fork_pid, NULL, 0);
531 else
532 rs_shutdown(rs, SHUT_RDWR);
533 rs_close(rs);
534
535 if (!dst_addr && use_fork && !fork_pid)
536 goto free;
537
538 optimization = opt_bandwidth;
539 ret = dst_addr ? client_connect() : server_connect();
540 if (ret)
541 goto free;
542 for (i = 0; i < TEST_CNT && !fork_pid; i++) {
543 if (test_size[i].option > size_option)
544 continue;
545 init_bandwidth_test(test_size[i].size);
546 run_test();
547 }
548 } else {
549 ret = dst_addr ? client_connect() : server_connect();
550 if (ret)
551 goto free;
552
553 if (!fork_pid)
554 ret = run_test();
555 }
556
557 if (fork_pid)
558 waitpid(fork_pid, NULL, 0);
559 else
560 rs_shutdown(rs, SHUT_RDWR);
561 rs_close(rs);
562 free:
563 free(buf);
564 return ret;
565 }
566
set_test_opt(const char * arg)567 static int set_test_opt(const char *arg)
568 {
569 if (strlen(arg) == 1) {
570 switch (arg[0]) {
571 case 's':
572 use_rs = 0;
573 break;
574 case 'a':
575 use_async = 1;
576 break;
577 case 'b':
578 flags = (flags & ~MSG_DONTWAIT) | MSG_WAITALL;
579 break;
580 case 'f':
581 use_fork = 1;
582 use_rs = 0;
583 break;
584 case 'n':
585 flags |= MSG_DONTWAIT;
586 break;
587 case 'r':
588 use_rgai = 1;
589 break;
590 case 'v':
591 verify = 1;
592 break;
593 default:
594 return -1;
595 }
596 } else {
597 if (!strncasecmp("socket", arg, 6)) {
598 use_rs = 0;
599 } else if (!strncasecmp("async", arg, 5)) {
600 use_async = 1;
601 } else if (!strncasecmp("block", arg, 5)) {
602 flags = (flags & ~MSG_DONTWAIT) | MSG_WAITALL;
603 } else if (!strncasecmp("nonblock", arg, 8)) {
604 flags |= MSG_DONTWAIT;
605 } else if (!strncasecmp("resolve", arg, 7)) {
606 use_rgai = 1;
607 } else if (!strncasecmp("verify", arg, 6)) {
608 verify = 1;
609 } else if (!strncasecmp("fork", arg, 4)) {
610 use_fork = 1;
611 use_rs = 0;
612 } else {
613 return -1;
614 }
615 }
616 return 0;
617 }
618
main(int argc,char ** argv)619 int main(int argc, char **argv)
620 {
621 int op, ret;
622
623 ai_hints.ai_socktype = SOCK_STREAM;
624 rai_hints.ai_port_space = RDMA_PS_TCP;
625 while ((op = getopt(argc, argv, "s:b:f:B:i:I:C:S:p:k:T:")) != -1) {
626 switch (op) {
627 case 's':
628 dst_addr = optarg;
629 break;
630 case 'b':
631 src_addr = optarg;
632 break;
633 case 'f':
634 if (!strncasecmp("ip", optarg, 2)) {
635 ai_hints.ai_flags = AI_NUMERICHOST;
636 } else if (!strncasecmp("gid", optarg, 3)) {
637 rai_hints.ai_flags = RAI_NUMERICHOST | RAI_FAMILY;
638 rai_hints.ai_family = AF_IB;
639 use_rgai = 1;
640 } else {
641 fprintf(stderr, "Warning: unknown address format\n");
642 }
643 break;
644 case 'B':
645 buffer_size = atoi(optarg);
646 break;
647 case 'i':
648 inline_size = atoi(optarg);
649 break;
650 case 'I':
651 custom = 1;
652 iterations = atoi(optarg);
653 break;
654 case 'C':
655 custom = 1;
656 transfer_count = atoi(optarg);
657 break;
658 case 'S':
659 if (!strncasecmp("all", optarg, 3)) {
660 size_option = 1;
661 } else {
662 custom = 1;
663 transfer_size = atoi(optarg);
664 }
665 break;
666 case 'p':
667 port = optarg;
668 break;
669 case 'k':
670 keepalive = atoi(optarg);
671 break;
672 case 'T':
673 if (!set_test_opt(optarg))
674 break;
675 /* invalid option - fall through */
676 SWITCH_FALLTHROUGH;
677 default:
678 printf("usage: %s\n", argv[0]);
679 printf("\t[-s server_address]\n");
680 printf("\t[-b bind_address]\n");
681 printf("\t[-f address_format]\n");
682 printf("\t name, ip, ipv6, or gid\n");
683 printf("\t[-B buffer_size]\n");
684 printf("\t[-i inline_size]\n");
685 printf("\t[-I iterations]\n");
686 printf("\t[-C transfer_count]\n");
687 printf("\t[-S transfer_size or all]\n");
688 printf("\t[-p port_number]\n");
689 printf("\t[-k keepalive_time]\n");
690 printf("\t[-T test_option]\n");
691 printf("\t s|sockets - use standard tcp/ip sockets\n");
692 printf("\t a|async - asynchronous operation (use poll)\n");
693 printf("\t b|blocking - use blocking calls\n");
694 printf("\t f|fork - fork server processing\n");
695 printf("\t n|nonblocking - use nonblocking calls\n");
696 printf("\t r|resolve - use rdma cm to resolve address\n");
697 printf("\t v|verify - verify data\n");
698 exit(1);
699 }
700 }
701
702 if (!(flags & MSG_DONTWAIT))
703 poll_timeout = -1;
704
705 ret = run();
706 return ret;
707 }
708