1 /*
2 * Copyright (c) 2011-2012 Intel Corporation. All rights reserved.
3 * Copyright (c) 2014 Mellanox Technologies LTD. All rights reserved.
4 *
5 * This software is available to you under the OpenIB.org BSD license
6 * below:
7 *
8 * Redistribution and use in source and binary forms, with or
9 * without modification, are permitted provided that the following
10 * conditions are met:
11 *
12 * - Redistributions of source code must retain the above
13 * copyright notice, this list of conditions and the following
14 * disclaimer.
15 *
16 * - Redistributions in binary form must reproduce the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer in the documentation and/or other materials
19 * provided with the distribution.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
25 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
26 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
27 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 * SOFTWARE.
29 */
30
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <strings.h>
35 #include <errno.h>
36 #include <getopt.h>
37 #include <sys/types.h>
38 #include <sys/socket.h>
39 #include <sys/time.h>
40 #include <sys/wait.h>
41 #include <netdb.h>
42 #include <fcntl.h>
43 #include <unistd.h>
44 #include <netinet/tcp.h>
45
46 #include <rdma/rdma_cma.h>
47 #include <rdma/rsocket.h>
48 #include <util/compiler.h>
49 #include "common.h"
50
/*
 * One entry of the built-in size sweep.
 *
 * size   - transfer size in bytes
 * option - minimum value of size_option needed for the entry to run:
 *          0 entries always run, 1 entries run only when all sizes
 *          were requested.
 */
struct test_size_param {
	int size;
	int option;
};

/*
 * Sweep table: every power of two from 64 bytes to 4 MiB, each followed
 * by the size half-way to the next power of two (written as 3 << n).
 */
static struct test_size_param test_size[] = {
	{ 1 <<  6, 0 },
	{ 1 <<  7, 1 },
	{ 3 <<  6, 1 },
	{ 1 <<  8, 1 },
	{ 3 <<  7, 1 },
	{ 1 <<  9, 1 },
	{ 3 <<  8, 1 },
	{ 1 << 10, 1 },
	{ 3 <<  9, 1 },
	{ 1 << 11, 1 },
	{ 3 << 10, 1 },
	{ 1 << 12, 0 },
	{ 3 << 11, 1 },
	{ 1 << 13, 1 },
	{ 3 << 12, 1 },
	{ 1 << 14, 1 },
	{ 3 << 13, 1 },
	{ 1 << 15, 1 },
	{ 3 << 14, 1 },
	{ 1 << 16, 0 },
	{ 3 << 15, 1 },
	{ 1 << 17, 1 },
	{ 3 << 16, 1 },
	{ 1 << 18, 1 },
	{ 3 << 17, 1 },
	{ 1 << 19, 1 },
	{ 3 << 18, 1 },
	{ 1 << 20, 0 },
	{ 3 << 19, 1 },
	{ 1 << 21, 1 },
	{ 3 << 20, 1 },
	{ 1 << 22, 1 },
	{ 3 << 21, 1 },
};

/* Number of entries in test_size[]. */
#define TEST_CNT (sizeof(test_size) / sizeof(test_size[0]))
76
77 static int rs, lrs;
78 static int use_async;
79 static int use_rgai;
80 static int verify;
81 static int flags = MSG_DONTWAIT;
82 static int poll_timeout = 0;
83 static int custom;
84 static enum rs_optimization optimization;
85 static int size_option;
86 static int iterations = 1;
87 static int transfer_size = 1000;
88 static int transfer_count = 1000;
89 static int buffer_size, inline_size = 64;
90 static char test_name[10] = "custom";
91 static const char *port = "7471";
92 static char *dst_addr;
93 static char *src_addr;
94 static struct timeval start, end;
95 static void *buf;
96 static volatile uint8_t *poll_byte;
97 static struct rdma_addrinfo rai_hints;
98 static struct addrinfo ai_hints;
99
show_perf(void)100 static void show_perf(void)
101 {
102 char str[32];
103 float usec;
104 long long bytes;
105
106 usec = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec);
107 bytes = (long long) iterations * transfer_count * transfer_size * 2;
108
109 /* name size transfers iterations bytes seconds Gb/sec usec/xfer */
110 printf("%-10s", test_name);
111 size_str(str, sizeof str, transfer_size);
112 printf("%-8s", str);
113 cnt_str(str, sizeof str, transfer_count);
114 printf("%-8s", str);
115 cnt_str(str, sizeof str, iterations);
116 printf("%-8s", str);
117 size_str(str, sizeof str, bytes);
118 printf("%-8s", str);
119 printf("%8.2fs%10.2f%11.2f\n",
120 usec / 1000000., (bytes * 8) / (1000. * usec),
121 (usec / iterations) / (transfer_count * 2));
122 }
123
init_latency_test(int size)124 static void init_latency_test(int size)
125 {
126 char sstr[5];
127
128 size_str(sstr, sizeof sstr, size);
129 snprintf(test_name, sizeof test_name, "%s_lat", sstr);
130 transfer_count = 1;
131 transfer_size = size;
132 iterations = size_to_count(transfer_size);
133 }
134
init_bandwidth_test(int size)135 static void init_bandwidth_test(int size)
136 {
137 char sstr[5];
138
139 size_str(sstr, sizeof sstr, size);
140 snprintf(test_name, sizeof test_name, "%s_bw", sstr);
141 iterations = 1;
142 transfer_size = size;
143 transfer_count = size_to_count(transfer_size);
144 }
145
send_msg(int size)146 static int send_msg(int size)
147 {
148 struct pollfd fds;
149 int offset, ret;
150
151 if (use_async) {
152 fds.fd = rs;
153 fds.events = POLLOUT;
154 }
155
156 for (offset = 0; offset < size; ) {
157 if (use_async) {
158 ret = do_poll(&fds, poll_timeout);
159 if (ret)
160 return ret;
161 }
162
163 ret = rsend(rs, buf + offset, size - offset, flags);
164 if (ret > 0) {
165 offset += ret;
166 } else if (errno != EWOULDBLOCK && errno != EAGAIN) {
167 perror("rsend");
168 return ret;
169 }
170 }
171
172 return 0;
173 }
174
send_xfer(int size)175 static int send_xfer(int size)
176 {
177 struct pollfd fds;
178 int offset, ret;
179
180 if (use_async) {
181 fds.fd = rs;
182 fds.events = POLLOUT;
183 }
184
185 for (offset = 0; offset < size; ) {
186 if (use_async) {
187 ret = do_poll(&fds, poll_timeout);
188 if (ret)
189 return ret;
190 }
191
192 ret = riowrite(rs, buf + offset, size - offset, offset, flags);
193 if (ret > 0) {
194 offset += ret;
195 } else if (errno != EWOULDBLOCK && errno != EAGAIN) {
196 perror("riowrite");
197 return ret;
198 }
199 }
200
201 return 0;
202 }
203
recv_msg(int size)204 static int recv_msg(int size)
205 {
206 struct pollfd fds;
207 int offset, ret;
208
209 if (use_async) {
210 fds.fd = rs;
211 fds.events = POLLIN;
212 }
213
214 for (offset = 0; offset < size; ) {
215 if (use_async) {
216 ret = do_poll(&fds, poll_timeout);
217 if (ret)
218 return ret;
219 }
220
221 ret = rrecv(rs, buf + offset, size - offset, flags);
222 if (ret > 0) {
223 offset += ret;
224 } else if (errno != EWOULDBLOCK && errno != EAGAIN) {
225 perror("rrecv");
226 return ret;
227 }
228 }
229
230 return 0;
231 }
232
recv_xfer(int size,uint8_t marker)233 static int recv_xfer(int size, uint8_t marker)
234 {
235 int ret;
236
237 while (*poll_byte != marker)
238 ;
239
240 if (verify) {
241 ret = verify_buf(buf, size - 1);
242 if (ret)
243 return ret;
244 }
245
246 return 0;
247 }
248
sync_test(void)249 static int sync_test(void)
250 {
251 int ret;
252
253 ret = dst_addr ? send_msg(16) : recv_msg(16);
254 if (ret)
255 return ret;
256
257 return dst_addr ? recv_msg(16) : send_msg(16);
258 }
259
run_test(void)260 static int run_test(void)
261 {
262 int ret, i, t;
263 off_t offset;
264 uint8_t marker = 0;
265
266 poll_byte = buf + transfer_size - 1;
267 *poll_byte = -1;
268 offset = riomap(rs, buf, transfer_size, PROT_WRITE, 0, 0);
269 if (offset == -1) {
270 perror("riomap");
271 ret = -1;
272 goto out;
273 }
274 ret = sync_test();
275 if (ret)
276 goto out;
277
278 gettimeofday(&start, NULL);
279 for (i = 0; i < iterations; i++) {
280 if (dst_addr) {
281 for (t = 0; t < transfer_count - 1; t++) {
282 ret = send_xfer(transfer_size);
283 if (ret)
284 goto out;
285 }
286 *poll_byte = (uint8_t) marker++;
287 if (verify)
288 format_buf(buf, transfer_size - 1);
289 ret = send_xfer(transfer_size);
290 if (ret)
291 goto out;
292
293 ret = recv_xfer(transfer_size, marker++);
294 } else {
295 ret = recv_xfer(transfer_size, marker++);
296 if (ret)
297 goto out;
298
299 for (t = 0; t < transfer_count - 1; t++) {
300 ret = send_xfer(transfer_size);
301 if (ret)
302 goto out;
303 }
304 *poll_byte = (uint8_t) marker++;
305 if (verify)
306 format_buf(buf, transfer_size - 1);
307 ret = send_xfer(transfer_size);
308 }
309 if (ret)
310 goto out;
311 }
312 gettimeofday(&end, NULL);
313 show_perf();
314 ret = riounmap(rs, buf, transfer_size);
315
316 out:
317 return ret;
318 }
319
set_options(int fd)320 static void set_options(int fd)
321 {
322 int val;
323
324 if (buffer_size) {
325 rsetsockopt(fd, SOL_SOCKET, SO_SNDBUF, (void *) &buffer_size,
326 sizeof buffer_size);
327 rsetsockopt(fd, SOL_SOCKET, SO_RCVBUF, (void *) &buffer_size,
328 sizeof buffer_size);
329 } else {
330 val = 1 << 19;
331 rsetsockopt(fd, SOL_SOCKET, SO_SNDBUF, (void *) &val, sizeof val);
332 rsetsockopt(fd, SOL_SOCKET, SO_RCVBUF, (void *) &val, sizeof val);
333 }
334
335 val = 1;
336 rsetsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (void *) &val, sizeof(val));
337 rsetsockopt(fd, SOL_RDMA, RDMA_IOMAPSIZE, (void *) &val, sizeof val);
338
339 if (flags & MSG_DONTWAIT)
340 rfcntl(fd, F_SETFL, O_NONBLOCK);
341
342 /* Inline size based on experimental data */
343 if (optimization == opt_latency) {
344 rsetsockopt(fd, SOL_RDMA, RDMA_INLINE, &inline_size,
345 sizeof inline_size);
346 } else if (optimization == opt_bandwidth) {
347 val = 0;
348 rsetsockopt(fd, SOL_RDMA, RDMA_INLINE, &val, sizeof val);
349 }
350 }
351
/*
 * Create the listening socket lrs: resolve src_addr/port, create the
 * rsocket, enable SO_REUSEADDR, bind, and listen with a backlog of 1.
 *
 * Resolution uses rdma_getaddrinfo() when use_rgai is set and
 * getaddrinfo() otherwise; the matching free routine runs on exit.
 *
 * Returns 0 on success. On failure a message is printed, lrs is closed
 * if it had been created, and a non-zero value is returned.
 */
static int server_listen(void)
{
	struct rdma_addrinfo *rai = NULL;
	struct addrinfo *ai = NULL;
	int reuse = 1;
	int ret;

	if (use_rgai) {
		rai_hints.ai_flags |= RAI_PASSIVE;
		ret = rdma_getaddrinfo(src_addr, port, &rai_hints, &rai);
	} else {
		ai_hints.ai_flags |= AI_PASSIVE;
		ret = getaddrinfo(src_addr, port, &ai_hints, &ai);
	}
	if (ret) {
		printf("getaddrinfo: %s\n", gai_strerror(ret));
		return ret;
	}

	if (rai)
		lrs = rsocket(rai->ai_family, SOCK_STREAM, 0);
	else
		lrs = rsocket(ai->ai_family, SOCK_STREAM, 0);
	if (lrs < 0) {
		perror("rsocket");
		ret = lrs;
		goto out_free;
	}

	ret = rsetsockopt(lrs, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof reuse);
	if (ret) {
		perror("rsetsockopt SO_REUSEADDR");
		goto out_close;
	}

	if (rai)
		ret = rbind(lrs, rai->ai_src_addr, rai->ai_src_len);
	else
		ret = rbind(lrs, ai->ai_addr, ai->ai_addrlen);
	if (ret) {
		perror("rbind");
		goto out_close;
	}

	ret = rlisten(lrs, 1);
	if (ret)
		perror("rlisten");

out_close:
	/* Reached on success as well; only close when something failed. */
	if (ret)
		rclose(lrs);
out_free:
	if (rai)
		rdma_freeaddrinfo(rai);
	else
		freeaddrinfo(ai);
	return ret;
}
406
server_connect(void)407 static int server_connect(void)
408 {
409 struct pollfd fds;
410 int ret = 0;
411
412 set_options(lrs);
413 do {
414 if (use_async) {
415 fds.fd = lrs;
416 fds.events = POLLIN;
417
418 ret = do_poll(&fds, poll_timeout);
419 if (ret) {
420 perror("rpoll");
421 return ret;
422 }
423 }
424
425 rs = raccept(lrs, NULL, NULL);
426 } while (rs < 0 && (errno == EAGAIN || errno == EWOULDBLOCK));
427 if (rs < 0) {
428 perror("raccept");
429 return rs;
430 }
431
432 set_options(rs);
433 return ret;
434 }
435
/*
 * Connect to dst_addr/port and store the socket in rs.
 *
 * Resolution uses rdma_getaddrinfo() when use_rgai is set and
 * getaddrinfo() otherwise. If rconnect() reports EINPROGRESS, the
 * socket is polled for POLLOUT and the outcome is read back through
 * SO_ERROR.
 *
 * Returns 0 on success. On failure rs is closed if it had been created
 * and a non-zero value is returned.
 */
static int client_connect(void)
{
	struct rdma_addrinfo *rai = NULL;
	struct addrinfo *ai = NULL;
	struct pollfd pfd;
	socklen_t err_len;
	int so_err;
	int ret;

	if (use_rgai)
		ret = rdma_getaddrinfo(dst_addr, port, &rai_hints, &rai);
	else
		ret = getaddrinfo(dst_addr, port, &ai_hints, &ai);
	if (ret) {
		printf("getaddrinfo: %s\n", gai_strerror(ret));
		return ret;
	}

	if (rai)
		rs = rsocket(rai->ai_family, SOCK_STREAM, 0);
	else
		rs = rsocket(ai->ai_family, SOCK_STREAM, 0);
	if (rs < 0) {
		perror("rsocket");
		ret = rs;
		goto out_free;
	}

	set_options(rs);
	/* TODO: bind client to src_addr */

	if (rai)
		ret = rconnect(rs, rai->ai_dst_addr, rai->ai_dst_len);
	else
		ret = rconnect(rs, ai->ai_addr, ai->ai_addrlen);

	if (ret) {
		if (errno != EINPROGRESS) {
			perror("rconnect");
			goto out_close;
		}

		/* Connection is being set up: wait, then fetch the result. */
		pfd.fd = rs;
		pfd.events = POLLOUT;
		ret = do_poll(&pfd, poll_timeout);
		if (ret) {
			perror("rpoll");
			goto out_close;
		}

		err_len = sizeof so_err;
		ret = rgetsockopt(rs, SOL_SOCKET, SO_ERROR, &so_err, &err_len);
		if (ret)
			goto out_close;
		if (so_err) {
			ret = -1;
			errno = so_err;
			perror("async rconnect");
		}
	}

out_close:
	/* Reached on success as well; only close when something failed. */
	if (ret)
		rclose(rs);
out_free:
	if (rai)
		rdma_freeaddrinfo(rai);
	else
		freeaddrinfo(ai);
	return ret;
}
499
run(void)500 static int run(void)
501 {
502 int i, ret = 0;
503
504 buf = malloc(!custom ? test_size[TEST_CNT - 1].size : transfer_size);
505 if (!buf) {
506 perror("malloc");
507 return -1;
508 }
509
510 if (!dst_addr) {
511 ret = server_listen();
512 if (ret)
513 goto free;
514 }
515
516 printf("%-10s%-8s%-8s%-8s%-8s%8s %10s%13s\n",
517 "name", "bytes", "xfers", "iters", "total", "time", "Gb/sec", "usec/xfer");
518 if (!custom) {
519 optimization = opt_latency;
520 ret = dst_addr ? client_connect() : server_connect();
521 if (ret)
522 goto free;
523
524 for (i = 0; i < TEST_CNT; i++) {
525 if (test_size[i].option > size_option)
526 continue;
527 init_latency_test(test_size[i].size);
528 run_test();
529 }
530 rshutdown(rs, SHUT_RDWR);
531 rclose(rs);
532
533 optimization = opt_bandwidth;
534 ret = dst_addr ? client_connect() : server_connect();
535 if (ret)
536 goto free;
537 for (i = 0; i < TEST_CNT; i++) {
538 if (test_size[i].option > size_option)
539 continue;
540 init_bandwidth_test(test_size[i].size);
541 run_test();
542 }
543 } else {
544 ret = dst_addr ? client_connect() : server_connect();
545 if (ret)
546 goto free;
547
548 ret = run_test();
549 }
550
551 rshutdown(rs, SHUT_RDWR);
552 rclose(rs);
553 free:
554 free(buf);
555 return ret;
556 }
557
set_test_opt(const char * arg)558 static int set_test_opt(const char *arg)
559 {
560 if (strlen(arg) == 1) {
561 switch (arg[0]) {
562 case 'a':
563 use_async = 1;
564 break;
565 case 'b':
566 flags = (flags & ~MSG_DONTWAIT) | MSG_WAITALL;
567 break;
568 case 'n':
569 flags |= MSG_DONTWAIT;
570 break;
571 case 'v':
572 verify = 1;
573 break;
574 default:
575 return -1;
576 }
577 } else {
578 if (!strncasecmp("async", arg, 5)) {
579 use_async = 1;
580 } else if (!strncasecmp("block", arg, 5)) {
581 flags = (flags & ~MSG_DONTWAIT) | MSG_WAITALL;
582 } else if (!strncasecmp("nonblock", arg, 8)) {
583 flags |= MSG_DONTWAIT;
584 } else if (!strncasecmp("verify", arg, 6)) {
585 verify = 1;
586 } else {
587 return -1;
588 }
589 }
590 return 0;
591 }
592
main(int argc,char ** argv)593 int main(int argc, char **argv)
594 {
595 int op, ret;
596
597 ai_hints.ai_socktype = SOCK_STREAM;
598 rai_hints.ai_port_space = RDMA_PS_TCP;
599 while ((op = getopt(argc, argv, "s:b:f:B:i:I:C:S:p:T:")) != -1) {
600 switch (op) {
601 case 's':
602 dst_addr = optarg;
603 break;
604 case 'b':
605 src_addr = optarg;
606 break;
607 case 'f':
608 if (!strncasecmp("ip", optarg, 2)) {
609 ai_hints.ai_flags = AI_NUMERICHOST;
610 } else if (!strncasecmp("gid", optarg, 3)) {
611 rai_hints.ai_flags = RAI_NUMERICHOST | RAI_FAMILY;
612 rai_hints.ai_family = AF_IB;
613 use_rgai = 1;
614 } else {
615 fprintf(stderr, "Warning: unknown address format\n");
616 }
617 break;
618 case 'B':
619 buffer_size = atoi(optarg);
620 break;
621 case 'i':
622 inline_size = atoi(optarg);
623 break;
624 case 'I':
625 custom = 1;
626 iterations = atoi(optarg);
627 break;
628 case 'C':
629 custom = 1;
630 transfer_count = atoi(optarg);
631 break;
632 case 'S':
633 if (!strncasecmp("all", optarg, 3)) {
634 size_option = 1;
635 } else {
636 custom = 1;
637 transfer_size = atoi(optarg);
638 }
639 break;
640 case 'p':
641 port = optarg;
642 break;
643 case 'T':
644 if (!set_test_opt(optarg))
645 break;
646 /* invalid option - fall through */
647 SWITCH_FALLTHROUGH;
648 default:
649 printf("usage: %s\n", argv[0]);
650 printf("\t[-s server_address]\n");
651 printf("\t[-b bind_address]\n");
652 printf("\t[-f address_format]\n");
653 printf("\t name, ip, ipv6, or gid\n");
654 printf("\t[-B buffer_size]\n");
655 printf("\t[-i inline_size]\n");
656 printf("\t[-I iterations]\n");
657 printf("\t[-C transfer_count]\n");
658 printf("\t[-S transfer_size or all]\n");
659 printf("\t[-p port_number]\n");
660 printf("\t[-T test_option]\n");
661 printf("\t a|async - asynchronous operation (use poll)\n");
662 printf("\t b|blocking - use blocking calls\n");
663 printf("\t n|nonblocking - use nonblocking calls\n");
664 printf("\t v|verify - verify data\n");
665 exit(1);
666 }
667 }
668
669 if (!(flags & MSG_DONTWAIT))
670 poll_timeout = -1;
671
672 ret = run();
673 return ret;
674 }
675