1 /* 2 * Copyright (c) 2005 Ammasso, Inc. All rights reserved. 3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include <sys/ctype.h> 38 39 #include <sys/param.h> 40 #include <sys/condvar.h> 41 #include <sys/systm.h> 42 #include <sys/kernel.h> 43 #include <sys/socket.h> 44 #include <sys/module.h> 45 #include <sys/endian.h> 46 #include <sys/limits.h> 47 #include <sys/proc.h> 48 #include <sys/signalvar.h> 49 50 #include <sys/lock.h> 51 #include <sys/mutex.h> 52 #include <sys/rwlock.h> 53 #include <sys/queue.h> 54 #include <sys/taskqueue.h> 55 #include <sys/syslog.h> 56 57 #include <vm/vm.h> 58 #include <vm/pmap.h> 59 60 #include <contrib/rdma/rdma_cm.h> 61 62 #include "getopt.h" 63 #include "krping.h" 64 65 #define PFX "krping: " 66 67 static int debug = 0; 68 #define DEBUG_LOG if (debug) printf 69 70 static const struct krping_option krping_opts[] = { 71 {"count", OPT_INT, 'C'}, 72 {"size", OPT_INT, 'S'}, 73 {"addr", OPT_STRING, 'a'}, 74 {"port", OPT_INT, 'p'}, 75 {"verbose", OPT_NOPARAM, 'v'}, 76 {"validate", OPT_NOPARAM, 'V'}, 77 {"server", OPT_NOPARAM, 's'}, 78 {"client", OPT_NOPARAM, 'c'}, 79 {"dmamr", OPT_NOPARAM, 'D'}, 80 {"debug", OPT_NOPARAM, 'd'}, 81 {"wlat", OPT_NOPARAM, 'l'}, 82 {"rlat", OPT_NOPARAM, 'L'}, 83 {"bw", OPT_NOPARAM, 'B'}, 84 {"tx-depth", OPT_INT, 't'}, 85 {"poll", OPT_NOPARAM, 'P'}, 86 {NULL, 0, 0} 87 }; 88 89 struct mtx krping_mutex; 90 91 /* 92 * List of running krping threads. 93 */ 94 struct krping_cb_list krping_cbs; 95 96 /* 97 * krping "ping/pong" loop: 98 * client sends source rkey/addr/len 99 * server receives source rkey/add/len 100 * server rdma reads "ping" data from source 101 * server sends "go ahead" on rdma read completion 102 * client sends sink rkey/addr/len 103 * server receives sink rkey/addr/len 104 * server rdma writes "pong" data to sink 105 * server sends "go ahead" on rdma write completion 106 * <repeat loop> 107 */ 108 109 /* 110 * Default max buffer size for IO... 111 */ 112 #define RPING_BUFSIZE 128*1024 113 #define RPING_SQ_DEPTH 32 114 115 static void krping_wait(struct krping_cb *cb, int state) 116 { 117 int rc; 118 mtx_lock(&cb->lock); 119 while (cb->state < state) { 120 rc = msleep(cb, &cb->lock, 0, "krping", 0); 121 if (rc && rc != ERESTART) { 122 cb->state = ERROR; 123 break; 124 } 125 } 126 mtx_unlock(&cb->lock); 127 } 128 129 static int krping_cma_event_handler(struct rdma_cm_id *cma_id, 130 struct rdma_cm_event *event) 131 { 132 int ret; 133 struct krping_cb *cb = cma_id->context; 134 135 DEBUG_LOG(PFX "cma_event type %d cma_id %p (%s)\n", event->event, cma_id, 136 (cma_id == cb->cm_id) ? "parent" : "child"); 137 138 mtx_lock(&cb->lock); 139 switch (event->event) { 140 case RDMA_CM_EVENT_ADDR_RESOLVED: 141 cb->state = ADDR_RESOLVED; 142 ret = rdma_resolve_route(cma_id, 2000); 143 if (ret) { 144 log(LOG_ERR, "rdma_resolve_route error %d\n", 145 ret); 146 wakeup(cb); 147 } 148 break; 149 150 case RDMA_CM_EVENT_ROUTE_RESOLVED: 151 cb->state = ROUTE_RESOLVED; 152 wakeup(cb); 153 break; 154 155 case RDMA_CM_EVENT_CONNECT_REQUEST: 156 cb->state = CONNECT_REQUEST; 157 cb->child_cm_id = cma_id; 158 DEBUG_LOG(PFX "child cma %p\n", cb->child_cm_id); 159 wakeup(cb); 160 break; 161 162 case RDMA_CM_EVENT_ESTABLISHED: 163 DEBUG_LOG(PFX "ESTABLISHED\n"); 164 if (!cb->server) { 165 cb->state = CONNECTED; 166 wakeup(cb); 167 } 168 break; 169 170 case RDMA_CM_EVENT_ADDR_ERROR: 171 case RDMA_CM_EVENT_ROUTE_ERROR: 172 case RDMA_CM_EVENT_CONNECT_ERROR: 173 case RDMA_CM_EVENT_UNREACHABLE: 174 case RDMA_CM_EVENT_REJECTED: 175 log(LOG_ERR, "cma event %d, error %d\n", event->event, 176 event->status); 177 cb->state = ERROR; 178 wakeup(cb); 179 break; 180 181 case RDMA_CM_EVENT_DISCONNECTED: 182 DEBUG_LOG(PFX "DISCONNECT EVENT...\n"); 183 cb->state = ERROR; 184 wakeup(cb); 185 break; 186 187 case RDMA_CM_EVENT_DEVICE_REMOVAL: 188 DEBUG_LOG(PFX "cma detected device removal!!!!\n"); 189 break; 190 191 default: 192 log(LOG_ERR, "oof bad type!\n"); 193 wakeup(cb); 194 break; 195 } 196 mtx_unlock(&cb->lock); 197 return 0; 198 } 199 200 static int server_recv(struct krping_cb *cb, struct ib_wc *wc) 201 { 202 if (wc->byte_len != sizeof(cb->recv_buf)) { 203 log(LOG_ERR, "Received bogus data, size %d\n", 204 wc->byte_len); 205 return -1; 206 } 207 208 cb->remote_rkey = ntohl(cb->recv_buf.rkey); 209 cb->remote_addr = ntohll(cb->recv_buf.buf); 210 cb->remote_len = ntohl(cb->recv_buf.size); 211 DEBUG_LOG(PFX "Received rkey %x addr %llx len %d from peer\n", 212 cb->remote_rkey, (unsigned long long)cb->remote_addr, 213 cb->remote_len); 214 215 if (cb->state <= CONNECTED || cb->state == RDMA_WRITE_COMPLETE) 216 cb->state = RDMA_READ_ADV; 217 else 218 cb->state = RDMA_WRITE_ADV; 219 220 return 0; 221 } 222 223 static int client_recv(struct krping_cb *cb, struct ib_wc *wc) 224 { 225 if (wc->byte_len != sizeof(cb->recv_buf)) { 226 log(LOG_ERR, "Received bogus data, size %d\n", 227 wc->byte_len); 228 return -1; 229 } 230 231 if (cb->state == RDMA_READ_ADV) 232 cb->state = RDMA_WRITE_ADV; 233 else 234 cb->state = RDMA_WRITE_COMPLETE; 235 236 return 0; 237 } 238 239 static void krping_cq_event_handler(struct ib_cq *cq, void *ctx) 240 { 241 struct krping_cb *cb = ctx; 242 struct ib_wc wc; 243 struct ib_recv_wr *bad_wr; 244 int ret; 245 246 mtx_lock(&cb->lock); 247 KASSERT(cb->cq == cq, ("bad condition")); 248 if (cb->state == ERROR) { 249 log(LOG_ERR, "cq completion in ERROR state\n"); 250 mtx_unlock(&cb->lock); 251 return; 252 } 253 if (!cb->wlat && !cb->rlat && !cb->bw) 254 ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); 255 while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) { 256 if (wc.status) { 257 if (wc.status != IB_WC_WR_FLUSH_ERR) 258 log(LOG_ERR, "cq completion failed status %d\n", 259 wc.status); 260 goto error; 261 } 262 263 switch (wc.opcode) { 264 case IB_WC_SEND: 265 DEBUG_LOG(PFX "send completion\n"); 266 cb->stats.send_bytes += cb->send_sgl.length; 267 cb->stats.send_msgs++; 268 break; 269 270 case IB_WC_RDMA_WRITE: 271 DEBUG_LOG(PFX "rdma write completion\n"); 272 cb->stats.write_bytes += cb->rdma_sq_wr.sg_list->length; 273 cb->stats.write_msgs++; 274 cb->state = RDMA_WRITE_COMPLETE; 275 wakeup(cb); 276 break; 277 278 case IB_WC_RDMA_READ: 279 DEBUG_LOG(PFX "rdma read completion\n"); 280 cb->stats.read_bytes += cb->rdma_sq_wr.sg_list->length; 281 cb->stats.read_msgs++; 282 cb->state = RDMA_READ_COMPLETE; 283 wakeup(cb); 284 break; 285 286 case IB_WC_RECV: 287 DEBUG_LOG(PFX "recv completion\n"); 288 cb->stats.recv_bytes += sizeof(cb->recv_buf); 289 cb->stats.recv_msgs++; 290 if (cb->wlat || cb->rlat || cb->bw) 291 ret = server_recv(cb, &wc); 292 else 293 ret = cb->server ? server_recv(cb, &wc) : 294 client_recv(cb, &wc); 295 if (ret) { 296 log(LOG_ERR, "recv wc error: %d\n", ret); 297 goto error; 298 } 299 300 ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); 301 if (ret) { 302 log(LOG_ERR, "post recv error: %d\n", 303 ret); 304 goto error; 305 } 306 wakeup(cb); 307 break; 308 309 default: 310 log(LOG_ERR, "unknown!!!!! completion\n"); 311 goto error; 312 } 313 } 314 if (ret) { 315 log(LOG_ERR, "poll error %d\n", ret); 316 goto error; 317 } 318 mtx_unlock(&cb->lock); 319 return; 320 error: 321 cb->state = ERROR; 322 wakeup(cb); 323 mtx_unlock(&cb->lock); 324 } 325 326 static int krping_accept(struct krping_cb *cb) 327 { 328 struct rdma_conn_param conn_param; 329 int ret; 330 331 DEBUG_LOG(PFX "accepting client connection request\n"); 332 333 memset(&conn_param, 0, sizeof conn_param); 334 conn_param.responder_resources = 1; 335 conn_param.initiator_depth = 1; 336 337 ret = rdma_accept(cb->child_cm_id, &conn_param); 338 if (ret) { 339 log(LOG_ERR, "rdma_accept error: %d\n", ret); 340 return ret; 341 } 342 343 if (!cb->wlat && !cb->rlat && !cb->bw) { 344 krping_wait(cb, CONNECTED); 345 if (cb->state == ERROR) { 346 log(LOG_ERR, "wait for CONNECTED state %d\n", cb->state); 347 return -1; 348 } 349 } 350 return 0; 351 } 352 353 static void krping_setup_wr(struct krping_cb *cb) 354 { 355 /* XXX X86 only here... not mapping for dma! */ 356 cb->recv_sgl.addr = vtophys(&cb->recv_buf); 357 cb->recv_sgl.length = sizeof cb->recv_buf; 358 if (cb->use_dmamr) 359 cb->recv_sgl.lkey = cb->dma_mr->lkey; 360 else 361 cb->recv_sgl.lkey = cb->recv_mr->lkey; 362 cb->rq_wr.sg_list = &cb->recv_sgl; 363 cb->rq_wr.num_sge = 1; 364 365 cb->send_sgl.addr = vtophys(&cb->send_buf); 366 cb->send_sgl.length = sizeof cb->send_buf; 367 if (cb->use_dmamr) 368 cb->send_sgl.lkey = cb->dma_mr->lkey; 369 else 370 cb->send_sgl.lkey = cb->send_mr->lkey; 371 372 cb->sq_wr.opcode = IB_WR_SEND; 373 cb->sq_wr.send_flags = IB_SEND_SIGNALED; 374 cb->sq_wr.sg_list = &cb->send_sgl; 375 cb->sq_wr.num_sge = 1; 376 377 cb->rdma_addr = vtophys(cb->rdma_buf); 378 cb->rdma_sgl.addr = cb->rdma_addr; 379 if (cb->use_dmamr) 380 cb->rdma_sgl.lkey = cb->dma_mr->lkey; 381 else 382 cb->rdma_sgl.lkey = cb->rdma_mr->lkey; 383 cb->rdma_sq_wr.send_flags = IB_SEND_SIGNALED; 384 cb->rdma_sq_wr.sg_list = &cb->rdma_sgl; 385 cb->rdma_sq_wr.num_sge = 1; 386 387 if (!cb->server || cb->wlat || cb->rlat || cb->bw) { 388 cb->start_addr = vtophys(cb->start_buf); 389 } 390 } 391 392 static int krping_setup_buffers(struct krping_cb *cb) 393 { 394 int ret; 395 struct ib_phys_buf buf; 396 u64 iovbase; 397 398 DEBUG_LOG(PFX "krping_setup_buffers called on cb %p\n", cb); 399 400 if (cb->use_dmamr) { 401 cb->dma_mr = ib_get_dma_mr(cb->pd, IB_ACCESS_LOCAL_WRITE| 402 IB_ACCESS_REMOTE_READ| 403 IB_ACCESS_REMOTE_WRITE); 404 if (IS_ERR(cb->dma_mr)) { 405 log(LOG_ERR, "reg_dmamr failed\n"); 406 return PTR_ERR(cb->dma_mr); 407 } 408 } else { 409 410 buf.addr = vtophys(&cb->recv_buf); 411 buf.size = sizeof cb->recv_buf; 412 iovbase = vtophys(&cb->recv_buf); 413 cb->recv_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 414 IB_ACCESS_LOCAL_WRITE, 415 &iovbase); 416 417 if (IS_ERR(cb->recv_mr)) { 418 log(LOG_ERR, "recv_buf reg_mr failed\n"); 419 return PTR_ERR(cb->recv_mr); 420 } 421 422 buf.addr = vtophys(&cb->send_buf); 423 buf.size = sizeof cb->send_buf; 424 iovbase = vtophys(&cb->send_buf); 425 cb->send_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 426 0, &iovbase); 427 428 if (IS_ERR(cb->send_mr)) { 429 log(LOG_ERR, "send_buf reg_mr failed\n"); 430 ib_dereg_mr(cb->recv_mr); 431 return PTR_ERR(cb->send_mr); 432 } 433 } 434 435 cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, -1UL, 436 PAGE_SIZE, 0); 437 438 if (!cb->rdma_buf) { 439 log(LOG_ERR, "rdma_buf malloc failed\n"); 440 ret = ENOMEM; 441 goto err1; 442 } 443 if (!cb->use_dmamr) { 444 445 buf.addr = vtophys(cb->rdma_buf); 446 buf.size = cb->size; 447 iovbase = vtophys(cb->rdma_buf); 448 cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 449 IB_ACCESS_REMOTE_READ| 450 IB_ACCESS_REMOTE_WRITE, 451 &iovbase); 452 453 if (IS_ERR(cb->rdma_mr)) { 454 log(LOG_ERR, "rdma_buf reg_mr failed\n"); 455 ret = PTR_ERR(cb->rdma_mr); 456 goto err2; 457 } 458 } 459 460 if (!cb->server || cb->wlat || cb->rlat || cb->bw) { 461 cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 462 0, -1UL, PAGE_SIZE, 0); 463 if (!cb->start_buf) { 464 log(LOG_ERR, "start_buf malloc failed\n"); 465 ret = ENOMEM; 466 goto err2; 467 } 468 if (!cb->use_dmamr) { 469 unsigned flags = IB_ACCESS_REMOTE_READ; 470 471 if (cb->wlat || cb->rlat || cb->bw) 472 flags |= IB_ACCESS_REMOTE_WRITE; 473 buf.addr = vtophys(cb->start_buf); 474 buf.size = cb->size; 475 iovbase = vtophys(cb->start_buf); 476 cb->start_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 477 flags, 478 &iovbase); 479 480 if (IS_ERR(cb->start_mr)) { 481 log(LOG_ERR, "start_buf reg_mr failed\n"); 482 ret = PTR_ERR(cb->start_mr); 483 goto err3; 484 } 485 } 486 } 487 488 krping_setup_wr(cb); 489 DEBUG_LOG(PFX "allocated & registered buffers...\n"); 490 return 0; 491 err3: 492 contigfree(cb->start_buf, cb->size, M_DEVBUF); 493 494 if (!cb->use_dmamr) 495 ib_dereg_mr(cb->rdma_mr); 496 err2: 497 contigfree(cb->rdma_buf, cb->size, M_DEVBUF); 498 err1: 499 if (cb->use_dmamr) 500 ib_dereg_mr(cb->dma_mr); 501 else { 502 ib_dereg_mr(cb->recv_mr); 503 ib_dereg_mr(cb->send_mr); 504 } 505 return ret; 506 } 507 508 static void krping_free_buffers(struct krping_cb *cb) 509 { 510 DEBUG_LOG(PFX "krping_free_buffers called on cb %p\n", cb); 511 512 #if 0 513 dma_unmap_single(cb->pd->device->dma_device, 514 pci_unmap_addr(cb, recv_mapping), 515 sizeof(cb->recv_buf), DMA_BIDIRECTIONAL); 516 dma_unmap_single(cb->pd->device->dma_device, 517 pci_unmap_addr(cb, send_mapping), 518 sizeof(cb->send_buf), DMA_BIDIRECTIONAL); 519 dma_unmap_single(cb->pd->device->dma_device, 520 pci_unmap_addr(cb, rdma_mapping), 521 cb->size, DMA_BIDIRECTIONAL); 522 #endif 523 contigfree(cb->rdma_buf, cb->size, M_DEVBUF); 524 if (!cb->server || cb->wlat || cb->rlat || cb->bw) { 525 #if 0 526 dma_unmap_single(cb->pd->device->dma_device, 527 pci_unmap_addr(cb, start_mapping), 528 cb->size, DMA_BIDIRECTIONAL); 529 #endif 530 contigfree(cb->start_buf, cb->size, M_DEVBUF); 531 } 532 if (cb->use_dmamr) 533 ib_dereg_mr(cb->dma_mr); 534 else { 535 ib_dereg_mr(cb->send_mr); 536 ib_dereg_mr(cb->recv_mr); 537 ib_dereg_mr(cb->rdma_mr); 538 if (!cb->server) 539 ib_dereg_mr(cb->start_mr); 540 } 541 } 542 543 static int krping_create_qp(struct krping_cb *cb) 544 { 545 struct ib_qp_init_attr init_attr; 546 int ret; 547 548 memset(&init_attr, 0, sizeof(init_attr)); 549 init_attr.cap.max_send_wr = cb->txdepth; 550 init_attr.cap.max_recv_wr = 2; 551 init_attr.cap.max_recv_sge = 1; 552 init_attr.cap.max_send_sge = 1; 553 init_attr.qp_type = IB_QPT_RC; 554 init_attr.send_cq = cb->cq; 555 init_attr.recv_cq = cb->cq; 556 557 if (cb->server) { 558 ret = rdma_create_qp(cb->child_cm_id, cb->pd, &init_attr); 559 if (!ret) 560 cb->qp = cb->child_cm_id->qp; 561 } else { 562 ret = rdma_create_qp(cb->cm_id, cb->pd, &init_attr); 563 if (!ret) 564 cb->qp = cb->cm_id->qp; 565 } 566 567 return ret; 568 } 569 570 static void krping_free_qp(struct krping_cb *cb) 571 { 572 ib_destroy_qp(cb->qp); 573 ib_destroy_cq(cb->cq); 574 ib_dealloc_pd(cb->pd); 575 } 576 577 static int krping_setup_qp(struct krping_cb *cb, struct rdma_cm_id *cm_id) 578 { 579 int ret; 580 cb->pd = ib_alloc_pd(cm_id->device); 581 if (IS_ERR(cb->pd)) { 582 log(LOG_ERR, "ib_alloc_pd failed\n"); 583 return PTR_ERR(cb->pd); 584 } 585 DEBUG_LOG(PFX "created pd %p\n", cb->pd); 586 587 cb->cq = ib_create_cq(cm_id->device, krping_cq_event_handler, NULL, 588 cb, cb->txdepth * 2, 0); 589 if (IS_ERR(cb->cq)) { 590 log(LOG_ERR, "ib_create_cq failed\n"); 591 ret = PTR_ERR(cb->cq); 592 goto err1; 593 } 594 DEBUG_LOG(PFX "created cq %p\n", cb->cq); 595 596 if (!cb->wlat && !cb->rlat && !cb->bw) { 597 ret = ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); 598 if (ret) { 599 log(LOG_ERR, "ib_create_cq failed\n"); 600 goto err2; 601 } 602 } 603 604 ret = krping_create_qp(cb); 605 if (ret) { 606 log(LOG_ERR, "krping_create_qp failed: %d\n", ret); 607 goto err2; 608 } 609 DEBUG_LOG(PFX "created qp %p\n", cb->qp); 610 return 0; 611 err2: 612 ib_destroy_cq(cb->cq); 613 err1: 614 ib_dealloc_pd(cb->pd); 615 return ret; 616 } 617 618 static void krping_format_send(struct krping_cb *cb, u64 buf, 619 struct ib_mr *mr) 620 { 621 struct krping_rdma_info *info = &cb->send_buf; 622 623 info->buf = htonll(buf); 624 info->rkey = htonl(mr->rkey); 625 info->size = htonl(cb->size); 626 627 DEBUG_LOG(PFX "RDMA addr %llx rkey %x len %d\n", 628 (unsigned long long)buf, mr->rkey, cb->size); 629 } 630 631 static void krping_test_server(struct krping_cb *cb) 632 { 633 struct ib_send_wr *bad_wr; 634 int ret; 635 636 while (1) { 637 /* Wait for client's Start STAG/TO/Len */ 638 krping_wait(cb, RDMA_READ_ADV); 639 if (cb->state != RDMA_READ_ADV) { 640 DEBUG_LOG(PFX "wait for RDMA_READ_ADV state %d\n", 641 cb->state); 642 break; 643 } 644 645 DEBUG_LOG(PFX "server received sink adv\n"); 646 647 /* Issue RDMA Read. */ 648 cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ; 649 cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; 650 cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; 651 cb->rdma_sq_wr.sg_list->length = cb->remote_len; 652 653 ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr); 654 if (ret) { 655 log(LOG_ERR, "post send error %d\n", ret); 656 break; 657 } 658 DEBUG_LOG(PFX "server posted rdma read req \n"); 659 660 /* Wait for read completion */ 661 krping_wait(cb, RDMA_READ_COMPLETE); 662 if (cb->state != RDMA_READ_COMPLETE) { 663 log(LOG_ERR, 664 "wait for RDMA_READ_COMPLETE state %d\n", 665 cb->state); 666 break; 667 } 668 DEBUG_LOG(PFX "server received read complete\n"); 669 670 /* Display data in recv buf */ 671 if (cb->verbose) 672 DEBUG_LOG("server ping data: %s\n", cb->rdma_buf); 673 674 /* Tell client to continue */ 675 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 676 if (ret) { 677 log(LOG_ERR, "post send error %d\n", ret); 678 break; 679 } 680 DEBUG_LOG(PFX "server posted go ahead\n"); 681 682 /* Wait for client's RDMA STAG/TO/Len */ 683 krping_wait(cb, RDMA_WRITE_ADV); 684 if (cb->state != RDMA_WRITE_ADV) { 685 log(LOG_ERR, 686 "wait for RDMA_WRITE_ADV state %d\n", 687 cb->state); 688 break; 689 } 690 DEBUG_LOG(PFX "server received sink adv\n"); 691 692 /* RDMA Write echo data */ 693 cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; 694 cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; 695 cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; 696 cb->rdma_sq_wr.sg_list->length = strlen(cb->rdma_buf) + 1; 697 DEBUG_LOG(PFX "rdma write from lkey %x laddr %llx len %d\n", 698 cb->rdma_sq_wr.sg_list->lkey, 699 (unsigned long long)cb->rdma_sq_wr.sg_list->addr, 700 cb->rdma_sq_wr.sg_list->length); 701 702 ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr); 703 if (ret) { 704 log(LOG_ERR, "post send error %d\n", ret); 705 break; 706 } 707 708 /* Wait for completion */ 709 krping_wait(cb, RDMA_WRITE_COMPLETE); 710 if (cb->state != RDMA_WRITE_COMPLETE) { 711 log(LOG_ERR, 712 "wait for RDMA_WRITE_COMPLETE state %d\n", 713 cb->state); 714 break; 715 } 716 DEBUG_LOG(PFX "server rdma write complete \n"); 717 718 cb->state = CONNECTED; 719 720 /* Tell client to begin again */ 721 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 722 if (ret) { 723 log(LOG_ERR, "post send error %d\n", ret); 724 break; 725 } 726 DEBUG_LOG(PFX "server posted go ahead\n"); 727 } 728 } 729 730 static void rlat_test(struct krping_cb *cb) 731 { 732 int scnt; 733 int iters = cb->count; 734 struct timeval start_tv, stop_tv; 735 int ret; 736 struct ib_wc wc; 737 struct ib_send_wr *bad_wr; 738 int ne; 739 740 scnt = 0; 741 cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ; 742 cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; 743 cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; 744 cb->rdma_sq_wr.sg_list->length = cb->size; 745 746 microtime(&start_tv); 747 if (!cb->poll) { 748 cb->state = RDMA_READ_ADV; 749 ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); 750 } 751 while (scnt < iters) { 752 753 cb->state = RDMA_READ_ADV; 754 ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr); 755 if (ret) { 756 log(LOG_ERR, 757 "Couldn't post send: ret=%d scnt %d\n", 758 ret, scnt); 759 return; 760 } 761 762 do { 763 if (!cb->poll) { 764 krping_wait(cb, RDMA_READ_COMPLETE); 765 if (cb->state == RDMA_READ_COMPLETE) { 766 ne = 1; 767 ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); 768 } else { 769 ne = -1; 770 } 771 } else 772 ne = ib_poll_cq(cb->cq, 1, &wc); 773 if (cb->state == ERROR) { 774 log(LOG_ERR, 775 "state == ERROR...bailing scnt %d\n", scnt); 776 return; 777 } 778 } while (ne == 0); 779 780 if (ne < 0) { 781 log(LOG_ERR, "poll CQ failed %d\n", ne); 782 return; 783 } 784 if (cb->poll && wc.status != IB_WC_SUCCESS) { 785 log(LOG_ERR, "Completion wth error at %s:\n", 786 cb->server ? "server" : "client"); 787 log(LOG_ERR, "Failed status %d: wr_id %d\n", 788 wc.status, (int) wc.wr_id); 789 return; 790 } 791 ++scnt; 792 } 793 microtime(&stop_tv); 794 795 if (stop_tv.tv_usec < start_tv.tv_usec) { 796 stop_tv.tv_usec += 1000000; 797 stop_tv.tv_sec -= 1; 798 } 799 800 log(LOG_ERR, "delta sec %zu delta usec %lu iter %d size %d\n", 801 stop_tv.tv_sec - start_tv.tv_sec, 802 stop_tv.tv_usec - start_tv.tv_usec, 803 scnt, cb->size); 804 } 805 806 static int alloc_cycle_mem(int cycle_iters, 807 cycles_t **post_cycles_start, 808 cycles_t **post_cycles_stop, 809 cycles_t **poll_cycles_start, 810 cycles_t **poll_cycles_stop, 811 cycles_t **last_poll_cycles_start) 812 { 813 *post_cycles_start = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK); 814 if (!*post_cycles_start) { 815 goto fail1; 816 } 817 *post_cycles_stop = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK); 818 if (!*post_cycles_stop) { 819 goto fail2; 820 } 821 *poll_cycles_start = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK); 822 if (!*poll_cycles_start) { 823 goto fail3; 824 } 825 *poll_cycles_stop = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK); 826 if (!*poll_cycles_stop) { 827 goto fail4; 828 } 829 *last_poll_cycles_start = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK); 830 if (!*last_poll_cycles_start) { 831 goto fail5; 832 } 833 return 0; 834 fail5: 835 free(*poll_cycles_stop, M_DEVBUF); 836 fail4: 837 free(*poll_cycles_start, M_DEVBUF); 838 fail3: 839 free(*post_cycles_stop, M_DEVBUF); 840 fail2: 841 free(*post_cycles_start, M_DEVBUF); 842 fail1: 843 log(LOG_ERR, "%s malloc failed\n", __FUNCTION__); 844 return ENOMEM; 845 } 846 847 static void free_cycle_mem(cycles_t *post_cycles_start, 848 cycles_t *post_cycles_stop, 849 cycles_t *poll_cycles_start, 850 cycles_t *poll_cycles_stop, 851 cycles_t *last_poll_cycles_start) 852 { 853 free(last_poll_cycles_start, M_DEVBUF); 854 free(poll_cycles_stop, M_DEVBUF); 855 free(poll_cycles_start, M_DEVBUF); 856 free(post_cycles_stop, M_DEVBUF); 857 free(post_cycles_start, M_DEVBUF); 858 } 859 860 static void wlat_test(struct krping_cb *cb) 861 { 862 int ccnt, scnt, rcnt; 863 int iters=cb->count; 864 volatile char *poll_buf = (char *) cb->start_buf; 865 char *buf = (char *)cb->rdma_buf; 866 ccnt = 0; 867 scnt = 0; 868 rcnt = 0; 869 struct timeval start_tv, stop_tv; 870 cycles_t *post_cycles_start, *post_cycles_stop; 871 cycles_t *poll_cycles_start, *poll_cycles_stop; 872 cycles_t *last_poll_cycles_start; 873 cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0; 874 int i; 875 int cycle_iters = 1000; 876 int err; 877 878 err = alloc_cycle_mem(cycle_iters, &post_cycles_start, &post_cycles_stop, 879 &poll_cycles_start, &poll_cycles_stop, &last_poll_cycles_start); 880 881 if (err) { 882 log(LOG_ERR, "%s malloc failed\n", __FUNCTION__); 883 return; 884 } 885 886 cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; 887 cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; 888 cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; 889 cb->rdma_sq_wr.sg_list->length = cb->size; 890 891 if (cycle_iters > iters) 892 cycle_iters = iters; 893 microtime(&start_tv); 894 while (scnt < iters || ccnt < iters || rcnt < iters) { 895 896 /* Wait till buffer changes. */ 897 if (rcnt < iters && !(scnt < 1 && !cb->server)) { 898 ++rcnt; 899 while (*poll_buf != (char)rcnt) { 900 if (cb->state == ERROR) { 901 log(LOG_ERR, "state = ERROR, bailing\n"); 902 return; 903 } 904 } 905 } 906 907 if (scnt < iters) { 908 struct ib_send_wr *bad_wr; 909 910 *buf = (char)scnt+1; 911 if (scnt < cycle_iters) 912 post_cycles_start[scnt] = get_cycles(); 913 if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { 914 log(LOG_ERR, "Couldn't post send: scnt=%d\n", 915 scnt); 916 return; 917 } 918 if (scnt < cycle_iters) 919 post_cycles_stop[scnt] = get_cycles(); 920 scnt++; 921 } 922 923 if (ccnt < iters) { 924 struct ib_wc wc; 925 int ne; 926 927 if (ccnt < cycle_iters) 928 poll_cycles_start[ccnt] = get_cycles(); 929 do { 930 if (ccnt < cycle_iters) 931 last_poll_cycles_start[ccnt] = get_cycles(); 932 ne = ib_poll_cq(cb->cq, 1, &wc); 933 } while (ne == 0); 934 if (ccnt < cycle_iters) 935 poll_cycles_stop[ccnt] = get_cycles(); 936 ++ccnt; 937 938 if (ne < 0) { 939 log(LOG_ERR, "poll CQ failed %d\n", ne); 940 return; 941 } 942 if (wc.status != IB_WC_SUCCESS) { 943 log(LOG_ERR, "Completion wth error at %s:\n", 944 cb->server ? "server" : "client"); 945 log(LOG_ERR, "Failed status %d: wr_id %d\n", 946 wc.status, (int) wc.wr_id); 947 log(LOG_ERR, "scnt=%d, rcnt=%d, ccnt=%d\n", 948 scnt, rcnt, ccnt); 949 return; 950 } 951 } 952 } 953 microtime(&stop_tv); 954 955 if (stop_tv.tv_usec < start_tv.tv_usec) { 956 stop_tv.tv_usec += 1000000; 957 stop_tv.tv_sec -= 1; 958 } 959 960 for (i=0; i < cycle_iters; i++) { 961 sum_post += post_cycles_stop[i] - post_cycles_start[i]; 962 sum_poll += poll_cycles_stop[i] - poll_cycles_start[i]; 963 sum_last_poll += poll_cycles_stop[i] - last_poll_cycles_start[i]; 964 } 965 966 log(LOG_ERR, "delta sec %zu delta usec %lu iter %d size %d cycle_iters %d sum_post %llu sum_poll %llu sum_last_poll %llu\n", 967 stop_tv.tv_sec - start_tv.tv_sec, 968 stop_tv.tv_usec - start_tv.tv_usec, 969 scnt, cb->size, cycle_iters, 970 (unsigned long long)sum_post, (unsigned long long)sum_poll, 971 (unsigned long long)sum_last_poll); 972 973 free_cycle_mem(post_cycles_start, post_cycles_stop, poll_cycles_start, 974 poll_cycles_stop, last_poll_cycles_start); 975 } 976 977 static void bw_test(struct krping_cb *cb) 978 { 979 int ccnt, scnt, rcnt; 980 int iters=cb->count; 981 ccnt = 0; 982 scnt = 0; 983 rcnt = 0; 984 struct timeval start_tv, stop_tv; 985 cycles_t *post_cycles_start, *post_cycles_stop; 986 cycles_t *poll_cycles_start, *poll_cycles_stop; 987 cycles_t *last_poll_cycles_start; 988 cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0; 989 int i; 990 int cycle_iters = 1000; 991 int err; 992 993 err = alloc_cycle_mem(cycle_iters, &post_cycles_start, &post_cycles_stop, 994 &poll_cycles_start, &poll_cycles_stop, &last_poll_cycles_start); 995 996 if (err) { 997 log(LOG_ERR, "%s kmalloc failed\n", __FUNCTION__); 998 return; 999 } 1000 1001 cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; 1002 cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; 1003 cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; 1004 cb->rdma_sq_wr.sg_list->length = cb->size; 1005 1006 if (cycle_iters > iters) 1007 cycle_iters = iters; 1008 microtime(&start_tv); 1009 while (scnt < iters || ccnt < iters) { 1010 1011 while (scnt < iters && scnt - ccnt < cb->txdepth) { 1012 struct ib_send_wr *bad_wr; 1013 1014 if (scnt < cycle_iters) 1015 post_cycles_start[scnt] = get_cycles(); 1016 if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { 1017 log(LOG_ERR, "Couldn't post send: scnt=%d\n", 1018 scnt); 1019 return; 1020 } 1021 if (scnt < cycle_iters) 1022 post_cycles_stop[scnt] = get_cycles(); 1023 ++scnt; 1024 } 1025 1026 if (ccnt < iters) { 1027 int ne; 1028 struct ib_wc wc; 1029 1030 if (ccnt < cycle_iters) 1031 poll_cycles_start[ccnt] = get_cycles(); 1032 do { 1033 if (ccnt < cycle_iters) 1034 last_poll_cycles_start[ccnt] = get_cycles(); 1035 ne = ib_poll_cq(cb->cq, 1, &wc); 1036 } while (ne == 0); 1037 if (ccnt < cycle_iters) 1038 poll_cycles_stop[ccnt] = get_cycles(); 1039 ccnt += 1; 1040 1041 if (ne < 0) { 1042 log(LOG_ERR, "poll CQ failed %d\n", ne); 1043 return; 1044 } 1045 if (wc.status != IB_WC_SUCCESS) { 1046 log(LOG_ERR, "Completion wth error at %s:\n", 1047 cb->server ? "server" : "client"); 1048 log(LOG_ERR, "Failed status %d: wr_id %d\n", 1049 wc.status, (int) wc.wr_id); 1050 return; 1051 } 1052 } 1053 } 1054 microtime(&stop_tv); 1055 1056 if (stop_tv.tv_usec < start_tv.tv_usec) { 1057 stop_tv.tv_usec += 1000000; 1058 stop_tv.tv_sec -= 1; 1059 } 1060 1061 for (i=0; i < cycle_iters; i++) { 1062 sum_post += post_cycles_stop[i] - post_cycles_start[i]; 1063 sum_poll += poll_cycles_stop[i] - poll_cycles_start[i]; 1064 sum_last_poll += poll_cycles_stop[i] - last_poll_cycles_start[i]; 1065 } 1066 1067 log(LOG_ERR, "delta sec %zu delta usec %lu iter %d size %d cycle_iters %d sum_post %llu sum_poll %llu sum_last_poll %llu\n", 1068 stop_tv.tv_sec - start_tv.tv_sec, 1069 stop_tv.tv_usec - start_tv.tv_usec, 1070 scnt, cb->size, cycle_iters, 1071 (unsigned long long)sum_post, (unsigned long long)sum_poll, 1072 (unsigned long long)sum_last_poll); 1073 1074 free_cycle_mem(post_cycles_start, post_cycles_stop, poll_cycles_start, 1075 poll_cycles_stop, last_poll_cycles_start); 1076 } 1077 1078 static void krping_rlat_test_server(struct krping_cb *cb) 1079 { 1080 struct ib_send_wr *bad_wr; 1081 struct ib_wc wc; 1082 int ret; 1083 1084 /* Spin waiting for client's Start STAG/TO/Len */ 1085 while (cb->state < RDMA_READ_ADV) { 1086 krping_cq_event_handler(cb->cq, cb); 1087 } 1088 1089 /* Send STAG/TO/Len to client */ 1090 if (cb->dma_mr) 1091 krping_format_send(cb, cb->start_addr, cb->dma_mr); 1092 else 1093 krping_format_send(cb, cb->start_addr, cb->start_mr); 1094 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1095 if (ret) { 1096 log(LOG_ERR, "post send error %d\n", ret); 1097 return; 1098 } 1099 1100 /* Spin waiting for send completion */ 1101 while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); 1102 if (ret < 0) { 1103 log(LOG_ERR, "poll error %d\n", ret); 1104 return; 1105 } 1106 if (wc.status) { 1107 log(LOG_ERR, "send completiong error %d\n", wc.status); 1108 return; 1109 } 1110 1111 krping_wait(cb, ERROR); 1112 } 1113 1114 static void krping_wlat_test_server(struct krping_cb *cb) 1115 { 1116 struct ib_send_wr *bad_wr; 1117 struct ib_wc wc; 1118 int ret; 1119 1120 /* Spin waiting for client's Start STAG/TO/Len */ 1121 while (cb->state < RDMA_READ_ADV) { 1122 krping_cq_event_handler(cb->cq, cb); 1123 } 1124 1125 /* Send STAG/TO/Len to client */ 1126 if (cb->dma_mr) 1127 krping_format_send(cb, cb->start_addr, cb->dma_mr); 1128 else 1129 krping_format_send(cb, cb->start_addr, cb->start_mr); 1130 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1131 if (ret) { 1132 log(LOG_ERR, "post send error %d\n", ret); 1133 return; 1134 } 1135 1136 /* Spin waiting for send completion */ 1137 while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); 1138 if (ret < 0) { 1139 log(LOG_ERR, "poll error %d\n", ret); 1140 return; 1141 } 1142 if (wc.status) { 1143 log(LOG_ERR, "send completiong error %d\n", wc.status); 1144 return; 1145 } 1146 1147 wlat_test(cb); 1148 1149 } 1150 1151 static void krping_bw_test_server(struct krping_cb *cb) 1152 { 1153 struct ib_send_wr *bad_wr; 1154 struct ib_wc wc; 1155 int ret; 1156 1157 /* Spin waiting for client's Start STAG/TO/Len */ 1158 while (cb->state < RDMA_READ_ADV) { 1159 krping_cq_event_handler(cb->cq, cb); 1160 } 1161 1162 /* Send STAG/TO/Len to client */ 1163 if (cb->dma_mr) 1164 krping_format_send(cb, cb->start_addr, cb->dma_mr); 1165 else 1166 krping_format_send(cb, cb->start_addr, cb->start_mr); 1167 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1168 if (ret) { 1169 log(LOG_ERR, "post send error %d\n", ret); 1170 return; 1171 } 1172 1173 /* Spin waiting for send completion */ 1174 while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); 1175 if (ret < 0) { 1176 log(LOG_ERR, "poll error %d\n", ret); 1177 return; 1178 } 1179 if (wc.status) { 1180 log(LOG_ERR, "send completiong error %d\n", wc.status); 1181 return; 1182 } 1183 1184 if (cb->duplex) 1185 bw_test(cb); 1186 krping_wait(cb, ERROR); 1187 } 1188 1189 static int krping_bind_server(struct krping_cb *cb) 1190 { 1191 struct sockaddr_in sin; 1192 int ret; 1193 1194 memset(&sin, 0, sizeof(sin)); 1195 sin.sin_len = sizeof sin; 1196 sin.sin_family = AF_INET; 1197 sin.sin_addr.s_addr = cb->addr.s_addr; 1198 sin.sin_port = cb->port; 1199 1200 ret = rdma_bind_addr(cb->cm_id, (struct sockaddr *) &sin); 1201 if (ret) { 1202 log(LOG_ERR, "rdma_bind_addr error %d\n", ret); 1203 return ret; 1204 } 1205 DEBUG_LOG(PFX "rdma_bind_addr successful\n"); 1206 1207 DEBUG_LOG(PFX "rdma_listen\n"); 1208 ret = rdma_listen(cb->cm_id, 3); 1209 if (ret) { 1210 log(LOG_ERR, "rdma_listen failed: %d\n", ret); 1211 return ret; 1212 } 1213 1214 krping_wait(cb, CONNECT_REQUEST); 1215 if (cb->state != CONNECT_REQUEST) { 1216 log(LOG_ERR, "wait for CONNECT_REQUEST state %d\n", 1217 cb->state); 1218 return -1; 1219 } 1220 1221 return 0; 1222 } 1223 1224 static void krping_run_server(struct krping_cb *cb) 1225 { 1226 struct ib_recv_wr *bad_wr; 1227 int ret; 1228 1229 ret = krping_bind_server(cb); 1230 if (ret) 1231 return; 1232 1233 ret = krping_setup_qp(cb, cb->child_cm_id); 1234 if (ret) { 1235 log(LOG_ERR, "setup_qp failed: %d\n", ret); 1236 return; 1237 } 1238 1239 ret = krping_setup_buffers(cb); 1240 if (ret) { 1241 log(LOG_ERR, "krping_setup_buffers failed: %d\n", ret); 1242 goto err1; 1243 } 1244 1245 ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); 1246 if (ret) { 1247 log(LOG_ERR, "ib_post_recv failed: %d\n", ret); 1248 goto err2; 1249 } 1250 1251 ret = krping_accept(cb); 1252 if (ret) { 1253 log(LOG_ERR, "connect error %d\n", ret); 1254 goto err2; 1255 } 1256 1257 if (cb->wlat) 1258 krping_wlat_test_server(cb); 1259 else if (cb->rlat) 1260 krping_rlat_test_server(cb); 1261 else if (cb->bw) 1262 krping_bw_test_server(cb); 1263 else 1264 krping_test_server(cb); 1265 1266 rdma_disconnect(cb->child_cm_id); 1267 rdma_destroy_id(cb->child_cm_id); 1268 err2: 1269 krping_free_buffers(cb); 1270 err1: 1271 krping_free_qp(cb); 1272 } 1273 1274 static void krping_test_client(struct krping_cb *cb) 1275 { 1276 int ping, start, cc, i, ret; 1277 struct ib_send_wr *bad_wr; 1278 unsigned char c; 1279 1280 start = 65; 1281 for (ping = 0; !cb->count || ping < cb->count; ping++) { 1282 cb->state = RDMA_READ_ADV; 1283 1284 /* Put some ascii text in the buffer. */ 1285 cc = sprintf(cb->start_buf, "rdma-ping-%d: ", ping); 1286 for (i = cc, c = start; i < cb->size; i++) { 1287 cb->start_buf[i] = c; 1288 c++; 1289 if (c > 122) 1290 c = 65; 1291 } 1292 start++; 1293 if (start > 122) 1294 start = 65; 1295 cb->start_buf[cb->size - 1] = 0; 1296 1297 if (cb->dma_mr) 1298 krping_format_send(cb, cb->start_addr, cb->dma_mr); 1299 else 1300 krping_format_send(cb, cb->start_addr, cb->start_mr); 1301 1302 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1303 if (ret) { 1304 log(LOG_ERR, "post send error %d\n", ret); 1305 break; 1306 } 1307 1308 /* Wait for server to ACK */ 1309 krping_wait(cb, RDMA_WRITE_ADV); 1310 if (cb->state != RDMA_WRITE_ADV) { 1311 log(LOG_ERR, 1312 "wait for RDMA_WRITE_ADV state %d\n", 1313 cb->state); 1314 break; 1315 } 1316 1317 if (cb->dma_mr) 1318 krping_format_send(cb, cb->rdma_addr, cb->dma_mr); 1319 else 1320 krping_format_send(cb, cb->rdma_addr, cb->rdma_mr); 1321 1322 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1323 if (ret) { 1324 log(LOG_ERR, "post send error %d\n", ret); 1325 break; 1326 } 1327 1328 /* Wait for the server to say the RDMA Write is complete. */ 1329 krping_wait(cb, RDMA_WRITE_COMPLETE); 1330 if (cb->state != RDMA_WRITE_COMPLETE) { 1331 log(LOG_ERR, 1332 "wait for RDMA_WRITE_COMPLETE state %d\n", 1333 cb->state); 1334 break; 1335 } 1336 1337 if (cb->validate) 1338 if (memcmp(cb->start_buf, cb->rdma_buf, cb->size)) { 1339 log(LOG_ERR, "data mismatch!\n"); 1340 break; 1341 } 1342 1343 if (cb->verbose) 1344 DEBUG_LOG("ping data: %s\n", cb->rdma_buf); 1345 } 1346 } 1347 1348 static void krping_rlat_test_client(struct krping_cb *cb) 1349 { 1350 struct ib_send_wr *bad_wr; 1351 struct ib_wc wc; 1352 int ret; 1353 1354 cb->state = RDMA_READ_ADV; 1355 1356 /* Send STAG/TO/Len to client */ 1357 if (cb->dma_mr) 1358 krping_format_send(cb, cb->start_addr, cb->dma_mr); 1359 else 1360 krping_format_send(cb, cb->start_addr, cb->rdma_mr); 1361 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1362 if (ret) { 1363 log(LOG_ERR, "post send error %d\n", ret); 1364 return; 1365 } 1366 1367 /* Spin waiting for send completion */ 1368 while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); 1369 if (ret < 0) { 1370 log(LOG_ERR, "poll error %d\n", ret); 1371 return; 1372 } 1373 if (wc.status) { 1374 log(LOG_ERR, "send completion error %d\n", wc.status); 1375 return; 1376 } 1377 1378 /* Spin waiting for server's Start STAG/TO/Len */ 1379 while (cb->state < RDMA_WRITE_ADV) { 1380 krping_cq_event_handler(cb->cq, cb); 1381 } 1382 1383 #if 0 1384 { 1385 int i; 1386 struct timeval start, stop; 1387 time_t sec; 1388 suseconds_t usec; 1389 unsigned long long elapsed; 1390 struct ib_wc wc; 1391 struct ib_send_wr *bad_wr; 1392 int ne; 1393 1394 cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; 1395 cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; 1396 cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; 1397 cb->rdma_sq_wr.sg_list->length = 0; 1398 cb->rdma_sq_wr.num_sge = 0; 1399 1400 microtime(&start); 1401 for (i=0; i < 100000; i++) { 1402 if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { 1403 log(LOG_ERR, "Couldn't post send\n"); 1404 return; 1405 } 1406 do { 1407 ne = ib_poll_cq(cb->cq, 1, &wc); 1408 } while (ne == 0); 1409 if (ne < 0) { 1410 log(LOG_ERR, "poll CQ failed %d\n", ne); 1411 return; 1412 } 1413 if (wc.status != IB_WC_SUCCESS) { 1414 log(LOG_ERR, "Completion wth error at %s:\n", 1415 cb->server ? "server" : "client"); 1416 log(LOG_ERR, "Failed status %d: wr_id %d\n", 1417 wc.status, (int) wc.wr_id); 1418 return; 1419 } 1420 } 1421 microtime(&stop); 1422 1423 if (stop.tv_usec < start.tv_usec) { 1424 stop.tv_usec += 1000000; 1425 stop.tv_sec -= 1; 1426 } 1427 sec = stop.tv_sec - start.tv_sec; 1428 usec = stop.tv_usec - start.tv_usec; 1429 elapsed = sec * 1000000 + usec; 1430 log(LOG_ERR, "0B-write-lat iters 100000 usec %llu\n", elapsed); 1431 } 1432 #endif 1433 1434 rlat_test(cb); 1435 } 1436 1437 static void krping_wlat_test_client(struct krping_cb *cb) 1438 { 1439 struct ib_send_wr *bad_wr; 1440 struct ib_wc wc; 1441 int ret; 1442 1443 cb->state = RDMA_READ_ADV; 1444 1445 /* Send STAG/TO/Len to client */ 1446 if (cb->dma_mr) 1447 krping_format_send(cb, cb->start_addr, cb->dma_mr); 1448 else 1449 krping_format_send(cb, cb->start_addr, cb->start_mr); 1450 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1451 if (ret) { 1452 log(LOG_ERR, "post send error %d\n", ret); 1453 return; 1454 } 1455 1456 /* Spin waiting for send completion */ 1457 while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); 1458 if (ret < 0) { 1459 log(LOG_ERR, "poll error %d\n", ret); 1460 return; 1461 } 1462 if (wc.status) { 1463 log(LOG_ERR, "send completion error %d\n", wc.status); 1464 return; 1465 } 1466 1467 /* Spin waiting for server's Start STAG/TO/Len */ 1468 while (cb->state < RDMA_WRITE_ADV) { 1469 krping_cq_event_handler(cb->cq, cb); 1470 } 1471 1472 wlat_test(cb); 1473 } 1474 1475 static void krping_bw_test_client(struct krping_cb *cb) 1476 { 1477 struct ib_send_wr *bad_wr; 1478 struct ib_wc wc; 1479 int ret; 1480 1481 cb->state = RDMA_READ_ADV; 1482 1483 /* Send STAG/TO/Len to client */ 1484 if (cb->dma_mr) 1485 krping_format_send(cb, cb->start_addr, cb->dma_mr); 1486 else 1487 krping_format_send(cb, cb->start_addr, cb->start_mr); 1488 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1489 if (ret) { 1490 log(LOG_ERR, "post send error %d\n", ret); 1491 return; 1492 } 1493 1494 /* Spin waiting for send completion */ 1495 while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); 1496 if (ret < 0) { 1497 log(LOG_ERR, "poll error %d\n", ret); 1498 return; 1499 } 1500 if (wc.status) { 1501 log(LOG_ERR, "send completion error %d\n", wc.status); 1502 return; 1503 } 1504 1505 /* Spin waiting for server's Start STAG/TO/Len */ 1506 while (cb->state < RDMA_WRITE_ADV) { 1507 krping_cq_event_handler(cb->cq, cb); 1508 } 1509 1510 bw_test(cb); 1511 } 1512 1513 static int krping_connect_client(struct krping_cb *cb) 1514 { 1515 struct rdma_conn_param conn_param; 1516 int ret; 1517 1518 memset(&conn_param, 0, sizeof conn_param); 1519 conn_param.responder_resources = 1; 1520 conn_param.initiator_depth = 1; 1521 conn_param.retry_count = 10; 1522 1523 ret = rdma_connect(cb->cm_id, &conn_param); 1524 if (ret) { 1525 log(LOG_ERR, "rdma_connect error %d\n", ret); 1526 return ret; 1527 } 1528 1529 krping_wait(cb, CONNECTED); 1530 if (cb->state == ERROR) { 1531 log(LOG_ERR, "wait for CONNECTED state %d\n", cb->state); 1532 return -1; 1533 } 1534 1535 DEBUG_LOG(PFX "rdma_connect successful\n"); 1536 return 0; 1537 } 1538 1539 static int krping_bind_client(struct krping_cb *cb) 1540 { 1541 struct sockaddr_in sin; 1542 int ret; 1543 1544 memset(&sin, 0, sizeof(sin)); 1545 sin.sin_len = sizeof sin; 1546 sin.sin_family = AF_INET; 1547 sin.sin_addr.s_addr = cb->addr.s_addr; 1548 sin.sin_port = cb->port; 1549 1550 ret = rdma_resolve_addr(cb->cm_id, NULL, (struct sockaddr *) &sin, 1551 2000); 1552 if (ret) { 1553 log(LOG_ERR, "rdma_resolve_addr error %d\n", ret); 1554 return ret; 1555 } 1556 1557 krping_wait(cb, ROUTE_RESOLVED); 1558 if (cb->state != ROUTE_RESOLVED) { 1559 log(LOG_ERR, 1560 "addr/route resolution did not resolve: state %d\n", 1561 cb->state); 1562 return EINTR; 1563 } 1564 1565 DEBUG_LOG(PFX "rdma_resolve_addr - rdma_resolve_route successful\n"); 1566 return 0; 1567 } 1568 1569 static void krping_run_client(struct krping_cb *cb) 1570 { 1571 struct ib_recv_wr *bad_wr; 1572 int ret; 1573 1574 ret = krping_bind_client(cb); 1575 if (ret) 1576 return; 1577 1578 ret = krping_setup_qp(cb, cb->cm_id); 1579 if (ret) { 1580 log(LOG_ERR, "setup_qp failed: %d\n", ret); 1581 return; 1582 } 1583 1584 ret = krping_setup_buffers(cb); 1585 if (ret) { 1586 log(LOG_ERR, "krping_setup_buffers failed: %d\n", ret); 1587 goto err1; 1588 } 1589 1590 ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); 1591 if (ret) { 1592 log(LOG_ERR, "ib_post_recv failed: %d\n", ret); 1593 goto err2; 1594 } 1595 1596 ret = krping_connect_client(cb); 1597 if (ret) { 1598 log(LOG_ERR, "connect error %d\n", ret); 1599 goto err2; 1600 } 1601 1602 if (cb->wlat) 1603 krping_wlat_test_client(cb); 1604 else if (cb->rlat) 1605 krping_rlat_test_client(cb); 1606 else if (cb->bw) 1607 krping_bw_test_client(cb); 1608 else 1609 krping_test_client(cb); 1610 rdma_disconnect(cb->cm_id); 1611 err2: 1612 krping_free_buffers(cb); 1613 err1: 1614 krping_free_qp(cb); 1615 } 1616 1617 int krping_doit(char *cmd) 1618 { 1619 struct krping_cb *cb; 1620 int op; 1621 int ret = 0; 1622 char *optarg; 1623 unsigned long optint; 1624 debug = 0; 1625 1626 cb = malloc(sizeof(*cb), M_DEVBUF, M_WAITOK); 1627 if (!cb) 1628 return ENOMEM; 1629 bzero(cb, sizeof *cb); 1630 1631 mtx_lock(&krping_mutex); 1632 TAILQ_INSERT_TAIL(&krping_cbs, cb, list); 1633 mtx_unlock(&krping_mutex); 1634 1635 cb->server = -1; 1636 cb->state = IDLE; 1637 cb->size = 64; 1638 cb->txdepth = RPING_SQ_DEPTH; 1639 mtx_init(&cb->lock, "krping mtx", NULL, MTX_DUPOK|MTX_DEF); 1640 1641 while ((op = krping_getopt("krping", &cmd, krping_opts, NULL, &optarg, 1642 &optint)) != 0) { 1643 switch (op) { 1644 case 'a': 1645 cb->addr_str = optarg; 1646 DEBUG_LOG(PFX "ipaddr (%s)\n", optarg); 1647 if (!inet_aton(optarg, &cb->addr)) { 1648 log(LOG_ERR, "bad addr string %s\n", optarg); 1649 ret = EINVAL; 1650 } 1651 break; 1652 case 'D': 1653 cb->use_dmamr = 1; 1654 DEBUG_LOG(PFX "using dma mr\n"); 1655 break; 1656 case 'p': 1657 cb->port = htons(optint); 1658 DEBUG_LOG(PFX "port %d\n", (int)optint); 1659 break; 1660 case 'P': 1661 cb->poll = 1; 1662 DEBUG_LOG("server\n"); 1663 break; 1664 case 's': 1665 cb->server = 1; 1666 DEBUG_LOG(PFX "server\n"); 1667 break; 1668 case 'c': 1669 cb->server = 0; 1670 DEBUG_LOG(PFX "client\n"); 1671 break; 1672 case 'S': 1673 cb->size = optint; 1674 if ((cb->size < 1) || 1675 (cb->size > RPING_BUFSIZE)) { 1676 log(LOG_ERR, "Invalid size %d " 1677 "(valid range is 1 to %d)\n", 1678 cb->size, RPING_BUFSIZE); 1679 ret = EINVAL; 1680 } else 1681 DEBUG_LOG(PFX "size %d\n", (int)optint); 1682 break; 1683 case 'C': 1684 cb->count = optint; 1685 if (cb->count < 0) { 1686 log(LOG_ERR, "Invalid count %d\n", 1687 cb->count); 1688 ret = EINVAL; 1689 } else 1690 DEBUG_LOG(PFX "count %d\n", (int) cb->count); 1691 break; 1692 case 'v': 1693 cb->verbose++; 1694 DEBUG_LOG(PFX "verbose\n"); 1695 break; 1696 case 'V': 1697 cb->validate++; 1698 DEBUG_LOG(PFX "validate data\n"); 1699 break; 1700 case 'L': 1701 cb->rlat++; 1702 break; 1703 case 'l': 1704 cb->wlat++; 1705 break; 1706 case 'B': 1707 cb->bw++; 1708 break; 1709 case 't': 1710 cb->txdepth = optint; 1711 DEBUG_LOG(PFX "txdepth %d\n", cb->txdepth); 1712 break; 1713 case 'd': 1714 debug++; 1715 break; 1716 default: 1717 log(LOG_ERR, "unknown opt %s\n", optarg); 1718 ret = EINVAL; 1719 break; 1720 } 1721 } 1722 if (ret) 1723 goto out; 1724 1725 if (cb->server == -1) { 1726 log(LOG_ERR, "must be either client or server\n"); 1727 ret = EINVAL; 1728 goto out; 1729 } 1730 if ((cb->bw + cb->rlat + cb->wlat) > 1) { 1731 log(LOG_ERR, "Pick only one test: bw, rlat, wlat\n"); 1732 ret = EINVAL; 1733 goto out; 1734 } 1735 1736 1737 cb->cm_id = rdma_create_id(krping_cma_event_handler, cb, RDMA_PS_TCP); 1738 if (IS_ERR(cb->cm_id)) { 1739 ret = PTR_ERR(cb->cm_id); 1740 log(LOG_ERR, "rdma_create_id error %d\n", ret); 1741 goto out; 1742 } 1743 DEBUG_LOG(PFX "created cm_id %p\n", cb->cm_id); 1744 if (cb->server) 1745 krping_run_server(cb); 1746 else 1747 krping_run_client(cb); 1748 DEBUG_LOG(PFX "destroy cm_id %p\n", cb->cm_id); 1749 rdma_destroy_id(cb->cm_id); 1750 out: 1751 mtx_lock(&krping_mutex); 1752 TAILQ_REMOVE(&krping_cbs, cb, list); 1753 mtx_unlock(&krping_mutex); 1754 free(cb, M_DEVBUF); 1755 return ret; 1756 } 1757 1758 void krping_init(void) 1759 { 1760 mtx_init(&krping_mutex, "krping lock", NULL, MTX_DEF); 1761 TAILQ_INIT(&krping_cbs); 1762 } 1763