1 /* 2 * Copyright (c) 2005 Ammasso, Inc. All rights reserved. 3 * Copyright (c) 2006-2009 Open Grid Computing, Inc. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 
32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include <linux/module.h> 38 #include <linux/moduleparam.h> 39 #include <linux/slab.h> 40 #include <linux/err.h> 41 #include <linux/string.h> 42 #include <linux/list.h> 43 #include <linux/in.h> 44 #include <linux/device.h> 45 #include <linux/pci.h> 46 #include <linux/sched.h> 47 48 #include <asm/atomic.h> 49 50 #include <rdma/ib_verbs.h> 51 #include <rdma/rdma_cm.h> 52 53 #include "krping.h" 54 #include "getopt.h" 55 56 extern int krping_debug; 57 #define DEBUG_LOG(cb, x...) if (krping_debug) krping_printf((cb)->cookie, x) 58 #define PRINTF(cb, x...) krping_printf((cb)->cookie, x) 59 60 MODULE_AUTHOR("Steve Wise"); 61 MODULE_DESCRIPTION("RDMA ping client/server"); 62 MODULE_LICENSE("Dual BSD/GPL"); 63 64 static __inline uint64_t 65 get_cycles(void) 66 { 67 uint32_t low, high; 68 __asm __volatile("rdtsc" : "=a" (low), "=d" (high)); 69 return (low | ((u_int64_t)high << 32)); 70 } 71 72 typedef uint64_t cycles_t; 73 74 enum mem_type { 75 DMA = 1, 76 FASTREG = 2, 77 MW = 3, 78 MR = 4 79 }; 80 81 static const struct krping_option krping_opts[] = { 82 {"count", OPT_INT, 'C'}, 83 {"size", OPT_INT, 'S'}, 84 {"addr", OPT_STRING, 'a'}, 85 {"port", OPT_INT, 'p'}, 86 {"verbose", OPT_NOPARAM, 'v'}, 87 {"validate", OPT_NOPARAM, 'V'}, 88 {"server", OPT_NOPARAM, 's'}, 89 {"client", OPT_NOPARAM, 'c'}, 90 {"mem_mode", OPT_STRING, 'm'}, 91 {"server_inv", OPT_NOPARAM, 'I'}, 92 {"wlat", OPT_NOPARAM, 'l'}, 93 {"rlat", OPT_NOPARAM, 'L'}, 94 {"bw", OPT_NOPARAM, 'B'}, 95 {"duplex", OPT_NOPARAM, 'd'}, 96 {"txdepth", OPT_INT, 'T'}, 97 {"poll", OPT_NOPARAM, 'P'}, 98 {"local_dma_lkey", OPT_NOPARAM, 'Z'}, 99 {"read_inv", OPT_NOPARAM, 'R'}, 100 {"fr", OPT_NOPARAM, 'f'}, 101 {NULL, 0, 0} 102 }; 103 104 #define htonll(x) cpu_to_be64((x)) 105 #define ntohll(x) cpu_to_be64((x)) 106 107 static struct mutex krping_mutex; 108 109 /* 110 * List of running krping threads. 
111 */ 112 static LIST_HEAD(krping_cbs); 113 114 /* 115 * krping "ping/pong" loop: 116 * client sends source rkey/addr/len 117 * server receives source rkey/add/len 118 * server rdma reads "ping" data from source 119 * server sends "go ahead" on rdma read completion 120 * client sends sink rkey/addr/len 121 * server receives sink rkey/addr/len 122 * server rdma writes "pong" data to sink 123 * server sends "go ahead" on rdma write completion 124 * <repeat loop> 125 */ 126 127 /* 128 * These states are used to signal events between the completion handler 129 * and the main client or server thread. 130 * 131 * Once CONNECTED, they cycle through RDMA_READ_ADV, RDMA_WRITE_ADV, 132 * and RDMA_WRITE_COMPLETE for each ping. 133 */ 134 enum test_state { 135 IDLE = 1, 136 CONNECT_REQUEST, 137 ADDR_RESOLVED, 138 ROUTE_RESOLVED, 139 CONNECTED, 140 RDMA_READ_ADV, 141 RDMA_READ_COMPLETE, 142 RDMA_WRITE_ADV, 143 RDMA_WRITE_COMPLETE, 144 ERROR 145 }; 146 147 struct krping_rdma_info { 148 uint64_t buf; 149 uint32_t rkey; 150 uint32_t size; 151 }; 152 153 /* 154 * Default max buffer size for IO... 155 */ 156 #define RPING_BUFSIZE 128*1024 157 #define RPING_SQ_DEPTH 64 158 159 /* 160 * Control block struct. 
161 */ 162 struct krping_cb { 163 void *cookie; 164 int server; /* 0 iff client */ 165 struct ib_cq *cq; 166 struct ib_pd *pd; 167 struct ib_qp *qp; 168 169 enum mem_type mem; 170 struct ib_mr *dma_mr; 171 172 struct ib_fast_reg_page_list *page_list; 173 int page_list_len; 174 struct ib_send_wr fastreg_wr; 175 struct ib_send_wr invalidate_wr; 176 struct ib_mr *fastreg_mr; 177 int server_invalidate; 178 int read_inv; 179 u8 key; 180 181 struct ib_mw *mw; 182 struct ib_mw_bind bind_attr; 183 184 struct ib_recv_wr rq_wr; /* recv work request record */ 185 struct ib_sge recv_sgl; /* recv single SGE */ 186 struct krping_rdma_info recv_buf;/* malloc'd buffer */ 187 u64 recv_dma_addr; 188 DECLARE_PCI_UNMAP_ADDR(recv_mapping) 189 struct ib_mr *recv_mr; 190 191 struct ib_send_wr sq_wr; /* send work requrest record */ 192 struct ib_sge send_sgl; 193 struct krping_rdma_info send_buf;/* single send buf */ 194 u64 send_dma_addr; 195 DECLARE_PCI_UNMAP_ADDR(send_mapping) 196 struct ib_mr *send_mr; 197 198 struct ib_send_wr rdma_sq_wr; /* rdma work request record */ 199 struct ib_sge rdma_sgl; /* rdma single SGE */ 200 char *rdma_buf; /* used as rdma sink */ 201 u64 rdma_dma_addr; 202 DECLARE_PCI_UNMAP_ADDR(rdma_mapping) 203 struct ib_mr *rdma_mr; 204 205 uint32_t remote_rkey; /* remote guys RKEY */ 206 uint64_t remote_addr; /* remote guys TO */ 207 uint32_t remote_len; /* remote guys LEN */ 208 209 char *start_buf; /* rdma read src */ 210 u64 start_dma_addr; 211 DECLARE_PCI_UNMAP_ADDR(start_mapping) 212 struct ib_mr *start_mr; 213 214 enum test_state state; /* used for cond/signalling */ 215 wait_queue_head_t sem; 216 struct krping_stats stats; 217 218 uint16_t port; /* dst port in NBO */ 219 struct in_addr addr; /* dst addr in NBO */ 220 char *addr_str; /* dst addr string */ 221 int verbose; /* verbose logging */ 222 int count; /* ping count */ 223 int size; /* ping data size */ 224 int validate; /* validate ping data */ 225 int wlat; /* run wlat test */ 226 int rlat; /* run 
rlat test */ 227 int bw; /* run bw test */ 228 int duplex; /* run bw full duplex test */ 229 int poll; /* poll or block for rlat test */ 230 int txdepth; /* SQ depth */ 231 int local_dma_lkey; /* use 0 for lkey */ 232 int frtest; /* fastreg test */ 233 234 /* CM stuff */ 235 struct rdma_cm_id *cm_id; /* connection on client side,*/ 236 /* listener on server side. */ 237 struct rdma_cm_id *child_cm_id; /* connection on server side */ 238 struct list_head list; 239 }; 240 241 static int krping_cma_event_handler(struct rdma_cm_id *cma_id, 242 struct rdma_cm_event *event) 243 { 244 int ret; 245 struct krping_cb *cb = cma_id->context; 246 247 DEBUG_LOG(cb, "cma_event type %d cma_id %p (%s)\n", event->event, 248 cma_id, (cma_id == cb->cm_id) ? "parent" : "child"); 249 250 switch (event->event) { 251 case RDMA_CM_EVENT_ADDR_RESOLVED: 252 cb->state = ADDR_RESOLVED; 253 ret = rdma_resolve_route(cma_id, 2000); 254 if (ret) { 255 PRINTF(cb, "rdma_resolve_route error %d\n", ret); 256 wake_up_interruptible(&cb->sem); 257 } 258 break; 259 260 case RDMA_CM_EVENT_ROUTE_RESOLVED: 261 cb->state = ROUTE_RESOLVED; 262 wake_up_interruptible(&cb->sem); 263 break; 264 265 case RDMA_CM_EVENT_CONNECT_REQUEST: 266 cb->state = CONNECT_REQUEST; 267 cb->child_cm_id = cma_id; 268 DEBUG_LOG(cb, "child cma %p\n", cb->child_cm_id); 269 wake_up_interruptible(&cb->sem); 270 break; 271 272 case RDMA_CM_EVENT_ESTABLISHED: 273 DEBUG_LOG(cb, "ESTABLISHED\n"); 274 if (!cb->server) { 275 cb->state = CONNECTED; 276 } 277 wake_up_interruptible(&cb->sem); 278 break; 279 280 case RDMA_CM_EVENT_ADDR_ERROR: 281 case RDMA_CM_EVENT_ROUTE_ERROR: 282 case RDMA_CM_EVENT_CONNECT_ERROR: 283 case RDMA_CM_EVENT_UNREACHABLE: 284 case RDMA_CM_EVENT_REJECTED: 285 PRINTF(cb, "cma event %d, error %d\n", event->event, 286 event->status); 287 cb->state = ERROR; 288 wake_up_interruptible(&cb->sem); 289 break; 290 291 case RDMA_CM_EVENT_DISCONNECTED: 292 PRINTF(cb, "DISCONNECT EVENT...\n"); 293 cb->state = ERROR; 294 
wake_up_interruptible(&cb->sem); 295 break; 296 297 case RDMA_CM_EVENT_DEVICE_REMOVAL: 298 PRINTF(cb, "cma detected device removal!!!!\n"); 299 break; 300 301 default: 302 PRINTF(cb, "oof bad type!\n"); 303 wake_up_interruptible(&cb->sem); 304 break; 305 } 306 return 0; 307 } 308 309 static int server_recv(struct krping_cb *cb, struct ib_wc *wc) 310 { 311 if (wc->byte_len != sizeof(cb->recv_buf)) { 312 PRINTF(cb, "Received bogus data, size %d\n", 313 wc->byte_len); 314 return -1; 315 } 316 317 cb->remote_rkey = ntohl(cb->recv_buf.rkey); 318 cb->remote_addr = ntohll(cb->recv_buf.buf); 319 cb->remote_len = ntohl(cb->recv_buf.size); 320 DEBUG_LOG(cb, "Received rkey %x addr %llx len %d from peer\n", 321 cb->remote_rkey, (unsigned long long)cb->remote_addr, 322 cb->remote_len); 323 324 if (cb->state <= CONNECTED || cb->state == RDMA_WRITE_COMPLETE) 325 cb->state = RDMA_READ_ADV; 326 else 327 cb->state = RDMA_WRITE_ADV; 328 329 return 0; 330 } 331 332 static int client_recv(struct krping_cb *cb, struct ib_wc *wc) 333 { 334 if (wc->byte_len != sizeof(cb->recv_buf)) { 335 PRINTF(cb, "Received bogus data, size %d\n", 336 wc->byte_len); 337 return -1; 338 } 339 340 if (cb->state == RDMA_READ_ADV) 341 cb->state = RDMA_WRITE_ADV; 342 else 343 cb->state = RDMA_WRITE_COMPLETE; 344 345 return 0; 346 } 347 348 static void krping_cq_event_handler(struct ib_cq *cq, void *ctx) 349 { 350 struct krping_cb *cb = ctx; 351 struct ib_wc wc; 352 struct ib_recv_wr *bad_wr; 353 int ret; 354 355 BUG_ON(cb->cq != cq); 356 if (cb->state == ERROR) { 357 PRINTF(cb, "cq completion in ERROR state\n"); 358 return; 359 } 360 if (cb->frtest) { 361 PRINTF(cb, "cq completion event in frtest!\n"); 362 return; 363 } 364 if (!cb->wlat && !cb->rlat && !cb->bw) 365 ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); 366 while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) { 367 if (wc.status) { 368 if (wc.status == IB_WC_WR_FLUSH_ERR) { 369 DEBUG_LOG(cb, "cq flushed\n"); 370 continue; 371 } else { 372 PRINTF(cb, "cq 
completion failed with " 373 "wr_id %Lx status %d opcode %d vender_err %x\n", 374 wc.wr_id, wc.status, wc.opcode, wc.vendor_err); 375 goto error; 376 } 377 } 378 379 switch (wc.opcode) { 380 case IB_WC_SEND: 381 DEBUG_LOG(cb, "send completion\n"); 382 cb->stats.send_bytes += cb->send_sgl.length; 383 cb->stats.send_msgs++; 384 break; 385 386 case IB_WC_RDMA_WRITE: 387 DEBUG_LOG(cb, "rdma write completion\n"); 388 cb->stats.write_bytes += cb->rdma_sq_wr.sg_list->length; 389 cb->stats.write_msgs++; 390 cb->state = RDMA_WRITE_COMPLETE; 391 wake_up_interruptible(&cb->sem); 392 break; 393 394 case IB_WC_RDMA_READ: 395 DEBUG_LOG(cb, "rdma read completion\n"); 396 cb->stats.read_bytes += cb->rdma_sq_wr.sg_list->length; 397 cb->stats.read_msgs++; 398 cb->state = RDMA_READ_COMPLETE; 399 wake_up_interruptible(&cb->sem); 400 break; 401 402 case IB_WC_RECV: 403 DEBUG_LOG(cb, "recv completion\n"); 404 cb->stats.recv_bytes += sizeof(cb->recv_buf); 405 cb->stats.recv_msgs++; 406 if (cb->wlat || cb->rlat || cb->bw) 407 ret = server_recv(cb, &wc); 408 else 409 ret = cb->server ? 
server_recv(cb, &wc) : 410 client_recv(cb, &wc); 411 if (ret) { 412 PRINTF(cb, "recv wc error: %d\n", ret); 413 goto error; 414 } 415 416 ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); 417 if (ret) { 418 PRINTF(cb, "post recv error: %d\n", 419 ret); 420 goto error; 421 } 422 wake_up_interruptible(&cb->sem); 423 break; 424 425 default: 426 PRINTF(cb, 427 "%s:%d Unexpected opcode %d, Shutting down\n", 428 __func__, __LINE__, wc.opcode); 429 goto error; 430 } 431 } 432 if (ret) { 433 PRINTF(cb, "poll error %d\n", ret); 434 goto error; 435 } 436 return; 437 error: 438 cb->state = ERROR; 439 wake_up_interruptible(&cb->sem); 440 } 441 442 static int krping_accept(struct krping_cb *cb) 443 { 444 struct rdma_conn_param conn_param; 445 int ret; 446 447 DEBUG_LOG(cb, "accepting client connection request\n"); 448 449 memset(&conn_param, 0, sizeof conn_param); 450 conn_param.responder_resources = 1; 451 conn_param.initiator_depth = 1; 452 453 ret = rdma_accept(cb->child_cm_id, &conn_param); 454 if (ret) { 455 PRINTF(cb, "rdma_accept error: %d\n", ret); 456 return ret; 457 } 458 459 if (!cb->wlat && !cb->rlat && !cb->bw) { 460 wait_event_interruptible(cb->sem, cb->state >= CONNECTED); 461 if (cb->state == ERROR) { 462 PRINTF(cb, "wait for CONNECTED state %d\n", 463 cb->state); 464 return -1; 465 } 466 } 467 return 0; 468 } 469 470 static void krping_setup_wr(struct krping_cb *cb) 471 { 472 cb->recv_sgl.addr = cb->recv_dma_addr; 473 cb->recv_sgl.length = sizeof cb->recv_buf; 474 if (cb->local_dma_lkey) 475 cb->recv_sgl.lkey = cb->qp->device->local_dma_lkey; 476 else if (cb->mem == DMA) 477 cb->recv_sgl.lkey = cb->dma_mr->lkey; 478 else 479 cb->recv_sgl.lkey = cb->recv_mr->lkey; 480 cb->rq_wr.sg_list = &cb->recv_sgl; 481 cb->rq_wr.num_sge = 1; 482 483 cb->send_sgl.addr = cb->send_dma_addr; 484 cb->send_sgl.length = sizeof cb->send_buf; 485 if (cb->local_dma_lkey) 486 cb->send_sgl.lkey = cb->qp->device->local_dma_lkey; 487 else if (cb->mem == DMA) 488 cb->send_sgl.lkey = 
cb->dma_mr->lkey; 489 else 490 cb->send_sgl.lkey = cb->send_mr->lkey; 491 492 cb->sq_wr.opcode = IB_WR_SEND; 493 cb->sq_wr.send_flags = IB_SEND_SIGNALED; 494 cb->sq_wr.sg_list = &cb->send_sgl; 495 cb->sq_wr.num_sge = 1; 496 497 if (cb->server || cb->wlat || cb->rlat || cb->bw) { 498 cb->rdma_sgl.addr = cb->rdma_dma_addr; 499 if (cb->mem == MR) 500 cb->rdma_sgl.lkey = cb->rdma_mr->lkey; 501 cb->rdma_sq_wr.send_flags = IB_SEND_SIGNALED; 502 cb->rdma_sq_wr.sg_list = &cb->rdma_sgl; 503 cb->rdma_sq_wr.num_sge = 1; 504 } 505 506 switch(cb->mem) { 507 case FASTREG: 508 509 /* 510 * A chain of 2 WRs, INVALDATE_MR + FAST_REG_MR. 511 * both unsignaled. The client uses them to reregister 512 * the rdma buffers with a new key each iteration. 513 */ 514 cb->fastreg_wr.opcode = IB_WR_FAST_REG_MR; 515 cb->fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; 516 cb->fastreg_wr.wr.fast_reg.length = cb->size; 517 cb->fastreg_wr.wr.fast_reg.page_list = cb->page_list; 518 cb->fastreg_wr.wr.fast_reg.page_list_len = cb->page_list_len; 519 520 cb->invalidate_wr.next = &cb->fastreg_wr; 521 cb->invalidate_wr.opcode = IB_WR_LOCAL_INV; 522 break; 523 case MW: 524 cb->bind_attr.wr_id = 0xabbaabba; 525 cb->bind_attr.send_flags = 0; /* unsignaled */ 526 cb->bind_attr.length = cb->size; 527 break; 528 default: 529 break; 530 } 531 } 532 533 static int krping_setup_buffers(struct krping_cb *cb) 534 { 535 int ret; 536 struct ib_phys_buf buf; 537 u64 iovbase; 538 539 DEBUG_LOG(cb, "krping_setup_buffers called on cb %p\n", cb); 540 541 cb->recv_dma_addr = dma_map_single(cb->pd->device->dma_device, 542 &cb->recv_buf, 543 sizeof(cb->recv_buf), DMA_BIDIRECTIONAL); 544 pci_unmap_addr_set(cb, recv_mapping, cb->recv_dma_addr); 545 cb->send_dma_addr = dma_map_single(cb->pd->device->dma_device, 546 &cb->send_buf, sizeof(cb->send_buf), 547 DMA_BIDIRECTIONAL); 548 pci_unmap_addr_set(cb, send_mapping, cb->send_dma_addr); 549 550 if (cb->mem == DMA) { 551 cb->dma_mr = ib_get_dma_mr(cb->pd, IB_ACCESS_LOCAL_WRITE| 552 
IB_ACCESS_REMOTE_READ| 553 IB_ACCESS_REMOTE_WRITE); 554 if (IS_ERR(cb->dma_mr)) { 555 DEBUG_LOG(cb, "reg_dmamr failed\n"); 556 ret = PTR_ERR(cb->dma_mr); 557 goto bail; 558 } 559 } else { 560 if (!cb->local_dma_lkey) { 561 buf.addr = cb->recv_dma_addr; 562 buf.size = sizeof cb->recv_buf; 563 DEBUG_LOG(cb, "recv buf dma_addr %llx size %d\n", buf.addr, 564 (int)buf.size); 565 iovbase = cb->recv_dma_addr; 566 cb->recv_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 567 IB_ACCESS_LOCAL_WRITE, 568 &iovbase); 569 570 if (IS_ERR(cb->recv_mr)) { 571 DEBUG_LOG(cb, "recv_buf reg_mr failed\n"); 572 ret = PTR_ERR(cb->recv_mr); 573 goto bail; 574 } 575 576 buf.addr = cb->send_dma_addr; 577 buf.size = sizeof cb->send_buf; 578 DEBUG_LOG(cb, "send buf dma_addr %llx size %d\n", buf.addr, 579 (int)buf.size); 580 iovbase = cb->send_dma_addr; 581 cb->send_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 582 0, &iovbase); 583 584 if (IS_ERR(cb->send_mr)) { 585 DEBUG_LOG(cb, "send_buf reg_mr failed\n"); 586 ret = PTR_ERR(cb->send_mr); 587 goto bail; 588 } 589 } 590 } 591 592 cb->rdma_buf = kmalloc(cb->size, GFP_KERNEL); 593 if (!cb->rdma_buf) { 594 DEBUG_LOG(cb, "rdma_buf malloc failed\n"); 595 ret = -ENOMEM; 596 goto bail; 597 } 598 599 cb->rdma_dma_addr = dma_map_single(cb->pd->device->dma_device, 600 cb->rdma_buf, cb->size, 601 DMA_BIDIRECTIONAL); 602 pci_unmap_addr_set(cb, rdma_mapping, cb->rdma_dma_addr); 603 if (cb->mem != DMA) { 604 switch (cb->mem) { 605 case FASTREG: 606 cb->page_list_len = (((cb->size - 1) & PAGE_MASK) + 607 PAGE_SIZE) >> PAGE_SHIFT; 608 cb->page_list = ib_alloc_fast_reg_page_list( 609 cb->pd->device, 610 cb->page_list_len); 611 if (IS_ERR(cb->page_list)) { 612 DEBUG_LOG(cb, "recv_buf reg_mr failed\n"); 613 ret = PTR_ERR(cb->page_list); 614 goto bail; 615 } 616 cb->fastreg_mr = ib_alloc_fast_reg_mr(cb->pd, 617 cb->page_list->max_page_list_len); 618 if (IS_ERR(cb->fastreg_mr)) { 619 DEBUG_LOG(cb, "recv_buf reg_mr failed\n"); 620 ret = PTR_ERR(cb->fastreg_mr); 621 goto bail; 622 } 623 
DEBUG_LOG(cb, "fastreg rkey 0x%x page_list %p" 624 " page_list_len %u\n", cb->fastreg_mr->rkey, 625 cb->page_list, cb->page_list_len); 626 break; 627 case MW: 628 cb->mw = ib_alloc_mw(cb->pd); 629 if (IS_ERR(cb->mw)) { 630 DEBUG_LOG(cb, "recv_buf alloc_mw failed\n"); 631 ret = PTR_ERR(cb->mw); 632 goto bail; 633 } 634 DEBUG_LOG(cb, "mw rkey 0x%x\n", cb->mw->rkey); 635 /*FALLTHROUGH*/ 636 case MR: 637 buf.addr = cb->rdma_dma_addr; 638 buf.size = cb->size; 639 iovbase = cb->rdma_dma_addr; 640 cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 641 IB_ACCESS_REMOTE_READ| 642 IB_ACCESS_REMOTE_WRITE, 643 &iovbase); 644 if (IS_ERR(cb->rdma_mr)) { 645 DEBUG_LOG(cb, "rdma_buf reg_mr failed\n"); 646 ret = PTR_ERR(cb->rdma_mr); 647 goto bail; 648 } 649 DEBUG_LOG(cb, "rdma buf dma_addr %llx size %d mr rkey 0x%x\n", 650 buf.addr, (int)buf.size, cb->rdma_mr->rkey); 651 break; 652 default: 653 ret = -EINVAL; 654 goto bail; 655 break; 656 } 657 } 658 659 if (!cb->server || cb->wlat || cb->rlat || cb->bw) { 660 661 cb->start_buf = kmalloc(cb->size, GFP_KERNEL); 662 if (!cb->start_buf) { 663 DEBUG_LOG(cb, "start_buf malloc failed\n"); 664 ret = -ENOMEM; 665 goto bail; 666 } 667 668 cb->start_dma_addr = dma_map_single(cb->pd->device->dma_device, 669 cb->start_buf, cb->size, 670 DMA_BIDIRECTIONAL); 671 pci_unmap_addr_set(cb, start_mapping, cb->start_dma_addr); 672 673 if (cb->mem == MR || cb->mem == MW) { 674 unsigned flags = IB_ACCESS_REMOTE_READ; 675 676 if (cb->wlat || cb->rlat || cb->bw) 677 flags |= IB_ACCESS_REMOTE_WRITE; 678 679 buf.addr = cb->start_dma_addr; 680 buf.size = cb->size; 681 DEBUG_LOG(cb, "start buf dma_addr %llx size %d\n", 682 buf.addr, (int)buf.size); 683 iovbase = cb->start_dma_addr; 684 cb->start_mr = ib_reg_phys_mr(cb->pd, &buf, 1, 685 flags, 686 &iovbase); 687 688 if (IS_ERR(cb->start_mr)) { 689 DEBUG_LOG(cb, "start_buf reg_mr failed\n"); 690 ret = PTR_ERR(cb->start_mr); 691 goto bail; 692 } 693 } 694 } 695 696 krping_setup_wr(cb); 697 DEBUG_LOG(cb, "allocated & 
registered buffers...\n"); 698 return 0; 699 bail: 700 if (cb->fastreg_mr && !IS_ERR(cb->fastreg_mr)) 701 ib_dereg_mr(cb->fastreg_mr); 702 if (cb->mw && !IS_ERR(cb->mw)) 703 ib_dealloc_mw(cb->mw); 704 if (cb->rdma_mr && !IS_ERR(cb->rdma_mr)) 705 ib_dereg_mr(cb->rdma_mr); 706 if (cb->page_list && !IS_ERR(cb->page_list)) 707 ib_free_fast_reg_page_list(cb->page_list); 708 if (cb->dma_mr && !IS_ERR(cb->dma_mr)) 709 ib_dereg_mr(cb->dma_mr); 710 if (cb->recv_mr && !IS_ERR(cb->recv_mr)) 711 ib_dereg_mr(cb->recv_mr); 712 if (cb->send_mr && !IS_ERR(cb->send_mr)) 713 ib_dereg_mr(cb->send_mr); 714 if (cb->rdma_buf) 715 kfree(cb->rdma_buf); 716 if (cb->start_buf) 717 kfree(cb->start_buf); 718 return ret; 719 } 720 721 static void krping_free_buffers(struct krping_cb *cb) 722 { 723 DEBUG_LOG(cb, "krping_free_buffers called on cb %p\n", cb); 724 725 if (cb->dma_mr) 726 ib_dereg_mr(cb->dma_mr); 727 if (cb->send_mr) 728 ib_dereg_mr(cb->send_mr); 729 if (cb->recv_mr) 730 ib_dereg_mr(cb->recv_mr); 731 if (cb->rdma_mr) 732 ib_dereg_mr(cb->rdma_mr); 733 if (cb->start_mr) 734 ib_dereg_mr(cb->start_mr); 735 if (cb->fastreg_mr) 736 ib_dereg_mr(cb->fastreg_mr); 737 if (cb->mw) 738 ib_dealloc_mw(cb->mw); 739 740 dma_unmap_single(cb->pd->device->dma_device, 741 pci_unmap_addr(cb, recv_mapping), 742 sizeof(cb->recv_buf), DMA_BIDIRECTIONAL); 743 dma_unmap_single(cb->pd->device->dma_device, 744 pci_unmap_addr(cb, send_mapping), 745 sizeof(cb->send_buf), DMA_BIDIRECTIONAL); 746 dma_unmap_single(cb->pd->device->dma_device, 747 pci_unmap_addr(cb, rdma_mapping), 748 cb->size, DMA_BIDIRECTIONAL); 749 kfree(cb->rdma_buf); 750 if (cb->start_buf) { 751 dma_unmap_single(cb->pd->device->dma_device, 752 pci_unmap_addr(cb, start_mapping), 753 cb->size, DMA_BIDIRECTIONAL); 754 kfree(cb->start_buf); 755 } 756 } 757 758 static int krping_create_qp(struct krping_cb *cb) 759 { 760 struct ib_qp_init_attr init_attr; 761 int ret; 762 763 memset(&init_attr, 0, sizeof(init_attr)); 764 init_attr.cap.max_send_wr = 
cb->txdepth; 765 init_attr.cap.max_recv_wr = 2; 766 init_attr.cap.max_recv_sge = 1; 767 init_attr.cap.max_send_sge = 1; 768 init_attr.qp_type = IB_QPT_RC; 769 init_attr.send_cq = cb->cq; 770 init_attr.recv_cq = cb->cq; 771 init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 772 773 if (cb->server) { 774 ret = rdma_create_qp(cb->child_cm_id, cb->pd, &init_attr); 775 if (!ret) 776 cb->qp = cb->child_cm_id->qp; 777 } else { 778 ret = rdma_create_qp(cb->cm_id, cb->pd, &init_attr); 779 if (!ret) 780 cb->qp = cb->cm_id->qp; 781 } 782 783 return ret; 784 } 785 786 static void krping_free_qp(struct krping_cb *cb) 787 { 788 ib_destroy_qp(cb->qp); 789 ib_destroy_cq(cb->cq); 790 ib_dealloc_pd(cb->pd); 791 } 792 793 static int krping_setup_qp(struct krping_cb *cb, struct rdma_cm_id *cm_id) 794 { 795 int ret; 796 cb->pd = ib_alloc_pd(cm_id->device); 797 if (IS_ERR(cb->pd)) { 798 PRINTF(cb, "ib_alloc_pd failed\n"); 799 return PTR_ERR(cb->pd); 800 } 801 DEBUG_LOG(cb, "created pd %p\n", cb->pd); 802 803 strlcpy(cb->stats.name, cb->pd->device->name, sizeof(cb->stats.name)); 804 805 cb->cq = ib_create_cq(cm_id->device, krping_cq_event_handler, NULL, 806 cb, cb->txdepth * 2, 0); 807 if (IS_ERR(cb->cq)) { 808 PRINTF(cb, "ib_create_cq failed\n"); 809 ret = PTR_ERR(cb->cq); 810 goto err1; 811 } 812 DEBUG_LOG(cb, "created cq %p\n", cb->cq); 813 814 if (!cb->wlat && !cb->rlat && !cb->bw && !cb->frtest) { 815 ret = ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); 816 if (ret) { 817 PRINTF(cb, "ib_create_cq failed\n"); 818 goto err2; 819 } 820 } 821 822 ret = krping_create_qp(cb); 823 if (ret) { 824 PRINTF(cb, "krping_create_qp failed: %d\n", ret); 825 goto err2; 826 } 827 DEBUG_LOG(cb, "created qp %p\n", cb->qp); 828 return 0; 829 err2: 830 ib_destroy_cq(cb->cq); 831 err1: 832 ib_dealloc_pd(cb->pd); 833 return ret; 834 } 835 836 /* 837 * return the (possibly rebound) rkey for the rdma buffer. 838 * FASTREG mode: invalidate and rebind via fastreg wr. 839 * MW mode: rebind the MW. 
 * other modes: just return the mr rkey.
 */
static u32 krping_rdma_rkey(struct krping_cb *cb, u64 buf, int post_inv)
{
	u32 rkey = 0xffffffff;	/* returned unchanged if cb->mem is bogus */
	u64 p;
	struct ib_send_wr *bad_wr;
	int i;
	int ret;

	switch (cb->mem) {
	case FASTREG:
		cb->invalidate_wr.ex.invalidate_rkey = cb->fastreg_mr->rkey;

		/*
		 * Update the fastreg key.
		 */
		ib_update_fast_reg_key(cb->fastreg_mr, ++cb->key);
		cb->fastreg_wr.wr.fast_reg.rkey = cb->fastreg_mr->rkey;

		/*
		 * Update the fastreg WR with new buf info.
		 * The start buffer is only ever read remotely; the rdma
		 * buffer is the write sink and needs local write too.
		 */
		if (buf == (u64)cb->start_dma_addr)
			cb->fastreg_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_READ;
		else
			cb->fastreg_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
		cb->fastreg_wr.wr.fast_reg.iova_start = buf;
		p = (u64)(buf & PAGE_MASK);
		for (i=0; i < cb->fastreg_wr.wr.fast_reg.page_list_len;
		     i++, p += PAGE_SIZE) {
			cb->page_list->page_list[i] = p;
			DEBUG_LOG(cb, "page_list[%d] 0x%llx\n", i, p);
		}

		DEBUG_LOG(cb, "post_inv = %d, fastreg new rkey 0x%x shift %u len %u"
			" iova_start %llx page_list_len %u\n",
			post_inv,
			cb->fastreg_wr.wr.fast_reg.rkey,
			cb->fastreg_wr.wr.fast_reg.page_shift,
			cb->fastreg_wr.wr.fast_reg.length,
			cb->fastreg_wr.wr.fast_reg.iova_start,
			cb->fastreg_wr.wr.fast_reg.page_list_len);

		/*
		 * post_inv posts the LOCAL_INV + FAST_REG chain
		 * (invalidate_wr.next points at fastreg_wr); otherwise
		 * only the fastreg WR is posted.
		 */
		if (post_inv)
			ret = ib_post_send(cb->qp, &cb->invalidate_wr, &bad_wr);
		else
			ret = ib_post_send(cb->qp, &cb->fastreg_wr, &bad_wr);
		if (ret) {
			PRINTF(cb, "post send error %d\n", ret);
			cb->state = ERROR;
		}
		rkey = cb->fastreg_mr->rkey;
		break;
	case MW:
		/*
		 * Update the MW with new buf info.
		 */
		if (buf == (u64)cb->start_dma_addr) {
			cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_READ;
			cb->bind_attr.mr = cb->start_mr;
		} else {
			cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_WRITE;
			cb->bind_attr.mr = cb->rdma_mr;
		}
		cb->bind_attr.addr = buf;
		DEBUG_LOG(cb, "binding mw rkey 0x%x to buf %llx mr rkey 0x%x\n",
			cb->mw->rkey, buf, cb->bind_attr.mr->rkey);
		ret = ib_bind_mw(cb->qp, cb->mw, &cb->bind_attr);
		if (ret) {
			PRINTF(cb, "bind mw error %d\n", ret);
			cb->state = ERROR;
		} else
			rkey = cb->mw->rkey;
		break;
	case MR:
		if (buf == (u64)cb->start_dma_addr)
			rkey = cb->start_mr->rkey;
		else
			rkey = cb->rdma_mr->rkey;
		break;
	case DMA:
		rkey = cb->dma_mr->rkey;
		break;
	default:
		PRINTF(cb, "%s:%d case ERROR\n", __func__, __LINE__);
		cb->state = ERROR;
		break;
	}
	return rkey;
}

/*
 * Fill cb->send_buf with the big-endian rkey/addr/len advertisement for
 * the buffer at dma address 'buf'.  The rkey is (re)bound first via
 * krping_rdma_rkey().  The caller posts the actual send.
 */
static void krping_format_send(struct krping_cb *cb, u64 buf)
{
	struct krping_rdma_info *info = &cb->send_buf;
	u32 rkey;

	/*
	 * Client side will do fastreg or mw bind before
	 * advertising the rdma buffer.  Server side
	 * sends have no data.
	 */
	if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
		rkey = krping_rdma_rkey(cb, buf, !cb->server_invalidate);
		info->buf = htonll(buf);
		info->rkey = htonl(rkey);
		info->size = htonl(cb->size);
		DEBUG_LOG(cb, "RDMA addr %llx rkey %x len %d\n",
			  (unsigned long long)buf, rkey, cb->size);
	}
}

/*
 * Server side of the default ping/pong loop: for each iteration, RDMA
 * Read the client's source buffer, ack, then RDMA Write it back to the
 * client's sink buffer and ack again.  Exits the loop (and returns) on
 * any error or unexpected state transition.
 */
static void krping_test_server(struct krping_cb *cb)
{
	struct ib_send_wr *bad_wr, inv;
	int ret;

	while (1) {
		/* Wait for client's Start STAG/TO/Len */
		wait_event_interruptible(cb->sem, cb->state >= RDMA_READ_ADV);
		if (cb->state != RDMA_READ_ADV) {
			PRINTF(cb, "wait for RDMA_READ_ADV state %d\n",
				cb->state);
			break;
		}

		DEBUG_LOG(cb, "server received sink adv\n");

		cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
		cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
		cb->rdma_sq_wr.sg_list->length = cb->remote_len;
		cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, 1);
		cb->rdma_sq_wr.next = NULL;

		/* Issue RDMA Read. */
		if (cb->read_inv)
			cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
		else {

			cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ;
			if (cb->mem == FASTREG) {
				/*
				 * Immediately follow the read with a
				 * fenced LOCAL_INV.
				 */
				cb->rdma_sq_wr.next = &inv;
				memset(&inv, 0, sizeof inv);
				inv.opcode = IB_WR_LOCAL_INV;
				inv.ex.invalidate_rkey = cb->fastreg_mr->rkey;
				inv.send_flags = IB_SEND_FENCE;
			}
		}

		ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr);
		if (ret) {
			PRINTF(cb, "post send error %d\n", ret);
			break;
		}
		/* Unchain 'inv' (stack variable) before it goes stale. */
		cb->rdma_sq_wr.next = NULL;

		DEBUG_LOG(cb, "server posted rdma read req \n");

		/* Wait for read completion */
		wait_event_interruptible(cb->sem,
					 cb->state >= RDMA_READ_COMPLETE);
		if (cb->state != RDMA_READ_COMPLETE) {
			PRINTF(cb,
			       "wait for RDMA_READ_COMPLETE state %d\n",
			       cb->state);
			break;
		}
		DEBUG_LOG(cb, "server received read complete\n");

		/* Display data in recv buf */
		if (cb->verbose) {
			/*
			 * NOTE(review): strlen() assumes the remote ping
			 * data is NUL-terminated; the client appears to
			 * send C strings, but this is remote-written
			 * data -- confirm.
			 */
			if (strlen(cb->rdma_buf) > 128) {
				char msgbuf[128];

				strlcpy(msgbuf, cb->rdma_buf, sizeof(msgbuf));
				PRINTF(cb, "server ping data stripped: %s\n",
				       msgbuf);
			} else
				PRINTF(cb, "server ping data: %s\n",
				       cb->rdma_buf);
		}

		/* Tell client to continue */
		if (cb->server && cb->server_invalidate) {
			cb->sq_wr.ex.invalidate_rkey = cb->remote_rkey;
			cb->sq_wr.opcode = IB_WR_SEND_WITH_INV;
			DEBUG_LOG(cb, "send-w-inv rkey 0x%x\n", cb->remote_rkey);
		}
		ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
		if (ret) {
			PRINTF(cb, "post send error %d\n", ret);
			break;
		}
		DEBUG_LOG(cb, "server posted go ahead\n");

		/* Wait for client's RDMA STAG/TO/Len */
		wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV);
		if (cb->state != RDMA_WRITE_ADV) {
			PRINTF(cb,
			       "wait for RDMA_WRITE_ADV state %d\n",
			       cb->state);
			break;
		}
		DEBUG_LOG(cb, "server received sink adv\n");

		/* RDMA Write echo data */
		cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE;
		cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
		cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
		cb->rdma_sq_wr.sg_list->length = strlen(cb->rdma_buf) + 1;
		if (cb->local_dma_lkey)
			cb->rdma_sgl.lkey = cb->qp->device->local_dma_lkey;
		else
			cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, 0);

		DEBUG_LOG(cb, "rdma write from lkey %x laddr %llx len %d\n",
			  cb->rdma_sq_wr.sg_list->lkey,
			  (unsigned long long)cb->rdma_sq_wr.sg_list->addr,
			  cb->rdma_sq_wr.sg_list->length);

		ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr);
		if (ret) {
			PRINTF(cb, "post send error %d\n", ret);
			break;
		}

		/* Wait for completion */
		ret = wait_event_interruptible(cb->sem, cb->state >=
					       RDMA_WRITE_COMPLETE);
		if (cb->state != RDMA_WRITE_COMPLETE) {
			PRINTF(cb,
			       "wait for RDMA_WRITE_COMPLETE state %d\n",
			       cb->state);
			break;
		}
		DEBUG_LOG(cb, "server rdma write complete \n");

		cb->state = CONNECTED;

		/* Tell client to begin again */
		if (cb->server && cb->server_invalidate) {
			cb->sq_wr.ex.invalidate_rkey = cb->remote_rkey;
			cb->sq_wr.opcode = IB_WR_SEND_WITH_INV;
			DEBUG_LOG(cb, "send-w-inv rkey 0x%x\n", cb->remote_rkey);
		}
		ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
		if (ret) {
			PRINTF(cb, "post send error %d\n", ret);
			break;
		}
		DEBUG_LOG(cb, "server posted go ahead\n");
	}
}

/*
 * Read-latency test: issue cb->count back-to-back RDMA Reads of
 * cb->size bytes, one at a time, and report the total elapsed wall
 * time.  With -P (cb->poll) completions are busy-polled; otherwise the
 * CQ callback wakes us via cb->sem.
 */
static void rlat_test(struct krping_cb *cb)
{
	int scnt;
	int iters = cb->count;
	struct timeval start_tv, stop_tv;
	int ret;
	struct ib_wc wc;
	struct ib_send_wr *bad_wr;
	int ne;

	scnt = 0;
	cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ;
	cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
	cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
	cb->rdma_sq_wr.sg_list->length = cb->size;

	microtime(&start_tv);
	if (!cb->poll) {
		cb->state = RDMA_READ_ADV;
		ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
	}
	while (scnt < iters) {

		cb->state = RDMA_READ_ADV;
		ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr);
		if (ret) {
			PRINTF(cb,
				"Couldn't post send: ret=%d scnt %d\n",
				ret, scnt);
			return;
		}

		do {
			if (!cb->poll) {
				/* Event mode: the CQ handler advances
				 * cb->state; re-arm for the next one. */
				wait_event_interruptible(cb->sem,
					cb->state != RDMA_READ_ADV);
				if (cb->state == RDMA_READ_COMPLETE) {
					ne = 1;
					ib_req_notify_cq(cb->cq,
						IB_CQ_NEXT_COMP);
				} else {
					ne = -1;
				}
			} else
				ne = ib_poll_cq(cb->cq, 1, &wc);
			if (cb->state == ERROR) {
				PRINTF(cb,
					"state == ERROR...bailing scnt %d\n",
					scnt);
				return;
			}
		} while (ne == 0);

		if (ne < 0) {
			PRINTF(cb, "poll CQ failed %d\n", ne);
			return;
		}
		/* wc is only populated in poll mode. */
		if (cb->poll && wc.status != IB_WC_SUCCESS) {
			PRINTF(cb, "Completion wth error at %s:\n",
				cb->server ? "server" : "client");
			PRINTF(cb, "Failed status %d: wr_id %d\n",
				wc.status, (int) wc.wr_id);
			return;
		}
		++scnt;
	}
	microtime(&stop_tv);

	/* Normalize the microsecond borrow before printing. */
	if (stop_tv.tv_usec < start_tv.tv_usec) {
		stop_tv.tv_usec += 1000000;
		stop_tv.tv_sec -= 1;
	}

	PRINTF(cb, "delta sec %lu delta usec %lu iter %d size %d\n",
		stop_tv.tv_sec - start_tv.tv_sec,
		stop_tv.tv_usec - start_tv.tv_usec,
		scnt, cb->size);
}

/* Write-latency test (definition continues past this chunk). */
static void wlat_test(struct krping_cb *cb)
{
	int ccnt, scnt, rcnt;
	int iters=cb->count;
	volatile char *poll_buf = (char *) cb->start_buf;
	char *buf = (char *)cb->rdma_buf;
	struct timeval start_tv, stop_tv;
	cycles_t *post_cycles_start, *post_cycles_stop;
	cycles_t *poll_cycles_start, *poll_cycles_stop;
	cycles_t *last_poll_cycles_start;
	cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0;
	int i;
	int cycle_iters = 1000;

	ccnt = 0;
	scnt = 0;
	rcnt = 0;

	post_cycles_start = kmalloc(cycle_iters *
sizeof(cycles_t), GFP_KERNEL); 1196 if (!post_cycles_start) { 1197 PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); 1198 return; 1199 } 1200 post_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); 1201 if (!post_cycles_stop) { 1202 PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); 1203 return; 1204 } 1205 poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); 1206 if (!poll_cycles_start) { 1207 PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); 1208 return; 1209 } 1210 poll_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); 1211 if (!poll_cycles_stop) { 1212 PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); 1213 return; 1214 } 1215 last_poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), 1216 GFP_KERNEL); 1217 if (!last_poll_cycles_start) { 1218 PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); 1219 return; 1220 } 1221 cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; 1222 cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; 1223 cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; 1224 cb->rdma_sq_wr.sg_list->length = cb->size; 1225 1226 if (cycle_iters > iters) 1227 cycle_iters = iters; 1228 microtime(&start_tv); 1229 while (scnt < iters || ccnt < iters || rcnt < iters) { 1230 1231 /* Wait till buffer changes. 
*/ 1232 if (rcnt < iters && !(scnt < 1 && !cb->server)) { 1233 ++rcnt; 1234 while (*poll_buf != (char)rcnt) { 1235 if (cb->state == ERROR) { 1236 PRINTF(cb, 1237 "state = ERROR, bailing\n"); 1238 return; 1239 } 1240 } 1241 } 1242 1243 if (scnt < iters) { 1244 struct ib_send_wr *bad_wr; 1245 1246 *buf = (char)scnt+1; 1247 if (scnt < cycle_iters) 1248 post_cycles_start[scnt] = get_cycles(); 1249 if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { 1250 PRINTF(cb, 1251 "Couldn't post send: scnt=%d\n", 1252 scnt); 1253 return; 1254 } 1255 if (scnt < cycle_iters) 1256 post_cycles_stop[scnt] = get_cycles(); 1257 scnt++; 1258 } 1259 1260 if (ccnt < iters) { 1261 struct ib_wc wc; 1262 int ne; 1263 1264 if (ccnt < cycle_iters) 1265 poll_cycles_start[ccnt] = get_cycles(); 1266 do { 1267 if (ccnt < cycle_iters) 1268 last_poll_cycles_start[ccnt] = 1269 get_cycles(); 1270 ne = ib_poll_cq(cb->cq, 1, &wc); 1271 } while (ne == 0); 1272 if (ccnt < cycle_iters) 1273 poll_cycles_stop[ccnt] = get_cycles(); 1274 ++ccnt; 1275 1276 if (ne < 0) { 1277 PRINTF(cb, "poll CQ failed %d\n", ne); 1278 return; 1279 } 1280 if (wc.status != IB_WC_SUCCESS) { 1281 PRINTF(cb, 1282 "Completion wth error at %s:\n", 1283 cb->server ? 
"server" : "client"); 1284 PRINTF(cb, 1285 "Failed status %d: wr_id %d\n", 1286 wc.status, (int) wc.wr_id); 1287 PRINTF(cb, 1288 "scnt=%d, rcnt=%d, ccnt=%d\n", 1289 scnt, rcnt, ccnt); 1290 return; 1291 } 1292 } 1293 } 1294 microtime(&stop_tv); 1295 1296 if (stop_tv.tv_usec < start_tv.tv_usec) { 1297 stop_tv.tv_usec += 1000000; 1298 stop_tv.tv_sec -= 1; 1299 } 1300 1301 for (i=0; i < cycle_iters; i++) { 1302 sum_post += post_cycles_stop[i] - post_cycles_start[i]; 1303 sum_poll += poll_cycles_stop[i] - poll_cycles_start[i]; 1304 sum_last_poll += poll_cycles_stop[i]-last_poll_cycles_start[i]; 1305 } 1306 PRINTF(cb, 1307 "delta sec %lu delta usec %lu iter %d size %d cycle_iters %d" 1308 " sum_post %llu sum_poll %llu sum_last_poll %llu\n", 1309 stop_tv.tv_sec - start_tv.tv_sec, 1310 stop_tv.tv_usec - start_tv.tv_usec, 1311 scnt, cb->size, cycle_iters, 1312 (unsigned long long)sum_post, (unsigned long long)sum_poll, 1313 (unsigned long long)sum_last_poll); 1314 kfree(post_cycles_start); 1315 kfree(post_cycles_stop); 1316 kfree(poll_cycles_start); 1317 kfree(poll_cycles_stop); 1318 kfree(last_poll_cycles_start); 1319 } 1320 1321 static void bw_test(struct krping_cb *cb) 1322 { 1323 int ccnt, scnt, rcnt; 1324 int iters=cb->count; 1325 struct timeval start_tv, stop_tv; 1326 cycles_t *post_cycles_start, *post_cycles_stop; 1327 cycles_t *poll_cycles_start, *poll_cycles_stop; 1328 cycles_t *last_poll_cycles_start; 1329 cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0; 1330 int i; 1331 int cycle_iters = 1000; 1332 1333 ccnt = 0; 1334 scnt = 0; 1335 rcnt = 0; 1336 1337 post_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); 1338 if (!post_cycles_start) { 1339 PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); 1340 return; 1341 } 1342 post_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); 1343 if (!post_cycles_stop) { 1344 PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); 1345 return; 1346 } 1347 poll_cycles_start = kmalloc(cycle_iters * 
sizeof(cycles_t), GFP_KERNEL); 1348 if (!poll_cycles_start) { 1349 PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); 1350 return; 1351 } 1352 poll_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); 1353 if (!poll_cycles_stop) { 1354 PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); 1355 return; 1356 } 1357 last_poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), 1358 GFP_KERNEL); 1359 if (!last_poll_cycles_start) { 1360 PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); 1361 return; 1362 } 1363 cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; 1364 cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; 1365 cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; 1366 cb->rdma_sq_wr.sg_list->length = cb->size; 1367 1368 if (cycle_iters > iters) 1369 cycle_iters = iters; 1370 microtime(&start_tv); 1371 while (scnt < iters || ccnt < iters) { 1372 1373 while (scnt < iters && scnt - ccnt < cb->txdepth) { 1374 struct ib_send_wr *bad_wr; 1375 1376 if (scnt < cycle_iters) 1377 post_cycles_start[scnt] = get_cycles(); 1378 if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { 1379 PRINTF(cb, 1380 "Couldn't post send: scnt=%d\n", 1381 scnt); 1382 return; 1383 } 1384 if (scnt < cycle_iters) 1385 post_cycles_stop[scnt] = get_cycles(); 1386 ++scnt; 1387 } 1388 1389 if (ccnt < iters) { 1390 int ne; 1391 struct ib_wc wc; 1392 1393 if (ccnt < cycle_iters) 1394 poll_cycles_start[ccnt] = get_cycles(); 1395 do { 1396 if (ccnt < cycle_iters) 1397 last_poll_cycles_start[ccnt] = 1398 get_cycles(); 1399 ne = ib_poll_cq(cb->cq, 1, &wc); 1400 } while (ne == 0); 1401 if (ccnt < cycle_iters) 1402 poll_cycles_stop[ccnt] = get_cycles(); 1403 ccnt += 1; 1404 1405 if (ne < 0) { 1406 PRINTF(cb, "poll CQ failed %d\n", ne); 1407 return; 1408 } 1409 if (wc.status != IB_WC_SUCCESS) { 1410 PRINTF(cb, 1411 "Completion wth error at %s:\n", 1412 cb->server ? 
"server" : "client"); 1413 PRINTF(cb, 1414 "Failed status %d: wr_id %d\n", 1415 wc.status, (int) wc.wr_id); 1416 return; 1417 } 1418 } 1419 } 1420 microtime(&stop_tv); 1421 1422 if (stop_tv.tv_usec < start_tv.tv_usec) { 1423 stop_tv.tv_usec += 1000000; 1424 stop_tv.tv_sec -= 1; 1425 } 1426 1427 for (i=0; i < cycle_iters; i++) { 1428 sum_post += post_cycles_stop[i] - post_cycles_start[i]; 1429 sum_poll += poll_cycles_stop[i] - poll_cycles_start[i]; 1430 sum_last_poll += poll_cycles_stop[i]-last_poll_cycles_start[i]; 1431 } 1432 PRINTF(cb, 1433 "delta sec %lu delta usec %lu iter %d size %d cycle_iters %d" 1434 " sum_post %llu sum_poll %llu sum_last_poll %llu\n", 1435 stop_tv.tv_sec - start_tv.tv_sec, 1436 stop_tv.tv_usec - start_tv.tv_usec, 1437 scnt, cb->size, cycle_iters, 1438 (unsigned long long)sum_post, (unsigned long long)sum_poll, 1439 (unsigned long long)sum_last_poll); 1440 kfree(post_cycles_start); 1441 kfree(post_cycles_stop); 1442 kfree(poll_cycles_start); 1443 kfree(poll_cycles_stop); 1444 kfree(last_poll_cycles_start); 1445 } 1446 1447 static void krping_rlat_test_server(struct krping_cb *cb) 1448 { 1449 struct ib_send_wr *bad_wr; 1450 struct ib_wc wc; 1451 int ret; 1452 1453 /* Spin waiting for client's Start STAG/TO/Len */ 1454 while (cb->state < RDMA_READ_ADV) { 1455 krping_cq_event_handler(cb->cq, cb); 1456 } 1457 1458 /* Send STAG/TO/Len to client */ 1459 krping_format_send(cb, cb->start_dma_addr); 1460 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1461 if (ret) { 1462 PRINTF(cb, "post send error %d\n", ret); 1463 return; 1464 } 1465 1466 /* Spin waiting for send completion */ 1467 while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); 1468 if (ret < 0) { 1469 PRINTF(cb, "poll error %d\n", ret); 1470 return; 1471 } 1472 if (wc.status) { 1473 PRINTF(cb, "send completiong error %d\n", wc.status); 1474 return; 1475 } 1476 1477 wait_event_interruptible(cb->sem, cb->state == ERROR); 1478 } 1479 1480 static void krping_wlat_test_server(struct krping_cb *cb) 
1481 { 1482 struct ib_send_wr *bad_wr; 1483 struct ib_wc wc; 1484 int ret; 1485 1486 /* Spin waiting for client's Start STAG/TO/Len */ 1487 while (cb->state < RDMA_READ_ADV) { 1488 krping_cq_event_handler(cb->cq, cb); 1489 } 1490 1491 /* Send STAG/TO/Len to client */ 1492 krping_format_send(cb, cb->start_dma_addr); 1493 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1494 if (ret) { 1495 PRINTF(cb, "post send error %d\n", ret); 1496 return; 1497 } 1498 1499 /* Spin waiting for send completion */ 1500 while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); 1501 if (ret < 0) { 1502 PRINTF(cb, "poll error %d\n", ret); 1503 return; 1504 } 1505 if (wc.status) { 1506 PRINTF(cb, "send completiong error %d\n", wc.status); 1507 return; 1508 } 1509 1510 wlat_test(cb); 1511 wait_event_interruptible(cb->sem, cb->state == ERROR); 1512 } 1513 1514 static void krping_bw_test_server(struct krping_cb *cb) 1515 { 1516 struct ib_send_wr *bad_wr; 1517 struct ib_wc wc; 1518 int ret; 1519 1520 /* Spin waiting for client's Start STAG/TO/Len */ 1521 while (cb->state < RDMA_READ_ADV) { 1522 krping_cq_event_handler(cb->cq, cb); 1523 } 1524 1525 /* Send STAG/TO/Len to client */ 1526 krping_format_send(cb, cb->start_dma_addr); 1527 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1528 if (ret) { 1529 PRINTF(cb, "post send error %d\n", ret); 1530 return; 1531 } 1532 1533 /* Spin waiting for send completion */ 1534 while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); 1535 if (ret < 0) { 1536 PRINTF(cb, "poll error %d\n", ret); 1537 return; 1538 } 1539 if (wc.status) { 1540 PRINTF(cb, "send completiong error %d\n", wc.status); 1541 return; 1542 } 1543 1544 if (cb->duplex) 1545 bw_test(cb); 1546 wait_event_interruptible(cb->sem, cb->state == ERROR); 1547 } 1548 1549 static int fastreg_supported(struct krping_cb *cb) 1550 { 1551 struct ib_device *dev = cb->child_cm_id->device; 1552 struct ib_device_attr attr; 1553 int ret; 1554 1555 ret = ib_query_device(dev, &attr); 1556 if (ret) { 1557 PRINTF(cb, 
"ib_query_device failed ret %d\n", ret); 1558 return 0; 1559 } 1560 if (!(attr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) { 1561 PRINTF(cb, "Fastreg not supported - device_cap_flags 0x%x\n", 1562 attr.device_cap_flags); 1563 return 0; 1564 } 1565 DEBUG_LOG(cb, "Fastreg supported - device_cap_flags 0x%x\n", 1566 attr.device_cap_flags); 1567 return 1; 1568 } 1569 1570 static int krping_bind_server(struct krping_cb *cb) 1571 { 1572 struct sockaddr_in sin; 1573 int ret; 1574 1575 memset(&sin, 0, sizeof(sin)); 1576 sin.sin_len = sizeof sin; 1577 sin.sin_family = AF_INET; 1578 sin.sin_addr.s_addr = cb->addr.s_addr; 1579 sin.sin_port = cb->port; 1580 1581 ret = rdma_bind_addr(cb->cm_id, (struct sockaddr *) &sin); 1582 if (ret) { 1583 PRINTF(cb, "rdma_bind_addr error %d\n", ret); 1584 return ret; 1585 } 1586 DEBUG_LOG(cb, "rdma_bind_addr successful\n"); 1587 1588 DEBUG_LOG(cb, "rdma_listen\n"); 1589 ret = rdma_listen(cb->cm_id, 3); 1590 if (ret) { 1591 PRINTF(cb, "rdma_listen failed: %d\n", ret); 1592 return ret; 1593 } 1594 1595 wait_event_interruptible(cb->sem, cb->state >= CONNECT_REQUEST); 1596 if (cb->state != CONNECT_REQUEST) { 1597 PRINTF(cb, "wait for CONNECT_REQUEST state %d\n", 1598 cb->state); 1599 return -1; 1600 } 1601 1602 if (cb->mem == FASTREG && !fastreg_supported(cb)) 1603 return -EINVAL; 1604 1605 return 0; 1606 } 1607 1608 static void krping_run_server(struct krping_cb *cb) 1609 { 1610 struct ib_recv_wr *bad_wr; 1611 int ret; 1612 1613 ret = krping_bind_server(cb); 1614 if (ret) 1615 return; 1616 1617 ret = krping_setup_qp(cb, cb->child_cm_id); 1618 if (ret) { 1619 PRINTF(cb, "setup_qp failed: %d\n", ret); 1620 goto err0; 1621 } 1622 1623 ret = krping_setup_buffers(cb); 1624 if (ret) { 1625 PRINTF(cb, "krping_setup_buffers failed: %d\n", ret); 1626 goto err1; 1627 } 1628 1629 ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); 1630 if (ret) { 1631 PRINTF(cb, "ib_post_recv failed: %d\n", ret); 1632 goto err2; 1633 } 1634 1635 ret = krping_accept(cb); 
1636 if (ret) { 1637 PRINTF(cb, "connect error %d\n", ret); 1638 goto err2; 1639 } 1640 1641 if (cb->wlat) 1642 krping_wlat_test_server(cb); 1643 else if (cb->rlat) 1644 krping_rlat_test_server(cb); 1645 else if (cb->bw) 1646 krping_bw_test_server(cb); 1647 else 1648 krping_test_server(cb); 1649 rdma_disconnect(cb->child_cm_id); 1650 err2: 1651 krping_free_buffers(cb); 1652 err1: 1653 krping_free_qp(cb); 1654 err0: 1655 rdma_destroy_id(cb->child_cm_id); 1656 } 1657 1658 static void krping_test_client(struct krping_cb *cb) 1659 { 1660 int ping, start, cc, i, ret; 1661 struct ib_send_wr *bad_wr; 1662 unsigned char c; 1663 1664 start = 65; 1665 for (ping = 0; !cb->count || ping < cb->count; ping++) { 1666 cb->state = RDMA_READ_ADV; 1667 1668 /* Put some ascii text in the buffer. */ 1669 cc = sprintf(cb->start_buf, "rdma-ping-%d: ", ping); 1670 for (i = cc, c = start; i < cb->size; i++) { 1671 cb->start_buf[i] = c; 1672 c++; 1673 if (c > 122) 1674 c = 65; 1675 } 1676 start++; 1677 if (start > 122) 1678 start = 65; 1679 cb->start_buf[cb->size - 1] = 0; 1680 1681 krping_format_send(cb, cb->start_dma_addr); 1682 if (cb->state == ERROR) { 1683 PRINTF(cb, "krping_format_send failed\n"); 1684 break; 1685 } 1686 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1687 if (ret) { 1688 PRINTF(cb, "post send error %d\n", ret); 1689 break; 1690 } 1691 1692 /* Wait for server to ACK */ 1693 wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV); 1694 if (cb->state != RDMA_WRITE_ADV) { 1695 PRINTF(cb, 1696 "wait for RDMA_WRITE_ADV state %d\n", 1697 cb->state); 1698 break; 1699 } 1700 1701 krping_format_send(cb, cb->rdma_dma_addr); 1702 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1703 if (ret) { 1704 PRINTF(cb, "post send error %d\n", ret); 1705 break; 1706 } 1707 1708 /* Wait for the server to say the RDMA Write is complete. 
*/ 1709 wait_event_interruptible(cb->sem, 1710 cb->state >= RDMA_WRITE_COMPLETE); 1711 if (cb->state != RDMA_WRITE_COMPLETE) { 1712 PRINTF(cb, 1713 "wait for RDMA_WRITE_COMPLETE state %d\n", 1714 cb->state); 1715 break; 1716 } 1717 1718 if (cb->validate) 1719 if (memcmp(cb->start_buf, cb->rdma_buf, cb->size)) { 1720 PRINTF(cb, "data mismatch!\n"); 1721 break; 1722 } 1723 1724 if (cb->verbose) { 1725 if (strlen(cb->rdma_buf) > 128) { 1726 char msgbuf[128]; 1727 1728 strlcpy(msgbuf, cb->rdma_buf, sizeof(msgbuf)); 1729 PRINTF(cb, "ping data stripped: %s\n", 1730 msgbuf); 1731 } else 1732 PRINTF(cb, "ping data: %s\n", cb->rdma_buf); 1733 } 1734 #ifdef SLOW_KRPING 1735 wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ); 1736 #endif 1737 } 1738 } 1739 1740 static void krping_rlat_test_client(struct krping_cb *cb) 1741 { 1742 struct ib_send_wr *bad_wr; 1743 struct ib_wc wc; 1744 int ret; 1745 1746 cb->state = RDMA_READ_ADV; 1747 1748 /* Send STAG/TO/Len to client */ 1749 krping_format_send(cb, cb->start_dma_addr); 1750 if (cb->state == ERROR) { 1751 PRINTF(cb, "krping_format_send failed\n"); 1752 return; 1753 } 1754 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1755 if (ret) { 1756 PRINTF(cb, "post send error %d\n", ret); 1757 return; 1758 } 1759 1760 /* Spin waiting for send completion */ 1761 while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); 1762 if (ret < 0) { 1763 PRINTF(cb, "poll error %d\n", ret); 1764 return; 1765 } 1766 if (wc.status) { 1767 PRINTF(cb, "send completion error %d\n", wc.status); 1768 return; 1769 } 1770 1771 /* Spin waiting for server's Start STAG/TO/Len */ 1772 while (cb->state < RDMA_WRITE_ADV) { 1773 krping_cq_event_handler(cb->cq, cb); 1774 } 1775 1776 #if 0 1777 { 1778 int i; 1779 struct timeval start, stop; 1780 time_t sec; 1781 suseconds_t usec; 1782 unsigned long long elapsed; 1783 struct ib_wc wc; 1784 struct ib_send_wr *bad_wr; 1785 int ne; 1786 1787 cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; 1788 
cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; 1789 cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; 1790 cb->rdma_sq_wr.sg_list->length = 0; 1791 cb->rdma_sq_wr.num_sge = 0; 1792 1793 microtime(&start); 1794 for (i=0; i < 100000; i++) { 1795 if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { 1796 PRINTF(cb, "Couldn't post send\n"); 1797 return; 1798 } 1799 do { 1800 ne = ib_poll_cq(cb->cq, 1, &wc); 1801 } while (ne == 0); 1802 if (ne < 0) { 1803 PRINTF(cb, "poll CQ failed %d\n", ne); 1804 return; 1805 } 1806 if (wc.status != IB_WC_SUCCESS) { 1807 PRINTF(cb, "Completion wth error at %s:\n", 1808 cb->server ? "server" : "client"); 1809 PRINTF(cb, "Failed status %d: wr_id %d\n", 1810 wc.status, (int) wc.wr_id); 1811 return; 1812 } 1813 } 1814 microtime(&stop); 1815 1816 if (stop.tv_usec < start.tv_usec) { 1817 stop.tv_usec += 1000000; 1818 stop.tv_sec -= 1; 1819 } 1820 sec = stop.tv_sec - start.tv_sec; 1821 usec = stop.tv_usec - start.tv_usec; 1822 elapsed = sec * 1000000 + usec; 1823 PRINTF(cb, "0B-write-lat iters 100000 usec %llu\n", elapsed); 1824 } 1825 #endif 1826 1827 rlat_test(cb); 1828 } 1829 1830 static void krping_wlat_test_client(struct krping_cb *cb) 1831 { 1832 struct ib_send_wr *bad_wr; 1833 struct ib_wc wc; 1834 int ret; 1835 1836 cb->state = RDMA_READ_ADV; 1837 1838 /* Send STAG/TO/Len to client */ 1839 krping_format_send(cb, cb->start_dma_addr); 1840 if (cb->state == ERROR) { 1841 PRINTF(cb, "krping_format_send failed\n"); 1842 return; 1843 } 1844 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1845 if (ret) { 1846 PRINTF(cb, "post send error %d\n", ret); 1847 return; 1848 } 1849 1850 /* Spin waiting for send completion */ 1851 while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); 1852 if (ret < 0) { 1853 PRINTF(cb, "poll error %d\n", ret); 1854 return; 1855 } 1856 if (wc.status) { 1857 PRINTF(cb, "send completion error %d\n", wc.status); 1858 return; 1859 } 1860 1861 /* Spin waiting for server's Start STAG/TO/Len */ 1862 while (cb->state < 
RDMA_WRITE_ADV) { 1863 krping_cq_event_handler(cb->cq, cb); 1864 } 1865 1866 wlat_test(cb); 1867 } 1868 1869 static void krping_bw_test_client(struct krping_cb *cb) 1870 { 1871 struct ib_send_wr *bad_wr; 1872 struct ib_wc wc; 1873 int ret; 1874 1875 cb->state = RDMA_READ_ADV; 1876 1877 /* Send STAG/TO/Len to client */ 1878 krping_format_send(cb, cb->start_dma_addr); 1879 if (cb->state == ERROR) { 1880 PRINTF(cb, "krping_format_send failed\n"); 1881 return; 1882 } 1883 ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1884 if (ret) { 1885 PRINTF(cb, "post send error %d\n", ret); 1886 return; 1887 } 1888 1889 /* Spin waiting for send completion */ 1890 while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); 1891 if (ret < 0) { 1892 PRINTF(cb, "poll error %d\n", ret); 1893 return; 1894 } 1895 if (wc.status) { 1896 PRINTF(cb, "send completion error %d\n", wc.status); 1897 return; 1898 } 1899 1900 /* Spin waiting for server's Start STAG/TO/Len */ 1901 while (cb->state < RDMA_WRITE_ADV) { 1902 krping_cq_event_handler(cb->cq, cb); 1903 } 1904 1905 bw_test(cb); 1906 } 1907 1908 static void krping_fr_test(struct krping_cb *cb) 1909 { 1910 struct ib_fast_reg_page_list *pl; 1911 struct ib_send_wr fr, inv, *bad; 1912 struct ib_wc wc; 1913 u8 key = 0; 1914 struct ib_mr *mr; 1915 int i; 1916 int ret; 1917 int size = cb->size; 1918 int plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT; 1919 time_t start; 1920 int count = 0; 1921 int scnt = 0; 1922 1923 pl = ib_alloc_fast_reg_page_list(cb->qp->device, plen); 1924 if (IS_ERR(pl)) { 1925 PRINTF(cb, "ib_alloc_fast_reg_page_list failed %ld\n", PTR_ERR(pl)); 1926 return; 1927 } 1928 1929 mr = ib_alloc_fast_reg_mr(cb->pd, plen); 1930 if (IS_ERR(mr)) { 1931 PRINTF(cb, "ib_alloc_fast_reg_mr failed %ld\n", PTR_ERR(pl)); 1932 goto err1; 1933 } 1934 1935 for (i=0; i<plen; i++) 1936 pl->page_list[i] = 0xcafebabe | i; 1937 1938 memset(&fr, 0, sizeof fr); 1939 fr.opcode = IB_WR_FAST_REG_MR; 1940 fr.wr.fast_reg.page_shift = PAGE_SHIFT; 1941 
fr.wr.fast_reg.length = size; 1942 fr.wr.fast_reg.page_list = pl; 1943 fr.wr.fast_reg.page_list_len = plen; 1944 fr.wr.fast_reg.iova_start = 0; 1945 fr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE; 1946 fr.next = &inv; 1947 memset(&inv, 0, sizeof inv); 1948 inv.opcode = IB_WR_LOCAL_INV; 1949 inv.send_flags = IB_SEND_SIGNALED; 1950 1951 DEBUG_LOG(cb, "fr_test: stag index 0x%x plen %u size %u depth %u\n", mr->rkey >> 8, plen, cb->size, cb->txdepth); 1952 start = time_uptime; 1953 while (1) { 1954 if ((time_uptime - start) >= 9) { 1955 DEBUG_LOG(cb, "fr_test: pausing 1 second! count %u latest size %u plen %u\n", count, size, plen); 1956 wait_event_interruptible(cb->sem, cb->state == ERROR); 1957 if (cb->state == ERROR) 1958 break; 1959 start = time_uptime; 1960 } 1961 while (scnt < (cb->txdepth>>1)) { 1962 ib_update_fast_reg_key(mr, ++key); 1963 fr.wr.fast_reg.rkey = mr->rkey; 1964 inv.ex.invalidate_rkey = mr->rkey; 1965 size = arc4random() % cb->size; 1966 if (size == 0) 1967 size = cb->size; 1968 plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT; 1969 fr.wr.fast_reg.length = size; 1970 fr.wr.fast_reg.page_list_len = plen; 1971 ret = ib_post_send(cb->qp, &fr, &bad); 1972 if (ret) { 1973 PRINTF(cb, "ib_post_send failed %d\n", ret); 1974 goto err2; 1975 } 1976 scnt++; 1977 } 1978 1979 do { 1980 ret = ib_poll_cq(cb->cq, 1, &wc); 1981 if (ret < 0) { 1982 PRINTF(cb, "ib_poll_cq failed %d\n", ret); 1983 goto err2; 1984 } 1985 if (ret == 1) { 1986 if (wc.status) { 1987 PRINTF(cb, "completion error %u\n", wc.status); 1988 goto err2; 1989 } 1990 count++; 1991 scnt--; 1992 } 1993 else if (krping_sigpending()) { 1994 PRINTF(cb, "signal!\n"); 1995 goto err2; 1996 } 1997 } while (ret == 1); 1998 } 1999 err2: 2000 #if 0 2001 DEBUG_LOG(cb, "sleeping 1 second\n"); 2002 wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ); 2003 #endif 2004 DEBUG_LOG(cb, "draining the cq...\n"); 2005 do { 2006 ret = ib_poll_cq(cb->cq, 1, &wc); 2007 
if (ret < 0) { 2008 PRINTF(cb, "ib_poll_cq failed %d\n", ret); 2009 break; 2010 } 2011 if (ret == 1) { 2012 if (wc.status) { 2013 PRINTF(cb, "completion error %u opcode %u\n", wc.status, wc.opcode); 2014 } 2015 } 2016 } while (ret == 1); 2017 DEBUG_LOG(cb, "fr_test: done!\n"); 2018 ib_dereg_mr(mr); 2019 err1: 2020 ib_free_fast_reg_page_list(pl); 2021 } 2022 2023 static int krping_connect_client(struct krping_cb *cb) 2024 { 2025 struct rdma_conn_param conn_param; 2026 int ret; 2027 2028 memset(&conn_param, 0, sizeof conn_param); 2029 conn_param.responder_resources = 1; 2030 conn_param.initiator_depth = 1; 2031 conn_param.retry_count = 10; 2032 2033 ret = rdma_connect(cb->cm_id, &conn_param); 2034 if (ret) { 2035 PRINTF(cb, "rdma_connect error %d\n", ret); 2036 return ret; 2037 } 2038 2039 wait_event_interruptible(cb->sem, cb->state >= CONNECTED); 2040 if (cb->state == ERROR) { 2041 PRINTF(cb, "wait for CONNECTED state %d\n", cb->state); 2042 return -1; 2043 } 2044 2045 DEBUG_LOG(cb, "rdma_connect successful\n"); 2046 return 0; 2047 } 2048 2049 static int krping_bind_client(struct krping_cb *cb) 2050 { 2051 struct sockaddr_in sin; 2052 int ret; 2053 2054 memset(&sin, 0, sizeof(sin)); 2055 sin.sin_len = sizeof sin; 2056 sin.sin_family = AF_INET; 2057 sin.sin_addr.s_addr = cb->addr.s_addr; 2058 sin.sin_port = cb->port; 2059 2060 ret = rdma_resolve_addr(cb->cm_id, NULL, (struct sockaddr *) &sin, 2061 2000); 2062 if (ret) { 2063 PRINTF(cb, "rdma_resolve_addr error %d\n", ret); 2064 return ret; 2065 } 2066 2067 wait_event_interruptible(cb->sem, cb->state >= ROUTE_RESOLVED); 2068 if (cb->state != ROUTE_RESOLVED) { 2069 PRINTF(cb, 2070 "addr/route resolution did not resolve: state %d\n", 2071 cb->state); 2072 return -EINTR; 2073 } 2074 2075 if (cb->mem == FASTREG && !fastreg_supported(cb)) 2076 return -EINVAL; 2077 2078 DEBUG_LOG(cb, "rdma_resolve_addr - rdma_resolve_route successful\n"); 2079 return 0; 2080 } 2081 2082 static void krping_run_client(struct krping_cb *cb) 
2083 { 2084 struct ib_recv_wr *bad_wr; 2085 int ret; 2086 2087 ret = krping_bind_client(cb); 2088 if (ret) 2089 return; 2090 2091 ret = krping_setup_qp(cb, cb->cm_id); 2092 if (ret) { 2093 PRINTF(cb, "setup_qp failed: %d\n", ret); 2094 return; 2095 } 2096 2097 ret = krping_setup_buffers(cb); 2098 if (ret) { 2099 PRINTF(cb, "krping_setup_buffers failed: %d\n", ret); 2100 goto err1; 2101 } 2102 2103 ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); 2104 if (ret) { 2105 PRINTF(cb, "ib_post_recv failed: %d\n", ret); 2106 goto err2; 2107 } 2108 2109 ret = krping_connect_client(cb); 2110 if (ret) { 2111 PRINTF(cb, "connect error %d\n", ret); 2112 goto err2; 2113 } 2114 2115 if (cb->wlat) 2116 krping_wlat_test_client(cb); 2117 else if (cb->rlat) 2118 krping_rlat_test_client(cb); 2119 else if (cb->bw) 2120 krping_bw_test_client(cb); 2121 else if (cb->frtest) 2122 krping_fr_test(cb); 2123 else 2124 krping_test_client(cb); 2125 rdma_disconnect(cb->cm_id); 2126 err2: 2127 krping_free_buffers(cb); 2128 err1: 2129 krping_free_qp(cb); 2130 } 2131 2132 int krping_doit(char *cmd, void *cookie) 2133 { 2134 struct krping_cb *cb; 2135 int op; 2136 int ret = 0; 2137 char *optarg; 2138 unsigned long optint; 2139 2140 cb = kzalloc(sizeof(*cb), GFP_KERNEL); 2141 if (!cb) 2142 return -ENOMEM; 2143 2144 mutex_lock(&krping_mutex); 2145 list_add_tail(&cb->list, &krping_cbs); 2146 mutex_unlock(&krping_mutex); 2147 2148 cb->cookie = cookie; 2149 cb->server = -1; 2150 cb->state = IDLE; 2151 cb->size = 64; 2152 cb->txdepth = RPING_SQ_DEPTH; 2153 cb->mem = DMA; 2154 init_waitqueue_head(&cb->sem); 2155 2156 while ((op = krping_getopt("krping", &cmd, krping_opts, NULL, &optarg, 2157 &optint)) != 0) { 2158 switch (op) { 2159 case 'a': 2160 cb->addr_str = optarg; 2161 DEBUG_LOG(cb, "ipaddr (%s)\n", optarg); 2162 if (!inet_aton(optarg, &cb->addr)) { 2163 PRINTF(cb, "bad addr string %s\n", 2164 optarg); 2165 ret = EINVAL; 2166 } 2167 break; 2168 case 'p': 2169 cb->port = htons(optint); 2170 
DEBUG_LOG(cb, "port %d\n", (int)optint); 2171 break; 2172 case 'P': 2173 cb->poll = 1; 2174 DEBUG_LOG(cb, "server\n"); 2175 break; 2176 case 's': 2177 cb->server = 1; 2178 DEBUG_LOG(cb, "server\n"); 2179 break; 2180 case 'c': 2181 cb->server = 0; 2182 DEBUG_LOG(cb, "client\n"); 2183 break; 2184 case 'S': 2185 cb->size = optint; 2186 if ((cb->size < 1) || 2187 (cb->size > RPING_BUFSIZE)) { 2188 PRINTF(cb, "Invalid size %d " 2189 "(valid range is 1 to %d)\n", 2190 cb->size, RPING_BUFSIZE); 2191 ret = EINVAL; 2192 } else 2193 DEBUG_LOG(cb, "size %d\n", (int)optint); 2194 break; 2195 case 'C': 2196 cb->count = optint; 2197 if (cb->count < 0) { 2198 PRINTF(cb, "Invalid count %d\n", 2199 cb->count); 2200 ret = EINVAL; 2201 } else 2202 DEBUG_LOG(cb, "count %d\n", (int) cb->count); 2203 break; 2204 case 'v': 2205 cb->verbose++; 2206 DEBUG_LOG(cb, "verbose\n"); 2207 break; 2208 case 'V': 2209 cb->validate++; 2210 DEBUG_LOG(cb, "validate data\n"); 2211 break; 2212 case 'l': 2213 cb->wlat++; 2214 break; 2215 case 'L': 2216 cb->rlat++; 2217 break; 2218 case 'B': 2219 cb->bw++; 2220 break; 2221 case 'd': 2222 cb->duplex++; 2223 break; 2224 case 'm': 2225 if (!strncmp(optarg, "dma", 3)) 2226 cb->mem = DMA; 2227 else if (!strncmp(optarg, "fastreg", 7)) 2228 cb->mem = FASTREG; 2229 else if (!strncmp(optarg, "mw", 2)) 2230 cb->mem = MW; 2231 else if (!strncmp(optarg, "mr", 2)) 2232 cb->mem = MR; 2233 else { 2234 PRINTF(cb, "unknown mem mode %s. 
" 2235 "Must be dma, fastreg, mw, or mr\n", 2236 optarg); 2237 ret = -EINVAL; 2238 break; 2239 } 2240 break; 2241 case 'I': 2242 cb->server_invalidate = 1; 2243 break; 2244 case 'T': 2245 cb->txdepth = optint; 2246 DEBUG_LOG(cb, "txdepth %d\n", (int) cb->txdepth); 2247 break; 2248 case 'Z': 2249 cb->local_dma_lkey = 1; 2250 DEBUG_LOG(cb, "using local dma lkey\n"); 2251 break; 2252 case 'R': 2253 cb->read_inv = 1; 2254 DEBUG_LOG(cb, "using read-with-inv\n"); 2255 break; 2256 case 'f': 2257 cb->frtest = 1; 2258 DEBUG_LOG(cb, "fast-reg test!\n"); 2259 break; 2260 default: 2261 PRINTF(cb, "unknown opt %s\n", optarg); 2262 ret = -EINVAL; 2263 break; 2264 } 2265 } 2266 if (ret) 2267 goto out; 2268 2269 if (cb->server == -1) { 2270 PRINTF(cb, "must be either client or server\n"); 2271 ret = -EINVAL; 2272 goto out; 2273 } 2274 2275 if (cb->server && cb->frtest) { 2276 PRINTF(cb, "must be client to run frtest\n"); 2277 ret = -EINVAL; 2278 goto out; 2279 } 2280 2281 if ((cb->frtest + cb->bw + cb->rlat + cb->wlat) > 1) { 2282 PRINTF(cb, "Pick only one test: fr, bw, rlat, wlat\n"); 2283 ret = -EINVAL; 2284 goto out; 2285 } 2286 2287 if (cb->server_invalidate && cb->mem != FASTREG) { 2288 PRINTF(cb, "server_invalidate only valid with fastreg mem_mode\n"); 2289 ret = -EINVAL; 2290 goto out; 2291 } 2292 2293 if (cb->read_inv && cb->mem != FASTREG) { 2294 PRINTF(cb, "read_inv only valid with fastreg mem_mode\n"); 2295 ret = -EINVAL; 2296 goto out; 2297 } 2298 2299 if (cb->mem != MR && (cb->wlat || cb->rlat || cb->bw)) { 2300 PRINTF(cb, "wlat, rlat, and bw tests only support mem_mode MR\n"); 2301 ret = -EINVAL; 2302 goto out; 2303 } 2304 2305 cb->cm_id = rdma_create_id(krping_cma_event_handler, cb, RDMA_PS_TCP); 2306 if (IS_ERR(cb->cm_id)) { 2307 ret = PTR_ERR(cb->cm_id); 2308 PRINTF(cb, "rdma_create_id error %d\n", ret); 2309 goto out; 2310 } 2311 DEBUG_LOG(cb, "created cm_id %p\n", cb->cm_id); 2312 2313 if (cb->server) 2314 krping_run_server(cb); 2315 else 2316 
krping_run_client(cb); 2317 2318 DEBUG_LOG(cb, "destroy cm_id %p\n", cb->cm_id); 2319 rdma_destroy_id(cb->cm_id); 2320 out: 2321 mutex_lock(&krping_mutex); 2322 list_del(&cb->list); 2323 mutex_unlock(&krping_mutex); 2324 kfree(cb); 2325 return ret; 2326 } 2327 2328 void 2329 krping_walk_cb_list(void (*f)(struct krping_stats *, void *), void *arg) 2330 { 2331 struct krping_cb *cb; 2332 2333 mutex_lock(&krping_mutex); 2334 list_for_each_entry(cb, &krping_cbs, list) 2335 (*f)(cb->pd ? &cb->stats : NULL, arg); 2336 mutex_unlock(&krping_mutex); 2337 } 2338 2339 void krping_init(void) 2340 { 2341 2342 mutex_init(&krping_mutex); 2343 } 2344