1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 /* 3 * Copyright (c) 2014-2017 Oracle. All rights reserved. 4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the BSD-type 10 * license below: 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 16 * Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 19 * Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials provided 22 * with the distribution. 23 * 24 * Neither the name of the Network Appliance, Inc. nor the names of 25 * its contributors may be used to endorse or promote products 26 * derived from this software without specific prior written 27 * permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 40 */ 41 42 /* 43 * verbs.c 44 * 45 * Encapsulates the major functions managing: 46 * o adapters 47 * o endpoints 48 * o connections 49 * o buffer memory 50 */ 51 52 #include <linux/interrupt.h> 53 #include <linux/slab.h> 54 #include <linux/sunrpc/addr.h> 55 #include <linux/sunrpc/svc_rdma.h> 56 #include <linux/log2.h> 57 58 #include <asm-generic/barrier.h> 59 #include <asm/bitops.h> 60 61 #include <rdma/ib_cm.h> 62 63 #include "xprt_rdma.h" 64 #include <trace/events/rpcrdma.h> 65 66 static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt); 67 static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt); 68 static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, 69 struct rpcrdma_sendctx *sc); 70 static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt); 71 static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt); 72 static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep); 73 static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt); 74 static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); 75 static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); 76 static void rpcrdma_ep_get(struct rpcrdma_ep *ep); 77 static int rpcrdma_ep_put(struct rpcrdma_ep *ep); 78 static struct rpcrdma_regbuf * 79 rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction); 80 static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb); 81 static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb); 82 83 /* Wait for outstanding transport work to finish. ib_drain_qp 84 * handles the drains in the wrong order for us, so open code 85 * them here. 86 */ 87 static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) 88 { 89 struct rpcrdma_ep *ep = r_xprt->rx_ep; 90 struct rdma_cm_id *id = ep->re_id; 91 92 /* Wait for rpcrdma_post_recvs() to leave its critical 93 * section. 94 */ 95 if (atomic_inc_return(&ep->re_receiving) > 1) 96 wait_for_completion(&ep->re_done); 97 98 /* Flush Receives, then wait for deferred Reply work 99 * to complete. 100 */ 101 ib_drain_rq(id->qp); 102 103 /* Deferred Reply processing might have scheduled 104 * local invalidations. 105 */ 106 ib_drain_sq(id->qp); 107 108 rpcrdma_ep_put(ep); 109 } 110 111 /* Ensure xprt_force_disconnect() is invoked exactly once when a 112 * connection is closed or lost. (The important thing is it needs 113 * to be invoked "at least" once). 114 */ 115 void rpcrdma_force_disconnect(struct rpcrdma_ep *ep) 116 { 117 if (atomic_add_unless(&ep->re_force_disconnect, 1, 1)) 118 xprt_force_disconnect(ep->re_xprt); 119 } 120 121 /** 122 * rpcrdma_flush_disconnect - Disconnect on flushed completion 123 * @r_xprt: transport to disconnect 124 * @wc: work completion entry 125 * 126 * Must be called in process context. 127 */ 128 void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc) 129 { 130 if (wc->status != IB_WC_SUCCESS) 131 rpcrdma_force_disconnect(r_xprt->rx_ep); 132 } 133 134 /** 135 * rpcrdma_wc_send - Invoked by RDMA provider for each polled Send WC 136 * @cq: completion queue 137 * @wc: WCE for a completed Send WR 138 * 139 */ 140 static void rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) 141 { 142 struct ib_cqe *cqe = wc->wr_cqe; 143 struct rpcrdma_sendctx *sc = 144 container_of(cqe, struct rpcrdma_sendctx, sc_cqe); 145 struct rpcrdma_xprt *r_xprt = cq->cq_context; 146 147 /* WARNING: Only wr_cqe and status are reliable at this point */ 148 trace_xprtrdma_wc_send(wc, &sc->sc_cid); 149 rpcrdma_sendctx_put_locked(r_xprt, sc); 150 rpcrdma_flush_disconnect(r_xprt, wc); 151 } 152 153 /** 154 * rpcrdma_wc_receive - Invoked by RDMA provider for each polled Receive WC 155 * @cq: completion queue 156 * @wc: WCE for a completed Receive WR 157 * 158 */ 159 static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) 160 { 161 struct ib_cqe *cqe = wc->wr_cqe; 162 struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep, 163 rr_cqe); 164 struct rpcrdma_xprt *r_xprt = cq->cq_context; 165 166 /* WARNING: Only wr_cqe and status are reliable at this point */ 167 trace_xprtrdma_wc_receive(wc, &rep->rr_cid); 168 --r_xprt->rx_ep->re_receive_count; 169 if (wc->status != IB_WC_SUCCESS) 170 goto out_flushed; 171 172 /* status == SUCCESS means all fields in wc are trustworthy */ 173 rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len); 174 rep->rr_wc_flags = wc->wc_flags; 175 rep->rr_inv_rkey = wc->ex.invalidate_rkey; 176 177 ib_dma_sync_single_for_cpu(rdmab_device(rep->rr_rdmabuf), 178 rdmab_addr(rep->rr_rdmabuf), 179 wc->byte_len, DMA_FROM_DEVICE); 180 181 rpcrdma_reply_handler(rep); 182 return; 183 184 out_flushed: 185 rpcrdma_flush_disconnect(r_xprt, wc); 186 rpcrdma_rep_put(&r_xprt->rx_buf, rep); 187 } 188 189 static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep, 190 struct rdma_conn_param *param) 191 { 192 const struct rpcrdma_connect_private *pmsg = param->private_data; 193 unsigned int rsize, wsize; 194 195 /* Default settings for RPC-over-RDMA Version One */ 196 rsize = RPCRDMA_V1_DEF_INLINE_SIZE; 197 wsize = RPCRDMA_V1_DEF_INLINE_SIZE; 198 199 if (pmsg && 200 pmsg->cp_magic == rpcrdma_cmp_magic && 201 pmsg->cp_version == RPCRDMA_CMP_VERSION) { 202 rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); 203 wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); 204 } 205 206 if (rsize < ep->re_inline_recv) 207 ep->re_inline_recv = rsize; 208 if (wsize < ep->re_inline_send) 209 ep->re_inline_send = wsize; 210 211 rpcrdma_set_max_header_sizes(ep); 212 } 213 214 /** 215 * rpcrdma_cm_event_handler - Handle RDMA CM events 216 * @id: rdma_cm_id on which an event has occurred 217 * @event: details of the event 218 * 219 * Called with @id's mutex held. Returns 1 if caller should 220 * destroy @id, otherwise 0. 221 */ 222 static int 223 rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) 224 { 225 struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr; 226 struct rpcrdma_ep *ep = id->context; 227 228 might_sleep(); 229 230 switch (event->event) { 231 case RDMA_CM_EVENT_ADDR_RESOLVED: 232 case RDMA_CM_EVENT_ROUTE_RESOLVED: 233 ep->re_async_rc = 0; 234 complete(&ep->re_done); 235 return 0; 236 case RDMA_CM_EVENT_ADDR_ERROR: 237 ep->re_async_rc = -EPROTO; 238 complete(&ep->re_done); 239 return 0; 240 case RDMA_CM_EVENT_ROUTE_ERROR: 241 ep->re_async_rc = -ENETUNREACH; 242 complete(&ep->re_done); 243 return 0; 244 case RDMA_CM_EVENT_DEVICE_REMOVAL: 245 pr_info("rpcrdma: removing device %s for %pISpc\n", 246 ep->re_id->device->name, sap); 247 fallthrough; 248 case RDMA_CM_EVENT_ADDR_CHANGE: 249 ep->re_connect_status = -ENODEV; 250 goto disconnected; 251 case RDMA_CM_EVENT_ESTABLISHED: 252 rpcrdma_ep_get(ep); 253 ep->re_connect_status = 1; 254 rpcrdma_update_cm_private(ep, &event->param.conn); 255 trace_xprtrdma_inline_thresh(ep); 256 wake_up_all(&ep->re_connect_wait); 257 break; 258 case RDMA_CM_EVENT_CONNECT_ERROR: 259 ep->re_connect_status = -ENOTCONN; 260 goto wake_connect_worker; 261 case RDMA_CM_EVENT_UNREACHABLE: 262 ep->re_connect_status = -ENETUNREACH; 263 goto wake_connect_worker; 264 case RDMA_CM_EVENT_REJECTED: 265 ep->re_connect_status = -ECONNREFUSED; 266 if (event->status == IB_CM_REJ_STALE_CONN) 267 ep->re_connect_status = -ENOTCONN; 268 wake_connect_worker: 269 wake_up_all(&ep->re_connect_wait); 270 return 0; 271 case RDMA_CM_EVENT_DISCONNECTED: 272 ep->re_connect_status = -ECONNABORTED; 273 disconnected: 274 rpcrdma_force_disconnect(ep); 275 return rpcrdma_ep_put(ep); 276 default: 277 break; 278 } 279 280 return 0; 281 } 282 283 static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt, 284 struct rpcrdma_ep *ep) 285 { 286 unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1; 287 struct rpc_xprt *xprt = &r_xprt->rx_xprt; 288 struct rdma_cm_id *id; 289 int rc; 290 291 init_completion(&ep->re_done); 292 293 id = rdma_create_id(xprt->xprt_net, rpcrdma_cm_event_handler, ep, 294 RDMA_PS_TCP, IB_QPT_RC); 295 if (IS_ERR(id)) 296 return id; 297 298 ep->re_async_rc = -ETIMEDOUT; 299 rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)&xprt->addr, 300 RDMA_RESOLVE_TIMEOUT); 301 if (rc) 302 goto out; 303 rc = wait_for_completion_interruptible_timeout(&ep->re_done, wtimeout); 304 if (rc < 0) 305 goto out; 306 307 rc = ep->re_async_rc; 308 if (rc) 309 goto out; 310 311 ep->re_async_rc = -ETIMEDOUT; 312 rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); 313 if (rc) 314 goto out; 315 rc = wait_for_completion_interruptible_timeout(&ep->re_done, wtimeout); 316 if (rc < 0) 317 goto out; 318 rc = ep->re_async_rc; 319 if (rc) 320 goto out; 321 322 return id; 323 324 out: 325 rdma_destroy_id(id); 326 return ERR_PTR(rc); 327 } 328 329 static void rpcrdma_ep_destroy(struct kref *kref) 330 { 331 struct rpcrdma_ep *ep = container_of(kref, struct rpcrdma_ep, re_kref); 332 333 if (ep->re_id->qp) { 334 rdma_destroy_qp(ep->re_id); 335 ep->re_id->qp = NULL; 336 } 337 338 if (ep->re_attr.recv_cq) 339 ib_free_cq(ep->re_attr.recv_cq); 340 ep->re_attr.recv_cq = NULL; 341 if (ep->re_attr.send_cq) 342 ib_free_cq(ep->re_attr.send_cq); 343 ep->re_attr.send_cq = NULL; 344 345 if (ep->re_pd) 346 ib_dealloc_pd(ep->re_pd); 347 ep->re_pd = NULL; 348 349 kfree(ep); 350 module_put(THIS_MODULE); 351 } 352 353 static noinline void rpcrdma_ep_get(struct rpcrdma_ep *ep) 354 { 355 kref_get(&ep->re_kref); 356 } 357 358 /* Returns: 359 * %0 if @ep still has a positive kref count, or 360 * %1 if @ep was destroyed successfully. 361 */ 362 static noinline int rpcrdma_ep_put(struct rpcrdma_ep *ep) 363 { 364 return kref_put(&ep->re_kref, rpcrdma_ep_destroy); 365 } 366 367 static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) 368 { 369 struct rpcrdma_connect_private *pmsg; 370 struct ib_device *device; 371 struct rdma_cm_id *id; 372 struct rpcrdma_ep *ep; 373 int rc; 374 375 ep = kzalloc(sizeof(*ep), XPRTRDMA_GFP_FLAGS); 376 if (!ep) 377 return -ENOTCONN; 378 ep->re_xprt = &r_xprt->rx_xprt; 379 kref_init(&ep->re_kref); 380 381 id = rpcrdma_create_id(r_xprt, ep); 382 if (IS_ERR(id)) { 383 kfree(ep); 384 return PTR_ERR(id); 385 } 386 __module_get(THIS_MODULE); 387 device = id->device; 388 ep->re_id = id; 389 reinit_completion(&ep->re_done); 390 391 ep->re_max_requests = r_xprt->rx_xprt.max_reqs; 392 ep->re_inline_send = xprt_rdma_max_inline_write; 393 ep->re_inline_recv = xprt_rdma_max_inline_read; 394 rc = frwr_query_device(ep, device); 395 if (rc) 396 goto out_destroy; 397 398 r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->re_max_requests); 399 400 ep->re_attr.srq = NULL; 401 ep->re_attr.cap.max_inline_data = 0; 402 ep->re_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 403 ep->re_attr.qp_type = IB_QPT_RC; 404 ep->re_attr.port_num = ~0; 405 406 ep->re_send_batch = ep->re_max_requests >> 3; 407 ep->re_send_count = ep->re_send_batch; 408 init_waitqueue_head(&ep->re_connect_wait); 409 410 ep->re_attr.send_cq = ib_alloc_cq_any(device, r_xprt, 411 ep->re_attr.cap.max_send_wr, 412 IB_POLL_WORKQUEUE); 413 if (IS_ERR(ep->re_attr.send_cq)) { 414 rc = PTR_ERR(ep->re_attr.send_cq); 415 ep->re_attr.send_cq = NULL; 416 goto out_destroy; 417 } 418 419 ep->re_attr.recv_cq = ib_alloc_cq_any(device, r_xprt, 420 ep->re_attr.cap.max_recv_wr, 421 IB_POLL_WORKQUEUE); 422 if (IS_ERR(ep->re_attr.recv_cq)) { 423 rc = PTR_ERR(ep->re_attr.recv_cq); 424 ep->re_attr.recv_cq = NULL; 425 goto out_destroy; 426 } 427 ep->re_receive_count = 0; 428 429 /* Initialize cma parameters */ 430 memset(&ep->re_remote_cma, 0, sizeof(ep->re_remote_cma)); 431 432 /* Prepare RDMA-CM private message */ 433 pmsg = &ep->re_cm_private; 434 pmsg->cp_magic = rpcrdma_cmp_magic; 435 pmsg->cp_version = RPCRDMA_CMP_VERSION; 436 pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK; 437 pmsg->cp_send_size = rpcrdma_encode_buffer_size(ep->re_inline_send); 438 pmsg->cp_recv_size = rpcrdma_encode_buffer_size(ep->re_inline_recv); 439 ep->re_remote_cma.private_data = pmsg; 440 ep->re_remote_cma.private_data_len = sizeof(*pmsg); 441 442 /* Client offers RDMA Read but does not initiate */ 443 ep->re_remote_cma.initiator_depth = 0; 444 ep->re_remote_cma.responder_resources = 445 min_t(int, U8_MAX, device->attrs.max_qp_rd_atom); 446 447 /* Limit transport retries so client can detect server 448 * GID changes quickly. RPC layer handles re-establishing 449 * transport connection and retransmission. 450 */ 451 ep->re_remote_cma.retry_count = 6; 452 453 /* RPC-over-RDMA handles its own flow control. In addition, 454 * make all RNR NAKs visible so we know that RPC-over-RDMA 455 * flow control is working correctly (no NAKs should be seen). 456 */ 457 ep->re_remote_cma.flow_control = 0; 458 ep->re_remote_cma.rnr_retry_count = 0; 459 460 ep->re_pd = ib_alloc_pd(device, 0); 461 if (IS_ERR(ep->re_pd)) { 462 rc = PTR_ERR(ep->re_pd); 463 ep->re_pd = NULL; 464 goto out_destroy; 465 } 466 467 rc = rdma_create_qp(id, ep->re_pd, &ep->re_attr); 468 if (rc) 469 goto out_destroy; 470 471 r_xprt->rx_ep = ep; 472 return 0; 473 474 out_destroy: 475 rpcrdma_ep_put(ep); 476 rdma_destroy_id(id); 477 return rc; 478 } 479 480 /** 481 * rpcrdma_xprt_connect - Connect an unconnected transport 482 * @r_xprt: controlling transport instance 483 * 484 * Returns 0 on success or a negative errno. 485 */ 486 int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt) 487 { 488 struct rpc_xprt *xprt = &r_xprt->rx_xprt; 489 struct rpcrdma_ep *ep; 490 int rc; 491 492 rc = rpcrdma_ep_create(r_xprt); 493 if (rc) 494 return rc; 495 ep = r_xprt->rx_ep; 496 497 xprt_clear_connected(xprt); 498 rpcrdma_reset_cwnd(r_xprt); 499 500 /* Bump the ep's reference count while there are 501 * outstanding Receives. 502 */ 503 rpcrdma_ep_get(ep); 504 rpcrdma_post_recvs(r_xprt, 1, true); 505 506 rc = rdma_connect(ep->re_id, &ep->re_remote_cma); 507 if (rc) 508 goto out; 509 510 if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) 511 xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; 512 wait_event_interruptible(ep->re_connect_wait, 513 ep->re_connect_status != 0); 514 if (ep->re_connect_status <= 0) { 515 rc = ep->re_connect_status; 516 goto out; 517 } 518 519 rc = rpcrdma_sendctxs_create(r_xprt); 520 if (rc) { 521 rc = -ENOTCONN; 522 goto out; 523 } 524 525 rc = rpcrdma_reqs_setup(r_xprt); 526 if (rc) { 527 rc = -ENOTCONN; 528 goto out; 529 } 530 rpcrdma_mrs_create(r_xprt); 531 frwr_wp_create(r_xprt); 532 533 out: 534 trace_xprtrdma_connect(r_xprt, rc); 535 return rc; 536 } 537 538 /** 539 * rpcrdma_xprt_disconnect - Disconnect underlying transport 540 * @r_xprt: controlling transport instance 541 * 542 * Caller serializes. Either the transport send lock is held, 543 * or we're being called to destroy the transport. 544 * 545 * On return, @r_xprt is completely divested of all hardware 546 * resources and prepared for the next ->connect operation. 547 */ 548 void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt) 549 { 550 struct rpcrdma_ep *ep = r_xprt->rx_ep; 551 struct rdma_cm_id *id; 552 int rc; 553 554 if (!ep) 555 return; 556 557 id = ep->re_id; 558 rc = rdma_disconnect(id); 559 trace_xprtrdma_disconnect(r_xprt, rc); 560 561 rpcrdma_xprt_drain(r_xprt); 562 rpcrdma_reps_unmap(r_xprt); 563 rpcrdma_reqs_reset(r_xprt); 564 rpcrdma_mrs_destroy(r_xprt); 565 rpcrdma_sendctxs_destroy(r_xprt); 566 567 if (rpcrdma_ep_put(ep)) 568 rdma_destroy_id(id); 569 570 r_xprt->rx_ep = NULL; 571 } 572 573 /* Fixed-size circular FIFO queue. This implementation is wait-free and 574 * lock-free. 575 * 576 * Consumer is the code path that posts Sends. This path dequeues a 577 * sendctx for use by a Send operation. Multiple consumer threads 578 * are serialized by the RPC transport lock, which allows only one 579 * ->send_request call at a time. 580 * 581 * Producer is the code path that handles Send completions. This path 582 * enqueues a sendctx that has been completed. Multiple producer 583 * threads are serialized by the ib_poll_cq() function. 584 */ 585 586 /* rpcrdma_sendctxs_destroy() assumes caller has already quiesced 587 * queue activity, and rpcrdma_xprt_drain has flushed all remaining 588 * Send requests. 589 */ 590 static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt) 591 { 592 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 593 unsigned long i; 594 595 if (!buf->rb_sc_ctxs) 596 return; 597 for (i = 0; i <= buf->rb_sc_last; i++) 598 kfree(buf->rb_sc_ctxs[i]); 599 kfree(buf->rb_sc_ctxs); 600 buf->rb_sc_ctxs = NULL; 601 } 602 603 static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ep *ep) 604 { 605 struct rpcrdma_sendctx *sc; 606 607 sc = kzalloc(struct_size(sc, sc_sges, ep->re_attr.cap.max_send_sge), 608 XPRTRDMA_GFP_FLAGS); 609 if (!sc) 610 return NULL; 611 612 sc->sc_cqe.done = rpcrdma_wc_send; 613 sc->sc_cid.ci_queue_id = ep->re_attr.send_cq->res.id; 614 sc->sc_cid.ci_completion_id = 615 atomic_inc_return(&ep->re_completion_ids); 616 return sc; 617 } 618 619 static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt) 620 { 621 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 622 struct rpcrdma_sendctx *sc; 623 unsigned long i; 624 625 /* Maximum number of concurrent outstanding Send WRs. Capping 626 * the circular queue size stops Send Queue overflow by causing 627 * the ->send_request call to fail temporarily before too many 628 * Sends are posted. 629 */ 630 i = r_xprt->rx_ep->re_max_requests + RPCRDMA_MAX_BC_REQUESTS; 631 buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), XPRTRDMA_GFP_FLAGS); 632 if (!buf->rb_sc_ctxs) 633 return -ENOMEM; 634 635 buf->rb_sc_last = i - 1; 636 for (i = 0; i <= buf->rb_sc_last; i++) { 637 sc = rpcrdma_sendctx_create(r_xprt->rx_ep); 638 if (!sc) 639 return -ENOMEM; 640 641 buf->rb_sc_ctxs[i] = sc; 642 } 643 644 buf->rb_sc_head = 0; 645 buf->rb_sc_tail = 0; 646 return 0; 647 } 648 649 /* The sendctx queue is not guaranteed to have a size that is a 650 * power of two, thus the helpers in circ_buf.h cannot be used. 651 * The other option is to use modulus (%), which can be expensive. 652 */ 653 static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf, 654 unsigned long item) 655 { 656 return likely(item < buf->rb_sc_last) ? item + 1 : 0; 657 } 658 659 /** 660 * rpcrdma_sendctx_get_locked - Acquire a send context 661 * @r_xprt: controlling transport instance 662 * 663 * Returns pointer to a free send completion context; or NULL if 664 * the queue is empty. 665 * 666 * Usage: Called to acquire an SGE array before preparing a Send WR. 667 * 668 * The caller serializes calls to this function (per transport), and 669 * provides an effective memory barrier that flushes the new value 670 * of rb_sc_head. 671 */ 672 struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt) 673 { 674 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 675 struct rpcrdma_sendctx *sc; 676 unsigned long next_head; 677 678 next_head = rpcrdma_sendctx_next(buf, buf->rb_sc_head); 679 680 if (next_head == READ_ONCE(buf->rb_sc_tail)) 681 goto out_emptyq; 682 683 /* ORDER: item must be accessed _before_ head is updated */ 684 sc = buf->rb_sc_ctxs[next_head]; 685 686 /* Releasing the lock in the caller acts as a memory 687 * barrier that flushes rb_sc_head. 688 */ 689 buf->rb_sc_head = next_head; 690 691 return sc; 692 693 out_emptyq: 694 /* The queue is "empty" if there have not been enough Send 695 * completions recently. This is a sign the Send Queue is 696 * backing up. Cause the caller to pause and try again. 697 */ 698 xprt_wait_for_buffer_space(&r_xprt->rx_xprt); 699 r_xprt->rx_stats.empty_sendctx_q++; 700 return NULL; 701 } 702 703 /** 704 * rpcrdma_sendctx_put_locked - Release a send context 705 * @r_xprt: controlling transport instance 706 * @sc: send context to release 707 * 708 * Usage: Called from Send completion to return a sendctxt 709 * to the queue. 710 * 711 * The caller serializes calls to this function (per transport). 712 */ 713 static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, 714 struct rpcrdma_sendctx *sc) 715 { 716 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 717 unsigned long next_tail; 718 719 /* Unmap SGEs of previously completed but unsignaled 720 * Sends by walking up the queue until @sc is found. 721 */ 722 next_tail = buf->rb_sc_tail; 723 do { 724 next_tail = rpcrdma_sendctx_next(buf, next_tail); 725 726 /* ORDER: item must be accessed _before_ tail is updated */ 727 rpcrdma_sendctx_unmap(buf->rb_sc_ctxs[next_tail]); 728 729 } while (buf->rb_sc_ctxs[next_tail] != sc); 730 731 /* Paired with READ_ONCE */ 732 smp_store_release(&buf->rb_sc_tail, next_tail); 733 734 xprt_write_space(&r_xprt->rx_xprt); 735 } 736 737 static void 738 rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) 739 { 740 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 741 struct rpcrdma_ep *ep = r_xprt->rx_ep; 742 struct ib_device *device = ep->re_id->device; 743 unsigned int count; 744 745 /* Try to allocate enough to perform one full-sized I/O */ 746 for (count = 0; count < ep->re_max_rdma_segs; count++) { 747 struct rpcrdma_mr *mr; 748 int rc; 749 750 mr = kzalloc_node(sizeof(*mr), XPRTRDMA_GFP_FLAGS, 751 ibdev_to_node(device)); 752 if (!mr) 753 break; 754 755 rc = frwr_mr_init(r_xprt, mr); 756 if (rc) { 757 kfree(mr); 758 break; 759 } 760 761 spin_lock(&buf->rb_lock); 762 rpcrdma_mr_push(mr, &buf->rb_mrs); 763 list_add(&mr->mr_all, &buf->rb_all_mrs); 764 spin_unlock(&buf->rb_lock); 765 } 766 767 r_xprt->rx_stats.mrs_allocated += count; 768 trace_xprtrdma_createmrs(r_xprt, count); 769 } 770 771 static void 772 rpcrdma_mr_refresh_worker(struct work_struct *work) 773 { 774 struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, 775 rb_refresh_worker); 776 struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, 777 rx_buf); 778 779 rpcrdma_mrs_create(r_xprt); 780 xprt_write_space(&r_xprt->rx_xprt); 781 } 782 783 /** 784 * rpcrdma_mrs_refresh - Wake the MR refresh worker 785 * @r_xprt: controlling transport instance 786 * 787 */ 788 void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt) 789 { 790 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 791 struct rpcrdma_ep *ep = r_xprt->rx_ep; 792 793 /* If there is no underlying connection, it's no use 794 * to wake the refresh worker. 795 */ 796 if (ep->re_connect_status != 1) 797 return; 798 queue_work(system_highpri_wq, &buf->rb_refresh_worker); 799 } 800 801 /** 802 * rpcrdma_req_create - Allocate an rpcrdma_req object 803 * @r_xprt: controlling r_xprt 804 * @size: initial size, in bytes, of send and receive buffers 805 * 806 * Returns an allocated and fully initialized rpcrdma_req or NULL. 807 */ 808 struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, 809 size_t size) 810 { 811 struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; 812 struct rpcrdma_req *req; 813 814 req = kzalloc(sizeof(*req), XPRTRDMA_GFP_FLAGS); 815 if (req == NULL) 816 goto out1; 817 818 req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE); 819 if (!req->rl_sendbuf) 820 goto out2; 821 822 req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE); 823 if (!req->rl_recvbuf) 824 goto out3; 825 826 INIT_LIST_HEAD(&req->rl_free_mrs); 827 INIT_LIST_HEAD(&req->rl_registered); 828 spin_lock(&buffer->rb_lock); 829 list_add(&req->rl_all, &buffer->rb_allreqs); 830 spin_unlock(&buffer->rb_lock); 831 return req; 832 833 out3: 834 rpcrdma_regbuf_free(req->rl_sendbuf); 835 out2: 836 kfree(req); 837 out1: 838 return NULL; 839 } 840 841 /** 842 * rpcrdma_req_setup - Per-connection instance setup of an rpcrdma_req object 843 * @r_xprt: controlling transport instance 844 * @req: rpcrdma_req object to set up 845 * 846 * Returns zero on success, and a negative errno on failure. 847 */ 848 int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) 849 { 850 struct rpcrdma_regbuf *rb; 851 size_t maxhdrsize; 852 853 /* Compute maximum header buffer size in bytes */ 854 maxhdrsize = rpcrdma_fixed_maxsz + 3 + 855 r_xprt->rx_ep->re_max_rdma_segs * rpcrdma_readchunk_maxsz; 856 maxhdrsize *= sizeof(__be32); 857 rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize), 858 DMA_TO_DEVICE); 859 if (!rb) 860 goto out; 861 862 if (!__rpcrdma_regbuf_dma_map(r_xprt, rb)) 863 goto out_free; 864 865 req->rl_rdmabuf = rb; 866 xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb)); 867 return 0; 868 869 out_free: 870 rpcrdma_regbuf_free(rb); 871 out: 872 return -ENOMEM; 873 } 874 875 /* ASSUMPTION: the rb_allreqs list is stable for the duration, 876 * and thus can be walked without holding rb_lock. Eg. the 877 * caller is holding the transport send lock to exclude 878 * device removal or disconnection. 879 */ 880 static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt) 881 { 882 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 883 struct rpcrdma_req *req; 884 int rc; 885 886 list_for_each_entry(req, &buf->rb_allreqs, rl_all) { 887 rc = rpcrdma_req_setup(r_xprt, req); 888 if (rc) 889 return rc; 890 } 891 return 0; 892 } 893 894 static void rpcrdma_req_reset(struct rpcrdma_req *req) 895 { 896 /* Credits are valid for only one connection */ 897 req->rl_slot.rq_cong = 0; 898 899 rpcrdma_regbuf_free(req->rl_rdmabuf); 900 req->rl_rdmabuf = NULL; 901 902 rpcrdma_regbuf_dma_unmap(req->rl_sendbuf); 903 rpcrdma_regbuf_dma_unmap(req->rl_recvbuf); 904 905 frwr_reset(req); 906 } 907 908 /* ASSUMPTION: the rb_allreqs list is stable for the duration, 909 * and thus can be walked without holding rb_lock. Eg. the 910 * caller is holding the transport send lock to exclude 911 * device removal or disconnection. 912 */ 913 static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt) 914 { 915 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 916 struct rpcrdma_req *req; 917 918 list_for_each_entry(req, &buf->rb_allreqs, rl_all) 919 rpcrdma_req_reset(req); 920 } 921 922 static noinline 923 struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, 924 bool temp) 925 { 926 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 927 struct rpcrdma_rep *rep; 928 929 rep = kzalloc(sizeof(*rep), XPRTRDMA_GFP_FLAGS); 930 if (rep == NULL) 931 goto out; 932 933 rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep->re_inline_recv, 934 DMA_FROM_DEVICE); 935 if (!rep->rr_rdmabuf) 936 goto out_free; 937 938 rep->rr_cid.ci_completion_id = 939 atomic_inc_return(&r_xprt->rx_ep->re_completion_ids); 940 941 xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf), 942 rdmab_length(rep->rr_rdmabuf)); 943 rep->rr_cqe.done = rpcrdma_wc_receive; 944 rep->rr_rxprt = r_xprt; 945 rep->rr_recv_wr.next = NULL; 946 rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; 947 rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; 948 rep->rr_recv_wr.num_sge = 1; 949 rep->rr_temp = temp; 950 951 spin_lock(&buf->rb_lock); 952 list_add(&rep->rr_all, &buf->rb_all_reps); 953 spin_unlock(&buf->rb_lock); 954 return rep; 955 956 out_free: 957 kfree(rep); 958 out: 959 return NULL; 960 } 961 962 static void rpcrdma_rep_free(struct rpcrdma_rep *rep) 963 { 964 rpcrdma_regbuf_free(rep->rr_rdmabuf); 965 kfree(rep); 966 } 967 968 static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep) 969 { 970 struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf; 971 972 spin_lock(&buf->rb_lock); 973 list_del(&rep->rr_all); 974 spin_unlock(&buf->rb_lock); 975 976 rpcrdma_rep_free(rep); 977 } 978 979 static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf) 980 { 981 struct llist_node *node; 982 983 /* Calls to llist_del_first are required to be serialized */ 984 node = llist_del_first(&buf->rb_free_reps); 985 if (!node) 986 return NULL; 987 return llist_entry(node, struct rpcrdma_rep, rr_node); 988 } 989 990 /** 991 * rpcrdma_rep_put - Release rpcrdma_rep back to free list 992 * @buf: buffer pool 993 * @rep: rep to release 994 * 995 */ 996 void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep) 997 { 998 llist_add(&rep->rr_node, &buf->rb_free_reps); 999 } 1000 1001 /* Caller must ensure the QP is quiescent (RQ is drained) before 1002 * invoking this function, to guarantee rb_all_reps is not 1003 * changing. 1004 */ 1005 static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt) 1006 { 1007 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1008 struct rpcrdma_rep *rep; 1009 1010 list_for_each_entry(rep, &buf->rb_all_reps, rr_all) { 1011 rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf); 1012 rep->rr_temp = true; /* Mark this rep for destruction */ 1013 } 1014 } 1015 1016 static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf) 1017 { 1018 struct rpcrdma_rep *rep; 1019 1020 spin_lock(&buf->rb_lock); 1021 while ((rep = list_first_entry_or_null(&buf->rb_all_reps, 1022 struct rpcrdma_rep, 1023 rr_all)) != NULL) { 1024 list_del(&rep->rr_all); 1025 spin_unlock(&buf->rb_lock); 1026 1027 rpcrdma_rep_free(rep); 1028 1029 spin_lock(&buf->rb_lock); 1030 } 1031 spin_unlock(&buf->rb_lock); 1032 } 1033 1034 /** 1035 * rpcrdma_buffer_create - Create initial set of req/rep objects 1036 * @r_xprt: transport instance to (re)initialize 1037 * 1038 * Returns zero on success, otherwise a negative errno. 1039 */ 1040 int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) 1041 { 1042 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1043 int i, rc; 1044 1045 buf->rb_bc_srv_max_requests = 0; 1046 spin_lock_init(&buf->rb_lock); 1047 INIT_LIST_HEAD(&buf->rb_mrs); 1048 INIT_LIST_HEAD(&buf->rb_all_mrs); 1049 INIT_WORK(&buf->rb_refresh_worker, rpcrdma_mr_refresh_worker); 1050 1051 INIT_LIST_HEAD(&buf->rb_send_bufs); 1052 INIT_LIST_HEAD(&buf->rb_allreqs); 1053 INIT_LIST_HEAD(&buf->rb_all_reps); 1054 1055 rc = -ENOMEM; 1056 for (i = 0; i < r_xprt->rx_xprt.max_reqs; i++) { 1057 struct rpcrdma_req *req; 1058 1059 req = rpcrdma_req_create(r_xprt, 1060 RPCRDMA_V1_DEF_INLINE_SIZE * 2); 1061 if (!req) 1062 goto out; 1063 list_add(&req->rl_list, &buf->rb_send_bufs); 1064 } 1065 1066 init_llist_head(&buf->rb_free_reps); 1067 1068 return 0; 1069 out: 1070 rpcrdma_buffer_destroy(buf); 1071 return rc; 1072 } 1073 1074 /** 1075 * rpcrdma_req_destroy - Destroy an rpcrdma_req object 1076 * @req: unused object to be destroyed 1077 * 1078 * Relies on caller holding the transport send lock to protect 1079 * removing req->rl_all from buf->rb_all_reqs safely. 1080 */ 1081 void rpcrdma_req_destroy(struct rpcrdma_req *req) 1082 { 1083 struct rpcrdma_mr *mr; 1084 1085 list_del(&req->rl_all); 1086 1087 while ((mr = rpcrdma_mr_pop(&req->rl_free_mrs))) { 1088 struct rpcrdma_buffer *buf = &mr->mr_xprt->rx_buf; 1089 1090 spin_lock(&buf->rb_lock); 1091 list_del(&mr->mr_all); 1092 spin_unlock(&buf->rb_lock); 1093 1094 frwr_mr_release(mr); 1095 } 1096 1097 rpcrdma_regbuf_free(req->rl_recvbuf); 1098 rpcrdma_regbuf_free(req->rl_sendbuf); 1099 rpcrdma_regbuf_free(req->rl_rdmabuf); 1100 kfree(req); 1101 } 1102 1103 /** 1104 * rpcrdma_mrs_destroy - Release all of a transport's MRs 1105 * @r_xprt: controlling transport instance 1106 * 1107 * Relies on caller holding the transport send lock to protect 1108 * removing mr->mr_list from req->rl_free_mrs safely. 1109 */ 1110 static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt) 1111 { 1112 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1113 struct rpcrdma_mr *mr; 1114 1115 cancel_work_sync(&buf->rb_refresh_worker); 1116 1117 spin_lock(&buf->rb_lock); 1118 while ((mr = list_first_entry_or_null(&buf->rb_all_mrs, 1119 struct rpcrdma_mr, 1120 mr_all)) != NULL) { 1121 list_del(&mr->mr_list); 1122 list_del(&mr->mr_all); 1123 spin_unlock(&buf->rb_lock); 1124 1125 frwr_mr_release(mr); 1126 1127 spin_lock(&buf->rb_lock); 1128 } 1129 spin_unlock(&buf->rb_lock); 1130 } 1131 1132 /** 1133 * rpcrdma_buffer_destroy - Release all hw resources 1134 * @buf: root control block for resources 1135 * 1136 * ORDERING: relies on a prior rpcrdma_xprt_drain : 1137 * - No more Send or Receive completions can occur 1138 * - All MRs, reps, and reqs are returned to their free lists 1139 */ 1140 void 1141 rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) 1142 { 1143 rpcrdma_reps_destroy(buf); 1144 1145 while (!list_empty(&buf->rb_send_bufs)) { 1146 struct rpcrdma_req *req; 1147 1148 req = list_first_entry(&buf->rb_send_bufs, 1149 struct rpcrdma_req, rl_list); 1150 list_del(&req->rl_list); 1151 rpcrdma_req_destroy(req); 1152 } 1153 } 1154 1155 /** 1156 * rpcrdma_mr_get - Allocate an rpcrdma_mr object 1157 * @r_xprt: controlling transport 1158 * 1159 * Returns an initialized rpcrdma_mr or NULL if no free 1160 * rpcrdma_mr objects are available. 1161 */ 1162 struct rpcrdma_mr * 1163 rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt) 1164 { 1165 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1166 struct rpcrdma_mr *mr; 1167 1168 spin_lock(&buf->rb_lock); 1169 mr = rpcrdma_mr_pop(&buf->rb_mrs); 1170 spin_unlock(&buf->rb_lock); 1171 return mr; 1172 } 1173 1174 /** 1175 * rpcrdma_reply_put - Put reply buffers back into pool 1176 * @buffers: buffer pool 1177 * @req: object to return 1178 * 1179 */ 1180 void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) 1181 { 1182 if (req->rl_reply) { 1183 rpcrdma_rep_put(buffers, req->rl_reply); 1184 req->rl_reply = NULL; 1185 } 1186 } 1187 1188 /** 1189 * rpcrdma_buffer_get - Get a request buffer 1190 * @buffers: Buffer pool from which to obtain a buffer 1191 * 1192 * Returns a fresh rpcrdma_req, or NULL if none are available. 1193 */ 1194 struct rpcrdma_req * 1195 rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) 1196 { 1197 struct rpcrdma_req *req; 1198 1199 spin_lock(&buffers->rb_lock); 1200 req = list_first_entry_or_null(&buffers->rb_send_bufs, 1201 struct rpcrdma_req, rl_list); 1202 if (req) 1203 list_del_init(&req->rl_list); 1204 spin_unlock(&buffers->rb_lock); 1205 return req; 1206 } 1207 1208 /** 1209 * rpcrdma_buffer_put - Put request/reply buffers back into pool 1210 * @buffers: buffer pool 1211 * @req: object to return 1212 * 1213 */ 1214 void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) 1215 { 1216 rpcrdma_reply_put(buffers, req); 1217 1218 spin_lock(&buffers->rb_lock); 1219 list_add(&req->rl_list, &buffers->rb_send_bufs); 1220 spin_unlock(&buffers->rb_lock); 1221 } 1222 1223 /* Returns a pointer to a rpcrdma_regbuf object, or NULL. 1224 * 1225 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for 1226 * receiving the payload of RDMA RECV operations. During Long Calls 1227 * or Replies they may be registered externally via frwr_map. 1228 */ 1229 static struct rpcrdma_regbuf * 1230 rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction) 1231 { 1232 struct rpcrdma_regbuf *rb; 1233 1234 rb = kmalloc(sizeof(*rb), XPRTRDMA_GFP_FLAGS); 1235 if (!rb) 1236 return NULL; 1237 rb->rg_data = kmalloc(size, XPRTRDMA_GFP_FLAGS); 1238 if (!rb->rg_data) { 1239 kfree(rb); 1240 return NULL; 1241 } 1242 1243 rb->rg_device = NULL; 1244 rb->rg_direction = direction; 1245 rb->rg_iov.length = size; 1246 return rb; 1247 } 1248 1249 /** 1250 * rpcrdma_regbuf_realloc - re-allocate a SEND/RECV buffer 1251 * @rb: regbuf to reallocate 1252 * @size: size of buffer to be allocated, in bytes 1253 * @flags: GFP flags 1254 * 1255 * Returns true if reallocation was successful. If false is 1256 * returned, @rb is left untouched. 1257 */ 1258 bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags) 1259 { 1260 void *buf; 1261 1262 buf = kmalloc(size, flags); 1263 if (!buf) 1264 return false; 1265 1266 rpcrdma_regbuf_dma_unmap(rb); 1267 kfree(rb->rg_data); 1268 1269 rb->rg_data = buf; 1270 rb->rg_iov.length = size; 1271 return true; 1272 } 1273 1274 /** 1275 * __rpcrdma_regbuf_dma_map - DMA-map a regbuf 1276 * @r_xprt: controlling transport instance 1277 * @rb: regbuf to be mapped 1278 * 1279 * Returns true if the buffer is now DMA mapped to @r_xprt's device 1280 */ 1281 bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt, 1282 struct rpcrdma_regbuf *rb) 1283 { 1284 struct ib_device *device = r_xprt->rx_ep->re_id->device; 1285 1286 if (rb->rg_direction == DMA_NONE) 1287 return false; 1288 1289 rb->rg_iov.addr = ib_dma_map_single(device, rdmab_data(rb), 1290 rdmab_length(rb), rb->rg_direction); 1291 if (ib_dma_mapping_error(device, rdmab_addr(rb))) { 1292 trace_xprtrdma_dma_maperr(rdmab_addr(rb)); 1293 return false; 1294 } 1295 1296 rb->rg_device = device; 1297 rb->rg_iov.lkey = r_xprt->rx_ep->re_pd->local_dma_lkey; 1298 return true; 1299 } 1300 1301 static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb) 1302 { 1303 if (!rb) 1304 return; 1305 1306 if (!rpcrdma_regbuf_is_mapped(rb)) 1307 return; 1308 1309 ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb), rdmab_length(rb), 1310 rb->rg_direction); 1311 rb->rg_device = NULL; 1312 } 1313 1314 static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb) 1315 { 1316 rpcrdma_regbuf_dma_unmap(rb); 1317 if (rb) 1318 kfree(rb->rg_data); 1319 kfree(rb); 1320 } 1321 1322 /** 1323 * rpcrdma_post_recvs - Refill the Receive Queue 1324 * @r_xprt: controlling transport instance 1325 * @needed: current credit grant 1326 * @temp: mark Receive buffers to be deleted after one use 1327 * 1328 */ 1329 void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp) 1330 { 1331 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1332 struct rpcrdma_ep *ep = r_xprt->rx_ep; 1333 struct ib_recv_wr *wr, *bad_wr; 1334 struct rpcrdma_rep *rep; 1335 int count, rc; 1336 1337 rc = 0; 1338 count = 0; 1339 1340 if (likely(ep->re_receive_count > needed)) 1341 goto out; 1342 needed -= ep->re_receive_count; 1343 if (!temp) 1344 needed += RPCRDMA_MAX_RECV_BATCH; 1345 1346 if (atomic_inc_return(&ep->re_receiving) > 1) 1347 goto out; 1348 1349 /* fast path: all needed reps can be found on the free list */ 1350 wr = NULL; 1351 while (needed) { 1352 rep = rpcrdma_rep_get_locked(buf); 1353 if (rep && rep->rr_temp) { 1354 rpcrdma_rep_destroy(rep); 1355 continue; 1356 } 1357 if (!rep) 1358 rep = rpcrdma_rep_create(r_xprt, temp); 1359 if (!rep) 1360 break; 1361 if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) { 1362 rpcrdma_rep_put(buf, rep); 1363 break; 1364 } 1365 1366 rep->rr_cid.ci_queue_id = ep->re_attr.recv_cq->res.id; 1367 trace_xprtrdma_post_recv(&rep->rr_cid); 1368 rep->rr_recv_wr.next = wr; 1369 wr = &rep->rr_recv_wr; 1370 --needed; 1371 ++count; 1372 } 1373 if (!wr) 1374 goto out; 1375 1376 rc = ib_post_recv(ep->re_id->qp, wr, 1377 (const struct ib_recv_wr **)&bad_wr); 1378 if (rc) { 1379 trace_xprtrdma_post_recvs_err(r_xprt, rc); 1380 for (wr = bad_wr; wr;) { 1381 struct rpcrdma_rep *rep; 1382 1383 rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr); 1384 wr = wr->next; 1385 rpcrdma_rep_put(buf, rep); 1386 --count; 1387 } 1388 } 1389 if (atomic_dec_return(&ep->re_receiving) > 0) 1390 complete(&ep->re_done); 1391 1392 out: 1393 trace_xprtrdma_post_recvs(r_xprt, count); 1394 ep->re_receive_count += count; 1395 return; 1396 } 1397