/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright (c) 2007, The Ohio State University. All rights reserved.
 *
 * Portions of this source code are developed by the team members of
 * The Ohio State University's Network-Based Computing Laboratory (NBCL),
 * headed by Professor Dhabaleswar K. (DK) Panda.
 *
 * Acknowledgements to contributions from developers:
 *	Ranjit Noronha: noronha@cse.ohio-state.edu
 *	Lei Chai      : chail@cse.ohio-state.edu
 *	Weikuan Yu    : yuw@cse.ohio-state.edu
 *
 */

/*
 * xdr_rdma.c, XDR implementation using RDMA to move large chunks
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/sdt.h>
#include <sys/debug.h>

#include <rpc/types.h>
#include <rpc/xdr.h>
#include <sys/cmn_err.h>
#include <rpc/rpc_sztypes.h>
#include <rpc/rpc_rdma.h>
#include <sys/sysmacros.h>

/*
 * RPC header and xdr encoding overhead.  The number was determined by
 * tracing the msglen in svc_rdma_ksend for sec=sys,krb5,krb5i and krb5p.
 * If XDR_RDMA_BUF_OVERHEAD is not large enough, the result is that the
 * dtrace probe "krpc-e-svcrdma-ksend-noreplycl" fires on the server in
 * svc_rdma_ksend.
 */
#define	XDR_RDMA_BUF_OVERHEAD	300

static bool_t	xdrrdma_getint32(XDR *, int32_t *);
static bool_t	xdrrdma_putint32(XDR *, int32_t *);
static bool_t	xdrrdma_getbytes(XDR *, caddr_t, int);
static bool_t	xdrrdma_putbytes(XDR *, caddr_t, int);
uint_t		xdrrdma_getpos(XDR *);
bool_t		xdrrdma_setpos(XDR *, uint_t);
static rpc_inline_t *xdrrdma_inline(XDR *, int);
void		xdrrdma_destroy(XDR *);
static bool_t	xdrrdma_control(XDR *, int, void *);
static bool_t	xdrrdma_read_a_chunk(XDR *, CONN **);
static void	xdrrdma_free_xdr_chunks(CONN *, struct clist *);

struct xdr_ops	xdrrdmablk_ops = {
	xdrrdma_getbytes,
	xdrrdma_putbytes,
	xdrrdma_getpos,
	xdrrdma_setpos,
	xdrrdma_inline,
	xdrrdma_destroy,
	xdrrdma_control,
	xdrrdma_getint32,
	xdrrdma_putint32
};

struct xdr_ops	xdrrdma_ops = {
	xdrrdma_getbytes,
	xdrrdma_putbytes,
	xdrrdma_getpos,
	xdrrdma_setpos,
	xdrrdma_inline,
	xdrrdma_destroy,
	xdrrdma_control,
	xdrrdma_getint32,
	xdrrdma_putint32
};

/*
 * A chunk list entry identifies a chunk of opaque data to be moved
 * separately from the rest of the RPC message.  xp_min_chunk = 0 is a
 * special case for ENCODING, which means do not chunk the incoming
 * stream of data.
 *
 * A read chunk can contain part of the RPC message in addition to the
 * inline message.  In such a case, (xp_offp - x_base) will not provide
 * the correct xdr offset of the entire message.  xp_off is used in such
 * a case to denote the offset or current position in the overall message
 * covering both the inline and the chunk.  This is used only in the case
 * of decoding and is useful for comparing read chunk 'c_xdroff' offsets.
 *
 * An example of a read chunk containing an XDR message:
 * An NFSv4 compound as follows:
 *
 *	PUTFH
 *	WRITE [4109 bytes]
 *	GETATTR
 *
 * Solaris encoding is:
 * --------------------
 *
 * <Inline message>:   [PUTFH WRITE4args GETATTR]
 *                                          |
 *                                          v
 * [RDMA_READ chunks]:                 [write data]
 *
 *
 * Linux encoding is:
 * ------------------
 *
 * <Inline message>:   [PUTFH WRITE4args]
 *                                      |
 *                                      v
 * [RDMA_READ chunks]:             [Write data] [Write data2] [Getattr chunk]
 *                                    chunk1       chunk2         chunk3
 *
 * where the READ chunks are as follows:
 *
 *	- chunk1 - 4k
 *	  write data |
 *	- chunk2 - 13 bytes (4109 - 4k)
 *	- chunk3 - 19 bytes, the getattr op
 *	  (getattr op starts at byte 4, after 3 bytes of roundup)
 *
 */

typedef struct {
	caddr_t		xp_offp;
	int		xp_min_chunk;
	uint_t		xp_flags;	/* Controls setting for rdma xdr */
	int		xp_buf_size;	/* size of xdr buffer */
	int		xp_off;		/* overall offset */
	struct clist	*xp_rcl;	/* head of chunk list */
	struct clist	**xp_rcl_next;	/* location to place/find next chunk */
	struct clist	*xp_rcl_xdr;	/* copy of rcl containing RPC message */
	struct clist	*xp_wcl;	/* head of write chunk list */
	CONN		*xp_conn;	/* connection for chunk data xfer */
	uint_t		xp_reply_chunk_len;
	/* used to track length for security modes: integrity/privacy */
	uint_t		xp_reply_chunk_len_alt;
} xrdma_private_t;

extern kmem_cache_t *clist_cache;

bool_t
xdrrdma_getrdmablk(XDR *xdrs, struct clist **rlist, uint_t *sizep,
    CONN **conn, const uint_t maxsize)
{
	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);
	struct clist	*cle = *(xdrp->xp_rcl_next);
	struct clist	*rdclist = NULL, *prev = NULL;
	bool_t		retval = TRUE;
	uint32_t	cur_offset = 0;
	uint32_t	total_segments = 0;
	uint32_t	actual_segments = 0;
	uint32_t	alen;
	uint_t		total_len;

	ASSERT(xdrs->x_op != XDR_FREE);

	/*
	 * first deal with the length since xdr bytes are counted
	 */
	if (!xdr_u_int(xdrs, sizep)) {
		DTRACE_PROBE(xdr__e__getrdmablk_sizep_fail);
		return (FALSE);
	}
	total_len = *sizep;
	if (total_len > maxsize) {
		DTRACE_PROBE2(xdr__e__getrdmablk_bad_size,
		    int, total_len, int, maxsize);
		return (FALSE);
	}
	(*conn) = xdrp->xp_conn;

	/*
	 * if no data we are done
	 */
	if (total_len == 0)
		return (TRUE);

	while (cle) {
		total_segments++;
		cle = cle->c_next;
	}

	cle = *(xdrp->xp_rcl_next);

	/*
	 * If there is a chunk at the current offset, then set up a read
	 * chunk list which records the destination address and length
	 * and will RDMA READ the data in later.
	 */
	if (cle == NULL)
		return (FALSE);

	if (cle->c_xdroff != (xdrp->xp_offp - xdrs->x_base))
		return (FALSE);

	/*
	 * Set up the chunk list with the appropriate
	 * address (offset) and length
	 */
	for (actual_segments = 0;
	    actual_segments < total_segments; actual_segments++) {

		DTRACE_PROBE3(krpc__i__xdrrdma_getrdmablk, uint32_t, cle->c_len,
		    uint32_t, total_len, uint32_t, cle->c_xdroff);

		if (total_len <= 0)
			break;

		/*
		 * not the first time in the loop
		 */
		if (actual_segments > 0)
			cle = cle->c_next;

		cle->u.c_daddr = (uint64) cur_offset;
		alen = 0;
		if (cle->c_len > total_len) {
			alen = cle->c_len;
			cle->c_len = total_len;
		}
		if (!alen)
			xdrp->xp_rcl_next = &cle->c_next;

		cur_offset += cle->c_len;
		total_len -= cle->c_len;

		if ((total_segments - actual_segments - 1) == 0 &&
		    total_len > 0) {
			DTRACE_PROBE(krpc__e__xdrrdma_getblk_chunktooshort);
			retval = FALSE;
		}

		if ((total_segments - actual_segments - 1) > 0 &&
		    total_len == 0) {
			DTRACE_PROBE2(krpc__e__xdrrdma_getblk_toobig,
			    int, total_segments, int, actual_segments);
		}

		rdclist = clist_alloc();
		(*rdclist) = (*cle);
		if ((*rlist) == NULL)
			(*rlist) = rdclist;
		if (prev == NULL)
			prev = rdclist;
		else {
			prev->c_next = rdclist;
			prev = rdclist;
		}

	}

out:
	if (prev != NULL)
		prev->c_next = NULL;

	/*
	 * Adjust the chunk length, if we read only a part of
	 * a chunk.
	 */

	if (alen) {
		cle->w.c_saddr =
		    (uint64)(uintptr_t)cle->w.c_saddr + cle->c_len;
		cle->c_len = alen - cle->c_len;
	}

	return (retval);
}

/*
 * The procedure xdrrdma_create initializes a stream descriptor for a memory
 * buffer.
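 *
 * A minimal usage sketch (illustrative only; rqst_buf, rqst_buflen,
 * min_chunk and conn below stand for caller state and are not taken
 * from any real caller):
 *
 *	XDR	xdrs;
 *
 *	xdrrdma_create(&xdrs, rqst_buf, rqst_buflen, min_chunk, NULL,
 *	    XDR_ENCODE, conn);
 *	... encode the call arguments into the stream ...
 *	xdrrdma_destroy(&xdrs);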
 */
void
xdrrdma_create(XDR *xdrs, caddr_t addr, uint_t size,
    int min_chunk, struct clist *cl, enum xdr_op op, CONN *conn)
{
	xrdma_private_t *xdrp;
	struct clist	*cle;

	xdrs->x_op = op;
	xdrs->x_ops = &xdrrdma_ops;
	xdrs->x_base = addr;
	xdrs->x_handy = size;
	xdrs->x_public = NULL;

	xdrp = (xrdma_private_t *)kmem_zalloc(sizeof (xrdma_private_t),
	    KM_SLEEP);
	xdrs->x_private = (caddr_t)xdrp;
	xdrp->xp_offp = addr;
	xdrp->xp_min_chunk = min_chunk;
	xdrp->xp_flags = 0;
	xdrp->xp_buf_size = size;
	xdrp->xp_rcl = cl;
	xdrp->xp_reply_chunk_len = 0;
	xdrp->xp_reply_chunk_len_alt = 0;

	if (op == XDR_ENCODE && cl != NULL) {
		/* Find last element in chunk list and set xp_rcl_next */
		for (cle = cl; cle->c_next != NULL; cle = cle->c_next)
			continue;

		xdrp->xp_rcl_next = &(cle->c_next);
	} else {
		xdrp->xp_rcl_next = &(xdrp->xp_rcl);
	}

	xdrp->xp_wcl = NULL;

	xdrp->xp_conn = conn;
	if (xdrp->xp_min_chunk != 0)
		xdrp->xp_flags |= XDR_RDMA_CHUNK;
}

/* ARGSUSED */
void
xdrrdma_destroy(XDR *xdrs)
{
	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);

	if (xdrp == NULL)
		return;

	if (xdrp->xp_wcl) {
		if (xdrp->xp_flags & XDR_RDMA_WLIST_REG) {
			(void) clist_deregister(xdrp->xp_conn, xdrp->xp_wcl);
			rdma_buf_free(xdrp->xp_conn,
			    &xdrp->xp_wcl->rb_longbuf);
		}
		clist_free(xdrp->xp_wcl);
	}

	if (xdrp->xp_rcl) {
		if (xdrp->xp_flags & XDR_RDMA_RLIST_REG) {
			(void) clist_deregister(xdrp->xp_conn, xdrp->xp_rcl);
			rdma_buf_free(xdrp->xp_conn,
			    &xdrp->xp_rcl->rb_longbuf);
		}
		clist_free(xdrp->xp_rcl);
	}

	if (xdrp->xp_rcl_xdr)
		xdrrdma_free_xdr_chunks(xdrp->xp_conn, xdrp->xp_rcl_xdr);

	(void) kmem_free(xdrs->x_private, sizeof (xrdma_private_t));
	xdrs->x_private = NULL;
}

static bool_t
xdrrdma_getint32(XDR *xdrs, int32_t *int32p)
{
	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);
	int chunked = 0;

	if ((xdrs->x_handy -= (int)sizeof (int32_t)) < 0) {
		/*
		 * check if rest of the rpc message is in a chunk
		 */
		if (!xdrrdma_read_a_chunk(xdrs, &xdrp->xp_conn)) {
			return (FALSE);
		}
		chunked = 1;
	}

	/* LINTED pointer alignment */
	*int32p = (int32_t)ntohl((uint32_t)(*((int32_t *)(xdrp->xp_offp))));

	DTRACE_PROBE1(krpc__i__xdrrdma_getint32, int32_t, *int32p);

	xdrp->xp_offp += sizeof (int32_t);

	if (chunked)
		xdrs->x_handy -= (int)sizeof (int32_t);

	if (xdrp->xp_off != 0) {
		xdrp->xp_off += sizeof (int32_t);
	}

	return (TRUE);
}

static bool_t
xdrrdma_putint32(XDR *xdrs, int32_t *int32p)
{
	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);

	if ((xdrs->x_handy -= (int)sizeof (int32_t)) < 0)
		return (FALSE);

	/* LINTED pointer alignment */
	*(int32_t *)xdrp->xp_offp = (int32_t)htonl((uint32_t)(*int32p));
	xdrp->xp_offp += sizeof (int32_t);

	return (TRUE);
}

/*
 * DECODE bytes from XDR stream for rdma.
 * If the XDR stream contains a read chunk list,
 * it will go through xdrrdma_getrdmablk instead.
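 *
 * For example, if the sender moved an opaque field (such as the write
 * data in the example at the top of this file) into a read chunk, that
 * chunk's c_xdroff will match the current decode offset and the data is
 * RDMA READ directly into the caller's buffer below instead of being
 * bcopy()d from the inline message.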
 */
static bool_t
xdrrdma_getbytes(XDR *xdrs, caddr_t addr, int len)
{
	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);
	struct clist	*cle = *(xdrp->xp_rcl_next);
	struct clist	*cls = *(xdrp->xp_rcl_next);
	struct clist	cl;
	bool_t		retval = TRUE;
	uint32_t	total_len = len;
	uint32_t	cur_offset = 0;
	uint32_t	total_segments = 0;
	uint32_t	actual_segments = 0;
	uint32_t	status = RDMA_SUCCESS;
	uint32_t	alen = 0;
	uint32_t	xpoff;

	while (cle) {
		total_segments++;
		cle = cle->c_next;
	}

	cle = *(xdrp->xp_rcl_next);

	if (xdrp->xp_off) {
		xpoff = xdrp->xp_off;
	} else {
		xpoff = (xdrp->xp_offp - xdrs->x_base);
	}

	/*
	 * If there is a chunk at the current offset, then set up a read
	 * chunk list which records the destination address and length
	 * and will RDMA READ the data in later.
	 */

	if (cle != NULL && cle->c_xdroff == xpoff) {
		for (actual_segments = 0;
		    actual_segments < total_segments; actual_segments++) {

			if (total_len <= 0)
				break;

			if (status != RDMA_SUCCESS)
				goto out;

			cle->u.c_daddr = (uint64)(uintptr_t)addr + cur_offset;
			alen = 0;
			if (cle->c_len > total_len) {
				alen = cle->c_len;
				cle->c_len = total_len;
			}
			if (!alen)
				xdrp->xp_rcl_next = &cle->c_next;

			cur_offset += cle->c_len;
			total_len -= cle->c_len;

			if ((total_segments - actual_segments - 1) == 0 &&
			    total_len > 0) {
				DTRACE_PROBE(
				    krpc__e__xdrrdma_getbytes_chunktooshort);
				retval = FALSE;
			}

			if ((total_segments - actual_segments - 1) > 0 &&
			    total_len == 0) {
				DTRACE_PROBE2(krpc__e__xdrrdma_getbytes_toobig,
				    int, total_segments, int, actual_segments);
			}

			/*
			 * RDMA READ the chunk data from the remote end.
			 * First prep the destination buffer by registering
			 * it, then RDMA READ the chunk data.  Since we are
			 * doing streaming memory, sync the destination
			 * buffer to CPU and deregister the buffer.
			 */
			if (xdrp->xp_conn == NULL) {
				return (FALSE);
			}
			cl = *cle;
			cl.c_next = NULL;
			status = clist_register(xdrp->xp_conn, &cl,
			    CLIST_REG_DST);
			if (status != RDMA_SUCCESS) {
				retval = FALSE;
				/*
				 * Deregister the previous chunks
				 * before return
				 */
				goto out;
			}

			cle->c_dmemhandle = cl.c_dmemhandle;
			cle->c_dsynchandle = cl.c_dsynchandle;

			/*
			 * Now read the chunk in
			 */
			if ((total_segments - actual_segments - 1) == 0 ||
			    total_len == 0) {
				status = RDMA_READ(xdrp->xp_conn, &cl, WAIT);
			} else {
				status = RDMA_READ(xdrp->xp_conn, &cl, NOWAIT);
			}
			if (status != RDMA_SUCCESS) {
				DTRACE_PROBE1(
				    krpc__i__xdrrdma_getblk_readfailed,
				    int, status);
				retval = FALSE;
			}

			cle = cle->c_next;

		}

		/*
		 * sync the memory for cpu
		 */
		cl = *cls;
		cl.c_next = NULL;
		cl.c_len = cur_offset;
		if (clist_syncmem(
		    xdrp->xp_conn, &cl, CLIST_REG_DST) != RDMA_SUCCESS) {
			retval = FALSE;
		}
out:

		/*
		 * Deregister the chunks
		 */
		cle = cls;
		while (actual_segments != 0) {
			cl = *cle;
			cl.c_next = NULL;

			cl.c_regtype = CLIST_REG_DST;
			(void) clist_deregister(xdrp->xp_conn, &cl);

			cle = cle->c_next;
			actual_segments--;
		}

		if (alen) {
			cle = *(xdrp->xp_rcl_next);
			cle->w.c_saddr =
			    (uint64)(uintptr_t)cle->w.c_saddr + cle->c_len;
			cle->c_len = alen - cle->c_len;
		}

		return (retval);
	}

	if ((xdrs->x_handy -= len) < 0)
		return (FALSE);

	bcopy(xdrp->xp_offp, addr, len);

	xdrp->xp_offp += len;

	if (xdrp->xp_off != 0)
		xdrp->xp_off += len;

	return (TRUE);
}

/*
 * ENCODE some bytes into an XDR stream.  xp_min_chunk = 0 means the stream
 * of bytes contains no chunks to separate out, and if the bytes do not fit
 * in the supplied buffer, grow the buffer and free the old buffer.
 */
static bool_t
xdrrdma_putbytes(XDR *xdrs, caddr_t addr, int len)
{
	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);
	/*
	 * Is this stream accepting chunks?
	 * If so, does either of the two following conditions exist?
	 * - length of bytes to encode is greater than the min chunk size?
	 * - remaining space in this stream is shorter than length of
	 *   bytes to encode?
	 *
	 * If the above exists, then create a chunk for this encoding
	 * and save the addresses, etc.
	 */
	if (xdrp->xp_flags & XDR_RDMA_CHUNK &&
	    ((xdrp->xp_min_chunk != 0 &&
	    len >= xdrp->xp_min_chunk) ||
	    (xdrs->x_handy - len < 0))) {
		struct clist	*cle;
		int		offset = xdrp->xp_offp - xdrs->x_base;

		cle = clist_alloc();
		cle->c_xdroff = offset;
		cle->c_len = len;
		cle->w.c_saddr = (uint64)(uintptr_t)addr;
		cle->c_next = NULL;

		*(xdrp->xp_rcl_next) = cle;
		xdrp->xp_rcl_next = &(cle->c_next);

		return (TRUE);
	}
	/* Is there enough space to encode what is left? */
	if ((xdrs->x_handy -= len) < 0) {
		return (FALSE);
	}
	bcopy(addr, xdrp->xp_offp, len);
	xdrp->xp_offp += len;

	return (TRUE);
}

uint_t
xdrrdma_getpos(XDR *xdrs)
{
	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);

	return ((uint_t)((uintptr_t)xdrp->xp_offp - (uintptr_t)xdrs->x_base));
}

bool_t
xdrrdma_setpos(XDR *xdrs, uint_t pos)
{
	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);

	caddr_t		newaddr = xdrs->x_base + pos;
	caddr_t		lastaddr = xdrp->xp_offp + xdrs->x_handy;
	ptrdiff_t	diff;

	if (newaddr > lastaddr)
		return (FALSE);

	xdrp->xp_offp = newaddr;
	diff = lastaddr - newaddr;
	xdrs->x_handy = (int)diff;

	return (TRUE);
}

/* ARGSUSED */
static rpc_inline_t *
xdrrdma_inline(XDR *xdrs, int len)
{
	rpc_inline_t	*buf = NULL;
	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);
	struct clist	*cle = *(xdrp->xp_rcl_next);

	if (xdrs->x_op == XDR_DECODE) {
		/*
		 * Since chunks aren't in-line, check to see whether there is
		 * a chunk in the inline range.
		 */
		if (cle != NULL &&
		    cle->c_xdroff <= (xdrp->xp_offp - xdrs->x_base + len))
			return (NULL);
	}

	/* LINTED pointer alignment */
	buf = (rpc_inline_t *)xdrp->xp_offp;
	if (!IS_P2ALIGNED(buf, sizeof (int32_t)))
		return (NULL);

	if ((xdrs->x_handy < len) || (xdrp->xp_min_chunk != 0 &&
	    len >= xdrp->xp_min_chunk)) {
		return (NULL);
	} else {
		xdrs->x_handy -= len;
		xdrp->xp_offp += len;
		return (buf);
	}
}

static bool_t
xdrrdma_control(XDR *xdrs, int request, void *info)
{
	int32_t		*int32p;
	int		len, i;
	uint_t		in_flags;
	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);
	rdma_chunkinfo_t *rcip = NULL;
	rdma_wlist_conn_info_t *rwcip = NULL;
	rdma_chunkinfo_lengths_t *rcilp = NULL;
	struct uio *uiop;
	struct clist	*rwl = NULL, *first = NULL;
	struct clist	*prev = NULL;

	switch (request) {
	case XDR_PEEK:
		/*
		 * Return the next 4 byte unit in the XDR stream.
		 */
		if (xdrs->x_handy < sizeof (int32_t))
			return (FALSE);

		int32p = (int32_t *)info;
		*int32p = (int32_t)ntohl((uint32_t)
		    (*((int32_t *)(xdrp->xp_offp))));

		return (TRUE);

	case XDR_SKIPBYTES:
		/*
		 * Skip the next N bytes in the XDR stream.
		 */
		int32p = (int32_t *)info;
		len = RNDUP((int)(*int32p));
		if ((xdrs->x_handy -= len) < 0)
			return (FALSE);
		xdrp->xp_offp += len;

		return (TRUE);

	case XDR_RDMA_SET_FLAGS:
		/*
		 * Set the flags provided in *info in xp_flags for rdma
		 * xdr stream control.
		 */
		int32p = (int32_t *)info;
		in_flags = (uint_t)(*int32p);

		xdrp->xp_flags |= in_flags;
		return (TRUE);

	case XDR_RDMA_GET_FLAGS:
		/*
		 * Get the flags from xp_flags and return them through *info
		 */
		int32p = (int32_t *)info;

		*int32p = (int32_t)xdrp->xp_flags;
		return (TRUE);

	case XDR_RDMA_GET_CHUNK_LEN:
		rcilp = (rdma_chunkinfo_lengths_t *)info;
		rcilp->rcil_len = xdrp->xp_reply_chunk_len;
		rcilp->rcil_len_alt = xdrp->xp_reply_chunk_len_alt;

		return (TRUE);

	case XDR_RDMA_ADD_CHUNK:
		/*
		 * Store wlist information
		 */

		rcip = (rdma_chunkinfo_t *)info;

		DTRACE_PROBE2(krpc__i__xdrrdma__control__add__chunk,
		    rci_type_t, rcip->rci_type, uint32, rcip->rci_len);
		switch (rcip->rci_type) {
		case RCI_WRITE_UIO_CHUNK:
			xdrp->xp_reply_chunk_len_alt += rcip->rci_len;

			if ((rcip->rci_len + XDR_RDMA_BUF_OVERHEAD) <
			    xdrp->xp_min_chunk) {
				xdrp->xp_wcl = NULL;
				*(rcip->rci_clpp) = NULL;
				return (TRUE);
			}
			uiop = rcip->rci_a.rci_uiop;

			for (i = 0; i < uiop->uio_iovcnt; i++) {
				rwl = clist_alloc();
				if (first == NULL)
					first = rwl;
				rwl->c_len = uiop->uio_iov[i].iov_len;
				rwl->u.c_daddr =
				    (uint64)(uintptr_t)
				    (uiop->uio_iov[i].iov_base);
				/*
				 * if userspace address, put adspace ptr in
				 * clist.  If not, then do nothing since it's
				 * already set to NULL (from kmem_zalloc)
				 */
				if (uiop->uio_segflg == UIO_USERSPACE) {
					rwl->c_adspc = ttoproc(curthread)->p_as;
				}

				if (prev == NULL)
					prev = rwl;
				else {
					prev->c_next = rwl;
					prev = rwl;
				}
			}

			rwl->c_next = NULL;
			xdrp->xp_wcl = first;
			*(rcip->rci_clpp) = first;

			break;

		case RCI_WRITE_ADDR_CHUNK:
			rwl = clist_alloc();

			rwl->c_len = rcip->rci_len;
			rwl->u.c_daddr3 = rcip->rci_a.rci_addr;
			rwl->c_next = NULL;
			xdrp->xp_reply_chunk_len_alt += rcip->rci_len;

			xdrp->xp_wcl = rwl;
			*(rcip->rci_clpp) = rwl;

			break;

		case RCI_REPLY_CHUNK:
			xdrp->xp_reply_chunk_len += rcip->rci_len;
			break;
		}
		return (TRUE);

	case XDR_RDMA_GET_WLIST:
		*((struct clist **)info) = xdrp->xp_wcl;
		return (TRUE);

	case XDR_RDMA_SET_WLIST:
		xdrp->xp_wcl = (struct clist *)info;
		return (TRUE);

	case XDR_RDMA_GET_RLIST:
		*((struct clist **)info) = xdrp->xp_rcl;
		return (TRUE);

	case XDR_RDMA_GET_WCINFO:
		rwcip = (rdma_wlist_conn_info_t *)info;

		rwcip->rwci_wlist = xdrp->xp_wcl;
		rwcip->rwci_conn = xdrp->xp_conn;

		return (TRUE);

	default:
		return (FALSE);
	}
}

bool_t xdr_do_clist(XDR *, clist **);

/*
 * Not all fields in struct clist are interesting to the RPC over RDMA
 * protocol.  Only XDR the interesting fields.
 */
bool_t
xdr_clist(XDR *xdrs, clist *objp)
{
	if (!xdr_uint32(xdrs, &objp->c_xdroff))
		return (FALSE);
	if (!xdr_uint32(xdrs, &objp->c_smemhandle.mrc_rmr))
		return (FALSE);
	if (!xdr_uint32(xdrs, &objp->c_len))
		return (FALSE);
	if (!xdr_uint64(xdrs, &objp->w.c_saddr))
		return (FALSE);
	if (!xdr_do_clist(xdrs, &objp->c_next))
		return (FALSE);
	return (TRUE);
}

/*
 * The following two functions are forms of xdr_pointer()
 * and xdr_reference().  Since the generic versions just
 * kmem_alloc() a new clist, we actually want to use the
 * rdma_clist kmem_cache.
 */

/*
 * Generate or free a clist structure from the
 * kmem_cache "rdma_clist"
 */
bool_t
xdr_ref_clist(XDR *xdrs, caddr_t *pp)
{
	caddr_t loc = *pp;
	bool_t stat;

	if (loc == NULL) {
		switch (xdrs->x_op) {
		case XDR_FREE:
			return (TRUE);

		case XDR_DECODE:
			*pp = loc = (caddr_t)clist_alloc();
			break;

		case XDR_ENCODE:
			ASSERT(loc);
			break;
		}
	}

	stat = xdr_clist(xdrs, (struct clist *)loc);

	if (xdrs->x_op == XDR_FREE) {
		kmem_cache_free(clist_cache, loc);
		*pp = NULL;
	}
	return (stat);
}

/*
 * XDR a pointer to a possibly recursive clist.  This differs
 * from xdr_reference in that it can serialize/deserialize
 * trees correctly.
 *
 * What is sent is actually a union:
 *
 * union object_pointer switch (boolean b) {
 * case TRUE: object_data data;
 * case FALSE: void nothing;
 * }
 *
 * > objpp: Pointer to the pointer to the object.
 *
 */

bool_t
xdr_do_clist(XDR *xdrs, clist **objpp)
{
	bool_t more_data;

	more_data = (*objpp != NULL);
	if (!xdr_bool(xdrs, &more_data))
		return (FALSE);
	if (!more_data) {
		*objpp = NULL;
		return (TRUE);
	}
	return (xdr_ref_clist(xdrs, (caddr_t *)objpp));
}

uint_t
xdr_getbufsize(XDR *xdrs)
{
	xrdma_private_t *xdrp = (xrdma_private_t *)(xdrs->x_private);

	return ((uint_t)xdrp->xp_buf_size);
}

/* ARGSUSED */
bool_t
xdr_encode_rlist_svc(XDR *xdrs, clist *rlist)
{
	bool_t	vfalse = FALSE;

	ASSERT(rlist == NULL);
	return (xdr_bool(xdrs, &vfalse));
}

bool_t
xdr_encode_wlist(XDR *xdrs, clist *w)
{
	bool_t		vfalse = FALSE, vtrue = TRUE;
	int		i;
	uint_t		num_segment = 0;
	struct clist	*cl;

	/* does a wlist exist? */
	if (w == NULL) {
		return (xdr_bool(xdrs, &vfalse));
	}
	/* Encode N consecutive segments, 1, N, HLOO, ..., HLOO, 0 */
	if (!xdr_bool(xdrs, &vtrue))
		return (FALSE);

	for (cl = w; cl != NULL; cl = cl->c_next) {
		num_segment++;
	}

	if (!xdr_uint32(xdrs, &num_segment))
		return (FALSE);
	for (i = 0; i < num_segment; i++) {

		DTRACE_PROBE1(krpc__i__xdr_encode_wlist_len, uint_t, w->c_len);

		if (!xdr_uint32(xdrs, &w->c_dmemhandle.mrc_rmr))
			return (FALSE);

		if (!xdr_uint32(xdrs, &w->c_len))
			return (FALSE);

		if (!xdr_uint64(xdrs, &w->u.c_daddr))
			return (FALSE);

		w = w->c_next;
	}

	if (!xdr_bool(xdrs, &vfalse))
		return (FALSE);

	return (TRUE);
}


/*
 * Conditionally decode a RDMA WRITE chunk list from XDR stream.
 *
 * If the next boolean in the XDR stream is false there is no
 * RDMA WRITE chunk list present.  Otherwise iterate over the
 * array and for each entry: allocate a struct clist and decode.
 * Pass back an indication via wlist_exists if we have seen a
 * RDMA WRITE chunk list.
 */
bool_t
xdr_decode_wlist(XDR *xdrs, struct clist **w, bool_t *wlist_exists)
{
	struct clist	*tmp;
	bool_t		more = FALSE;
	uint32_t	seg_array_len;
	uint32_t	i;

	if (!xdr_bool(xdrs, &more))
		return (FALSE);

	/* is there a wlist? */
	if (more == FALSE) {
		*wlist_exists = FALSE;
		return (TRUE);
	}
	*wlist_exists = TRUE;

	if (!xdr_uint32(xdrs, &seg_array_len))
		return (FALSE);

	tmp = *w = clist_alloc();
	for (i = 0; i < seg_array_len; i++) {

		if (!xdr_uint32(xdrs, &tmp->c_dmemhandle.mrc_rmr))
			return (FALSE);
		if (!xdr_uint32(xdrs, &tmp->c_len))
			return (FALSE);

		DTRACE_PROBE1(krpc__i__xdr_decode_wlist_len,
		    uint_t, tmp->c_len);

		if (!xdr_uint64(xdrs, &tmp->u.c_daddr))
			return (FALSE);
		if (i < seg_array_len - 1) {
			tmp->c_next = clist_alloc();
			tmp = tmp->c_next;
		} else {
			tmp->c_next = NULL;
		}
	}

	more = FALSE;
	if (!xdr_bool(xdrs, &more))
		return (FALSE);

	return (TRUE);
}

/*
 * Server side RDMA WRITE list decode.
 * XDR context is memory ops
 */
bool_t
xdr_decode_wlist_svc(XDR *xdrs, struct clist **wclp, bool_t *wwl,
    uint32_t *total_length, CONN *conn)
{
	struct clist	*first, *ncl;
	char		*memp;
	uint32_t	num_wclist;
	uint32_t	wcl_length = 0;
	uint32_t	i;
	bool_t		more = FALSE;

	*wclp = NULL;
	*wwl = FALSE;
	*total_length = 0;

	if (!xdr_bool(xdrs, &more)) {
		return (FALSE);
	}

	if (more == FALSE) {
		return (TRUE);
	}

	*wwl = TRUE;

	if (!xdr_uint32(xdrs, &num_wclist)) {
		DTRACE_PROBE(krpc__e__xdrrdma__wlistsvc__listlength);
		return (FALSE);
	}

	first = ncl = clist_alloc();

	for (i = 0; i < num_wclist; i++) {

		if (!xdr_uint32(xdrs, &ncl->c_dmemhandle.mrc_rmr))
			goto err_out;
		if (!xdr_uint32(xdrs, &ncl->c_len))
			goto err_out;
		if (!xdr_uint64(xdrs, &ncl->u.c_daddr))
			goto err_out;

		if (ncl->c_len > MAX_SVC_XFER_SIZE) {
			DTRACE_PROBE(
			    krpc__e__xdrrdma__wlistsvc__chunklist_toobig);
			ncl->c_len = MAX_SVC_XFER_SIZE;
		}

		DTRACE_PROBE1(krpc__i__xdr_decode_wlist_svc_len,
		    uint_t, ncl->c_len);

		wcl_length += ncl->c_len;

		if (i < num_wclist - 1) {
			ncl->c_next = clist_alloc();
			ncl = ncl->c_next;
		}
	}

	if (!xdr_bool(xdrs, &more))
		goto err_out;

	first->rb_longbuf.type = RDMA_LONG_BUFFER;
	first->rb_longbuf.len =
	    wcl_length > WCL_BUF_LEN ? wcl_length : WCL_BUF_LEN;

	if (rdma_buf_alloc(conn, &first->rb_longbuf)) {
		clist_free(first);
		return (FALSE);
	}

	memp = first->rb_longbuf.addr;

	ncl = first;
	for (i = 0; i < num_wclist; i++) {
		ncl->w.c_saddr3 = (caddr_t)memp;
		memp += ncl->c_len;
		ncl = ncl->c_next;
	}

	*wclp = first;
	*total_length = wcl_length;
	return (TRUE);

err_out:
	clist_free(first);
	return (FALSE);
}

/*
 * XDR decode the long reply write chunk.
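 *
 * The long reply (reply chunk) has the same wire form as produced by
 * xdr_encode_reply_wchunk() further below: a boolean, a segment count,
 * and then one (rdma handle, length, offset) triple per segment, which
 * is what the loop below pulls apart.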
 */
bool_t
xdr_decode_reply_wchunk(XDR *xdrs, struct clist **clist)
{
	bool_t		have_rchunk = FALSE;
	struct clist	*first = NULL, *ncl = NULL;
	uint32_t	num_wclist;
	uint32_t	i;

	if (!xdr_bool(xdrs, &have_rchunk))
		return (FALSE);

	if (have_rchunk == FALSE)
		return (TRUE);

	if (!xdr_uint32(xdrs, &num_wclist)) {
		DTRACE_PROBE(krpc__e__xdrrdma__replywchunk__listlength);
		return (FALSE);
	}

	if (num_wclist == 0) {
		return (FALSE);
	}

	first = ncl = clist_alloc();

	for (i = 0; i < num_wclist; i++) {

		if (i > 0) {
			ncl->c_next = clist_alloc();
			ncl = ncl->c_next;
		}

		if (!xdr_uint32(xdrs, &ncl->c_dmemhandle.mrc_rmr))
			goto err_out;
		if (!xdr_uint32(xdrs, &ncl->c_len))
			goto err_out;
		if (!xdr_uint64(xdrs, &ncl->u.c_daddr))
			goto err_out;

		if (ncl->c_len > MAX_SVC_XFER_SIZE) {
			DTRACE_PROBE(
			    krpc__e__xdrrdma__replywchunk__chunklist_toobig);
			ncl->c_len = MAX_SVC_XFER_SIZE;
		}
		if (!(ncl->c_dmemhandle.mrc_rmr &&
		    (ncl->c_len > 0) && ncl->u.c_daddr))
			DTRACE_PROBE(
			    krpc__e__xdrrdma__replywchunk__invalid_segaddr);

		DTRACE_PROBE1(krpc__i__xdr_decode_reply_wchunk_c_len,
		    uint32_t, ncl->c_len);

	}
	*clist = first;
	return (TRUE);

err_out:
	clist_free(first);
	return (FALSE);
}


bool_t
xdr_encode_reply_wchunk(XDR *xdrs,
    struct clist *cl_longreply, uint32_t seg_array_len)
{
	int		i;
	bool_t		long_reply_exists = TRUE;
	uint32_t	length;
	uint64		offset;

	if (seg_array_len > 0) {
		if (!xdr_bool(xdrs, &long_reply_exists))
			return (FALSE);
		if (!xdr_uint32(xdrs, &seg_array_len))
			return (FALSE);

		for (i = 0; i < seg_array_len; i++) {
			if (!cl_longreply)
				return (FALSE);
			length = cl_longreply->c_len;
			offset = (uint64) cl_longreply->u.c_daddr;

			DTRACE_PROBE1(
			    krpc__i__xdr_encode_reply_wchunk_c_len,
			    uint32_t, length);

			if (!xdr_uint32(xdrs,
			    &cl_longreply->c_dmemhandle.mrc_rmr))
				return (FALSE);
			if (!xdr_uint32(xdrs, &length))
				return (FALSE);
			if (!xdr_uint64(xdrs, &offset))
				return (FALSE);
			cl_longreply = cl_longreply->c_next;
		}
	} else {
		long_reply_exists = FALSE;
		if (!xdr_bool(xdrs, &long_reply_exists))
			return (FALSE);
	}
	return (TRUE);
}

bool_t
xdrrdma_read_from_client(struct clist *rlist, CONN **conn, uint_t count)
{
	struct clist	*rdclist;
	struct clist	cl;
	uint_t		total_len = 0;
	uint32_t	status;
	bool_t		retval = TRUE;

	rlist->rb_longbuf.type = RDMA_LONG_BUFFER;
	rlist->rb_longbuf.len =
	    count > RCL_BUF_LEN ? count : RCL_BUF_LEN;

	if (rdma_buf_alloc(*conn, &rlist->rb_longbuf)) {
		return (FALSE);
	}

	/*
	 * The entire buffer is registered with the first chunk.
	 * Later chunks will use the same registered memory handle.
	 */

	cl = *rlist;
	cl.c_next = NULL;
	if (clist_register(*conn, &cl, CLIST_REG_DST) != RDMA_SUCCESS) {
		rdma_buf_free(*conn, &rlist->rb_longbuf);
		DTRACE_PROBE(
		    krpc__e__xdrrdma__readfromclient__clist__reg);
		return (FALSE);
	}

	rlist->c_regtype = CLIST_REG_DST;
	rlist->c_dmemhandle = cl.c_dmemhandle;
	rlist->c_dsynchandle = cl.c_dsynchandle;

	for (rdclist = rlist;
	    rdclist != NULL; rdclist = rdclist->c_next) {
		total_len += rdclist->c_len;
#if (defined(OBJ32)||defined(DEBUG32))
		rdclist->u.c_daddr3 =
		    (caddr_t)((char *)rlist->rb_longbuf.addr +
		    (uint32) rdclist->u.c_daddr3);
#else
		rdclist->u.c_daddr3 =
		    (caddr_t)((char *)rlist->rb_longbuf.addr +
		    (uint64) rdclist->u.c_daddr);

#endif
		cl = (*rdclist);
		cl.c_next = NULL;

		/*
		 * Use the same memory handle for all the chunks
		 */
		cl.c_dmemhandle = rlist->c_dmemhandle;
		cl.c_dsynchandle = rlist->c_dsynchandle;


		DTRACE_PROBE1(krpc__i__xdrrdma__readfromclient__buflen,
		    int, rdclist->c_len);

		/*
		 * Now read the chunk in
		 */
		if (rdclist->c_next == NULL) {
			status = RDMA_READ(*conn, &cl, WAIT);
		} else {
			status = RDMA_READ(*conn, &cl, NOWAIT);
		}
		if (status != RDMA_SUCCESS) {
			DTRACE_PROBE(
			    krpc__e__xdrrdma__readfromclient__readfailed);
			rdma_buf_free(*conn, &rlist->rb_longbuf);
			return (FALSE);
		}
	}

	cl = (*rlist);
	cl.c_next = NULL;
	cl.c_len = total_len;
	if (clist_syncmem(*conn, &cl, CLIST_REG_DST) != RDMA_SUCCESS) {
		retval = FALSE;
	}
	return (retval);
}

bool_t
xdrrdma_free_clist(CONN *conn, struct clist *clp)
{
	rdma_buf_free(conn, &clp->rb_longbuf);
	clist_free(clp);
	return (TRUE);
}

bool_t
xdrrdma_send_read_data(XDR *xdrs, uint_t data_len, struct clist *wcl)
{
	int status;
	xrdma_private_t	*xdrp = (xrdma_private_t *)(xdrs->x_private);
	struct xdr_ops *xops = xdrrdma_xops();
	struct clist *tcl, *wrcl, *cl;
	struct clist fcl;
	int rndup_present, rnduplen;

	rndup_present = 0;
	wrcl = NULL;

	/* caller is doing a sizeof */
	if (xdrs->x_ops != &xdrrdma_ops || xdrs->x_ops == xops)
		return (TRUE);

	/* copy of the first chunk */
	fcl = *wcl;
	fcl.c_next = NULL;

	/*
	 * The entire buffer is registered with the first chunk.
	 * Later chunks will use the same registered memory handle.
	 */

	status = clist_register(xdrp->xp_conn, &fcl, CLIST_REG_SOURCE);
	if (status != RDMA_SUCCESS) {
		return (FALSE);
	}

	wcl->c_regtype = CLIST_REG_SOURCE;
	wcl->c_smemhandle = fcl.c_smemhandle;
	wcl->c_ssynchandle = fcl.c_ssynchandle;

	/*
	 * Only transfer the read data, ignoring any trailing
	 * roundup chunks.  A bit of work, but it saves an
	 * unnecessary extra RDMA_WRITE containing only
	 * roundup bytes.
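	 *
	 * For example (numbers are illustrative only, echoing the
	 * 4109-byte WRITE at the top of this file): if the write list
	 * covers 4112 bytes but data_len is 4109, the trailing 3-byte
	 * roundup chunk is left out of the copied list built below and
	 * is never written.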
1412 */ 1413 1414 rnduplen = clist_len(wcl) - data_len; 1415 1416 if (rnduplen) { 1417 1418 tcl = wcl->c_next; 1419 1420 /* 1421 * Check if there is a trailing roundup chunk 1422 */ 1423 while (tcl) { 1424 if ((tcl->c_next == NULL) && (tcl->c_len == rnduplen)) { 1425 rndup_present = 1; 1426 break; 1427 } 1428 tcl = tcl->c_next; 1429 } 1430 1431 /* 1432 * Make a copy chunk list skipping the last chunk 1433 */ 1434 if (rndup_present) { 1435 cl = wcl; 1436 tcl = NULL; 1437 while (cl) { 1438 if (tcl == NULL) { 1439 tcl = clist_alloc(); 1440 wrcl = tcl; 1441 } else { 1442 tcl->c_next = clist_alloc(); 1443 tcl = tcl->c_next; 1444 } 1445 1446 *tcl = *cl; 1447 cl = cl->c_next; 1448 /* last chunk */ 1449 if (cl->c_next == NULL) 1450 break; 1451 } 1452 tcl->c_next = NULL; 1453 } 1454 } 1455 1456 if (wrcl == NULL) { 1457 /* No roundup chunks */ 1458 wrcl = wcl; 1459 } 1460 1461 /* 1462 * Set the registered memory handles for the 1463 * rest of the chunks same as the first chunk. 1464 */ 1465 tcl = wrcl->c_next; 1466 while (tcl) { 1467 tcl->c_smemhandle = fcl.c_smemhandle; 1468 tcl->c_ssynchandle = fcl.c_ssynchandle; 1469 tcl = tcl->c_next; 1470 } 1471 1472 /* 1473 * Sync the total len beginning from the first chunk. 1474 */ 1475 fcl.c_len = clist_len(wrcl); 1476 status = clist_syncmem(xdrp->xp_conn, &fcl, CLIST_REG_SOURCE); 1477 if (status != RDMA_SUCCESS) { 1478 return (FALSE); 1479 } 1480 1481 status = RDMA_WRITE(xdrp->xp_conn, wrcl, WAIT); 1482 1483 if (rndup_present) 1484 clist_free(wrcl); 1485 1486 if (status != RDMA_SUCCESS) { 1487 return (FALSE); 1488 } 1489 1490 return (TRUE); 1491 } 1492 1493 1494 /* 1495 * Reads one chunk at a time 1496 */ 1497 1498 static bool_t 1499 xdrrdma_read_a_chunk(XDR *xdrs, CONN **conn) 1500 { 1501 int status; 1502 int32_t len = 0; 1503 xrdma_private_t *xdrp = (xrdma_private_t *)(xdrs->x_private); 1504 struct clist *cle = *(xdrp->xp_rcl_next); 1505 struct clist *rclp = xdrp->xp_rcl; 1506 struct clist *clp; 1507 1508 /* 1509 * len is used later to decide xdr offset in 1510 * the chunk factoring any 4-byte XDR alignment 1511 * (See read chunk example top of this file) 1512 */ 1513 while (rclp != cle) { 1514 len += rclp->c_len; 1515 rclp = rclp->c_next; 1516 } 1517 1518 len = RNDUP(len) - len; 1519 1520 ASSERT(xdrs->x_handy <= 0); 1521 1522 /* 1523 * If this is the first chunk to contain the RPC 1524 * message set xp_off to the xdr offset of the 1525 * inline message. 1526 */ 1527 if (xdrp->xp_off == 0) 1528 xdrp->xp_off = (xdrp->xp_offp - xdrs->x_base); 1529 1530 if (cle == NULL || (cle->c_xdroff != xdrp->xp_off)) 1531 return (FALSE); 1532 1533 /* 1534 * Make a copy of the chunk to read from client. 1535 * Chunks are read on demand, so read only one 1536 * for now. 1537 */ 1538 1539 rclp = clist_alloc(); 1540 *rclp = *cle; 1541 rclp->c_next = NULL; 1542 1543 xdrp->xp_rcl_next = &cle->c_next; 1544 1545 /* 1546 * If there is a roundup present, then skip those 1547 * bytes when reading. 
1548 */ 1549 if (len) { 1550 rclp->w.c_saddr = 1551 (uint64)(uintptr_t)rclp->w.c_saddr + len; 1552 rclp->c_len = rclp->c_len - len; 1553 } 1554 1555 status = xdrrdma_read_from_client(rclp, conn, rclp->c_len); 1556 1557 if (status == FALSE) { 1558 clist_free(rclp); 1559 return (status); 1560 } 1561 1562 xdrp->xp_offp = rclp->rb_longbuf.addr; 1563 xdrs->x_base = xdrp->xp_offp; 1564 xdrs->x_handy = rclp->c_len; 1565 1566 /* 1567 * This copy of read chunks containing the XDR 1568 * message is freed later in xdrrdma_destroy() 1569 */ 1570 1571 if (xdrp->xp_rcl_xdr) { 1572 /* Add the chunk to end of the list */ 1573 clp = xdrp->xp_rcl_xdr; 1574 while (clp->c_next != NULL) 1575 clp = clp->c_next; 1576 clp->c_next = rclp; 1577 } else { 1578 xdrp->xp_rcl_xdr = rclp; 1579 } 1580 return (TRUE); 1581 } 1582 1583 static void 1584 xdrrdma_free_xdr_chunks(CONN *conn, struct clist *xdr_rcl) 1585 { 1586 struct clist *cl; 1587 1588 (void) clist_deregister(conn, xdr_rcl); 1589 1590 /* 1591 * Read chunks containing parts XDR message are 1592 * special: in case of multiple chunks each has 1593 * its own buffer. 1594 */ 1595 1596 cl = xdr_rcl; 1597 while (cl) { 1598 rdma_buf_free(conn, &cl->rb_longbuf); 1599 cl = cl->c_next; 1600 } 1601 1602 clist_free(xdr_rcl); 1603 } 1604