1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved. 27 * 28 * This software is available to you under a choice of one of two 29 * licenses. You may choose to be licensed under the terms of the GNU 30 * General Public License (GPL) Version 2, available from the file 31 * COPYING in the main directory of this source tree, or the 32 * OpenIB.org BSD license below: 33 * 34 * Redistribution and use in source and binary forms, with or 35 * without modification, are permitted provided that the following 36 * conditions are met: 37 * 38 * - Redistributions of source code must retain the above 39 * copyright notice, this list of conditions and the following 40 * disclaimer. 41 * 42 * - Redistributions in binary form must reproduce the above 43 * copyright notice, this list of conditions and the following 44 * disclaimer in the documentation and/or other materials 45 * provided with the distribution. 46 * 47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 48 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 49 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 50 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 51 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 52 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 53 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 54 * SOFTWARE. 55 * 56 */ 57 /* 58 * Sun elects to include this software in Sun product 59 * under the OpenIB BSD license. 60 * 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 63 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 66 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 67 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 68 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 69 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 70 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 71 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 72 * POSSIBILITY OF SUCH DAMAGE. 73 */ 74 75 #pragma ident "%Z%%M% %I% %E% SMI" 76 77 #include <sys/ib/clients/rds/rdsib_cm.h> 78 #include <sys/ib/clients/rds/rdsib_ib.h> 79 #include <sys/ib/clients/rds/rdsib_buf.h> 80 #include <sys/ib/clients/rds/rdsib_ep.h> 81 #include <sys/ib/clients/rds/rds_kstat.h> 82 83 /* 84 * This File contains the buffer management code 85 */ 86 87 #define DUMP_USER_PARAMS() \ 88 RDS_DPRINTF3(LABEL, "UserBufferSize = %d", UserBufferSize); \ 89 RDS_DPRINTF3(LABEL, "RdsPktSize = %d", RdsPktSize); \ 90 RDS_DPRINTF3(LABEL, "MaxRecvMemory = %d", MaxRecvMemory); \ 91 RDS_DPRINTF3(LABEL, "MaxDataSendBuffers = %d", MaxDataSendBuffers); \ 92 RDS_DPRINTF3(LABEL, "MaxDataRecvBuffers = %d", MaxDataRecvBuffers); \ 93 RDS_DPRINTF3(LABEL, "MaxCtrlSendBuffers = %d", MaxCtrlSendBuffers); \ 94 RDS_DPRINTF3(LABEL, "MaxCtrlRecvBuffers = %d", MaxCtrlRecvBuffers); \ 95 RDS_DPRINTF3(LABEL, "DataRecvBufferLWM = %d", DataRecvBufferLWM); \ 96 RDS_DPRINTF3(LABEL, "PendingRxPktsHWM = %d", PendingRxPktsHWM); \ 97 RDS_DPRINTF3(LABEL, "MinRnrRetry = %d", MinRnrRetry) 98 99 static void 100 rds_free_mblk(char *arg) 101 { 102 rds_buf_t *bp = (rds_buf_t *)(uintptr_t)arg; 103 104 /* Free the recv buffer */ 105 RDS_DPRINTF4("rds_free_mblk", "Enter: BP(%p)", bp); 106 ASSERT(bp->buf_state == RDS_RCVBUF_ONSOCKQ); 107 rds_free_recv_buf(bp, 1); 108 RDS_DECR_RXPKTS_PEND(1); 109 RDS_DPRINTF4("rds_free_mblk", "Return: BP(%p)", bp); 110 } 111 112 void 113 rds_free_recv_caches(rds_state_t *statep) 114 { 115 rds_hca_t *hcap; 116 int ret; 117 118 RDS_DPRINTF4("rds_free_recv_caches", "Enter"); 119 120 mutex_enter(&rds_dpool.pool_lock); 121 if (rds_dpool.pool_memp == NULL) { 122 RDS_DPRINTF2("rds_free_recv_caches", "Caches are empty"); 123 mutex_exit(&rds_dpool.pool_lock); 124 return; 125 } 126 127 /* 128 * All buffers must have been freed as all sessions are closed 129 * and destroyed 130 */ 131 ASSERT(rds_dpool.pool_nbusy == 0); 132 RDS_DPRINTF2("rds_free_recv_caches", "Data Pool has " 133 "pending buffers: %d", rds_dpool.pool_nbusy); 134 while (rds_dpool.pool_nbusy != 0) { 135 mutex_exit(&rds_dpool.pool_lock); 136 delay(drv_usectohz(1000000)); 137 mutex_enter(&rds_dpool.pool_lock); 138 } 139 140 hcap = statep->rds_hcalistp; 141 while (hcap != NULL) { 142 if (hcap->hca_mrhdl != NULL) { 143 ret = ibt_deregister_mr(hcap->hca_hdl, 144 hcap->hca_mrhdl); 145 if (ret == IBT_SUCCESS) { 146 hcap->hca_mrhdl = NULL; 147 hcap->hca_lkey = 0; 148 hcap->hca_rkey = 0; 149 } else { 150 RDS_DPRINTF2(LABEL, "ibt_deregister_mr " 151 "failed: %d, mrhdl: 0x%p", ret, 152 hcap->hca_mrhdl); 153 } 154 } 155 hcap = hcap->hca_nextp; 156 } 157 158 kmem_free(rds_dpool.pool_bufmemp, (rds_dpool.pool_nbuffers + 159 rds_cpool.pool_nbuffers) * sizeof (rds_buf_t)); 160 rds_dpool.pool_bufmemp = NULL; 161 162 kmem_free(rds_dpool.pool_memp, rds_dpool.pool_memsize); 163 rds_dpool.pool_memp = NULL; 164 165 mutex_exit(&rds_dpool.pool_lock); 166 167 RDS_DPRINTF4("rds_free_recv_caches", "Return"); 168 } 169 170 int 171 rds_init_recv_caches(rds_state_t *statep) 172 { 173 uint8_t *mp; 174 rds_buf_t *bp; 175 rds_hca_t *hcap; 176 uint32_t nsessions; 177 uint_t ix; 178 uint_t ndatarx, nctrlrx; 179 uint8_t *memp; 180 uint_t memsize, nbuf; 181 rds_buf_t *bufmemp; 182 ibt_mr_attr_t mem_attr; 183 ibt_mr_desc_t mem_desc; 184 int ret; 185 186 RDS_DPRINTF4("rds_init_recv_caches", "Enter"); 187 188 DUMP_USER_PARAMS(); 189 190 mutex_enter(&rds_dpool.pool_lock); 191 if (rds_dpool.pool_memp != NULL) { 192 RDS_DPRINTF2("rds_init_recv_caches", "Pools are already " 193 "initialized"); 194 mutex_exit(&rds_dpool.pool_lock); 195 return (0); 196 } 197 198 /* Max number of receive buffers on the system */ 199 ndatarx = (MaxRecvMemory * 1024)/UserBufferSize; 200 201 /* 202 * High water mark for the receive buffers in the system. If the 203 * number of buffers used crosses this mark then all sockets in 204 * would be stalled. The port quota for the sockets is set based 205 * on this limit. 206 */ 207 rds_rx_pkts_pending_hwm = (PendingRxPktsHWM * ndatarx)/100; 208 209 /* nsessions can never be less than 1 */ 210 nsessions = ndatarx/MaxDataRecvBuffers; 211 nctrlrx = (nsessions + 1) * MaxCtrlRecvBuffers; 212 213 RDS_DPRINTF3(LABEL, "Number of Possible Sessions: %d", nsessions); 214 215 /* Add the hdr */ 216 RdsPktSize = UserBufferSize + RDS_DATA_HDR_SZ; 217 218 memsize = (ndatarx * RdsPktSize) + (nctrlrx * RDS_CTRLPKT_SIZE); 219 nbuf = ndatarx + nctrlrx; 220 RDS_DPRINTF3(LABEL, "RDS Buffer Pool Memory: %lld", memsize); 221 RDS_DPRINTF3(LABEL, "Total Buffers: %d", nbuf); 222 223 memp = (uint8_t *)kmem_zalloc(memsize, KM_NOSLEEP); 224 if (memp == NULL) { 225 RDS_DPRINTF1(LABEL, "RDS Memory allocation failed"); 226 mutex_exit(&rds_dpool.pool_lock); 227 return (-1); 228 } 229 230 RDS_DPRINTF3(LABEL, "RDS Buffer Entries Memory: %lld", 231 nbuf * sizeof (rds_buf_t)); 232 233 /* allocate memory for buffer entries */ 234 bufmemp = (rds_buf_t *)kmem_zalloc(nbuf * sizeof (rds_buf_t), 235 KM_SLEEP); 236 237 /* register the memory with all HCAs */ 238 mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)memp; 239 mem_attr.mr_len = memsize; 240 mem_attr.mr_as = NULL; 241 mem_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE; 242 243 hcap = statep->rds_hcalistp; 244 while (hcap != NULL) { 245 ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, 246 &mem_attr, &hcap->hca_mrhdl, &mem_desc); 247 if (ret != IBT_SUCCESS) { 248 RDS_DPRINTF2(LABEL, "ibt_register_mr failed: %d", ret); 249 return (-1); 250 } 251 252 hcap->hca_lkey = mem_desc.md_lkey; 253 hcap->hca_rkey = mem_desc.md_rkey; 254 255 hcap = hcap->hca_nextp; 256 } 257 258 /* Initialize data pool */ 259 rds_dpool.pool_memp = memp; 260 rds_dpool.pool_memsize = memsize; 261 rds_dpool.pool_bufmemp = bufmemp; 262 rds_dpool.pool_nbuffers = ndatarx; 263 rds_dpool.pool_nbusy = 0; 264 rds_dpool.pool_nfree = ndatarx; 265 266 /* chain the buffers */ 267 mp = memp; 268 bp = bufmemp; 269 for (ix = 0; ix < ndatarx; ix++) { 270 bp[ix].buf_nextp = &bp[ix + 1]; 271 bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 272 bp[ix].buf_state = RDS_RCVBUF_FREE; 273 bp[ix].buf_frtn.free_func = rds_free_mblk; 274 bp[ix].buf_frtn.free_arg = (char *)&bp[ix]; 275 mp = mp + RdsPktSize; 276 } 277 bp[ndatarx - 1].buf_nextp = NULL; 278 rds_dpool.pool_headp = &bp[0]; 279 rds_dpool.pool_tailp = &bp[ndatarx - 1]; 280 281 /* Initialize ctrl pool */ 282 rds_cpool.pool_nbuffers = nctrlrx; 283 rds_cpool.pool_nbusy = 0; 284 rds_cpool.pool_nfree = nctrlrx; 285 286 /* chain the buffers */ 287 for (ix = ndatarx; ix < nbuf - 1; ix++) { 288 bp[ix].buf_nextp = &bp[ix + 1]; 289 bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 290 mp = mp + RDS_CTRLPKT_SIZE; 291 } 292 bp[nbuf - 1].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 293 bp[nbuf - 1].buf_nextp = NULL; 294 rds_cpool.pool_headp = &bp[ndatarx]; 295 rds_cpool.pool_tailp = &bp[nbuf - 1]; 296 297 mutex_exit(&rds_dpool.pool_lock); 298 299 RDS_DPRINTF3(LABEL, "rdsmemp start: %p end: %p", memp, mp); 300 RDS_DPRINTF4("rds_init_recv_caches", "Return"); 301 return (0); 302 } 303 304 void 305 rds_free_send_pool(rds_ep_t *ep) 306 { 307 rds_bufpool_t *pool; 308 rds_hca_t *hcap; 309 int ret; 310 311 pool = &ep->ep_sndpool; 312 313 mutex_enter(&pool->pool_lock); 314 if (pool->pool_memp == NULL) { 315 mutex_exit(&pool->pool_lock); 316 RDS_DPRINTF2("rds_free_send_pool", 317 "EP(%p) DOUBLE Free on Send Pool", ep); 318 return; 319 } 320 321 /* get the hcap for the HCA hosting this channel */ 322 hcap = rds_get_hcap(rdsib_statep, ep->ep_hca_guid); 323 if (hcap == NULL) { 324 RDS_DPRINTF2("rds_free_send_pool", "HCA (0x%llx) not found", 325 ep->ep_hca_guid); 326 } else { 327 ret = ibt_deregister_mr(hcap->hca_hdl, ep->ep_snd_mrhdl); 328 if (ret != IBT_SUCCESS) { 329 RDS_DPRINTF2(LABEL, 330 "ibt_deregister_mr failed: %d, mrhdl: 0x%p", 331 ret, ep->ep_snd_mrhdl); 332 } 333 334 if (ep->ep_ack_addr) { 335 ret = ibt_deregister_mr(hcap->hca_hdl, ep->ep_ackhdl); 336 if (ret != IBT_SUCCESS) { 337 RDS_DPRINTF2(LABEL, 338 "ibt_deregister_mr ackhdl failed: %d, " 339 "mrhdl: 0x%p", ret, ep->ep_ackhdl); 340 } 341 342 kmem_free((void *)ep->ep_ack_addr, sizeof (uintptr_t)); 343 ep->ep_ack_addr = NULL; 344 } 345 } 346 347 kmem_free(pool->pool_memp, pool->pool_memsize); 348 kmem_free(pool->pool_bufmemp, 349 pool->pool_nbuffers * sizeof (rds_buf_t)); 350 pool->pool_memp = NULL; 351 pool->pool_bufmemp = NULL; 352 mutex_exit(&pool->pool_lock); 353 } 354 355 int 356 rds_init_send_pool(rds_ep_t *ep) 357 { 358 uint8_t *mp; 359 rds_buf_t *bp; 360 rds_hca_t *hcap; 361 uint_t ix, rcv_len; 362 ibt_mr_attr_t mem_attr; 363 ibt_mr_desc_t mem_desc; 364 uint8_t *memp; 365 rds_buf_t *bufmemp; 366 uintptr_t ack_addr = NULL; 367 uint_t memsize; 368 uint_t nbuf; 369 rds_bufpool_t *spool; 370 rds_data_hdr_t *pktp; 371 int ret; 372 373 RDS_DPRINTF2("rds_init_send_pool", "Enter"); 374 375 spool = &ep->ep_sndpool; 376 377 ASSERT(spool->pool_memp == NULL); 378 379 /* get the hcap for the HCA hosting this channel */ 380 hcap = rds_get_hcap(rdsib_statep, ep->ep_hca_guid); 381 if (hcap == NULL) { 382 RDS_DPRINTF2("rds_init_send_pool", "HCA (0x%llx) not found", 383 ep->ep_hca_guid); 384 return (-1); 385 } 386 387 if (ep->ep_type == RDS_EP_TYPE_DATA) { 388 spool->pool_nbuffers = MaxDataSendBuffers; 389 spool->pool_nbusy = 0; 390 spool->pool_nfree = MaxDataSendBuffers; 391 memsize = (MaxDataSendBuffers * RdsPktSize) + 392 sizeof (uintptr_t); 393 rcv_len = RdsPktSize; 394 } else { 395 spool->pool_nbuffers = MaxCtrlSendBuffers; 396 spool->pool_nbusy = 0; 397 spool->pool_nfree = MaxCtrlSendBuffers; 398 memsize = MaxCtrlSendBuffers * RDS_CTRLPKT_SIZE; 399 rcv_len = RDS_CTRLPKT_SIZE; 400 } 401 nbuf = spool->pool_nbuffers; 402 403 RDS_DPRINTF3(LABEL, "RDS Send Pool Memory: %lld", memsize); 404 405 memp = (uint8_t *)kmem_zalloc(memsize, KM_NOSLEEP); 406 if (memp == NULL) { 407 RDS_DPRINTF1(LABEL, "RDS Send Memory allocation failed"); 408 return (-1); 409 } 410 411 RDS_DPRINTF3(LABEL, "RDS Buffer Entries Memory: %lld", 412 nbuf * sizeof (rds_buf_t)); 413 414 /* allocate memory for buffer entries */ 415 bufmemp = (rds_buf_t *)kmem_zalloc(nbuf * sizeof (rds_buf_t), 416 KM_SLEEP); 417 418 if (ep->ep_type == RDS_EP_TYPE_DATA) { 419 ack_addr = (uintptr_t)kmem_zalloc(sizeof (uintptr_t), KM_SLEEP); 420 421 /* register the memory with the HCA for this channel */ 422 mem_attr.mr_vaddr = (ib_vaddr_t)ack_addr; 423 mem_attr.mr_len = sizeof (uintptr_t); 424 mem_attr.mr_as = NULL; 425 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE | 426 IBT_MR_ENABLE_REMOTE_WRITE; 427 428 ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, 429 &mem_attr, &ep->ep_ackhdl, &mem_desc); 430 if (ret != IBT_SUCCESS) { 431 RDS_DPRINTF2("rds_init_send_pool", 432 "EP(%p): ibt_register_mr for ack failed: %d", 433 ep, ret); 434 kmem_free(memp, memsize); 435 kmem_free(bufmemp, nbuf * sizeof (rds_buf_t)); 436 kmem_free((void *)ack_addr, sizeof (uintptr_t)); 437 return (-1); 438 } 439 ep->ep_ack_rkey = mem_desc.md_rkey; 440 ep->ep_ack_addr = ack_addr; 441 } 442 443 /* register the memory with the HCA for this channel */ 444 mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)memp; 445 mem_attr.mr_len = memsize; 446 mem_attr.mr_as = NULL; 447 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE; 448 449 ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, 450 &mem_attr, &ep->ep_snd_mrhdl, &mem_desc); 451 if (ret != IBT_SUCCESS) { 452 RDS_DPRINTF2("rds_init_send_pool", "EP(%p): ibt_register_mr " 453 "failed: %d", ep, ret); 454 kmem_free(memp, memsize); 455 kmem_free(bufmemp, nbuf * sizeof (rds_buf_t)); 456 if (ack_addr != NULL) 457 kmem_free((void *)ack_addr, sizeof (uintptr_t)); 458 return (-1); 459 } 460 ep->ep_snd_lkey = mem_desc.md_lkey; 461 462 463 /* Initialize the pool */ 464 spool->pool_memp = memp; 465 spool->pool_memsize = memsize; 466 spool->pool_bufmemp = bufmemp; 467 spool->pool_sqpoll_pending = B_FALSE; 468 469 /* chain the buffers and initialize them */ 470 mp = memp; 471 bp = bufmemp; 472 473 if (ep->ep_type == RDS_EP_TYPE_DATA) { 474 for (ix = 0; ix < nbuf - 1; ix++) { 475 bp[ix].buf_nextp = &bp[ix + 1]; 476 bp[ix].buf_ep = ep; 477 bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 478 bp[ix].buf_ds.ds_key = ep->ep_snd_lkey; 479 bp[ix].buf_state = RDS_SNDBUF_FREE; 480 pktp = (rds_data_hdr_t *)(uintptr_t)mp; 481 pktp->dh_bufid = (uintptr_t)&bp[ix]; 482 mp = mp + rcv_len; 483 } 484 bp[nbuf - 1].buf_nextp = NULL; 485 bp[nbuf - 1].buf_ep = ep; 486 bp[nbuf - 1].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 487 bp[nbuf - 1].buf_ds.ds_key = ep->ep_snd_lkey; 488 bp[nbuf - 1].buf_state = RDS_SNDBUF_FREE; 489 pktp = (rds_data_hdr_t *)(uintptr_t)mp; 490 pktp->dh_bufid = (uintptr_t)&bp[nbuf - 1]; 491 492 spool->pool_headp = &bp[0]; 493 spool->pool_tailp = &bp[nbuf - 1]; 494 495 mp = mp + rcv_len; 496 ep->ep_ackds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 497 ep->ep_ackds.ds_key = ep->ep_snd_lkey; 498 ep->ep_ackds.ds_len = sizeof (uintptr_t); 499 500 *(uintptr_t *)ep->ep_ack_addr = (uintptr_t)spool->pool_tailp; 501 } else { 502 /* control send pool */ 503 for (ix = 0; ix < nbuf - 1; ix++) { 504 bp[ix].buf_nextp = &bp[ix + 1]; 505 bp[ix].buf_ep = ep; 506 bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 507 bp[ix].buf_ds.ds_key = ep->ep_snd_lkey; 508 bp[ix].buf_state = RDS_SNDBUF_FREE; 509 mp = mp + rcv_len; 510 } 511 bp[nbuf - 1].buf_nextp = NULL; 512 bp[nbuf - 1].buf_ep = ep; 513 bp[nbuf - 1].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 514 bp[nbuf - 1].buf_ds.ds_key = ep->ep_snd_lkey; 515 bp[nbuf - 1].buf_state = RDS_SNDBUF_FREE; 516 spool->pool_headp = &bp[0]; 517 spool->pool_tailp = &bp[nbuf - 1]; 518 } 519 520 RDS_DPRINTF3(LABEL, "rdsmemp start: %p end: %p", memp, mp); 521 RDS_DPRINTF2("rds_init_send_pool", "Return"); 522 523 return (0); 524 } 525 526 void 527 rds_free_recv_pool(rds_ep_t *ep) 528 { 529 rds_bufpool_t *pool; 530 531 if (ep->ep_type == RDS_EP_TYPE_DATA) { 532 pool = &rds_dpool; 533 } else { 534 pool = &rds_cpool; 535 } 536 537 mutex_enter(&ep->ep_rcvpool.pool_lock); 538 if (ep->ep_rcvpool.pool_nfree != 0) { 539 rds_free_buf(pool, ep->ep_rcvpool.pool_headp, 540 ep->ep_rcvpool.pool_nfree); 541 ep->ep_rcvpool.pool_nfree = 0; 542 ep->ep_rcvpool.pool_headp = NULL; 543 ep->ep_rcvpool.pool_tailp = NULL; 544 } 545 mutex_exit(&ep->ep_rcvpool.pool_lock); 546 } 547 548 int 549 rds_init_recv_pool(rds_ep_t *ep) 550 { 551 rds_bufpool_t *rpool; 552 rds_qp_t *recvqp; 553 554 recvqp = &ep->ep_recvqp; 555 rpool = &ep->ep_rcvpool; 556 if (ep->ep_type == RDS_EP_TYPE_DATA) { 557 recvqp->qp_depth = MaxDataRecvBuffers; 558 recvqp->qp_level = 0; 559 recvqp->qp_lwm = (DataRecvBufferLWM * MaxDataRecvBuffers)/100; 560 recvqp->qp_taskqpending = B_FALSE; 561 562 rpool->pool_nbuffers = MaxDataRecvBuffers; 563 rpool->pool_nbusy = 0; 564 rpool->pool_nfree = 0; 565 } else { 566 recvqp->qp_depth = MaxCtrlRecvBuffers; 567 recvqp->qp_level = 0; 568 recvqp->qp_lwm = (CtrlRecvBufferLWM * MaxCtrlRecvBuffers)/100; 569 recvqp->qp_taskqpending = B_FALSE; 570 571 rpool->pool_nbuffers = MaxCtrlRecvBuffers; 572 rpool->pool_nbusy = 0; 573 rpool->pool_nfree = 0; 574 } 575 576 return (0); 577 } 578 579 /* Free buffers to the global pool, either cpool or dpool */ 580 void 581 rds_free_buf(rds_bufpool_t *pool, rds_buf_t *bp, uint_t nbuf) 582 { 583 uint_t ix; 584 585 RDS_DPRINTF4("rds_free_buf", "Enter"); 586 587 ASSERT(nbuf != 0); 588 589 mutex_enter(&pool->pool_lock); 590 591 if (pool->pool_nfree != 0) { 592 pool->pool_tailp->buf_nextp = bp; 593 } else { 594 pool->pool_headp = bp; 595 } 596 597 if (nbuf == 1) { 598 ASSERT(bp->buf_state == RDS_RCVBUF_FREE); 599 bp->buf_ep = NULL; 600 bp->buf_nextp = NULL; 601 pool->pool_tailp = bp; 602 } else { 603 for (ix = 1; ix < nbuf; ix++) { 604 ASSERT(bp->buf_state == RDS_RCVBUF_FREE); 605 bp->buf_ep = NULL; 606 bp = bp->buf_nextp; 607 } 608 ASSERT(bp->buf_state == RDS_RCVBUF_FREE); 609 bp->buf_ep = NULL; 610 bp->buf_nextp = NULL; 611 pool->pool_tailp = bp; 612 } 613 /* tail is always the last buffer */ 614 pool->pool_tailp->buf_nextp = NULL; 615 616 pool->pool_nfree += nbuf; 617 pool->pool_nbusy -= nbuf; 618 619 mutex_exit(&pool->pool_lock); 620 621 RDS_DPRINTF4("rds_free_buf", "Return"); 622 } 623 624 /* Get buffers from the global pools, either cpool or dpool */ 625 rds_buf_t * 626 rds_get_buf(rds_bufpool_t *pool, uint_t nbuf, uint_t *nret) 627 { 628 rds_buf_t *bp = NULL, *bp1; 629 uint_t ix; 630 631 RDS_DPRINTF4("rds_get_buf", "Enter"); 632 633 mutex_enter(&pool->pool_lock); 634 635 RDS_DPRINTF3("rds_get_buf", "Available: %d Needed: %d", 636 pool->pool_nfree, nbuf); 637 638 if (nbuf < pool->pool_nfree) { 639 *nret = nbuf; 640 641 bp1 = pool->pool_headp; 642 for (ix = 1; ix < nbuf; ix++) { 643 bp1 = bp1->buf_nextp; 644 } 645 646 bp = pool->pool_headp; 647 pool->pool_headp = bp1->buf_nextp; 648 bp1->buf_nextp = NULL; 649 650 pool->pool_nfree -= nbuf; 651 pool->pool_nbusy += nbuf; 652 } else if (nbuf >= pool->pool_nfree) { 653 *nret = pool->pool_nfree; 654 655 bp = pool->pool_headp; 656 657 pool->pool_headp = NULL; 658 pool->pool_tailp = NULL; 659 660 pool->pool_nbusy += pool->pool_nfree; 661 pool->pool_nfree = 0; 662 } 663 664 mutex_exit(&pool->pool_lock); 665 666 RDS_DPRINTF4("rds_get_buf", "Return"); 667 668 return (bp); 669 } 670 671 boolean_t 672 rds_is_recvq_empty(rds_ep_t *ep, boolean_t wait) 673 { 674 rds_qp_t *recvqp; 675 rds_bufpool_t *rpool; 676 boolean_t ret = B_TRUE; 677 678 recvqp = &ep->ep_recvqp; 679 mutex_enter(&recvqp->qp_lock); 680 RDS_DPRINTF2("rds_is_recvq_empty", "EP(%p): QP has %d WRs", 681 ep, recvqp->qp_level); 682 if (wait) { 683 /* wait until the RQ is empty */ 684 while (recvqp->qp_level != 0) { 685 /* wait one second and try again */ 686 mutex_exit(&recvqp->qp_lock); 687 delay(drv_usectohz(1000000)); 688 mutex_enter(&recvqp->qp_lock); 689 } 690 } else if (recvqp->qp_level != 0) { 691 ret = B_FALSE; 692 } 693 mutex_exit(&recvqp->qp_lock); 694 695 rpool = &ep->ep_rcvpool; 696 mutex_enter(&rpool->pool_lock); 697 RDS_DPRINTF2("rds_is_recvq_empty", "EP(%p): " 698 "There are %d pending buffers on sockqs", ep, rpool->pool_nbusy); 699 if (wait) { 700 /* Wait for all buffers to be freed by sockfs */ 701 while (rpool->pool_nbusy != 0) { 702 /* wait one second and try again */ 703 mutex_exit(&rpool->pool_lock); 704 delay(drv_usectohz(1000000)); 705 mutex_enter(&rpool->pool_lock); 706 } 707 } else if (rpool->pool_nbusy != 0) { 708 ret = B_FALSE; 709 } 710 mutex_exit(&rpool->pool_lock); 711 712 return (ret); 713 } 714 715 boolean_t 716 rds_is_sendq_empty(rds_ep_t *ep, uint_t wait) 717 { 718 rds_bufpool_t *spool; 719 rds_buf_t *bp; 720 boolean_t ret1 = B_TRUE; 721 722 /* check if all the sends completed */ 723 spool = &ep->ep_sndpool; 724 mutex_enter(&spool->pool_lock); 725 RDS_DPRINTF2("rds_is_sendq_empty", "EP(%p): " 726 "Send Pool contains: %d", ep, spool->pool_nbusy); 727 if (wait) { 728 while (spool->pool_nbusy != 0) { 729 if (rds_no_interrupts) { 730 /* wait one second and try again */ 731 delay(drv_usectohz(1000000)); 732 rds_poll_send_completions(ep->ep_sendcq, ep, 733 B_TRUE); 734 } else { 735 /* wait one second and try again */ 736 mutex_exit(&spool->pool_lock); 737 delay(drv_usectohz(1000000)); 738 mutex_enter(&spool->pool_lock); 739 } 740 } 741 742 if ((wait == 2) && (ep->ep_type == RDS_EP_TYPE_DATA)) { 743 rds_buf_t *ackbp; 744 745 /* 746 * If the last one is acknowledged then everything 747 * is acknowledged 748 */ 749 bp = spool->pool_tailp; 750 ackbp = *(rds_buf_t **)ep->ep_ack_addr; 751 RDS_DPRINTF2("rds_is_sendq_empty", "EP(%p): " 752 "Checking for acknowledgements", ep); 753 while (bp != ackbp) { 754 RDS_DPRINTF2("rds_is_sendq_empty", 755 "EP(%p) BP(0x%p/0x%p) last " 756 "sent/acknowledged", ep, bp, ackbp); 757 mutex_exit(&spool->pool_lock); 758 delay(drv_usectohz(1000000)); 759 mutex_enter(&spool->pool_lock); 760 761 bp = spool->pool_tailp; 762 ackbp = *(rds_buf_t **)ep->ep_ack_addr; 763 } 764 } 765 } else if (spool->pool_nbusy != 0) { 766 ret1 = B_FALSE; 767 } 768 mutex_exit(&spool->pool_lock); 769 770 /* check if all the rdma acks completed */ 771 mutex_enter(&ep->ep_lock); 772 RDS_DPRINTF2("rds_is_sendq_empty", "EP(%p): " 773 "Outstanding RDMA Acks: %d", ep, ep->ep_rdmacnt); 774 if (wait) { 775 while (ep->ep_rdmacnt != 0) { 776 if (rds_no_interrupts) { 777 /* wait one second and try again */ 778 delay(drv_usectohz(1000000)); 779 rds_poll_send_completions(ep->ep_sendcq, ep, 780 B_FALSE); 781 } else { 782 /* wait one second and try again */ 783 mutex_exit(&ep->ep_lock); 784 delay(drv_usectohz(1000000)); 785 mutex_enter(&ep->ep_lock); 786 } 787 } 788 } else if (ep->ep_rdmacnt != 0) { 789 ret1 = B_FALSE; 790 } 791 mutex_exit(&ep->ep_lock); 792 793 return (ret1); 794 } 795 796 /* Get buffers from the send pool */ 797 rds_buf_t * 798 rds_get_send_buf(rds_ep_t *ep, uint_t nbuf) 799 { 800 rds_buf_t *bp = NULL, *bp1; 801 rds_bufpool_t *spool; 802 uint_t waittime = rds_waittime_ms * 1000; 803 uint_t ix; 804 int ret; 805 806 RDS_DPRINTF4("rds_get_send_buf", "Enter: EP(%p) Buffers requested: %d", 807 ep, nbuf); 808 809 spool = &ep->ep_sndpool; 810 mutex_enter(&spool->pool_lock); 811 812 if (rds_no_interrupts) { 813 if ((spool->pool_sqpoll_pending == B_FALSE) && 814 (spool->pool_nbusy > 815 (spool->pool_nbuffers * rds_poll_percent_full)/100)) { 816 spool->pool_sqpoll_pending = B_TRUE; 817 mutex_exit(&spool->pool_lock); 818 rds_poll_send_completions(ep->ep_sendcq, ep, B_FALSE); 819 mutex_enter(&spool->pool_lock); 820 spool->pool_sqpoll_pending = B_FALSE; 821 } 822 } 823 824 if (spool->pool_nfree < nbuf) { 825 /* wait for buffers to become available */ 826 spool->pool_cv_count += nbuf; 827 ret = cv_timedwait_sig(&spool->pool_cv, &spool->pool_lock, 828 ddi_get_lbolt() + drv_usectohz(waittime)); 829 /* ret = cv_wait_sig(&spool->pool_cv, &spool->pool_lock); */ 830 if (ret == 0) { 831 /* signal pending */ 832 spool->pool_cv_count -= nbuf; 833 mutex_exit(&spool->pool_lock); 834 return (NULL); 835 } 836 837 spool->pool_cv_count -= nbuf; 838 } 839 840 /* Have the number of buffers needed */ 841 if (spool->pool_nfree > nbuf) { 842 bp = spool->pool_headp; 843 844 if (ep->ep_type == RDS_EP_TYPE_DATA) { 845 rds_buf_t *ackbp; 846 ackbp = *(rds_buf_t **)ep->ep_ack_addr; 847 848 /* check if all the needed buffers are acknowledged */ 849 bp1 = bp; 850 for (ix = 0; ix < nbuf; ix++) { 851 if ((bp1 == ackbp) || 852 (bp1->buf_state != RDS_SNDBUF_FREE)) { 853 /* 854 * The buffer is not yet signalled or 855 * is not yet acknowledged 856 */ 857 RDS_DPRINTF5("rds_get_send_buf", 858 "EP(%p) Buffer (%p) not yet " 859 "acked/completed", ep, bp1); 860 mutex_exit(&spool->pool_lock); 861 return (NULL); 862 } 863 864 bp1 = bp1->buf_nextp; 865 } 866 } 867 868 /* mark the buffers as pending */ 869 bp1 = bp; 870 for (ix = 1; ix < nbuf; ix++) { 871 ASSERT(bp1->buf_state == RDS_SNDBUF_FREE); 872 bp1->buf_state = RDS_SNDBUF_PENDING; 873 bp1 = bp1->buf_nextp; 874 } 875 ASSERT(bp1->buf_state == RDS_SNDBUF_FREE); 876 bp1->buf_state = RDS_SNDBUF_PENDING; 877 878 spool->pool_headp = bp1->buf_nextp; 879 bp1->buf_nextp = NULL; 880 if (spool->pool_headp == NULL) 881 spool->pool_tailp = NULL; 882 spool->pool_nfree -= nbuf; 883 spool->pool_nbusy += nbuf; 884 } 885 mutex_exit(&spool->pool_lock); 886 887 RDS_DPRINTF4("rds_get_send_buf", "Return: EP(%p) Buffers requested: %d", 888 ep, nbuf); 889 890 return (bp); 891 } 892 893 #define RDS_MIN_BUF_TO_WAKE_THREADS 10 894 895 void 896 rds_free_send_buf(rds_ep_t *ep, rds_buf_t *headp, rds_buf_t *tailp, uint_t nbuf, 897 boolean_t lock) 898 { 899 rds_bufpool_t *spool; 900 rds_buf_t *tmp; 901 902 RDS_DPRINTF4("rds_free_send_buf", "Enter"); 903 904 ASSERT(nbuf != 0); 905 906 if (tailp == NULL) { 907 if (nbuf > 1) { 908 tmp = headp; 909 while (tmp->buf_nextp) { 910 tmp = tmp->buf_nextp; 911 } 912 tailp = tmp; 913 } else { 914 tailp = headp; 915 } 916 } 917 918 spool = &ep->ep_sndpool; 919 920 if (lock == B_FALSE) { 921 /* lock is not held outside */ 922 mutex_enter(&spool->pool_lock); 923 } 924 925 if (spool->pool_nfree) { 926 spool->pool_tailp->buf_nextp = headp; 927 } else { 928 spool->pool_headp = headp; 929 } 930 spool->pool_tailp = tailp; 931 932 spool->pool_nfree += nbuf; 933 spool->pool_nbusy -= nbuf; 934 935 if ((spool->pool_cv_count > 0) && 936 (spool->pool_nfree > RDS_MIN_BUF_TO_WAKE_THREADS)) { 937 if (spool->pool_nfree >= spool->pool_cv_count) 938 cv_broadcast(&spool->pool_cv); 939 else 940 cv_signal(&spool->pool_cv); 941 } 942 943 if (lock == B_FALSE) { 944 mutex_exit(&spool->pool_lock); 945 } 946 947 RDS_DPRINTF4("rds_free_send_buf", "Return"); 948 } 949 950 #define RDS_NBUFFERS_TO_PUTBACK 100 951 void 952 rds_free_recv_buf(rds_buf_t *bp, uint_t nbuf) 953 { 954 rds_ep_t *ep; 955 rds_bufpool_t *rpool; 956 rds_buf_t *bp1; 957 uint_t ix; 958 959 RDS_DPRINTF4("rds_free_recv_buf", "Enter"); 960 961 ASSERT(nbuf != 0); 962 963 ep = bp->buf_ep; 964 rpool = &ep->ep_rcvpool; 965 966 mutex_enter(&rpool->pool_lock); 967 968 /* Add the buffers to the local pool */ 969 if (rpool->pool_tailp == NULL) { 970 ASSERT(rpool->pool_headp == NULL); 971 ASSERT(rpool->pool_nfree == 0); 972 rpool->pool_headp = bp; 973 bp1 = bp; 974 for (ix = 1; ix < nbuf; ix++) { 975 if (bp1->buf_state == RDS_RCVBUF_ONSOCKQ) { 976 rpool->pool_nbusy--; 977 } 978 bp1->buf_state = RDS_RCVBUF_FREE; 979 bp1 = bp1->buf_nextp; 980 } 981 bp1->buf_nextp = NULL; 982 if (bp->buf_state == RDS_RCVBUF_ONSOCKQ) { 983 rpool->pool_nbusy--; 984 } 985 bp->buf_state = RDS_RCVBUF_FREE; 986 rpool->pool_tailp = bp1; 987 rpool->pool_nfree += nbuf; 988 } else { 989 bp1 = bp; 990 for (ix = 1; ix < nbuf; ix++) { 991 if (bp1->buf_state == RDS_RCVBUF_ONSOCKQ) { 992 rpool->pool_nbusy--; 993 } 994 bp1->buf_state = RDS_RCVBUF_FREE; 995 bp1 = bp1->buf_nextp; 996 } 997 bp1->buf_nextp = NULL; 998 if (bp->buf_state == RDS_RCVBUF_ONSOCKQ) { 999 rpool->pool_nbusy--; 1000 } 1001 bp->buf_state = RDS_RCVBUF_FREE; 1002 rpool->pool_tailp->buf_nextp = bp; 1003 rpool->pool_tailp = bp1; 1004 rpool->pool_nfree += nbuf; 1005 } 1006 1007 if (rpool->pool_nfree >= RDS_NBUFFERS_TO_PUTBACK) { 1008 bp = rpool->pool_headp; 1009 nbuf = rpool->pool_nfree; 1010 rpool->pool_headp = NULL; 1011 rpool->pool_tailp = NULL; 1012 rpool->pool_nfree = 0; 1013 mutex_exit(&rpool->pool_lock); 1014 1015 /* Free the buffers to the global pool */ 1016 if (ep->ep_type == RDS_EP_TYPE_DATA) { 1017 rds_free_buf(&rds_dpool, bp, nbuf); 1018 } else { 1019 rds_free_buf(&rds_cpool, bp, nbuf); 1020 } 1021 1022 return; 1023 } 1024 mutex_exit(&rpool->pool_lock); 1025 1026 RDS_DPRINTF4("rds_free_recv_buf", "Return"); 1027 } 1028