1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved. 27 * 28 * This software is available to you under a choice of one of two 29 * licenses. You may choose to be licensed under the terms of the GNU 30 * General Public License (GPL) Version 2, available from the file 31 * COPYING in the main directory of this source tree, or the 32 * OpenIB.org BSD license below: 33 * 34 * Redistribution and use in source and binary forms, with or 35 * without modification, are permitted provided that the following 36 * conditions are met: 37 * 38 * - Redistributions of source code must retain the above 39 * copyright notice, this list of conditions and the following 40 * disclaimer. 41 * 42 * - Redistributions in binary form must reproduce the above 43 * copyright notice, this list of conditions and the following 44 * disclaimer in the documentation and/or other materials 45 * provided with the distribution. 46 * 47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 48 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 49 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 50 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 51 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 52 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 53 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 54 * SOFTWARE. 55 * 56 */ 57 /* 58 * Sun elects to include this software in Sun product 59 * under the OpenIB BSD license. 60 * 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 63 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 66 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 67 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 68 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 69 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 70 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 71 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 72 * POSSIBILITY OF SUCH DAMAGE. 73 */ 74 75 #pragma ident "%Z%%M% %I% %E% SMI" 76 77 #include <sys/ib/clients/rds/rdsib_cm.h> 78 #include <sys/ib/clients/rds/rdsib_ib.h> 79 #include <sys/ib/clients/rds/rdsib_buf.h> 80 #include <sys/ib/clients/rds/rdsib_ep.h> 81 #include <sys/ib/clients/rds/rds_kstat.h> 82 83 /* 84 * This File contains the buffer management code 85 */ 86 87 #define DUMP_USER_PARAMS() \ 88 RDS_DPRINTF3(LABEL, "UserBufferSize = %d", UserBufferSize); \ 89 RDS_DPRINTF3(LABEL, "RdsPktSize = %d", RdsPktSize); \ 90 RDS_DPRINTF3(LABEL, "MaxRecvMemory = %d", MaxRecvMemory); \ 91 RDS_DPRINTF3(LABEL, "MaxDataSendBuffers = %d", MaxDataSendBuffers); \ 92 RDS_DPRINTF3(LABEL, "MaxDataRecvBuffers = %d", MaxDataRecvBuffers); \ 93 RDS_DPRINTF3(LABEL, "MaxCtrlSendBuffers = %d", MaxCtrlSendBuffers); \ 94 RDS_DPRINTF3(LABEL, "MaxCtrlRecvBuffers = %d", MaxCtrlRecvBuffers); \ 95 RDS_DPRINTF3(LABEL, "DataRecvBufferLWM = %d", DataRecvBufferLWM); \ 96 RDS_DPRINTF3(LABEL, "PendingRxPktsHWM = %d", PendingRxPktsHWM); \ 97 RDS_DPRINTF3(LABEL, "MinRnrRetry = %d", MinRnrRetry) 98 99 static void 100 rds_free_mblk(char *arg) 101 { 102 rds_buf_t *bp = (rds_buf_t *)(uintptr_t)arg; 103 104 /* Free the recv buffer */ 105 RDS_DPRINTF4("rds_free_mblk", "Enter: BP(%p)", bp); 106 ASSERT(bp->buf_state == RDS_RCVBUF_ONSOCKQ); 107 rds_free_recv_buf(bp, 1); 108 RDS_DECR_RXPKTS_PEND(1); 109 RDS_DPRINTF4("rds_free_mblk", "Return: BP(%p)", bp); 110 } 111 112 void 113 rds_free_recv_caches(rds_state_t *statep) 114 { 115 rds_hca_t *hcap; 116 int ret; 117 118 RDS_DPRINTF4("rds_free_recv_caches", "Enter"); 119 120 mutex_enter(&rds_dpool.pool_lock); 121 if (rds_dpool.pool_memp == NULL) { 122 RDS_DPRINTF2("rds_free_recv_caches", "Caches are empty"); 123 mutex_exit(&rds_dpool.pool_lock); 124 return; 125 } 126 127 /* 128 * All buffers must have been freed as all sessions are closed 129 * and destroyed 130 */ 131 ASSERT(rds_dpool.pool_nbusy == 0); 132 RDS_DPRINTF2("rds_free_recv_caches", "Data Pool has " 133 "pending buffers: %d", rds_dpool.pool_nbusy); 134 while (rds_dpool.pool_nbusy != 0) { 135 mutex_exit(&rds_dpool.pool_lock); 136 delay(drv_usectohz(1000000)); 137 mutex_enter(&rds_dpool.pool_lock); 138 } 139 140 hcap = statep->rds_hcalistp; 141 while (hcap != NULL) { 142 if (hcap->hca_mrhdl != NULL) { 143 ret = ibt_deregister_mr(hcap->hca_hdl, 144 hcap->hca_mrhdl); 145 if (ret == IBT_SUCCESS) { 146 hcap->hca_mrhdl = NULL; 147 hcap->hca_lkey = 0; 148 hcap->hca_rkey = 0; 149 } else { 150 RDS_DPRINTF2(LABEL, "ibt_deregister_mr " 151 "failed: %d, mrhdl: 0x%p", ret, 152 hcap->hca_mrhdl); 153 } 154 } 155 hcap = hcap->hca_nextp; 156 } 157 158 kmem_free(rds_dpool.pool_bufmemp, (rds_dpool.pool_nbuffers + 159 rds_cpool.pool_nbuffers) * sizeof (rds_buf_t)); 160 rds_dpool.pool_bufmemp = NULL; 161 162 kmem_free(rds_dpool.pool_memp, rds_dpool.pool_memsize); 163 rds_dpool.pool_memp = NULL; 164 165 mutex_exit(&rds_dpool.pool_lock); 166 167 RDS_DPRINTF4("rds_free_recv_caches", "Return"); 168 } 169 170 int 171 rds_init_recv_caches(rds_state_t *statep) 172 { 173 uint8_t *mp; 174 rds_buf_t *bp; 175 rds_hca_t *hcap; 176 uint32_t nsessions; 177 uint_t ix; 178 uint_t ndatarx, nctrlrx; 179 uint8_t *memp; 180 uint_t memsize, nbuf; 181 rds_buf_t *bufmemp; 182 ibt_mr_attr_t mem_attr; 183 ibt_mr_desc_t mem_desc; 184 int ret; 185 186 RDS_DPRINTF4("rds_init_recv_caches", "Enter"); 187 188 DUMP_USER_PARAMS(); 189 190 mutex_enter(&rds_dpool.pool_lock); 191 if (rds_dpool.pool_memp != NULL) { 192 RDS_DPRINTF2("rds_init_recv_caches", "Pools are already " 193 "initialized"); 194 mutex_exit(&rds_dpool.pool_lock); 195 return (0); 196 } 197 198 /* Max number of receive buffers on the system */ 199 ndatarx = (MaxRecvMemory * 1024)/UserBufferSize; 200 201 /* 202 * High water mark for the receive buffers in the system. If the 203 * number of buffers used crosses this mark then all sockets in 204 * would be stalled. The port quota for the sockets is set based 205 * on this limit. 206 */ 207 rds_rx_pkts_pending_hwm = (PendingRxPktsHWM * ndatarx)/100; 208 209 /* nsessions can never be less than 1 */ 210 nsessions = ndatarx/MaxDataRecvBuffers; 211 nctrlrx = (nsessions + 1) * MaxCtrlRecvBuffers; 212 213 RDS_DPRINTF3(LABEL, "Number of Possible Sessions: %d", nsessions); 214 215 /* Add the hdr */ 216 RdsPktSize = UserBufferSize + RDS_DATA_HDR_SZ; 217 218 memsize = (ndatarx * RdsPktSize) + (nctrlrx * RDS_CTRLPKT_SIZE); 219 nbuf = ndatarx + nctrlrx; 220 RDS_DPRINTF3(LABEL, "RDS Buffer Pool Memory: %lld", memsize); 221 RDS_DPRINTF3(LABEL, "Total Buffers: %d", nbuf); 222 223 memp = (uint8_t *)kmem_zalloc(memsize, KM_NOSLEEP); 224 if (memp == NULL) { 225 RDS_DPRINTF1(LABEL, "RDS Memory allocation failed"); 226 mutex_exit(&rds_dpool.pool_lock); 227 return (-1); 228 } 229 230 RDS_DPRINTF3(LABEL, "RDS Buffer Entries Memory: %lld", 231 nbuf * sizeof (rds_buf_t)); 232 233 /* allocate memory for buffer entries */ 234 bufmemp = (rds_buf_t *)kmem_zalloc(nbuf * sizeof (rds_buf_t), 235 KM_SLEEP); 236 237 /* register the memory with all HCAs */ 238 mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)memp; 239 mem_attr.mr_len = memsize; 240 mem_attr.mr_as = NULL; 241 mem_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE; 242 243 hcap = statep->rds_hcalistp; 244 while (hcap != NULL) { 245 ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, 246 &mem_attr, &hcap->hca_mrhdl, &mem_desc); 247 if (ret != IBT_SUCCESS) { 248 RDS_DPRINTF2(LABEL, "ibt_register_mr failed: %d", ret); 249 return (-1); 250 } 251 252 hcap->hca_lkey = mem_desc.md_lkey; 253 hcap->hca_rkey = mem_desc.md_rkey; 254 255 hcap = hcap->hca_nextp; 256 } 257 258 /* Initialize data pool */ 259 rds_dpool.pool_memp = memp; 260 rds_dpool.pool_memsize = memsize; 261 rds_dpool.pool_bufmemp = bufmemp; 262 rds_dpool.pool_nbuffers = ndatarx; 263 rds_dpool.pool_nbusy = 0; 264 rds_dpool.pool_nfree = ndatarx; 265 266 /* chain the buffers */ 267 mp = memp; 268 bp = bufmemp; 269 for (ix = 0; ix < ndatarx; ix++) { 270 bp[ix].buf_nextp = &bp[ix + 1]; 271 bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 272 bp[ix].buf_state = RDS_RCVBUF_FREE; 273 bp[ix].buf_frtn.free_func = rds_free_mblk; 274 bp[ix].buf_frtn.free_arg = (char *)&bp[ix]; 275 mp = mp + RdsPktSize; 276 } 277 bp[ndatarx - 1].buf_nextp = NULL; 278 rds_dpool.pool_headp = &bp[0]; 279 rds_dpool.pool_tailp = &bp[ndatarx - 1]; 280 281 /* Initialize ctrl pool */ 282 rds_cpool.pool_nbuffers = nctrlrx; 283 rds_cpool.pool_nbusy = 0; 284 rds_cpool.pool_nfree = nctrlrx; 285 286 /* chain the buffers */ 287 for (ix = ndatarx; ix < nbuf - 1; ix++) { 288 bp[ix].buf_nextp = &bp[ix + 1]; 289 bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 290 mp = mp + RDS_CTRLPKT_SIZE; 291 } 292 bp[nbuf - 1].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 293 bp[nbuf - 1].buf_nextp = NULL; 294 rds_cpool.pool_headp = &bp[ndatarx]; 295 rds_cpool.pool_tailp = &bp[nbuf - 1]; 296 297 mutex_exit(&rds_dpool.pool_lock); 298 299 RDS_DPRINTF3(LABEL, "rdsmemp start: %p end: %p", memp, mp); 300 RDS_DPRINTF4("rds_init_recv_caches", "Return"); 301 return (0); 302 } 303 304 void 305 rds_free_send_pool(rds_ep_t *ep) 306 { 307 rds_bufpool_t *pool; 308 rds_hca_t *hcap; 309 int ret; 310 311 pool = &ep->ep_sndpool; 312 313 mutex_enter(&pool->pool_lock); 314 if (pool->pool_memp == NULL) { 315 mutex_exit(&pool->pool_lock); 316 RDS_DPRINTF2("rds_free_send_pool", 317 "EP(%p) DOUBLE Free on Send Pool", ep); 318 return; 319 } 320 321 /* get the hcap for the HCA hosting this channel */ 322 hcap = rds_get_hcap(rdsib_statep, ep->ep_hca_guid); 323 if (hcap == NULL) { 324 RDS_DPRINTF2("rds_free_send_pool", "HCA (0x%llx) not found", 325 ep->ep_hca_guid); 326 } else { 327 ret = ibt_deregister_mr(hcap->hca_hdl, ep->ep_snd_mrhdl); 328 if (ret != IBT_SUCCESS) { 329 RDS_DPRINTF2(LABEL, 330 "ibt_deregister_mr failed: %d, mrhdl: 0x%p", 331 ret, ep->ep_snd_mrhdl); 332 } 333 334 if (ep->ep_ack_addr) { 335 ret = ibt_deregister_mr(hcap->hca_hdl, ep->ep_ackhdl); 336 if (ret != IBT_SUCCESS) { 337 RDS_DPRINTF2(LABEL, 338 "ibt_deregister_mr ackhdl failed: %d, " 339 "mrhdl: 0x%p", ret, ep->ep_ackhdl); 340 } 341 342 kmem_free((void *)ep->ep_ack_addr, sizeof (uintptr_t)); 343 ep->ep_ack_addr = NULL; 344 } 345 } 346 347 kmem_free(pool->pool_memp, pool->pool_memsize); 348 kmem_free(pool->pool_bufmemp, 349 pool->pool_nbuffers * sizeof (rds_buf_t)); 350 pool->pool_memp = NULL; 351 pool->pool_bufmemp = NULL; 352 mutex_exit(&pool->pool_lock); 353 } 354 355 int 356 rds_init_send_pool(rds_ep_t *ep, ib_guid_t hca_guid) 357 { 358 uint8_t *mp; 359 rds_buf_t *bp; 360 rds_hca_t *hcap; 361 uint_t ix, rcv_len; 362 ibt_mr_attr_t mem_attr; 363 ibt_mr_desc_t mem_desc; 364 uint8_t *memp; 365 rds_buf_t *bufmemp; 366 uintptr_t ack_addr = NULL; 367 uint_t memsize; 368 uint_t nbuf; 369 rds_bufpool_t *spool; 370 rds_data_hdr_t *pktp; 371 int ret; 372 373 RDS_DPRINTF2("rds_init_send_pool", "Enter"); 374 375 spool = &ep->ep_sndpool; 376 377 ASSERT(spool->pool_memp == NULL); 378 ASSERT(ep->ep_hca_guid == 0); 379 380 /* get the hcap for the HCA hosting this channel */ 381 hcap = rds_get_hcap(rdsib_statep, hca_guid); 382 if (hcap == NULL) { 383 RDS_DPRINTF2("rds_init_send_pool", "HCA (0x%llx) not found", 384 hca_guid); 385 return (-1); 386 } 387 388 if (ep->ep_type == RDS_EP_TYPE_DATA) { 389 spool->pool_nbuffers = MaxDataSendBuffers; 390 spool->pool_nbusy = 0; 391 spool->pool_nfree = MaxDataSendBuffers; 392 memsize = (MaxDataSendBuffers * RdsPktSize) + 393 sizeof (uintptr_t); 394 rcv_len = RdsPktSize; 395 } else { 396 spool->pool_nbuffers = MaxCtrlSendBuffers; 397 spool->pool_nbusy = 0; 398 spool->pool_nfree = MaxCtrlSendBuffers; 399 memsize = MaxCtrlSendBuffers * RDS_CTRLPKT_SIZE; 400 rcv_len = RDS_CTRLPKT_SIZE; 401 } 402 nbuf = spool->pool_nbuffers; 403 404 RDS_DPRINTF3(LABEL, "RDS Send Pool Memory: %lld", memsize); 405 406 memp = (uint8_t *)kmem_zalloc(memsize, KM_NOSLEEP); 407 if (memp == NULL) { 408 RDS_DPRINTF1(LABEL, "RDS Send Memory allocation failed"); 409 return (-1); 410 } 411 412 RDS_DPRINTF3(LABEL, "RDS Buffer Entries Memory: %lld", 413 nbuf * sizeof (rds_buf_t)); 414 415 /* allocate memory for buffer entries */ 416 bufmemp = (rds_buf_t *)kmem_zalloc(nbuf * sizeof (rds_buf_t), 417 KM_SLEEP); 418 419 if (ep->ep_type == RDS_EP_TYPE_DATA) { 420 ack_addr = (uintptr_t)kmem_zalloc(sizeof (uintptr_t), KM_SLEEP); 421 422 /* register the memory with the HCA for this channel */ 423 mem_attr.mr_vaddr = (ib_vaddr_t)ack_addr; 424 mem_attr.mr_len = sizeof (uintptr_t); 425 mem_attr.mr_as = NULL; 426 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE | 427 IBT_MR_ENABLE_REMOTE_WRITE; 428 429 ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, 430 &mem_attr, &ep->ep_ackhdl, &mem_desc); 431 if (ret != IBT_SUCCESS) { 432 RDS_DPRINTF2("rds_init_send_pool", 433 "EP(%p): ibt_register_mr for ack failed: %d", 434 ep, ret); 435 kmem_free(memp, memsize); 436 kmem_free(bufmemp, nbuf * sizeof (rds_buf_t)); 437 kmem_free((void *)ack_addr, sizeof (uintptr_t)); 438 return (-1); 439 } 440 ep->ep_ack_rkey = mem_desc.md_rkey; 441 ep->ep_ack_addr = ack_addr; 442 } 443 444 /* register the memory with the HCA for this channel */ 445 mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)memp; 446 mem_attr.mr_len = memsize; 447 mem_attr.mr_as = NULL; 448 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE; 449 450 ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, 451 &mem_attr, &ep->ep_snd_mrhdl, &mem_desc); 452 if (ret != IBT_SUCCESS) { 453 RDS_DPRINTF2("rds_init_send_pool", "EP(%p): ibt_register_mr " 454 "failed: %d", ep, ret); 455 kmem_free(memp, memsize); 456 kmem_free(bufmemp, nbuf * sizeof (rds_buf_t)); 457 if (ack_addr != NULL) 458 kmem_free((void *)ack_addr, sizeof (uintptr_t)); 459 return (-1); 460 } 461 ep->ep_snd_lkey = mem_desc.md_lkey; 462 463 464 /* Initialize the pool */ 465 spool->pool_memp = memp; 466 spool->pool_memsize = memsize; 467 spool->pool_bufmemp = bufmemp; 468 spool->pool_sqpoll_pending = B_FALSE; 469 470 /* chain the buffers and initialize them */ 471 mp = memp; 472 bp = bufmemp; 473 474 if (ep->ep_type == RDS_EP_TYPE_DATA) { 475 for (ix = 0; ix < nbuf - 1; ix++) { 476 bp[ix].buf_nextp = &bp[ix + 1]; 477 bp[ix].buf_ep = ep; 478 bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 479 bp[ix].buf_ds.ds_key = ep->ep_snd_lkey; 480 bp[ix].buf_state = RDS_SNDBUF_FREE; 481 pktp = (rds_data_hdr_t *)(uintptr_t)mp; 482 pktp->dh_bufid = (uintptr_t)&bp[ix]; 483 mp = mp + rcv_len; 484 } 485 bp[nbuf - 1].buf_nextp = NULL; 486 bp[nbuf - 1].buf_ep = ep; 487 bp[nbuf - 1].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 488 bp[nbuf - 1].buf_ds.ds_key = ep->ep_snd_lkey; 489 bp[nbuf - 1].buf_state = RDS_SNDBUF_FREE; 490 pktp = (rds_data_hdr_t *)(uintptr_t)mp; 491 pktp->dh_bufid = (uintptr_t)&bp[nbuf - 1]; 492 493 spool->pool_headp = &bp[0]; 494 spool->pool_tailp = &bp[nbuf - 1]; 495 496 mp = mp + rcv_len; 497 ep->ep_ackds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 498 ep->ep_ackds.ds_key = ep->ep_snd_lkey; 499 ep->ep_ackds.ds_len = sizeof (uintptr_t); 500 501 *(uintptr_t *)ep->ep_ack_addr = (uintptr_t)spool->pool_tailp; 502 } else { 503 /* control send pool */ 504 for (ix = 0; ix < nbuf - 1; ix++) { 505 bp[ix].buf_nextp = &bp[ix + 1]; 506 bp[ix].buf_ep = ep; 507 bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 508 bp[ix].buf_ds.ds_key = ep->ep_snd_lkey; 509 bp[ix].buf_state = RDS_SNDBUF_FREE; 510 mp = mp + rcv_len; 511 } 512 bp[nbuf - 1].buf_nextp = NULL; 513 bp[nbuf - 1].buf_ep = ep; 514 bp[nbuf - 1].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 515 bp[nbuf - 1].buf_ds.ds_key = ep->ep_snd_lkey; 516 bp[nbuf - 1].buf_state = RDS_SNDBUF_FREE; 517 spool->pool_headp = &bp[0]; 518 spool->pool_tailp = &bp[nbuf - 1]; 519 } 520 521 RDS_DPRINTF3(LABEL, "rdsmemp start: %p end: %p", memp, mp); 522 RDS_DPRINTF2("rds_init_send_pool", "Return"); 523 524 return (0); 525 } 526 527 int 528 rds_reinit_send_pool(rds_ep_t *ep, ib_guid_t hca_guid) 529 { 530 rds_buf_t *bp; 531 rds_hca_t *hcap; 532 ibt_mr_attr_t mem_attr; 533 ibt_mr_desc_t mem_desc; 534 rds_bufpool_t *spool; 535 int ret; 536 537 RDS_DPRINTF2("rds_reinit_send_pool", "Enter: EP(%p)", ep); 538 539 spool = &ep->ep_sndpool; 540 ASSERT(spool->pool_memp != NULL); 541 542 /* deregister the send pool memory from the previous HCA */ 543 hcap = rds_get_hcap(rdsib_statep, ep->ep_hca_guid); 544 if (hcap == NULL) { 545 RDS_DPRINTF2("rds_reinit_send_pool", "HCA (0x%llx) not found", 546 ep->ep_hca_guid); 547 } else { 548 if (ep->ep_snd_mrhdl != NULL) { 549 (void) ibt_deregister_mr(hcap->hca_hdl, 550 ep->ep_snd_mrhdl); 551 ep->ep_snd_mrhdl = NULL; 552 ep->ep_snd_lkey = 0; 553 } 554 555 if ((ep->ep_type == RDS_EP_TYPE_DATA) && 556 (ep->ep_ackhdl != NULL)) { 557 (void) ibt_deregister_mr(hcap->hca_hdl, ep->ep_ackhdl); 558 ep->ep_ackhdl = NULL; 559 ep->ep_ack_rkey = 0; 560 } 561 562 ep->ep_hca_guid = NULL; 563 } 564 565 /* get the hcap for the new HCA */ 566 hcap = rds_get_hcap(rdsib_statep, hca_guid); 567 if (hcap == NULL) { 568 RDS_DPRINTF2("rds_reinit_send_pool", "HCA (0x%llx) not found", 569 hca_guid); 570 return (-1); 571 } 572 573 /* register the send memory */ 574 mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)spool->pool_memp; 575 mem_attr.mr_len = spool->pool_memsize; 576 mem_attr.mr_as = NULL; 577 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE; 578 579 ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, 580 &mem_attr, &ep->ep_snd_mrhdl, &mem_desc); 581 if (ret != IBT_SUCCESS) { 582 RDS_DPRINTF2("rds_reinit_send_pool", 583 "EP(%p): ibt_register_mr failed: %d", ep, ret); 584 return (-1); 585 } 586 ep->ep_snd_lkey = mem_desc.md_lkey; 587 588 /* register the acknowledgement space */ 589 if (ep->ep_type == RDS_EP_TYPE_DATA) { 590 mem_attr.mr_vaddr = (ib_vaddr_t)ep->ep_ack_addr; 591 mem_attr.mr_len = sizeof (uintptr_t); 592 mem_attr.mr_as = NULL; 593 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE | 594 IBT_MR_ENABLE_REMOTE_WRITE; 595 596 ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, 597 &mem_attr, &ep->ep_ackhdl, &mem_desc); 598 if (ret != IBT_SUCCESS) { 599 RDS_DPRINTF2("rds_reinit_send_pool", 600 "EP(%p): ibt_register_mr for ack failed: %d", 601 ep, ret); 602 (void) ibt_deregister_mr(hcap->hca_hdl, 603 ep->ep_snd_mrhdl); 604 ep->ep_snd_mrhdl = NULL; 605 ep->ep_snd_lkey = 0; 606 return (-1); 607 } 608 ep->ep_ack_rkey = mem_desc.md_rkey; 609 610 /* update the LKEY in the acknowledgement WR */ 611 ep->ep_ackds.ds_key = ep->ep_snd_lkey; 612 } 613 614 /* update the LKEY in each buffer */ 615 bp = spool->pool_headp; 616 while (bp) { 617 bp->buf_ds.ds_key = ep->ep_snd_lkey; 618 bp = bp->buf_nextp; 619 } 620 621 ep->ep_hca_guid = hca_guid; 622 623 RDS_DPRINTF2("rds_reinit_send_pool", "Return: EP(%p)", ep); 624 625 return (0); 626 } 627 628 void 629 rds_free_recv_pool(rds_ep_t *ep) 630 { 631 rds_bufpool_t *pool; 632 633 if (ep->ep_type == RDS_EP_TYPE_DATA) { 634 pool = &rds_dpool; 635 } else { 636 pool = &rds_cpool; 637 } 638 639 mutex_enter(&ep->ep_rcvpool.pool_lock); 640 if (ep->ep_rcvpool.pool_nfree != 0) { 641 rds_free_buf(pool, ep->ep_rcvpool.pool_headp, 642 ep->ep_rcvpool.pool_nfree); 643 ep->ep_rcvpool.pool_nfree = 0; 644 ep->ep_rcvpool.pool_headp = NULL; 645 ep->ep_rcvpool.pool_tailp = NULL; 646 } 647 mutex_exit(&ep->ep_rcvpool.pool_lock); 648 } 649 650 int 651 rds_init_recv_pool(rds_ep_t *ep) 652 { 653 rds_bufpool_t *rpool; 654 rds_qp_t *recvqp; 655 656 recvqp = &ep->ep_recvqp; 657 rpool = &ep->ep_rcvpool; 658 if (ep->ep_type == RDS_EP_TYPE_DATA) { 659 recvqp->qp_depth = MaxDataRecvBuffers; 660 recvqp->qp_level = 0; 661 recvqp->qp_lwm = (DataRecvBufferLWM * MaxDataRecvBuffers)/100; 662 recvqp->qp_taskqpending = B_FALSE; 663 664 rpool->pool_nbuffers = MaxDataRecvBuffers; 665 rpool->pool_nbusy = 0; 666 rpool->pool_nfree = 0; 667 } else { 668 recvqp->qp_depth = MaxCtrlRecvBuffers; 669 recvqp->qp_level = 0; 670 recvqp->qp_lwm = (CtrlRecvBufferLWM * MaxCtrlRecvBuffers)/100; 671 recvqp->qp_taskqpending = B_FALSE; 672 673 rpool->pool_nbuffers = MaxCtrlRecvBuffers; 674 rpool->pool_nbusy = 0; 675 rpool->pool_nfree = 0; 676 } 677 678 return (0); 679 } 680 681 /* Free buffers to the global pool, either cpool or dpool */ 682 void 683 rds_free_buf(rds_bufpool_t *pool, rds_buf_t *bp, uint_t nbuf) 684 { 685 uint_t ix; 686 687 RDS_DPRINTF4("rds_free_buf", "Enter"); 688 689 ASSERT(nbuf != 0); 690 691 mutex_enter(&pool->pool_lock); 692 693 if (pool->pool_nfree != 0) { 694 pool->pool_tailp->buf_nextp = bp; 695 } else { 696 pool->pool_headp = bp; 697 } 698 699 if (nbuf == 1) { 700 ASSERT(bp->buf_state == RDS_RCVBUF_FREE); 701 bp->buf_ep = NULL; 702 bp->buf_nextp = NULL; 703 pool->pool_tailp = bp; 704 } else { 705 for (ix = 1; ix < nbuf; ix++) { 706 ASSERT(bp->buf_state == RDS_RCVBUF_FREE); 707 bp->buf_ep = NULL; 708 bp = bp->buf_nextp; 709 } 710 ASSERT(bp->buf_state == RDS_RCVBUF_FREE); 711 bp->buf_ep = NULL; 712 bp->buf_nextp = NULL; 713 pool->pool_tailp = bp; 714 } 715 /* tail is always the last buffer */ 716 pool->pool_tailp->buf_nextp = NULL; 717 718 pool->pool_nfree += nbuf; 719 pool->pool_nbusy -= nbuf; 720 721 mutex_exit(&pool->pool_lock); 722 723 RDS_DPRINTF4("rds_free_buf", "Return"); 724 } 725 726 /* Get buffers from the global pools, either cpool or dpool */ 727 rds_buf_t * 728 rds_get_buf(rds_bufpool_t *pool, uint_t nbuf, uint_t *nret) 729 { 730 rds_buf_t *bp = NULL, *bp1; 731 uint_t ix; 732 733 RDS_DPRINTF4("rds_get_buf", "Enter"); 734 735 mutex_enter(&pool->pool_lock); 736 737 RDS_DPRINTF3("rds_get_buf", "Available: %d Needed: %d", 738 pool->pool_nfree, nbuf); 739 740 if (nbuf < pool->pool_nfree) { 741 *nret = nbuf; 742 743 bp1 = pool->pool_headp; 744 for (ix = 1; ix < nbuf; ix++) { 745 bp1 = bp1->buf_nextp; 746 } 747 748 bp = pool->pool_headp; 749 pool->pool_headp = bp1->buf_nextp; 750 bp1->buf_nextp = NULL; 751 752 pool->pool_nfree -= nbuf; 753 pool->pool_nbusy += nbuf; 754 } else if (nbuf >= pool->pool_nfree) { 755 *nret = pool->pool_nfree; 756 757 bp = pool->pool_headp; 758 759 pool->pool_headp = NULL; 760 pool->pool_tailp = NULL; 761 762 pool->pool_nbusy += pool->pool_nfree; 763 pool->pool_nfree = 0; 764 } 765 766 mutex_exit(&pool->pool_lock); 767 768 RDS_DPRINTF4("rds_get_buf", "Return"); 769 770 return (bp); 771 } 772 773 boolean_t 774 rds_is_recvq_empty(rds_ep_t *ep, boolean_t wait) 775 { 776 rds_qp_t *recvqp; 777 rds_bufpool_t *rpool; 778 boolean_t ret = B_TRUE; 779 780 recvqp = &ep->ep_recvqp; 781 mutex_enter(&recvqp->qp_lock); 782 RDS_DPRINTF2("rds_is_recvq_empty", "EP(%p): QP has %d WRs", 783 ep, recvqp->qp_level); 784 if (wait) { 785 /* wait until the RQ is empty */ 786 while (recvqp->qp_level != 0) { 787 /* wait one second and try again */ 788 mutex_exit(&recvqp->qp_lock); 789 delay(drv_usectohz(1000000)); 790 mutex_enter(&recvqp->qp_lock); 791 } 792 } else if (recvqp->qp_level != 0) { 793 ret = B_FALSE; 794 } 795 mutex_exit(&recvqp->qp_lock); 796 797 rpool = &ep->ep_rcvpool; 798 mutex_enter(&rpool->pool_lock); 799 RDS_DPRINTF2("rds_is_recvq_empty", "EP(%p): " 800 "There are %d pending buffers on sockqs", ep, rpool->pool_nbusy); 801 if (wait) { 802 /* Wait for all buffers to be freed by sockfs */ 803 while (rpool->pool_nbusy != 0) { 804 /* wait one second and try again */ 805 mutex_exit(&rpool->pool_lock); 806 delay(drv_usectohz(1000000)); 807 mutex_enter(&rpool->pool_lock); 808 } 809 } else if (rpool->pool_nbusy != 0) { 810 ret = B_FALSE; 811 } 812 mutex_exit(&rpool->pool_lock); 813 814 return (ret); 815 } 816 817 boolean_t 818 rds_is_sendq_empty(rds_ep_t *ep, uint_t wait) 819 { 820 rds_bufpool_t *spool; 821 rds_buf_t *bp; 822 boolean_t ret1 = B_TRUE; 823 824 /* check if all the sends completed */ 825 spool = &ep->ep_sndpool; 826 mutex_enter(&spool->pool_lock); 827 RDS_DPRINTF2("rds_is_sendq_empty", "EP(%p): " 828 "Send Pool contains: %d", ep, spool->pool_nbusy); 829 if (wait) { 830 while (spool->pool_nbusy != 0) { 831 if (rds_no_interrupts) { 832 /* wait one second and try again */ 833 delay(drv_usectohz(1000000)); 834 rds_poll_send_completions(ep->ep_sendcq, ep, 835 B_TRUE); 836 } else { 837 /* wait one second and try again */ 838 mutex_exit(&spool->pool_lock); 839 delay(drv_usectohz(1000000)); 840 mutex_enter(&spool->pool_lock); 841 } 842 } 843 844 if ((wait == 2) && (ep->ep_type == RDS_EP_TYPE_DATA)) { 845 rds_buf_t *ackbp; 846 847 /* 848 * If the last one is acknowledged then everything 849 * is acknowledged 850 */ 851 bp = spool->pool_tailp; 852 ackbp = *(rds_buf_t **)ep->ep_ack_addr; 853 RDS_DPRINTF2("rds_is_sendq_empty", "EP(%p): " 854 "Checking for acknowledgements", ep); 855 while (bp != ackbp) { 856 RDS_DPRINTF2("rds_is_sendq_empty", 857 "EP(%p) BP(0x%p/0x%p) last " 858 "sent/acknowledged", ep, bp, ackbp); 859 mutex_exit(&spool->pool_lock); 860 delay(drv_usectohz(1000000)); 861 mutex_enter(&spool->pool_lock); 862 863 bp = spool->pool_tailp; 864 ackbp = *(rds_buf_t **)ep->ep_ack_addr; 865 } 866 } 867 } else if (spool->pool_nbusy != 0) { 868 ret1 = B_FALSE; 869 } 870 mutex_exit(&spool->pool_lock); 871 872 /* check if all the rdma acks completed */ 873 mutex_enter(&ep->ep_lock); 874 RDS_DPRINTF2("rds_is_sendq_empty", "EP(%p): " 875 "Outstanding RDMA Acks: %d", ep, ep->ep_rdmacnt); 876 if (wait) { 877 while (ep->ep_rdmacnt != 0) { 878 if (rds_no_interrupts) { 879 /* wait one second and try again */ 880 delay(drv_usectohz(1000000)); 881 rds_poll_send_completions(ep->ep_sendcq, ep, 882 B_FALSE); 883 } else { 884 /* wait one second and try again */ 885 mutex_exit(&ep->ep_lock); 886 delay(drv_usectohz(1000000)); 887 mutex_enter(&ep->ep_lock); 888 } 889 } 890 } else if (ep->ep_rdmacnt != 0) { 891 ret1 = B_FALSE; 892 } 893 mutex_exit(&ep->ep_lock); 894 895 return (ret1); 896 } 897 898 /* Get buffers from the send pool */ 899 rds_buf_t * 900 rds_get_send_buf(rds_ep_t *ep, uint_t nbuf) 901 { 902 rds_buf_t *bp = NULL, *bp1; 903 rds_bufpool_t *spool; 904 uint_t waittime = rds_waittime_ms * 1000; 905 uint_t ix; 906 int ret; 907 908 RDS_DPRINTF4("rds_get_send_buf", "Enter: EP(%p) Buffers requested: %d", 909 ep, nbuf); 910 911 spool = &ep->ep_sndpool; 912 mutex_enter(&spool->pool_lock); 913 914 if (rds_no_interrupts) { 915 if ((spool->pool_sqpoll_pending == B_FALSE) && 916 (spool->pool_nbusy > 917 (spool->pool_nbuffers * rds_poll_percent_full)/100)) { 918 spool->pool_sqpoll_pending = B_TRUE; 919 mutex_exit(&spool->pool_lock); 920 rds_poll_send_completions(ep->ep_sendcq, ep, B_FALSE); 921 mutex_enter(&spool->pool_lock); 922 spool->pool_sqpoll_pending = B_FALSE; 923 } 924 } 925 926 if (spool->pool_nfree < nbuf) { 927 /* wait for buffers to become available */ 928 spool->pool_cv_count += nbuf; 929 ret = cv_timedwait_sig(&spool->pool_cv, &spool->pool_lock, 930 ddi_get_lbolt() + drv_usectohz(waittime)); 931 /* ret = cv_wait_sig(&spool->pool_cv, &spool->pool_lock); */ 932 if (ret == 0) { 933 /* signal pending */ 934 spool->pool_cv_count -= nbuf; 935 mutex_exit(&spool->pool_lock); 936 return (NULL); 937 } 938 939 spool->pool_cv_count -= nbuf; 940 } 941 942 /* Have the number of buffers needed */ 943 if (spool->pool_nfree > nbuf) { 944 bp = spool->pool_headp; 945 946 if (ep->ep_type == RDS_EP_TYPE_DATA) { 947 rds_buf_t *ackbp; 948 ackbp = *(rds_buf_t **)ep->ep_ack_addr; 949 950 /* check if all the needed buffers are acknowledged */ 951 bp1 = bp; 952 for (ix = 0; ix < nbuf; ix++) { 953 if ((bp1 == ackbp) || 954 (bp1->buf_state != RDS_SNDBUF_FREE)) { 955 /* 956 * The buffer is not yet signalled or 957 * is not yet acknowledged 958 */ 959 RDS_DPRINTF5("rds_get_send_buf", 960 "EP(%p) Buffer (%p) not yet " 961 "acked/completed", ep, bp1); 962 mutex_exit(&spool->pool_lock); 963 return (NULL); 964 } 965 966 bp1 = bp1->buf_nextp; 967 } 968 } 969 970 /* mark the buffers as pending */ 971 bp1 = bp; 972 for (ix = 1; ix < nbuf; ix++) { 973 ASSERT(bp1->buf_state == RDS_SNDBUF_FREE); 974 bp1->buf_state = RDS_SNDBUF_PENDING; 975 bp1 = bp1->buf_nextp; 976 } 977 ASSERT(bp1->buf_state == RDS_SNDBUF_FREE); 978 bp1->buf_state = RDS_SNDBUF_PENDING; 979 980 spool->pool_headp = bp1->buf_nextp; 981 bp1->buf_nextp = NULL; 982 if (spool->pool_headp == NULL) 983 spool->pool_tailp = NULL; 984 spool->pool_nfree -= nbuf; 985 spool->pool_nbusy += nbuf; 986 } 987 mutex_exit(&spool->pool_lock); 988 989 RDS_DPRINTF4("rds_get_send_buf", "Return: EP(%p) Buffers requested: %d", 990 ep, nbuf); 991 992 return (bp); 993 } 994 995 #define RDS_MIN_BUF_TO_WAKE_THREADS 10 996 997 void 998 rds_free_send_buf(rds_ep_t *ep, rds_buf_t *headp, rds_buf_t *tailp, uint_t nbuf, 999 boolean_t lock) 1000 { 1001 rds_bufpool_t *spool; 1002 rds_buf_t *tmp; 1003 1004 RDS_DPRINTF4("rds_free_send_buf", "Enter"); 1005 1006 ASSERT(nbuf != 0); 1007 1008 if (tailp == NULL) { 1009 if (nbuf > 1) { 1010 tmp = headp; 1011 while (tmp->buf_nextp) { 1012 tmp = tmp->buf_nextp; 1013 } 1014 tailp = tmp; 1015 } else { 1016 tailp = headp; 1017 } 1018 } 1019 1020 spool = &ep->ep_sndpool; 1021 1022 if (lock == B_FALSE) { 1023 /* lock is not held outside */ 1024 mutex_enter(&spool->pool_lock); 1025 } 1026 1027 if (spool->pool_nfree) { 1028 spool->pool_tailp->buf_nextp = headp; 1029 } else { 1030 spool->pool_headp = headp; 1031 } 1032 spool->pool_tailp = tailp; 1033 1034 spool->pool_nfree += nbuf; 1035 spool->pool_nbusy -= nbuf; 1036 1037 if ((spool->pool_cv_count > 0) && 1038 (spool->pool_nfree > RDS_MIN_BUF_TO_WAKE_THREADS)) { 1039 if (spool->pool_nfree >= spool->pool_cv_count) 1040 cv_broadcast(&spool->pool_cv); 1041 else 1042 cv_signal(&spool->pool_cv); 1043 } 1044 1045 if (lock == B_FALSE) { 1046 mutex_exit(&spool->pool_lock); 1047 } 1048 1049 RDS_DPRINTF4("rds_free_send_buf", "Return"); 1050 } 1051 1052 #define RDS_NBUFFERS_TO_PUTBACK 100 1053 void 1054 rds_free_recv_buf(rds_buf_t *bp, uint_t nbuf) 1055 { 1056 rds_ep_t *ep; 1057 rds_bufpool_t *rpool; 1058 rds_buf_t *bp1; 1059 uint_t ix; 1060 1061 RDS_DPRINTF4("rds_free_recv_buf", "Enter"); 1062 1063 ASSERT(nbuf != 0); 1064 1065 ep = bp->buf_ep; 1066 rpool = &ep->ep_rcvpool; 1067 1068 mutex_enter(&rpool->pool_lock); 1069 1070 /* Add the buffers to the local pool */ 1071 if (rpool->pool_tailp == NULL) { 1072 ASSERT(rpool->pool_headp == NULL); 1073 ASSERT(rpool->pool_nfree == 0); 1074 rpool->pool_headp = bp; 1075 bp1 = bp; 1076 for (ix = 1; ix < nbuf; ix++) { 1077 if (bp1->buf_state == RDS_RCVBUF_ONSOCKQ) { 1078 rpool->pool_nbusy--; 1079 } 1080 bp1->buf_state = RDS_RCVBUF_FREE; 1081 bp1 = bp1->buf_nextp; 1082 } 1083 bp1->buf_nextp = NULL; 1084 if (bp->buf_state == RDS_RCVBUF_ONSOCKQ) { 1085 rpool->pool_nbusy--; 1086 } 1087 bp->buf_state = RDS_RCVBUF_FREE; 1088 rpool->pool_tailp = bp1; 1089 rpool->pool_nfree += nbuf; 1090 } else { 1091 bp1 = bp; 1092 for (ix = 1; ix < nbuf; ix++) { 1093 if (bp1->buf_state == RDS_RCVBUF_ONSOCKQ) { 1094 rpool->pool_nbusy--; 1095 } 1096 bp1->buf_state = RDS_RCVBUF_FREE; 1097 bp1 = bp1->buf_nextp; 1098 } 1099 bp1->buf_nextp = NULL; 1100 if (bp->buf_state == RDS_RCVBUF_ONSOCKQ) { 1101 rpool->pool_nbusy--; 1102 } 1103 bp->buf_state = RDS_RCVBUF_FREE; 1104 rpool->pool_tailp->buf_nextp = bp; 1105 rpool->pool_tailp = bp1; 1106 rpool->pool_nfree += nbuf; 1107 } 1108 1109 if (rpool->pool_nfree >= RDS_NBUFFERS_TO_PUTBACK) { 1110 bp = rpool->pool_headp; 1111 nbuf = rpool->pool_nfree; 1112 rpool->pool_headp = NULL; 1113 rpool->pool_tailp = NULL; 1114 rpool->pool_nfree = 0; 1115 mutex_exit(&rpool->pool_lock); 1116 1117 /* Free the buffers to the global pool */ 1118 if (ep->ep_type == RDS_EP_TYPE_DATA) { 1119 rds_free_buf(&rds_dpool, bp, nbuf); 1120 } else { 1121 rds_free_buf(&rds_cpool, bp, nbuf); 1122 } 1123 1124 return; 1125 } 1126 mutex_exit(&rpool->pool_lock); 1127 1128 RDS_DPRINTF4("rds_free_recv_buf", "Return"); 1129 } 1130