/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
/*
 * Sun elects to include this software in Sun product
 * under the OpenIB BSD license.
 *
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
73 */ 74 75 #include <sys/types.h> 76 #include <sys/ddi.h> 77 #include <sys/sunddi.h> 78 #include <sys/ib/clients/rds/rdsib_cm.h> 79 #include <sys/ib/clients/rds/rdsib_ib.h> 80 #include <sys/ib/clients/rds/rdsib_buf.h> 81 #include <sys/ib/clients/rds/rdsib_ep.h> 82 #include <sys/ib/clients/rds/rds_kstat.h> 83 84 static void rds_async_handler(void *clntp, ibt_hca_hdl_t hdl, 85 ibt_async_code_t code, ibt_async_event_t *event); 86 87 static struct ibt_clnt_modinfo_s rds_ib_modinfo = { 88 IBTI_V_CURR, 89 IBT_NETWORK, 90 rds_async_handler, 91 NULL, 92 "RDS" 93 }; 94 95 /* performance tunables */ 96 uint_t rds_no_interrupts = 0; 97 uint_t rds_poll_percent_full = 25; 98 uint_t rds_wc_signal = IBT_NEXT_SOLICITED; 99 uint_t rds_waittime_ms = 100; /* ms */ 100 101 extern dev_info_t *rdsib_dev_info; 102 extern void rds_close_sessions(); 103 104 static void 105 rdsib_validate_chan_sizes(ibt_hca_attr_t *hattrp) 106 { 107 /* The SQ size should not be more than that supported by the HCA */ 108 if (((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_chan_sz) || 109 ((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_cq_sz)) { 110 RDS_DPRINTF2("RDSIB", "MaxDataSendBuffers + %d is greater " 111 "than that supported by the HCA driver " 112 "(%d + %d > %d or %d), lowering it to a supported value.", 113 RDS_NUM_ACKS, MaxDataSendBuffers, RDS_NUM_ACKS, 114 hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz); 115 116 MaxDataSendBuffers = (hattrp->hca_max_chan_sz > 117 hattrp->hca_max_cq_sz) ? 
118 hattrp->hca_max_cq_sz - RDS_NUM_ACKS : 119 hattrp->hca_max_chan_sz - RDS_NUM_ACKS; 120 } 121 122 /* The RQ size should not be more than that supported by the HCA */ 123 if ((MaxDataRecvBuffers > hattrp->hca_max_chan_sz) || 124 (MaxDataRecvBuffers > hattrp->hca_max_cq_sz)) { 125 RDS_DPRINTF2("RDSIB", "MaxDataRecvBuffers is greater than that " 126 "supported by the HCA driver (%d > %d or %d), lowering it " 127 "to a supported value.", MaxDataRecvBuffers, 128 hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz); 129 130 MaxDataRecvBuffers = (hattrp->hca_max_chan_sz > 131 hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz : 132 hattrp->hca_max_chan_sz; 133 } 134 135 /* The SQ size should not be more than that supported by the HCA */ 136 if ((MaxCtrlSendBuffers > hattrp->hca_max_chan_sz) || 137 (MaxCtrlSendBuffers > hattrp->hca_max_cq_sz)) { 138 RDS_DPRINTF2("RDSIB", "MaxCtrlSendBuffers is greater than that " 139 "supported by the HCA driver (%d > %d or %d), lowering it " 140 "to a supported value.", MaxCtrlSendBuffers, 141 hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz); 142 143 MaxCtrlSendBuffers = (hattrp->hca_max_chan_sz > 144 hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz : 145 hattrp->hca_max_chan_sz; 146 } 147 148 /* The RQ size should not be more than that supported by the HCA */ 149 if ((MaxCtrlRecvBuffers > hattrp->hca_max_chan_sz) || 150 (MaxCtrlRecvBuffers > hattrp->hca_max_cq_sz)) { 151 RDS_DPRINTF2("RDSIB", "MaxCtrlRecvBuffers is greater than that " 152 "supported by the HCA driver (%d > %d or %d), lowering it " 153 "to a supported value.", MaxCtrlRecvBuffers, 154 hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz); 155 156 MaxCtrlRecvBuffers = (hattrp->hca_max_chan_sz > 157 hattrp->hca_max_cq_sz) ? 
hattrp->hca_max_cq_sz : 158 hattrp->hca_max_chan_sz; 159 } 160 161 /* The MaxRecvMemory should be less than that supported by the HCA */ 162 if ((NDataRX * RdsPktSize) > hattrp->hca_max_memr_len) { 163 RDS_DPRINTF2("RDSIB", "MaxRecvMemory is greater than that " 164 "supported by the HCA driver (%d > %d), lowering it to %d", 165 NDataRX * RdsPktSize, hattrp->hca_max_memr_len, 166 hattrp->hca_max_memr_len); 167 168 NDataRX = hattrp->hca_max_memr_len/RdsPktSize; 169 } 170 } 171 172 /* Return hcap, given the hca guid */ 173 rds_hca_t * 174 rds_lkup_hca(ib_guid_t hca_guid) 175 { 176 rds_hca_t *hcap; 177 178 RDS_DPRINTF4("rds_lkup_hca", "Enter: statep: 0x%p " 179 "guid: %llx", rdsib_statep, hca_guid); 180 181 rw_enter(&rdsib_statep->rds_hca_lock, RW_READER); 182 183 hcap = rdsib_statep->rds_hcalistp; 184 while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) { 185 hcap = hcap->hca_nextp; 186 } 187 188 rw_exit(&rdsib_statep->rds_hca_lock); 189 190 RDS_DPRINTF4("rds_lkup_hca", "return"); 191 192 return (hcap); 193 } 194 195 void rds_randomize_qps(rds_hca_t *hcap); 196 197 static rds_hca_t * 198 rdsib_init_hca(ib_guid_t hca_guid) 199 { 200 rds_hca_t *hcap; 201 boolean_t alloc = B_FALSE; 202 int ret; 203 204 RDS_DPRINTF2("rdsib_init_hca", "enter: HCA 0x%llx", hca_guid); 205 206 /* Do a HCA lookup */ 207 hcap = rds_lkup_hca(hca_guid); 208 209 if (hcap != NULL && hcap->hca_hdl != NULL) { 210 /* 211 * This can happen if we get IBT_HCA_ATTACH_EVENT on an HCA 212 * that we have already opened. Just return NULL so that 213 * we'll not end up reinitializing the HCA again. 
214 */ 215 RDS_DPRINTF2("rdsib_init_hca", "HCA already initialized"); 216 return (NULL); 217 } 218 219 if (hcap == NULL) { 220 RDS_DPRINTF2("rdsib_init_hca", "New HCA is added"); 221 hcap = (rds_hca_t *)kmem_zalloc(sizeof (rds_hca_t), KM_SLEEP); 222 alloc = B_TRUE; 223 } 224 225 hcap->hca_guid = hca_guid; 226 ret = ibt_open_hca(rdsib_statep->rds_ibhdl, hca_guid, 227 &hcap->hca_hdl); 228 if (ret != IBT_SUCCESS) { 229 if (ret == IBT_HCA_IN_USE) { 230 RDS_DPRINTF2("rdsib_init_hca", 231 "ibt_open_hca: 0x%llx returned IBT_HCA_IN_USE", 232 hca_guid); 233 } else { 234 RDS_DPRINTF2("rdsib_init_hca", 235 "ibt_open_hca: 0x%llx failed: %d", hca_guid, ret); 236 } 237 if (alloc == B_TRUE) { 238 kmem_free(hcap, sizeof (rds_hca_t)); 239 } 240 return (NULL); 241 } 242 243 ret = ibt_query_hca(hcap->hca_hdl, &hcap->hca_attr); 244 if (ret != IBT_SUCCESS) { 245 RDS_DPRINTF2("rdsib_init_hca", 246 "Query HCA: 0x%llx failed: %d", hca_guid, ret); 247 ret = ibt_close_hca(hcap->hca_hdl); 248 ASSERT(ret == IBT_SUCCESS); 249 if (alloc == B_TRUE) { 250 kmem_free(hcap, sizeof (rds_hca_t)); 251 } else { 252 hcap->hca_hdl = NULL; 253 } 254 return (NULL); 255 } 256 257 ret = ibt_query_hca_ports(hcap->hca_hdl, 0, 258 &hcap->hca_pinfop, &hcap->hca_nports, &hcap->hca_pinfo_sz); 259 if (ret != IBT_SUCCESS) { 260 RDS_DPRINTF2("rdsib_init_hca", 261 "Query HCA 0x%llx ports failed: %d", hca_guid, 262 ret); 263 ret = ibt_close_hca(hcap->hca_hdl); 264 hcap->hca_hdl = NULL; 265 ASSERT(ret == IBT_SUCCESS); 266 if (alloc == B_TRUE) { 267 kmem_free(hcap, sizeof (rds_hca_t)); 268 } else { 269 hcap->hca_hdl = NULL; 270 } 271 return (NULL); 272 } 273 274 /* Only one PD per HCA is allocated, so do it here */ 275 ret = ibt_alloc_pd(hcap->hca_hdl, IBT_PD_NO_FLAGS, 276 &hcap->hca_pdhdl); 277 if (ret != IBT_SUCCESS) { 278 RDS_DPRINTF2("rdsib_init_hca", 279 "ibt_alloc_pd 0x%llx failed: %d", hca_guid, ret); 280 (void) ibt_free_portinfo(hcap->hca_pinfop, 281 hcap->hca_pinfo_sz); 282 ret = ibt_close_hca(hcap->hca_hdl); 283 
ASSERT(ret == IBT_SUCCESS); 284 hcap->hca_hdl = NULL; 285 if (alloc == B_TRUE) { 286 kmem_free(hcap, sizeof (rds_hca_t)); 287 } else { 288 hcap->hca_hdl = NULL; 289 } 290 return (NULL); 291 } 292 293 rdsib_validate_chan_sizes(&hcap->hca_attr); 294 295 /* To minimize stale connections after ungraceful reboots */ 296 rds_randomize_qps(hcap); 297 298 rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER); 299 hcap->hca_state = RDS_HCA_STATE_OPEN; 300 if (alloc == B_TRUE) { 301 /* this is a new HCA, add it to the list */ 302 rdsib_statep->rds_nhcas++; 303 hcap->hca_nextp = rdsib_statep->rds_hcalistp; 304 rdsib_statep->rds_hcalistp = hcap; 305 } 306 rw_exit(&rdsib_statep->rds_hca_lock); 307 308 RDS_DPRINTF2("rdsib_init_hca", "return: HCA 0x%llx", hca_guid); 309 310 return (hcap); 311 } 312 313 /* 314 * Called from attach 315 */ 316 int 317 rdsib_initialize_ib() 318 { 319 ib_guid_t *guidp; 320 rds_hca_t *hcap; 321 uint_t ix, hcaix, nhcas; 322 int ret; 323 324 RDS_DPRINTF2("rdsib_initialize_ib", "enter: statep %p", rdsib_statep); 325 326 ASSERT(rdsib_statep != NULL); 327 if (rdsib_statep == NULL) { 328 RDS_DPRINTF1("rdsib_initialize_ib", 329 "RDS Statep not initialized"); 330 return (-1); 331 } 332 333 /* How many hcas are there? */ 334 nhcas = ibt_get_hca_list(&guidp); 335 if (nhcas == 0) { 336 RDS_DPRINTF2("rdsib_initialize_ib", "No IB HCAs Available"); 337 return (-1); 338 } 339 340 RDS_DPRINTF3("rdsib_initialize_ib", "Number of HCAs: %d", nhcas); 341 342 /* Register with IBTF */ 343 ret = ibt_attach(&rds_ib_modinfo, rdsib_dev_info, rdsib_statep, 344 &rdsib_statep->rds_ibhdl); 345 if (ret != IBT_SUCCESS) { 346 RDS_DPRINTF2("rdsib_initialize_ib", "ibt_attach failed: %d", 347 ret); 348 (void) ibt_free_hca_list(guidp, nhcas); 349 return (-1); 350 } 351 352 /* 353 * Open each HCA and gather its information. Don't care about HCAs 354 * that cannot be opened. It is OK as long as atleast one HCA can be 355 * opened. 356 * Initialize a HCA only if all the information is available. 
357 */ 358 for (ix = 0, hcaix = 0; ix < nhcas; ix++) { 359 RDS_DPRINTF3(LABEL, "Open HCA: 0x%llx", guidp[ix]); 360 361 hcap = rdsib_init_hca(guidp[ix]); 362 if (hcap != NULL) hcaix++; 363 } 364 365 /* free the HCA list, we are done with it */ 366 (void) ibt_free_hca_list(guidp, nhcas); 367 368 if (hcaix == 0) { 369 /* Failed to Initialize even one HCA */ 370 RDS_DPRINTF2("rdsib_initialize_ib", "No HCAs are initialized"); 371 (void) ibt_detach(rdsib_statep->rds_ibhdl); 372 rdsib_statep->rds_ibhdl = NULL; 373 return (-1); 374 } 375 376 if (hcaix < nhcas) { 377 RDS_DPRINTF2("rdsib_open_ib", "HCAs %d/%d failed to initialize", 378 (nhcas - hcaix), nhcas); 379 } 380 381 RDS_DPRINTF2("rdsib_initialize_ib", "return: statep %p", rdsib_statep); 382 383 return (0); 384 } 385 386 /* 387 * Called from detach 388 */ 389 void 390 rdsib_deinitialize_ib() 391 { 392 rds_hca_t *hcap, *nextp; 393 int ret; 394 395 RDS_DPRINTF2("rdsib_deinitialize_ib", "enter: statep %p", rdsib_statep); 396 397 /* close and destroy all the sessions */ 398 rds_close_sessions(NULL); 399 400 /* Release all HCA resources */ 401 rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER); 402 RDS_DPRINTF2("rdsib_deinitialize_ib", "HCA List: %p, NHCA: %d", 403 rdsib_statep->rds_hcalistp, rdsib_statep->rds_nhcas); 404 hcap = rdsib_statep->rds_hcalistp; 405 rdsib_statep->rds_hcalistp = NULL; 406 rdsib_statep->rds_nhcas = 0; 407 rw_exit(&rdsib_statep->rds_hca_lock); 408 409 while (hcap != NULL) { 410 nextp = hcap->hca_nextp; 411 412 if (hcap->hca_hdl != NULL) { 413 ret = ibt_free_pd(hcap->hca_hdl, hcap->hca_pdhdl); 414 ASSERT(ret == IBT_SUCCESS); 415 416 (void) ibt_free_portinfo(hcap->hca_pinfop, 417 hcap->hca_pinfo_sz); 418 419 ret = ibt_close_hca(hcap->hca_hdl); 420 ASSERT(ret == IBT_SUCCESS); 421 } 422 423 kmem_free(hcap, sizeof (rds_hca_t)); 424 hcap = nextp; 425 } 426 427 /* Deregister with IBTF */ 428 if (rdsib_statep->rds_ibhdl != NULL) { 429 (void) ibt_detach(rdsib_statep->rds_ibhdl); 430 rdsib_statep->rds_ibhdl = 
NULL; 431 } 432 433 RDS_DPRINTF2("rdsib_deinitialize_ib", "return: statep %p", 434 rdsib_statep); 435 } 436 437 /* 438 * Called on open of first RDS socket 439 */ 440 int 441 rdsib_open_ib() 442 { 443 int ret; 444 445 RDS_DPRINTF2("rdsib_open_ib", "enter: statep %p", rdsib_statep); 446 447 /* Enable incoming connection requests */ 448 if (rdsib_statep->rds_srvhdl == NULL) { 449 rdsib_statep->rds_srvhdl = 450 rds_register_service(rdsib_statep->rds_ibhdl); 451 if (rdsib_statep->rds_srvhdl == NULL) { 452 RDS_DPRINTF2("rdsib_open_ib", 453 "Service registration failed"); 454 return (-1); 455 } else { 456 /* bind the service on all available ports */ 457 ret = rds_bind_service(rdsib_statep); 458 if (ret != 0) { 459 RDS_DPRINTF2("rdsib_open_ib", 460 "Bind service failed: %d", ret); 461 } 462 } 463 } 464 465 RDS_DPRINTF2("rdsib_open_ib", "return: statep %p", rdsib_statep); 466 467 return (0); 468 } 469 470 /* 471 * Called when all ports are closed. 472 */ 473 void 474 rdsib_close_ib() 475 { 476 int ret; 477 478 RDS_DPRINTF2("rdsib_close_ib", "enter: statep %p", rdsib_statep); 479 480 /* Disable incoming connection requests */ 481 if (rdsib_statep->rds_srvhdl != NULL) { 482 ret = ibt_unbind_all_services(rdsib_statep->rds_srvhdl); 483 if (ret != 0) { 484 RDS_DPRINTF2("rdsib_close_ib", 485 "ibt_unbind_all_services failed: %d\n", ret); 486 } 487 ret = ibt_deregister_service(rdsib_statep->rds_ibhdl, 488 rdsib_statep->rds_srvhdl); 489 if (ret != 0) { 490 RDS_DPRINTF2("rdsib_close_ib", 491 "ibt_deregister_service failed: %d\n", ret); 492 } else { 493 rdsib_statep->rds_srvhdl = NULL; 494 } 495 } 496 497 RDS_DPRINTF2("rdsib_close_ib", "return: statep %p", rdsib_statep); 498 } 499 500 /* Return hcap, given the hca guid */ 501 rds_hca_t * 502 rds_get_hcap(rds_state_t *statep, ib_guid_t hca_guid) 503 { 504 rds_hca_t *hcap; 505 506 RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: Enter: statep: 0x%p " 507 "guid: %llx", statep, hca_guid); 508 509 rw_enter(&statep->rds_hca_lock, RW_READER); 
510 511 hcap = statep->rds_hcalistp; 512 while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) { 513 hcap = hcap->hca_nextp; 514 } 515 516 /* 517 * don't let anyone use this HCA until the RECV memory 518 * is registered with this HCA 519 */ 520 if ((hcap != NULL) && 521 (hcap->hca_state == RDS_HCA_STATE_MEM_REGISTERED)) { 522 ASSERT(hcap->hca_mrhdl != NULL); 523 rw_exit(&statep->rds_hca_lock); 524 return (hcap); 525 } 526 527 RDS_DPRINTF2("rds_get_hcap", 528 "HCA (0x%p, 0x%llx) is not initialized", hcap, hca_guid); 529 rw_exit(&statep->rds_hca_lock); 530 531 RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: return"); 532 533 return (NULL); 534 } 535 536 /* Return hcap, given a gid */ 537 rds_hca_t * 538 rds_gid_to_hcap(rds_state_t *statep, ib_gid_t gid) 539 { 540 rds_hca_t *hcap; 541 uint_t ix; 542 543 RDS_DPRINTF4("rds_gid_to_hcap", "Enter: statep: 0x%p gid: %llx:%llx", 544 statep, gid.gid_prefix, gid.gid_guid); 545 546 rw_enter(&statep->rds_hca_lock, RW_READER); 547 548 hcap = statep->rds_hcalistp; 549 while (hcap != NULL) { 550 551 /* 552 * don't let anyone use this HCA until the RECV memory 553 * is registered with this HCA 554 */ 555 if (hcap->hca_state != RDS_HCA_STATE_MEM_REGISTERED) { 556 RDS_DPRINTF3("rds_gid_to_hcap", 557 "HCA (0x%p, 0x%llx) is not initialized", 558 hcap, gid.gid_guid); 559 hcap = hcap->hca_nextp; 560 continue; 561 } 562 563 for (ix = 0; ix < hcap->hca_nports; ix++) { 564 if ((hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_prefix == 565 gid.gid_prefix) && 566 (hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_guid == 567 gid.gid_guid)) { 568 RDS_DPRINTF4("rds_gid_to_hcap", 569 "gid found in hcap: 0x%p", hcap); 570 rw_exit(&statep->rds_hca_lock); 571 return (hcap); 572 } 573 } 574 hcap = hcap->hca_nextp; 575 } 576 577 rw_exit(&statep->rds_hca_lock); 578 579 return (NULL); 580 } 581 582 /* This is called from the send CQ handler */ 583 void 584 rds_send_acknowledgement(rds_ep_t *ep) 585 { 586 int ret; 587 uint_t ix; 588 589 RDS_DPRINTF4("rds_send_acknowledgement", 
"Enter EP(%p)", ep); 590 591 mutex_enter(&ep->ep_lock); 592 593 ASSERT(ep->ep_rdmacnt != 0); 594 595 /* 596 * The previous ACK completed successfully, send the next one 597 * if more messages were received after sending the last ACK 598 */ 599 if (ep->ep_rbufid != *(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va) { 600 *(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va = ep->ep_rbufid; 601 mutex_exit(&ep->ep_lock); 602 603 /* send acknowledgement */ 604 RDS_INCR_TXACKS(); 605 ret = ibt_post_send(ep->ep_chanhdl, &ep->ep_ackwr, 1, &ix); 606 if (ret != IBT_SUCCESS) { 607 RDS_DPRINTF2("rds_send_acknowledgement", 608 "EP(%p): ibt_post_send for acknowledgement " 609 "failed: %d, SQ depth: %d", 610 ep, ret, ep->ep_sndpool.pool_nbusy); 611 mutex_enter(&ep->ep_lock); 612 ep->ep_rdmacnt--; 613 mutex_exit(&ep->ep_lock); 614 } 615 } else { 616 /* ACKed all messages, no more to ACK */ 617 ep->ep_rdmacnt--; 618 mutex_exit(&ep->ep_lock); 619 return; 620 } 621 622 RDS_DPRINTF4("rds_send_acknowledgement", "Return EP(%p)", ep); 623 } 624 625 static int 626 rds_poll_ctrl_completions(ibt_cq_hdl_t cq, rds_ep_t *ep) 627 { 628 ibt_wc_t wc; 629 uint_t npolled; 630 rds_buf_t *bp; 631 rds_ctrl_pkt_t *cpkt; 632 rds_qp_t *recvqp; 633 int ret = IBT_SUCCESS; 634 635 RDS_DPRINTF4("rds_poll_ctrl_completions", "Enter: EP(%p)", ep); 636 637 bzero(&wc, sizeof (ibt_wc_t)); 638 ret = ibt_poll_cq(cq, &wc, 1, &npolled); 639 if (ret != IBT_SUCCESS) { 640 if (ret != IBT_CQ_EMPTY) { 641 RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_poll_cq " 642 "returned: %d", ep, cq, ret); 643 } else { 644 RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq " 645 "returned: IBT_CQ_EMPTY", ep, cq); 646 } 647 return (ret); 648 } 649 650 bp = (rds_buf_t *)(uintptr_t)wc.wc_id; 651 652 if (wc.wc_status != IBT_WC_SUCCESS) { 653 mutex_enter(&ep->ep_recvqp.qp_lock); 654 ep->ep_recvqp.qp_level--; 655 mutex_exit(&ep->ep_recvqp.qp_lock); 656 657 /* Free the buffer */ 658 bp->buf_state = RDS_RCVBUF_FREE; 659 rds_free_recv_buf(bp, 1); 660 661 /* Receive 
completion failure */ 662 if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) { 663 RDS_DPRINTF2("rds_poll_ctrl_completions", 664 "EP(%p) CQ(%p) BP(%p): WC Error Status: %d", 665 ep, cq, wc.wc_id, wc.wc_status); 666 } 667 return (ret); 668 } 669 670 /* there is one less in the RQ */ 671 recvqp = &ep->ep_recvqp; 672 mutex_enter(&recvqp->qp_lock); 673 recvqp->qp_level--; 674 if ((recvqp->qp_taskqpending == B_FALSE) && 675 (recvqp->qp_level <= recvqp->qp_lwm)) { 676 /* Time to post more buffers into the RQ */ 677 recvqp->qp_taskqpending = B_TRUE; 678 mutex_exit(&recvqp->qp_lock); 679 680 ret = ddi_taskq_dispatch(rds_taskq, 681 rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP); 682 if (ret != DDI_SUCCESS) { 683 RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d", 684 ret); 685 mutex_enter(&recvqp->qp_lock); 686 recvqp->qp_taskqpending = B_FALSE; 687 mutex_exit(&recvqp->qp_lock); 688 } 689 } else { 690 mutex_exit(&recvqp->qp_lock); 691 } 692 693 cpkt = (rds_ctrl_pkt_t *)(uintptr_t)bp->buf_ds.ds_va; 694 rds_handle_control_message(ep->ep_sp, cpkt); 695 696 bp->buf_state = RDS_RCVBUF_FREE; 697 rds_free_recv_buf(bp, 1); 698 699 RDS_DPRINTF4("rds_poll_ctrl_completions", "Return: EP(%p)", ep); 700 701 return (ret); 702 } 703 704 #define RDS_POST_FEW_ATATIME 100 705 /* Post recv WRs into the RQ. 
Assumes the ep->refcnt is already incremented */ 706 void 707 rds_post_recv_buf(void *arg) 708 { 709 ibt_channel_hdl_t chanhdl; 710 rds_ep_t *ep; 711 rds_session_t *sp; 712 rds_qp_t *recvqp; 713 rds_bufpool_t *gp; 714 rds_buf_t *bp, *bp1; 715 ibt_recv_wr_t *wrp, wr[RDS_POST_FEW_ATATIME]; 716 rds_hca_t *hcap; 717 uint_t npost, nspace, rcv_len; 718 uint_t ix, jx, kx; 719 int ret; 720 721 chanhdl = (ibt_channel_hdl_t)arg; 722 RDS_DPRINTF4("rds_post_recv_buf", "Enter: CHAN(%p)", chanhdl); 723 RDS_INCR_POST_RCV_BUF_CALLS(); 724 725 ep = (rds_ep_t *)ibt_get_chan_private(chanhdl); 726 ASSERT(ep != NULL); 727 sp = ep->ep_sp; 728 recvqp = &ep->ep_recvqp; 729 730 RDS_DPRINTF5("rds_post_recv_buf", "EP(%p)", ep); 731 732 /* get the hcap for the HCA hosting this channel */ 733 hcap = rds_lkup_hca(ep->ep_hca_guid); 734 if (hcap == NULL) { 735 RDS_DPRINTF2("rds_post_recv_buf", "HCA (0x%llx) not found", 736 ep->ep_hca_guid); 737 return; 738 } 739 740 /* Make sure the session is still connected */ 741 rw_enter(&sp->session_lock, RW_READER); 742 if ((sp->session_state != RDS_SESSION_STATE_INIT) && 743 (sp->session_state != RDS_SESSION_STATE_CONNECTED) && 744 (sp->session_state != RDS_SESSION_STATE_HCA_CLOSING)) { 745 RDS_DPRINTF2("rds_post_recv_buf", "EP(%p): Session is not " 746 "in active state (%d)", ep, sp->session_state); 747 rw_exit(&sp->session_lock); 748 return; 749 } 750 rw_exit(&sp->session_lock); 751 752 /* how many can be posted */ 753 mutex_enter(&recvqp->qp_lock); 754 nspace = recvqp->qp_depth - recvqp->qp_level; 755 if (nspace == 0) { 756 RDS_DPRINTF2("rds_post_recv_buf", "RQ is FULL"); 757 recvqp->qp_taskqpending = B_FALSE; 758 mutex_exit(&recvqp->qp_lock); 759 return; 760 } 761 mutex_exit(&recvqp->qp_lock); 762 763 if (ep->ep_type == RDS_EP_TYPE_DATA) { 764 gp = &rds_dpool; 765 rcv_len = RdsPktSize; 766 } else { 767 gp = &rds_cpool; 768 rcv_len = RDS_CTRLPKT_SIZE; 769 } 770 771 bp = rds_get_buf(gp, nspace, &jx); 772 if (bp == NULL) { 773 RDS_DPRINTF2(LABEL, "EP(%p): 
No Recv buffers available", ep); 774 /* try again later */ 775 ret = ddi_taskq_dispatch(rds_taskq, rds_post_recv_buf, 776 (void *)chanhdl, DDI_NOSLEEP); 777 if (ret != DDI_SUCCESS) { 778 RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d", 779 ret); 780 mutex_enter(&recvqp->qp_lock); 781 recvqp->qp_taskqpending = B_FALSE; 782 mutex_exit(&recvqp->qp_lock); 783 } 784 return; 785 } 786 787 if (jx != nspace) { 788 RDS_DPRINTF2(LABEL, "EP(%p): Recv buffers " 789 "needed: %d available: %d", ep, nspace, jx); 790 nspace = jx; 791 } 792 793 bp1 = bp; 794 for (ix = 0; ix < nspace; ix++) { 795 bp1->buf_ep = ep; 796 ASSERT(bp1->buf_state == RDS_RCVBUF_FREE); 797 bp1->buf_state = RDS_RCVBUF_POSTED; 798 bp1->buf_ds.ds_key = hcap->hca_lkey; 799 bp1->buf_ds.ds_len = rcv_len; 800 bp1 = bp1->buf_nextp; 801 } 802 803 #if 0 804 wrp = kmem_zalloc(RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t), 805 KM_SLEEP); 806 #else 807 wrp = &wr[0]; 808 #endif 809 810 npost = nspace; 811 while (npost) { 812 jx = (npost > RDS_POST_FEW_ATATIME) ? 813 RDS_POST_FEW_ATATIME : npost; 814 for (ix = 0; ix < jx; ix++) { 815 wrp[ix].wr_id = (uintptr_t)bp; 816 wrp[ix].wr_nds = 1; 817 wrp[ix].wr_sgl = &bp->buf_ds; 818 bp = bp->buf_nextp; 819 } 820 821 ret = ibt_post_recv(chanhdl, wrp, jx, &kx); 822 if ((ret != IBT_SUCCESS) || (kx != jx)) { 823 RDS_DPRINTF2(LABEL, "ibt_post_recv for %d WRs failed: " 824 "%d", npost, ret); 825 npost -= kx; 826 break; 827 } 828 829 npost -= jx; 830 } 831 832 mutex_enter(&recvqp->qp_lock); 833 if (npost != 0) { 834 RDS_DPRINTF2("rds_post_recv_buf", 835 "EP(%p) Failed to post %d WRs", ep, npost); 836 recvqp->qp_level += (nspace - npost); 837 } else { 838 recvqp->qp_level += nspace; 839 } 840 841 /* 842 * sometimes, the recv WRs can get consumed as soon as they are 843 * posted. In that case, taskq thread to post more WRs to the RQ will 844 * not be scheduled as the taskqpending flag is still set. 
845 */ 846 if (recvqp->qp_level == 0) { 847 mutex_exit(&recvqp->qp_lock); 848 ret = ddi_taskq_dispatch(rds_taskq, 849 rds_post_recv_buf, (void *)chanhdl, DDI_NOSLEEP); 850 if (ret != DDI_SUCCESS) { 851 RDS_DPRINTF2("rds_post_recv_buf", 852 "ddi_taskq_dispatch failed: %d", ret); 853 mutex_enter(&recvqp->qp_lock); 854 recvqp->qp_taskqpending = B_FALSE; 855 mutex_exit(&recvqp->qp_lock); 856 } 857 } else { 858 recvqp->qp_taskqpending = B_FALSE; 859 mutex_exit(&recvqp->qp_lock); 860 } 861 862 #if 0 863 kmem_free(wrp, RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t)); 864 #endif 865 866 RDS_DPRINTF4("rds_post_recv_buf", "Return: EP(%p)", ep); 867 } 868 869 static int 870 rds_poll_data_completions(ibt_cq_hdl_t cq, rds_ep_t *ep) 871 { 872 ibt_wc_t wc; 873 rds_buf_t *bp; 874 rds_data_hdr_t *pktp; 875 rds_qp_t *recvqp; 876 uint_t npolled; 877 int ret = IBT_SUCCESS; 878 879 880 RDS_DPRINTF4("rds_poll_data_completions", "Enter: EP(%p)", ep); 881 882 bzero(&wc, sizeof (ibt_wc_t)); 883 ret = ibt_poll_cq(cq, &wc, 1, &npolled); 884 if (ret != IBT_SUCCESS) { 885 if (ret != IBT_CQ_EMPTY) { 886 RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_poll_cq " 887 "returned: %d", ep, cq, ret); 888 } else { 889 RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq " 890 "returned: IBT_CQ_EMPTY", ep, cq); 891 } 892 return (ret); 893 } 894 895 bp = (rds_buf_t *)(uintptr_t)wc.wc_id; 896 ASSERT(bp->buf_state == RDS_RCVBUF_POSTED); 897 bp->buf_state = RDS_RCVBUF_ONSOCKQ; 898 bp->buf_nextp = NULL; 899 900 if (wc.wc_status != IBT_WC_SUCCESS) { 901 mutex_enter(&ep->ep_recvqp.qp_lock); 902 ep->ep_recvqp.qp_level--; 903 mutex_exit(&ep->ep_recvqp.qp_lock); 904 905 /* free the buffer */ 906 bp->buf_state = RDS_RCVBUF_FREE; 907 rds_free_recv_buf(bp, 1); 908 909 /* Receive completion failure */ 910 if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) { 911 RDS_DPRINTF2("rds_poll_data_completions", 912 "EP(%p) CQ(%p) BP(%p): WC Error Status: %d", 913 ep, cq, wc.wc_id, wc.wc_status); 914 RDS_INCR_RXERRS(); 915 } 916 return (ret); 917 } 
918 919 /* there is one less in the RQ */ 920 recvqp = &ep->ep_recvqp; 921 mutex_enter(&recvqp->qp_lock); 922 recvqp->qp_level--; 923 if ((recvqp->qp_taskqpending == B_FALSE) && 924 (recvqp->qp_level <= recvqp->qp_lwm)) { 925 /* Time to post more buffers into the RQ */ 926 recvqp->qp_taskqpending = B_TRUE; 927 mutex_exit(&recvqp->qp_lock); 928 929 ret = ddi_taskq_dispatch(rds_taskq, 930 rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP); 931 if (ret != DDI_SUCCESS) { 932 RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d", 933 ret); 934 mutex_enter(&recvqp->qp_lock); 935 recvqp->qp_taskqpending = B_FALSE; 936 mutex_exit(&recvqp->qp_lock); 937 } 938 } else { 939 mutex_exit(&recvqp->qp_lock); 940 } 941 942 pktp = (rds_data_hdr_t *)(uintptr_t)bp->buf_ds.ds_va; 943 ASSERT(pktp->dh_datalen != 0); 944 945 RDS_DPRINTF5(LABEL, "Message Received: sendIP: 0x%x recvIP: 0x%x " 946 "sendport: %d recvport: %d npkts: %d pktno: %d", ep->ep_remip, 947 ep->ep_myip, pktp->dh_sendport, pktp->dh_recvport, 948 pktp->dh_npkts, pktp->dh_psn); 949 950 RDS_DPRINTF3(LABEL, "BP(%p): npkts: %d psn: %d", bp, 951 pktp->dh_npkts, pktp->dh_psn); 952 953 if (pktp->dh_npkts == 1) { 954 /* single pkt or last packet */ 955 if (pktp->dh_psn != 0) { 956 /* last packet of a segmented message */ 957 ASSERT(ep->ep_seglbp != NULL); 958 ep->ep_seglbp->buf_nextp = bp; 959 ep->ep_seglbp = bp; 960 rds_received_msg(ep, ep->ep_segfbp); 961 ep->ep_segfbp = NULL; 962 ep->ep_seglbp = NULL; 963 } else { 964 /* single packet */ 965 rds_received_msg(ep, bp); 966 } 967 } else { 968 /* multi-pkt msg */ 969 if (pktp->dh_psn == 0) { 970 /* first packet */ 971 ASSERT(ep->ep_segfbp == NULL); 972 ep->ep_segfbp = bp; 973 ep->ep_seglbp = bp; 974 } else { 975 /* intermediate packet */ 976 ASSERT(ep->ep_segfbp != NULL); 977 ep->ep_seglbp->buf_nextp = bp; 978 ep->ep_seglbp = bp; 979 } 980 } 981 982 RDS_DPRINTF4("rds_poll_data_completions", "Return: EP(%p)", ep); 983 984 return (ret); 985 } 986 987 void 988 
rds_recvcq_handler(ibt_cq_hdl_t cq, void *arg) 989 { 990 rds_ep_t *ep; 991 int ret = IBT_SUCCESS; 992 int (*func)(ibt_cq_hdl_t, rds_ep_t *); 993 994 ep = (rds_ep_t *)arg; 995 996 RDS_DPRINTF4("rds_recvcq_handler", "enter: EP(%p)", ep); 997 998 if (ep->ep_type == RDS_EP_TYPE_DATA) { 999 func = rds_poll_data_completions; 1000 } else { 1001 func = rds_poll_ctrl_completions; 1002 } 1003 1004 do { 1005 ret = func(cq, ep); 1006 } while (ret != IBT_CQ_EMPTY); 1007 1008 /* enable the CQ */ 1009 ret = ibt_enable_cq_notify(cq, rds_wc_signal); 1010 if (ret != IBT_SUCCESS) { 1011 RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify " 1012 "failed: %d", ep, cq, ret); 1013 return; 1014 } 1015 1016 do { 1017 ret = func(cq, ep); 1018 } while (ret != IBT_CQ_EMPTY); 1019 1020 RDS_DPRINTF4("rds_recvcq_handler", "Return: EP(%p)", ep); 1021 } 1022 1023 void 1024 rds_poll_send_completions(ibt_cq_hdl_t cq, rds_ep_t *ep, boolean_t lock) 1025 { 1026 ibt_wc_t wc[RDS_NUM_DATA_SEND_WCS]; 1027 uint_t npolled, nret, send_error = 0; 1028 rds_buf_t *headp, *tailp, *bp; 1029 int ret, ix; 1030 1031 RDS_DPRINTF4("rds_poll_send_completions", "Enter EP(%p)", ep); 1032 1033 headp = NULL; 1034 tailp = NULL; 1035 npolled = 0; 1036 do { 1037 ret = ibt_poll_cq(cq, wc, RDS_NUM_DATA_SEND_WCS, &nret); 1038 if (ret != IBT_SUCCESS) { 1039 if (ret != IBT_CQ_EMPTY) { 1040 RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): " 1041 "ibt_poll_cq returned: %d", ep, cq, ret); 1042 } else { 1043 RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): " 1044 "ibt_poll_cq returned: IBT_CQ_EMPTY", 1045 ep, cq); 1046 } 1047 1048 break; 1049 } 1050 1051 for (ix = 0; ix < nret; ix++) { 1052 if (wc[ix].wc_status == IBT_WC_SUCCESS) { 1053 if (wc[ix].wc_type == IBT_WRC_RDMAW) { 1054 rds_send_acknowledgement(ep); 1055 continue; 1056 } 1057 1058 bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id; 1059 ASSERT(bp->buf_state == RDS_SNDBUF_PENDING); 1060 bp->buf_state = RDS_SNDBUF_FREE; 1061 } else if (wc[ix].wc_status == IBT_WC_WR_FLUSHED_ERR) { 1062 RDS_INCR_TXERRS(); 
1063 RDS_DPRINTF5("rds_poll_send_completions", 1064 "EP(%p): WC ID: %p ERROR: %d", ep, 1065 wc[ix].wc_id, wc[ix].wc_status); 1066 1067 if (wc[ix].wc_id == RDS_RDMAW_WRID) { 1068 mutex_enter(&ep->ep_lock); 1069 ep->ep_rdmacnt--; 1070 mutex_exit(&ep->ep_lock); 1071 continue; 1072 } 1073 1074 bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id; 1075 ASSERT(bp->buf_state == RDS_SNDBUF_PENDING); 1076 bp->buf_state = RDS_SNDBUF_FREE; 1077 } else { 1078 RDS_INCR_TXERRS(); 1079 RDS_DPRINTF2("rds_poll_send_completions", 1080 "EP(%p): WC ID: %p ERROR: %d", ep, 1081 wc[ix].wc_id, wc[ix].wc_status); 1082 if (send_error == 0) { 1083 rds_session_t *sp = ep->ep_sp; 1084 1085 /* don't let anyone send anymore */ 1086 rw_enter(&sp->session_lock, RW_WRITER); 1087 if (sp->session_state != 1088 RDS_SESSION_STATE_ERROR) { 1089 sp->session_state = 1090 RDS_SESSION_STATE_ERROR; 1091 /* Make this the active end */ 1092 sp->session_type = 1093 RDS_SESSION_ACTIVE; 1094 } 1095 rw_exit(&sp->session_lock); 1096 } 1097 1098 send_error++; 1099 1100 if (wc[ix].wc_id == RDS_RDMAW_WRID) { 1101 mutex_enter(&ep->ep_lock); 1102 ep->ep_rdmacnt--; 1103 mutex_exit(&ep->ep_lock); 1104 continue; 1105 } 1106 1107 bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id; 1108 ASSERT(bp->buf_state == RDS_SNDBUF_PENDING); 1109 bp->buf_state = RDS_SNDBUF_FREE; 1110 } 1111 1112 bp->buf_nextp = NULL; 1113 if (headp) { 1114 tailp->buf_nextp = bp; 1115 tailp = bp; 1116 } else { 1117 headp = bp; 1118 tailp = bp; 1119 } 1120 1121 npolled++; 1122 } 1123 1124 if (rds_no_interrupts && (npolled > 100)) { 1125 break; 1126 } 1127 1128 if (rds_no_interrupts == 1) { 1129 break; 1130 } 1131 } while (ret != IBT_CQ_EMPTY); 1132 1133 RDS_DPRINTF5("rds_poll_send_completions", "Npolled: %d send_error: %d", 1134 npolled, send_error); 1135 1136 /* put the buffers to the pool */ 1137 if (npolled != 0) { 1138 rds_free_send_buf(ep, headp, tailp, npolled, lock); 1139 } 1140 1141 if (send_error != 0) { 1142 rds_handle_send_error(ep); 1143 } 1144 1145 
RDS_DPRINTF4("rds_poll_send_completions", "Return EP(%p)", ep); 1146 } 1147 1148 void 1149 rds_sendcq_handler(ibt_cq_hdl_t cq, void *arg) 1150 { 1151 rds_ep_t *ep; 1152 int ret; 1153 1154 ep = (rds_ep_t *)arg; 1155 1156 RDS_DPRINTF4("rds_sendcq_handler", "Enter: EP(%p)", ep); 1157 1158 /* enable the CQ */ 1159 ret = ibt_enable_cq_notify(cq, IBT_NEXT_COMPLETION); 1160 if (ret != IBT_SUCCESS) { 1161 RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify " 1162 "failed: %d", ep, cq, ret); 1163 return; 1164 } 1165 1166 rds_poll_send_completions(cq, ep, B_FALSE); 1167 1168 RDS_DPRINTF4("rds_sendcq_handler", "Return: EP(%p)", ep); 1169 } 1170 1171 void 1172 rds_ep_free_rc_channel(rds_ep_t *ep) 1173 { 1174 int ret; 1175 1176 RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Enter", ep); 1177 1178 ASSERT(mutex_owned(&ep->ep_lock)); 1179 1180 /* free the QP */ 1181 if (ep->ep_chanhdl != NULL) { 1182 /* wait until the RQ is empty */ 1183 (void) ibt_flush_channel(ep->ep_chanhdl); 1184 (void) rds_is_recvq_empty(ep, B_TRUE); 1185 ret = ibt_free_channel(ep->ep_chanhdl); 1186 if (ret != IBT_SUCCESS) { 1187 RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) " 1188 "ibt_free_channel returned: %d", ep, ret); 1189 } 1190 ep->ep_chanhdl = NULL; 1191 } else { 1192 RDS_DPRINTF2("rds_ep_free_rc_channel", 1193 "EP(%p) Channel is ALREADY FREE", ep); 1194 } 1195 1196 /* free the Send CQ */ 1197 if (ep->ep_sendcq != NULL) { 1198 ret = ibt_free_cq(ep->ep_sendcq); 1199 if (ret != IBT_SUCCESS) { 1200 RDS_DPRINTF2("rds_ep_free_rc_channel", 1201 "EP(%p) - for sendcq, ibt_free_cq returned %d", 1202 ep, ret); 1203 } 1204 ep->ep_sendcq = NULL; 1205 } else { 1206 RDS_DPRINTF2("rds_ep_free_rc_channel", 1207 "EP(%p) SendCQ is ALREADY FREE", ep); 1208 } 1209 1210 /* free the Recv CQ */ 1211 if (ep->ep_recvcq != NULL) { 1212 ret = ibt_free_cq(ep->ep_recvcq); 1213 if (ret != IBT_SUCCESS) { 1214 RDS_DPRINTF2("rds_ep_free_rc_channel", 1215 "EP(%p) - for recvcq, ibt_free_cq returned %d", 1216 ep, ret); 1217 } 
1218 ep->ep_recvcq = NULL; 1219 } else { 1220 RDS_DPRINTF2("rds_ep_free_rc_channel", 1221 "EP(%p) RecvCQ is ALREADY FREE", ep); 1222 } 1223 1224 RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Return", ep); 1225 } 1226 1227 /* Allocate resources for RC channel */ 1228 ibt_channel_hdl_t 1229 rds_ep_alloc_rc_channel(rds_ep_t *ep, uint8_t hca_port) 1230 { 1231 int ret = IBT_SUCCESS; 1232 ibt_cq_attr_t scqattr, rcqattr; 1233 ibt_rc_chan_alloc_args_t chanargs; 1234 ibt_channel_hdl_t chanhdl; 1235 rds_session_t *sp; 1236 rds_hca_t *hcap; 1237 1238 RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Enter: 0x%p port: %d", 1239 ep, hca_port); 1240 1241 /* Update the EP with the right IP address and HCA guid */ 1242 sp = ep->ep_sp; 1243 ASSERT(sp != NULL); 1244 rw_enter(&sp->session_lock, RW_READER); 1245 mutex_enter(&ep->ep_lock); 1246 ep->ep_myip = sp->session_myip; 1247 ep->ep_remip = sp->session_remip; 1248 hcap = rds_gid_to_hcap(rdsib_statep, sp->session_lgid); 1249 ep->ep_hca_guid = hcap->hca_guid; 1250 mutex_exit(&ep->ep_lock); 1251 rw_exit(&sp->session_lock); 1252 1253 /* reset taskqpending flag here */ 1254 ep->ep_recvqp.qp_taskqpending = B_FALSE; 1255 1256 if (ep->ep_type == RDS_EP_TYPE_CTRL) { 1257 scqattr.cq_size = MaxCtrlSendBuffers; 1258 scqattr.cq_sched = NULL; 1259 scqattr.cq_flags = IBT_CQ_NO_FLAGS; 1260 1261 rcqattr.cq_size = MaxCtrlRecvBuffers; 1262 rcqattr.cq_sched = NULL; 1263 rcqattr.cq_flags = IBT_CQ_NO_FLAGS; 1264 1265 chanargs.rc_sizes.cs_sq = MaxCtrlSendBuffers; 1266 chanargs.rc_sizes.cs_rq = MaxCtrlRecvBuffers; 1267 chanargs.rc_sizes.cs_sq_sgl = 1; 1268 chanargs.rc_sizes.cs_rq_sgl = 1; 1269 } else { 1270 scqattr.cq_size = MaxDataSendBuffers + RDS_NUM_ACKS; 1271 scqattr.cq_sched = NULL; 1272 scqattr.cq_flags = IBT_CQ_NO_FLAGS; 1273 1274 rcqattr.cq_size = MaxDataRecvBuffers; 1275 rcqattr.cq_sched = NULL; 1276 rcqattr.cq_flags = IBT_CQ_NO_FLAGS; 1277 1278 chanargs.rc_sizes.cs_sq = MaxDataSendBuffers + RDS_NUM_ACKS; 1279 chanargs.rc_sizes.cs_rq = 
MaxDataRecvBuffers; 1280 chanargs.rc_sizes.cs_sq_sgl = 1; 1281 chanargs.rc_sizes.cs_rq_sgl = 1; 1282 } 1283 1284 mutex_enter(&ep->ep_lock); 1285 if (ep->ep_sendcq == NULL) { 1286 /* returned size is always greater than the requested size */ 1287 ret = ibt_alloc_cq(hcap->hca_hdl, &scqattr, 1288 &ep->ep_sendcq, NULL); 1289 if (ret != IBT_SUCCESS) { 1290 RDS_DPRINTF2(LABEL, "ibt_alloc_cq for sendCQ " 1291 "failed, size = %d: %d", scqattr.cq_size, ret); 1292 mutex_exit(&ep->ep_lock); 1293 return (NULL); 1294 } 1295 1296 (void) ibt_set_cq_handler(ep->ep_sendcq, rds_sendcq_handler, 1297 ep); 1298 1299 if (rds_no_interrupts == 0) { 1300 ret = ibt_enable_cq_notify(ep->ep_sendcq, 1301 IBT_NEXT_COMPLETION); 1302 if (ret != IBT_SUCCESS) { 1303 RDS_DPRINTF2(LABEL, 1304 "ibt_enable_cq_notify failed: %d", ret); 1305 (void) ibt_free_cq(ep->ep_sendcq); 1306 ep->ep_sendcq = NULL; 1307 mutex_exit(&ep->ep_lock); 1308 return (NULL); 1309 } 1310 } 1311 } 1312 1313 if (ep->ep_recvcq == NULL) { 1314 /* returned size is always greater than the requested size */ 1315 ret = ibt_alloc_cq(hcap->hca_hdl, &rcqattr, 1316 &ep->ep_recvcq, NULL); 1317 if (ret != IBT_SUCCESS) { 1318 RDS_DPRINTF2(LABEL, "ibt_alloc_cq for recvCQ " 1319 "failed, size = %d: %d", rcqattr.cq_size, ret); 1320 (void) ibt_free_cq(ep->ep_sendcq); 1321 ep->ep_sendcq = NULL; 1322 mutex_exit(&ep->ep_lock); 1323 return (NULL); 1324 } 1325 1326 (void) ibt_set_cq_handler(ep->ep_recvcq, rds_recvcq_handler, 1327 ep); 1328 1329 ret = ibt_enable_cq_notify(ep->ep_recvcq, rds_wc_signal); 1330 if (ret != IBT_SUCCESS) { 1331 RDS_DPRINTF2(LABEL, 1332 "ibt_enable_cq_notify failed: %d", ret); 1333 (void) ibt_free_cq(ep->ep_recvcq); 1334 ep->ep_recvcq = NULL; 1335 (void) ibt_free_cq(ep->ep_sendcq); 1336 ep->ep_sendcq = NULL; 1337 mutex_exit(&ep->ep_lock); 1338 return (NULL); 1339 } 1340 } 1341 1342 chanargs.rc_flags = IBT_ALL_SIGNALED; 1343 chanargs.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR | 1344 IBT_CEP_ATOMIC; 1345 
chanargs.rc_hca_port_num = hca_port; 1346 chanargs.rc_scq = ep->ep_sendcq; 1347 chanargs.rc_rcq = ep->ep_recvcq; 1348 chanargs.rc_pd = hcap->hca_pdhdl; 1349 chanargs.rc_srq = NULL; 1350 1351 ret = ibt_alloc_rc_channel(hcap->hca_hdl, 1352 IBT_ACHAN_NO_FLAGS, &chanargs, &chanhdl, NULL); 1353 if (ret != IBT_SUCCESS) { 1354 RDS_DPRINTF2(LABEL, "ibt_alloc_rc_channel fail: %d", 1355 ret); 1356 (void) ibt_free_cq(ep->ep_recvcq); 1357 ep->ep_recvcq = NULL; 1358 (void) ibt_free_cq(ep->ep_sendcq); 1359 ep->ep_sendcq = NULL; 1360 mutex_exit(&ep->ep_lock); 1361 return (NULL); 1362 } 1363 mutex_exit(&ep->ep_lock); 1364 1365 /* Chan private should contain the ep */ 1366 (void) ibt_set_chan_private(chanhdl, ep); 1367 1368 RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Return: 0x%p", chanhdl); 1369 1370 return (chanhdl); 1371 } 1372 1373 1374 #if 0 1375 1376 /* Return node guid given a port gid */ 1377 ib_guid_t 1378 rds_gid_to_node_guid(ib_gid_t gid) 1379 { 1380 ibt_node_info_t nodeinfo; 1381 int ret; 1382 1383 RDS_DPRINTF4("rds_gid_to_node_guid", "Enter: gid: %llx:%llx", 1384 gid.gid_prefix, gid.gid_guid); 1385 1386 ret = ibt_gid_to_node_info(gid, &nodeinfo); 1387 if (ret != IBT_SUCCESS) { 1388 RDS_DPRINTF2(LABEL, "ibt_gid_node_info for gid: %llx:%llx " 1389 "failed", gid.gid_prefix, gid.gid_guid); 1390 return (0LL); 1391 } 1392 1393 RDS_DPRINTF4("rds_gid_to_node_guid", "Return: Node guid: %llx", 1394 nodeinfo.n_node_guid); 1395 1396 return (nodeinfo.n_node_guid); 1397 } 1398 1399 #endif 1400 1401 static void 1402 rds_handle_portup_event(rds_state_t *statep, ibt_hca_hdl_t hdl, 1403 ibt_async_event_t *event) 1404 { 1405 rds_hca_t *hcap; 1406 ibt_hca_portinfo_t *newpinfop, *oldpinfop; 1407 uint_t newsize, oldsize, nport; 1408 ib_gid_t gid; 1409 int ret; 1410 1411 RDS_DPRINTF2("rds_handle_portup_event", 1412 "Enter: GUID: 0x%llx Statep: %p", event->ev_hca_guid, statep); 1413 1414 rw_enter(&statep->rds_hca_lock, RW_WRITER); 1415 1416 hcap = statep->rds_hcalistp; 1417 while ((hcap != NULL) 
&& (hcap->hca_guid != event->ev_hca_guid)) { 1418 hcap = hcap->hca_nextp; 1419 } 1420 1421 if (hcap == NULL) { 1422 RDS_DPRINTF2("rds_handle_portup_event", "HCA: 0x%llx is " 1423 "not in our list", event->ev_hca_guid); 1424 rw_exit(&statep->rds_hca_lock); 1425 return; 1426 } 1427 1428 ret = ibt_query_hca_ports(hdl, 0, &newpinfop, &nport, &newsize); 1429 if (ret != IBT_SUCCESS) { 1430 RDS_DPRINTF2(LABEL, "ibt_query_hca_ports failed: %d", ret); 1431 rw_exit(&statep->rds_hca_lock); 1432 return; 1433 } 1434 1435 oldpinfop = hcap->hca_pinfop; 1436 oldsize = hcap->hca_pinfo_sz; 1437 hcap->hca_pinfop = newpinfop; 1438 hcap->hca_pinfo_sz = newsize; 1439 1440 (void) ibt_free_portinfo(oldpinfop, oldsize); 1441 1442 /* If RDS service is not registered then no bind is needed */ 1443 if (statep->rds_srvhdl == NULL) { 1444 RDS_DPRINTF2("rds_handle_portup_event", 1445 "RDS Service is not registered, so no action needed"); 1446 rw_exit(&statep->rds_hca_lock); 1447 return; 1448 } 1449 1450 /* 1451 * If the service was previously bound on this port and 1452 * if this port has changed state down and now up, we do not 1453 * need to bind the service again. The bind is expected to 1454 * persist across state changes. If the service was never bound 1455 * before then we bind it this time. 
1456 */ 1457 if (hcap->hca_bindhdl[event->ev_port - 1] == NULL) { 1458 1459 /* structure copy */ 1460 gid = newpinfop[event->ev_port - 1].p_sgid_tbl[0]; 1461 1462 /* bind RDS service on the port, pass statep as cm_private */ 1463 ret = ibt_bind_service(statep->rds_srvhdl, gid, NULL, statep, 1464 &hcap->hca_bindhdl[event->ev_port - 1]); 1465 if (ret != IBT_SUCCESS) { 1466 RDS_DPRINTF2("rds_handle_portup_event", 1467 "Bind service for HCA: 0x%llx Port: %d " 1468 "gid %llx:%llx returned: %d", event->ev_hca_guid, 1469 event->ev_port, gid.gid_prefix, gid.gid_guid, ret); 1470 } 1471 } 1472 1473 rw_exit(&statep->rds_hca_lock); 1474 1475 RDS_DPRINTF2("rds_handle_portup_event", "Return: GUID: 0x%llx", 1476 event->ev_hca_guid); 1477 } 1478 1479 static void 1480 rdsib_add_hca(ib_guid_t hca_guid) 1481 { 1482 rds_hca_t *hcap; 1483 ibt_mr_attr_t mem_attr; 1484 ibt_mr_desc_t mem_desc; 1485 int ret; 1486 1487 RDS_DPRINTF2("rdsib_add_hca", "Enter: GUID: 0x%llx", hca_guid); 1488 1489 hcap = rdsib_init_hca(hca_guid); 1490 if (hcap == NULL) 1491 return; 1492 1493 /* register the recv memory with this hca */ 1494 mutex_enter(&rds_dpool.pool_lock); 1495 if (rds_dpool.pool_memp == NULL) { 1496 /* no memory to register */ 1497 RDS_DPRINTF2("rdsib_add_hca", "No memory to register"); 1498 mutex_exit(&rds_dpool.pool_lock); 1499 return; 1500 } 1501 1502 mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)rds_dpool.pool_memp; 1503 mem_attr.mr_len = rds_dpool.pool_memsize; 1504 mem_attr.mr_as = NULL; 1505 mem_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE; 1506 1507 ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, &mem_attr, 1508 &hcap->hca_mrhdl, &mem_desc); 1509 1510 mutex_exit(&rds_dpool.pool_lock); 1511 1512 if (ret != IBT_SUCCESS) { 1513 RDS_DPRINTF2("rdsib_add_hca", "ibt_register_mr failed: %d", 1514 ret); 1515 } else { 1516 rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER); 1517 hcap->hca_state = RDS_HCA_STATE_MEM_REGISTERED; 1518 hcap->hca_lkey = mem_desc.md_lkey; 1519 hcap->hca_rkey = 
mem_desc.md_rkey; 1520 rw_exit(&rdsib_statep->rds_hca_lock); 1521 } 1522 1523 RDS_DPRINTF2("rdsib_add_hca", "Retrun: GUID: 0x%llx", hca_guid); 1524 } 1525 1526 void rds_close_this_session(rds_session_t *sp, uint8_t wait); 1527 int rds_post_control_message(rds_session_t *sp, uint8_t code, in_port_t port); 1528 1529 static void 1530 rdsib_del_hca(rds_state_t *statep, ib_guid_t hca_guid) 1531 { 1532 rds_session_t *sp; 1533 rds_hca_t *hcap; 1534 rds_hca_state_t saved_state; 1535 int ret, ix; 1536 1537 RDS_DPRINTF2("rdsib_del_hca", "Enter: GUID: 0x%llx", hca_guid); 1538 1539 /* 1540 * This should be a write lock as we don't want anyone to get access 1541 * to the hcap while we are modifing its contents 1542 */ 1543 rw_enter(&statep->rds_hca_lock, RW_WRITER); 1544 1545 hcap = statep->rds_hcalistp; 1546 while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) { 1547 hcap = hcap->hca_nextp; 1548 } 1549 1550 /* Prevent initiating any new activity on this HCA */ 1551 ASSERT(hcap != NULL); 1552 saved_state = hcap->hca_state; 1553 hcap->hca_state = RDS_HCA_STATE_STOPPING; 1554 1555 rw_exit(&statep->rds_hca_lock); 1556 1557 /* 1558 * stop the outgoing traffic and close any active sessions on this hca. 1559 * Any pending messages in the SQ will be allowed to complete. 1560 */ 1561 rw_enter(&statep->rds_sessionlock, RW_READER); 1562 sp = statep->rds_sessionlistp; 1563 while (sp) { 1564 if (sp->session_hca_guid != hca_guid) { 1565 sp = sp->session_nextp; 1566 continue; 1567 } 1568 1569 rw_enter(&sp->session_lock, RW_WRITER); 1570 RDS_DPRINTF2("rdsib_del_hca", "SP(%p) State: %d", sp, 1571 sp->session_state); 1572 /* 1573 * We are changing the session state in advance. This prevents 1574 * further messages to be posted to the SQ. We then 1575 * send a control message to the remote and tell it close 1576 * the session. 
1577 */ 1578 sp->session_state = RDS_SESSION_STATE_HCA_CLOSING; 1579 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 1580 "RDS_SESSION_STATE_PASSIVE_CLOSING", sp); 1581 rw_exit(&sp->session_lock); 1582 1583 /* 1584 * wait until the sendq is empty then tell the remote to 1585 * close this session. This enables for graceful shutdown of 1586 * the session 1587 */ 1588 (void) rds_is_sendq_empty(&sp->session_dataep, 2); 1589 (void) rds_post_control_message(sp, 1590 RDS_CTRL_CODE_CLOSE_SESSION, 0); 1591 1592 sp = sp->session_nextp; 1593 } 1594 1595 /* wait until all the sessions are off this HCA */ 1596 sp = statep->rds_sessionlistp; 1597 while (sp) { 1598 if (sp->session_hca_guid != hca_guid) { 1599 sp = sp->session_nextp; 1600 continue; 1601 } 1602 1603 rw_enter(&sp->session_lock, RW_READER); 1604 RDS_DPRINTF2("rdsib_del_hca", "SP(%p) State: %d", sp, 1605 sp->session_state); 1606 1607 while ((sp->session_state == RDS_SESSION_STATE_HCA_CLOSING) || 1608 (sp->session_state == RDS_SESSION_STATE_ERROR) || 1609 (sp->session_state == RDS_SESSION_STATE_PASSIVE_CLOSING) || 1610 (sp->session_state == RDS_SESSION_STATE_CLOSED)) { 1611 rw_exit(&sp->session_lock); 1612 delay(drv_usectohz(1000000)); 1613 rw_enter(&sp->session_lock, RW_READER); 1614 RDS_DPRINTF2("rdsib_del_hca", "SP(%p) State: %d", sp, 1615 sp->session_state); 1616 } 1617 1618 rw_exit(&sp->session_lock); 1619 1620 sp = sp->session_nextp; 1621 } 1622 rw_exit(&statep->rds_sessionlock); 1623 1624 /* 1625 * if rdsib_close_ib was called before this, then that would have 1626 * unbound the service on all ports. In that case, the HCA structs 1627 * will contain stale bindhdls. Hence, we do not call unbind unless 1628 * the service is still registered. 
1629 */ 1630 if (statep->rds_srvhdl != NULL) { 1631 /* unbind RDS service on all ports on this HCA */ 1632 for (ix = 0; ix < hcap->hca_nports; ix++) { 1633 if (hcap->hca_bindhdl[ix] == NULL) { 1634 continue; 1635 } 1636 1637 RDS_DPRINTF2("rdsib_del_hca", 1638 "Unbinding Service: port: %d, bindhdl: %p", 1639 ix + 1, hcap->hca_bindhdl[ix]); 1640 (void) ibt_unbind_service(rdsib_statep->rds_srvhdl, 1641 hcap->hca_bindhdl[ix]); 1642 hcap->hca_bindhdl[ix] = NULL; 1643 } 1644 } 1645 1646 RDS_DPRINTF2("rdsib_del_hca", "HCA(%p) State: %d", hcap, 1647 hcap->hca_state); 1648 1649 switch (saved_state) { 1650 case RDS_HCA_STATE_MEM_REGISTERED: 1651 ASSERT(hcap->hca_mrhdl != NULL); 1652 ret = ibt_deregister_mr(hcap->hca_hdl, hcap->hca_mrhdl); 1653 if (ret != IBT_SUCCESS) { 1654 RDS_DPRINTF2("rdsib_del_hca", 1655 "ibt_deregister_mr failed: %d", ret); 1656 return; 1657 } 1658 hcap->hca_mrhdl = NULL; 1659 /* FALLTHRU */ 1660 case RDS_HCA_STATE_OPEN: 1661 ASSERT(hcap->hca_hdl != NULL); 1662 ASSERT(hcap->hca_pdhdl != NULL); 1663 1664 1665 ret = ibt_free_pd(hcap->hca_hdl, hcap->hca_pdhdl); 1666 if (ret != IBT_SUCCESS) { 1667 RDS_DPRINTF2("rdsib_del_hca", 1668 "ibt_free_pd failed: %d", ret); 1669 } 1670 1671 (void) ibt_free_portinfo(hcap->hca_pinfop, hcap->hca_pinfo_sz); 1672 1673 ret = ibt_close_hca(hcap->hca_hdl); 1674 if (ret != IBT_SUCCESS) { 1675 RDS_DPRINTF2("rdsib_del_hca", 1676 "ibt_close_hca failed: %d", ret); 1677 } 1678 1679 hcap->hca_hdl = NULL; 1680 hcap->hca_pdhdl = NULL; 1681 hcap->hca_lkey = 0; 1682 hcap->hca_rkey = 0; 1683 } 1684 1685 /* 1686 * This should be a write lock as we don't want anyone to get access 1687 * to the hcap while we are modifing its contents 1688 */ 1689 rw_enter(&statep->rds_hca_lock, RW_WRITER); 1690 hcap->hca_state = RDS_HCA_STATE_REMOVED; 1691 rw_exit(&statep->rds_hca_lock); 1692 1693 RDS_DPRINTF2("rdsib_del_hca", "Return: GUID: 0x%llx", hca_guid); 1694 } 1695 1696 static void 1697 rds_async_handler(void *clntp, ibt_hca_hdl_t hdl, 
ibt_async_code_t code, 1698 ibt_async_event_t *event) 1699 { 1700 rds_state_t *statep = (rds_state_t *)clntp; 1701 1702 RDS_DPRINTF2("rds_async_handler", "Async code: %d", code); 1703 1704 switch (code) { 1705 case IBT_EVENT_PORT_UP: 1706 rds_handle_portup_event(statep, hdl, event); 1707 break; 1708 case IBT_HCA_ATTACH_EVENT: 1709 /* 1710 * NOTE: In some error recovery paths, it is possible to 1711 * receive IBT_HCA_ATTACH_EVENTs on already known HCAs. 1712 */ 1713 (void) rdsib_add_hca(event->ev_hca_guid); 1714 break; 1715 case IBT_HCA_DETACH_EVENT: 1716 (void) rdsib_del_hca(statep, event->ev_hca_guid); 1717 break; 1718 1719 default: 1720 RDS_DPRINTF2(LABEL, "Async event: %d not handled", code); 1721 } 1722 1723 RDS_DPRINTF2("rds_async_handler", "Return: code: %d", code); 1724 } 1725 1726 /* 1727 * This routine exists to minimize stale connections across ungraceful 1728 * reboots of nodes in a cluster. 1729 */ 1730 void 1731 rds_randomize_qps(rds_hca_t *hcap) 1732 { 1733 ibt_cq_attr_t cqattr; 1734 ibt_rc_chan_alloc_args_t chanargs; 1735 ibt_channel_hdl_t qp1, qp2; 1736 ibt_cq_hdl_t cq_hdl; 1737 hrtime_t nsec; 1738 uint8_t i, j, rand1, rand2; 1739 int ret; 1740 1741 bzero(&cqattr, sizeof (ibt_cq_attr_t)); 1742 cqattr.cq_size = 1; 1743 cqattr.cq_sched = NULL; 1744 cqattr.cq_flags = IBT_CQ_NO_FLAGS; 1745 ret = ibt_alloc_cq(hcap->hca_hdl, &cqattr, &cq_hdl, NULL); 1746 if (ret != IBT_SUCCESS) { 1747 RDS_DPRINTF2("rds_randomize_qps", 1748 "ibt_alloc_cq failed: %d", ret); 1749 return; 1750 } 1751 1752 bzero(&chanargs, sizeof (ibt_rc_chan_alloc_args_t)); 1753 chanargs.rc_flags = IBT_ALL_SIGNALED; 1754 chanargs.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR | 1755 IBT_CEP_ATOMIC; 1756 chanargs.rc_hca_port_num = 1; 1757 chanargs.rc_scq = cq_hdl; 1758 chanargs.rc_rcq = cq_hdl; 1759 chanargs.rc_pd = hcap->hca_pdhdl; 1760 chanargs.rc_srq = NULL; 1761 1762 nsec = gethrtime(); 1763 rand1 = (nsec & 0xF); 1764 rand2 = (nsec >> 4) & 0xF; 1765 RDS_DPRINTF2("rds_randomize_qps", 
"rand1: %d rand2: %d", 1766 rand1, rand2); 1767 1768 for (i = 0; i < rand1 + 3; i++) { 1769 if (ibt_alloc_rc_channel(hcap->hca_hdl, 1770 IBT_ACHAN_NO_FLAGS, &chanargs, &qp1, NULL) != 1771 IBT_SUCCESS) { 1772 RDS_DPRINTF2("rds_randomize_qps", 1773 "Bailing at i: %d", i); 1774 (void) ibt_free_cq(cq_hdl); 1775 return; 1776 } 1777 for (j = 0; j < rand2 + 3; j++) { 1778 if (ibt_alloc_rc_channel(hcap->hca_hdl, 1779 IBT_ACHAN_NO_FLAGS, &chanargs, &qp2, 1780 NULL) != IBT_SUCCESS) { 1781 RDS_DPRINTF2("rds_randomize_qps", 1782 "Bailing at i: %d j: %d", i, j); 1783 (void) ibt_free_channel(qp1); 1784 (void) ibt_free_cq(cq_hdl); 1785 return; 1786 } 1787 (void) ibt_free_channel(qp2); 1788 } 1789 (void) ibt_free_channel(qp1); 1790 } 1791 1792 (void) ibt_free_cq(cq_hdl); 1793 } 1794