1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved. 27 * 28 * This software is available to you under a choice of one of two 29 * licenses. You may choose to be licensed under the terms of the GNU 30 * General Public License (GPL) Version 2, available from the file 31 * COPYING in the main directory of this source tree, or the 32 * OpenIB.org BSD license below: 33 * 34 * Redistribution and use in source and binary forms, with or 35 * without modification, are permitted provided that the following 36 * conditions are met: 37 * 38 * - Redistributions of source code must retain the above 39 * copyright notice, this list of conditions and the following 40 * disclaimer. 41 * 42 * - Redistributions in binary form must reproduce the above 43 * copyright notice, this list of conditions and the following 44 * disclaimer in the documentation and/or other materials 45 * provided with the distribution. 
46 * 47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 48 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 49 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 50 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 51 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 52 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 53 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 54 * SOFTWARE. 55 * 56 */ 57 /* 58 * Sun elects to include this software in Sun product 59 * under the OpenIB BSD license. 60 * 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 63 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 66 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 67 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 68 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 69 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 70 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 71 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 72 * POSSIBILITY OF SUCH DAMAGE. 
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ib/clients/rds/rdsib_cm.h>
#include <sys/ib/clients/rds/rdsib_ib.h>
#include <sys/ib/clients/rds/rdsib_buf.h>
#include <sys/ib/clients/rds/rdsib_ep.h>
#include <sys/ib/clients/rds/rds_kstat.h>

/* Async event handler registered with the IBTF via rds_ib_modinfo */
static void rds_async_handler(void *clntp, ibt_hca_hdl_t hdl,
    ibt_async_code_t code, ibt_async_event_t *event);

/* IBTF client registration information for the "RDS" module */
static struct ibt_clnt_modinfo_s rds_ib_modinfo = {
	IBTI_V2,
	IBT_NETWORK,
	rds_async_handler,
	NULL,
	"RDS"
};

/* performance tunables */
uint_t rds_no_interrupts = 0;	/* non-zero: limit send CQ polling loops */
uint_t rds_poll_percent_full = 25;
uint_t rds_wc_signal = IBT_NEXT_SOLICITED; /* recv CQ notification type */
uint_t rds_waittime_ms = 100; /* ms */

extern dev_info_t *rdsib_dev_info;
extern void rds_close_sessions();

/*
 * Clamp the buffer-count tunables so that no queue or CQ is sized beyond
 * the limits this HCA reports (hca_max_chan_sz, hca_max_cq_sz), and so the
 * data receive pool fits within hca_max_memr_len.  Called once per HCA at
 * open time, before any channel is allocated from these tunables.
 */
static void
rdsib_validate_chan_sizes(ibt_hca_attr_t *hattrp)
{
	/* The SQ size should not be more than that supported by the HCA */
	if (((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_chan_sz) ||
	    ((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_cq_sz)) {
		RDS_DPRINTF0("RDSIB", "MaxDataSendBuffers + %d is greater "
		    "than that supported by the HCA driver "
		    "(%d + %d > %d or %d), lowering it to a supported value.",
		    RDS_NUM_ACKS, MaxDataSendBuffers, RDS_NUM_ACKS,
		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);

		/* leave room for the ACK work requests on the data SQ */
		MaxDataSendBuffers = (hattrp->hca_max_chan_sz >
		    hattrp->hca_max_cq_sz) ?
		    hattrp->hca_max_cq_sz - RDS_NUM_ACKS :
		    hattrp->hca_max_chan_sz - RDS_NUM_ACKS;
	}

	/* The RQ size should not be more than that supported by the HCA */
	if ((MaxDataRecvBuffers > hattrp->hca_max_chan_sz) ||
	    (MaxDataRecvBuffers > hattrp->hca_max_cq_sz)) {
		RDS_DPRINTF0("RDSIB", "MaxDataRecvBuffers is greater than that "
		    "supported by the HCA driver (%d > %d or %d), lowering it "
		    "to a supported value.", MaxDataRecvBuffers,
		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);

		MaxDataRecvBuffers = (hattrp->hca_max_chan_sz >
		    hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
		    hattrp->hca_max_chan_sz;
	}

	/* The SQ size should not be more than that supported by the HCA */
	if ((MaxCtrlSendBuffers > hattrp->hca_max_chan_sz) ||
	    (MaxCtrlSendBuffers > hattrp->hca_max_cq_sz)) {
		RDS_DPRINTF0("RDSIB", "MaxCtrlSendBuffers is greater than that "
		    "supported by the HCA driver (%d > %d or %d), lowering it "
		    "to a supported value.", MaxCtrlSendBuffers,
		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);

		MaxCtrlSendBuffers = (hattrp->hca_max_chan_sz >
		    hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
		    hattrp->hca_max_chan_sz;
	}

	/* The RQ size should not be more than that supported by the HCA */
	if ((MaxCtrlRecvBuffers > hattrp->hca_max_chan_sz) ||
	    (MaxCtrlRecvBuffers > hattrp->hca_max_cq_sz)) {
		RDS_DPRINTF0("RDSIB", "MaxCtrlRecvBuffers is greater than that "
		    "supported by the HCA driver (%d > %d or %d), lowering it "
		    "to a supported value.", MaxCtrlRecvBuffers,
		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);

		MaxCtrlRecvBuffers = (hattrp->hca_max_chan_sz >
		    hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
		    hattrp->hca_max_chan_sz;
	}

	/* The MaxRecvMemory should be less than that supported by the HCA */
	if ((NDataRX * RdsPktSize) > hattrp->hca_max_memr_len) {
		RDS_DPRINTF0("RDSIB", "MaxRecvMemory is greater than that "
		    "supported by the HCA driver (%d > %d), lowering it to %d",
		    NDataRX * RdsPktSize, hattrp->hca_max_memr_len,
		    hattrp->hca_max_memr_len);

		NDataRX = hattrp->hca_max_memr_len/RdsPktSize;
	}
}

/*
 * Called on open of first RDS socket
 *
 * Attaches to the IBTF, opens every HCA in the system (skipping any that
 * fail to open/query), allocates one PD per usable HCA, clamps the buffer
 * tunables against each HCA's limits, and finally registers and binds the
 * RDS service.  Returns 0 on success, -1 if no HCA could be initialized or
 * IBTF registration failed.
 */
int
rdsib_open_ib()
{
	ib_guid_t	*guidp;
	rds_hca_t	*hcap, *hcap1;
	uint_t		ix, hcaix, nhcas;
	int		ret;

	RDS_DPRINTF4("rdsib_open_ib", "enter: statep %p", rdsib_statep);

	ASSERT(rdsib_statep != NULL);
	if (rdsib_statep == NULL) {
		RDS_DPRINTF1("rdsib_open_ib", "RDS Statep not initialized");
		return (-1);
	}

	/* How many hcas are there? */
	nhcas = ibt_get_hca_list(&guidp);
	if (nhcas == 0) {
		RDS_DPRINTF2("rdsib_open_ib", "No IB HCAs Available");
		return (-1);
	}

	RDS_DPRINTF3("rdsib_open_ib", "Number of HCAs: %d", nhcas);

	/* Register with IBTF */
	ret = ibt_attach(&rds_ib_modinfo, rdsib_dev_info, rdsib_statep,
	    &rdsib_statep->rds_ibhdl);
	if (ret != IBT_SUCCESS) {
		RDS_DPRINTF2(LABEL, "ibt_attach failed: %d", ret);
		(void) ibt_free_hca_list(guidp, nhcas);
		return (-1);
	}

	/*
	 * Open each HCA and gather its information. Don't care about HCAs
	 * that cannot be opened. It is OK as long as atleast one HCA can be
	 * opened.
	 * Initialize a HCA only if all the information is available.
	 */
	hcap1 = NULL;
	for (ix = 0, hcaix = 0; ix < nhcas; ix++) {
		RDS_DPRINTF3(LABEL, "Open HCA: 0x%llx", guidp[ix]);

		hcap = (rds_hca_t *)kmem_zalloc(sizeof (rds_hca_t), KM_SLEEP);

		ret = ibt_open_hca(rdsib_statep->rds_ibhdl, guidp[ix],
		    &hcap->hca_hdl);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2("rdsib_open_ib",
			    "ibt_open_hca: 0x%llx failed: %d", guidp[ix], ret);
			kmem_free(hcap, sizeof (rds_hca_t));
			continue;
		}

		hcap->hca_guid = guidp[ix];

		ret = ibt_query_hca(hcap->hca_hdl, &hcap->hca_attr);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2("rdsib_open_ib",
			    "Query HCA: 0x%llx failed: %d", guidp[ix], ret);
			ret = ibt_close_hca(hcap->hca_hdl);
			ASSERT(ret == IBT_SUCCESS);
			kmem_free(hcap, sizeof (rds_hca_t));
			continue;
		}

		/* port 0 means: query all ports of this HCA */
		ret = ibt_query_hca_ports(hcap->hca_hdl, 0,
		    &hcap->hca_pinfop, &hcap->hca_nports, &hcap->hca_pinfo_sz);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2("rdsib_open_ib",
			    "Query HCA 0x%llx ports failed: %d", guidp[ix],
			    ret);
			ret = ibt_close_hca(hcap->hca_hdl);
			ASSERT(ret == IBT_SUCCESS);
			kmem_free(hcap, sizeof (rds_hca_t));
			continue;
		}

		/* Only one PD per HCA is allocated, so do it here */
		ret = ibt_alloc_pd(hcap->hca_hdl, IBT_PD_NO_FLAGS,
		    &hcap->hca_pdhdl);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2(LABEL, "ibt_alloc_pd 0x%llx failed: %d",
			    guidp[ix], ret);
			(void) ibt_free_portinfo(hcap->hca_pinfop,
			    hcap->hca_pinfo_sz);
			ret = ibt_close_hca(hcap->hca_hdl);
			ASSERT(ret == IBT_SUCCESS);
			kmem_free(hcap, sizeof (rds_hca_t));
			continue;
		}

		rdsib_validate_chan_sizes(&hcap->hca_attr);

		/* this HCA is fully initialized, go to the next one */
		hcaix++;
		hcap->hca_nextp = hcap1;
		hcap1 = hcap;
	}

	/* free the HCA list, we are done with it */
	(void) ibt_free_hca_list(guidp, nhcas);

	if (hcaix == 0) {
		/* Failed to Initialize even one HCA */
		RDS_DPRINTF2("rdsib_open_ib", "No HCAs are initialized");
		(void) ibt_detach(rdsib_statep->rds_ibhdl);
		rdsib_statep->rds_ibhdl = NULL;
		return (-1);
	}

	if (hcaix < nhcas) {
		RDS_DPRINTF2("rdsib_open_ib", "HCAs %d/%d failed to initialize",
		    (nhcas - hcaix), nhcas);
	}

	rdsib_statep->rds_hcalistp = hcap1;
	rdsib_statep->rds_nhcas = hcaix;

	/* register the RDS service */
	rdsib_statep->rds_srvhdl =
	    rds_register_service(rdsib_statep->rds_ibhdl);
	if (rdsib_statep->rds_srvhdl == NULL) {
		RDS_DPRINTF2("rdsib_open_ib", "Service registration failed");
	} else {
		/* bind the service on all available ports */
		ret = rds_bind_service(rdsib_statep);
		if (ret != 0) {
			RDS_DPRINTF2("rdsib_open_ib", "Bind service failed");
		}
	}

	RDS_DPRINTF4("rdsib_open_ib", "return: statep %p", rdsib_statep);

	return (0);
}

/*
 * Called when all ports are closed.
 *
 * Tears down everything rdsib_open_ib() set up: unbinds/deregisters the
 * RDS service, closes all sessions, releases every per-HCA resource
 * (PD, port info, HCA handle) and finally detaches from the IBTF.
 */
void
rdsib_close_ib()
{
	rds_hca_t	*hcap, *nextp;
	int		ret;

	RDS_DPRINTF2("rds_close_ib", "enter: statep %p", rdsib_statep);

	if (rdsib_statep->rds_srvhdl != NULL) {
		(void) ibt_unbind_all_services(rdsib_statep->rds_srvhdl);
		(void) ibt_deregister_service(rdsib_statep->rds_ibhdl,
		    rdsib_statep->rds_srvhdl);
		(void) ibt_release_ip_sid(rdsib_statep->rds_service_id);

		/*
		 * NOTE(review): rds_old_srvhdl is torn down whenever
		 * rds_srvhdl is set; presumably it is always valid in that
		 * case -- confirm against rds_register_service().
		 */
		(void) ibt_unbind_all_services(rdsib_statep->rds_old_srvhdl);
		(void) ibt_deregister_service(rdsib_statep->rds_ibhdl,
		    rdsib_statep->rds_old_srvhdl);
	}

	/* close and destroy all the sessions */
	rds_close_sessions(NULL);

	/* Release all HCA resources */
	rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER);
	hcap = rdsib_statep->rds_hcalistp;
	rdsib_statep->rds_hcalistp = NULL;
	rdsib_statep->rds_nhcas = 0;
	rw_exit(&rdsib_statep->rds_hca_lock);

	while (hcap != NULL) {
		nextp = hcap->hca_nextp;

		ret = ibt_free_pd(hcap->hca_hdl, hcap->hca_pdhdl);
		ASSERT(ret == IBT_SUCCESS);

		(void) ibt_free_portinfo(hcap->hca_pinfop, hcap->hca_pinfo_sz);

		ret = ibt_close_hca(hcap->hca_hdl);
		ASSERT(ret == IBT_SUCCESS);

		kmem_free(hcap, sizeof (rds_hca_t));
		hcap = nextp;
	}

	/* Deregister with IBTF */
	if (rdsib_statep->rds_ibhdl != NULL) {
		(void) ibt_detach(rdsib_statep->rds_ibhdl);
		rdsib_statep->rds_ibhdl = NULL;
	}

	RDS_DPRINTF2("rds_close_ib", "return: statep %p", rdsib_statep);
}

/*
 * Return hcap, given the hca guid
 *
 * Walks the HCA list under rds_hca_lock (reader); returns NULL when no
 * HCA with the given GUID is on the list.
 */
rds_hca_t *
rds_get_hcap(rds_state_t *statep, ib_guid_t hca_guid)
{
	rds_hca_t	*hcap;

	RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: Enter: statep: 0x%p "
	    "guid: %llx", statep, hca_guid);

	rw_enter(&statep->rds_hca_lock, RW_READER);

	hcap = statep->rds_hcalistp;
	while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) {
		hcap = hcap->hca_nextp;
	}

	rw_exit(&statep->rds_hca_lock);

	RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: return");

	return (hcap);
}

/*
 * Return hcap, given a gid
 *
 * Matches the gid against entry 0 of each port's SGID table on every HCA;
 * returns NULL when no port matches.
 */
rds_hca_t *
rds_gid_to_hcap(rds_state_t *statep, ib_gid_t gid)
{
	rds_hca_t	*hcap;
	uint_t		ix;

	RDS_DPRINTF4("rds_gid_to_hcap", "Enter: statep: 0x%p gid: %llx:%llx",
	    statep, gid.gid_prefix, gid.gid_guid);

	rw_enter(&statep->rds_hca_lock, RW_READER);

	hcap = statep->rds_hcalistp;
	while (hcap != NULL) {
		for (ix = 0; ix < hcap->hca_nports; ix++) {
			if ((hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_prefix ==
			    gid.gid_prefix) &&
			    (hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_guid ==
			    gid.gid_guid)) {
				RDS_DPRINTF4("rds_gid_to_hcap",
				    "gid found in hcap: 0x%p", hcap);
				rw_exit(&statep->rds_hca_lock);
				return (hcap);
			}
		}
		hcap = hcap->hca_nextp;
	}

	rw_exit(&statep->rds_hca_lock);

	return (NULL);
}

/*
 * This is called from the send CQ handler
 *
 * Posts the next ACK work request if new messages arrived (ep_rbufid
 * changed) since the last ACK was sent; otherwise drops the ep_rdmacnt
 * reference.  ep_rdmacnt is also dropped if the post fails.
 */
void
rds_send_acknowledgement(rds_ep_t *ep)
{
	int	ret;
	uint_t	ix;

	RDS_DPRINTF4("rds_send_acknowledgement", "Enter EP(%p)", ep);

	mutex_enter(&ep->ep_lock);

	ASSERT(ep->ep_rdmacnt != 0);

	/*
	 * The previous ACK completed successfully, send the next one
	 * if more messages were received after sending the last ACK
	 */
	if (ep->ep_rbufid != *(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va) {
		/* record the buffer id being acknowledged in the ACK buffer */
		*(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va = ep->ep_rbufid;
		mutex_exit(&ep->ep_lock);

		/* send acknowledgement */
		RDS_INCR_TXACKS();
		ret = ibt_post_send(ep->ep_chanhdl, &ep->ep_ackwr, 1, &ix);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF1("rds_send_acknowledgement",
			    "EP(%p): ibt_post_send for acknowledgement "
			    "failed: %d, SQ depth: %d",
			    ep, ret, ep->ep_sndpool.pool_nbusy);
			mutex_enter(&ep->ep_lock);
			ep->ep_rdmacnt--;
			mutex_exit(&ep->ep_lock);
		}
	} else {
		/* ACKed all messages, no more to ACK */
		ep->ep_rdmacnt--;
		mutex_exit(&ep->ep_lock);
		return;
	}

	RDS_DPRINTF4("rds_send_acknowledgement", "Return EP(%p)", ep);
}

/*
 * Poll one completion from a control-channel recv CQ.  On success the
 * control packet is handed to rds_handle_control_message() and the buffer
 * is returned to the pool; failed completions just free the buffer.  When
 * the RQ drops to its low-water mark a taskq job is dispatched to repost
 * receive buffers.  Returns the ibt_poll_cq() status (IBT_CQ_EMPTY when
 * drained).
 */
static int
rds_poll_ctrl_completions(ibt_cq_hdl_t cq, rds_ep_t *ep)
{
	ibt_wc_t	wc;
	uint_t		npolled;
	rds_buf_t	*bp;
	rds_ctrl_pkt_t	*cpkt;
	rds_qp_t	*recvqp;
	int		ret = IBT_SUCCESS;

	RDS_DPRINTF4("rds_poll_ctrl_completions", "Enter: EP(%p)", ep);

	bzero(&wc, sizeof (ibt_wc_t));
	ret = ibt_poll_cq(cq, &wc, 1, &npolled);
	if (ret != IBT_SUCCESS) {
		if (ret != IBT_CQ_EMPTY) {
			RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
			    "returned: %d", ep, cq, ret);
		} else {
			RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
			    "returned: IBT_CQ_EMPTY", ep, cq);
		}
		return (ret);
	}

	/* the WR id is the buffer pointer */
	bp = (rds_buf_t *)(uintptr_t)wc.wc_id;

	if (wc.wc_status != IBT_WC_SUCCESS) {
		mutex_enter(&ep->ep_recvqp.qp_lock);
		ep->ep_recvqp.qp_level--;
		mutex_exit(&ep->ep_recvqp.qp_lock);

		/* Free the buffer */
		bp->buf_state = RDS_RCVBUF_FREE;
		rds_free_recv_buf(bp, 1);

		/* Receive completion failure */
		if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) {
			RDS_DPRINTF2("rds_poll_ctrl_completions",
			    "EP(%p) CQ(%p) BP(%p): WC Error Status: %d",
			    ep, cq, wc.wc_id, wc.wc_status);
		}
		return (ret);
	}

	/* there is one less in the RQ */
	recvqp = &ep->ep_recvqp;
	mutex_enter(&recvqp->qp_lock);
	recvqp->qp_level--;
	if ((recvqp->qp_taskqpending == B_FALSE) &&
	    (recvqp->qp_level <= recvqp->qp_lwm)) {
		/* Time to post more buffers into the RQ */
		recvqp->qp_taskqpending = B_TRUE;
		mutex_exit(&recvqp->qp_lock);

		ret = ddi_taskq_dispatch(rds_taskq,
		    rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
		if (ret != DDI_SUCCESS) {
			RDS_DPRINTF1(LABEL, "ddi_taskq_dispatch failed: %d",
			    ret);
			mutex_enter(&recvqp->qp_lock);
			recvqp->qp_taskqpending = B_FALSE;
			mutex_exit(&recvqp->qp_lock);
		}
	} else {
		mutex_exit(&recvqp->qp_lock);
	}

	cpkt = (rds_ctrl_pkt_t *)(uintptr_t)bp->buf_ds.ds_va;
	rds_handle_control_message(ep->ep_sp, cpkt);

	bp->buf_state = RDS_RCVBUF_FREE;
	rds_free_recv_buf(bp, 1);

	RDS_DPRINTF4("rds_poll_ctrl_completions", "Return: EP(%p)", ep);

	return (ret);
}

/* batch size for posting recv WRs (stack-allocated WR array below) */
#define	RDS_POST_FEW_ATATIME	100
/* Post recv WRs into the RQ.
   Assumes the ep->refcnt is already incremented */
void
rds_post_recv_buf(void *arg)
{
	ibt_channel_hdl_t	chanhdl;
	rds_ep_t	*ep;
	rds_session_t	*sp;
	rds_qp_t	*recvqp;
	rds_bufpool_t	*gp;
	rds_buf_t	*bp, *bp1;
	ibt_recv_wr_t	*wrp, wr[RDS_POST_FEW_ATATIME];
	rds_hca_t	*hcap;
	uint_t		npost, nspace, rcv_len;
	uint_t		ix, jx, kx;
	int		ret;

	chanhdl = (ibt_channel_hdl_t)arg;
	RDS_DPRINTF4("rds_post_recv_buf", "Enter: CHAN(%p)", chanhdl);
	RDS_INCR_POST_RCV_BUF_CALLS();

	ep = (rds_ep_t *)ibt_get_chan_private(chanhdl);
	ASSERT(ep != NULL);
	sp = ep->ep_sp;
	recvqp = &ep->ep_recvqp;

	RDS_DPRINTF5("rds_post_recv_buf", "EP(%p)", ep);

	/* get the hcap for the HCA hosting this channel */
	hcap = rds_get_hcap(rdsib_statep, ep->ep_hca_guid);
	if (hcap == NULL) {
		RDS_DPRINTF2("rds_post_recv_buf", "HCA (0x%llx) not found",
		    ep->ep_hca_guid);
		return;
	}

	/* Make sure the session is still connected */
	rw_enter(&sp->session_lock, RW_READER);
	if ((sp->session_state != RDS_SESSION_STATE_INIT) &&
	    (sp->session_state != RDS_SESSION_STATE_CONNECTED)) {
		RDS_DPRINTF2("rds_post_recv_buf", "EP(%p): Session is not "
		    "in active state (%d)", ep, sp->session_state);
		rw_exit(&sp->session_lock);
		return;
	}
	rw_exit(&sp->session_lock);

	/* how many can be posted */
	mutex_enter(&recvqp->qp_lock);
	nspace = recvqp->qp_depth - recvqp->qp_level;
	if (nspace == 0) {
		RDS_DPRINTF2("rds_post_recv_buf", "RQ is FULL");
		/* clear the pending flag so a later drain can re-dispatch */
		recvqp->qp_taskqpending = B_FALSE;
		mutex_exit(&recvqp->qp_lock);
		return;
	}
	mutex_exit(&recvqp->qp_lock);

	/* pick the buffer pool and packet size for this EP type */
	if (ep->ep_type == RDS_EP_TYPE_DATA) {
		gp = &rds_dpool;
		rcv_len = RdsPktSize;
	} else {
		gp = &rds_cpool;
		rcv_len = RDS_CTRLPKT_SIZE;
	}

	bp = rds_get_buf(gp, nspace, &jx);
	if (bp == NULL) {
		RDS_DPRINTF2(LABEL, "EP(%p): No Recv buffers available", ep);
		/* try again later */
		ret = ddi_taskq_dispatch(rds_taskq, rds_post_recv_buf,
		    (void *)ep->ep_chanhdl, DDI_NOSLEEP);
		if (ret != DDI_SUCCESS) {
			RDS_DPRINTF1(LABEL, "ddi_taskq_dispatch failed: %d",
			    ret);
			mutex_enter(&recvqp->qp_lock);
			recvqp->qp_taskqpending = B_FALSE;
			mutex_exit(&recvqp->qp_lock);
		}
		return;
	}

	if (jx != nspace) {
		/* got fewer buffers than requested; post what we got */
		RDS_DPRINTF2(LABEL, "EP(%p): Recv buffers "
		    "needed: %d available: %d", ep, nspace, jx);
		nspace = jx;
	}

	/* prepare each buffer in the chain for posting */
	bp1 = bp;
	for (ix = 0; ix < nspace; ix++) {
		bp1->buf_ep = ep;
		ASSERT(bp1->buf_state == RDS_RCVBUF_FREE);
		bp1->buf_state = RDS_RCVBUF_POSTED;
		bp1->buf_ds.ds_key = hcap->hca_lkey;
		bp1->buf_ds.ds_len = rcv_len;
		bp1 = bp1->buf_nextp;
	}

#if 0
	wrp = kmem_zalloc(RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t),
	    KM_SLEEP);
#else
	wrp = &wr[0];
#endif

	/* post the WRs in batches of RDS_POST_FEW_ATATIME */
	npost = nspace;
	while (npost) {
		jx = (npost > RDS_POST_FEW_ATATIME) ?
		    RDS_POST_FEW_ATATIME : npost;
		for (ix = 0; ix < jx; ix++) {
			wrp[ix].wr_id = (uintptr_t)bp;
			wrp[ix].wr_nds = 1;
			wrp[ix].wr_sgl = &bp->buf_ds;
			bp = bp->buf_nextp;
		}

		ret = ibt_post_recv(chanhdl, wrp, jx, &kx);
		if ((ret != IBT_SUCCESS) || (kx != jx)) {
			RDS_DPRINTF1(LABEL, "ibt_post_recv for %d WRs failed: "
			    "%d", npost, ret);
			npost -= kx;
			break;
		}

		npost -= jx;
	}

	mutex_enter(&recvqp->qp_lock);
	if (npost != 0) {
		RDS_DPRINTF2("rds_post_recv_buf",
		    "EP(%p) Failed to post %d WRs", ep, npost);
		recvqp->qp_level += (nspace - npost);
	} else {
		recvqp->qp_level += nspace;
	}

	/*
	 * sometimes, the recv WRs can get consumed as soon as they are
	 * posted. In that case, taskq thread to post more WRs to the RQ will
	 * not be scheduled as the taskqpending flag is still set.
	 */
	if (recvqp->qp_level == 0) {
		mutex_exit(&recvqp->qp_lock);
		ret = ddi_taskq_dispatch(rds_taskq,
		    rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
		if (ret != DDI_SUCCESS) {
			RDS_DPRINTF1("rds_post_recv_buf",
			    "ddi_taskq_dispatch failed: %d", ret);
			mutex_enter(&recvqp->qp_lock);
			recvqp->qp_taskqpending = B_FALSE;
			mutex_exit(&recvqp->qp_lock);
		}
	} else {
		recvqp->qp_taskqpending = B_FALSE;
		mutex_exit(&recvqp->qp_lock);
	}

#if 0
	kmem_free(wrp, RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t));
#endif

	RDS_DPRINTF4("rds_post_recv_buf", "Return: EP(%p)", ep);
}

/*
 * Poll one completion from a data-channel recv CQ.  Successful packets are
 * either delivered immediately (single-packet messages) or chained onto
 * ep_segfbp/ep_seglbp until the last fragment of a multi-packet message
 * arrives (dh_npkts == 1 with dh_psn != 0), at which point the whole chain
 * is delivered via rds_received_msg().  Failed completions free the buffer.
 * Returns the ibt_poll_cq() status (IBT_CQ_EMPTY when drained).
 */
static int
rds_poll_data_completions(ibt_cq_hdl_t cq, rds_ep_t *ep)
{
	ibt_wc_t	wc;
	rds_buf_t	*bp;
	rds_data_hdr_t	*pktp;
	rds_qp_t	*recvqp;
	uint_t		npolled;
	int		ret = IBT_SUCCESS;


	RDS_DPRINTF4("rds_poll_data_completions", "Enter: EP(%p)", ep);

	bzero(&wc, sizeof (ibt_wc_t));
	ret = ibt_poll_cq(cq, &wc, 1, &npolled);
	if (ret != IBT_SUCCESS) {
		if (ret != IBT_CQ_EMPTY) {
			RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
			    "returned: %d", ep, cq, ret);
		} else {
			RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
			    "returned: IBT_CQ_EMPTY", ep, cq);
		}
		return (ret);
	}

	/* the WR id is the buffer pointer */
	bp = (rds_buf_t *)(uintptr_t)wc.wc_id;
	ASSERT(bp->buf_state == RDS_RCVBUF_POSTED);
	bp->buf_state = RDS_RCVBUF_ONSOCKQ;
	bp->buf_nextp = NULL;

	if (wc.wc_status != IBT_WC_SUCCESS) {
		mutex_enter(&ep->ep_recvqp.qp_lock);
		ep->ep_recvqp.qp_level--;
		mutex_exit(&ep->ep_recvqp.qp_lock);

		/* free the buffer */
		bp->buf_state = RDS_RCVBUF_FREE;
		rds_free_recv_buf(bp, 1);

		/* Receive completion failure */
		if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) {
			RDS_DPRINTF2("rds_poll_data_completions",
			    "EP(%p) CQ(%p) BP(%p): WC Error Status: %d",
			    ep, cq, wc.wc_id, wc.wc_status);
			RDS_INCR_RXERRS();
		}
		return (ret);
	}

	/* there is one less in the RQ */
	recvqp = &ep->ep_recvqp;
	mutex_enter(&recvqp->qp_lock);
	recvqp->qp_level--;
	if ((recvqp->qp_taskqpending == B_FALSE) &&
	    (recvqp->qp_level <= recvqp->qp_lwm)) {
		/* Time to post more buffers into the RQ */
		recvqp->qp_taskqpending = B_TRUE;
		mutex_exit(&recvqp->qp_lock);

		ret = ddi_taskq_dispatch(rds_taskq,
		    rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
		if (ret != DDI_SUCCESS) {
			RDS_DPRINTF1(LABEL, "ddi_taskq_dispatch failed: %d",
			    ret);
			mutex_enter(&recvqp->qp_lock);
			recvqp->qp_taskqpending = B_FALSE;
			mutex_exit(&recvqp->qp_lock);
		}
	} else {
		mutex_exit(&recvqp->qp_lock);
	}

	pktp = (rds_data_hdr_t *)(uintptr_t)bp->buf_ds.ds_va;
	ASSERT(pktp->dh_datalen != 0);

	RDS_DPRINTF5(LABEL, "Message Received: sendIP: 0x%x recvIP: 0x%x "
	    "sendport: %d recvport: %d npkts: %d pktno: %d", ep->ep_remip,
	    ep->ep_myip, pktp->dh_sendport, pktp->dh_recvport,
	    pktp->dh_npkts, pktp->dh_psn);

	RDS_DPRINTF3(LABEL, "BP(%p): npkts: %d psn: %d", bp,
	    pktp->dh_npkts, pktp->dh_psn);

	if (pktp->dh_npkts == 1) {
		/* single pkt or last packet */
		if (pktp->dh_psn != 0) {
			/* last packet of a segmented message */
			ASSERT(ep->ep_seglbp != NULL);
			ep->ep_seglbp->buf_nextp = bp;
			ep->ep_seglbp = bp;
			rds_received_msg(ep, ep->ep_segfbp);
			ep->ep_segfbp = NULL;
			ep->ep_seglbp = NULL;
		} else {
			/* single packet */
			rds_received_msg(ep, bp);
		}
	} else {
		/* multi-pkt msg */
		if (pktp->dh_psn == 0) {
			/* first packet */
			ASSERT(ep->ep_segfbp == NULL);
			ep->ep_segfbp = bp;
			ep->ep_seglbp = bp;
		} else {
			/* intermediate packet */
			ASSERT(ep->ep_segfbp != NULL);
			ep->ep_seglbp->buf_nextp = bp;
			ep->ep_seglbp = bp;
		}
	}

	RDS_DPRINTF4("rds_poll_data_completions", "Return: EP(%p)", ep);

	return (ret);
}

/*
 * Recv CQ completion handler (registered via ibt_set_cq_handler).  Drains
 * the CQ, re-arms notification, then drains again to catch completions
 * that arrived between the final poll and the re-arm.
 */
void
rds_recvcq_handler(ibt_cq_hdl_t cq, void *arg)
{
	rds_ep_t	*ep;
	int		ret = IBT_SUCCESS;
	int		(*func)(ibt_cq_hdl_t, rds_ep_t *);

	ep = (rds_ep_t *)arg;

	RDS_DPRINTF4("rds_recvcq_handler", "enter: EP(%p)", ep);

	/* data and control channels use different per-WC poll routines */
	if (ep->ep_type == RDS_EP_TYPE_DATA) {
		func = rds_poll_data_completions;
	} else {
		func = rds_poll_ctrl_completions;
	}

	do {
		ret = func(cq, ep);
	} while (ret != IBT_CQ_EMPTY);

	/* enable the CQ */
	ret = ibt_enable_cq_notify(cq, rds_wc_signal);
	if (ret != IBT_SUCCESS) {
		RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify "
		    "failed: %d", ep, cq, ret);
		return;
	}

	/* drain again: completions may have arrived before the re-arm */
	do {
		ret = func(cq, ep);
	} while (ret != IBT_CQ_EMPTY);

	RDS_DPRINTF4("rds_recvcq_handler", "Return: EP(%p)", ep);
}

/*
 * Drain the send CQ in batches of RDS_NUM_DATA_SEND_WCS work completions.
 * Successful sends are freed back to the send pool; RDMA-write completions
 * trigger the next ACK via rds_send_acknowledgement().  Non-flush errors
 * move the session to RDS_SESSION_STATE_ERROR (once) and are reported via
 * rds_handle_send_error().  'lock' is passed through to
 * rds_free_send_buf().
 */
void
rds_poll_send_completions(ibt_cq_hdl_t cq, rds_ep_t *ep, boolean_t lock)
{
	ibt_wc_t	wc[RDS_NUM_DATA_SEND_WCS];
	uint_t		npolled, nret, send_error = 0;
	rds_buf_t	*headp, *tailp, *bp;
	int		ret, ix;

	RDS_DPRINTF4("rds_poll_send_completions", "Enter EP(%p)", ep);

	headp = NULL;
	tailp = NULL;
	npolled = 0;
	do {
		ret = ibt_poll_cq(cq, wc, RDS_NUM_DATA_SEND_WCS, &nret);
		if (ret != IBT_SUCCESS) {
			if (ret != IBT_CQ_EMPTY) {
				RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): "
				    "ibt_poll_cq returned: %d", ep, cq, ret);
			} else {
				RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): "
				    "ibt_poll_cq returned: IBT_CQ_EMPTY",
				    ep, cq);
			}

			break;
		}

		for (ix = 0; ix < nret; ix++) {
			if (wc[ix].wc_status == IBT_WC_SUCCESS) {
				if (wc[ix].wc_type == IBT_WRC_RDMAW) {
					/* ACK write finished; send next ACK */
					rds_send_acknowledgement(ep);
					continue;
				}

				bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
				ASSERT(bp->buf_state == RDS_SNDBUF_PENDING);
				bp->buf_state = RDS_SNDBUF_FREE;
			} else if (wc[ix].wc_status == IBT_WC_WR_FLUSHED_ERR) {
				/* flushed WRs are expected during teardown */
				RDS_INCR_TXERRS();
				RDS_DPRINTF5("rds_poll_send_completions",
				    "EP(%p): WC ID: %p ERROR: %d", ep,
				    wc[ix].wc_id, wc[ix].wc_status);

				if (wc[ix].wc_id == RDS_RDMAW_WRID) {
					mutex_enter(&ep->ep_lock);
					ep->ep_rdmacnt--;
					mutex_exit(&ep->ep_lock);
					continue;
				}

				bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
				bp->buf_state = RDS_SNDBUF_ERROR;
			} else {
				RDS_INCR_TXERRS();
				RDS_DPRINTF2("rds_poll_send_completions",
				    "EP(%p): WC ID: %p ERROR: %d", ep,
				    wc[ix].wc_id, wc[ix].wc_status);
				if (send_error == 0) {
					rds_session_t	*sp = ep->ep_sp;

					/* don't let anyone send anymore */
					rw_enter(&sp->session_lock, RW_WRITER);
					if (sp->session_state !=
					    RDS_SESSION_STATE_ERROR) {
						sp->session_state =
						    RDS_SESSION_STATE_ERROR;
						/* Make this the active end */
						sp->session_type =
						    RDS_SESSION_ACTIVE;
					}
					rw_exit(&sp->session_lock);
				}

				send_error++;

				if (wc[ix].wc_id == RDS_RDMAW_WRID) {
					mutex_enter(&ep->ep_lock);
					ep->ep_rdmacnt--;
					mutex_exit(&ep->ep_lock);
					continue;
				}

				bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
				bp->buf_state = RDS_SNDBUF_ERROR;
			}

			/* chain the buffer for a single bulk free below */
			bp->buf_nextp = NULL;
			if (headp) {
				tailp->buf_nextp = bp;
				tailp = bp;
			} else {
				headp = bp;
				tailp = bp;
			}

			npolled++;
		}

		/* in polling mode, bound the work done per invocation */
		if (rds_no_interrupts && (npolled > 100)) {
			break;
		}

		if (rds_no_interrupts == 1) {
			break;
		}
	} while (ret != IBT_CQ_EMPTY);

	RDS_DPRINTF5("rds_poll_send_completions", "Npolled: %d send_error: %d",
	    npolled, send_error);

	/* put the buffers to the pool */
	if (npolled != 0) {
		rds_free_send_buf(ep, headp, tailp, npolled, lock);
	}

	if (send_error != 0) {
		rds_handle_send_error(ep);
	}

	RDS_DPRINTF4("rds_poll_send_completions", "Return EP(%p)", ep);
}

/*
 * Send CQ completion handler (registered via ibt_set_cq_handler).
 * Re-arms notification first, then polls, so completions arriving during
 * the poll still raise an event.
 */
void
rds_sendcq_handler(ibt_cq_hdl_t cq, void *arg)
{
	rds_ep_t	*ep;
	int		ret;

	ep = (rds_ep_t *)arg;

	RDS_DPRINTF4("rds_sendcq_handler", "Enter: EP(%p)", ep);

	/* enable the CQ */
	ret = ibt_enable_cq_notify(cq, IBT_NEXT_COMPLETION);
	if (ret != IBT_SUCCESS) {
		RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify "
		    "failed: %d", ep, cq, ret);
		return;
	}

	rds_poll_send_completions(cq, ep, B_FALSE);

	RDS_DPRINTF4("rds_sendcq_handler", "Return: EP(%p)", ep);
}

/*
 * Free the RC channel and both CQs of an endpoint.  The channel is flushed
 * and its RQ drained before ibt_free_channel() so no WRs are outstanding.
 * Caller must hold ep->ep_lock (asserted).
 */
void
rds_ep_free_rc_channel(rds_ep_t *ep)
{
	int ret;

	RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Enter", ep);

	ASSERT(mutex_owned(&ep->ep_lock));

	/* free the QP */
	if (ep->ep_chanhdl != NULL) {
		/* wait until the RQ is empty */
		(void) ibt_flush_channel(ep->ep_chanhdl);
		(void) rds_is_recvq_empty(ep, B_TRUE);
		ret = ibt_free_channel(ep->ep_chanhdl);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF1("rds_ep_free_rc_channel", "EP(%p) "
			    "ibt_free_channel returned: %d", ep, ret);
		}
		ep->ep_chanhdl = NULL;
	} else {
		RDS_DPRINTF2("rds_ep_free_rc_channel",
		    "EP(%p) Channel is ALREADY FREE", ep);
	}

	/* free the Send CQ */
	if (ep->ep_sendcq != NULL) {
		ret = ibt_free_cq(ep->ep_sendcq);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF1("rds_ep_free_rc_channel",
			    "EP(%p) - for sendcq, ibt_free_cq returned %d",
			    ep, ret);
		}
		ep->ep_sendcq = NULL;
	} else {
		RDS_DPRINTF2("rds_ep_free_rc_channel",
		    "EP(%p) SendCQ is ALREADY FREE", ep);
	}

	/* free the Recv CQ */
	if (ep->ep_recvcq != NULL) {
		ret = ibt_free_cq(ep->ep_recvcq);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF1("rds_ep_free_rc_channel",
			    "EP(%p) - for recvcq, ibt_free_cq returned %d",
			    ep, ret);
		}
		ep->ep_recvcq = NULL;
	} else {
		RDS_DPRINTF2("rds_ep_free_rc_channel",
		    "EP(%p) RecvCQ is ALREADY FREE", ep);
	}

	RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Return", ep);
}

/*
 * Allocate resources for RC channel: send/recv CQs sized from the
 * per-EP-type tunables (reusing existing CQs when already allocated) and
 * the RC channel itself, on the HCA matching the session's local gid.
 * Returns the channel handle, or NULL on any allocation failure (partially
 * allocated CQs are freed on the failure paths).
 */
ibt_channel_hdl_t
rds_ep_alloc_rc_channel(rds_ep_t *ep, uint8_t hca_port)
{
	int			ret = IBT_SUCCESS;
	ibt_cq_attr_t		scqattr, rcqattr;
	ibt_rc_chan_alloc_args_t	chanargs;
	ibt_channel_hdl_t	chanhdl;
	rds_session_t		*sp;
	rds_hca_t		*hcap;

	RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Enter: 0x%p port: %d",
	    ep, hca_port);

	/* Update the EP with the right IP address and HCA guid */
	sp = ep->ep_sp;
	ASSERT(sp != NULL);
	rw_enter(&sp->session_lock, RW_READER);
	mutex_enter(&ep->ep_lock);
	ep->ep_myip = sp->session_myip;
	ep->ep_remip = sp->session_remip;
	/*
	 * NOTE(review): hcap is not NULL-checked before use below;
	 * presumably session_lgid always maps to a known HCA here --
	 * confirm against the connect path.
	 */
	hcap = rds_gid_to_hcap(rdsib_statep, sp->session_lgid);
	ep->ep_hca_guid = hcap->hca_guid;
	mutex_exit(&ep->ep_lock);
	rw_exit(&sp->session_lock);

	/* reset taskqpending flag here */
	ep->ep_recvqp.qp_taskqpending = B_FALSE;

	if (ep->ep_type == RDS_EP_TYPE_CTRL) {
		scqattr.cq_size = MaxCtrlSendBuffers;
		scqattr.cq_sched = NULL;
		scqattr.cq_flags = IBT_CQ_NO_FLAGS;

		rcqattr.cq_size = MaxCtrlRecvBuffers;
		rcqattr.cq_sched = NULL;
		rcqattr.cq_flags = IBT_CQ_NO_FLAGS;

		chanargs.rc_sizes.cs_sq = MaxCtrlSendBuffers;
		chanargs.rc_sizes.cs_rq = MaxCtrlRecvBuffers;
		chanargs.rc_sizes.cs_sq_sgl = 1;
		chanargs.rc_sizes.cs_rq_sgl = 1;
	} else {
		/* data SQ/CQ leave room for the ACK work requests */
		scqattr.cq_size = MaxDataSendBuffers + RDS_NUM_ACKS;
		scqattr.cq_sched = NULL;
		scqattr.cq_flags = IBT_CQ_NO_FLAGS;

		rcqattr.cq_size = MaxDataRecvBuffers;
		rcqattr.cq_sched = NULL;
		rcqattr.cq_flags = IBT_CQ_NO_FLAGS;

		chanargs.rc_sizes.cs_sq = MaxDataSendBuffers + RDS_NUM_ACKS;
		chanargs.rc_sizes.cs_rq = MaxDataRecvBuffers;
		chanargs.rc_sizes.cs_sq_sgl = 1;
		chanargs.rc_sizes.cs_rq_sgl = 1;
	}

	mutex_enter(&ep->ep_lock);
	if (ep->ep_sendcq == NULL) {
		/* returned size is always greater than the requested size */
		ret = ibt_alloc_cq(hcap->hca_hdl, &scqattr,
		    &ep->ep_sendcq, NULL);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2(LABEL, "ibt_alloc_cq for sendCQ "
			    "failed, size = %d: %d", scqattr.cq_size, ret);
			mutex_exit(&ep->ep_lock);
			return (NULL);
		}

		(void) ibt_set_cq_handler(ep->ep_sendcq, rds_sendcq_handler,
		    ep);

		/* in polling mode the send CQ is never armed */
		if (rds_no_interrupts == 0) {
			ret = ibt_enable_cq_notify(ep->ep_sendcq,
			    IBT_NEXT_COMPLETION);
			if (ret != IBT_SUCCESS) {
				RDS_DPRINTF2(LABEL,
				    "ibt_enable_cq_notify failed: %d", ret);
				(void) ibt_free_cq(ep->ep_sendcq);
				ep->ep_sendcq = NULL;
				mutex_exit(&ep->ep_lock);
				return (NULL);
			}
		}
	}

	if (ep->ep_recvcq == NULL) {
		/* returned size is always greater than the requested size */
		ret = ibt_alloc_cq(hcap->hca_hdl, &rcqattr,
		    &ep->ep_recvcq, NULL);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2(LABEL, "ibt_alloc_cq for recvCQ "
			    "failed, size = %d: %d", rcqattr.cq_size, ret);
			(void) ibt_free_cq(ep->ep_sendcq);
			ep->ep_sendcq = NULL;
			mutex_exit(&ep->ep_lock);
			return (NULL);
		}

		(void) ibt_set_cq_handler(ep->ep_recvcq, rds_recvcq_handler,
		    ep);

		ret = ibt_enable_cq_notify(ep->ep_recvcq, rds_wc_signal);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2(LABEL,
			    "ibt_enable_cq_notify failed: %d", ret);
			(void) ibt_free_cq(ep->ep_recvcq);
			ep->ep_recvcq = NULL;
			(void) ibt_free_cq(ep->ep_sendcq);
			ep->ep_sendcq = NULL;
			mutex_exit(&ep->ep_lock);
			return (NULL);
		}
	}

	chanargs.rc_flags = IBT_ALL_SIGNALED;
	chanargs.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR |
	    IBT_CEP_ATOMIC;
	chanargs.rc_hca_port_num = hca_port;
	chanargs.rc_scq = ep->ep_sendcq;
	chanargs.rc_rcq = ep->ep_recvcq;
	chanargs.rc_pd = hcap->hca_pdhdl;
	chanargs.rc_srq = NULL;

	ret = ibt_alloc_rc_channel(hcap->hca_hdl,
	    IBT_ACHAN_NO_FLAGS, &chanargs, &chanhdl, NULL);
	if (ret
!= IBT_SUCCESS) { 1196 RDS_DPRINTF2(LABEL, "ibt_alloc_rc_channel fail: %d", 1197 ret); 1198 (void) ibt_free_cq(ep->ep_recvcq); 1199 ep->ep_recvcq = NULL; 1200 (void) ibt_free_cq(ep->ep_sendcq); 1201 ep->ep_sendcq = NULL; 1202 mutex_exit(&ep->ep_lock); 1203 return (NULL); 1204 } 1205 mutex_exit(&ep->ep_lock); 1206 1207 /* Chan private should contain the ep */ 1208 (void) ibt_set_chan_private(chanhdl, ep); 1209 1210 RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Return: 0x%p", chanhdl); 1211 1212 return (chanhdl); 1213 } 1214 1215 1216 #if 0 1217 1218 /* Return node guid given a port gid */ 1219 ib_guid_t 1220 rds_gid_to_node_guid(ib_gid_t gid) 1221 { 1222 ibt_node_info_t nodeinfo; 1223 int ret; 1224 1225 RDS_DPRINTF4("rds_gid_to_node_guid", "Enter: gid: %llx:%llx", 1226 gid.gid_prefix, gid.gid_guid); 1227 1228 ret = ibt_gid_to_node_info(gid, &nodeinfo); 1229 if (ret != IBT_SUCCESS) { 1230 RDS_DPRINTF2(LABEL, "ibt_gid_node_info for gid: %llx:%llx " 1231 "failed", gid.gid_prefix, gid.gid_guid); 1232 return (0LL); 1233 } 1234 1235 RDS_DPRINTF4("rds_gid_to_node_guid", "Return: Node guid: %llx", 1236 nodeinfo.n_node_guid); 1237 1238 return (nodeinfo.n_node_guid); 1239 } 1240 1241 #endif 1242 1243 static void 1244 rds_handle_portup_event(rds_state_t *statep, ibt_hca_hdl_t hdl, 1245 ibt_async_event_t *event) 1246 { 1247 rds_hca_t *hcap; 1248 ibt_hca_portinfo_t *newpinfop, *oldpinfop; 1249 uint_t newsize, oldsize, nport; 1250 ib_gid_t gid; 1251 int ret; 1252 1253 RDS_DPRINTF2("rds_handle_portup_event", 1254 "Enter: GUID: 0x%llx Statep: %p", event->ev_hca_guid, statep); 1255 1256 hcap = rds_get_hcap(statep, event->ev_hca_guid); 1257 if (hcap == NULL) { 1258 RDS_DPRINTF2("rds_handle_portup_event", "HCA: 0x%llx is " 1259 "not in our list", event->ev_hca_guid); 1260 return; 1261 } 1262 1263 ret = ibt_query_hca_ports(hdl, 0, &newpinfop, &nport, &newsize); 1264 if (ret != IBT_SUCCESS) { 1265 RDS_DPRINTF2(LABEL, "ibt_query_hca_ports failed: %d", ret); 1266 return; 1267 } 1268 1269 
oldpinfop = hcap->hca_pinfop; 1270 oldsize = hcap->hca_pinfo_sz; 1271 hcap->hca_pinfop = newpinfop; 1272 hcap->hca_pinfo_sz = newsize; 1273 1274 /* structure copy */ 1275 gid = newpinfop[event->ev_port - 1].p_sgid_tbl[0]; 1276 1277 /* bind RDS service on the port, pass statep as cm_private */ 1278 ret = ibt_bind_service(statep->rds_srvhdl, gid, NULL, statep, NULL); 1279 if (ret != IBT_SUCCESS) { 1280 RDS_DPRINTF2(LABEL, "Bind service for HCA: 0x%llx Port: %d " 1281 "gid %llx:%llx returned: %d", event->ev_hca_guid, 1282 event->ev_port, gid.gid_prefix, gid.gid_guid, ret); 1283 } 1284 1285 (void) ibt_free_portinfo(oldpinfop, oldsize); 1286 1287 RDS_DPRINTF2("rds_handle_portup_event", "Return: GUID: 0x%llx", 1288 event->ev_hca_guid); 1289 } 1290 1291 static void 1292 rds_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code, 1293 ibt_async_event_t *event) 1294 { 1295 rds_state_t *statep; 1296 1297 RDS_DPRINTF2("rds_async_handler", "Async code: %d", code); 1298 1299 switch (code) { 1300 case IBT_EVENT_PORT_UP: 1301 statep = (rds_state_t *)clntp; 1302 rds_handle_portup_event(statep, hdl, event); 1303 break; 1304 1305 default: 1306 RDS_DPRINTF2(LABEL, "Async event: %d not handled", code); 1307 } 1308 1309 RDS_DPRINTF2("rds_async_handler", "Return: code: %d", code); 1310 } 1311