/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
/*
 * Sun elects to include this software in Sun product
 * under the OpenIB BSD license.
 *
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
73 */ 74 75 #pragma ident "%Z%%M% %I% %E% SMI" 76 77 #include <sys/types.h> 78 #include <sys/ddi.h> 79 #include <sys/sunddi.h> 80 #include <sys/ib/ibtl/ibti.h> 81 #include <sys/ib/ibtl/ibtl_types.h> 82 #include <sys/ib/clients/rds/rdsib_cm.h> 83 #include <sys/ib/clients/rds/rdsib_ib.h> 84 #include <sys/ib/clients/rds/rdsib_buf.h> 85 #include <sys/ib/clients/rds/rdsib_ep.h> 86 #include <sys/ib/clients/rds/rds_kstat.h> 87 88 static void rds_async_handler(void *clntp, ibt_hca_hdl_t hdl, 89 ibt_async_code_t code, ibt_async_event_t *event); 90 91 static struct ibt_clnt_modinfo_s rds_ib_modinfo = { 92 IBTI_V2, 93 IBT_NETWORK, 94 rds_async_handler, 95 NULL, 96 "RDS" 97 }; 98 99 /* performance tunables */ 100 uint_t rds_no_interrupts = 0; 101 uint_t rds_poll_percent_full = 25; 102 uint_t rds_wc_signal = IBT_NEXT_SOLICITED; 103 uint_t rds_waittime_ms = 100; /* ms */ 104 105 extern dev_info_t *rdsib_dev_info; 106 extern void rds_close_sessions(); 107 108 static void 109 rdsib_validate_chan_sizes(ibt_hca_attr_t *hattrp) 110 { 111 /* The SQ size should not be more than that supported by the HCA */ 112 if (((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_chan_sz) || 113 ((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_cq_sz)) { 114 RDS_DPRINTF0("RDSIB", "MaxDataSendBuffers + %d is greater " 115 "than that supported by the HCA driver " 116 "(%d + %d > %d or %d), lowering it to a supported value.", 117 RDS_NUM_ACKS, MaxDataSendBuffers, RDS_NUM_ACKS, 118 hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz); 119 120 MaxDataSendBuffers = (hattrp->hca_max_chan_sz > 121 hattrp->hca_max_cq_sz) ? 
122 hattrp->hca_max_cq_sz - RDS_NUM_ACKS : 123 hattrp->hca_max_chan_sz - RDS_NUM_ACKS; 124 } 125 126 /* The RQ size should not be more than that supported by the HCA */ 127 if ((MaxDataRecvBuffers > hattrp->hca_max_chan_sz) || 128 (MaxDataRecvBuffers > hattrp->hca_max_cq_sz)) { 129 RDS_DPRINTF0("RDSIB", "MaxDataRecvBuffers is greater than that " 130 "supported by the HCA driver (%d > %d or %d), lowering it " 131 "to a supported value.", MaxDataRecvBuffers, 132 hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz); 133 134 MaxDataRecvBuffers = (hattrp->hca_max_chan_sz > 135 hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz : 136 hattrp->hca_max_chan_sz; 137 } 138 139 /* The SQ size should not be more than that supported by the HCA */ 140 if ((MaxCtrlSendBuffers > hattrp->hca_max_chan_sz) || 141 (MaxCtrlSendBuffers > hattrp->hca_max_cq_sz)) { 142 RDS_DPRINTF0("RDSIB", "MaxCtrlSendBuffers is greater than that " 143 "supported by the HCA driver (%d > %d or %d), lowering it " 144 "to a supported value.", MaxCtrlSendBuffers, 145 hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz); 146 147 MaxCtrlSendBuffers = (hattrp->hca_max_chan_sz > 148 hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz : 149 hattrp->hca_max_chan_sz; 150 } 151 152 /* The RQ size should not be more than that supported by the HCA */ 153 if ((MaxCtrlRecvBuffers > hattrp->hca_max_chan_sz) || 154 (MaxCtrlRecvBuffers > hattrp->hca_max_cq_sz)) { 155 RDS_DPRINTF0("RDSIB", "MaxCtrlRecvBuffers is greater than that " 156 "supported by the HCA driver (%d > %d or %d), lowering it " 157 "to a supported value.", MaxCtrlRecvBuffers, 158 hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz); 159 160 MaxCtrlRecvBuffers = (hattrp->hca_max_chan_sz > 161 hattrp->hca_max_cq_sz) ? 
hattrp->hca_max_cq_sz : 162 hattrp->hca_max_chan_sz; 163 } 164 165 /* The MaxRecvMemory should be less than that supported by the HCA */ 166 if ((MaxRecvMemory * 1024) > hattrp->hca_max_memr_len) { 167 RDS_DPRINTF0("RDSIB", "MaxRecvMemory is greater than that " 168 "supported by the HCA driver (%d > %d), lowering it to %d", 169 MaxRecvMemory, hattrp->hca_max_memr_len, 170 hattrp->hca_max_memr_len); 171 172 MaxRecvMemory = hattrp->hca_max_memr_len; 173 } 174 } 175 176 /* 177 * Called on open of first RDS socket 178 */ 179 int 180 rdsib_open_ib() 181 { 182 ib_guid_t *guidp; 183 rds_hca_t *hcap, *hcap1; 184 uint_t ix, hcaix, nhcas; 185 int ret; 186 187 RDS_DPRINTF4("rdsib_open_ib", "enter: statep %p", rdsib_statep); 188 189 ASSERT(rdsib_statep != NULL); 190 if (rdsib_statep == NULL) { 191 RDS_DPRINTF1("rdsib_open_ib", "RDS Statep not initialized"); 192 return (-1); 193 } 194 195 /* How many hcas are there? */ 196 nhcas = ibt_get_hca_list(&guidp); 197 if (nhcas == 0) { 198 RDS_DPRINTF2("rdsib_open_ib", "No IB HCAs Available"); 199 return (-1); 200 } 201 202 RDS_DPRINTF3("rdsib_open_ib", "Number of HCAs: %d", nhcas); 203 204 /* Register with IBTF */ 205 ret = ibt_attach(&rds_ib_modinfo, rdsib_dev_info, rdsib_statep, 206 &rdsib_statep->rds_ibhdl); 207 if (ret != IBT_SUCCESS) { 208 RDS_DPRINTF2(LABEL, "ibt_attach failed: %d", ret); 209 (void) ibt_free_hca_list(guidp, nhcas); 210 return (-1); 211 } 212 213 /* 214 * Open each HCA and gather its information. Don't care about HCAs 215 * that cannot be opened. It is OK as long as atleast one HCA can be 216 * opened. 217 * Initialize a HCA only if all the information is available. 
218 */ 219 hcap1 = NULL; 220 for (ix = 0, hcaix = 0; ix < nhcas; ix++) { 221 RDS_DPRINTF3(LABEL, "Open HCA: 0x%llx", guidp[ix]); 222 223 hcap = (rds_hca_t *)kmem_zalloc(sizeof (rds_hca_t), KM_SLEEP); 224 225 ret = ibt_open_hca(rdsib_statep->rds_ibhdl, guidp[ix], 226 &hcap->hca_hdl); 227 if (ret != IBT_SUCCESS) { 228 RDS_DPRINTF2("rdsib_open_ib", 229 "ibt_open_hca: 0x%llx failed: %d", guidp[ix], ret); 230 kmem_free(hcap, sizeof (rds_hca_t)); 231 continue; 232 } 233 234 hcap->hca_guid = guidp[ix]; 235 236 ret = ibt_query_hca(hcap->hca_hdl, &hcap->hca_attr); 237 if (ret != IBT_SUCCESS) { 238 RDS_DPRINTF2("rdsib_open_ib", 239 "Query HCA: 0x%llx failed: %d", guidp[ix], ret); 240 ret = ibt_close_hca(hcap->hca_hdl); 241 ASSERT(ret == IBT_SUCCESS); 242 kmem_free(hcap, sizeof (rds_hca_t)); 243 continue; 244 } 245 246 ret = ibt_query_hca_ports(hcap->hca_hdl, 0, 247 &hcap->hca_pinfop, &hcap->hca_nports, &hcap->hca_pinfo_sz); 248 if (ret != IBT_SUCCESS) { 249 RDS_DPRINTF2("rdsib_open_ib", 250 "Query HCA 0x%llx ports failed: %d", guidp[ix], 251 ret); 252 ret = ibt_close_hca(hcap->hca_hdl); 253 ASSERT(ret == IBT_SUCCESS); 254 kmem_free(hcap, sizeof (rds_hca_t)); 255 continue; 256 } 257 258 /* Only one PD per HCA is allocated, so do it here */ 259 ret = ibt_alloc_pd(hcap->hca_hdl, IBT_PD_NO_FLAGS, 260 &hcap->hca_pdhdl); 261 if (ret != IBT_SUCCESS) { 262 RDS_DPRINTF2(LABEL, "ibt_alloc_pd 0x%llx failed: %d", 263 guidp[ix], ret); 264 (void) ibt_free_portinfo(hcap->hca_pinfop, 265 hcap->hca_pinfo_sz); 266 ret = ibt_close_hca(hcap->hca_hdl); 267 ASSERT(ret == IBT_SUCCESS); 268 kmem_free(hcap, sizeof (rds_hca_t)); 269 continue; 270 } 271 272 rdsib_validate_chan_sizes(&hcap->hca_attr); 273 274 /* this HCA is fully initialized, go to the next one */ 275 hcaix++; 276 hcap->hca_nextp = hcap1; 277 hcap1 = hcap; 278 } 279 280 /* free the HCA list, we are done with it */ 281 (void) ibt_free_hca_list(guidp, nhcas); 282 283 if (hcaix == 0) { 284 /* Failed to Initialize even one HCA */ 285 
RDS_DPRINTF2("rdsib_open_ib", "No HCAs are initialized"); 286 (void) ibt_detach(rdsib_statep->rds_ibhdl); 287 rdsib_statep->rds_ibhdl = NULL; 288 return (-1); 289 } 290 291 if (hcaix < nhcas) { 292 RDS_DPRINTF2("rdsib_open_ib", "HCAs %d/%d failed to initialize", 293 (nhcas - hcaix), nhcas); 294 } 295 296 rdsib_statep->rds_hcalistp = hcap1; 297 rdsib_statep->rds_nhcas = hcaix; 298 299 /* register the RDS service */ 300 rdsib_statep->rds_srvhdl = 301 rds_register_service(rdsib_statep->rds_ibhdl); 302 if (rdsib_statep->rds_srvhdl == NULL) { 303 RDS_DPRINTF2("rdsib_open_ib", "Service registration failed"); 304 } else { 305 /* bind the service on all available ports */ 306 ret = rds_bind_service(rdsib_statep); 307 if (ret != 0) { 308 RDS_DPRINTF2("rdsib_open_ib", "Bind service failed"); 309 } 310 } 311 312 RDS_DPRINTF4("rdsib_open_ib", "return: statep %p", rdsib_statep); 313 314 return (0); 315 } 316 317 /* 318 * Called when all ports are closed. 319 */ 320 void 321 rdsib_close_ib() 322 { 323 rds_hca_t *hcap, *nextp; 324 int ret; 325 326 RDS_DPRINTF2("rds_close_ib", "enter: statep %p", rdsib_statep); 327 328 if (rdsib_statep->rds_srvhdl != NULL) { 329 (void) ibt_unbind_all_services(rdsib_statep->rds_srvhdl); 330 (void) ibt_deregister_service(rdsib_statep->rds_ibhdl, 331 rdsib_statep->rds_srvhdl); 332 (void) ibt_release_ip_sid(rdsib_statep->rds_service_id); 333 334 (void) ibt_unbind_all_services(rdsib_statep->rds_old_srvhdl); 335 (void) ibt_deregister_service(rdsib_statep->rds_ibhdl, 336 rdsib_statep->rds_old_srvhdl); 337 } 338 339 /* close and destroy all the sessions */ 340 rds_close_sessions(NULL); 341 342 /* Release all HCA resources */ 343 rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER); 344 hcap = rdsib_statep->rds_hcalistp; 345 rdsib_statep->rds_hcalistp = NULL; 346 rdsib_statep->rds_nhcas = 0; 347 rw_exit(&rdsib_statep->rds_hca_lock); 348 349 while (hcap != NULL) { 350 nextp = hcap->hca_nextp; 351 352 ret = ibt_free_pd(hcap->hca_hdl, hcap->hca_pdhdl); 353 
ASSERT(ret == IBT_SUCCESS); 354 355 (void) ibt_free_portinfo(hcap->hca_pinfop, hcap->hca_pinfo_sz); 356 357 ret = ibt_close_hca(hcap->hca_hdl); 358 ASSERT(ret == IBT_SUCCESS); 359 360 kmem_free(hcap, sizeof (rds_hca_t)); 361 hcap = nextp; 362 } 363 364 /* Deregister with IBTF */ 365 if (rdsib_statep->rds_ibhdl != NULL) { 366 (void) ibt_detach(rdsib_statep->rds_ibhdl); 367 rdsib_statep->rds_ibhdl = NULL; 368 } 369 370 RDS_DPRINTF2("rds_close_ib", "return: statep %p", rdsib_statep); 371 } 372 373 /* Return hcap, given the hca guid */ 374 rds_hca_t * 375 rds_get_hcap(rds_state_t *statep, ib_guid_t hca_guid) 376 { 377 rds_hca_t *hcap; 378 379 RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: Enter: statep: 0x%p " 380 "guid: %llx", statep, hca_guid); 381 382 rw_enter(&statep->rds_hca_lock, RW_READER); 383 384 hcap = statep->rds_hcalistp; 385 while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) { 386 hcap = hcap->hca_nextp; 387 } 388 389 rw_exit(&statep->rds_hca_lock); 390 391 RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: return"); 392 393 return (hcap); 394 } 395 396 /* Return hcap, given a gid */ 397 rds_hca_t * 398 rds_gid_to_hcap(rds_state_t *statep, ib_gid_t gid) 399 { 400 rds_hca_t *hcap; 401 uint_t ix; 402 403 RDS_DPRINTF4("rds_gid_to_hcap", "Enter: statep: 0x%p gid: %llx:%llx", 404 statep, gid.gid_prefix, gid.gid_guid); 405 406 rw_enter(&statep->rds_hca_lock, RW_READER); 407 408 hcap = statep->rds_hcalistp; 409 while (hcap != NULL) { 410 for (ix = 0; ix < hcap->hca_nports; ix++) { 411 if ((hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_prefix == 412 gid.gid_prefix) && 413 (hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_guid == 414 gid.gid_guid)) { 415 RDS_DPRINTF4("rds_gid_to_hcap", 416 "gid found in hcap: 0x%p", hcap); 417 rw_exit(&statep->rds_hca_lock); 418 return (hcap); 419 } 420 } 421 hcap = hcap->hca_nextp; 422 } 423 424 rw_exit(&statep->rds_hca_lock); 425 426 return (NULL); 427 } 428 429 /* This is called from the send CQ handler */ 430 void 431 
rds_send_acknowledgement(rds_ep_t *ep) 432 { 433 int ret; 434 uint_t ix; 435 436 RDS_DPRINTF4("rds_send_acknowledgement", "Enter EP(%p)", ep); 437 438 mutex_enter(&ep->ep_lock); 439 440 ASSERT(ep->ep_rdmacnt != 0); 441 442 /* 443 * The previous ACK completed successfully, send the next one 444 * if more messages were received after sending the last ACK 445 */ 446 if (ep->ep_rbufid != *(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va) { 447 *(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va = ep->ep_rbufid; 448 mutex_exit(&ep->ep_lock); 449 450 /* send acknowledgement */ 451 RDS_INCR_TXACKS(); 452 ret = ibt_post_send(ep->ep_chanhdl, &ep->ep_ackwr, 1, &ix); 453 if (ret != IBT_SUCCESS) { 454 RDS_DPRINTF1("rds_send_acknowledgement", 455 "EP(%p): ibt_post_send for acknowledgement " 456 "failed: %d, SQ depth: %d", 457 ep, ret, ep->ep_sndpool.pool_nbusy); 458 mutex_enter(&ep->ep_lock); 459 ep->ep_rdmacnt--; 460 mutex_exit(&ep->ep_lock); 461 } 462 } else { 463 /* ACKed all messages, no more to ACK */ 464 ep->ep_rdmacnt--; 465 mutex_exit(&ep->ep_lock); 466 return; 467 } 468 469 RDS_DPRINTF4("rds_send_acknowledgement", "Return EP(%p)", ep); 470 } 471 472 static int 473 rds_poll_ctrl_completions(ibt_cq_hdl_t cq, rds_ep_t *ep) 474 { 475 ibt_wc_t wc; 476 uint_t npolled; 477 rds_buf_t *bp; 478 rds_ctrl_pkt_t *cpkt; 479 rds_qp_t *recvqp; 480 int ret = IBT_SUCCESS; 481 482 RDS_DPRINTF4("rds_poll_ctrl_completions", "Enter: EP(%p)", ep); 483 484 bzero(&wc, sizeof (ibt_wc_t)); 485 ret = ibt_poll_cq(cq, &wc, 1, &npolled); 486 if (ret != IBT_SUCCESS) { 487 if (ret != IBT_CQ_EMPTY) { 488 RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_poll_cq " 489 "returned: %d", ep, cq, ret); 490 } else { 491 RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq " 492 "returned: IBT_CQ_EMPTY", ep, cq); 493 } 494 return (ret); 495 } 496 497 bp = (rds_buf_t *)(uintptr_t)wc.wc_id; 498 499 if (wc.wc_status != IBT_WC_SUCCESS) { 500 mutex_enter(&ep->ep_recvqp.qp_lock); 501 ep->ep_recvqp.qp_level--; 502 
mutex_exit(&ep->ep_recvqp.qp_lock); 503 504 /* Free the buffer */ 505 bp->buf_state = RDS_RCVBUF_FREE; 506 rds_free_recv_buf(bp, 1); 507 508 /* Receive completion failure */ 509 if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) { 510 RDS_DPRINTF2("rds_poll_ctrl_completions", 511 "EP(%p) CQ(%p) BP(%p): WC Error Status: %d", 512 ep, cq, wc.wc_id, wc.wc_status); 513 } 514 return (ret); 515 } 516 517 /* there is one less in the RQ */ 518 recvqp = &ep->ep_recvqp; 519 mutex_enter(&recvqp->qp_lock); 520 recvqp->qp_level--; 521 if ((recvqp->qp_taskqpending == B_FALSE) && 522 (recvqp->qp_level <= recvqp->qp_lwm)) { 523 /* Time to post more buffers into the RQ */ 524 recvqp->qp_taskqpending = B_TRUE; 525 mutex_exit(&recvqp->qp_lock); 526 527 ret = ddi_taskq_dispatch(rds_taskq, 528 rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP); 529 if (ret != DDI_SUCCESS) { 530 RDS_DPRINTF1(LABEL, "ddi_taskq_dispatch failed: %d", 531 ret); 532 mutex_enter(&recvqp->qp_lock); 533 recvqp->qp_taskqpending = B_FALSE; 534 mutex_exit(&recvqp->qp_lock); 535 } 536 } else { 537 mutex_exit(&recvqp->qp_lock); 538 } 539 540 cpkt = (rds_ctrl_pkt_t *)(uintptr_t)bp->buf_ds.ds_va; 541 rds_handle_control_message(ep->ep_sp, cpkt); 542 543 bp->buf_state = RDS_RCVBUF_FREE; 544 rds_free_recv_buf(bp, 1); 545 546 RDS_DPRINTF4("rds_poll_ctrl_completions", "Return: EP(%p)", ep); 547 548 return (ret); 549 } 550 551 #define RDS_POST_FEW_ATATIME 100 552 /* Post recv WRs into the RQ. 
Assumes the ep->refcnt is already incremented */ 553 void 554 rds_post_recv_buf(void *arg) 555 { 556 ibt_channel_hdl_t chanhdl; 557 rds_ep_t *ep; 558 rds_session_t *sp; 559 rds_qp_t *recvqp; 560 rds_bufpool_t *gp; 561 rds_buf_t *bp, *bp1; 562 ibt_recv_wr_t *wrp, wr[RDS_POST_FEW_ATATIME]; 563 rds_hca_t *hcap; 564 uint_t npost, nspace, rcv_len; 565 uint_t ix, jx, kx; 566 int ret; 567 568 chanhdl = (ibt_channel_hdl_t)arg; 569 RDS_DPRINTF4("rds_post_recv_buf", "Enter: CHAN(%p)", chanhdl); 570 RDS_INCR_POST_RCV_BUF_CALLS(); 571 572 ep = (rds_ep_t *)ibt_get_chan_private(chanhdl); 573 ASSERT(ep != NULL); 574 sp = ep->ep_sp; 575 recvqp = &ep->ep_recvqp; 576 577 RDS_DPRINTF5("rds_post_recv_buf", "EP(%p)", ep); 578 579 /* get the hcap for the HCA hosting this channel */ 580 hcap = rds_get_hcap(rdsib_statep, ep->ep_hca_guid); 581 if (hcap == NULL) { 582 RDS_DPRINTF2("rds_post_recv_buf", "HCA (0x%llx) not found", 583 ep->ep_hca_guid); 584 return; 585 } 586 587 /* Make sure the session is still connected */ 588 rw_enter(&sp->session_lock, RW_READER); 589 if ((sp->session_state != RDS_SESSION_STATE_INIT) && 590 (sp->session_state != RDS_SESSION_STATE_CONNECTED)) { 591 RDS_DPRINTF2("rds_post_recv_buf", "EP(%p): Session is not " 592 "in active state (%d)", ep, sp->session_state); 593 rw_exit(&sp->session_lock); 594 return; 595 } 596 rw_exit(&sp->session_lock); 597 598 /* how many can be posted */ 599 mutex_enter(&recvqp->qp_lock); 600 nspace = recvqp->qp_depth - recvqp->qp_level; 601 if (nspace == 0) { 602 RDS_DPRINTF2("rds_post_recv_buf", "RQ is FULL"); 603 recvqp->qp_taskqpending = B_FALSE; 604 mutex_exit(&recvqp->qp_lock); 605 return; 606 } 607 mutex_exit(&recvqp->qp_lock); 608 609 if (ep->ep_type == RDS_EP_TYPE_DATA) { 610 gp = &rds_dpool; 611 rcv_len = RdsPktSize; 612 } else { 613 gp = &rds_cpool; 614 rcv_len = RDS_CTRLPKT_SIZE; 615 } 616 617 bp = rds_get_buf(gp, nspace, &jx); 618 if (bp == NULL) { 619 RDS_DPRINTF2(LABEL, "EP(%p): No Recv buffers available", ep); 620 /* try 
again later */ 621 ret = ddi_taskq_dispatch(rds_taskq, rds_post_recv_buf, 622 (void *)ep->ep_chanhdl, DDI_NOSLEEP); 623 if (ret != DDI_SUCCESS) { 624 RDS_DPRINTF1(LABEL, "ddi_taskq_dispatch failed: %d", 625 ret); 626 mutex_enter(&recvqp->qp_lock); 627 recvqp->qp_taskqpending = B_FALSE; 628 mutex_exit(&recvqp->qp_lock); 629 } 630 return; 631 } 632 633 if (jx != nspace) { 634 RDS_DPRINTF2(LABEL, "EP(%p): Recv buffers " 635 "needed: %d available: %d", ep, nspace, jx); 636 nspace = jx; 637 } 638 639 bp1 = bp; 640 for (ix = 0; ix < nspace; ix++) { 641 bp1->buf_ep = ep; 642 ASSERT(bp1->buf_state == RDS_RCVBUF_FREE); 643 bp1->buf_state = RDS_RCVBUF_POSTED; 644 bp1->buf_ds.ds_key = hcap->hca_lkey; 645 bp1->buf_ds.ds_len = rcv_len; 646 bp1 = bp1->buf_nextp; 647 } 648 649 #if 0 650 wrp = kmem_zalloc(RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t), 651 KM_SLEEP); 652 #else 653 wrp = &wr[0]; 654 #endif 655 656 npost = nspace; 657 while (npost) { 658 jx = (npost > RDS_POST_FEW_ATATIME) ? 659 RDS_POST_FEW_ATATIME : npost; 660 for (ix = 0; ix < jx; ix++) { 661 wrp[ix].wr_id = (uintptr_t)bp; 662 wrp[ix].wr_nds = 1; 663 wrp[ix].wr_sgl = &bp->buf_ds; 664 bp = bp->buf_nextp; 665 } 666 667 ret = ibt_post_recv(chanhdl, wrp, jx, &kx); 668 if ((ret != IBT_SUCCESS) || (kx != jx)) { 669 RDS_DPRINTF1(LABEL, "ibt_post_recv for %d WRs failed: " 670 "%d", npost, ret); 671 npost -= kx; 672 break; 673 } 674 675 npost -= jx; 676 } 677 678 mutex_enter(&recvqp->qp_lock); 679 if (npost != 0) { 680 RDS_DPRINTF2("rds_post_recv_buf", 681 "EP(%p) Failed to post %d WRs", ep, npost); 682 recvqp->qp_level += (nspace - npost); 683 } else { 684 recvqp->qp_level += nspace; 685 } 686 687 /* 688 * sometimes, the recv WRs can get consumed as soon as they are 689 * posted. In that case, taskq thread to post more WRs to the RQ will 690 * not be scheduled as the taskqpending flag is still set. 
691 */ 692 if (recvqp->qp_level == 0) { 693 mutex_exit(&recvqp->qp_lock); 694 ret = ddi_taskq_dispatch(rds_taskq, 695 rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP); 696 if (ret != DDI_SUCCESS) { 697 RDS_DPRINTF1("rds_post_recv_buf", 698 "ddi_taskq_dispatch failed: %d", ret); 699 mutex_enter(&recvqp->qp_lock); 700 recvqp->qp_taskqpending = B_FALSE; 701 mutex_exit(&recvqp->qp_lock); 702 } 703 } else { 704 recvqp->qp_taskqpending = B_FALSE; 705 mutex_exit(&recvqp->qp_lock); 706 } 707 708 #if 0 709 kmem_free(wrp, RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t)); 710 #endif 711 712 RDS_DPRINTF4("rds_post_recv_buf", "Return: EP(%p)", ep); 713 } 714 715 static int 716 rds_poll_data_completions(ibt_cq_hdl_t cq, rds_ep_t *ep) 717 { 718 ibt_wc_t wc; 719 rds_buf_t *bp; 720 rds_data_hdr_t *pktp; 721 rds_qp_t *recvqp; 722 uint_t npolled; 723 int ret = IBT_SUCCESS; 724 725 726 RDS_DPRINTF4("rds_poll_data_completions", "Enter: EP(%p)", ep); 727 728 bzero(&wc, sizeof (ibt_wc_t)); 729 ret = ibt_poll_cq(cq, &wc, 1, &npolled); 730 if (ret != IBT_SUCCESS) { 731 if (ret != IBT_CQ_EMPTY) { 732 RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_poll_cq " 733 "returned: %d", ep, cq, ret); 734 } else { 735 RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq " 736 "returned: IBT_CQ_EMPTY", ep, cq); 737 } 738 return (ret); 739 } 740 741 bp = (rds_buf_t *)(uintptr_t)wc.wc_id; 742 ASSERT(bp->buf_state == RDS_RCVBUF_POSTED); 743 bp->buf_state = RDS_RCVBUF_ONSOCKQ; 744 bp->buf_nextp = NULL; 745 746 if (wc.wc_status != IBT_WC_SUCCESS) { 747 mutex_enter(&ep->ep_recvqp.qp_lock); 748 ep->ep_recvqp.qp_level--; 749 mutex_exit(&ep->ep_recvqp.qp_lock); 750 751 /* free the buffer */ 752 bp->buf_state = RDS_RCVBUF_FREE; 753 rds_free_recv_buf(bp, 1); 754 755 /* Receive completion failure */ 756 if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) { 757 RDS_DPRINTF2("rds_poll_data_completions", 758 "EP(%p) CQ(%p) BP(%p): WC Error Status: %d", 759 ep, cq, wc.wc_id, wc.wc_status); 760 RDS_INCR_RXERRS(); 761 } 762 return 
(ret); 763 } 764 765 /* there is one less in the RQ */ 766 recvqp = &ep->ep_recvqp; 767 mutex_enter(&recvqp->qp_lock); 768 recvqp->qp_level--; 769 if ((recvqp->qp_taskqpending == B_FALSE) && 770 (recvqp->qp_level <= recvqp->qp_lwm)) { 771 /* Time to post more buffers into the RQ */ 772 recvqp->qp_taskqpending = B_TRUE; 773 mutex_exit(&recvqp->qp_lock); 774 775 ret = ddi_taskq_dispatch(rds_taskq, 776 rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP); 777 if (ret != DDI_SUCCESS) { 778 RDS_DPRINTF1(LABEL, "ddi_taskq_dispatch failed: %d", 779 ret); 780 mutex_enter(&recvqp->qp_lock); 781 recvqp->qp_taskqpending = B_FALSE; 782 mutex_exit(&recvqp->qp_lock); 783 } 784 } else { 785 mutex_exit(&recvqp->qp_lock); 786 } 787 788 pktp = (rds_data_hdr_t *)(uintptr_t)bp->buf_ds.ds_va; 789 ASSERT(pktp->dh_datalen != 0); 790 791 RDS_DPRINTF5(LABEL, "Message Received: sendIP: 0x%x recvIP: 0x%x " 792 "sendport: %d recvport: %d npkts: %d pktno: %d", ep->ep_remip, 793 ep->ep_myip, pktp->dh_sendport, pktp->dh_recvport, 794 pktp->dh_npkts, pktp->dh_psn); 795 796 RDS_DPRINTF3(LABEL, "BP(%p): npkts: %d psn: %d", bp, 797 pktp->dh_npkts, pktp->dh_psn); 798 799 if (pktp->dh_npkts == 1) { 800 /* single pkt or last packet */ 801 if (pktp->dh_psn != 0) { 802 /* last packet of a segmented message */ 803 ASSERT(ep->ep_seglbp != NULL); 804 ep->ep_seglbp->buf_nextp = bp; 805 ep->ep_seglbp = bp; 806 rds_received_msg(ep, ep->ep_segfbp); 807 ep->ep_segfbp = NULL; 808 ep->ep_seglbp = NULL; 809 } else { 810 /* single packet */ 811 rds_received_msg(ep, bp); 812 } 813 } else { 814 /* multi-pkt msg */ 815 if (pktp->dh_psn == 0) { 816 /* first packet */ 817 ASSERT(ep->ep_segfbp == NULL); 818 ep->ep_segfbp = bp; 819 ep->ep_seglbp = bp; 820 } else { 821 /* intermediate packet */ 822 ASSERT(ep->ep_segfbp != NULL); 823 ep->ep_seglbp->buf_nextp = bp; 824 ep->ep_seglbp = bp; 825 } 826 } 827 828 RDS_DPRINTF4("rds_poll_data_completions", "Return: EP(%p)", ep); 829 830 return (ret); 831 } 832 833 void 834 
rds_recvcq_handler(ibt_cq_hdl_t cq, void *arg) 835 { 836 rds_ep_t *ep; 837 int ret = IBT_SUCCESS; 838 int (*func)(ibt_cq_hdl_t, rds_ep_t *); 839 840 ep = (rds_ep_t *)arg; 841 842 RDS_DPRINTF4("rds_recvcq_handler", "enter: EP(%p)", ep); 843 844 if (ep->ep_type == RDS_EP_TYPE_DATA) { 845 func = rds_poll_data_completions; 846 } else { 847 func = rds_poll_ctrl_completions; 848 } 849 850 do { 851 ret = func(cq, ep); 852 } while (ret != IBT_CQ_EMPTY); 853 854 /* enable the CQ */ 855 ret = ibt_enable_cq_notify(cq, rds_wc_signal); 856 if (ret != IBT_SUCCESS) { 857 RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify " 858 "failed: %d", ep, cq, ret); 859 return; 860 } 861 862 do { 863 ret = func(cq, ep); 864 } while (ret != IBT_CQ_EMPTY); 865 866 RDS_DPRINTF4("rds_recvcq_handler", "Return: EP(%p)", ep); 867 } 868 869 void 870 rds_poll_send_completions(ibt_cq_hdl_t cq, rds_ep_t *ep, boolean_t lock) 871 { 872 ibt_wc_t wc[RDS_NUM_DATA_SEND_WCS]; 873 uint_t npolled, nret, send_error = 0; 874 rds_buf_t *headp, *tailp, *bp; 875 int ret, ix; 876 877 RDS_DPRINTF4("rds_poll_send_completions", "Enter EP(%p)", ep); 878 879 headp = NULL; 880 tailp = NULL; 881 npolled = 0; 882 do { 883 ret = ibt_poll_cq(cq, wc, RDS_NUM_DATA_SEND_WCS, &nret); 884 if (ret != IBT_SUCCESS) { 885 if (ret != IBT_CQ_EMPTY) { 886 RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): " 887 "ibt_poll_cq returned: %d", ep, cq, ret); 888 } else { 889 RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): " 890 "ibt_poll_cq returned: IBT_CQ_EMPTY", 891 ep, cq); 892 } 893 894 break; 895 } 896 897 for (ix = 0; ix < nret; ix++) { 898 if (wc[ix].wc_status == IBT_WC_SUCCESS) { 899 if (wc[ix].wc_type == IBT_WRC_RDMAW) { 900 rds_send_acknowledgement(ep); 901 continue; 902 } 903 904 bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id; 905 ASSERT(bp->buf_state == RDS_SNDBUF_PENDING); 906 bp->buf_state = RDS_SNDBUF_FREE; 907 } else if (wc[ix].wc_status == IBT_WC_WR_FLUSHED_ERR) { 908 RDS_INCR_TXERRS(); 909 RDS_DPRINTF5("rds_poll_send_completions", 910 "EP(%p): WC ID: 
%p ERROR: %d", ep, 911 wc[ix].wc_id, wc[ix].wc_status); 912 913 if (wc[ix].wc_id == RDS_RDMAW_WRID) { 914 mutex_enter(&ep->ep_lock); 915 ep->ep_rdmacnt--; 916 mutex_exit(&ep->ep_lock); 917 continue; 918 } 919 920 bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id; 921 bp->buf_state = RDS_SNDBUF_ERROR; 922 } else { 923 RDS_INCR_TXERRS(); 924 RDS_DPRINTF2("rds_poll_send_completions", 925 "EP(%p): WC ID: %p ERROR: %d", ep, 926 wc[ix].wc_id, wc[ix].wc_status); 927 if (send_error == 0) { 928 rds_session_t *sp = ep->ep_sp; 929 930 /* don't let anyone send anymore */ 931 rw_enter(&sp->session_lock, RW_WRITER); 932 if (sp->session_state != 933 RDS_SESSION_STATE_ERROR) { 934 sp->session_state = 935 RDS_SESSION_STATE_ERROR; 936 /* Make this the active end */ 937 sp->session_type = 938 RDS_SESSION_ACTIVE; 939 } 940 rw_exit(&sp->session_lock); 941 } 942 943 send_error++; 944 945 if (wc[ix].wc_id == RDS_RDMAW_WRID) { 946 mutex_enter(&ep->ep_lock); 947 ep->ep_rdmacnt--; 948 mutex_exit(&ep->ep_lock); 949 continue; 950 } 951 952 bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id; 953 bp->buf_state = RDS_SNDBUF_ERROR; 954 } 955 956 bp->buf_nextp = NULL; 957 if (headp) { 958 tailp->buf_nextp = bp; 959 tailp = bp; 960 } else { 961 headp = bp; 962 tailp = bp; 963 } 964 965 npolled++; 966 } 967 968 if (rds_no_interrupts && (npolled > 100)) { 969 break; 970 } 971 972 if (rds_no_interrupts == 1) { 973 break; 974 } 975 } while (ret != IBT_CQ_EMPTY); 976 977 RDS_DPRINTF5("rds_poll_send_completions", "Npolled: %d send_error: %d", 978 npolled, send_error); 979 980 /* put the buffers to the pool */ 981 if (npolled != 0) { 982 rds_free_send_buf(ep, headp, tailp, npolled, lock); 983 } 984 985 if (send_error != 0) { 986 rds_handle_send_error(ep); 987 } 988 989 RDS_DPRINTF4("rds_poll_send_completions", "Return EP(%p)", ep); 990 } 991 992 void 993 rds_sendcq_handler(ibt_cq_hdl_t cq, void *arg) 994 { 995 rds_ep_t *ep; 996 int ret; 997 998 ep = (rds_ep_t *)arg; 999 1000 RDS_DPRINTF4("rds_sendcq_handler", "Enter: 
EP(%p)", ep); 1001 1002 /* enable the CQ */ 1003 ret = ibt_enable_cq_notify(cq, IBT_NEXT_COMPLETION); 1004 if (ret != IBT_SUCCESS) { 1005 RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify " 1006 "failed: %d", ep, cq, ret); 1007 return; 1008 } 1009 1010 rds_poll_send_completions(cq, ep, B_FALSE); 1011 1012 RDS_DPRINTF4("rds_sendcq_handler", "Return: EP(%p)", ep); 1013 } 1014 1015 void 1016 rds_ep_free_rc_channel(rds_ep_t *ep) 1017 { 1018 int ret; 1019 1020 RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Enter", ep); 1021 1022 ASSERT(mutex_owned(&ep->ep_lock)); 1023 1024 /* free the QP */ 1025 if (ep->ep_chanhdl != NULL) { 1026 /* wait until the RQ is empty */ 1027 (void) ibt_flush_channel(ep->ep_chanhdl); 1028 (void) rds_is_recvq_empty(ep, B_TRUE); 1029 ret = ibt_free_channel(ep->ep_chanhdl); 1030 if (ret != IBT_SUCCESS) { 1031 RDS_DPRINTF1("rds_ep_free_rc_channel", "EP(%p) " 1032 "ibt_free_channel returned: %d", ep, ret); 1033 } 1034 ep->ep_chanhdl = NULL; 1035 } else { 1036 RDS_DPRINTF2("rds_ep_free_rc_channel", 1037 "EP(%p) Channel is ALREADY FREE", ep); 1038 } 1039 1040 /* free the Send CQ */ 1041 if (ep->ep_sendcq != NULL) { 1042 ret = ibt_free_cq(ep->ep_sendcq); 1043 if (ret != IBT_SUCCESS) { 1044 RDS_DPRINTF1("rds_ep_free_rc_channel", 1045 "EP(%p) - for sendcq, ibt_free_cq returned %d", 1046 ep, ret); 1047 } 1048 ep->ep_sendcq = NULL; 1049 } else { 1050 RDS_DPRINTF2("rds_ep_free_rc_channel", 1051 "EP(%p) SendCQ is ALREADY FREE", ep); 1052 } 1053 1054 /* free the Recv CQ */ 1055 if (ep->ep_recvcq != NULL) { 1056 ret = ibt_free_cq(ep->ep_recvcq); 1057 if (ret != IBT_SUCCESS) { 1058 RDS_DPRINTF1("rds_ep_free_rc_channel", 1059 "EP(%p) - for recvcq, ibt_free_cq returned %d", 1060 ep, ret); 1061 } 1062 ep->ep_recvcq = NULL; 1063 } else { 1064 RDS_DPRINTF2("rds_ep_free_rc_channel", 1065 "EP(%p) RecvCQ is ALREADY FREE", ep); 1066 } 1067 1068 RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Return", ep); 1069 } 1070 1071 /* Allocate resources for RC channel */ 
1072 ibt_channel_hdl_t 1073 rds_ep_alloc_rc_channel(rds_ep_t *ep, uint8_t hca_port) 1074 { 1075 int ret = IBT_SUCCESS; 1076 ibt_cq_attr_t scqattr, rcqattr; 1077 ibt_rc_chan_alloc_args_t chanargs; 1078 ibt_channel_hdl_t chanhdl; 1079 rds_session_t *sp; 1080 rds_hca_t *hcap; 1081 1082 RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Enter: 0x%p port: %d", 1083 ep, hca_port); 1084 1085 /* Update the EP with the right IP address and HCA guid */ 1086 sp = ep->ep_sp; 1087 ASSERT(sp != NULL); 1088 rw_enter(&sp->session_lock, RW_READER); 1089 mutex_enter(&ep->ep_lock); 1090 ep->ep_myip = sp->session_myip; 1091 ep->ep_remip = sp->session_remip; 1092 hcap = rds_gid_to_hcap(rdsib_statep, sp->session_lgid); 1093 ep->ep_hca_guid = hcap->hca_guid; 1094 mutex_exit(&ep->ep_lock); 1095 rw_exit(&sp->session_lock); 1096 1097 /* reset taskqpending flag here */ 1098 ep->ep_recvqp.qp_taskqpending = B_FALSE; 1099 1100 if (ep->ep_type == RDS_EP_TYPE_CTRL) { 1101 scqattr.cq_size = MaxCtrlSendBuffers; 1102 scqattr.cq_sched = NULL; 1103 scqattr.cq_flags = IBT_CQ_NO_FLAGS; 1104 1105 rcqattr.cq_size = MaxCtrlRecvBuffers; 1106 rcqattr.cq_sched = NULL; 1107 rcqattr.cq_flags = IBT_CQ_NO_FLAGS; 1108 1109 chanargs.rc_sizes.cs_sq = MaxCtrlSendBuffers; 1110 chanargs.rc_sizes.cs_rq = MaxCtrlRecvBuffers; 1111 chanargs.rc_sizes.cs_sq_sgl = 1; 1112 chanargs.rc_sizes.cs_rq_sgl = 1; 1113 } else { 1114 scqattr.cq_size = MaxDataSendBuffers + RDS_NUM_ACKS; 1115 scqattr.cq_sched = NULL; 1116 scqattr.cq_flags = IBT_CQ_NO_FLAGS; 1117 1118 rcqattr.cq_size = MaxDataRecvBuffers; 1119 rcqattr.cq_sched = NULL; 1120 rcqattr.cq_flags = IBT_CQ_NO_FLAGS; 1121 1122 chanargs.rc_sizes.cs_sq = MaxDataSendBuffers + RDS_NUM_ACKS; 1123 chanargs.rc_sizes.cs_rq = MaxDataRecvBuffers; 1124 chanargs.rc_sizes.cs_sq_sgl = 1; 1125 chanargs.rc_sizes.cs_rq_sgl = 1; 1126 } 1127 1128 if (ep->ep_sendcq == NULL) { 1129 /* returned size is always greater than the requested size */ 1130 ret = ibt_alloc_cq(hcap->hca_hdl, &scqattr, 1131 &ep->ep_sendcq, 
NULL); 1132 if (ret != IBT_SUCCESS) { 1133 RDS_DPRINTF2(LABEL, "ibt_alloc_cq for sendCQ " 1134 "failed, size = %d: %d", scqattr.cq_size, ret); 1135 return (NULL); 1136 } 1137 1138 (void) ibt_set_cq_handler(ep->ep_sendcq, rds_sendcq_handler, 1139 ep); 1140 1141 if (rds_no_interrupts == 0) { 1142 ret = ibt_enable_cq_notify(ep->ep_sendcq, 1143 IBT_NEXT_COMPLETION); 1144 if (ret != IBT_SUCCESS) { 1145 RDS_DPRINTF2(LABEL, 1146 "ibt_enable_cq_notify failed: %d", ret); 1147 (void) ibt_free_cq(ep->ep_sendcq); 1148 ep->ep_sendcq = NULL; 1149 return (NULL); 1150 } 1151 } 1152 } 1153 1154 if (ep->ep_recvcq == NULL) { 1155 /* returned size is always greater than the requested size */ 1156 ret = ibt_alloc_cq(hcap->hca_hdl, &rcqattr, 1157 &ep->ep_recvcq, NULL); 1158 if (ret != IBT_SUCCESS) { 1159 RDS_DPRINTF2(LABEL, "ibt_alloc_cq for recvCQ " 1160 "failed, size = %d: %d", rcqattr.cq_size, ret); 1161 (void) ibt_free_cq(ep->ep_sendcq); 1162 ep->ep_sendcq = NULL; 1163 return (NULL); 1164 } 1165 1166 (void) ibt_set_cq_handler(ep->ep_recvcq, rds_recvcq_handler, 1167 ep); 1168 1169 ret = ibt_enable_cq_notify(ep->ep_recvcq, rds_wc_signal); 1170 if (ret != IBT_SUCCESS) { 1171 RDS_DPRINTF2(LABEL, 1172 "ibt_enable_cq_notify failed: %d", ret); 1173 (void) ibt_free_cq(ep->ep_recvcq); 1174 ep->ep_recvcq = NULL; 1175 (void) ibt_free_cq(ep->ep_sendcq); 1176 ep->ep_sendcq = NULL; 1177 return (NULL); 1178 } 1179 } 1180 1181 chanargs.rc_flags = IBT_ALL_SIGNALED; 1182 chanargs.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR | 1183 IBT_CEP_ATOMIC; 1184 chanargs.rc_hca_port_num = hca_port; 1185 chanargs.rc_scq = ep->ep_sendcq; 1186 chanargs.rc_rcq = ep->ep_recvcq; 1187 chanargs.rc_pd = hcap->hca_pdhdl; 1188 chanargs.rc_srq = NULL; 1189 1190 ret = ibt_alloc_rc_channel(hcap->hca_hdl, 1191 IBT_ACHAN_NO_FLAGS, &chanargs, &chanhdl, NULL); 1192 if (ret != IBT_SUCCESS) { 1193 RDS_DPRINTF2(LABEL, "ibt_alloc_rc_channel fail: %d", 1194 ret); 1195 (void) ibt_free_cq(ep->ep_recvcq); 1196 ep->ep_recvcq = NULL; 
1197 (void) ibt_free_cq(ep->ep_sendcq); 1198 ep->ep_sendcq = NULL; 1199 return (NULL); 1200 } 1201 1202 /* Chan private should contain the ep */ 1203 (void) ibt_set_chan_private(chanhdl, ep); 1204 1205 RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Return: 0x%p", chanhdl); 1206 1207 return (chanhdl); 1208 } 1209 1210 1211 #if 0 1212 1213 /* Return node guid given a port gid */ 1214 ib_guid_t 1215 rds_gid_to_node_guid(ib_gid_t gid) 1216 { 1217 ibt_node_info_t nodeinfo; 1218 int ret; 1219 1220 RDS_DPRINTF4("rds_gid_to_node_guid", "Enter: gid: %llx:%llx", 1221 gid.gid_prefix, gid.gid_guid); 1222 1223 ret = ibt_gid_to_node_info(gid, &nodeinfo); 1224 if (ret != IBT_SUCCESS) { 1225 RDS_DPRINTF2(LABEL, "ibt_gid_node_info for gid: %llx:%llx " 1226 "failed", gid.gid_prefix, gid.gid_guid); 1227 return (0LL); 1228 } 1229 1230 RDS_DPRINTF4("rds_gid_to_node_guid", "Return: Node guid: %llx", 1231 nodeinfo.n_node_guid); 1232 1233 return (nodeinfo.n_node_guid); 1234 } 1235 1236 #endif 1237 1238 static void 1239 rds_handle_portup_event(rds_state_t *statep, ibt_hca_hdl_t hdl, 1240 ibt_async_event_t *event) 1241 { 1242 rds_hca_t *hcap; 1243 ibt_hca_portinfo_t *newpinfop, *oldpinfop; 1244 uint_t newsize, oldsize, nport; 1245 ib_gid_t gid; 1246 int ret; 1247 1248 RDS_DPRINTF2("rds_handle_portup_event", 1249 "Enter: GUID: 0x%llx Statep: %p", event->ev_hca_guid, statep); 1250 1251 hcap = rds_get_hcap(statep, event->ev_hca_guid); 1252 if (hcap == NULL) { 1253 RDS_DPRINTF2("rds_handle_portup_event", "HCA: 0x%llx is " 1254 "not in our list", event->ev_hca_guid); 1255 return; 1256 } 1257 1258 ret = ibt_query_hca_ports(hdl, 0, &newpinfop, &nport, &newsize); 1259 if (ret != IBT_SUCCESS) { 1260 RDS_DPRINTF2(LABEL, "ibt_query_hca_ports failed: %d", ret); 1261 return; 1262 } 1263 1264 oldpinfop = hcap->hca_pinfop; 1265 oldsize = hcap->hca_pinfo_sz; 1266 hcap->hca_pinfop = newpinfop; 1267 hcap->hca_pinfo_sz = newsize; 1268 1269 /* structure copy */ 1270 gid = newpinfop[event->ev_port - 1].p_sgid_tbl[0]; 
1271 1272 /* bind RDS service on the port, pass statep as cm_private */ 1273 ret = ibt_bind_service(statep->rds_srvhdl, gid, NULL, statep, NULL); 1274 if (ret != IBT_SUCCESS) { 1275 RDS_DPRINTF2(LABEL, "Bind service for HCA: 0x%llx Port: %d " 1276 "gid %llx:%llx returned: %d", event->ev_hca_guid, 1277 event->ev_port, gid.gid_prefix, gid.gid_guid, ret); 1278 } 1279 1280 (void) ibt_free_portinfo(oldpinfop, oldsize); 1281 1282 RDS_DPRINTF2("rds_handle_portup_event", "Return: GUID: 0x%llx", 1283 event->ev_hca_guid); 1284 } 1285 1286 static void 1287 rds_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code, 1288 ibt_async_event_t *event) 1289 { 1290 rds_state_t *statep; 1291 1292 RDS_DPRINTF2("rds_async_handler", "Async code: %d", code); 1293 1294 switch (code) { 1295 case IBT_EVENT_PORT_UP: 1296 statep = (rds_state_t *)clntp; 1297 rds_handle_portup_event(statep, hdl, event); 1298 break; 1299 1300 default: 1301 RDS_DPRINTF2(LABEL, "Async event: %d not handled", code); 1302 } 1303 1304 RDS_DPRINTF2("rds_async_handler", "Return: code: %d", code); 1305 } 1306