1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved. 27 * 28 * This software is available to you under a choice of one of two 29 * licenses. You may choose to be licensed under the terms of the GNU 30 * General Public License (GPL) Version 2, available from the file 31 * COPYING in the main directory of this source tree, or the 32 * OpenIB.org BSD license below: 33 * 34 * Redistribution and use in source and binary forms, with or 35 * without modification, are permitted provided that the following 36 * conditions are met: 37 * 38 * - Redistributions of source code must retain the above 39 * copyright notice, this list of conditions and the following 40 * disclaimer. 41 * 42 * - Redistributions in binary form must reproduce the above 43 * copyright notice, this list of conditions and the following 44 * disclaimer in the documentation and/or other materials 45 * provided with the distribution. 46 * 47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 48 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 49 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 50 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 51 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 52 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 53 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 54 * SOFTWARE. 55 * 56 */ 57 /* 58 * Sun elects to include this software in Sun product 59 * under the OpenIB BSD license. 60 * 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 63 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 66 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 67 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 68 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 69 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 70 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 71 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 72 * POSSIBILITY OF SUCH DAMAGE. 73 */ 74 75 #include <sys/stream.h> 76 #include <sys/ib/clients/rds/rdsib_cm.h> 77 #include <sys/ib/clients/rds/rdsib_ib.h> 78 #include <sys/ib/clients/rds/rdsib_buf.h> 79 #include <sys/ib/clients/rds/rdsib_ep.h> 80 #include <sys/ib/clients/rds/rds_kstat.h> 81 #include <sys/zone.h> 82 83 #define RDS_POLL_CQ_IN_2TICKS 1 84 85 /* 86 * This File contains the endpoint related calls 87 */ 88 89 extern boolean_t rds_islocal(ipaddr_t addr); 90 extern uint_t rds_wc_signal; 91 92 #define RDS_LOOPBACK 0 93 #define RDS_LOCAL 1 94 #define RDS_REMOTE 2 95 96 #define IBT_IPADDR 1 97 98 static uint8_t 99 rds_is_port_marked(rds_session_t *sp, in_port_t port, uint_t qualifier) 100 { 101 uint8_t ret; 102 103 switch (qualifier) { 104 case RDS_LOOPBACK: /* loopback */ 105 rw_enter(&rds_loopback_portmap_lock, RW_READER); 106 ret = (rds_loopback_portmap[port/8] & (1 << (port % 8))); 107 rw_exit(&rds_loopback_portmap_lock); 108 break; 109 110 case RDS_LOCAL: /* Session local */ 111 ASSERT(sp != NULL); 112 rw_enter(&sp->session_local_portmap_lock, RW_READER); 113 ret = (sp->session_local_portmap[port/8] & (1 << (port % 8))); 114 rw_exit(&sp->session_local_portmap_lock); 115 break; 116 117 case RDS_REMOTE: /* Session remote */ 118 ASSERT(sp != NULL); 119 rw_enter(&sp->session_remote_portmap_lock, RW_READER); 120 ret = (sp->session_remote_portmap[port/8] & (1 << (port % 8))); 121 rw_exit(&sp->session_remote_portmap_lock); 122 break; 123 } 124 125 return (ret); 126 } 127 128 static uint8_t 129 rds_check_n_mark_port(rds_session_t *sp, in_port_t port, uint_t qualifier) 130 { 131 uint8_t ret; 132 133 switch (qualifier) { 134 case RDS_LOOPBACK: /* loopback */ 135 rw_enter(&rds_loopback_portmap_lock, RW_WRITER); 136 ret = (rds_loopback_portmap[port/8] & (1 << (port % 8))); 137 if (!ret) { 138 /* port is not marked, mark it */ 139 rds_loopback_portmap[port/8] = 140 rds_loopback_portmap[port/8] | (1 << (port % 8)); 141 } 142 rw_exit(&rds_loopback_portmap_lock); 143 break; 144 145 case RDS_LOCAL: /* Session local */ 146 ASSERT(sp != NULL); 147 rw_enter(&sp->session_local_portmap_lock, RW_WRITER); 148 ret = (sp->session_local_portmap[port/8] & (1 << (port % 8))); 149 if (!ret) { 150 /* port is not marked, mark it */ 151 sp->session_local_portmap[port/8] = 152 sp->session_local_portmap[port/8] | 153 (1 << (port % 8)); 154 } 155 rw_exit(&sp->session_local_portmap_lock); 156 break; 157 158 case RDS_REMOTE: /* Session remote */ 159 ASSERT(sp != NULL); 160 rw_enter(&sp->session_remote_portmap_lock, RW_WRITER); 161 ret = (sp->session_remote_portmap[port/8] & (1 << (port % 8))); 162 if (!ret) { 163 /* port is not marked, mark it */ 164 sp->session_remote_portmap[port/8] = 165 sp->session_remote_portmap[port/8] | 166 (1 << (port % 8)); 167 } 168 rw_exit(&sp->session_remote_portmap_lock); 169 break; 170 } 171 172 return (ret); 173 } 174 175 static uint8_t 176 rds_check_n_unmark_port(rds_session_t *sp, in_port_t port, uint_t qualifier) 177 { 178 uint8_t ret; 179 180 switch (qualifier) { 181 case RDS_LOOPBACK: /* loopback */ 182 rw_enter(&rds_loopback_portmap_lock, RW_WRITER); 183 ret = (rds_loopback_portmap[port/8] & (1 << (port % 8))); 184 if (ret) { 185 /* port is marked, unmark it */ 186 rds_loopback_portmap[port/8] = 187 rds_loopback_portmap[port/8] & ~(1 << (port % 8)); 188 } 189 rw_exit(&rds_loopback_portmap_lock); 190 break; 191 192 case RDS_LOCAL: /* Session local */ 193 ASSERT(sp != NULL); 194 rw_enter(&sp->session_local_portmap_lock, RW_WRITER); 195 ret = (sp->session_local_portmap[port/8] & (1 << (port % 8))); 196 if (ret) { 197 /* port is marked, unmark it */ 198 sp->session_local_portmap[port/8] = 199 sp->session_local_portmap[port/8] & 200 ~(1 << (port % 8)); 201 } 202 rw_exit(&sp->session_local_portmap_lock); 203 break; 204 205 case RDS_REMOTE: /* Session remote */ 206 ASSERT(sp != NULL); 207 rw_enter(&sp->session_remote_portmap_lock, RW_WRITER); 208 ret = (sp->session_remote_portmap[port/8] & (1 << (port % 8))); 209 if (ret) { 210 /* port is marked, unmark it */ 211 sp->session_remote_portmap[port/8] = 212 sp->session_remote_portmap[port/8] & 213 ~(1 << (port % 8)); 214 } 215 rw_exit(&sp->session_remote_portmap_lock); 216 break; 217 } 218 219 return (ret); 220 } 221 222 static void 223 rds_mark_all_ports(rds_session_t *sp, uint_t qualifier) 224 { 225 switch (qualifier) { 226 case RDS_LOOPBACK: /* loopback */ 227 rw_enter(&rds_loopback_portmap_lock, RW_WRITER); 228 (void) memset(rds_loopback_portmap, 0xFF, RDS_PORT_MAP_SIZE); 229 rw_exit(&rds_loopback_portmap_lock); 230 break; 231 232 case RDS_LOCAL: /* Session local */ 233 ASSERT(sp != NULL); 234 rw_enter(&sp->session_local_portmap_lock, RW_WRITER); 235 (void) memset(sp->session_local_portmap, 0xFF, 236 RDS_PORT_MAP_SIZE); 237 rw_exit(&sp->session_local_portmap_lock); 238 break; 239 240 case RDS_REMOTE: /* Session remote */ 241 ASSERT(sp != NULL); 242 rw_enter(&sp->session_remote_portmap_lock, RW_WRITER); 243 (void) memset(sp->session_remote_portmap, 0xFF, 244 RDS_PORT_MAP_SIZE); 245 rw_exit(&sp->session_remote_portmap_lock); 246 break; 247 } 248 } 249 250 static void 251 rds_unmark_all_ports(rds_session_t *sp, uint_t qualifier) 252 { 253 switch (qualifier) { 254 case RDS_LOOPBACK: /* loopback */ 255 rw_enter(&rds_loopback_portmap_lock, RW_WRITER); 256 bzero(rds_loopback_portmap, RDS_PORT_MAP_SIZE); 257 rw_exit(&rds_loopback_portmap_lock); 258 break; 259 260 case RDS_LOCAL: /* Session local */ 261 ASSERT(sp != NULL); 262 rw_enter(&sp->session_local_portmap_lock, RW_WRITER); 263 bzero(sp->session_local_portmap, RDS_PORT_MAP_SIZE); 264 rw_exit(&sp->session_local_portmap_lock); 265 break; 266 267 case RDS_REMOTE: /* Session remote */ 268 ASSERT(sp != NULL); 269 rw_enter(&sp->session_remote_portmap_lock, RW_WRITER); 270 bzero(sp->session_remote_portmap, RDS_PORT_MAP_SIZE); 271 rw_exit(&sp->session_remote_portmap_lock); 272 break; 273 } 274 } 275 276 static boolean_t 277 rds_add_session(rds_session_t *sp, boolean_t locked) 278 { 279 boolean_t retval = B_TRUE; 280 281 RDS_DPRINTF2("rds_add_session", "Enter: SP(%p)", sp); 282 283 if (!locked) { 284 rw_enter(&rdsib_statep->rds_sessionlock, RW_WRITER); 285 } 286 287 /* Don't allow more sessions than configured in rdsib.conf */ 288 if (rdsib_statep->rds_nsessions >= (MaxNodes - 1)) { 289 RDS_DPRINTF1("rds_add_session", "Max session limit reached"); 290 retval = B_FALSE; 291 } else { 292 sp->session_nextp = rdsib_statep->rds_sessionlistp; 293 rdsib_statep->rds_sessionlistp = sp; 294 rdsib_statep->rds_nsessions++; 295 RDS_INCR_SESS(); 296 } 297 298 if (!locked) { 299 rw_exit(&rdsib_statep->rds_sessionlock); 300 } 301 302 RDS_DPRINTF2("rds_add_session", "Return: SP(%p)", sp); 303 304 return (retval); 305 } 306 307 /* Session lookup based on destination IP or destination node guid */ 308 rds_session_t * 309 rds_session_lkup(rds_state_t *statep, ipaddr_t remoteip, ib_guid_t node_guid) 310 { 311 rds_session_t *sp; 312 313 RDS_DPRINTF4("rds_session_lkup", "Enter: 0x%p 0x%x 0x%llx", statep, 314 remoteip, node_guid); 315 316 /* A read/write lock is expected, will panic if none of them are held */ 317 ASSERT(rw_lock_held(&statep->rds_sessionlock)); 318 sp = statep->rds_sessionlistp; 319 while (sp) { 320 if ((sp->session_remip == remoteip) || ((node_guid != 0) && 321 (sp->session_rgid.gid_guid == node_guid))) { 322 break; 323 } 324 325 sp = sp->session_nextp; 326 } 327 328 RDS_DPRINTF4("rds_session_lkup", "Return: SP(%p)", sp); 329 330 return (sp); 331 } 332 333 boolean_t 334 rds_session_lkup_by_sp(rds_session_t *sp) 335 { 336 rds_session_t *sessionp; 337 338 RDS_DPRINTF4("rds_session_lkup_by_sp", "Enter: 0x%p", sp); 339 340 rw_enter(&rdsib_statep->rds_sessionlock, RW_READER); 341 sessionp = rdsib_statep->rds_sessionlistp; 342 while (sessionp) { 343 if (sessionp == sp) { 344 rw_exit(&rdsib_statep->rds_sessionlock); 345 return (B_TRUE); 346 } 347 348 sessionp = sessionp->session_nextp; 349 } 350 rw_exit(&rdsib_statep->rds_sessionlock); 351 352 return (B_FALSE); 353 } 354 355 static void 356 rds_ep_fini(rds_ep_t *ep) 357 { 358 RDS_DPRINTF3("rds_ep_fini", "Enter: EP(%p) type: %d", ep, ep->ep_type); 359 360 /* free send pool */ 361 rds_free_send_pool(ep); 362 363 /* free recv pool */ 364 rds_free_recv_pool(ep); 365 366 mutex_enter(&ep->ep_lock); 367 ep->ep_hca_guid = 0; 368 mutex_exit(&ep->ep_lock); 369 370 RDS_DPRINTF3("rds_ep_fini", "Return EP(%p)", ep); 371 } 372 373 /* Assumes SP write lock is held */ 374 int 375 rds_ep_init(rds_ep_t *ep, ib_guid_t hca_guid) 376 { 377 uint_t ret; 378 379 RDS_DPRINTF3("rds_ep_init", "Enter: EP(%p) Type: %d", ep, ep->ep_type); 380 381 /* send pool */ 382 ret = rds_init_send_pool(ep, hca_guid); 383 if (ret != 0) { 384 RDS_DPRINTF2(LABEL, "EP(%p): rds_init_send_pool failed: %d", 385 ep, ret); 386 return (-1); 387 } 388 389 /* recv pool */ 390 ret = rds_init_recv_pool(ep); 391 if (ret != 0) { 392 RDS_DPRINTF2(LABEL, "EP(%p): rds_init_recv_pool failed: %d", 393 ep, ret); 394 rds_free_send_pool(ep); 395 return (-1); 396 } 397 398 /* reset the ep state */ 399 mutex_enter(&ep->ep_lock); 400 ep->ep_state = RDS_EP_STATE_UNCONNECTED; 401 ep->ep_hca_guid = hca_guid; 402 ep->ep_lbufid = NULL; 403 ep->ep_rbufid = NULL; 404 ep->ep_segfbp = NULL; 405 ep->ep_seglbp = NULL; 406 407 /* Initialize the WR to send acknowledgements */ 408 ep->ep_ackwr.wr_id = RDS_RDMAW_WRID; 409 ep->ep_ackwr.wr_flags = IBT_WR_SEND_SOLICIT; 410 ep->ep_ackwr.wr_trans = IBT_RC_SRV; 411 ep->ep_ackwr.wr_opcode = IBT_WRC_RDMAW; 412 ep->ep_ackwr.wr_nds = 1; 413 ep->ep_ackwr.wr_sgl = &ep->ep_ackds; 414 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = NULL; 415 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = 0; 416 mutex_exit(&ep->ep_lock); 417 418 RDS_DPRINTF3("rds_ep_init", "Return: EP(%p) type: %d", ep, ep->ep_type); 419 420 return (0); 421 } 422 423 static int 424 rds_ep_reinit(rds_ep_t *ep, ib_guid_t hca_guid) 425 { 426 int ret; 427 428 RDS_DPRINTF3("rds_ep_reinit", "Enter: EP(%p) Type: %d", 429 ep, ep->ep_type); 430 431 /* Re-initialize send pool */ 432 ret = rds_reinit_send_pool(ep, hca_guid); 433 if (ret != 0) { 434 RDS_DPRINTF2("rds_ep_reinit", 435 "EP(%p): rds_reinit_send_pool failed: %d", ep, ret); 436 return (-1); 437 } 438 439 /* free all the receive buffers in the pool */ 440 rds_free_recv_pool(ep); 441 442 RDS_DPRINTF3("rds_ep_reinit", "Return: EP(%p) Type: %d", 443 ep, ep->ep_type); 444 445 return (0); 446 } 447 448 void 449 rds_session_fini(rds_session_t *sp) 450 { 451 RDS_DPRINTF2("rds_session_fini", "Enter: SP(0x%p)", sp); 452 453 rds_ep_fini(&sp->session_dataep); 454 rds_ep_fini(&sp->session_ctrlep); 455 456 RDS_DPRINTF2("rds_session_fini", "Return: SP(0x%p)", sp); 457 } 458 459 /* 460 * Allocate and initialize the resources needed for the control and 461 * data channels 462 */ 463 int 464 rds_session_init(rds_session_t *sp) 465 { 466 int ret; 467 rds_hca_t *hcap; 468 ib_guid_t hca_guid; 469 470 RDS_DPRINTF2("rds_session_init", "Enter: SP(0x%p)", sp); 471 472 /* CALLED WITH SESSION WRITE LOCK */ 473 474 hcap = rds_gid_to_hcap(rdsib_statep, sp->session_lgid); 475 if (hcap == NULL) { 476 RDS_DPRINTF2("rds_session_init", "SGID is on an uninitialized " 477 "HCA: %llx", sp->session_lgid.gid_guid); 478 return (-1); 479 } 480 481 hca_guid = hcap->hca_guid; 482 sp->session_hca_guid = hca_guid; 483 484 /* allocate and initialize the ctrl channel */ 485 ret = rds_ep_init(&sp->session_ctrlep, hca_guid); 486 if (ret != 0) { 487 RDS_DPRINTF2(LABEL, "SP(%p): Ctrl EP(%p) initialization " 488 "failed", sp, &sp->session_ctrlep); 489 return (-1); 490 } 491 492 RDS_DPRINTF2(LABEL, "SP(%p) Control EP(%p)", sp, &sp->session_ctrlep); 493 494 /* allocate and initialize the data channel */ 495 ret = rds_ep_init(&sp->session_dataep, hca_guid); 496 if (ret != 0) { 497 RDS_DPRINTF2(LABEL, "SP(%p): Data EP(%p) initialization " 498 "failed", sp, &sp->session_dataep); 499 rds_ep_fini(&sp->session_ctrlep); 500 return (-1); 501 } 502 503 /* Clear the portmaps */ 504 rds_unmark_all_ports(sp, RDS_LOCAL); 505 rds_unmark_all_ports(sp, RDS_REMOTE); 506 507 RDS_DPRINTF2(LABEL, "SP(%p) Data EP(%p)", sp, &sp->session_dataep); 508 509 RDS_DPRINTF2("rds_session_init", "Return"); 510 511 return (0); 512 } 513 514 /* 515 * This should be called before moving a session from ERROR state to 516 * INIT state. This will update the HCA keys incase the session has moved from 517 * one HCA to another. 518 */ 519 int 520 rds_session_reinit(rds_session_t *sp, ib_gid_t lgid) 521 { 522 rds_hca_t *hcap, *hcap1; 523 int ret; 524 525 RDS_DPRINTF2("rds_session_reinit", "Enter: SP(0x%p) - state: %d", 526 sp, sp->session_state); 527 528 /* CALLED WITH SESSION WRITE LOCK */ 529 530 /* Clear the portmaps */ 531 rds_unmark_all_ports(sp, RDS_LOCAL); 532 rds_unmark_all_ports(sp, RDS_REMOTE); 533 534 /* This should not happen but just a safe guard */ 535 if (sp->session_dataep.ep_ack_addr == NULL) { 536 RDS_DPRINTF2("rds_session_reinit", 537 "ERROR: Unexpected: SP(0x%p) - state: %d", 538 sp, sp->session_state); 539 return (-1); 540 } 541 542 /* make the last buffer as the acknowledged */ 543 *(uintptr_t *)sp->session_dataep.ep_ack_addr = 544 (uintptr_t)sp->session_dataep.ep_sndpool.pool_tailp; 545 546 hcap = rds_gid_to_hcap(rdsib_statep, lgid); 547 if (hcap == NULL) { 548 RDS_DPRINTF2("rds_session_reinit", "SGID is on an " 549 "uninitialized HCA: %llx", lgid.gid_guid); 550 return (-1); 551 } 552 553 hcap1 = rds_gid_to_hcap(rdsib_statep, sp->session_lgid); 554 if (hcap1 == NULL) { 555 RDS_DPRINTF2("rds_session_reinit", "Seems like HCA %llx " 556 "is unplugged", sp->session_lgid.gid_guid); 557 } else if (hcap->hca_guid == hcap1->hca_guid) { 558 /* 559 * No action is needed as the session did not move across 560 * HCAs 561 */ 562 RDS_DPRINTF2("rds_session_reinit", "Failover on the same HCA"); 563 return (0); 564 } 565 566 RDS_DPRINTF2("rds_session_reinit", "Failover across HCAs"); 567 568 sp->session_hca_guid = hcap->hca_guid; 569 570 /* re-initialize the control channel */ 571 ret = rds_ep_reinit(&sp->session_ctrlep, hcap->hca_guid); 572 if (ret != 0) { 573 RDS_DPRINTF2("rds_session_reinit", 574 "SP(%p): Ctrl EP(%p) re-initialization failed", 575 sp, &sp->session_ctrlep); 576 return (-1); 577 } 578 579 RDS_DPRINTF2("rds_session_reinit", "SP(%p) Control EP(%p)", 580 sp, &sp->session_ctrlep); 581 582 /* re-initialize the data channel */ 583 ret = rds_ep_reinit(&sp->session_dataep, hcap->hca_guid); 584 if (ret != 0) { 585 RDS_DPRINTF2("rds_session_reinit", 586 "SP(%p): Data EP(%p) re-initialization failed", 587 sp, &sp->session_dataep); 588 return (-1); 589 } 590 591 RDS_DPRINTF2("rds_session_reinit", "SP(%p) Data EP(%p)", 592 sp, &sp->session_dataep); 593 594 sp->session_lgid = lgid; 595 596 RDS_DPRINTF2("rds_session_reinit", "Return: SP(0x%p)", sp); 597 598 return (0); 599 } 600 601 static int 602 rds_session_connect(rds_session_t *sp) 603 { 604 ibt_channel_hdl_t ctrlchan, datachan; 605 rds_ep_t *ep; 606 int ret; 607 608 RDS_DPRINTF2("rds_session_connect", "Enter SP(%p)", sp); 609 610 sp->session_pinfo.pi_sid = rdsib_statep->rds_service_id; 611 612 /* Override the packet life time based on the conf file */ 613 if (IBPktLifeTime != 0) { 614 sp->session_pinfo.pi_prim_cep_path.cep_cm_opaque1 = 615 IBPktLifeTime; 616 } 617 618 /* Session type may change if we run into peer-to-peer case. */ 619 rw_enter(&sp->session_lock, RW_READER); 620 if (sp->session_type == RDS_SESSION_PASSIVE) { 621 RDS_DPRINTF2("rds_session_connect", "SP(%p) is no longer the " 622 "active end", sp); 623 rw_exit(&sp->session_lock); 624 return (0); /* return success */ 625 } 626 rw_exit(&sp->session_lock); 627 628 /* connect the data ep first */ 629 ep = &sp->session_dataep; 630 mutex_enter(&ep->ep_lock); 631 if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) { 632 ep->ep_state = RDS_EP_STATE_ACTIVE_PENDING; 633 mutex_exit(&ep->ep_lock); 634 ret = rds_open_rc_channel(ep, &sp->session_pinfo, IBT_BLOCKING, 635 &datachan); 636 if (ret != IBT_SUCCESS) { 637 RDS_DPRINTF2(LABEL, "EP(%p): rds_open_rc_channel " 638 "failed: %d", ep, ret); 639 return (-1); 640 } 641 sp->session_dataep.ep_chanhdl = datachan; 642 } else { 643 RDS_DPRINTF2(LABEL, "SP(%p) Data EP(%p) is in " 644 "unexpected state: %d", sp, ep, ep->ep_state); 645 mutex_exit(&ep->ep_lock); 646 return (-1); 647 } 648 649 RDS_DPRINTF3(LABEL, "SP(%p) EP(%p): Data channel is connected", 650 sp, ep); 651 652 ep = &sp->session_ctrlep; 653 mutex_enter(&ep->ep_lock); 654 if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) { 655 ep->ep_state = RDS_EP_STATE_ACTIVE_PENDING; 656 mutex_exit(&ep->ep_lock); 657 ret = rds_open_rc_channel(ep, &sp->session_pinfo, IBT_BLOCKING, 658 &ctrlchan); 659 if (ret != IBT_SUCCESS) { 660 RDS_DPRINTF2(LABEL, "EP(%p): rds_open_rc_channel " 661 "failed: %d", ep, ret); 662 return (-1); 663 } 664 sp->session_ctrlep.ep_chanhdl = ctrlchan; 665 } else { 666 RDS_DPRINTF2(LABEL, "SP(%p) Control EP(%p) is in " 667 "unexpected state: %d", sp, ep, ep->ep_state); 668 mutex_exit(&ep->ep_lock); 669 return (-1); 670 } 671 672 RDS_DPRINTF2(LABEL, "Session (%p) 0x%x <--> 0x%x is CONNECTED", 673 sp, sp->session_myip, sp->session_remip); 674 675 RDS_DPRINTF2("rds_session_connect", "Return SP(%p)", sp); 676 677 return (0); 678 } 679 680 /* 681 * Can be called with or without session_lock. 682 */ 683 void 684 rds_session_close(rds_session_t *sp, ibt_execution_mode_t mode, uint_t wait) 685 { 686 rds_ep_t *ep; 687 688 RDS_DPRINTF2("rds_session_close", "SP(%p) State: %d", sp, 689 sp->session_state); 690 691 ep = &sp->session_dataep; 692 RDS_DPRINTF3(LABEL, "EP(%p) State: %d", ep, ep->ep_state); 693 694 /* wait until the SQ is empty before closing */ 695 if (wait != 0) { 696 (void) rds_is_sendq_empty(ep, wait); 697 } 698 699 mutex_enter(&ep->ep_lock); 700 while (ep->ep_state == RDS_EP_STATE_CLOSING) { 701 mutex_exit(&ep->ep_lock); 702 delay(drv_usectohz(300000)); 703 mutex_enter(&ep->ep_lock); 704 } 705 706 if (ep->ep_state == RDS_EP_STATE_CONNECTED) { 707 ep->ep_state = RDS_EP_STATE_CLOSING; 708 mutex_exit(&ep->ep_lock); 709 (void) rds_close_rc_channel(ep->ep_chanhdl, mode); 710 if (wait == 0) { 711 /* make sure all WCs are flushed before proceeding */ 712 (void) rds_is_sendq_empty(ep, 1); 713 } 714 mutex_enter(&ep->ep_lock); 715 } 716 rds_ep_free_rc_channel(ep); 717 ep->ep_state = RDS_EP_STATE_UNCONNECTED; 718 ep->ep_segfbp = NULL; 719 ep->ep_seglbp = NULL; 720 mutex_exit(&ep->ep_lock); 721 722 ep = &sp->session_ctrlep; 723 RDS_DPRINTF3(LABEL, "EP(%p) State: %d", ep, ep->ep_state); 724 725 /* wait until the SQ is empty before closing */ 726 if (wait != 0) { 727 (void) rds_is_sendq_empty(ep, wait); 728 } 729 730 mutex_enter(&ep->ep_lock); 731 while (ep->ep_state == RDS_EP_STATE_CLOSING) { 732 mutex_exit(&ep->ep_lock); 733 delay(drv_usectohz(300000)); 734 mutex_enter(&ep->ep_lock); 735 } 736 737 if (ep->ep_state == RDS_EP_STATE_CONNECTED) { 738 ep->ep_state = RDS_EP_STATE_CLOSING; 739 mutex_exit(&ep->ep_lock); 740 (void) rds_close_rc_channel(ep->ep_chanhdl, mode); 741 if (wait == 0) { 742 /* make sure all WCs are flushed before proceeding */ 743 (void) rds_is_sendq_empty(ep, 1); 744 } 745 mutex_enter(&ep->ep_lock); 746 } 747 rds_ep_free_rc_channel(ep); 748 ep->ep_state = RDS_EP_STATE_UNCONNECTED; 749 ep->ep_segfbp = NULL; 750 ep->ep_seglbp = NULL; 751 mutex_exit(&ep->ep_lock); 752 753 RDS_DPRINTF2("rds_session_close", "Return (%p)", sp); 754 } 755 756 /* Free the session */ 757 static void 758 rds_destroy_session(rds_session_t *sp) 759 { 760 rds_ep_t *ep; 761 rds_bufpool_t *pool; 762 763 ASSERT((sp->session_state == RDS_SESSION_STATE_CLOSED) || 764 (sp->session_state == RDS_SESSION_STATE_FAILED) || 765 (sp->session_state == RDS_SESSION_STATE_FINI) || 766 (sp->session_state == RDS_SESSION_STATE_PASSIVE_CLOSING)); 767 768 rw_enter(&sp->session_lock, RW_READER); 769 RDS_DPRINTF2("rds_destroy_session", "SP(%p) State: %d", sp, 770 sp->session_state); 771 while (!((sp->session_state == RDS_SESSION_STATE_CLOSED) || 772 (sp->session_state == RDS_SESSION_STATE_FAILED) || 773 (sp->session_state == RDS_SESSION_STATE_FINI))) { 774 rw_exit(&sp->session_lock); 775 delay(drv_usectohz(1000000)); 776 rw_enter(&sp->session_lock, RW_READER); 777 RDS_DPRINTF2("rds_destroy_session", "SP(%p) State: %d WAITING " 778 "ON SESSION", sp, sp->session_state); 779 } 780 rw_exit(&sp->session_lock); 781 782 /* data channel */ 783 ep = &sp->session_dataep; 784 785 /* send pool locks */ 786 pool = &ep->ep_sndpool; 787 cv_destroy(&pool->pool_cv); 788 mutex_destroy(&pool->pool_lock); 789 790 /* recv pool locks */ 791 pool = &ep->ep_rcvpool; 792 cv_destroy(&pool->pool_cv); 793 mutex_destroy(&pool->pool_lock); 794 mutex_destroy(&ep->ep_recvqp.qp_lock); 795 796 /* control channel */ 797 ep = &sp->session_ctrlep; 798 799 /* send pool locks */ 800 pool = &ep->ep_sndpool; 801 cv_destroy(&pool->pool_cv); 802 mutex_destroy(&pool->pool_lock); 803 804 /* recv pool locks */ 805 pool = &ep->ep_rcvpool; 806 cv_destroy(&pool->pool_cv); 807 mutex_destroy(&pool->pool_lock); 808 mutex_destroy(&ep->ep_recvqp.qp_lock); 809 810 /* session */ 811 rw_destroy(&sp->session_lock); 812 rw_destroy(&sp->session_local_portmap_lock); 813 rw_destroy(&sp->session_remote_portmap_lock); 814 815 /* free the session */ 816 kmem_free(sp, sizeof (rds_session_t)); 817 818 RDS_DPRINTF2("rds_destroy_session", "SP(%p) Return", sp); 819 } 820 821 /* This is called on the taskq thread */ 822 void 823 rds_failover_session(void *arg) 824 { 825 rds_session_t *sp = (rds_session_t *)arg; 826 ib_gid_t lgid, rgid; 827 ipaddr_t myip, remip; 828 int ret, cnt = 0; 829 uint8_t sp_state; 830 831 RDS_DPRINTF2("rds_failover_session", "Enter: (%p)", sp); 832 833 /* Make sure the session is still alive */ 834 if (rds_session_lkup_by_sp(sp) == B_FALSE) { 835 RDS_DPRINTF2("rds_failover_session", 836 "Return: SP(%p) not ALIVE", sp); 837 return; 838 } 839 840 RDS_INCR_FAILOVERS(); 841 842 rw_enter(&sp->session_lock, RW_WRITER); 843 if (sp->session_type != RDS_SESSION_ACTIVE) { 844 /* 845 * The remote side must have seen the error and initiated 846 * a re-connect. 847 */ 848 RDS_DPRINTF2("rds_failover_session", 849 "SP(%p) has become passive", sp); 850 rw_exit(&sp->session_lock); 851 return; 852 } 853 sp->session_failover = 1; 854 sp_state = sp->session_state; 855 rw_exit(&sp->session_lock); 856 857 /* 858 * The session is in ERROR state but close both channels 859 * for a clean start. 860 */ 861 if (sp_state == RDS_SESSION_STATE_ERROR) { 862 rds_session_close(sp, IBT_BLOCKING, 1); 863 } 864 865 /* wait 1 sec before re-connecting */ 866 delay(drv_usectohz(1000000)); 867 868 do { 869 ibt_ip_path_attr_t ipattr; 870 ibt_ip_addr_t dstip; 871 872 /* The ipaddr should be in the network order */ 873 myip = sp->session_myip; 874 remip = sp->session_remip; 875 ret = rds_sc_path_lookup(&myip, &remip); 876 if (ret == 0) { 877 RDS_DPRINTF2(LABEL, "Path not found (0x%x 0x%x)", 878 myip, remip); 879 } 880 /* check if we have (new) path from the source to destination */ 881 lgid.gid_prefix = 0; 882 lgid.gid_guid = 0; 883 rgid.gid_prefix = 0; 884 rgid.gid_guid = 0; 885 886 bzero(&ipattr, sizeof (ibt_ip_path_attr_t)); 887 dstip.family = AF_INET; 888 dstip.un.ip4addr = remip; 889 ipattr.ipa_dst_ip = &dstip; 890 ipattr.ipa_src_ip.family = AF_INET; 891 ipattr.ipa_src_ip.un.ip4addr = myip; 892 ipattr.ipa_ndst = 1; 893 ipattr.ipa_max_paths = 1; 894 RDS_DPRINTF2(LABEL, "ibt_get_ip_paths: 0x%x <-> 0x%x ", 895 myip, remip); 896 ret = ibt_get_ip_paths(rdsib_statep->rds_ibhdl, 897 IBT_PATH_NO_FLAGS, &ipattr, &sp->session_pinfo, NULL, NULL); 898 if (ret == IBT_SUCCESS) { 899 RDS_DPRINTF2(LABEL, "ibt_get_ip_paths success"); 900 lgid = sp->session_pinfo. 901 pi_prim_cep_path.cep_adds_vect.av_sgid; 902 rgid = sp->session_pinfo. 903 pi_prim_cep_path.cep_adds_vect.av_dgid; 904 break; 905 } 906 907 RDS_DPRINTF2(LABEL, "ibt_get_ip_paths failed, ret: %d ", ret); 908 909 /* wait 1 sec before re-trying */ 910 delay(drv_usectohz(1000000)); 911 cnt++; 912 } while (cnt < 5); 913 914 if (ret != IBT_SUCCESS) { 915 rw_enter(&sp->session_lock, RW_WRITER); 916 if (sp->session_type == RDS_SESSION_ACTIVE) { 917 rds_session_fini(sp); 918 sp->session_state = RDS_SESSION_STATE_FAILED; 919 sp->session_failover = 0; 920 RDS_DPRINTF3("rds_failover_session", 921 "SP(%p) State RDS_SESSION_STATE_FAILED", sp); 922 } else { 923 RDS_DPRINTF2("rds_failover_session", 924 "SP(%p) has become passive", sp); 925 } 926 rw_exit(&sp->session_lock); 927 return; 928 } 929 930 RDS_DPRINTF2(LABEL, "lgid: %llx:%llx rgid: %llx:%llx", 931 lgid.gid_prefix, lgid.gid_guid, rgid.gid_prefix, 932 rgid.gid_guid); 933 934 rw_enter(&sp->session_lock, RW_WRITER); 935 if (sp->session_type != RDS_SESSION_ACTIVE) { 936 /* 937 * The remote side must have seen the error and initiated 938 * a re-connect. 939 */ 940 RDS_DPRINTF2("rds_failover_session", 941 "SP(%p) has become passive", sp); 942 rw_exit(&sp->session_lock); 943 return; 944 } 945 946 /* move the session to init state */ 947 ret = rds_session_reinit(sp, lgid); 948 sp->session_lgid = lgid; 949 sp->session_rgid = rgid; 950 if (ret != 0) { 951 rds_session_fini(sp); 952 sp->session_state = RDS_SESSION_STATE_FAILED; 953 sp->session_failover = 0; 954 RDS_DPRINTF3("rds_failover_session", 955 "SP(%p) State RDS_SESSION_STATE_FAILED", sp); 956 rw_exit(&sp->session_lock); 957 return; 958 } else { 959 sp->session_state = RDS_SESSION_STATE_INIT; 960 RDS_DPRINTF3("rds_failover_session", 961 "SP(%p) State RDS_SESSION_STATE_INIT", sp); 962 } 963 rw_exit(&sp->session_lock); 964 965 rds_session_open(sp); 966 967 RDS_DPRINTF2("rds_failover_session", "Return: (%p)", sp); 968 } 969 970 void 971 rds_handle_send_error(rds_ep_t *ep) 972 { 973 if (rds_is_sendq_empty(ep, 0)) { 974 /* Session should already be in ERROR, try to reconnect */ 975 RDS_DPRINTF2("rds_handle_send_error", 976 "Dispatching taskq to failover SP(%p)", ep->ep_sp); 977 (void) ddi_taskq_dispatch(rds_taskq, rds_failover_session, 978 (void *)ep->ep_sp, DDI_SLEEP); 979 } 980 } 981 982 /* 983 * Called in the CM handler on the passive side 984 * Called on a taskq thread. 985 */ 986 void 987 rds_cleanup_passive_session(void *arg) 988 { 989 rds_session_t *sp = arg; 990 991 RDS_DPRINTF2("rds_cleanup_passive_session", "SP(%p) State: %d", sp, 992 sp->session_state); 993 ASSERT((sp->session_state == RDS_SESSION_STATE_CLOSED) || 994 (sp->session_state == RDS_SESSION_STATE_ERROR)); 995 996 rds_session_close(sp, IBT_BLOCKING, 1); 997 998 rw_enter(&sp->session_lock, RW_WRITER); 999 if (sp->session_state == RDS_SESSION_STATE_CLOSED) { 1000 rds_session_fini(sp); 1001 sp->session_state = RDS_SESSION_STATE_FINI; 1002 sp->session_failover = 0; 1003 RDS_DPRINTF3("rds_cleanup_passive_session", 1004 "SP(%p) State RDS_SESSION_STATE_FINI", sp); 1005 } else if (sp->session_state == RDS_SESSION_STATE_ERROR) { 1006 rds_session_fini(sp); 1007 sp->session_state = RDS_SESSION_STATE_FAILED; 1008 sp->session_failover = 0; 1009 RDS_DPRINTF3("rds_cleanup_passive_session", 1010 "SP(%p) State RDS_SESSION_STATE_FAILED", sp); 1011 } 1012 rw_exit(&sp->session_lock); 1013 1014 RDS_DPRINTF2("rds_cleanup_passive_session", "Return: SP (%p)", sp); 1015 } 1016 1017 /* 1018 * Called by the CM handler on the passive side 1019 * Called with WRITE lock on the session 1020 */ 1021 void 1022 rds_passive_session_fini(rds_session_t *sp) 1023 { 1024 rds_ep_t *ep; 1025 1026 RDS_DPRINTF2("rds_passive_session_fini", "SP(%p) State: %d", sp, 1027 sp->session_state); 1028 ASSERT((sp->session_state == RDS_SESSION_STATE_CLOSED) || 1029 (sp->session_state == RDS_SESSION_STATE_ERROR)); 1030 1031 /* clean the data channel */ 1032 ep = &sp->session_dataep; 1033 (void) rds_is_sendq_empty(ep, 1); 1034 mutex_enter(&ep->ep_lock); 1035 RDS_DPRINTF2("rds_passive_session_fini", "EP(%p) State: %d", ep, 1036 ep->ep_state); 1037 rds_ep_free_rc_channel(ep); 1038 mutex_exit(&ep->ep_lock); 1039 1040 /* clean the control channel */ 1041 ep = &sp->session_ctrlep; 1042 (void) rds_is_sendq_empty(ep, 1); 1043 mutex_enter(&ep->ep_lock); 1044 RDS_DPRINTF2("rds_passive_session_fini", "EP(%p) State: %d", ep, 1045 ep->ep_state); 1046 rds_ep_free_rc_channel(ep); 1047 mutex_exit(&ep->ep_lock); 1048 1049 rds_session_fini(sp); 1050 sp->session_failover = 0; 1051 1052 RDS_DPRINTF2("rds_passive_session_fini", "Return: SP (%p)", sp); 1053 } 1054 1055 void 1056 rds_close_this_session(rds_session_t *sp, uint8_t wait) 1057 { 1058 switch (sp->session_state) { 1059 case RDS_SESSION_STATE_CONNECTED: 1060 sp->session_state = RDS_SESSION_STATE_ACTIVE_CLOSING; 1061 rw_exit(&sp->session_lock); 1062 1063 rds_session_close(sp, IBT_BLOCKING, wait); 1064 1065 rw_enter(&sp->session_lock, RW_WRITER); 1066 sp->session_state = RDS_SESSION_STATE_CLOSED; 1067 RDS_DPRINTF3("rds_close_sessions", 1068 "SP(%p) State RDS_SESSION_STATE_CLOSED", sp); 1069 rds_session_fini(sp); 1070 sp->session_state = RDS_SESSION_STATE_FINI; 1071 sp->session_failover = 0; 1072 RDS_DPRINTF3("rds_close_sessions", 1073 "SP(%p) State RDS_SESSION_STATE_FINI", sp); 1074 break; 1075 1076 case RDS_SESSION_STATE_ERROR: 1077 case RDS_SESSION_STATE_PASSIVE_CLOSING: 1078 case RDS_SESSION_STATE_INIT: 1079 sp->session_state = RDS_SESSION_STATE_ACTIVE_CLOSING; 1080 rw_exit(&sp->session_lock); 1081 1082 rds_session_close(sp, IBT_BLOCKING, wait); 1083 1084 rw_enter(&sp->session_lock, RW_WRITER); 1085 sp->session_state = RDS_SESSION_STATE_CLOSED; 1086 RDS_DPRINTF3("rds_close_sessions", 1087 "SP(%p) State RDS_SESSION_STATE_CLOSED", sp); 1088 /* FALLTHRU */ 1089 case RDS_SESSION_STATE_CLOSED: 1090 rds_session_fini(sp); 1091 sp->session_state = RDS_SESSION_STATE_FINI; 1092 sp->session_failover = 0; 1093 RDS_DPRINTF3("rds_close_sessions", 1094 "SP(%p) State RDS_SESSION_STATE_FINI", sp); 1095 break; 1096 } 1097 } 1098 1099 /* 1100 * Can be called: 1101 * 1. on driver detach 1102 * 2. on taskq thread 1103 * arg is always NULL 1104 */ 1105 /* ARGSUSED */ 1106 void 1107 rds_close_sessions(void *arg) 1108 { 1109 rds_session_t *sp, *spnextp; 1110 1111 RDS_DPRINTF2("rds_close_sessions", "Enter"); 1112 1113 /* wait until all the buffers are freed by the sockets */ 1114 while (RDS_GET_RXPKTS_PEND() != 0) { 1115 /* wait one second and try again */ 1116 RDS_DPRINTF2("rds_close_sessions", "waiting on " 1117 "pending packets", RDS_GET_RXPKTS_PEND()); 1118 delay(drv_usectohz(1000000)); 1119 } 1120 RDS_DPRINTF2("rds_close_sessions", "No more RX packets pending"); 1121 1122 /* close all the sessions */ 1123 rw_enter(&rdsib_statep->rds_sessionlock, RW_WRITER); 1124 sp = rdsib_statep->rds_sessionlistp; 1125 while (sp) { 1126 rw_enter(&sp->session_lock, RW_WRITER); 1127 RDS_DPRINTF2("rds_close_sessions", "SP(%p) State: %d", sp, 1128 sp->session_state); 1129 rds_close_this_session(sp, 2); 1130 rw_exit(&sp->session_lock); 1131 sp = sp->session_nextp; 1132 } 1133 1134 sp = rdsib_statep->rds_sessionlistp; 1135 rdsib_statep->rds_sessionlistp = NULL; 1136 rdsib_statep->rds_nsessions = 0; 1137 rw_exit(&rdsib_statep->rds_sessionlock); 1138 1139 while (sp) { 1140 spnextp = sp->session_nextp; 1141 rds_destroy_session(sp); 1142 RDS_DECR_SESS(); 1143 sp = spnextp; 1144 } 1145 1146 /* free the global pool */ 1147 rds_free_recv_caches(rdsib_statep); 1148 1149 RDS_DPRINTF2("rds_close_sessions", "Return"); 1150 } 1151 1152 void 1153 rds_session_open(rds_session_t *sp) 1154 { 1155 int ret; 1156 1157 RDS_DPRINTF2("rds_session_open", "Enter SP(%p)", sp); 1158 1159 ret = rds_session_connect(sp); 1160 if (ret == -1) { 1161 /* 1162 * may be the session has become passive due to 1163 * hitting peer-to-peer case 1164 */ 1165 rw_enter(&sp->session_lock, RW_READER); 1166 if (sp->session_type == RDS_SESSION_PASSIVE) { 1167 RDS_DPRINTF2("rds_session_open", "SP(%p) " 1168 "has become passive from active", sp); 1169 rw_exit(&sp->session_lock); 1170 return; 1171 } 1172 1173 /* get the lock for writing */ 1174 rw_exit(&sp->session_lock); 1175 rw_enter(&sp->session_lock, RW_WRITER); 1176 sp->session_state = RDS_SESSION_STATE_ERROR; 1177 RDS_DPRINTF3("rds_session_open", 1178 "SP(%p) State RDS_SESSION_STATE_ERROR", sp); 1179 rw_exit(&sp->session_lock); 1180 1181 /* Connect request failed */ 1182 rds_session_close(sp, IBT_BLOCKING, 1); 1183 1184 rw_enter(&sp->session_lock, RW_WRITER); 1185 rds_session_fini(sp); 1186 sp->session_state = RDS_SESSION_STATE_FAILED; 1187 sp->session_failover = 0; 1188 RDS_DPRINTF3("rds_session_open", 1189 "SP(%p) State RDS_SESSION_STATE_FAILED", sp); 1190 rw_exit(&sp->session_lock); 1191 1192 return; 1193 } 1194 1195 RDS_DPRINTF2("rds_session_open", "Return: SP(%p)", sp); 1196 } 1197 1198 /* 1199 * Creates a session and inserts it into the list of sessions. The session 1200 * state would be CREATED. 1201 * Return Values: 1202 * EWOULDBLOCK 1203 */ 1204 rds_session_t * 1205 rds_session_create(rds_state_t *statep, ipaddr_t localip, ipaddr_t remip, 1206 ibt_cm_req_rcv_t *reqp, uint8_t type) 1207 { 1208 ib_gid_t lgid, rgid; 1209 rds_session_t *newp, *oldp; 1210 rds_ep_t *dataep, *ctrlep; 1211 rds_bufpool_t *pool; 1212 int ret; 1213 1214 RDS_DPRINTF2("rds_session_create", "Enter: 0x%p 0x%x 0x%x, type: %d", 1215 statep, localip, remip, type); 1216 1217 /* Check if there is space for a new session */ 1218 rw_enter(&statep->rds_sessionlock, RW_READER); 1219 if (statep->rds_nsessions >= (MaxNodes - 1)) { 1220 rw_exit(&statep->rds_sessionlock); 1221 RDS_DPRINTF1("rds_session_create", "No More Sessions allowed"); 1222 return (NULL); 1223 } 1224 rw_exit(&statep->rds_sessionlock); 1225 1226 /* Allocate and initialize global buffer pool */ 1227 ret = rds_init_recv_caches(statep); 1228 if (ret != 0) { 1229 RDS_DPRINTF2(LABEL, "Buffer Cache Initialization failed"); 1230 return (NULL); 1231 } 1232 1233 /* enough memory for session (includes 2 endpoints) */ 1234 newp = kmem_zalloc(sizeof (rds_session_t), KM_SLEEP); 1235 1236 newp->session_remip = remip; 1237 newp->session_myip = localip; 1238 newp->session_type = type; 1239 newp->session_state = RDS_SESSION_STATE_CREATED; 1240 RDS_DPRINTF3("rds_session_create", 1241 "SP(%p) State RDS_SESSION_STATE_CREATED", newp); 1242 rw_init(&newp->session_lock, NULL, RW_DRIVER, NULL); 1243 rw_init(&newp->session_local_portmap_lock, NULL, RW_DRIVER, NULL); 1244 rw_init(&newp->session_remote_portmap_lock, NULL, RW_DRIVER, NULL); 1245 1246 /* Initialize data endpoint */ 1247 dataep = &newp->session_dataep; 1248 dataep->ep_remip = newp->session_remip; 1249 dataep->ep_myip = newp->session_myip; 1250 dataep->ep_state = RDS_EP_STATE_UNCONNECTED; 1251 dataep->ep_sp = newp; 1252 dataep->ep_type = RDS_EP_TYPE_DATA; 1253 mutex_init(&dataep->ep_lock, NULL, MUTEX_DRIVER, NULL); 1254 1255 /* Initialize send pool locks */ 1256 pool = &dataep->ep_sndpool; 1257 mutex_init(&pool->pool_lock, NULL, MUTEX_DRIVER, NULL); 1258 cv_init(&pool->pool_cv, NULL, CV_DRIVER, NULL); 1259 1260 /* Initialize recv pool locks */ 1261 pool = &dataep->ep_rcvpool; 1262 mutex_init(&dataep->ep_recvqp.qp_lock, NULL, MUTEX_DRIVER, NULL); 1263 mutex_init(&pool->pool_lock, NULL, MUTEX_DRIVER, NULL); 1264 cv_init(&pool->pool_cv, NULL, CV_DRIVER, NULL); 1265 1266 /* Initialize control endpoint */ 1267 ctrlep = &newp->session_ctrlep; 1268 ctrlep->ep_remip = newp->session_remip; 1269 ctrlep->ep_myip = newp->session_myip; 1270 ctrlep->ep_state = RDS_EP_STATE_UNCONNECTED; 1271 ctrlep->ep_sp = newp; 1272 ctrlep->ep_type = RDS_EP_TYPE_CTRL; 1273 mutex_init(&ctrlep->ep_lock, NULL, MUTEX_DRIVER, NULL); 1274 1275 /* Initialize send pool locks */ 1276 pool = &ctrlep->ep_sndpool; 1277 mutex_init(&pool->pool_lock, NULL, MUTEX_DRIVER, NULL); 1278 cv_init(&pool->pool_cv, NULL, CV_DRIVER, NULL); 1279 1280 /* Initialize recv pool locks */ 1281 pool = &ctrlep->ep_rcvpool; 1282 mutex_init(&ctrlep->ep_recvqp.qp_lock, NULL, MUTEX_DRIVER, NULL); 1283 mutex_init(&pool->pool_lock, NULL, MUTEX_DRIVER, NULL); 1284 cv_init(&pool->pool_cv, NULL, CV_DRIVER, NULL); 1285 1286 /* lkup if there is already a session */ 1287 rw_enter(&statep->rds_sessionlock, RW_WRITER); 1288 oldp = rds_session_lkup(statep, remip, 0); 1289 if (oldp != NULL) { 1290 /* A session to this destination exists */ 1291 rw_exit(&statep->rds_sessionlock); 1292 rw_destroy(&newp->session_lock); 1293 rw_destroy(&newp->session_local_portmap_lock); 1294 rw_destroy(&newp->session_remote_portmap_lock); 1295 mutex_destroy(&dataep->ep_lock); 1296 mutex_destroy(&ctrlep->ep_lock); 1297 kmem_free(newp, sizeof (rds_session_t)); 1298 return (NULL); 1299 } 1300 1301 /* Insert this session into the list */ 1302 if (rds_add_session(newp, B_TRUE) != B_TRUE) { 1303 /* No room to add this session */ 1304 rw_exit(&statep->rds_sessionlock); 1305 rw_destroy(&newp->session_lock); 1306 rw_destroy(&newp->session_local_portmap_lock); 1307 rw_destroy(&newp->session_remote_portmap_lock); 1308 mutex_destroy(&dataep->ep_lock); 1309 mutex_destroy(&ctrlep->ep_lock); 1310 kmem_free(newp, sizeof (rds_session_t)); 1311 return (NULL); 1312 } 1313 1314 /* unlock the session list */ 1315 rw_exit(&statep->rds_sessionlock); 1316 1317 if (type == RDS_SESSION_ACTIVE) { 1318 ipaddr_t localip1, remip1; 1319 ibt_ip_path_attr_t ipattr; 1320 ibt_ip_addr_t dstip; 1321 1322 /* The ipaddr should be in the network order */ 1323 localip1 = localip; 1324 remip1 = remip; 1325 ret = rds_sc_path_lookup(&localip1, &remip1); 1326 if (ret == 0) { 1327 RDS_DPRINTF2(LABEL, "Path not found (0x%x 0x%x)", 1328 localip, remip); 1329 } 1330 1331 /* Get the gids for the source and destination ip addrs */ 1332 lgid.gid_prefix = 0; 1333 lgid.gid_guid = 0; 1334 rgid.gid_prefix = 0; 1335 rgid.gid_guid = 0; 1336 1337 bzero(&ipattr, sizeof (ibt_ip_path_attr_t)); 1338 dstip.family = AF_INET; 1339 dstip.un.ip4addr = remip1; 1340 ipattr.ipa_dst_ip = &dstip; 1341 ipattr.ipa_src_ip.family = AF_INET; 1342 ipattr.ipa_src_ip.un.ip4addr = localip1; 1343 ipattr.ipa_ndst = 1; 1344 ipattr.ipa_max_paths = 1; 1345 RDS_DPRINTF2(LABEL, "ibt_get_ip_paths: 0x%x <-> 0x%x ", 1346 localip1, remip1); 1347 ret = ibt_get_ip_paths(rdsib_statep->rds_ibhdl, 1348 IBT_PATH_NO_FLAGS, &ipattr, &newp->session_pinfo, 1349 NULL, NULL); 1350 if (ret != IBT_SUCCESS) { 1351 RDS_DPRINTF2(LABEL, "ibt_get_ip_paths failed, ret: %d " 1352 "lgid: %llx:%llx rgid: %llx:%llx", lgid.gid_prefix, 1353 lgid.gid_guid, rgid.gid_prefix, rgid.gid_guid); 1354 1355 RDS_SESSION_TRANSITION(newp, RDS_SESSION_STATE_FAILED); 1356 return (NULL); 1357 } 1358 RDS_DPRINTF2(LABEL, "ibt_get_ip_paths success"); 1359 lgid = 1360 newp->session_pinfo.pi_prim_cep_path.cep_adds_vect.av_sgid; 1361 rgid = 1362 newp->session_pinfo.pi_prim_cep_path.cep_adds_vect.av_dgid; 1363 1364 RDS_DPRINTF2(LABEL, "lgid: %llx:%llx rgid: %llx:%llx", 1365 lgid.gid_prefix, lgid.gid_guid, rgid.gid_prefix, 1366 rgid.gid_guid); 1367 } 1368 1369 rw_enter(&newp->session_lock, RW_WRITER); 1370 /* check for peer-to-peer case */ 1371 if (type == newp->session_type) { 1372 /* no peer-to-peer case */ 1373 if (type == RDS_SESSION_ACTIVE) { 1374 newp->session_lgid = lgid; 1375 newp->session_rgid = rgid; 1376 } else { 1377 /* rgid is requester gid & lgid is receiver gid */ 1378 newp->session_rgid = reqp->req_prim_addr.av_dgid; 1379 newp->session_lgid = reqp->req_prim_addr.av_sgid; 1380 } 1381 } 1382 rw_exit(&newp->session_lock); 1383 1384 RDS_DPRINTF2("rds_session_create", "Return SP(%p)", newp); 1385 1386 return (newp); 1387 } 1388 1389 void 1390 rds_handle_close_session_request(void *arg) 1391 { 1392 rds_session_t *sp = (rds_session_t *)arg; 1393 1394 RDS_DPRINTF2("rds_handle_close_session_request", 1395 "Enter: Closing this Session (%p)", sp); 1396 1397 rw_enter(&sp->session_lock, RW_WRITER); 1398 RDS_DPRINTF2("rds_handle_close_session_request", 1399 "SP(%p) State: %d", sp, sp->session_state); 1400 rds_close_this_session(sp, 2); 1401 rw_exit(&sp->session_lock); 1402 1403 RDS_DPRINTF2("rds_handle_close_session_request", "Return SP(%p)", sp); 1404 } 1405 1406 void 1407 rds_handle_control_message(rds_session_t *sp, rds_ctrl_pkt_t *cpkt) 1408 { 1409 RDS_DPRINTF4("rds_handle_control_message", "Enter: SP(%p) code: %d " 1410 "port: %d", sp, cpkt->rcp_code, cpkt->rcp_port); 1411 1412 switch (cpkt->rcp_code) { 1413 case RDS_CTRL_CODE_STALL: 1414 RDS_INCR_STALLS_RCVD(); 1415 (void) rds_check_n_mark_port(sp, cpkt->rcp_port, RDS_REMOTE); 1416 break; 1417 case RDS_CTRL_CODE_UNSTALL: 1418 RDS_INCR_UNSTALLS_RCVD(); 1419 (void) rds_check_n_unmark_port(sp, cpkt->rcp_port, RDS_REMOTE); 1420 break; 1421 case RDS_CTRL_CODE_STALL_PORTS: 1422 rds_mark_all_ports(sp, RDS_REMOTE); 1423 break; 1424 case RDS_CTRL_CODE_UNSTALL_PORTS: 1425 rds_unmark_all_ports(sp, RDS_REMOTE); 1426 break; 1427 case RDS_CTRL_CODE_HEARTBEAT: 1428 break; 1429 case RDS_CTRL_CODE_CLOSE_SESSION: 1430 RDS_DPRINTF2("rds_handle_control_message", 1431 "SP(%p) Remote Requested to close this session", sp); 1432 (void) ddi_taskq_dispatch(rds_taskq, 1433 rds_handle_close_session_request, (void *)sp, DDI_SLEEP); 1434 break; 1435 default: 1436 RDS_DPRINTF2(LABEL, "ERROR: Invalid Control code: %d", 1437 cpkt->rcp_code); 1438 break; 1439 } 1440 1441 RDS_DPRINTF4("rds_handle_control_message", "Return"); 1442 } 1443 1444 int 1445 rds_post_control_message(rds_session_t *sp, uint8_t code, in_port_t port) 1446 { 1447 ibt_send_wr_t wr; 1448 rds_ep_t *ep; 1449 rds_buf_t *bp; 1450 rds_ctrl_pkt_t *cp; 1451 int ret; 1452 1453 RDS_DPRINTF4("rds_post_control_message", "Enter: SP(%p) Code: %d " 1454 "Port: %d", sp, code, port); 1455 1456 ep = &sp->session_ctrlep; 1457 1458 bp = rds_get_send_buf(ep, 1); 1459 if (bp == NULL) { 1460 RDS_DPRINTF2(LABEL, "No buffers available to send control " 1461 "message: SP(%p) Code: %d Port: %d", sp, code, 1462 port); 1463 return (-1); 1464 } 1465 1466 cp = (rds_ctrl_pkt_t *)(uintptr_t)bp->buf_ds.ds_va; 1467 cp->rcp_code = code; 1468 cp->rcp_port = port; 1469 bp->buf_ds.ds_len = RDS_CTRLPKT_SIZE; 1470 1471 wr.wr_id = (uintptr_t)bp; 1472 wr.wr_flags = IBT_WR_SEND_SOLICIT; 1473 wr.wr_trans = IBT_RC_SRV; 1474 wr.wr_opcode = IBT_WRC_SEND; 1475 wr.wr_nds = 1; 1476 wr.wr_sgl = &bp->buf_ds; 1477 RDS_DPRINTF5(LABEL, "ds_va %p ds_len %d ds_lkey 0x%llx", 1478 bp->buf_ds.ds_va, bp->buf_ds.ds_len, bp->buf_ds.ds_key); 1479 ret = ibt_post_send(ep->ep_chanhdl, &wr, 1, NULL); 1480 if (ret != IBT_SUCCESS) { 1481 RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send failed: " 1482 "%d", ep, ret); 1483 bp->buf_state = RDS_SNDBUF_FREE; 1484 rds_free_send_buf(ep, bp, NULL, 1, B_FALSE); 1485 return (-1); 1486 } 1487 1488 RDS_DPRINTF4("rds_post_control_message", "Return SP(%p) Code: %d " 1489 "Port: %d", sp, code, port); 1490 1491 return (0); 1492 } 1493 1494 void 1495 rds_stall_port(rds_session_t *sp, in_port_t port, uint_t qualifier) 1496 { 1497 int ret; 1498 1499 RDS_DPRINTF4("rds_stall_port", "Enter: SP(%p) Port %d", sp, port); 1500 1501 RDS_INCR_STALLS_TRIGGERED(); 1502 1503 if (!rds_check_n_mark_port(sp, port, qualifier)) { 1504 1505 if (sp != NULL) { 1506 ret = rds_post_control_message(sp, 1507 RDS_CTRL_CODE_STALL, port); 1508 if (ret != 0) { 1509 (void) rds_check_n_unmark_port(sp, port, 1510 qualifier); 1511 return; 1512 } 1513 RDS_INCR_STALLS_SENT(); 1514 } 1515 } else { 1516 RDS_DPRINTF3(LABEL, 1517 "Port %d is already in stall state", port); 1518 } 1519 1520 RDS_DPRINTF4("rds_stall_port", "Return: SP(%p) Port %d", sp, port); 1521 } 1522 1523 void 1524 rds_resume_port(in_port_t port) 1525 { 1526 rds_session_t *sp; 1527 uint_t ix; 1528 int ret; 1529 1530 RDS_DPRINTF4("rds_resume_port", "Enter: Port %d", port); 1531 1532 RDS_INCR_UNSTALLS_TRIGGERED(); 1533 1534 /* resume loopback traffic */ 1535 (void) rds_check_n_unmark_port(NULL, port, RDS_LOOPBACK); 1536 1537 /* send unstall messages to resume the remote traffic */ 1538 rw_enter(&rdsib_statep->rds_sessionlock, RW_READER); 1539 1540 sp = rdsib_statep->rds_sessionlistp; 1541 for (ix = 0; ix < rdsib_statep->rds_nsessions; ix++) { 1542 ASSERT(sp != NULL); 1543 if ((sp->session_state == RDS_SESSION_STATE_CONNECTED) && 1544 (rds_check_n_unmark_port(sp, port, RDS_LOCAL))) { 1545 ret = rds_post_control_message(sp, 1546 RDS_CTRL_CODE_UNSTALL, port); 1547 if (ret != 0) { 1548 (void) rds_check_n_mark_port(sp, port, 1549 RDS_LOCAL); 1550 } else { 1551 RDS_INCR_UNSTALLS_SENT(); 1552 } 1553 } 1554 1555 sp = sp->session_nextp; 1556 } 1557 1558 rw_exit(&rdsib_statep->rds_sessionlock); 1559 1560 RDS_DPRINTF4("rds_resume_port", "Return: Port %d", port); 1561 } 1562 1563 static int 1564 rds_build_n_post_msg(rds_ep_t *ep, uio_t *uiop, in_port_t sendport, 1565 in_port_t recvport) 1566 { 1567 ibt_send_wr_t *wrp, wr; 1568 rds_buf_t *bp, *bp1; 1569 rds_data_hdr_t *pktp; 1570 uint32_t msgsize, npkts, residual, pktno, ix; 1571 int ret; 1572 1573 RDS_DPRINTF4("rds_build_n_post_msg", "Enter: EP(%p) UIOP(%p)", 1574 ep, uiop); 1575 1576 /* how many pkts are needed to carry this msg */ 1577 msgsize = uiop->uio_resid; 1578 npkts = ((msgsize - 1) / UserBufferSize) + 1; 1579 residual = ((msgsize - 1) % UserBufferSize) + 1; 1580 1581 RDS_DPRINTF5(LABEL, "EP(%p) UIOP(%p) msg size: %d npkts: %d", ep, uiop, 1582 msgsize, npkts); 1583 1584 /* Get the buffers needed to post this message */ 1585 bp = rds_get_send_buf(ep, npkts); 1586 if (bp == NULL) { 1587 RDS_INCR_ENOBUFS(); 1588 return (ENOBUFS); 1589 } 1590 1591 if (npkts > 1) { 1592 /* 1593 * multi-pkt messages are posted at the same time as a list 1594 * of WRs 1595 */ 1596 wrp = (ibt_send_wr_t *)kmem_zalloc(sizeof (ibt_send_wr_t) * 1597 npkts, KM_SLEEP); 1598 } 1599 1600 1601 pktno = 0; 1602 bp1 = bp; 1603 do { 1604 /* prepare the header */ 1605 pktp = (rds_data_hdr_t *)(uintptr_t)bp1->buf_ds.ds_va; 1606 pktp->dh_datalen = UserBufferSize; 1607 pktp->dh_npkts = npkts - pktno; 1608 pktp->dh_psn = pktno; 1609 pktp->dh_sendport = sendport; 1610 pktp->dh_recvport = recvport; 1611 bp1->buf_ds.ds_len = RdsPktSize; 1612 1613 /* copy the data */ 1614 ret = uiomove((uint8_t *)pktp + RDS_DATA_HDR_SZ, 1615 UserBufferSize, UIO_WRITE, uiop); 1616 if (ret != 0) { 1617 break; 1618 } 1619 1620 if (uiop->uio_resid == 0) { 1621 pktp->dh_datalen = residual; 1622 bp1->buf_ds.ds_len = residual + RDS_DATA_HDR_SZ; 1623 break; 1624 } 1625 pktno++; 1626 bp1 = bp1->buf_nextp; 1627 } while (uiop->uio_resid); 1628 1629 if (ret) { 1630 /* uiomove failed */ 1631 RDS_DPRINTF2("rds_build_n_post_msg", "UIO(%p) Move FAILED: %d", 1632 uiop, ret); 1633 if (npkts > 1) { 1634 kmem_free(wrp, npkts * sizeof (ibt_send_wr_t)); 1635 } 1636 rds_free_send_buf(ep, bp, NULL, npkts, B_FALSE); 1637 return (ret); 1638 } 1639 1640 if (npkts > 1) { 1641 /* multi-pkt message */ 1642 RDS_DPRINTF5(LABEL, "EP(%p) Sending Multiple Packets", ep); 1643 1644 bp1 = bp; 1645 for (ix = 0; ix < npkts; ix++) { 1646 wrp[ix].wr_id = (uintptr_t)bp1; 1647 wrp[ix].wr_flags = IBT_WR_NO_FLAGS; 1648 wrp[ix].wr_trans = IBT_RC_SRV; 1649 wrp[ix].wr_opcode = IBT_WRC_SEND; 1650 wrp[ix].wr_nds = 1; 1651 wrp[ix].wr_sgl = &bp1->buf_ds; 1652 bp1 = bp1->buf_nextp; 1653 } 1654 wrp[npkts - 1].wr_flags = IBT_WR_SEND_SOLICIT; 1655 1656 ret = ibt_post_send(ep->ep_chanhdl, wrp, npkts, &ix); 1657 if (ret != IBT_SUCCESS) { 1658 RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send failed: " 1659 "%d for %d pkts", ep, ret, npkts); 1660 rds_free_send_buf(ep, bp, NULL, npkts, B_FALSE); 1661 kmem_free(wrp, npkts * sizeof (ibt_send_wr_t)); 1662 return (ret); 1663 } 1664 1665 kmem_free(wrp, npkts * sizeof (ibt_send_wr_t)); 1666 } else { 1667 /* single pkt */ 1668 RDS_DPRINTF5(LABEL, "EP(%p) Sending Single Packet", ep); 1669 wr.wr_id = (uintptr_t)bp; 1670 wr.wr_flags = IBT_WR_SEND_SOLICIT; 1671 wr.wr_trans = IBT_RC_SRV; 1672 wr.wr_opcode = IBT_WRC_SEND; 1673 wr.wr_nds = 1; 1674 wr.wr_sgl = &bp->buf_ds; 1675 RDS_DPRINTF5(LABEL, "ds_va %p ds_key 0x%llx ds_len %d ", 1676 bp->buf_ds.ds_va, bp->buf_ds.ds_key, bp->buf_ds.ds_len); 1677 ret = ibt_post_send(ep->ep_chanhdl, &wr, 1, NULL); 1678 if (ret != IBT_SUCCESS) { 1679 RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send failed: " 1680 "%d", ep, ret); 1681 rds_free_send_buf(ep, bp, NULL, 1, B_FALSE); 1682 return (ret); 1683 } 1684 } 1685 1686 RDS_INCR_TXPKTS(npkts); 1687 RDS_INCR_TXBYTES(msgsize); 1688 1689 RDS_DPRINTF4("rds_build_n_post_msg", "Return: EP(%p) UIOP(%p)", 1690 ep, uiop); 1691 1692 return (0); 1693 } 1694 1695 static int 1696 rds_deliver_loopback_msg(uio_t *uiop, ipaddr_t recvip, ipaddr_t sendip, 1697 in_port_t recvport, in_port_t sendport, zoneid_t zoneid) 1698 { 1699 mblk_t *mp; 1700 int ret; 1701 1702 RDS_DPRINTF4("rds_deliver_loopback_msg", "Enter"); 1703 1704 RDS_DPRINTF3(LABEL, "Loopback message: sendport: " 1705 "%d to recvport: %d", sendport, recvport); 1706 1707 mp = allocb(uiop->uio_resid, BPRI_MED); 1708 if (mp == NULL) { 1709 RDS_DPRINTF2(LABEL, "allocb failed, size: %d\n", 1710 uiop->uio_resid); 1711 return (ENOSPC); 1712 } 1713 mp->b_wptr = mp->b_rptr + uiop->uio_resid; 1714 1715 ret = uiomove(mp->b_rptr, uiop->uio_resid, UIO_WRITE, uiop); 1716 if (ret) { 1717 RDS_DPRINTF2(LABEL, "ERROR: uiomove returned: %d", ret); 1718 freeb(mp); 1719 return (ret); 1720 } 1721 1722 ret = rds_deliver_new_msg(mp, recvip, sendip, recvport, sendport, 1723 zoneid); 1724 if (ret != 0) { 1725 if (ret == ENOSPC) { 1726 /* 1727 * The message is delivered but cannot take more, 1728 * stop further loopback traffic to this port 1729 */ 1730 RDS_DPRINTF3("rds_deliver_loopback_msg", 1731 "Port %d NO SPACE", recvport); 1732 rds_stall_port(NULL, recvport, RDS_LOOPBACK); 1733 } else { 1734 RDS_DPRINTF2(LABEL, "Loopback message: port %d -> " 1735 "port %d failed: %d", sendport, recvport, ret); 1736 return (ret); 1737 } 1738 } 1739 1740 RDS_DPRINTF4("rds_deliver_loopback_msg", "Return"); 1741 return (0); 1742 } 1743 1744 static void 1745 rds_resend_messages(void *arg) 1746 { 1747 rds_session_t *sp = (rds_session_t *)arg; 1748 rds_ep_t *ep; 1749 rds_bufpool_t *spool; 1750 rds_buf_t *bp, *endp, *tmp; 1751 ibt_send_wr_t *wrp; 1752 uint_t nwr = 0, ix, jx; 1753 int ret; 1754 1755 RDS_DPRINTF2("rds_resend_messages", "Enter: SP(%p)", sp); 1756 1757 ep = &sp->session_dataep; 1758 1759 spool = &ep->ep_sndpool; 1760 mutex_enter(&spool->pool_lock); 1761 1762 ASSERT(spool->pool_nfree == spool->pool_nbuffers); 1763 1764 if (ep->ep_lbufid == NULL) { 1765 RDS_DPRINTF2("rds_resend_messages", 1766 "SP(%p) Remote session is cleaned up ", sp); 1767 /* 1768 * The remote end cleaned up its session. There may be loss 1769 * of messages. Mark all buffers as acknowledged. 1770 */ 1771 tmp = spool->pool_tailp; 1772 } else { 1773 tmp = (rds_buf_t *)ep->ep_lbufid; 1774 RDS_DPRINTF2("rds_resend_messages", 1775 "SP(%p) Last successful BP(%p) ", sp, tmp); 1776 } 1777 1778 endp = spool->pool_tailp; 1779 bp = spool->pool_headp; 1780 jx = 0; 1781 while ((bp != NULL) && (bp != tmp)) { 1782 bp->buf_state = RDS_SNDBUF_FREE; 1783 jx++; 1784 bp = bp->buf_nextp; 1785 } 1786 1787 if (bp == NULL) { 1788 mutex_exit(&spool->pool_lock); 1789 RDS_DPRINTF2("rds_resend_messages", "Alert: lbufid(%p) is not " 1790 "found in the list", tmp); 1791 1792 rw_enter(&sp->session_lock, RW_WRITER); 1793 if (sp->session_state == RDS_SESSION_STATE_INIT) { 1794 sp->session_state = RDS_SESSION_STATE_CONNECTED; 1795 } else { 1796 RDS_DPRINTF2("rds_resend_messages", "SP(%p) State: %d " 1797 "Expected State: %d", sp, sp->session_state, 1798 RDS_SESSION_STATE_CONNECTED); 1799 } 1800 sp->session_failover = 0; 1801 rw_exit(&sp->session_lock); 1802 return; 1803 } 1804 1805 /* Found the match */ 1806 bp->buf_state = RDS_SNDBUF_FREE; 1807 jx++; 1808 1809 spool->pool_tailp = bp; 1810 bp = bp->buf_nextp; 1811 spool->pool_tailp->buf_nextp = NULL; 1812 nwr = spool->pool_nfree - jx; 1813 spool->pool_nfree = jx; 1814 mutex_exit(&spool->pool_lock); 1815 1816 RDS_DPRINTF2("rds_resend_messages", "SP(%p): Number of " 1817 "bufs (BP %p) to re-send: %d", sp, bp, nwr); 1818 1819 if (bp) { 1820 wrp = (ibt_send_wr_t *)kmem_zalloc(sizeof (ibt_send_wr_t) * 100, 1821 KM_SLEEP); 1822 1823 while (nwr) { 1824 jx = (nwr > 100) ? 100 : nwr; 1825 1826 tmp = bp; 1827 for (ix = 0; ix < jx; ix++) { 1828 bp->buf_state = RDS_SNDBUF_PENDING; 1829 wrp[ix].wr_id = (uintptr_t)bp; 1830 wrp[ix].wr_flags = IBT_WR_SEND_SOLICIT; 1831 wrp[ix].wr_trans = IBT_RC_SRV; 1832 wrp[ix].wr_opcode = IBT_WRC_SEND; 1833 wrp[ix].wr_nds = 1; 1834 wrp[ix].wr_sgl = &bp->buf_ds; 1835 bp = bp->buf_nextp; 1836 } 1837 1838 ret = ibt_post_send(ep->ep_chanhdl, wrp, jx, &ix); 1839 if (ret != IBT_SUCCESS) { 1840 RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send " 1841 "failed: %d for % pkts", ep, ret, jx); 1842 break; 1843 } 1844 1845 mutex_enter(&spool->pool_lock); 1846 spool->pool_nbusy += jx; 1847 mutex_exit(&spool->pool_lock); 1848 1849 nwr -= jx; 1850 } 1851 1852 kmem_free(wrp, sizeof (ibt_send_wr_t) * 100); 1853 1854 if (nwr != 0) { 1855 1856 /* 1857 * An error while failover is in progress. Some WRs are 1858 * posted while other remain. If any of the posted WRs 1859 * complete in error then they would dispatch a taskq to 1860 * do a failover. Getting the session lock will prevent 1861 * the taskq to wait until we are done here. 1862 */ 1863 rw_enter(&sp->session_lock, RW_READER); 1864 1865 /* 1866 * Wait until all the previous WRs are completed and 1867 * then queue the remaining, otherwise the order of 1868 * the messages may change. 1869 */ 1870 (void) rds_is_sendq_empty(ep, 1); 1871 1872 /* free the remaining buffers */ 1873 rds_free_send_buf(ep, tmp, endp, nwr, B_FALSE); 1874 1875 rw_exit(&sp->session_lock); 1876 return; 1877 } 1878 } 1879 1880 rw_enter(&sp->session_lock, RW_WRITER); 1881 if (sp->session_state == RDS_SESSION_STATE_INIT) { 1882 sp->session_state = RDS_SESSION_STATE_CONNECTED; 1883 } else { 1884 RDS_DPRINTF2("rds_resend_messages", "SP(%p) State: %d " 1885 "Expected State: %d", sp, sp->session_state, 1886 RDS_SESSION_STATE_CONNECTED); 1887 } 1888 sp->session_failover = 0; 1889 rw_exit(&sp->session_lock); 1890 1891 RDS_DPRINTF2("rds_resend_messages", "Return: SP(%p)", sp); 1892 } 1893 1894 /* 1895 * This is called when a channel is connected. Transition the session to 1896 * CONNECTED state iff both channels are connected. 1897 */ 1898 void 1899 rds_session_active(rds_session_t *sp) 1900 { 1901 rds_ep_t *ep; 1902 uint_t failover; 1903 1904 RDS_DPRINTF2("rds_session_active", "Enter: 0x%p", sp); 1905 1906 rw_enter(&sp->session_lock, RW_READER); 1907 1908 failover = sp->session_failover; 1909 1910 /* 1911 * we establish the data channel first, so check the control channel 1912 * first but make sure it is initialized. 1913 */ 1914 ep = &sp->session_ctrlep; 1915 mutex_enter(&ep->ep_lock); 1916 if (ep->ep_state != RDS_EP_STATE_CONNECTED) { 1917 /* the session is not ready yet */ 1918 mutex_exit(&ep->ep_lock); 1919 rw_exit(&sp->session_lock); 1920 return; 1921 } 1922 mutex_exit(&ep->ep_lock); 1923 1924 /* control channel is connected, check the data channel */ 1925 ep = &sp->session_dataep; 1926 mutex_enter(&ep->ep_lock); 1927 if (ep->ep_state != RDS_EP_STATE_CONNECTED) { 1928 /* data channel is not yet connected */ 1929 mutex_exit(&ep->ep_lock); 1930 rw_exit(&sp->session_lock); 1931 return; 1932 } 1933 mutex_exit(&ep->ep_lock); 1934 1935 if (failover) { 1936 rw_exit(&sp->session_lock); 1937 1938 /* 1939 * The session has failed over. Previous msgs have to be 1940 * re-sent before the session is moved to the connected 1941 * state. 1942 */ 1943 RDS_DPRINTF2("rds_session_active", "SP(%p) Dispatching taskq " 1944 "to re-send messages", sp); 1945 (void) ddi_taskq_dispatch(rds_taskq, 1946 rds_resend_messages, (void *)sp, DDI_SLEEP); 1947 return; 1948 } 1949 1950 /* the session is ready */ 1951 sp->session_state = RDS_SESSION_STATE_CONNECTED; 1952 RDS_DPRINTF3("rds_session_active", 1953 "SP(%p) State RDS_SESSION_STATE_CONNECTED", sp); 1954 1955 rw_exit(&sp->session_lock); 1956 1957 RDS_DPRINTF2("rds_session_active", "Return: SP(%p) is CONNECTED", sp); 1958 } 1959 1960 static int 1961 rds_ep_sendmsg(rds_ep_t *ep, uio_t *uiop, in_port_t sendport, 1962 in_port_t recvport) 1963 { 1964 int ret; 1965 1966 RDS_DPRINTF4("rds_ep_sendmsg", "Enter: EP(%p) sendport: %d recvport: " 1967 "%d", ep, sendport, recvport); 1968 1969 /* make sure the remote port is not stalled */ 1970 if (rds_is_port_marked(ep->ep_sp, recvport, RDS_REMOTE)) { 1971 RDS_DPRINTF2(LABEL, "SP(%p) Port:%d is in stall state", 1972 ep->ep_sp, recvport); 1973 RDS_INCR_EWOULDBLOCK(); 1974 ret = ENOMEM; 1975 } else { 1976 ret = rds_build_n_post_msg(ep, uiop, sendport, recvport); 1977 } 1978 1979 RDS_DPRINTF4("rds_ep_sendmsg", "Return: EP(%p)", ep); 1980 1981 return (ret); 1982 } 1983 1984 /* Send a message to a destination socket */ 1985 int 1986 rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip, in_port_t sendport, 1987 in_port_t recvport, zoneid_t zoneid) 1988 { 1989 rds_session_t *sp; 1990 ib_gid_t lgid, rgid; 1991 int ret; 1992 1993 RDS_DPRINTF4("rds_sendmsg", "Enter: uiop: 0x%p, srcIP: 0x%x destIP: " 1994 "0x%x sndport: %d recvport: %d", uiop, sendip, recvip, 1995 sendport, recvport); 1996 1997 /* If msg length is 0, just return success */ 1998 if (uiop->uio_resid == 0) { 1999 RDS_DPRINTF2("rds_sendmsg", "Zero sized message"); 2000 return (0); 2001 } 2002 2003 /* Is there a session to the destination? */ 2004 rw_enter(&rdsib_statep->rds_sessionlock, RW_READER); 2005 sp = rds_session_lkup(rdsib_statep, recvip, 0); 2006 rw_exit(&rdsib_statep->rds_sessionlock); 2007 2008 /* Is this a loopback message? */ 2009 if ((sp == NULL) && (rds_islocal(recvip))) { 2010 /* make sure the port is not stalled */ 2011 if (rds_is_port_marked(NULL, recvport, RDS_LOOPBACK)) { 2012 RDS_DPRINTF2(LABEL, "Local Port:%d is in stall state", 2013 recvport); 2014 RDS_INCR_EWOULDBLOCK(); 2015 return (ENOMEM); 2016 } 2017 ret = rds_deliver_loopback_msg(uiop, recvip, sendip, recvport, 2018 sendport, zoneid); 2019 return (ret); 2020 } 2021 2022 /* Not a loopback message */ 2023 if (sp == NULL) { 2024 /* There is no session to the destination, create one. */ 2025 RDS_DPRINTF3(LABEL, "There is no session to the destination " 2026 "IP: 0x%x", recvip); 2027 sp = rds_session_create(rdsib_statep, sendip, recvip, NULL, 2028 RDS_SESSION_ACTIVE); 2029 if (sp != NULL) { 2030 rw_enter(&sp->session_lock, RW_WRITER); 2031 if (sp->session_type == RDS_SESSION_ACTIVE) { 2032 ret = rds_session_init(sp); 2033 if (ret != 0) { 2034 RDS_DPRINTF2("rds_sendmsg", 2035 "SP(%p): rds_session_init failed", 2036 sp); 2037 sp->session_state = 2038 RDS_SESSION_STATE_FAILED; 2039 RDS_DPRINTF3("rds_sendmsg", 2040 "SP(%p) State " 2041 "RDS_SESSION_STATE_FAILED", sp); 2042 rw_exit(&sp->session_lock); 2043 return (EFAULT); 2044 } 2045 sp->session_state = RDS_SESSION_STATE_INIT; 2046 RDS_DPRINTF3("rds_sendmsg", 2047 "SP(%p) State " 2048 "RDS_SESSION_STATE_INIT", sp); 2049 rw_exit(&sp->session_lock); 2050 rds_session_open(sp); 2051 } else { 2052 rw_exit(&sp->session_lock); 2053 } 2054 } else { 2055 /* Is a session created for this destination */ 2056 rw_enter(&rdsib_statep->rds_sessionlock, RW_READER); 2057 sp = rds_session_lkup(rdsib_statep, recvip, 0); 2058 rw_exit(&rdsib_statep->rds_sessionlock); 2059 if (sp == NULL) { 2060 return (EFAULT); 2061 } 2062 } 2063 } 2064 2065 /* There is a session to the destination */ 2066 rw_enter(&sp->session_lock, RW_READER); 2067 if (sp->session_state == RDS_SESSION_STATE_CONNECTED) { 2068 rw_exit(&sp->session_lock); 2069 2070 ret = rds_ep_sendmsg(&sp->session_dataep, uiop, sendport, 2071 recvport); 2072 return (ret); 2073 } else if ((sp->session_state == RDS_SESSION_STATE_FAILED) || 2074 (sp->session_state == RDS_SESSION_STATE_FINI)) { 2075 ipaddr_t sendip1, recvip1; 2076 2077 RDS_DPRINTF3("rds_sendmsg", "SP(%p) is not connected, State: " 2078 "%d", sp, sp->session_state); 2079 rw_exit(&sp->session_lock); 2080 rw_enter(&sp->session_lock, RW_WRITER); 2081 if ((sp->session_state == RDS_SESSION_STATE_FAILED) || 2082 (sp->session_state == RDS_SESSION_STATE_FINI)) { 2083 ibt_ip_path_attr_t ipattr; 2084 ibt_ip_addr_t dstip; 2085 2086 sp->session_state = RDS_SESSION_STATE_CREATED; 2087 sp->session_type = RDS_SESSION_ACTIVE; 2088 RDS_DPRINTF3("rds_sendmsg", "SP(%p) State " 2089 "RDS_SESSION_STATE_CREATED", sp); 2090 rw_exit(&sp->session_lock); 2091 2092 2093 /* The ipaddr should be in the network order */ 2094 sendip1 = sendip; 2095 recvip1 = recvip; 2096 ret = rds_sc_path_lookup(&sendip1, &recvip1); 2097 if (ret == 0) { 2098 RDS_DPRINTF2(LABEL, "Path not found " 2099 "(0x%x 0x%x)", sendip1, recvip1); 2100 } 2101 2102 /* Resolve the IP addresses */ 2103 lgid.gid_prefix = 0; 2104 lgid.gid_guid = 0; 2105 rgid.gid_prefix = 0; 2106 rgid.gid_guid = 0; 2107 2108 bzero(&ipattr, sizeof (ibt_ip_path_attr_t)); 2109 dstip.family = AF_INET; 2110 dstip.un.ip4addr = recvip1; 2111 ipattr.ipa_dst_ip = &dstip; 2112 ipattr.ipa_src_ip.family = AF_INET; 2113 ipattr.ipa_src_ip.un.ip4addr = sendip1; 2114 ipattr.ipa_ndst = 1; 2115 ipattr.ipa_max_paths = 1; 2116 RDS_DPRINTF2(LABEL, "ibt_get_ip_paths: 0x%x <-> 0x%x ", 2117 sendip1, recvip1); 2118 ret = ibt_get_ip_paths(rdsib_statep->rds_ibhdl, 2119 IBT_PATH_NO_FLAGS, &ipattr, &sp->session_pinfo, 2120 NULL, NULL); 2121 if (ret != IBT_SUCCESS) { 2122 RDS_DPRINTF2("rds_sendmsg", 2123 "ibt_get_ip_paths failed, ret: %d ", ret); 2124 2125 rw_enter(&sp->session_lock, RW_WRITER); 2126 if (sp->session_type == RDS_SESSION_ACTIVE) { 2127 sp->session_state = 2128 RDS_SESSION_STATE_FAILED; 2129 RDS_DPRINTF3("rds_sendmsg", 2130 "SP(%p) State " 2131 "RDS_SESSION_STATE_FAILED", sp); 2132 rw_exit(&sp->session_lock); 2133 return (EFAULT); 2134 } else { 2135 rw_exit(&sp->session_lock); 2136 return (ENOMEM); 2137 } 2138 } 2139 RDS_DPRINTF2(LABEL, "ibt_get_ip_paths success"); 2140 lgid = sp->session_pinfo. 2141 pi_prim_cep_path.cep_adds_vect.av_sgid; 2142 rgid = sp->session_pinfo. 2143 pi_prim_cep_path.cep_adds_vect.av_dgid; 2144 2145 RDS_DPRINTF2(LABEL, "lgid: %llx:%llx rgid: %llx:%llx", 2146 lgid.gid_prefix, lgid.gid_guid, rgid.gid_prefix, 2147 rgid.gid_guid); 2148 2149 rw_enter(&sp->session_lock, RW_WRITER); 2150 if (sp->session_type == RDS_SESSION_ACTIVE) { 2151 sp->session_lgid = lgid; 2152 sp->session_rgid = rgid; 2153 ret = rds_session_init(sp); 2154 if (ret != 0) { 2155 RDS_DPRINTF2("rds_sendmsg", 2156 "SP(%p): rds_session_init failed", 2157 sp); 2158 sp->session_state = 2159 RDS_SESSION_STATE_FAILED; 2160 RDS_DPRINTF3("rds_sendmsg", 2161 "SP(%p) State " 2162 "RDS_SESSION_STATE_FAILED", sp); 2163 rw_exit(&sp->session_lock); 2164 return (EFAULT); 2165 } 2166 sp->session_state = RDS_SESSION_STATE_INIT; 2167 rw_exit(&sp->session_lock); 2168 2169 rds_session_open(sp); 2170 2171 } else { 2172 RDS_DPRINTF2("rds_sendmsg", 2173 "SP(%p): type changed to %d", 2174 sp, sp->session_type); 2175 rw_exit(&sp->session_lock); 2176 return (ENOMEM); 2177 } 2178 } else { 2179 RDS_DPRINTF2("rds_sendmsg", 2180 "SP(%p): Session state %d changed", 2181 sp, sp->session_state); 2182 rw_exit(&sp->session_lock); 2183 return (ENOMEM); 2184 } 2185 } else { 2186 RDS_DPRINTF4("rds_sendmsg", "SP(%p): Session is in %d state", 2187 sp, sp->session_state); 2188 rw_exit(&sp->session_lock); 2189 return (ENOMEM); 2190 } 2191 2192 rw_enter(&sp->session_lock, RW_READER); 2193 if (sp->session_state == RDS_SESSION_STATE_CONNECTED) { 2194 rw_exit(&sp->session_lock); 2195 2196 ret = rds_ep_sendmsg(&sp->session_dataep, uiop, sendport, 2197 recvport); 2198 } else { 2199 RDS_DPRINTF2("rds_sendmsg", "SP(%p): state(%d) not connected", 2200 sp, sp->session_state); 2201 rw_exit(&sp->session_lock); 2202 } 2203 2204 RDS_DPRINTF4("rds_sendmsg", "Return: SP(%p) ret: %d", sp, ret); 2205 2206 return (ret); 2207 } 2208 2209 /* Note: This is called on the CQ handler thread */ 2210 void 2211 rds_received_msg(rds_ep_t *ep, rds_buf_t *bp) 2212 { 2213 mblk_t *mp, *mp1; 2214 rds_data_hdr_t *pktp, *pktp1; 2215 uint8_t *datap; 2216 rds_buf_t *bp1; 2217 rds_bufpool_t *rpool; 2218 uint_t npkts, ix; 2219 int ret; 2220 2221 RDS_DPRINTF4("rds_received_msg", "Enter: EP(%p)", ep); 2222 2223 pktp = (rds_data_hdr_t *)(uintptr_t)bp->buf_ds.ds_va; 2224 datap = ((uint8_t *)(uintptr_t)bp->buf_ds.ds_va) + RDS_DATA_HDR_SZ; 2225 npkts = pktp->dh_npkts; 2226 2227 /* increment rx pending here */ 2228 rpool = &ep->ep_rcvpool; 2229 mutex_enter(&rpool->pool_lock); 2230 rpool->pool_nbusy += npkts; 2231 mutex_exit(&rpool->pool_lock); 2232 2233 /* this will get freed by sockfs */ 2234 mp = esballoc(datap, pktp->dh_datalen, BPRI_HI, &bp->buf_frtn); 2235 if (mp == NULL) { 2236 RDS_DPRINTF2(LABEL, "EP(%p) BP(%p): allocb failed", 2237 ep, bp); 2238 rds_free_recv_buf(bp, npkts); 2239 return; 2240 } 2241 mp->b_wptr = datap + pktp->dh_datalen; 2242 mp->b_datap->db_type = M_DATA; 2243 2244 mp1 = mp; 2245 bp1 = bp->buf_nextp; 2246 while (bp1 != NULL) { 2247 pktp1 = (rds_data_hdr_t *)(uintptr_t)bp1->buf_ds.ds_va; 2248 datap = ((uint8_t *)(uintptr_t)bp1->buf_ds.ds_va) + 2249 RDS_DATA_HDR_SZ; 2250 2251 mp1->b_cont = esballoc(datap, pktp1->dh_datalen, 2252 BPRI_HI, &bp1->buf_frtn); 2253 if (mp1->b_cont == NULL) { 2254 RDS_DPRINTF2(LABEL, "EP(%p) BP(%p): allocb failed", 2255 ep, bp1); 2256 freemsg(mp); 2257 rds_free_recv_buf(bp1, pktp1->dh_npkts); 2258 return; 2259 } 2260 mp1 = mp1->b_cont; 2261 mp1->b_wptr = datap + pktp1->dh_datalen; 2262 mp1->b_datap->db_type = M_DATA; 2263 2264 bp1 = bp1->buf_nextp; 2265 } 2266 2267 RDS_INCR_RXPKTS_PEND(npkts); 2268 RDS_INCR_RXPKTS(npkts); 2269 RDS_INCR_RXBYTES(msgdsize(mp)); 2270 2271 RDS_DPRINTF5(LABEL, "Deliver Message: sendIP: 0x%x recvIP: 0x%x " 2272 "sendport: %d recvport: %d npkts: %d pktno: %d", ep->ep_remip, 2273 ep->ep_myip, pktp->dh_sendport, pktp->dh_recvport, 2274 npkts, pktp->dh_psn); 2275 2276 /* store the last buffer id, no lock needed */ 2277 if (npkts > 1) { 2278 ep->ep_rbufid = pktp1->dh_bufid; 2279 } else { 2280 ep->ep_rbufid = pktp->dh_bufid; 2281 } 2282 2283 ret = rds_deliver_new_msg(mp, ep->ep_myip, ep->ep_remip, 2284 pktp->dh_recvport, pktp->dh_sendport, ALL_ZONES); 2285 if (ret != 0) { 2286 if (ret == ENOSPC) { 2287 /* 2288 * The message is delivered but cannot take more, 2289 * stop further remote messages coming to this port 2290 */ 2291 RDS_DPRINTF3("rds_received_msg", "Port %d NO SPACE", 2292 pktp->dh_recvport); 2293 rds_stall_port(ep->ep_sp, pktp->dh_recvport, RDS_LOCAL); 2294 } else { 2295 RDS_DPRINTF2(LABEL, "rds_deliver_new_msg returned: %d", 2296 ret); 2297 } 2298 } 2299 2300 mutex_enter(&ep->ep_lock); 2301 /* The first message can come in before the conn est event */ 2302 if ((ep->ep_rdmacnt == 0) && (ep->ep_state == RDS_EP_STATE_CONNECTED)) { 2303 ep->ep_rdmacnt++; 2304 *(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va = ep->ep_rbufid; 2305 mutex_exit(&ep->ep_lock); 2306 2307 /* send acknowledgement */ 2308 RDS_INCR_TXACKS(); 2309 ret = ibt_post_send(ep->ep_chanhdl, &ep->ep_ackwr, 1, &ix); 2310 if (ret != IBT_SUCCESS) { 2311 RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send for " 2312 "acknowledgement failed: %d, SQ depth: %d", 2313 ep, ret, ep->ep_sndpool.pool_nbusy); 2314 mutex_enter(&ep->ep_lock); 2315 ep->ep_rdmacnt--; 2316 mutex_exit(&ep->ep_lock); 2317 } 2318 } else { 2319 /* no room to send acknowledgement */ 2320 mutex_exit(&ep->ep_lock); 2321 } 2322 2323 RDS_DPRINTF4("rds_received_msg", "Return: EP(%p)", ep); 2324 } 2325