/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/callb.h>
#include <sys/mac_provider.h>

#include <sys/ib/clients/eoib/eib_impl.h>
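/*
 * All the service threads in this file use the same CPR (checkpoint/
 * resume) protocol: each registers with the CPR framework through
 * CALLB_CPR_INIT() using a private ci_lock, brackets every wait on its
 * work cv with CALLB_CPR_SAFE_BEGIN()/CALLB_CPR_SAFE_END() so it can be
 * safely suspended while idle, and calls CALLB_CPR_EXIT() on its way
 * out.  CALLB_CPR_EXIT() releases ci_lock itself, which is why the
 * threads below acquire ci_lock before calling it but then only destroy
 * the mutex, without an explicit mutex_exit().
 */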
/*
 * Thread to handle EoIB events asynchronously
 */
void
eib_events_handler(eib_t *ss)
{
	eib_event_t *evi;
	eib_event_t *nxt;
	kmutex_t ci_lock;
	callb_cpr_t ci;

	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_EVENTS_HDLR);

wait_for_event:
	mutex_enter(&ss->ei_ev_lock);
	while ((evi = ss->ei_event) == NULL) {
		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		cv_wait(&ss->ei_ev_cv, &ss->ei_ev_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	/*
	 * Are we being asked to die ?
	 */
	if (evi->ev_code == EIB_EV_SHUTDOWN) {
		while (evi) {
			nxt = evi->ev_next;
			kmem_free(evi, sizeof (eib_event_t));
			evi = nxt;
		}
		ss->ei_event = NULL;
		mutex_exit(&ss->ei_ev_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	/*
	 * Otherwise, pull out the first entry from our work queue
	 */
	ss->ei_event = evi->ev_next;
	evi->ev_next = NULL;

	mutex_exit(&ss->ei_ev_lock);

	/*
	 * Process this event
	 *
	 * Note that we don't want to race with plumb/unplumb in this
	 * handler, since we may have to restart vnics or do stuff that
	 * may get re-initialized or released if we allowed plumb/unplumb
	 * to happen in parallel.
	 */
	eib_mac_set_nic_state(ss, EIB_NIC_RESTARTING);

	switch (evi->ev_code) {
	case EIB_EV_PORT_DOWN:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin EIB_EV_PORT_DOWN");

		eib_mac_link_down(ss, B_FALSE);

		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: End EIB_EV_PORT_DOWN");
		break;

	case EIB_EV_PORT_UP:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin EIB_EV_PORT_UP");

		eib_ibt_link_mod(ss);

		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: End EIB_EV_PORT_UP");
		break;

	case EIB_EV_PKEY_CHANGE:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin EIB_EV_PKEY_CHANGE");

		eib_ibt_link_mod(ss);

		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: End EIB_EV_PKEY_CHANGE");
		break;

	case EIB_EV_SGID_CHANGE:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin EIB_EV_SGID_CHANGE");

		eib_ibt_link_mod(ss);

		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: End EIB_EV_SGID_CHANGE");
		break;

	case EIB_EV_CLNT_REREG:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin EIB_EV_CLNT_REREG");

		eib_ibt_link_mod(ss);

		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: End EIB_EV_CLNT_REREG");
		break;
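	/*
	 * EIB_EV_PORT_UP, EIB_EV_PKEY_CHANGE, EIB_EV_SGID_CHANGE and
	 * EIB_EV_CLNT_REREG above differ only in what triggered them;
	 * all four funnel into eib_ibt_link_mod(), which is expected to
	 * re-evaluate the link state for us.
	 */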
	case EIB_EV_GW_UP:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin EIB_EV_GW_UP");

		/*
		 * EoIB nexus has notified us that our gateway is now
		 * reachable. Unless we already think it is reachable,
		 * mark it so in our records and try to resurrect dead
		 * vnics.
		 */
		mutex_enter(&ss->ei_vnic_lock);
		if (ss->ei_gw_unreachable == B_FALSE) {
			EIB_DPRINTF_DEBUG(ss->ei_instance,
			    "eib_events_handler: gw reachable");
			mutex_exit(&ss->ei_vnic_lock);

			EIB_DPRINTF_DEBUG(ss->ei_instance,
			    "eib_events_handler: End EIB_EV_GW_UP");
			break;
		}
		ss->ei_gw_unreachable = B_FALSE;
		mutex_exit(&ss->ei_vnic_lock);

		/*
		 * If we've not even started yet, we have nothing to do.
		 */
		if ((ss->ei_node_state->ns_nic_state & EIB_NIC_STARTED) == 0) {
			EIB_DPRINTF_DEBUG(ss->ei_instance,
			    "eib_events_handler: End EIB_EV_GW_UP");
			break;
		}

		if (eib_mac_hca_portstate(ss, NULL, NULL) != EIB_E_SUCCESS) {
			EIB_DPRINTF_DEBUG(ss->ei_instance,
			    "eib_events_handler: "
			    "HCA portstate failed, marking link down");

			eib_mac_link_down(ss, B_FALSE);
		} else {
			uint8_t vn0_mac[ETHERADDRL];

			EIB_DPRINTF_DEBUG(ss->ei_instance,
			    "eib_events_handler: "
			    "HCA portstate ok, resurrecting zombies");

			bcopy(eib_zero_mac, vn0_mac, ETHERADDRL);
			eib_vnic_resurrect_zombies(ss, vn0_mac);

			/*
			 * If we've resurrected the zombies because the gateway
			 * went down and came back, it is possible our unicast
			 * mac address changed from what it was earlier. If
			 * so, we need to update our unicast address with the
			 * mac layer before marking the link up.
			 */
			if (bcmp(vn0_mac, eib_zero_mac, ETHERADDRL) != 0) {
				EIB_DPRINTF_DEBUG(ss->ei_instance,
				    "eib_events_handler: updating unicast "
				    "addr to %x:%x:%x:%x:%x:%x", vn0_mac[0],
				    vn0_mac[1], vn0_mac[2], vn0_mac[3],
				    vn0_mac[4], vn0_mac[5]);

				mac_unicst_update(ss->ei_mac_hdl, vn0_mac);
			}

			EIB_DPRINTF_DEBUG(ss->ei_instance,
			    "eib_events_handler: eib_mac_link_up(B_FALSE)");

			eib_mac_link_up(ss, B_FALSE);
		}

		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: End EIB_EV_GW_UP");
		break;

	case EIB_EV_GW_INFO_UPDATE:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin EIB_EV_GW_INFO_UPDATE");

		if (evi->ev_arg) {
			eib_update_props(ss, (eib_gw_info_t *)(evi->ev_arg));
			kmem_free(evi->ev_arg, sizeof (eib_gw_info_t));
		}

		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: End EIB_EV_GW_INFO_UPDATE");
		break;

	case EIB_EV_MCG_DELETED:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin-End EIB_EV_MCG_DELETED");
		break;

	case EIB_EV_MCG_CREATED:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin-End EIB_EV_MCG_CREATED");
		break;

	case EIB_EV_GW_EPORT_DOWN:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin-End EIB_EV_GW_EPORT_DOWN");
		break;

	case EIB_EV_GW_DOWN:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin-End EIB_EV_GW_DOWN");
		break;
	}

	eib_mac_clr_nic_state(ss, EIB_NIC_RESTARTING);

	kmem_free(evi, sizeof (eib_event_t));
	goto wait_for_event;

	/*NOTREACHED*/
}

void
eib_svc_enqueue_event(eib_t *ss, eib_event_t *evi)
{
	eib_event_t *elem = NULL;
	eib_event_t *tail = NULL;

	mutex_enter(&ss->ei_ev_lock);

	/*
	 * Notice to shutdown has a higher priority than the
	 * rest and goes to the head of the list. Everything
	 * else goes at the end.
	 */
	if (evi->ev_code == EIB_EV_SHUTDOWN) {
		evi->ev_next = ss->ei_event;
		ss->ei_event = evi;
	} else {
		for (elem = ss->ei_event; elem; elem = elem->ev_next)
			tail = elem;

		if (tail)
			tail->ev_next = evi;
		else
			ss->ei_event = evi;
	}

	cv_signal(&ss->ei_ev_cv);
	mutex_exit(&ss->ei_ev_lock);
}
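/*
 * Callers of eib_svc_enqueue_event() hand over ownership of the event:
 * the events handler frees evi (and, for EIB_EV_GW_INFO_UPDATE, the
 * eib_gw_info_t hanging off ev_arg) once it has processed it.  A single
 * cv_signal() suffices since eib_events_handler() is the only thread
 * waiting on ei_ev_cv; see eib_gw_alive_cb() below for a typical
 * producer.
 */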
/*
 * Thread to refill channels with rwqes whenever they get low.
 */
void
eib_refill_rwqes(eib_t *ss)
{
	eib_chan_t *chan;
	kmutex_t ci_lock;
	callb_cpr_t ci;

	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_RWQES_REFILLER);

wait_for_refill_work:
	mutex_enter(&ss->ei_rxpost_lock);

	while ((ss->ei_rxpost == NULL) && (ss->ei_rxpost_die == 0)) {
		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		cv_wait(&ss->ei_rxpost_cv, &ss->ei_rxpost_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	/*
	 * Discard all requests for refill if we're being asked to die
	 */
	if (ss->ei_rxpost_die) {
		ss->ei_rxpost = NULL;
		mutex_exit(&ss->ei_rxpost_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}
	ASSERT(ss->ei_rxpost != NULL);

	/*
	 * Take the first element out of the queue
	 */
	chan = ss->ei_rxpost;
	ss->ei_rxpost = chan->ch_rxpost_next;
	chan->ch_rxpost_next = NULL;

	mutex_exit(&ss->ei_rxpost_lock);

	/*
	 * Try to post a bunch of recv wqes into this channel. If we
	 * fail, it means that we haven't even been able to post a
	 * single recv wqe. This is alarming, but there's nothing
	 * we can do. We just move on to the next channel needing
	 * our service.
	 */
	if (eib_chan_post_rx(ss, chan, NULL) != EIB_E_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance,
		    "eib_refill_rwqes: eib_chan_post_rx() failed");
	}

	/*
	 * Mark it to indicate that the refilling is done
	 */
	mutex_enter(&chan->ch_rx_lock);
	chan->ch_rx_refilling = B_FALSE;
	mutex_exit(&chan->ch_rx_lock);

	goto wait_for_refill_work;

	/*NOTREACHED*/
}
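/*
 * A note on the ch_rx_refilling flag cleared above: the rx completion
 * path that notices a channel running low on rwqes is expected to set
 * ch_rx_refilling before linking the channel onto ei_rxpost (that code
 * lives outside this file), so a channel cannot be queued for refill a
 * second time while a refill is already in progress.
 */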
/*
 * Thread to create or restart vnics when required
 */
void
eib_vnic_creator(eib_t *ss)
{
	eib_vnic_req_t *vrq;
	eib_vnic_req_t *elem;
	eib_vnic_req_t *nxt;
	kmutex_t ci_lock;
	callb_cpr_t ci;
	uint_t vr_req;
	uint8_t *vr_mac;
	int ret;
	int err;

	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_VNIC_CREATOR);

wait_for_vnic_req:
	mutex_enter(&ss->ei_vnic_req_lock);

	while ((vrq = ss->ei_vnic_req) == NULL) {
		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		cv_wait(&ss->ei_vnic_req_cv, &ss->ei_vnic_req_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	/*
	 * Pull out the first request
	 */
	ss->ei_vnic_req = vrq->vr_next;
	vrq->vr_next = NULL;

	vr_req = vrq->vr_req;
	vr_mac = vrq->vr_mac;

	switch (vr_req) {
	case EIB_CR_REQ_DIE:
	case EIB_CR_REQ_FLUSH:
		/*
		 * Cleanup all pending reqs and failed reqs
		 */
		for (elem = ss->ei_vnic_req; elem; elem = nxt) {
			nxt = elem->vr_next;
			kmem_free(elem, sizeof (eib_vnic_req_t));
		}
		for (elem = ss->ei_failed_vnic_req; elem; elem = nxt) {
			nxt = elem->vr_next;
			kmem_free(elem, sizeof (eib_vnic_req_t));
		}
		ss->ei_vnic_req = NULL;
		ss->ei_failed_vnic_req = NULL;
		ss->ei_pending_vnic_req = NULL;
		mutex_exit(&ss->ei_vnic_req_lock);

		break;

	case EIB_CR_REQ_NEW_VNIC:
		ss->ei_pending_vnic_req = vrq;
		mutex_exit(&ss->ei_vnic_req_lock);

		EIB_DPRINTF_DEBUG(ss->ei_instance, "eib_vnic_creator: "
		    "new vnic creation request for %x:%x:%x:%x:%x:%x, 0x%x",
		    vr_mac[0], vr_mac[1], vr_mac[2], vr_mac[3], vr_mac[4],
		    vr_mac[5], vrq->vr_vlan);

		/*
		 * Make sure we don't race with the plumb/unplumb code. If
		 * the eoib instance has been unplumbed already, we ignore any
		 * creation requests that may have been pending.
		 */
		eib_mac_set_nic_state(ss, EIB_NIC_STARTING);

		if ((ss->ei_node_state->ns_nic_state & EIB_NIC_STARTED) !=
		    EIB_NIC_STARTED) {
			mutex_enter(&ss->ei_vnic_req_lock);
			ss->ei_pending_vnic_req = NULL;
			mutex_exit(&ss->ei_vnic_req_lock);
			eib_mac_clr_nic_state(ss, EIB_NIC_STARTING);
			break;
		}

		/*
		 * Try to create a new vnic with the supplied parameters.
		 */
		err = 0;
		if ((ret = eib_vnic_create(ss, vrq->vr_mac, vrq->vr_vlan,
		    NULL, &err)) != EIB_E_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_creator: "
			    "eib_vnic_create(mac=%x:%x:%x:%x:%x:%x, vlan=0x%x) "
			    "failed, err=%d", vr_mac[0], vr_mac[1], vr_mac[2],
			    vr_mac[3], vr_mac[4], vr_mac[5], vrq->vr_vlan, err);
		}

		/*
		 * If we failed, add this vnic req to our failed list (unless
		 * the failure was EEXIST), so we won't try to create this
		 * vnic again. Whether we fail or succeed, we're done with
		 * processing this req, so clear the pending req.
		 */
		mutex_enter(&ss->ei_vnic_req_lock);
		if ((ret != EIB_E_SUCCESS) && (err != EEXIST)) {
			vrq->vr_next = ss->ei_failed_vnic_req;
			ss->ei_failed_vnic_req = vrq;
			vrq = NULL;
		}
		ss->ei_pending_vnic_req = NULL;
		mutex_exit(&ss->ei_vnic_req_lock);

		/*
		 * Notify the mac layer that it should retry its tx. If we
		 * created the vnic successfully, we'll now be able to send
		 * the packets; if we were unsuccessful, packets on this
		 * vnic will be dropped.
		 */
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_vnic_creator: calling mac_tx_update()");
		mac_tx_update(ss->ei_mac_hdl);

		eib_mac_clr_nic_state(ss, EIB_NIC_STARTING);
		break;

	default:
		EIB_DPRINTF_DEBUG(ss->ei_instance, "eib_vnic_creator: "
		    "unknown request 0x%x, ignoring", vrq->vr_req);
		break;
	}

	/*
	 * Free the current req and quit if we have to
	 */
	if (vrq) {
		kmem_free(vrq, sizeof (eib_vnic_req_t));
	}

	if (vr_req == EIB_CR_REQ_DIE) {
		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	goto wait_for_vnic_req;
	/*NOTREACHED*/
}
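/*
 * A note on ei_failed_vnic_req: a creation request that fails for any
 * reason other than EEXIST is parked on that list rather than freed,
 * and the request enqueue path presumably consults it so that the same
 * mac/vlan combination is not retried endlessly.  The list is emptied
 * only on an EIB_CR_REQ_FLUSH or EIB_CR_REQ_DIE request (see above).
 */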
/*
 * Thread to monitor tx wqes and update the mac layer when needed.
 * Note that this thread can only be started after the tx wqe pool
 * has been allocated and initialized.
 */
void
eib_monitor_tx_wqes(eib_t *ss)
{
	eib_wqe_pool_t *wp = ss->ei_tx;
	kmutex_t ci_lock;
	callb_cpr_t ci;

	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_TXWQES_MONITOR);

	ASSERT(wp != NULL);

monitor_wqe_status:
	mutex_enter(&wp->wp_lock);

	/*
	 * Wait till someone falls short of wqes
	 */
	while (wp->wp_status == 0) {
		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		cv_wait(&wp->wp_cv, &wp->wp_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	/*
	 * Have we been asked to die ?
	 */
	if (wp->wp_status & EIB_TXWQE_MONITOR_DIE) {
		mutex_exit(&wp->wp_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	ASSERT((wp->wp_status & EIB_TXWQE_SHORT) != 0);

	/*
	 * Start monitoring free wqes till they cross min threshold
	 */
	while ((wp->wp_nfree < EIB_NFREE_SWQES_HWM) &&
	    ((wp->wp_status & EIB_TXWQE_MONITOR_DIE) == 0)) {

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		cv_wait(&wp->wp_cv, &wp->wp_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	/*
	 * Have we been asked to die ?
	 */
	if (wp->wp_status & EIB_TXWQE_MONITOR_DIE) {
		mutex_exit(&wp->wp_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	ASSERT(wp->wp_nfree >= EIB_NFREE_SWQES_HWM);
	wp->wp_status &= (~EIB_TXWQE_SHORT);

	mutex_exit(&wp->wp_lock);

	/*
	 * Inform the mac layer that tx resources are now available
	 * and go back to monitoring
	 */
	if (ss->ei_mac_hdl) {
		mac_tx_update(ss->ei_mac_hdl);
	}
	goto monitor_wqe_status;

	/*NOTREACHED*/
}
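/*
 * eib_monitor_lso_bufs() below mirrors the tx wqe monitor above, with
 * EIB_LBUF_SHORT/EIB_LSO_FREE_BUFS_THRESH in place of EIB_TXWQE_SHORT/
 * EIB_NFREE_SWQES_HWM.  In both cases the shortage is flagged by the
 * resource consumer (outside this file) and cleared here only once the
 * free count climbs back over a higher threshold, which gives the
 * monitors hysteresis and keeps mac_tx_update() from being called on
 * every freed wqe or buffer.
 */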
/*
 * Thread to monitor lso bufs and update the mac layer as needed.
 * Note that this thread can only be started after the lso buckets
 * have been allocated and initialized.
 */
void
eib_monitor_lso_bufs(eib_t *ss)
{
	eib_lsobkt_t *bkt = ss->ei_lso;
	kmutex_t ci_lock;
	callb_cpr_t ci;

	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_LSOBUFS_MONITOR);

	ASSERT(bkt != NULL);

monitor_lso_status:
	mutex_enter(&bkt->bk_lock);

	/*
	 * Wait till someone falls short of LSO buffers or we're asked
	 * to die
	 */
	while (bkt->bk_status == 0) {
		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		cv_wait(&bkt->bk_cv, &bkt->bk_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	if (bkt->bk_status & EIB_LBUF_MONITOR_DIE) {
		mutex_exit(&bkt->bk_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	ASSERT((bkt->bk_status & EIB_LBUF_SHORT) != 0);

	/*
	 * Start monitoring free LSO buffers till there are enough
	 * free buffers available
	 */
	while ((bkt->bk_nfree < EIB_LSO_FREE_BUFS_THRESH) &&
	    ((bkt->bk_status & EIB_LBUF_MONITOR_DIE) == 0)) {

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		cv_wait(&bkt->bk_cv, &bkt->bk_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	if (bkt->bk_status & EIB_LBUF_MONITOR_DIE) {
		mutex_exit(&bkt->bk_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	/*
	 * We have enough lso buffers available now
	 */
	ASSERT(bkt->bk_nfree >= EIB_LSO_FREE_BUFS_THRESH);
	bkt->bk_status &= (~EIB_LBUF_SHORT);

	mutex_exit(&bkt->bk_lock);

	/*
	 * Inform the mac layer that tx lso resources are now available
	 * and go back to monitoring
	 */
	if (ss->ei_mac_hdl) {
		mac_tx_update(ss->ei_mac_hdl);
	}
	goto monitor_lso_status;

	/*NOTREACHED*/
}

/*
 * Thread to manage the keepalive requirements for vnics and the gateway.
 */
void
eib_manage_keepalives(eib_t *ss)
{
	eib_ka_vnics_t *elem;
	eib_ka_vnics_t *nxt;
	clock_t deadline;
	int64_t lbolt64;
	int err;
	kmutex_t ci_lock;
	callb_cpr_t ci;

	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_EVENTS_HDLR);

	mutex_enter(&ss->ei_ka_vnics_lock);

periodic_keepalive:
	deadline = ddi_get_lbolt() + ss->ei_gw_props->pp_vnic_ka_ticks;

	while ((ss->ei_ka_vnics_event &
	    (EIB_KA_VNICS_DIE | EIB_KA_VNICS_TIMED_OUT)) == 0) {
		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		if (cv_timedwait(&ss->ei_ka_vnics_cv, &ss->ei_ka_vnics_lock,
		    deadline) == -1) {
			ss->ei_ka_vnics_event |= EIB_KA_VNICS_TIMED_OUT;
		}

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	if (ss->ei_ka_vnics_event & EIB_KA_VNICS_DIE) {
		for (elem = ss->ei_ka_vnics; elem; elem = nxt) {
			nxt = elem->ka_next;
			kmem_free(elem, sizeof (eib_ka_vnics_t));
		}
		ss->ei_ka_vnics = NULL;
		mutex_exit(&ss->ei_ka_vnics_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	/*
	 * Are there any vnics that need keepalive management ?
	 */
	ss->ei_ka_vnics_event &= ~EIB_KA_VNICS_TIMED_OUT;
	if (ss->ei_ka_vnics == NULL)
		goto periodic_keepalive;

	/*
	 * Ok, we need to send vnic keepalives to our gateway. But first
	 * check if the gateway heartbeat is good as of this moment. Note
	 * that we need to get the lbolt value after acquiring ei_vnic_lock
	 * to ensure that ei_gw_last_heartbeat does not change before the
	 * comparison (to avoid a negative value in the comparison result
	 * causing us to incorrectly assume that the gateway heartbeat has
	 * stopped).
	 */
	mutex_enter(&ss->ei_vnic_lock);

	lbolt64 = ddi_get_lbolt64();

	if (ss->ei_gw_last_heartbeat != 0) {
		if ((lbolt64 - ss->ei_gw_last_heartbeat) >
		    ss->ei_gw_props->pp_gw_ka_ticks) {

			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_manage_keepalives: no keepalives from gateway "
			    "0x%x for hca_guid=0x%llx, port=0x%x, "
			    "last_gw_ka=0x%llx", ss->ei_gw_props->pp_gw_portid,
			    ss->ei_props->ep_hca_guid,
			    ss->ei_props->ep_port_num,
			    ss->ei_gw_last_heartbeat);

			for (elem = ss->ei_ka_vnics; elem; elem = nxt) {
				nxt = elem->ka_next;
				ss->ei_zombie_vnics |=
				    ((uint64_t)1 << elem->ka_vnic->vn_instance);
				kmem_free(elem, sizeof (eib_ka_vnics_t));
			}
			ss->ei_ka_vnics = NULL;
			ss->ei_gw_unreachable = B_TRUE;
			mutex_exit(&ss->ei_vnic_lock);

			eib_mac_link_down(ss, B_FALSE);

			goto periodic_keepalive;
		}
	}
	mutex_exit(&ss->ei_vnic_lock);

	for (elem = ss->ei_ka_vnics; elem; elem = elem->ka_next)
		(void) eib_fip_heartbeat(ss, elem->ka_vnic, &err);

	goto periodic_keepalive;
	/*NOTREACHED*/
}
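/*
 * The routines below tear down the service threads started above.  Each
 * follows the same pattern: deliver a poison request (EIB_EV_SHUTDOWN,
 * EIB_CR_REQ_DIE) or set a "die" flag under the appropriate lock, wake
 * the thread and then thread_join() on it, so that by the time the stop
 * routine returns the thread is guaranteed to have exited.  The one
 * exception is eib_flush_vnic_reqs(), which reuses the same request
 * mechanism to empty the creator's queues but leaves the thread running.
 */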
void
eib_stop_events_handler(eib_t *ss)
{
	eib_event_t *evi;

	evi = kmem_zalloc(sizeof (eib_event_t), KM_SLEEP);
	evi->ev_code = EIB_EV_SHUTDOWN;
	evi->ev_arg = NULL;

	eib_svc_enqueue_event(ss, evi);

	thread_join(ss->ei_events_handler);
}

void
eib_stop_refill_rwqes(eib_t *ss)
{
	mutex_enter(&ss->ei_rxpost_lock);

	ss->ei_rxpost_die = 1;

	cv_signal(&ss->ei_rxpost_cv);
	mutex_exit(&ss->ei_rxpost_lock);

	thread_join(ss->ei_rwqes_refiller);
}

void
eib_stop_vnic_creator(eib_t *ss)
{
	eib_vnic_req_t *vrq;

	vrq = kmem_zalloc(sizeof (eib_vnic_req_t), KM_SLEEP);
	vrq->vr_req = EIB_CR_REQ_DIE;
	vrq->vr_next = NULL;

	eib_vnic_enqueue_req(ss, vrq);

	thread_join(ss->ei_vnic_creator);
}

void
eib_stop_monitor_tx_wqes(eib_t *ss)
{
	eib_wqe_pool_t *wp = ss->ei_tx;

	mutex_enter(&wp->wp_lock);

	wp->wp_status |= EIB_TXWQE_MONITOR_DIE;

	cv_signal(&wp->wp_cv);
	mutex_exit(&wp->wp_lock);

	thread_join(ss->ei_txwqe_monitor);
}

int
eib_stop_monitor_lso_bufs(eib_t *ss, boolean_t force)
{
	eib_lsobkt_t *bkt = ss->ei_lso;

	mutex_enter(&bkt->bk_lock);

	/*
	 * If there are some buffers still not reaped and the force
	 * flag is not set, return without doing anything. Otherwise,
	 * stop the lso bufs monitor and wait for it to die.
	 */
	if ((bkt->bk_nelem != bkt->bk_nfree) && (force == B_FALSE)) {
		mutex_exit(&bkt->bk_lock);
		return (EIB_E_FAILURE);
	}

	bkt->bk_status |= EIB_LBUF_MONITOR_DIE;

	cv_signal(&bkt->bk_cv);
	mutex_exit(&bkt->bk_lock);

	thread_join(ss->ei_lsobufs_monitor);
	return (EIB_E_SUCCESS);
}

void
eib_stop_manage_keepalives(eib_t *ss)
{
	mutex_enter(&ss->ei_ka_vnics_lock);

	ss->ei_ka_vnics_event |= EIB_KA_VNICS_DIE;

	cv_signal(&ss->ei_ka_vnics_cv);
	mutex_exit(&ss->ei_ka_vnics_lock);

	thread_join(ss->ei_keepalives_manager);
}

void
eib_flush_vnic_reqs(eib_t *ss)
{
	eib_vnic_req_t *vrq;

	vrq = kmem_zalloc(sizeof (eib_vnic_req_t), KM_SLEEP);
	vrq->vr_req = EIB_CR_REQ_FLUSH;
	vrq->vr_next = NULL;

	eib_vnic_enqueue_req(ss, vrq);
}
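/*
 * The three routines below are DDI event callbacks invoked by the EoIB
 * nexus to report gateway news: gateway reachable again, login ack
 * received, and gateway info changed.  Where they need to queue work for
 * the events handler, they allocate with KM_NOSLEEP, presumably because
 * the callback context may not permit blocking, and simply drop the
 * notification if no memory is available.
 */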
/*ARGSUSED*/
void
eib_gw_alive_cb(dev_info_t *dip, ddi_eventcookie_t cookie, void *arg,
    void *impl_data)
{
	eib_t *ss = (eib_t *)arg;
	eib_event_t *evi;

	evi = kmem_zalloc(sizeof (eib_event_t), KM_NOSLEEP);
	if (evi == NULL) {
		EIB_DPRINTF_WARN(ss->ei_instance, "eib_gw_alive_cb: "
		    "no memory, ignoring this gateway alive event");
	} else {
		evi->ev_code = EIB_EV_GW_UP;
		evi->ev_arg = NULL;
		eib_svc_enqueue_event(ss, evi);
	}
}

/*ARGSUSED*/
void
eib_login_ack_cb(dev_info_t *dip, ddi_eventcookie_t cookie, void *arg,
    void *impl_data)
{
	eib_t *ss = (eib_t *)arg;
	uint8_t *pkt = (uint8_t *)impl_data;
	eib_login_data_t ld;

	/*
	 * We have received a login ack message from the gateway via the EoIB
	 * nexus (solicitation qpn). The packet is passed to us raw (unparsed)
	 * and we have to figure out if this is a vnic login ack.
	 */
	if (eib_fip_parse_login_ack(ss, pkt + EIB_GRH_SZ, &ld) == EIB_E_SUCCESS)
		eib_vnic_login_ack(ss, &ld);
}

/*ARGSUSED*/
void
eib_gw_info_cb(dev_info_t *dip, ddi_eventcookie_t cookie, void *arg,
    void *impl_data)
{
	eib_t *ss = (eib_t *)arg;
	eib_event_t *evi;

	evi = kmem_zalloc(sizeof (eib_event_t), KM_NOSLEEP);
	if (evi == NULL) {
		EIB_DPRINTF_WARN(ss->ei_instance, "eib_gw_info_cb: "
		    "no memory, ignoring this gateway props update event");
		return;
	}
	evi->ev_arg = kmem_zalloc(sizeof (eib_gw_info_t), KM_NOSLEEP);
	if (evi->ev_arg == NULL) {
		EIB_DPRINTF_WARN(ss->ei_instance, "eib_gw_info_cb: "
		    "no memory, ignoring this gateway props update event");
		kmem_free(evi, sizeof (eib_event_t));
		return;
	}
	bcopy(impl_data, evi->ev_arg, sizeof (eib_gw_info_t));
	evi->ev_code = EIB_EV_GW_INFO_UPDATE;

	eib_svc_enqueue_event(ss, evi);
}