/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/callb.h>
#include <sys/mac_provider.h>

#include <sys/ib/clients/eoib/eib_impl.h>

/*
 * Thread to handle EoIB events asynchronously
 */
void
eib_events_handler(eib_t *ss)
{
	eib_event_t *evi;
	eib_event_t *nxt;
	kmutex_t ci_lock;
	callb_cpr_t ci;

	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_EVENTS_HDLR);

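	/*
	 * CPR protocol: ci_lock protects the callb_cpr_t here and in
	 * the other service threads in this file.  Every cv_wait()
	 * below is bracketed by CALLB_CPR_SAFE_BEGIN() and
	 * CALLB_CPR_SAFE_END(), so the thread is marked safe for
	 * checkpoint/suspend only while it is actually blocked.
	 */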
wait_for_event:
	mutex_enter(&ss->ei_ev_lock);
	while ((evi = ss->ei_event) == NULL) {
		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		cv_wait(&ss->ei_ev_cv, &ss->ei_ev_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	/*
	 * Are we being asked to die ?
	 */
	if (evi->ev_code == EIB_EV_SHUTDOWN) {
		while (evi) {
			nxt = evi->ev_next;
			kmem_free(evi, sizeof (eib_event_t));
			evi = nxt;
		}
		ss->ei_event = NULL;
		mutex_exit(&ss->ei_ev_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	/*
	 * Otherwise, pull out the first entry from our work queue
	 */
	ss->ei_event = evi->ev_next;
	evi->ev_next = NULL;

	mutex_exit(&ss->ei_ev_lock);

	/*
	 * Process this event
	 *
	 * Note that we don't want to race with plumb/unplumb in this
	 * handler, since we may have to restart vnics or touch state
	 * that could get re-initialized or released if plumb/unplumb
	 * were allowed to proceed in parallel.
	 */
	eib_mac_set_nic_state(ss, EIB_NIC_RESTARTING);

	switch (evi->ev_code) {
	case EIB_EV_PORT_DOWN:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin EIB_EV_PORT_DOWN");

		eib_mac_link_down(ss, B_FALSE);

		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: End EIB_EV_PORT_DOWN");
		break;

	case EIB_EV_PORT_UP:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin EIB_EV_PORT_UP");

		eib_ibt_link_mod(ss);

		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: End EIB_EV_PORT_UP");
		break;

	case EIB_EV_PKEY_CHANGE:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin EIB_EV_PKEY_CHANGE");

		eib_ibt_link_mod(ss);

		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: End EIB_EV_PKEY_CHANGE");
		break;

	case EIB_EV_SGID_CHANGE:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin EIB_EV_SGID_CHANGE");

		eib_ibt_link_mod(ss);

		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: End EIB_EV_SGID_CHANGE");
		break;

	case EIB_EV_CLNT_REREG:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin EIB_EV_CLNT_REREG");

		eib_ibt_link_mod(ss);

		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: End EIB_EV_CLNT_REREG");
		break;

	case EIB_EV_GW_UP:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin EIB_EV_GW_UP");

		/*
		 * EoIB nexus has notified us that our gateway is now
		 * reachable. Unless we already think it is reachable,
		 * mark it so in our records and try to resurrect dead
		 * vnics.
		 */
		mutex_enter(&ss->ei_vnic_lock);
		if (ss->ei_gw_unreachable == B_FALSE) {
			EIB_DPRINTF_DEBUG(ss->ei_instance,
			    "eib_events_handler: gw reachable");
			mutex_exit(&ss->ei_vnic_lock);

			EIB_DPRINTF_DEBUG(ss->ei_instance,
			    "eib_events_handler: End EIB_EV_GW_UP");
			break;
		}
		ss->ei_gw_unreachable = B_FALSE;
		mutex_exit(&ss->ei_vnic_lock);

		/*
		 * If we've not even started yet, we have nothing to do.
		 */
		if ((ss->ei_node_state->ns_nic_state & EIB_NIC_STARTED) == 0) {
			EIB_DPRINTF_DEBUG(ss->ei_instance,
			    "eib_events_handler: End EIB_EV_GW_UP");
			break;
		}

		if (eib_mac_hca_portstate(ss, NULL, NULL) != EIB_E_SUCCESS) {
			EIB_DPRINTF_DEBUG(ss->ei_instance,
			    "eib_events_handler: "
			    "HCA portstate failed, marking link down");

			eib_mac_link_down(ss, B_FALSE);
		} else {
			uint8_t vn0_mac[ETHERADDRL];

			EIB_DPRINTF_DEBUG(ss->ei_instance,
			    "eib_events_handler: "
			    "HCA portstate ok, resurrecting zombies");

			bcopy(eib_zero_mac, vn0_mac, ETHERADDRL);
			eib_vnic_resurrect_zombies(ss, vn0_mac);

			/*
			 * If we've resurrected the zombies because the gateway
			 * went down and came back, it is possible our unicast
			 * mac address changed from what it was earlier.  If
			 * so, we need to update our unicast address with the
			 * mac layer before marking the link up.
			 */
			if (bcmp(vn0_mac, eib_zero_mac, ETHERADDRL) != 0) {
				EIB_DPRINTF_DEBUG(ss->ei_instance,
				    "eib_events_handler: updating unicast "
				    "addr to %x:%x:%x:%x:%x:%x", vn0_mac[0],
				    vn0_mac[1], vn0_mac[2], vn0_mac[3],
				    vn0_mac[4], vn0_mac[5]);

				mac_unicst_update(ss->ei_mac_hdl, vn0_mac);
			}

			EIB_DPRINTF_DEBUG(ss->ei_instance,
			    "eib_events_handler: eib_mac_link_up(B_FALSE)");

			eib_mac_link_up(ss, B_FALSE);
		}

		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: End EIB_EV_GW_UP");
		break;

	case EIB_EV_GW_INFO_UPDATE:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin EIB_EV_GW_INFO_UPDATE");

		if (evi->ev_arg) {
			eib_update_props(ss, (eib_gw_info_t *)(evi->ev_arg));
			kmem_free(evi->ev_arg, sizeof (eib_gw_info_t));
		}

		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: End EIB_EV_GW_INFO_UPDATE");
		break;

	case EIB_EV_MCG_DELETED:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin-End EIB_EV_MCG_DELETED");
		break;

	case EIB_EV_MCG_CREATED:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin-End EIB_EV_MCG_CREATED");
		break;

	case EIB_EV_GW_EPORT_DOWN:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin-End EIB_EV_GW_EPORT_DOWN");
		break;

	case EIB_EV_GW_DOWN:
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_events_handler: Begin-End EIB_EV_GW_DOWN");
		break;
	}

	eib_mac_clr_nic_state(ss, EIB_NIC_RESTARTING);

	kmem_free(evi, sizeof (eib_event_t));
	goto wait_for_event;

	/*NOTREACHED*/
}

void
eib_svc_enqueue_event(eib_t *ss, eib_event_t *evi)
{
	eib_event_t *elem = NULL;
	eib_event_t *tail = NULL;

	mutex_enter(&ss->ei_ev_lock);

	/*
	 * A shutdown notice has a higher priority than the rest and
	 * goes to the head of the list; everything else is appended
	 * at the tail.
	 */
	if (evi->ev_code == EIB_EV_SHUTDOWN) {
		evi->ev_next = ss->ei_event;
		ss->ei_event = evi;
	} else {
		for (elem = ss->ei_event; elem; elem = elem->ev_next)
			tail = elem;

		if (tail)
			tail->ev_next = evi;
		else
			ss->ei_event = evi;
	}

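	/*
	 * There is a single events handler thread consuming this
	 * list, so cv_signal() is sufficient to wake it.
	 */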
	cv_signal(&ss->ei_ev_cv);
	mutex_exit(&ss->ei_ev_lock);
}

/*
 * Thread to refill channels with rwqes whenever they get low.
 */
void
eib_refill_rwqes(eib_t *ss)
{
	eib_chan_t *chan;
	kmutex_t ci_lock;
	callb_cpr_t ci;

	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_RWQES_REFILLER);

wait_for_refill_work:
	mutex_enter(&ss->ei_rxpost_lock);

	while ((ss->ei_rxpost == NULL) && (ss->ei_rxpost_die == 0)) {
		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		cv_wait(&ss->ei_rxpost_cv, &ss->ei_rxpost_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	/*
	 * Discard all requests for refill if we're being asked to die
	 */
	if (ss->ei_rxpost_die) {
		ss->ei_rxpost = NULL;
		mutex_exit(&ss->ei_rxpost_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}
	ASSERT(ss->ei_rxpost != NULL);

	/*
	 * Take the first element out of the queue
	 */
	chan = ss->ei_rxpost;
	ss->ei_rxpost = chan->ch_rxpost_next;
	chan->ch_rxpost_next = NULL;

	mutex_exit(&ss->ei_rxpost_lock);

	/*
	 * Try to post a bunch of recv wqes into this channel. If we
	 * fail, it means that we haven't even been able to post a
	 * single recv wqe.  This is alarming, but there's nothing
	 * we can do. We just move on to the next channel needing
	 * our service.
	 */
	if (eib_chan_post_rx(ss, chan, NULL) != EIB_E_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance,
		    "eib_refill_rwqes: eib_chan_post_rx() failed");
	}

	/*
	 * Mark it to indicate that the refilling is done
	 */
	mutex_enter(&chan->ch_rx_lock);
	chan->ch_rx_refilling = B_FALSE;
	mutex_exit(&chan->ch_rx_lock);
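	/*
	 * Clearing ch_rx_refilling lets this channel be queued on
	 * ei_rxpost again the next time it runs low on rwqes
	 * (presumably the rx path uses the flag to avoid re-queueing
	 * a channel that is already being refilled).
	 */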

	goto wait_for_refill_work;

	/*NOTREACHED*/
}

/*
 * Thread to create or restart vnics when required
 */
void
eib_vnic_creator(eib_t *ss)
{
	eib_vnic_req_t *vrq;
	eib_vnic_req_t *elem;
	eib_vnic_req_t *nxt;
	kmutex_t ci_lock;
	callb_cpr_t ci;
	uint_t vr_req;
	uint8_t *vr_mac;
	int ret;
	int err;

	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_VNIC_CREATOR);

wait_for_vnic_req:
	mutex_enter(&ss->ei_vnic_req_lock);

	while ((vrq = ss->ei_vnic_req) == NULL) {
		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		cv_wait(&ss->ei_vnic_req_cv, &ss->ei_vnic_req_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	/*
	 * Pull out the first request
	 */
	ss->ei_vnic_req = vrq->vr_next;
	vrq->vr_next = NULL;

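	/*
	 * Snapshot vr_req and vr_mac now; vrq itself may be freed or
	 * moved to the failed list before we are done with this
	 * request.
	 */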
	vr_req = vrq->vr_req;
	vr_mac = vrq->vr_mac;

	switch (vr_req) {
	case EIB_CR_REQ_DIE:
	case EIB_CR_REQ_FLUSH:
		/*
		 * Cleanup all pending reqs and failed reqs
		 */
		for (elem = ss->ei_vnic_req; elem; elem = nxt) {
			nxt = elem->vr_next;
			kmem_free(elem, sizeof (eib_vnic_req_t));
		}
		for (elem = ss->ei_failed_vnic_req; elem; elem = nxt) {
			nxt = elem->vr_next;
			kmem_free(elem, sizeof (eib_vnic_req_t));
		}
		ss->ei_vnic_req = NULL;
		ss->ei_failed_vnic_req = NULL;
		ss->ei_pending_vnic_req = NULL;
		mutex_exit(&ss->ei_vnic_req_lock);

		break;

	case EIB_CR_REQ_NEW_VNIC:
		ss->ei_pending_vnic_req = vrq;
		mutex_exit(&ss->ei_vnic_req_lock);

		EIB_DPRINTF_DEBUG(ss->ei_instance, "eib_vnic_creator: "
		    "new vnic creation request for %x:%x:%x:%x:%x:%x, 0x%x",
		    vr_mac[0], vr_mac[1], vr_mac[2], vr_mac[3], vr_mac[4],
		    vr_mac[5], vrq->vr_vlan);

		/*
		 * Make sure we don't race with the plumb/unplumb code. If
		 * the eoib instance has been unplumbed already, we ignore any
		 * creation requests that may have been pending.
		 */
		eib_mac_set_nic_state(ss, EIB_NIC_STARTING);

		if ((ss->ei_node_state->ns_nic_state & EIB_NIC_STARTED) !=
		    EIB_NIC_STARTED) {
			mutex_enter(&ss->ei_vnic_req_lock);
			ss->ei_pending_vnic_req = NULL;
			mutex_exit(&ss->ei_vnic_req_lock);
			eib_mac_clr_nic_state(ss, EIB_NIC_STARTING);
			break;
		}

		/*
		 * Try to create a new vnic with the supplied parameters.
		 */
		err = 0;
		if ((ret = eib_vnic_create(ss, vrq->vr_mac, vrq->vr_vlan,
		    NULL, &err)) != EIB_E_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_creator: "
			    "eib_vnic_create(mac=%x:%x:%x:%x:%x:%x, vlan=0x%x) "
			    "failed, ret=%d", vr_mac[0], vr_mac[1], vr_mac[2],
			    vr_mac[3], vr_mac[4], vr_mac[5], vrq->vr_vlan, err);
		}

		/*
		 * If we failed, add this vnic req to our failed list (unless
		 * it already exists there), so we won't try to create this
		 * vnic again.  Whether we fail or succeed, we're done with
		 * processing this req, so clear the pending req.
		 */
		mutex_enter(&ss->ei_vnic_req_lock);
		if ((ret != EIB_E_SUCCESS) && (err != EEXIST)) {
			vrq->vr_next = ss->ei_failed_vnic_req;
			ss->ei_failed_vnic_req = vrq;
			vrq = NULL;
		}
		ss->ei_pending_vnic_req = NULL;
		mutex_exit(&ss->ei_vnic_req_lock);

		/*
		 * Notify the mac layer that it should retry its tx again; if
		 * the vnic was created successfully, the retried packets will
		 * now go out on it, otherwise they will be dropped.
		 */
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_vnic_creator: calling mac_tx_update()");
		mac_tx_update(ss->ei_mac_hdl);

		eib_mac_clr_nic_state(ss, EIB_NIC_STARTING);
		break;

	default:
		EIB_DPRINTF_DEBUG(ss->ei_instance, "eib_vnic_creator: "
		    "unknown request 0x%x, ignoring", vrq->vr_req);
		break;
	}

	/*
	 * Free the current req and quit if we have to
	 */
	if (vrq) {
		kmem_free(vrq, sizeof (eib_vnic_req_t));
	}

	if (vr_req == EIB_CR_REQ_DIE) {
		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	goto wait_for_vnic_req;
	/*NOTREACHED*/
}

/*
 * Thread to monitor tx wqes and update the mac layer when needed.
 * Note that this thread can only be started after the tx wqe pool
 * has been allocated and initialized.
 */
void
eib_monitor_tx_wqes(eib_t *ss)
{
	eib_wqe_pool_t *wp = ss->ei_tx;
	kmutex_t ci_lock;
	callb_cpr_t ci;

	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_TXWQES_MONITOR);

	ASSERT(wp != NULL);

monitor_wqe_status:
	mutex_enter(&wp->wp_lock);

	/*
	 * Wait till someone falls short of wqes
	 */
	while (wp->wp_status == 0) {
		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		cv_wait(&wp->wp_cv, &wp->wp_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	/*
	 * Have we been asked to die ?
	 */
	if (wp->wp_status & EIB_TXWQE_MONITOR_DIE) {
		mutex_exit(&wp->wp_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	ASSERT((wp->wp_status & EIB_TXWQE_SHORT) != 0);

	/*
	 * Start monitoring free wqes till they cross the high-water mark
	 */
	while ((wp->wp_nfree < EIB_NFREE_SWQES_HWM) &&
	    ((wp->wp_status & EIB_TXWQE_MONITOR_DIE) == 0)) {

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		cv_wait(&wp->wp_cv, &wp->wp_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}
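	/*
	 * Note that we wait for the free count to climb back to the
	 * high-water mark, not merely to become nonzero, before
	 * clearing EIB_TXWQE_SHORT; this avoids waking the mac layer
	 * repeatedly while the pool hovers near empty.
	 */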

	/*
	 * Have we been asked to die ?
	 */
	if (wp->wp_status & EIB_TXWQE_MONITOR_DIE) {
		mutex_exit(&wp->wp_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	ASSERT(wp->wp_nfree >= EIB_NFREE_SWQES_HWM);
	wp->wp_status &= (~EIB_TXWQE_SHORT);

	mutex_exit(&wp->wp_lock);

	/*
	 * Inform the mac layer that tx resources are now available
	 * and go back to monitoring
	 */
	if (ss->ei_mac_hdl) {
		mac_tx_update(ss->ei_mac_hdl);
	}
	goto monitor_wqe_status;

	/*NOTREACHED*/
}

/*
 * Thread to monitor lso bufs and update the mac layer as needed.
 * Note that this thread can only be started after the lso buckets
 * have been allocated and initialized.
 */
void
eib_monitor_lso_bufs(eib_t *ss)
{
	eib_lsobkt_t *bkt = ss->ei_lso;
	kmutex_t ci_lock;
	callb_cpr_t ci;

	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_LSOBUFS_MONITOR);

	ASSERT(bkt != NULL);

monitor_lso_status:
	mutex_enter(&bkt->bk_lock);

	/*
	 * Wait till someone falls short of LSO buffers or we're asked
	 * to die
	 */
	while (bkt->bk_status == 0) {
		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		cv_wait(&bkt->bk_cv, &bkt->bk_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	if (bkt->bk_status & EIB_LBUF_MONITOR_DIE) {
		mutex_exit(&bkt->bk_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	ASSERT((bkt->bk_status & EIB_LBUF_SHORT) != 0);

	/*
	 * Start monitoring free LSO buffers till there are enough
	 * free buffers available
	 */
	while ((bkt->bk_nfree < EIB_LSO_FREE_BUFS_THRESH) &&
	    ((bkt->bk_status & EIB_LBUF_MONITOR_DIE) == 0)) {

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		cv_wait(&bkt->bk_cv, &bkt->bk_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	if (bkt->bk_status & EIB_LBUF_MONITOR_DIE) {
		mutex_exit(&bkt->bk_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	/*
	 * We have enough lso buffers available now
	 */
	ASSERT(bkt->bk_nfree >= EIB_LSO_FREE_BUFS_THRESH);
	bkt->bk_status &= (~EIB_LBUF_SHORT);

	mutex_exit(&bkt->bk_lock);

	/*
	 * Inform the mac layer that tx lso resources are now available
	 * and go back to monitoring
	 */
	if (ss->ei_mac_hdl) {
		mac_tx_update(ss->ei_mac_hdl);
	}
	goto monitor_lso_status;

	/*NOTREACHED*/
}

/*
 * Thread to manage the keepalive requirements for vnics and the gateway.
 */
void
eib_manage_keepalives(eib_t *ss)
{
	eib_ka_vnics_t *elem;
	eib_ka_vnics_t *nxt;
	clock_t deadline;
	int64_t lbolt64;
	int err;
	kmutex_t ci_lock;
	callb_cpr_t ci;

	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_EVENTS_HDLR);

	mutex_enter(&ss->ei_ka_vnics_lock);

periodic_keepalive:
	deadline = ddi_get_lbolt() + ss->ei_gw_props->pp_vnic_ka_ticks;
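	/*
	 * cv_timedwait() returns -1 when the deadline passes without
	 * a signal; we record that as EIB_KA_VNICS_TIMED_OUT and
	 * treat it as the cue to send this period's keepalives.
	 */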

	while ((ss->ei_ka_vnics_event &
	    (EIB_KA_VNICS_DIE | EIB_KA_VNICS_TIMED_OUT)) == 0) {
		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		if (cv_timedwait(&ss->ei_ka_vnics_cv, &ss->ei_ka_vnics_lock,
		    deadline) == -1) {
			ss->ei_ka_vnics_event |= EIB_KA_VNICS_TIMED_OUT;
		}

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	if (ss->ei_ka_vnics_event & EIB_KA_VNICS_DIE) {
		for (elem = ss->ei_ka_vnics; elem; elem = nxt) {
			nxt = elem->ka_next;
			kmem_free(elem, sizeof (eib_ka_vnics_t));
		}
		ss->ei_ka_vnics = NULL;
		mutex_exit(&ss->ei_ka_vnics_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	/*
	 * Are there any vnics that need keepalive management ?
	 */
	ss->ei_ka_vnics_event &= ~EIB_KA_VNICS_TIMED_OUT;
	if (ss->ei_ka_vnics == NULL)
		goto periodic_keepalive;

	/*
	 * Ok, we need to send vnic keepalives to our gateway. But first
	 * check if the gateway heartbeat is good as of this moment.  Note
	 * that we need to get the lbolt value after acquiring ei_vnic_lock
	 * to ensure that ei_gw_last_heartbeat does not change before the
	 * comparison (to avoid a negative value in the comparison result
	 * causing us to incorrectly assume that the gateway heartbeat has
	 * stopped).
	 */
	mutex_enter(&ss->ei_vnic_lock);

	lbolt64 = ddi_get_lbolt64();

	if (ss->ei_gw_last_heartbeat != 0) {
		if ((lbolt64 - ss->ei_gw_last_heartbeat) >
		    ss->ei_gw_props->pp_gw_ka_ticks) {

			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_manage_keepalives: no keepalives from gateway "
			    "0x%x for hca_guid=0x%llx, port=0x%x, "
			    "last_gw_ka=0x%llx", ss->ei_gw_props->pp_gw_portid,
			    ss->ei_props->ep_hca_guid,
			    ss->ei_props->ep_port_num,
			    ss->ei_gw_last_heartbeat);

			for (elem = ss->ei_ka_vnics; elem; elem = nxt) {
				nxt = elem->ka_next;
				ss->ei_zombie_vnics |=
				    ((uint64_t)1 << elem->ka_vnic->vn_instance);
				kmem_free(elem, sizeof (eib_ka_vnics_t));
			}
			ss->ei_ka_vnics = NULL;
			ss->ei_gw_unreachable = B_TRUE;
			mutex_exit(&ss->ei_vnic_lock);

			eib_mac_link_down(ss, B_FALSE);

			goto periodic_keepalive;
		}
	}
	mutex_exit(&ss->ei_vnic_lock);

	for (elem = ss->ei_ka_vnics; elem; elem = elem->ka_next)
		(void) eib_fip_heartbeat(ss, elem->ka_vnic, &err);
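	/*
	 * Any failure from eib_fip_heartbeat() is ignored here; the
	 * keepalive is simply attempted again on the next period.
	 */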

	goto periodic_keepalive;
	/*NOTREACHED*/
}

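/*
 * The stop routines below all follow the same pattern: post a die
 * request (or set a die flag) for the service thread in question,
 * wake it up, and thread_join() it to wait until it has exited.
 */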
void
eib_stop_events_handler(eib_t *ss)
{
	eib_event_t *evi;

	evi = kmem_zalloc(sizeof (eib_event_t), KM_SLEEP);
	evi->ev_code = EIB_EV_SHUTDOWN;
	evi->ev_arg = NULL;

	eib_svc_enqueue_event(ss, evi);

	thread_join(ss->ei_events_handler);
}

void
eib_stop_refill_rwqes(eib_t *ss)
{
	mutex_enter(&ss->ei_rxpost_lock);

	ss->ei_rxpost_die = 1;

	cv_signal(&ss->ei_rxpost_cv);
	mutex_exit(&ss->ei_rxpost_lock);

	thread_join(ss->ei_rwqes_refiller);
}

void
eib_stop_vnic_creator(eib_t *ss)
{
	eib_vnic_req_t *vrq;

	vrq = kmem_zalloc(sizeof (eib_vnic_req_t), KM_SLEEP);
	vrq->vr_req = EIB_CR_REQ_DIE;
	vrq->vr_next = NULL;

	eib_vnic_enqueue_req(ss, vrq);

	thread_join(ss->ei_vnic_creator);
}

void
eib_stop_monitor_tx_wqes(eib_t *ss)
{
	eib_wqe_pool_t *wp = ss->ei_tx;

	mutex_enter(&wp->wp_lock);

	wp->wp_status |= EIB_TXWQE_MONITOR_DIE;

	cv_signal(&wp->wp_cv);
	mutex_exit(&wp->wp_lock);

	thread_join(ss->ei_txwqe_monitor);
}

int
eib_stop_monitor_lso_bufs(eib_t *ss, boolean_t force)
{
	eib_lsobkt_t *bkt = ss->ei_lso;

	mutex_enter(&bkt->bk_lock);

	/*
	 * If there are some buffers still not reaped and the force
	 * flag is not set, return without doing anything. Otherwise,
	 * stop the lso bufs monitor and wait for it to die.
	 */
	if ((bkt->bk_nelem != bkt->bk_nfree) && (force == B_FALSE)) {
		mutex_exit(&bkt->bk_lock);
		return (EIB_E_FAILURE);
	}

	bkt->bk_status |= EIB_LBUF_MONITOR_DIE;

	cv_signal(&bkt->bk_cv);
	mutex_exit(&bkt->bk_lock);

	thread_join(ss->ei_lsobufs_monitor);
	return (EIB_E_SUCCESS);
}

void
eib_stop_manage_keepalives(eib_t *ss)
{
	mutex_enter(&ss->ei_ka_vnics_lock);

	ss->ei_ka_vnics_event |= EIB_KA_VNICS_DIE;

	cv_signal(&ss->ei_ka_vnics_cv);
	mutex_exit(&ss->ei_ka_vnics_lock);

	thread_join(ss->ei_keepalives_manager);
}

void
eib_flush_vnic_reqs(eib_t *ss)
{
	eib_vnic_req_t *vrq;

	vrq = kmem_zalloc(sizeof (eib_vnic_req_t), KM_SLEEP);
	vrq->vr_req = EIB_CR_REQ_FLUSH;
	vrq->vr_next = NULL;

	eib_vnic_enqueue_req(ss, vrq);
}

/*ARGSUSED*/
void
eib_gw_alive_cb(dev_info_t *dip, ddi_eventcookie_t cookie, void *arg,
    void *impl_data)
{
	eib_t *ss = (eib_t *)arg;
	eib_event_t *evi;

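	/*
	 * This callback may be invoked from a context where blocking
	 * is undesirable, so we allocate with KM_NOSLEEP and simply
	 * drop the event if the allocation fails.
	 */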
	evi = kmem_zalloc(sizeof (eib_event_t), KM_NOSLEEP);
	if (evi == NULL) {
		EIB_DPRINTF_WARN(ss->ei_instance, "eib_gw_alive_cb: "
		    "no memory, ignoring this gateway alive event");
	} else {
		evi->ev_code = EIB_EV_GW_UP;
		evi->ev_arg = NULL;
		eib_svc_enqueue_event(ss, evi);
	}
}

/*ARGSUSED*/
void
eib_login_ack_cb(dev_info_t *dip, ddi_eventcookie_t cookie, void *arg,
    void *impl_data)
{
	eib_t *ss = (eib_t *)arg;
	uint8_t *pkt = (uint8_t *)impl_data;
	eib_login_data_t ld;

	/*
	 * We have received a login ack message from the gateway via the EoIB
	 * nexus (solicitation qpn). The packet is passed to us raw (unparsed)
	 * and we have to figure out if this is a vnic login ack.
	 */
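	/*
	 * The payload is preceded by the IB Global Route Header,
	 * which we skip (pkt + EIB_GRH_SZ) before handing the
	 * message to the FIP parser.
	 */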
	if (eib_fip_parse_login_ack(ss, pkt + EIB_GRH_SZ, &ld) == EIB_E_SUCCESS)
		eib_vnic_login_ack(ss, &ld);
}

/*ARGSUSED*/
void
eib_gw_info_cb(dev_info_t *dip, ddi_eventcookie_t cookie, void *arg,
    void *impl_data)
{
	eib_t *ss = (eib_t *)arg;
	eib_event_t *evi;

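	/*
	 * The gateway info is copied into a separately allocated
	 * buffer; the events handler frees it after updating the
	 * instance properties.
	 */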
	evi = kmem_zalloc(sizeof (eib_event_t), KM_NOSLEEP);
	if (evi == NULL) {
		EIB_DPRINTF_WARN(ss->ei_instance, "eib_gw_info_cb: "
		    "no memory, ignoring this gateway props update event");
		return;
	}
	evi->ev_arg = kmem_zalloc(sizeof (eib_gw_info_t), KM_NOSLEEP);
	if (evi->ev_arg == NULL) {
		EIB_DPRINTF_WARN(ss->ei_instance, "eib_gw_info_cb: "
		    "no memory, ignoring this gateway props update event");
		kmem_free(evi, sizeof (eib_event_t));
		return;
	}
	bcopy(impl_data, evi->ev_arg, sizeof (eib_gw_info_t));
	evi->ev_code = EIB_EV_GW_INFO_UPDATE;

	eib_svc_enqueue_event(ss, evi);
}