xref: /titanic_50/usr/src/uts/common/io/ib/clients/eoib/eib_svc.c (revision 6a634c9dca3093f3922e4b7ab826d7bdf17bf78e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/kmem.h>
28 #include <sys/conf.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/ksynch.h>
32 #include <sys/callb.h>
33 #include <sys/mac_provider.h>
34 
35 #include <sys/ib/clients/eoib/eib_impl.h>
36 
37 /*
38  * Thread to handle EoIB events asynchronously
39  */
40 void
eib_events_handler(eib_t * ss)41 eib_events_handler(eib_t *ss)
42 {
43 	eib_event_t *evi;
44 	eib_event_t *nxt;
45 	kmutex_t ci_lock;
46 	callb_cpr_t ci;
47 
48 	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
49 	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_EVENTS_HDLR);
50 
51 wait_for_event:
52 	mutex_enter(&ss->ei_ev_lock);
53 	while ((evi = ss->ei_event) == NULL) {
54 		mutex_enter(&ci_lock);
55 		CALLB_CPR_SAFE_BEGIN(&ci);
56 		mutex_exit(&ci_lock);
57 
58 		cv_wait(&ss->ei_ev_cv, &ss->ei_ev_lock);
59 
60 		mutex_enter(&ci_lock);
61 		CALLB_CPR_SAFE_END(&ci, &ci_lock);
62 		mutex_exit(&ci_lock);
63 	}
64 
65 	/*
66 	 * Are we being asked to die ?
67 	 */
68 	if (evi->ev_code == EIB_EV_SHUTDOWN) {
69 		while (evi) {
70 			nxt = evi->ev_next;
71 			kmem_free(evi, sizeof (eib_event_t));
72 			evi = nxt;
73 		}
74 		ss->ei_event = NULL;
75 		mutex_exit(&ss->ei_ev_lock);
76 
77 		mutex_enter(&ci_lock);
78 		CALLB_CPR_EXIT(&ci);
79 		mutex_destroy(&ci_lock);
80 
81 		return;
82 	}
83 
84 	/*
85 	 * Otherwise, pull out the first entry from our work queue
86 	 */
87 	ss->ei_event = evi->ev_next;
88 	evi->ev_next = NULL;
89 
90 	mutex_exit(&ss->ei_ev_lock);
91 
92 	/*
93 	 * Process this event
94 	 *
95 	 * Note that we don't want to race with plumb/unplumb in this
96 	 * handler, since we may have to restart vnics or do stuff that
97 	 * may get re-initialized or released if we allowed plumb/unplumb
98 	 * to happen in parallel.
99 	 */
100 	eib_mac_set_nic_state(ss, EIB_NIC_RESTARTING);
101 
102 	switch (evi->ev_code) {
103 	case EIB_EV_PORT_DOWN:
104 		EIB_DPRINTF_DEBUG(ss->ei_instance,
105 		    "eib_events_handler: Begin EIB_EV_PORT_DOWN");
106 
107 		eib_mac_link_down(ss, B_FALSE);
108 
109 		EIB_DPRINTF_DEBUG(ss->ei_instance,
110 		    "eib_events_handler: End EIB_EV_PORT_DOWN");
111 		break;
112 
113 	case EIB_EV_PORT_UP:
114 		EIB_DPRINTF_DEBUG(ss->ei_instance,
115 		    "eib_events_handler: Begin EIB_EV_PORT_UP");
116 
117 		eib_ibt_link_mod(ss);
118 
119 		EIB_DPRINTF_DEBUG(ss->ei_instance,
120 		    "eib_events_handler: End EIB_EV_PORT_UP");
121 		break;
122 
123 	case EIB_EV_PKEY_CHANGE:
124 		EIB_DPRINTF_DEBUG(ss->ei_instance,
125 		    "eib_events_handler: Begin EIB_EV_PKEY_CHANGE");
126 
127 		eib_ibt_link_mod(ss);
128 
129 		EIB_DPRINTF_DEBUG(ss->ei_instance,
130 		    "eib_events_handler: End EIB_EV_PKEY_CHANGE");
131 		break;
132 
133 	case EIB_EV_SGID_CHANGE:
134 		EIB_DPRINTF_DEBUG(ss->ei_instance,
135 		    "eib_events_handler: Begin EIB_EV_SGID_CHANGE");
136 
137 		eib_ibt_link_mod(ss);
138 
139 		EIB_DPRINTF_DEBUG(ss->ei_instance,
140 		    "eib_events_handler: End EIB_EV_SGID_CHANGE");
141 		break;
142 
143 	case EIB_EV_CLNT_REREG:
144 		EIB_DPRINTF_DEBUG(ss->ei_instance,
145 		    "eib_events_handler: Begin EIB_EV_CLNT_REREG");
146 
147 		eib_ibt_link_mod(ss);
148 
149 		EIB_DPRINTF_DEBUG(ss->ei_instance,
150 		    "eib_events_handler: End EIB_EV_CLNT_REREG");
151 		break;
152 
153 	case EIB_EV_GW_UP:
154 		EIB_DPRINTF_DEBUG(ss->ei_instance,
155 		    "eib_events_handler: Begin EIB_EV_GW_UP");
156 
157 		/*
158 		 * EoIB nexus has notified us that our gateway is now
159 		 * reachable. Unless we already think it is reachable,
160 		 * mark it so in our records and try to resurrect dead
161 		 * vnics.
162 		 */
163 		mutex_enter(&ss->ei_vnic_lock);
164 		if (ss->ei_gw_unreachable == B_FALSE) {
165 			EIB_DPRINTF_DEBUG(ss->ei_instance,
166 			    "eib_events_handler: gw reachable");
167 			mutex_exit(&ss->ei_vnic_lock);
168 
169 			EIB_DPRINTF_DEBUG(ss->ei_instance,
170 			    "eib_events_handler: End EIB_EV_GW_UP");
171 			break;
172 		}
173 		ss->ei_gw_unreachable = B_FALSE;
174 		mutex_exit(&ss->ei_vnic_lock);
175 
176 		/*
177 		 * If we've not even started yet, we have nothing to do.
178 		 */
179 		if ((ss->ei_node_state->ns_nic_state & EIB_NIC_STARTED) == 0) {
180 			EIB_DPRINTF_DEBUG(ss->ei_instance,
181 			    "eib_events_handler: End EIB_EV_GW_UP");
182 			break;
183 		}
184 
185 		if (eib_mac_hca_portstate(ss, NULL, NULL) != EIB_E_SUCCESS) {
186 			EIB_DPRINTF_DEBUG(ss->ei_instance,
187 			    "eib_events_handler: "
188 			    "HCA portstate failed, marking link down");
189 
190 			eib_mac_link_down(ss, B_FALSE);
191 		} else {
192 			uint8_t vn0_mac[ETHERADDRL];
193 
194 			EIB_DPRINTF_DEBUG(ss->ei_instance,
195 			    "eib_events_handler: "
196 			    "HCA portstate ok, resurrecting zombies");
197 
198 			bcopy(eib_zero_mac, vn0_mac, ETHERADDRL);
199 			eib_vnic_resurrect_zombies(ss, vn0_mac);
200 
201 			/*
202 			 * If we've resurrected the zombies because the gateway
203 			 * went down and came back, it is possible our unicast
204 			 * mac address changed from what it was earlier. If
205 			 * so, we need to update our unicast address with the
206 			 * mac layer before marking the link up.
207 			 */
208 			if (bcmp(vn0_mac, eib_zero_mac, ETHERADDRL) != 0) {
209 				EIB_DPRINTF_DEBUG(ss->ei_instance,
210 				    "eib_events_handler: updating unicast "
211 				    "addr to %x:%x:%x:%x:%x:%x", vn0_mac[0],
212 				    vn0_mac[1], vn0_mac[2], vn0_mac[3],
213 				    vn0_mac[4], vn0_mac[5]);
214 
215 				mac_unicst_update(ss->ei_mac_hdl, vn0_mac);
216 			}
217 
218 			EIB_DPRINTF_DEBUG(ss->ei_instance,
219 			    "eib_events_handler: eib_mac_link_up(B_FALSE)");
220 
221 			eib_mac_link_up(ss, B_FALSE);
222 		}
223 
224 		EIB_DPRINTF_DEBUG(ss->ei_instance,
225 		    "eib_events_handler: End EIB_EV_GW_UP");
226 		break;
227 
228 	case EIB_EV_GW_INFO_UPDATE:
229 		EIB_DPRINTF_DEBUG(ss->ei_instance,
230 		    "eib_events_handler: Begin EIB_EV_GW_INFO_UPDATE");
231 
232 		if (evi->ev_arg) {
233 			eib_update_props(ss, (eib_gw_info_t *)(evi->ev_arg));
234 			kmem_free(evi->ev_arg, sizeof (eib_gw_info_t));
235 		}
236 
237 		EIB_DPRINTF_DEBUG(ss->ei_instance,
238 		    "eib_events_handler: End EIB_EV_GW_INFO_UPDATE");
239 		break;
240 
241 	case EIB_EV_MCG_DELETED:
242 		EIB_DPRINTF_DEBUG(ss->ei_instance,
243 		    "eib_events_handler: Begin-End EIB_EV_MCG_DELETED");
244 		break;
245 
246 	case EIB_EV_MCG_CREATED:
247 		EIB_DPRINTF_DEBUG(ss->ei_instance,
248 		    "eib_events_handler: Begin-End EIB_EV_MCG_CREATED");
249 		break;
250 
251 	case EIB_EV_GW_EPORT_DOWN:
252 		EIB_DPRINTF_DEBUG(ss->ei_instance,
253 		    "eib_events_handler: Begin-End EIB_EV_GW_EPORT_DOWN");
254 		break;
255 
256 	case EIB_EV_GW_DOWN:
257 		EIB_DPRINTF_DEBUG(ss->ei_instance,
258 		    "eib_events_handler: Begin-End EIB_EV_GW_DOWN");
259 		break;
260 	}
261 
262 	eib_mac_clr_nic_state(ss, EIB_NIC_RESTARTING);
263 
264 	kmem_free(evi, sizeof (eib_event_t));
265 	goto wait_for_event;
266 
267 	/*NOTREACHED*/
268 }
269 
270 void
eib_svc_enqueue_event(eib_t * ss,eib_event_t * evi)271 eib_svc_enqueue_event(eib_t *ss, eib_event_t *evi)
272 {
273 	eib_event_t *elem = NULL;
274 	eib_event_t *tail = NULL;
275 
276 	mutex_enter(&ss->ei_ev_lock);
277 
278 	/*
279 	 * Notice to shutdown has a higher priority than the
280 	 * rest and goes to the head of the list. Everything
281 	 * else goes at the end.
282 	 */
283 	if (evi->ev_code == EIB_EV_SHUTDOWN) {
284 		evi->ev_next = ss->ei_event;
285 		ss->ei_event = evi;
286 	} else {
287 		for (elem = ss->ei_event; elem; elem = elem->ev_next)
288 			tail = elem;
289 
290 		if (tail)
291 			tail->ev_next = evi;
292 		else
293 			ss->ei_event = evi;
294 	}
295 
296 	cv_signal(&ss->ei_ev_cv);
297 	mutex_exit(&ss->ei_ev_lock);
298 }
299 
300 /*
301  * Thread to refill channels with rwqes whenever they get low.
302  */
303 void
eib_refill_rwqes(eib_t * ss)304 eib_refill_rwqes(eib_t *ss)
305 {
306 	eib_chan_t *chan;
307 	kmutex_t ci_lock;
308 	callb_cpr_t ci;
309 
310 	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
311 	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_RWQES_REFILLER);
312 
313 wait_for_refill_work:
314 	mutex_enter(&ss->ei_rxpost_lock);
315 
316 	while ((ss->ei_rxpost == NULL) && (ss->ei_rxpost_die == 0)) {
317 		mutex_enter(&ci_lock);
318 		CALLB_CPR_SAFE_BEGIN(&ci);
319 		mutex_exit(&ci_lock);
320 
321 		cv_wait(&ss->ei_rxpost_cv, &ss->ei_rxpost_lock);
322 
323 		mutex_enter(&ci_lock);
324 		CALLB_CPR_SAFE_END(&ci, &ci_lock);
325 		mutex_exit(&ci_lock);
326 	}
327 
328 	/*
329 	 * Discard all requests for refill if we're being asked to die
330 	 */
331 	if (ss->ei_rxpost_die) {
332 		ss->ei_rxpost = NULL;
333 		mutex_exit(&ss->ei_rxpost_lock);
334 
335 		mutex_enter(&ci_lock);
336 		CALLB_CPR_EXIT(&ci);
337 		mutex_destroy(&ci_lock);
338 
339 		return;
340 	}
341 	ASSERT(ss->ei_rxpost != NULL);
342 
343 	/*
344 	 * Take the first element out of the queue
345 	 */
346 	chan = ss->ei_rxpost;
347 	ss->ei_rxpost = chan->ch_rxpost_next;
348 	chan->ch_rxpost_next = NULL;
349 
350 	mutex_exit(&ss->ei_rxpost_lock);
351 
352 	/*
353 	 * Try to post a bunch of recv wqes into this channel. If we
354 	 * fail, it means that we haven't even been able to post a
355 	 * single recv wqe.  This is alarming, but there's nothing
356 	 * we can do. We just move on to the next channel needing
357 	 * our service.
358 	 */
359 	if (eib_chan_post_rx(ss, chan, NULL) != EIB_E_SUCCESS) {
360 		EIB_DPRINTF_ERR(ss->ei_instance,
361 		    "eib_refill_rwqes: eib_chan_post_rx() failed");
362 	}
363 
364 	/*
365 	 * Mark it to indicate that the refilling is done
366 	 */
367 	mutex_enter(&chan->ch_rx_lock);
368 	chan->ch_rx_refilling = B_FALSE;
369 	mutex_exit(&chan->ch_rx_lock);
370 
371 	goto wait_for_refill_work;
372 
373 	/*NOTREACHED*/
374 }
375 
376 /*
377  * Thread to create or restart vnics when required
378  */
379 void
eib_vnic_creator(eib_t * ss)380 eib_vnic_creator(eib_t *ss)
381 {
382 	eib_vnic_req_t *vrq;
383 	eib_vnic_req_t *elem;
384 	eib_vnic_req_t *nxt;
385 	kmutex_t ci_lock;
386 	callb_cpr_t ci;
387 	uint_t vr_req;
388 	uint8_t *vr_mac;
389 	int ret;
390 	int err;
391 
392 	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
393 	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_VNIC_CREATOR);
394 
395 wait_for_vnic_req:
396 	mutex_enter(&ss->ei_vnic_req_lock);
397 
398 	while ((vrq = ss->ei_vnic_req) == NULL) {
399 		mutex_enter(&ci_lock);
400 		CALLB_CPR_SAFE_BEGIN(&ci);
401 		mutex_exit(&ci_lock);
402 
403 		cv_wait(&ss->ei_vnic_req_cv, &ss->ei_vnic_req_lock);
404 
405 		mutex_enter(&ci_lock);
406 		CALLB_CPR_SAFE_END(&ci, &ci_lock);
407 		mutex_exit(&ci_lock);
408 	}
409 
410 	/*
411 	 * Pull out the first request
412 	 */
413 	ss->ei_vnic_req = vrq->vr_next;
414 	vrq->vr_next = NULL;
415 
416 	vr_req = vrq->vr_req;
417 	vr_mac = vrq->vr_mac;
418 
419 	switch (vr_req) {
420 	case EIB_CR_REQ_DIE:
421 	case EIB_CR_REQ_FLUSH:
422 		/*
423 		 * Cleanup all pending reqs and failed reqs
424 		 */
425 		for (elem = ss->ei_vnic_req; elem; elem = nxt) {
426 			nxt = elem->vr_next;
427 			kmem_free(elem, sizeof (eib_vnic_req_t));
428 		}
429 		for (elem = ss->ei_failed_vnic_req; elem; elem = nxt) {
430 			nxt = elem->vr_next;
431 			kmem_free(elem, sizeof (eib_vnic_req_t));
432 		}
433 		ss->ei_vnic_req = NULL;
434 		ss->ei_failed_vnic_req = NULL;
435 		ss->ei_pending_vnic_req = NULL;
436 		mutex_exit(&ss->ei_vnic_req_lock);
437 
438 		break;
439 
440 	case EIB_CR_REQ_NEW_VNIC:
441 		ss->ei_pending_vnic_req = vrq;
442 		mutex_exit(&ss->ei_vnic_req_lock);
443 
444 		EIB_DPRINTF_DEBUG(ss->ei_instance, "eib_vnic_creator: "
445 		    "new vnic creation request for %x:%x:%x:%x:%x:%x, 0x%x",
446 		    vr_mac[0], vr_mac[1], vr_mac[2], vr_mac[3], vr_mac[4],
447 		    vr_mac[5], vrq->vr_vlan);
448 
449 		/*
450 		 * Make sure we don't race with the plumb/unplumb code.  If
451 		 * the eoib instance has been unplumbed already, we ignore any
452 		 * creation requests that may have been pending.
453 		 */
454 		eib_mac_set_nic_state(ss, EIB_NIC_STARTING);
455 
456 		if ((ss->ei_node_state->ns_nic_state & EIB_NIC_STARTED) !=
457 		    EIB_NIC_STARTED) {
458 			mutex_enter(&ss->ei_vnic_req_lock);
459 			ss->ei_pending_vnic_req = NULL;
460 			mutex_exit(&ss->ei_vnic_req_lock);
461 			eib_mac_clr_nic_state(ss, EIB_NIC_STARTING);
462 			break;
463 		}
464 
465 		/*
466 		 * Try to create a new vnic with the supplied parameters.
467 		 */
468 		err = 0;
469 		if ((ret = eib_vnic_create(ss, vrq->vr_mac, vrq->vr_vlan,
470 		    NULL, &err)) != EIB_E_SUCCESS) {
471 			EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_creator: "
472 			    "eib_vnic_create(mac=%x:%x:%x:%x:%x:%x, vlan=0x%x) "
473 			    "failed, ret=%d", vr_mac[0], vr_mac[1], vr_mac[2],
474 			    vr_mac[3], vr_mac[4], vr_mac[5], vrq->vr_vlan, err);
475 		}
476 
477 		/*
478 		 * If we failed, add this vnic req to our failed list (unless
479 		 * it already exists there), so we won't try to create this
480 		 * vnic again.  Whether we fail or succeed, we're done with
481 		 * processing this req, so clear the pending req.
482 		 */
483 		mutex_enter(&ss->ei_vnic_req_lock);
484 		if ((ret != EIB_E_SUCCESS) && (err != EEXIST)) {
485 			vrq->vr_next = ss->ei_failed_vnic_req;
486 			ss->ei_failed_vnic_req = vrq;
487 			vrq = NULL;
488 		}
489 		ss->ei_pending_vnic_req = NULL;
490 		mutex_exit(&ss->ei_vnic_req_lock);
491 
492 		/*
493 		 * Notify the mac layer that it should retry its tx again. If we
494 		 * had created the vnic successfully, we'll be able to send the
495 		 * packets; if we had not been successful, we'll drop packets on
496 		 * this vnic.
497 		 */
498 		EIB_DPRINTF_DEBUG(ss->ei_instance,
499 		    "eib_vnic_creator: calling mac_tx_update()");
500 		mac_tx_update(ss->ei_mac_hdl);
501 
502 		eib_mac_clr_nic_state(ss, EIB_NIC_STARTING);
503 		break;
504 
505 	default:
506 		EIB_DPRINTF_DEBUG(ss->ei_instance, "eib_vnic_creator: "
507 		    "unknown request 0x%lx, ignoring", vrq->vr_req);
508 		break;
509 	}
510 
511 	/*
512 	 * Free the current req and quit if we have to
513 	 */
514 	if (vrq) {
515 		kmem_free(vrq, sizeof (eib_vnic_req_t));
516 	}
517 
518 	if (vr_req == EIB_CR_REQ_DIE) {
519 		mutex_enter(&ci_lock);
520 		CALLB_CPR_EXIT(&ci);
521 		mutex_destroy(&ci_lock);
522 
523 		return;
524 	}
525 
526 	goto wait_for_vnic_req;
527 	/*NOTREACHED*/
528 }
529 
530 /*
531  * Thread to monitor tx wqes and update the mac layer when needed.
532  * Note that this thread can only be started after the tx wqe pool
533  * has been allocated and initialized.
534  */
535 void
eib_monitor_tx_wqes(eib_t * ss)536 eib_monitor_tx_wqes(eib_t *ss)
537 {
538 	eib_wqe_pool_t *wp = ss->ei_tx;
539 	kmutex_t ci_lock;
540 	callb_cpr_t ci;
541 
542 	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
543 	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_TXWQES_MONITOR);
544 
545 	ASSERT(wp != NULL);
546 
547 monitor_wqe_status:
548 	mutex_enter(&wp->wp_lock);
549 
550 	/*
551 	 * Wait till someone falls short of wqes
552 	 */
553 	while (wp->wp_status == 0) {
554 		mutex_enter(&ci_lock);
555 		CALLB_CPR_SAFE_BEGIN(&ci);
556 		mutex_exit(&ci_lock);
557 
558 		cv_wait(&wp->wp_cv, &wp->wp_lock);
559 
560 		mutex_enter(&ci_lock);
561 		CALLB_CPR_SAFE_END(&ci, &ci_lock);
562 		mutex_exit(&ci_lock);
563 	}
564 
565 	/*
566 	 * Have we been asked to die ?
567 	 */
568 	if (wp->wp_status & EIB_TXWQE_MONITOR_DIE) {
569 		mutex_exit(&wp->wp_lock);
570 
571 		mutex_enter(&ci_lock);
572 		CALLB_CPR_EXIT(&ci);
573 		mutex_destroy(&ci_lock);
574 
575 		return;
576 	}
577 
578 	ASSERT((wp->wp_status & EIB_TXWQE_SHORT) != 0);
579 
580 	/*
581 	 * Start monitoring free wqes till they cross min threshold
582 	 */
583 	while ((wp->wp_nfree < EIB_NFREE_SWQES_HWM) &&
584 	    ((wp->wp_status & EIB_TXWQE_MONITOR_DIE) == 0)) {
585 
586 		mutex_enter(&ci_lock);
587 		CALLB_CPR_SAFE_BEGIN(&ci);
588 		mutex_exit(&ci_lock);
589 
590 		cv_wait(&wp->wp_cv, &wp->wp_lock);
591 
592 		mutex_enter(&ci_lock);
593 		CALLB_CPR_SAFE_END(&ci, &ci_lock);
594 		mutex_exit(&ci_lock);
595 	}
596 
597 	/*
598 	 * Have we been asked to die ?
599 	 */
600 	if (wp->wp_status & EIB_TXWQE_MONITOR_DIE) {
601 		mutex_exit(&wp->wp_lock);
602 
603 		mutex_enter(&ci_lock);
604 		CALLB_CPR_EXIT(&ci);
605 		mutex_destroy(&ci_lock);
606 
607 		return;
608 	}
609 
610 	ASSERT(wp->wp_nfree >= EIB_NFREE_SWQES_HWM);
611 	wp->wp_status &= (~EIB_TXWQE_SHORT);
612 
613 	mutex_exit(&wp->wp_lock);
614 
615 	/*
616 	 * Inform the mac layer that tx resources are now available
617 	 * and go back to monitoring
618 	 */
619 	if (ss->ei_mac_hdl) {
620 		mac_tx_update(ss->ei_mac_hdl);
621 	}
622 	goto monitor_wqe_status;
623 
624 	/*NOTREACHED*/
625 }
626 
627 /*
628  * Thread to monitor lso bufs and update the mac layer as needed.
629  * Note that this thread can only be started after the lso buckets
630  * have been allocated and initialized.
631  */
632 void
eib_monitor_lso_bufs(eib_t * ss)633 eib_monitor_lso_bufs(eib_t *ss)
634 {
635 	eib_lsobkt_t *bkt = ss->ei_lso;
636 	kmutex_t ci_lock;
637 	callb_cpr_t ci;
638 
639 	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
640 	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_LSOBUFS_MONITOR);
641 
642 	ASSERT(bkt != NULL);
643 
644 monitor_lso_status:
645 	mutex_enter(&bkt->bk_lock);
646 
647 	/*
648 	 * Wait till someone falls short of LSO buffers or we're asked
649 	 * to die
650 	 */
651 	while (bkt->bk_status == 0) {
652 		mutex_enter(&ci_lock);
653 		CALLB_CPR_SAFE_BEGIN(&ci);
654 		mutex_exit(&ci_lock);
655 
656 		cv_wait(&bkt->bk_cv, &bkt->bk_lock);
657 
658 		mutex_enter(&ci_lock);
659 		CALLB_CPR_SAFE_END(&ci, &ci_lock);
660 		mutex_exit(&ci_lock);
661 	}
662 
663 	if (bkt->bk_status & EIB_LBUF_MONITOR_DIE) {
664 		mutex_exit(&bkt->bk_lock);
665 
666 		mutex_enter(&ci_lock);
667 		CALLB_CPR_EXIT(&ci);
668 		mutex_destroy(&ci_lock);
669 
670 		return;
671 	}
672 
673 	ASSERT((bkt->bk_status & EIB_LBUF_SHORT) != 0);
674 
675 	/*
676 	 * Start monitoring free LSO buffers till there are enough
677 	 * free buffers available
678 	 */
679 	while ((bkt->bk_nfree < EIB_LSO_FREE_BUFS_THRESH) &&
680 	    ((bkt->bk_status & EIB_LBUF_MONITOR_DIE) == 0)) {
681 
682 		mutex_enter(&ci_lock);
683 		CALLB_CPR_SAFE_BEGIN(&ci);
684 		mutex_exit(&ci_lock);
685 
686 		cv_wait(&bkt->bk_cv, &bkt->bk_lock);
687 
688 		mutex_enter(&ci_lock);
689 		CALLB_CPR_SAFE_END(&ci, &ci_lock);
690 		mutex_exit(&ci_lock);
691 	}
692 
693 	if (bkt->bk_status & EIB_LBUF_MONITOR_DIE) {
694 		mutex_exit(&bkt->bk_lock);
695 
696 		mutex_enter(&ci_lock);
697 		CALLB_CPR_EXIT(&ci);
698 		mutex_destroy(&ci_lock);
699 
700 		return;
701 	}
702 
703 	/*
704 	 * We have enough lso buffers available now
705 	 */
706 	ASSERT(bkt->bk_nfree >= EIB_LSO_FREE_BUFS_THRESH);
707 	bkt->bk_status &= (~EIB_LBUF_SHORT);
708 
709 	mutex_exit(&bkt->bk_lock);
710 
711 	/*
712 	 * Inform the mac layer that tx lso resources are now available
713 	 * and go back to monitoring
714 	 */
715 	if (ss->ei_mac_hdl) {
716 		mac_tx_update(ss->ei_mac_hdl);
717 	}
718 	goto monitor_lso_status;
719 
720 	/*NOTREACHED*/
721 }
722 
/*
 * Thread to manage the keepalive requirements for vnics and the gateway.
 *
 * ss->ei_ka_vnics_lock is taken once on entry and is only given up
 * inside cv_timedwait() and on the exit path; every jump back to
 * periodic_keepalive is made with the lock held.
 *
 * On each timeout the thread checks whether the gateway heartbeat is
 * still current.  If it is not, all vnics on the ss->ei_ka_vnics list
 * are recorded in ss->ei_zombie_vnics, the list is freed, the gateway is
 * marked unreachable and the link is taken down.  Otherwise a heartbeat
 * is sent for every vnic on the list.  The thread returns when
 * EIB_KA_VNICS_DIE is set in ss->ei_ka_vnics_event.
 */
void
eib_manage_keepalives(eib_t *ss)
{
	eib_ka_vnics_t *elem;
	eib_ka_vnics_t *nxt;
	clock_t deadline;
	int64_t lbolt64;
	int err;
	kmutex_t ci_lock;
	callb_cpr_t ci;

	mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
	/*
	 * NOTE(review): this registers under EIB_EVENTS_HDLR, the same
	 * name eib_events_handler() passes to CALLB_CPR_INIT(); this looks
	 * like a copy-paste - confirm whether a keepalive-specific name
	 * was intended.
	 */
	CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, EIB_EVENTS_HDLR);

	mutex_enter(&ss->ei_ka_vnics_lock);

periodic_keepalive:
	/*
	 * The next keepalive is due pp_vnic_ka_ticks from now
	 */
	deadline = ddi_get_lbolt() + ss->ei_gw_props->pp_vnic_ka_ticks;

	/*
	 * Wait until we either time out or are asked to die.  A wakeup
	 * that sets neither bit just resumes the wait with the same
	 * deadline.
	 */
	while ((ss->ei_ka_vnics_event &
	    (EIB_KA_VNICS_DIE | EIB_KA_VNICS_TIMED_OUT)) == 0) {
		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_BEGIN(&ci);
		mutex_exit(&ci_lock);

		/*
		 * A return value of -1 is taken to mean that the deadline
		 * has passed
		 */
		if (cv_timedwait(&ss->ei_ka_vnics_cv, &ss->ei_ka_vnics_lock,
		    deadline) == -1) {
			ss->ei_ka_vnics_event |= EIB_KA_VNICS_TIMED_OUT;
		}

		mutex_enter(&ci_lock);
		CALLB_CPR_SAFE_END(&ci, &ci_lock);
		mutex_exit(&ci_lock);
	}

	/*
	 * If we've been asked to die, free the keepalive list and exit
	 */
	if (ss->ei_ka_vnics_event & EIB_KA_VNICS_DIE) {
		for (elem = ss->ei_ka_vnics; elem; elem = nxt) {
			nxt = elem->ka_next;
			kmem_free(elem, sizeof (eib_ka_vnics_t));
		}
		ss->ei_ka_vnics = NULL;
		mutex_exit(&ss->ei_ka_vnics_lock);

		mutex_enter(&ci_lock);
		CALLB_CPR_EXIT(&ci);
		mutex_destroy(&ci_lock);

		return;
	}

	/*
	 * Are there any vnics that need keepalive management ?
	 */
	ss->ei_ka_vnics_event &= ~EIB_KA_VNICS_TIMED_OUT;
	if (ss->ei_ka_vnics == NULL)
		goto periodic_keepalive;

	/*
	 * Ok, we need to send vnic keepalives to our gateway. But first
	 * check if the gateway heartbeat is good as of this moment.  Note
	 * that we need to get the lbolt value after acquiring ei_vnic_lock
	 * to ensure that ei_gw_last_heartbeat does not change before the
	 * comparison (to avoid a negative value in the comparison result
	 * causing us to incorrectly assume that the gateway heartbeat has
	 * stopped).
	 */
	mutex_enter(&ss->ei_vnic_lock);

	lbolt64 = ddi_get_lbolt64();

	/*
	 * A last heartbeat value of 0 skips the check altogether
	 */
	if (ss->ei_gw_last_heartbeat != 0) {
		if ((lbolt64 - ss->ei_gw_last_heartbeat) >
		    ss->ei_gw_props->pp_gw_ka_ticks) {

			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_manage_keepalives: no keepalives from gateway "
			    "0x%x for hca_guid=0x%llx, port=0x%x, "
			    "last_gw_ka=0x%llx", ss->ei_gw_props->pp_gw_portid,
			    ss->ei_props->ep_hca_guid,
			    ss->ei_props->ep_port_num,
			    ss->ei_gw_last_heartbeat);

			/*
			 * Record each vnic's instance in the zombie
			 * bitmask and drop it from the keepalive list
			 */
			for (elem = ss->ei_ka_vnics; elem; elem = nxt) {
				nxt = elem->ka_next;
				ss->ei_zombie_vnics |=
				    ((uint64_t)1 << elem->ka_vnic->vn_instance);
				kmem_free(elem, sizeof (eib_ka_vnics_t));
			}
			ss->ei_ka_vnics = NULL;
			ss->ei_gw_unreachable = B_TRUE;
			mutex_exit(&ss->ei_vnic_lock);

			/*
			 * ei_ka_vnics_lock is still held across this call
			 */
			eib_mac_link_down(ss, B_FALSE);

			goto periodic_keepalive;
		}
	}
	mutex_exit(&ss->ei_vnic_lock);

	/*
	 * The gateway is alive: send a heartbeat for every vnic on the
	 * list.  The result of each send is deliberately ignored.
	 */
	for (elem = ss->ei_ka_vnics; elem; elem = elem->ka_next)
		(void) eib_fip_heartbeat(ss, elem->ka_vnic, &err);

	goto periodic_keepalive;
	/*NOTREACHED*/
}
831 
832 void
eib_stop_events_handler(eib_t * ss)833 eib_stop_events_handler(eib_t *ss)
834 {
835 	eib_event_t *evi;
836 
837 	evi = kmem_zalloc(sizeof (eib_event_t), KM_SLEEP);
838 	evi->ev_code = EIB_EV_SHUTDOWN;
839 	evi->ev_arg = NULL;
840 
841 	eib_svc_enqueue_event(ss, evi);
842 
843 	thread_join(ss->ei_events_handler);
844 }
845 
846 void
eib_stop_refill_rwqes(eib_t * ss)847 eib_stop_refill_rwqes(eib_t *ss)
848 {
849 	mutex_enter(&ss->ei_rxpost_lock);
850 
851 	ss->ei_rxpost_die = 1;
852 
853 	cv_signal(&ss->ei_rxpost_cv);
854 	mutex_exit(&ss->ei_rxpost_lock);
855 
856 	thread_join(ss->ei_rwqes_refiller);
857 }
858 
859 void
eib_stop_vnic_creator(eib_t * ss)860 eib_stop_vnic_creator(eib_t *ss)
861 {
862 	eib_vnic_req_t *vrq;
863 
864 	vrq = kmem_zalloc(sizeof (eib_vnic_req_t), KM_SLEEP);
865 	vrq->vr_req = EIB_CR_REQ_DIE;
866 	vrq->vr_next = NULL;
867 
868 	eib_vnic_enqueue_req(ss, vrq);
869 
870 	thread_join(ss->ei_vnic_creator);
871 }
872 
873 void
eib_stop_monitor_tx_wqes(eib_t * ss)874 eib_stop_monitor_tx_wqes(eib_t *ss)
875 {
876 	eib_wqe_pool_t *wp = ss->ei_tx;
877 
878 	mutex_enter(&wp->wp_lock);
879 
880 	wp->wp_status |= EIB_TXWQE_MONITOR_DIE;
881 
882 	cv_signal(&wp->wp_cv);
883 	mutex_exit(&wp->wp_lock);
884 
885 	thread_join(ss->ei_txwqe_monitor);
886 }
887 
888 int
eib_stop_monitor_lso_bufs(eib_t * ss,boolean_t force)889 eib_stop_monitor_lso_bufs(eib_t *ss, boolean_t force)
890 {
891 	eib_lsobkt_t *bkt = ss->ei_lso;
892 
893 	mutex_enter(&bkt->bk_lock);
894 
895 	/*
896 	 * If there are some buffers still not reaped and the force
897 	 * flag is not set, return without doing anything. Otherwise,
898 	 * stop the lso bufs monitor and wait for it to die.
899 	 */
900 	if ((bkt->bk_nelem != bkt->bk_nfree) && (force == B_FALSE)) {
901 		mutex_exit(&bkt->bk_lock);
902 		return (EIB_E_FAILURE);
903 	}
904 
905 	bkt->bk_status |= EIB_LBUF_MONITOR_DIE;
906 
907 	cv_signal(&bkt->bk_cv);
908 	mutex_exit(&bkt->bk_lock);
909 
910 	thread_join(ss->ei_lsobufs_monitor);
911 	return (EIB_E_SUCCESS);
912 }
913 
914 void
eib_stop_manage_keepalives(eib_t * ss)915 eib_stop_manage_keepalives(eib_t *ss)
916 {
917 	mutex_enter(&ss->ei_ka_vnics_lock);
918 
919 	ss->ei_ka_vnics_event |= EIB_KA_VNICS_DIE;
920 
921 	cv_signal(&ss->ei_ka_vnics_cv);
922 	mutex_exit(&ss->ei_ka_vnics_lock);
923 
924 	thread_join(ss->ei_keepalives_manager);
925 }
926 
927 void
eib_flush_vnic_reqs(eib_t * ss)928 eib_flush_vnic_reqs(eib_t *ss)
929 {
930 	eib_vnic_req_t *vrq;
931 
932 	vrq = kmem_zalloc(sizeof (eib_vnic_req_t), KM_SLEEP);
933 	vrq->vr_req = EIB_CR_REQ_FLUSH;
934 	vrq->vr_next = NULL;
935 
936 	eib_vnic_enqueue_req(ss, vrq);
937 }
938 
939 /*ARGSUSED*/
940 void
eib_gw_alive_cb(dev_info_t * dip,ddi_eventcookie_t cookie,void * arg,void * impl_data)941 eib_gw_alive_cb(dev_info_t *dip, ddi_eventcookie_t cookie, void *arg,
942     void *impl_data)
943 {
944 	eib_t *ss = (eib_t *)arg;
945 	eib_event_t *evi;
946 
947 	evi = kmem_zalloc(sizeof (eib_event_t), KM_NOSLEEP);
948 	if (evi == NULL) {
949 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_gw_alive_cb: "
950 		    "no memory, ignoring this gateway alive event");
951 	} else {
952 		evi->ev_code = EIB_EV_GW_UP;
953 		evi->ev_arg = NULL;
954 		eib_svc_enqueue_event(ss, evi);
955 	}
956 }
957 
958 /*ARGSUSED*/
959 void
eib_login_ack_cb(dev_info_t * dip,ddi_eventcookie_t cookie,void * arg,void * impl_data)960 eib_login_ack_cb(dev_info_t *dip, ddi_eventcookie_t cookie, void *arg,
961     void *impl_data)
962 {
963 	eib_t *ss = (eib_t *)arg;
964 	uint8_t *pkt = (uint8_t *)impl_data;
965 	eib_login_data_t ld;
966 
967 	/*
968 	 * We have received a login ack message from the gateway via the EoIB
969 	 * nexus (solicitation qpn).  The packet is passed to us raw (unparsed)
970 	 * and we have to figure out if this is a vnic login ack.
971 	 */
972 	if (eib_fip_parse_login_ack(ss, pkt + EIB_GRH_SZ, &ld) == EIB_E_SUCCESS)
973 		eib_vnic_login_ack(ss, &ld);
974 }
975 
976 /*ARGSUSED*/
977 void
eib_gw_info_cb(dev_info_t * dip,ddi_eventcookie_t cookie,void * arg,void * impl_data)978 eib_gw_info_cb(dev_info_t *dip, ddi_eventcookie_t cookie, void *arg,
979     void *impl_data)
980 {
981 	eib_t *ss = (eib_t *)arg;
982 	eib_event_t *evi;
983 
984 	evi = kmem_zalloc(sizeof (eib_event_t), KM_NOSLEEP);
985 	if (evi == NULL) {
986 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_gw_info_cb: "
987 		    "no memory, ignoring this gateway props update event");
988 		return;
989 	}
990 	evi->ev_arg = kmem_zalloc(sizeof (eib_gw_info_t), KM_NOSLEEP);
991 	if (evi->ev_arg == NULL) {
992 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_gw_info_cb: "
993 		    "no memory, ignoring this gateway props update event");
994 		kmem_free(evi, sizeof (eib_event_t));
995 		return;
996 	}
997 	bcopy(impl_data, evi->ev_arg, sizeof (eib_gw_info_t));
998 	evi->ev_code = EIB_EV_GW_INFO_UPDATE;
999 
1000 	eib_svc_enqueue_event(ss, evi);
1001 }
1002