/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * The Ethernet over InfiniBand (EoIB) driver
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/kmem.h>
#include <sys/ksynch.h>
#include <sys/modctl.h>
#include <sys/stat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>

#include <sys/mac_provider.h>
#include <sys/mac_ether.h>

#include <sys/ib/clients/eoib/eib_impl.h>

/*
 * Driver entry point declarations
 */
static int eib_attach(dev_info_t *, ddi_attach_cmd_t);
static int eib_detach(dev_info_t *, ddi_detach_cmd_t);

/*
 * MAC callbacks
 */
static int eib_m_stat(void *, uint_t, uint64_t *);
static int eib_m_start(void *);
static void eib_m_stop(void *);
static int eib_m_promisc(void *, boolean_t);
static int eib_m_multicast(void *, boolean_t, const uint8_t *);
static int eib_m_unicast(void *, const uint8_t *);
static mblk_t *eib_m_tx(void *, mblk_t *);
static boolean_t eib_m_getcapab(void *, mac_capab_t, void *);
static int eib_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
    const void *);
static int eib_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *);
static void eib_m_propinfo(void *, const char *, mac_prop_id_t,
    mac_prop_info_handle_t);

/*
 * Devops definition
 */
DDI_DEFINE_STREAM_OPS(eib_ops, nulldev, nulldev, eib_attach, eib_detach,
    nodev, NULL, D_MP, NULL, ddi_quiesce_not_needed);

/*
 * Module Driver Info
 */
static struct modldrv eib_modldrv = {
	&mod_driverops,		/* Driver module */
	"EoIB Driver",		/* Driver name and version */
	&eib_ops,		/* Driver ops */
};

/*
 * Module Linkage
 */
static struct modlinkage eib_modlinkage = {
	MODREV_1, (void *)&eib_modldrv, NULL
};

/*
 * GLDv3 entry points
 */
#define	EIB_M_CALLBACK_FLAGS	\
	(MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO)
static mac_callbacks_t eib_m_callbacks = {
	EIB_M_CALLBACK_FLAGS,
	eib_m_stat,
	eib_m_start,
	eib_m_stop,
	eib_m_promisc,
	eib_m_multicast,
	eib_m_unicast,
	eib_m_tx,
	NULL,
	NULL,
	eib_m_getcapab,
	NULL,
	NULL,
	eib_m_setprop,
	eib_m_getprop,
	eib_m_propinfo
};

/*
 * Async handler callback for ibt events
 */
static ibt_clnt_modinfo_t eib_clnt_modinfo = {
	IBTI_V_CURR,
	IBT_NETWORK,
	eib_ibt_async_handler,
	NULL,
	EIB_DRV_NAME
};

/*
 * Driver State Pointer
 */
void *eib_state;

/*
 * Declarations private to this file
 */
static int eib_state_init(eib_t *);
static int eib_add_event_callbacks(eib_t *);
static int eib_register_with_mac(eib_t *, dev_info_t *);
static void eib_rb_attach(eib_t *, uint_t);
static void eib_rb_state_init(eib_t *);
static void eib_rb_add_event_callbacks(eib_t *);
static void eib_rb_register_with_mac(eib_t *);

/*
 * Definitions private to this file
 */
#define	EIB_ATTACH_STATE_ALLOCD		0x01
#define	EIB_ATTACH_PROPS_PARSED		0x02
#define	EIB_ATTACH_STATE_INIT_DONE	0x04
#define	EIB_ATTACH_IBT_ATT_DONE		0x08
#define	EIB_ATTACH_EV_CBS_ADDED		0x10
#define	EIB_ATTACH_REGISTER_MAC_DONE	0x20

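/*
 * Loadable module entry point: verify the driver has a major number,
 * initialize the soft state framework and mac ops, and install the
 * module.
 */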
int
_init()
{
	int ret;

	if (ddi_name_to_major(EIB_DRV_NAME) == (major_t)-1)
		return (ENODEV);

	if ((ret = ddi_soft_state_init(&eib_state, sizeof (eib_t), 0)) != 0)
		return (ret);

	mac_init_ops(&eib_ops, EIB_DRV_NAME);
	if ((ret = mod_install(&eib_modlinkage)) != 0) {
		mac_fini_ops(&eib_ops);
		ddi_soft_state_fini(&eib_state);
		return (ret);
	}

	eib_debug_init();

	return (ret);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&eib_modlinkage, modinfop));
}

int
_fini()
{
	int ret;

	if ((ret = mod_remove(&eib_modlinkage)) != 0)
		return (ret);

	eib_debug_fini();

	mac_fini_ops(&eib_ops);
	ddi_soft_state_fini(&eib_state);

	return (ret);
}

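/*
 * DDI attach entry point.  Each successful step is recorded in the
 * EIB_ATTACH_* progress mask so that eib_rb_attach() can undo exactly
 * what was done if a later step fails.
 */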
static int
eib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	eib_t *ss = NULL;
	ibt_status_t ret;
	int instance;
	uint_t progress = 0;

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	/*
	 * Allocate softstate for this instance
	 */
	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(eib_state, instance) == DDI_FAILURE)
		goto attach_fail;

	progress |= EIB_ATTACH_STATE_ALLOCD;

	ss = ddi_get_soft_state(eib_state, instance);
	ss->ei_dip = dip;
	ss->ei_instance = (uint_t)instance;

	/*
	 * Parse the node properties and get the gateway parameters
	 * for this instance
	 */
	if (eib_get_props(ss) != EIB_E_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance,
		    "eib_attach: eib_get_props() failed");
		goto attach_fail;
	}
	progress |= EIB_ATTACH_PROPS_PARSED;

	/*
	 * Do per-instance state initialization
	 */
	if (eib_state_init(ss) != EIB_E_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance,
		    "eib_attach: eib_state_init() failed");
		goto attach_fail;
	}
	progress |= EIB_ATTACH_STATE_INIT_DONE;

	/*
	 * Attach to IBTL
	 */
	if ((ret = ibt_attach(&eib_clnt_modinfo, ss->ei_dip, ss,
	    &ss->ei_ibt_hdl)) != IBT_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance,
		    "eib_attach: ibt_attach() failed, ret=%d", ret);
		goto attach_fail;
	}
	progress |= EIB_ATTACH_IBT_ATT_DONE;

	/*
	 * Register NDI event callbacks with EoIB nexus
	 */
	if (eib_add_event_callbacks(ss) != EIB_E_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance,
		    "eib_attach: eib_add_event_callbacks() failed");
		goto attach_fail;
	}
	progress |= EIB_ATTACH_EV_CBS_ADDED;

	/*
	 * Register with mac layer
	 */
	if (eib_register_with_mac(ss, dip) != EIB_E_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance,
		    "eib_attach: eib_register_with_mac() failed");
		goto attach_fail;
	}
	progress |= EIB_ATTACH_REGISTER_MAC_DONE;

	return (DDI_SUCCESS);

attach_fail:
	eib_rb_attach(ss, progress);
	return (DDI_FAILURE);
}

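/*
 * DDI detach entry point.  Detach fails if any tx, rx or lso buffers
 * are still outstanding with the network layer.
 */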
static int
eib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	eib_t *ss;
	int instance;

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	instance = ddi_get_instance(dip);
	ss = ddi_get_soft_state(eib_state, instance);

	/*
	 * If we could not clean up the rx buffers (and hca resources)
	 * during unplumb because they were still held by the network
	 * layer at the time, try to clean them up now, before doing
	 * the detach.
	 */
	eib_mac_set_nic_state(ss, EIB_NIC_STOPPING);

	eib_rb_rsrc_setup_bufs(ss, B_FALSE);
	if (ss->ei_tx || ss->ei_rx || ss->ei_lso) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_detach: buffers still not returned "
		    "(tx=0x%llx, rx=0x%llx, lso=0x%llx), could "
		    "not detach", ss->ei_tx, ss->ei_rx, ss->ei_lso);
		eib_mac_clr_nic_state(ss, EIB_NIC_STOPPING);
		return (DDI_FAILURE);
	}
	if (ss->ei_hca_hdl) {
		eib_rb_ibt_hca_init(ss, ~0);
	}
	eib_mac_clr_nic_state(ss, EIB_NIC_STOPPING);

	eib_rb_attach(ss, ~0);

	return (DDI_SUCCESS);
}

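/*
 * GLDv3 callback to report interface statistics
 */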
static int
eib_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	eib_t *ss = arg;
	eib_stats_t *stats = ss->ei_stats;

	switch (stat) {
	case MAC_STAT_IFSPEED:
		*val = ss->ei_props->ep_ifspeed;
		break;

	case MAC_STAT_OBYTES:
		*val = stats->st_obytes;
		break;

	case MAC_STAT_OPACKETS:
		*val = stats->st_opkts;
		break;

	case MAC_STAT_BRDCSTXMT:
		*val = stats->st_brdcstxmit;
		break;

	case MAC_STAT_MULTIXMT:
		*val = stats->st_multixmit;
		break;

	case MAC_STAT_OERRORS:
		*val = stats->st_oerrors;
		break;

	case MAC_STAT_NOXMTBUF:
		*val = stats->st_noxmitbuf;
		break;

	case MAC_STAT_RBYTES:
		*val = stats->st_rbytes;
		break;

	case MAC_STAT_IPACKETS:
		*val = stats->st_ipkts;
		break;

	case MAC_STAT_BRDCSTRCV:
		*val = stats->st_brdcstrcv;
		break;

	case MAC_STAT_MULTIRCV:
		*val = stats->st_multircv;
		break;

	case MAC_STAT_IERRORS:
		*val = stats->st_ierrors;
		break;

	case MAC_STAT_NORCVBUF:
		*val = stats->st_norcvbuf;
		break;

	case ETHER_STAT_LINK_DUPLEX:
		*val = LINK_DUPLEX_FULL;
		break;

	default:
		return (ENOTSUP);
	}

	return (0);
}

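/*
 * GLDv3 callback to start the interface (invoked at plumb time)
 */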
static int
eib_m_start(void *arg)
{
	eib_t *ss = arg;
	int ret = -1;

	eib_mac_set_nic_state(ss, EIB_NIC_STARTING);

	if ((ss->ei_node_state->ns_nic_state & EIB_NIC_STARTED) == 0)
		ret = eib_mac_start(ss);

	if (ret == 0)
		eib_mac_upd_nic_state(ss, EIB_NIC_STARTING, EIB_NIC_STARTED);
	else
		eib_mac_clr_nic_state(ss, EIB_NIC_STARTING);

	return (ret);
}

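/*
 * GLDv3 callback to stop the interface
 */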
static void
eib_m_stop(void *arg)
{
	eib_t *ss = arg;

	eib_mac_set_nic_state(ss, EIB_NIC_STOPPING);

	if ((ss->ei_node_state->ns_nic_state & EIB_NIC_STARTED) != 0)
		eib_mac_stop(ss);

	eib_mac_clr_nic_state(ss, EIB_NIC_STARTED|EIB_NIC_STOPPING);
}

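/*
 * GLDv3 callback to enable/disable promiscuous mode
 */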
static int
eib_m_promisc(void *arg, boolean_t flag)
{
	eib_t *ss = arg;

	if ((ss->ei_node_state->ns_nic_state & EIB_NIC_STARTED) == 0)
		return (0);

	return (eib_mac_promisc(ss, flag));
}

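/*
 * GLDv3 callback to add or remove a multicast address
 */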
static int
eib_m_multicast(void *arg, boolean_t add, const uint8_t *mcast_mac)
{
	eib_t *ss = arg;

	if ((ss->ei_node_state->ns_nic_state & EIB_NIC_STARTED) == 0)
		return (0);

	/*
	 * We don't know which of the vnics built on top of the physlink
	 * this multicast group is relevant for, so we'll join it on
	 * behalf of vnic0 for now.
	 *
	 * Since the tx routine in EoIB currently piggybacks all multicast
	 * traffic over the broadcast channel, and all vnics are joined to
	 * the broadcast address when they're created, everyone should receive
	 * all multicast traffic anyway.
	 *
	 * On the rx side, we'll check if the incoming multicast address is
	 * either on the vnic's own list of joined mcgs (which will only be
	 * the broadcast address) or on vnic0's list of mcgs.  If we find a
	 * match, we let the packet come through.
	 *
	 * This isn't perfect, but it's the best we can do given that we don't
	 * have any vlan information corresponding to this multicast address.
	 *
	 * Also, for now we'll use the synchronous multicast joins and
	 * leaves instead of the asynchronous mechanism provided by
	 * ibt_join_mcg(), since that involves additional complexity for
	 * failed joins and removals.
	 */
	return (eib_mac_multicast(ss, add, (uint8_t *)mcast_mac));
}

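/*
 * GLDv3 callback to set the primary unicast address.  Only the mac
 * address already assigned to vnic0 is accepted; anything else gets
 * EINVAL.
 */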
static int
eib_m_unicast(void *arg, const uint8_t *macaddr)
{
	eib_t *ss = arg;
	eib_vnic_t *vnic;

	if ((ss->ei_node_state->ns_nic_state & EIB_NIC_STARTED) == 0)
		return (0);

	mutex_enter(&ss->ei_vnic_lock);

	vnic = ss->ei_vnic[0];
	if (bcmp(macaddr, vnic->vn_login_data.ld_assigned_mac,
	    ETHERADDRL) == 0) {
		mutex_exit(&ss->ei_vnic_lock);
		return (0);
	}

	mutex_exit(&ss->ei_vnic_lock);

	return (EINVAL);
}

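/*
 * GLDv3 transmit entry point
 */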
static mblk_t *
eib_m_tx(void *arg, mblk_t *mp)
{
	eib_t *ss = arg;
	mblk_t *next;

	/*
	 * If the nic hasn't been started, drop the message(s)
	 */
	if ((ss->ei_node_state->ns_nic_state & EIB_NIC_STARTED) == 0) {
		freemsgchain(mp);
		return (NULL);
	}

	for (; mp != NULL; mp = next) {
		/*
		 * Detach this message from the message chain
		 */
		next = mp->b_next;
		mp->b_next = NULL;

		/*
		 * Attempt to send the message; if we fail (likely due
		 * to lack of resources), reattach this message to the
		 * chain and return the unsent chain.  When we're ready
		 * to send again, we'll issue a mac_tx_update().
		 */
		if (eib_mac_tx(ss, mp) != EIB_E_SUCCESS) {
			mp->b_next = next;
			break;
		}
	}

	return (mp);
}

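/*
 * GLDv3 callback to report hardware checksum and LSO capabilities
 */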
static boolean_t
eib_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
{
	eib_t *ss = arg;
	eib_caps_t *caps = ss->ei_caps;
	eib_caps_t s_caps;
	ibt_hca_attr_t hca_attrs;
	ibt_status_t ret;

	/*
	 * If we haven't been plumbed yet, try to get the hca attributes
	 * and figure out the capabilities now
	 */
	if (caps == NULL) {
		ASSERT(ss->ei_props != NULL);

		ret = ibt_query_hca_byguid(ss->ei_props->ep_hca_guid,
		    &hca_attrs);
		if (ret == IBT_SUCCESS) {
			eib_ibt_record_capab(ss, &hca_attrs, &s_caps);
			caps = &s_caps;
		}
	}

	if ((caps != NULL) && (cap == MAC_CAPAB_HCKSUM)) {
		uint32_t *tx_flags = cap_data;

		if (caps->cp_cksum_flags == 0) {
			EIB_DPRINTF_VERBOSE(ss->ei_instance,
			    "eib_m_getcapab: hw cksum disabled, cksum_flags=0");
			return (B_FALSE);
		}

		*tx_flags = caps->cp_cksum_flags;

		return (B_TRUE);

	} else if ((caps != NULL) && (cap == MAC_CAPAB_LSO)) {
		mac_capab_lso_t *cap_lso = cap_data;

		/*
		 * If the HCA supports LSO, it will advertise a non-zero
		 * "max lso size" parameter.  Also, LSO relies on hw
		 * checksum being available.  Finally, if the HCA
		 * doesn't provide the reserved-lkey capability, LSO
		 * will adversely affect performance.  So, we'll enable
		 * LSO only if we have a non-zero max lso size, checksum
		 * offload support and a reserved lkey.
		 */
		if (caps->cp_lso_maxlen == 0 ||
		    caps->cp_cksum_flags == 0 ||
		    caps->cp_resv_lkey_capab == 0) {
			EIB_DPRINTF_VERBOSE(ss->ei_instance, "eib_m_getcapab: "
			    "LSO disabled, lso_maxlen=0x%lx, "
			    "cksum_flags=0x%lx, resv_lkey_capab=%d",
			    caps->cp_lso_maxlen,
			    caps->cp_cksum_flags,
			    caps->cp_resv_lkey_capab);
			return (B_FALSE);
		}

		cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
		cap_lso->lso_basic_tcp_ipv4.lso_max = caps->cp_lso_maxlen - 1;

		return (B_TRUE);
	}

	return (B_FALSE);
}

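/*
 * GLDv3 callback to set a mac property.  All properties exposed by
 * this driver are read-only, so this simply returns ENOTSUP.
 */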
/*ARGSUSED*/
static int
eib_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
    uint_t pr_valsize, const void *pr_val)
{
	return (ENOTSUP);
}

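/*
 * GLDv3 callback to get a mac property
 */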
static int
eib_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
    uint_t pr_valsize, void *pr_val)
{
	eib_t *ss = arg;
	link_duplex_t duplex = LINK_DUPLEX_FULL;
	uint64_t speed = ss->ei_props->ep_ifspeed;
	int err = 0;

	switch (pr_num) {
	case MAC_PROP_DUPLEX:
		ASSERT(pr_valsize >= sizeof (link_duplex_t));
		bcopy(&duplex, pr_val, sizeof (link_duplex_t));
		break;

	case MAC_PROP_SPEED:
		ASSERT(pr_valsize >= sizeof (uint64_t));
		bcopy(&speed, pr_val, sizeof (speed));
		break;

	case MAC_PROP_PRIVATE:
		if (strcmp(pr_name, EIB_DLPROP_GW_EPORT_STATE) == 0) {
			if (ss->ei_gw_eport_state == FIP_EPORT_UP) {
				(void) snprintf(pr_val, pr_valsize,
				    "%s", "up");
			} else {
				(void) snprintf(pr_val, pr_valsize,
				    "%s", "down");
			}
		} else if (strcmp(pr_name, EIB_DLPROP_HCA_GUID) == 0) {
			(void) snprintf(pr_val, pr_valsize, "%llX",
			    (u_longlong_t)ss->ei_props->ep_hca_guid);

		} else if (strcmp(pr_name, EIB_DLPROP_PORT_GUID) == 0) {
			(void) snprintf(pr_val, pr_valsize, "%llX",
			    (u_longlong_t)((ss->ei_props->ep_sgid).gid_guid));
		}
		break;

	default:
		err = ENOTSUP;
		break;
	}

	return (err);
}

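/*
 * GLDv3 callback to describe mac property attributes
 */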
/*ARGSUSED*/
static void
eib_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
    mac_prop_info_handle_t prh)
{
	switch (pr_num) {
	case MAC_PROP_DUPLEX:
	case MAC_PROP_SPEED:
		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
		break;

	case MAC_PROP_MTU:
		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
		mac_prop_info_set_range_uint32(prh, ETHERMTU, ETHERMTU);
		break;

	case MAC_PROP_PRIVATE:
		if (strcmp(pr_name, EIB_DLPROP_GW_EPORT_STATE) == 0) {
			mac_prop_info_set_default_str(prh, "up ");
			mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
		} else if (strcmp(pr_name, EIB_DLPROP_HCA_GUID) == 0) {
			mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
		} else if (strcmp(pr_name, EIB_DLPROP_PORT_GUID) == 0) {
			mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
		}
		break;
	}
}

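/*
 * Initialize the per-instance soft state: synchronization primitives,
 * node state, statistics and the service threads.
 */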
static int
eib_state_init(eib_t *ss)
{
	kthread_t *kt;

	/*
	 * Initialize synchronization primitives
	 */
	mutex_init(&ss->ei_vnic_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ss->ei_av_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ss->ei_ev_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ss->ei_rxpost_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ss->ei_vnic_req_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ss->ei_ka_vnics_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ss->ei_vnic_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&ss->ei_ev_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&ss->ei_rxpost_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&ss->ei_vnic_req_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&ss->ei_ka_vnics_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Create and initialize the node state structure
	 */
	ss->ei_node_state = kmem_zalloc(sizeof (eib_node_state_t), KM_SLEEP);
	ss->ei_node_state->ns_link_state = LINK_STATE_UNKNOWN;
	mutex_init(&ss->ei_node_state->ns_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ss->ei_node_state->ns_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Allocate space for gathering statistics
	 */
	ss->ei_stats = kmem_zalloc(sizeof (eib_stats_t), KM_SLEEP);

	/*
	 * Start up service threads
	 */
	kt = thread_create(NULL, 0, eib_events_handler, ss, 0,
	    &p0, TS_RUN, minclsyspri);
	ss->ei_events_handler = kt->t_did;

	kt = thread_create(NULL, 0, eib_refill_rwqes, ss, 0,
	    &p0, TS_RUN, minclsyspri);
	ss->ei_rwqes_refiller = kt->t_did;

	kt = thread_create(NULL, 0, eib_vnic_creator, ss, 0,
	    &p0, TS_RUN, minclsyspri);
	ss->ei_vnic_creator = kt->t_did;

	kt = thread_create(NULL, 0, eib_manage_keepalives, ss, 0,
	    &p0, TS_RUN, minclsyspri);
	ss->ei_keepalives_manager = kt->t_did;

	/*
	 * Set default state of gw eport
	 */
	ss->ei_gw_eport_state = FIP_EPORT_UP;

	/*
	 * Do static initializations of common structures
	 */
	eib_reserved_gid.gid_prefix = 0;
	eib_reserved_gid.gid_guid = 0;

	return (EIB_E_SUCCESS);
}

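/*
 * Register NDI event callbacks (login ack, gateway available and
 * gateway info update) with the EoIB nexus
 */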
static int
eib_add_event_callbacks(eib_t *ss)
{
	int ret;
	ddi_eventcookie_t login_ack_evc;
	ddi_eventcookie_t gw_alive_evc;
	ddi_eventcookie_t gw_info_evc;

	/*
	 * Add callback for receiving vnic login acks from the gateway
	 */
	if ((ret = ddi_get_eventcookie(ss->ei_dip, EIB_NDI_EVENT_LOGIN_ACK,
	    &login_ack_evc)) != DDI_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance, "eib_add_event_callbacks: "
		    "ddi_get_eventcookie(LOGIN_ACK) failed, ret=%d", ret);
		return (EIB_E_FAILURE);
	}
	if ((ret = ddi_add_event_handler(ss->ei_dip, login_ack_evc,
	    eib_login_ack_cb, ss, &ss->ei_login_ack_cb)) != DDI_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance, "eib_add_event_callbacks: "
		    "ddi_add_event_handler(LOGIN_ACK) failed, ret=%d", ret);
		return (EIB_E_FAILURE);
	}

	/*
	 * Add callback for receiving notice of the gateway transitioning
	 * from not-available to available
	 */
	if ((ret = ddi_get_eventcookie(ss->ei_dip, EIB_NDI_EVENT_GW_AVAILABLE,
	    &gw_alive_evc)) != DDI_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance, "eib_add_event_callbacks: "
		    "ddi_get_eventcookie(GW_AVAILABLE) failed, ret=%d", ret);
		(void) ddi_remove_event_handler(ss->ei_login_ack_cb);
		return (EIB_E_FAILURE);
	}
	if ((ret = ddi_add_event_handler(ss->ei_dip, gw_alive_evc,
	    eib_gw_alive_cb, ss, &ss->ei_gw_alive_cb)) != DDI_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance, "eib_add_event_callbacks: "
		    "ddi_add_event_handler(GW_AVAILABLE) failed, ret=%d", ret);
		(void) ddi_remove_event_handler(ss->ei_login_ack_cb);
		return (EIB_E_FAILURE);
	}

	/*
	 * Add callback for receiving gateway info updates
	 */
	if ((ret = ddi_get_eventcookie(ss->ei_dip, EIB_NDI_EVENT_GW_INFO_UPDATE,
	    &gw_info_evc)) != DDI_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance, "eib_add_event_callbacks: "
		    "ddi_get_eventcookie(GW_INFO_UPDATE) failed, ret=%d", ret);
		(void) ddi_remove_event_handler(ss->ei_gw_alive_cb);
		(void) ddi_remove_event_handler(ss->ei_login_ack_cb);
		return (EIB_E_FAILURE);
	}
	if ((ret = ddi_add_event_handler(ss->ei_dip, gw_info_evc,
	    eib_gw_info_cb, ss, &ss->ei_gw_info_cb)) != DDI_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance, "eib_add_event_callbacks: "
		    "ddi_add_event_handler(GW_INFO) failed, ret=%d", ret);
		(void) ddi_remove_event_handler(ss->ei_gw_alive_cb);
		(void) ddi_remove_event_handler(ss->ei_login_ack_cb);
		return (EIB_E_FAILURE);
	}

	return (EIB_E_SUCCESS);
}

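/*
 * Register this instance with the GLDv3 mac layer
 */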
static int
eib_register_with_mac(eib_t *ss, dev_info_t *dip)
{
	mac_register_t *macp;
	int ret;

	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
		EIB_DPRINTF_ERR(ss->ei_instance, "eib_register_with_mac: "
		    "mac_alloc(MAC_VERSION=%d) failed", MAC_VERSION);
		return (EIB_E_FAILURE);
	}

	/*
	 * Note that when we register with mac during attach, we don't
	 * have the mac address yet (we'll get that after we log in to
	 * the gateway), so we simply register a zero macaddr that
	 * we'll overwrite later during plumb, in eib_m_start(). Likewise,
	 * we'll also update the max-sdu with the correct MTU after we
	 * figure it out when we log in to the gateway during plumb.
	 */
	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = ss;
	macp->m_dip = dip;
	macp->m_src_addr = eib_zero_mac;
	macp->m_callbacks = &eib_m_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = ETHERMTU;
	macp->m_margin = VLAN_TAGSZ;
	macp->m_priv_props = eib_pvt_props;

	ret = mac_register(macp, &ss->ei_mac_hdl);
	mac_free(macp);

	if (ret != 0) {
		EIB_DPRINTF_ERR(ss->ei_instance, "eib_register_with_mac: "
		    "mac_register() failed, ret=%d", ret);
		return (EIB_E_FAILURE);
	}

	return (EIB_E_SUCCESS);
}

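/*
 * Roll back the attach steps indicated by the progress mask
 */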
static void
eib_rb_attach(eib_t *ss, uint_t progress)
{
	ibt_status_t ret;
	int instance;

	if (progress & EIB_ATTACH_REGISTER_MAC_DONE)
		eib_rb_register_with_mac(ss);

	if (progress & EIB_ATTACH_EV_CBS_ADDED)
		eib_rb_add_event_callbacks(ss);

	if (progress & EIB_ATTACH_IBT_ATT_DONE) {
		ret = ibt_detach(ss->ei_ibt_hdl);
		if (ret != IBT_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance, "eib_rb_attach: "
			    "ibt_detach() failed, ret=%d", ret);
		}
		ss->ei_ibt_hdl = NULL;
	}

	if (progress & EIB_ATTACH_STATE_INIT_DONE)
		eib_rb_state_init(ss);

	if (progress & EIB_ATTACH_PROPS_PARSED)
		eib_rb_get_props(ss);

	if (progress & EIB_ATTACH_STATE_ALLOCD) {
		instance = ddi_get_instance(ss->ei_dip);
		ddi_soft_state_free(eib_state, instance);
	}
}

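/*
 * Undo eib_state_init(): stop the service threads and release node
 * state, statistics and synchronization resources
 */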
static void
eib_rb_state_init(eib_t *ss)
{
	/*
	 * Terminate service threads
	 */
	if (ss->ei_keepalives_manager) {
		eib_stop_manage_keepalives(ss);
		ss->ei_keepalives_manager = 0;
	}
	if (ss->ei_vnic_creator) {
		eib_stop_vnic_creator(ss);
		ss->ei_vnic_creator = 0;
	}
	if (ss->ei_rwqes_refiller) {
		eib_stop_refill_rwqes(ss);
		ss->ei_rwqes_refiller = 0;
	}
	if (ss->ei_events_handler) {
		eib_stop_events_handler(ss);
		ss->ei_events_handler = 0;
	}

	/*
	 * Free the space allocated for gathering statistics
	 */
	if (ss->ei_stats) {
		kmem_free(ss->ei_stats, sizeof (eib_stats_t));
		ss->ei_stats = NULL;
	}

	/*
	 * Free the space allocated for keeping node state
	 */
	if (ss->ei_node_state) {
		cv_destroy(&ss->ei_node_state->ns_cv);
		mutex_destroy(&ss->ei_node_state->ns_lock);
		kmem_free(ss->ei_node_state, sizeof (eib_node_state_t));
		ss->ei_node_state = NULL;
	}

	/*
	 * Finally, destroy all synchronization resources
	 */
	cv_destroy(&ss->ei_ka_vnics_cv);
	cv_destroy(&ss->ei_vnic_req_cv);
	cv_destroy(&ss->ei_rxpost_cv);
	cv_destroy(&ss->ei_ev_cv);
	cv_destroy(&ss->ei_vnic_cv);
	mutex_destroy(&ss->ei_ka_vnics_lock);
	mutex_destroy(&ss->ei_vnic_req_lock);
	mutex_destroy(&ss->ei_rxpost_lock);
	mutex_destroy(&ss->ei_ev_lock);
	mutex_destroy(&ss->ei_av_lock);
	mutex_destroy(&ss->ei_vnic_lock);
}

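/*
 * Unregister the NDI event callbacks added during attach
 */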
static void
eib_rb_add_event_callbacks(eib_t *ss)
{
	ddi_eventcookie_t evc;

	if (ddi_get_eventcookie(ss->ei_dip, EIB_NDI_EVENT_GW_INFO_UPDATE,
	    &evc) == DDI_SUCCESS) {
		(void) ddi_remove_event_handler(ss->ei_gw_info_cb);
		ss->ei_gw_info_cb = NULL;
	}

	if (ddi_get_eventcookie(ss->ei_dip, EIB_NDI_EVENT_GW_AVAILABLE,
	    &evc) == DDI_SUCCESS) {
		(void) ddi_remove_event_handler(ss->ei_gw_alive_cb);
		ss->ei_gw_alive_cb = NULL;
	}

	if (ddi_get_eventcookie(ss->ei_dip, EIB_NDI_EVENT_LOGIN_ACK,
	    &evc) == DDI_SUCCESS) {
		(void) ddi_remove_event_handler(ss->ei_login_ack_cb);
		ss->ei_login_ack_cb = NULL;
	}
}

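/*
 * Undo eib_register_with_mac()
 */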
static void
eib_rb_register_with_mac(eib_t *ss)
{
	int ret;

	if ((ret = mac_unregister(ss->ei_mac_hdl)) != 0) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_rb_register_with_mac: "
		    "mac_unregister() failed, ret=%d", ret);
	}

	ss->ei_mac_hdl = NULL;
}
978