xref: /titanic_41/usr/src/uts/common/io/ib/clients/eoib/eib_vnic.c (revision 6a634c9dca3093f3922e4b7ab826d7bdf17bf78e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/kmem.h>
28 #include <sys/conf.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/ksynch.h>
32 
33 #include <sys/ib/clients/eoib/eib_impl.h>
34 
35 /*
36  * Declarations private to this file
37  */
38 static int eib_vnic_get_instance(eib_t *, int *);
39 static void eib_vnic_ret_instance(eib_t *, int);
40 static void eib_vnic_modify_enter(eib_t *, uint_t);
41 static void eib_vnic_modify_exit(eib_t *, uint_t);
42 static int eib_vnic_create_common(eib_t *, eib_vnic_t *, int *);
43 static int eib_vnic_set_partition(eib_t *, eib_vnic_t *, int *);
44 static void eib_vnic_make_vhub_mgid(uint8_t *, uint8_t, uint8_t *, uint8_t,
45     uint8_t, uint32_t, ib_gid_t *);
46 static int eib_vnic_attach_ctl_mcgs(eib_t *, eib_vnic_t *, int *);
47 static int eib_vnic_attach_vhub_table(eib_t *, eib_vnic_t *);
48 static int eib_vnic_attach_vhub_update(eib_t *, eib_vnic_t *);
49 static void eib_vnic_start_keepalives(eib_t *, eib_vnic_t *);
50 static int eib_vnic_lookup_dest(eib_vnic_t *, uint8_t *, uint16_t,
51     eib_vhub_map_t *, ibt_mcg_info_t *, int *);
52 static void eib_vnic_leave_all_data_mcgs(eib_t *, eib_vnic_t *);
53 static void eib_vnic_rejoin_data_mcgs(eib_t *, eib_vnic_t *);
54 static void eib_vnic_reattach_ctl_mcgs(eib_t *, eib_vnic_t *);
55 static void eib_rb_vnic_create_common(eib_t *, eib_vnic_t *, uint_t);
56 static void eib_rb_vnic_attach_ctl_mcgs(eib_t *, eib_vnic_t *);
57 static void eib_rb_vnic_attach_vhub_table(eib_t *, eib_vnic_t *);
58 static void eib_rb_vnic_attach_vhub_update(eib_t *, eib_vnic_t *);
59 static void eib_rb_vnic_start_keepalives(eib_t *, eib_vnic_t *);
60 static void eib_rb_vnic_join_data_mcg(eib_t *, eib_vnic_t *, uint8_t *);
61 
62 /*
63  * Definitions private to this file
64  */
65 #define	EIB_VNIC_STRUCT_ALLOCD		0x0001
66 #define	EIB_VNIC_GOT_INSTANCE		0x0002
67 #define	EIB_VNIC_CREATE_COMMON_DONE	0x0004
68 #define	EIB_VNIC_CTLQP_CREATED		0x0008
69 #define	EIB_VNIC_DATAQP_CREATED		0x0010
70 #define	EIB_VNIC_LOGIN_DONE		0x0020
71 #define	EIB_VNIC_PARTITION_SET		0x0040
72 #define	EIB_VNIC_RX_POSTED_TO_CTLQP	0x0080
73 #define	EIB_VNIC_RX_POSTED_TO_DATAQP	0x0100
74 #define	EIB_VNIC_ATTACHED_TO_CTL_MCGS	0x0200
75 #define	EIB_VNIC_GOT_VHUB_TABLE		0x0400
76 #define	EIB_VNIC_KEEPALIVES_STARTED	0x0800
77 #define	EIB_VNIC_BROADCAST_JOINED	0x1000
78 
79 /*
80  * Destination type
81  */
82 #define	EIB_TX_UNICAST			1
83 #define	EIB_TX_MULTICAST		2
84 #define	EIB_TX_BROADCAST		3
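/*
 * These values classify the destination resolved by
 * eib_vnic_lookup_dest(): eib_vnic_setup_dest() picks a unicast
 * address vector for EIB_TX_UNICAST and uses the mcg address vector
 * for the other two, bumping st_brdcstxmit or st_multixmit as
 * appropriate.
 */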
85 
86 int
87 eib_vnic_create(eib_t *ss, uint8_t *macaddr, uint16_t vlan, eib_vnic_t **vnicp,
88     int *err)
89 {
90 	eib_vnic_t *vnic = NULL;
91 	boolean_t failed_vnic = B_FALSE;
92 	uint_t progress = 0;
93 
94 	eib_vnic_modify_enter(ss, EIB_VN_BEING_CREATED);
95 
96 	/*
97 	 * When a previously created vnic is being resurrected due to a
98 	 * gateway reboot, there's a race possible where a creation request
99 	 * for the existing vnic could get filed with the vnic creator
100 	 * thread. So, before we go ahead with the creation of this vnic,
101 	 * make sure we don't already have the vnic.
102 	 */
103 	if (macaddr) {
104 		if (eib_data_lookup_vnic(ss, macaddr, vlan, vnicp,
105 		    &failed_vnic) == EIB_E_SUCCESS) {
106 			EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_create: "
107 			    "vnic for mac=%x:%x:%x:%x:%x:%x, vlan=0x%x "
108 			    "already there, no duplicate creation", macaddr[0],
109 			    macaddr[1], macaddr[2], macaddr[3], macaddr[4],
110 			    macaddr[5], vlan);
111 
112 			eib_vnic_modify_exit(ss, EIB_VN_BEING_CREATED);
113 			return (EIB_E_SUCCESS);
114 		} else if (failed_vnic) {
115 			EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_create: "
116 			    "vnic for mac=%x:%x:%x:%x:%x:%x, vlan=0x%x "
117 			    "failed earlier, shouldn't be here at all",
118 			    macaddr[0], macaddr[1], macaddr[2], macaddr[3],
119 			    macaddr[4], macaddr[5], vlan);
120 
121 			*err = EEXIST;
122 
123 			eib_vnic_modify_exit(ss, EIB_VN_BEING_CREATED);
124 			return (EIB_E_FAILURE);
125 		}
126 	}
127 
128 	/*
129 	 * Allocate a vnic structure for this instance
130 	 */
131 	vnic = kmem_zalloc(sizeof (eib_vnic_t), KM_SLEEP);
132 	vnic->vn_ss = ss;
133 	vnic->vn_instance = -1;
134 	mutex_init(&vnic->vn_lock, NULL, MUTEX_DRIVER, NULL);
135 	cv_init(&vnic->vn_cv, NULL, CV_DEFAULT, NULL);
136 
137 	progress |= EIB_VNIC_STRUCT_ALLOCD;
138 
139 	/*
140 	 * Get a vnic instance
141 	 */
142 	if (eib_vnic_get_instance(ss, &vnic->vn_instance) != EIB_E_SUCCESS) {
143 		*err = EMFILE;
144 		goto vnic_create_fail;
145 	}
146 	progress |= EIB_VNIC_GOT_INSTANCE;
147 
148 	/*
149 	 * Initialize vnic's basic parameters.  Note that the 15-bit vnic id
150 	 * we send to the gateway during login is a 2-tuple of
151 	 * {devi_instance#, eoib_vnic_instance#}.
152 	 */
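	/*
	 * A sketch of that layout, assuming the encoding in eib.h (the
	 * EIB_VNIC_ID() definition there is authoritative): with
	 * EIB_MAX_VNICS at 64, the low 6 bits would carry the vnic
	 * instance and the remaining bits of the 15-bit id the devinfo
	 * instance, roughly
	 *
	 *	vn_id = (devi_instance << 6) | vnic_instance;
	 *
	 * The 16th bit (FIP_VL_VNIC_ID_MSBIT) belongs to the gateway and
	 * is masked off when we parse login acks.
	 */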
153 	vnic->vn_vlan = vlan;
154 	if (macaddr) {
155 		bcopy(macaddr, vnic->vn_macaddr, sizeof (vnic->vn_macaddr));
156 	}
157 	vnic->vn_id = (uint16_t)EIB_VNIC_ID(ss->ei_instance, vnic->vn_instance);
158 
159 	/*
160 	 * Start up this vnic instance
161 	 */
162 	if (eib_vnic_create_common(ss, vnic, err) != EIB_E_SUCCESS)
163 		goto vnic_create_fail;
164 
165 	progress |= EIB_VNIC_CREATE_COMMON_DONE;
166 
167 	/*
168 	 * Return the created vnic
169 	 */
170 	if (vnicp) {
171 		*vnicp = vnic;
172 	}
173 
174 	eib_vnic_modify_exit(ss, EIB_VN_BEING_CREATED);
175 	return (EIB_E_SUCCESS);
176 
177 vnic_create_fail:
178 	eib_rb_vnic_create(ss, vnic, progress);
179 	eib_vnic_modify_exit(ss, EIB_VN_BEING_CREATED);
180 	return (EIB_E_FAILURE);
181 }
182 
183 void
184 eib_vnic_delete(eib_t *ss, eib_vnic_t *vnic)
185 {
186 	eib_vnic_modify_enter(ss, EIB_VN_BEING_DELETED);
187 	eib_rb_vnic_create(ss, vnic, ~0);
188 	eib_vnic_modify_exit(ss, EIB_VN_BEING_DELETED);
189 }
190 
191 /*ARGSUSED*/
192 int
193 eib_vnic_wait_for_login_ack(eib_t *ss, eib_vnic_t *vnic, int *err)
194 {
195 	clock_t deadline;
196 	int ret = EIB_E_SUCCESS;
197 
198 	deadline = ddi_get_lbolt() + drv_usectohz(EIB_LOGIN_TIMEOUT_USEC);
199 
200 	/*
201 	 * Wait for the login ack/nack or for the wait time to expire. If
202 	 * we wake up with a login failure, record the reason.
203 	 */
204 	mutex_enter(&vnic->vn_lock);
205 	while (vnic->vn_state == EIB_LOGIN_ACK_WAIT) {
206 		if (cv_timedwait(&vnic->vn_cv, &vnic->vn_lock,
207 		    deadline) == -1) {
208 			if (vnic->vn_state == EIB_LOGIN_ACK_WAIT)
209 				vnic->vn_state = EIB_LOGIN_TIMED_OUT;
210 		}
211 	}
212 
213 	if (vnic->vn_state != EIB_LOGIN_ACK_RCVD) {
214 		ret = EIB_E_FAILURE;
215 		*err =  (vnic->vn_state == EIB_LOGIN_TIMED_OUT) ?
216 		    ETIME : ECANCELED;
217 	}
218 	mutex_exit(&vnic->vn_lock);
219 
220 	return (ret);
221 }
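/*
 * A note on the wait idiom above: cv_timedwait() takes an absolute
 * deadline in ticks (hence the ddi_get_lbolt() base) and returns -1
 * only once that deadline has passed.  Since a wakeup can race with
 * the timeout, vn_state is re-checked under vn_lock before being
 * marked EIB_LOGIN_TIMED_OUT.  eib_vnic_wait_for_table() below uses
 * the same pattern.
 */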
222 
223 void
224 eib_vnic_login_ack(eib_t *ss, eib_login_data_t *ld)
225 {
226 	eib_vnic_t *vnic;
227 	uint_t vnic_instance;
228 	uint_t hdrs_sz;
229 	uint16_t vnic_id;
230 	int nack = 1;
231 
232 	/*
233 	 * The msb of the vnic id in the login ack message is not
234 	 * part of our vNIC id.
235 	 */
236 	vnic_id = ld->ld_vnic_id & (~FIP_VL_VNIC_ID_MSBIT);
237 
238 	/*
239 	 * Now, we deconstruct the vnic id and determine the vnic
240 	 * instance number. If this vnic_instance number isn't
241 	 * valid or the vnic_id of the vnic for this instance
242 	 * number doesn't match in our records, we quit.
243 	 */
244 	vnic_instance = EIB_VNIC_INSTANCE(vnic_id);
245 	if (vnic_instance >= EIB_MAX_VNICS)
246 		return;
247 
248 	/*
249 	 * At this point, we haven't fully created the vnic, so
250 	 * this vnic should be present as ei_vnic_pending.
251 	 */
252 	mutex_enter(&ss->ei_vnic_lock);
253 	if ((vnic = ss->ei_vnic_pending) == NULL) {
254 		mutex_exit(&ss->ei_vnic_lock);
255 		return;
256 	} else if (vnic->vn_id != vnic_id) {
257 		mutex_exit(&ss->ei_vnic_lock);
258 		return;
259 	}
260 	mutex_exit(&ss->ei_vnic_lock);
261 
262 	/*
263 	 * First check if the vnic is still sleeping, waiting
264 	 * for login ack.  If not, we might as well quit now.
265 	 */
266 	mutex_enter(&vnic->vn_lock);
267 	if (vnic->vn_state != EIB_LOGIN_ACK_WAIT) {
268 		mutex_exit(&vnic->vn_lock);
269 		return;
270 	}
271 
272 	/*
273 	 * We NACK the waiter under these conditions:
274 	 *
275 	 * . syndrome was set
276 	 * . vhub mtu is bigger than our max mtu (minus eoib/eth hdrs sz)
277 	 * . assigned vlan is different from requested vlan (except
278 	 *   when we didn't request a specific vlan)
279 	 * . when the assigned mac is different from the requested mac
280 	 *   (except when we didn't request a specific mac)
281 	 * . when the VP bit indicates that vlan tag should be used
282 	 *   but we had not specified a vlan tag in our request
283 	 * . when the VP bit indicates that vlan tag should not be
284 	 *   present and we'd specified a vlan tag in our request
285 	 *
286 	 * The second-to-last case is interesting: if we had not specified
287 	 * any vlan id in our request, but the gateway has assigned a vlan
288 	 * and asks us to use/expect that tag on every packet handled by
289 	 * this vnic, it effectively means the EoIB driver has to insert
290 	 * and remove vlan tagging on this vnic's traffic, since the nw
291 	 * layer on Solaris won't be using/expecting any tag on traffic for
292 	 * this vnic. This feature is not currently supported.
293 	 */
294 	hdrs_sz = EIB_ENCAP_HDR_SZ + sizeof (struct ether_header) + VLAN_TAGSZ;
295 	if (ld->ld_syndrome) {
296 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_login_ack: "
297 		    "non-zero syndrome 0x%lx, NACK", ld->ld_syndrome);
298 
299 	} else if (ld->ld_vhub_mtu > (ss->ei_props->ep_mtu - hdrs_sz)) {
300 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_login_ack: "
301 		    "vhub mtu (0x%x) bigger than port mtu (0x%x), NACK",
302 		    ld->ld_vhub_mtu, ss->ei_props->ep_mtu);
303 
304 	} else if ((vnic->vn_vlan) && (vnic->vn_vlan != ld->ld_assigned_vlan)) {
305 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_login_ack: "
306 		    "assigned vlan (0x%x) different from asked (0x%x), "
307 		    "for vnic id 0x%x, NACK", ld->ld_assigned_vlan,
308 		    vnic->vn_vlan, vnic->vn_id);
309 
310 	} else if (bcmp(vnic->vn_macaddr, eib_zero_mac, ETHERADDRL) &&
311 	    bcmp(vnic->vn_macaddr, ld->ld_assigned_mac, ETHERADDRL)) {
312 		uint8_t *asked, *got;
313 
314 		asked = vnic->vn_macaddr;
315 		got = ld->ld_assigned_mac;
316 
317 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_login_ack: "
318 		    "assigned mac (%x:%x:%x:%x:%x:%x) different from "
319 		    "asked (%x:%x:%x:%x:%x:%x) for vnic id 0x%x, NACK",
320 		    got[0], got[1], got[2], got[3], got[4], got[5], asked[0],
321 		    asked[1], asked[2], asked[3], asked[4], asked[5], vnic->vn_id);
322 
323 	} else if ((vnic->vn_vlan == 0) && (ld->ld_vlan_in_packets)) {
324 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_login_ack: "
325 		    "asked for tagless vlan, but VP flag is set "
326 		    "for vnic id 0x%x, NACK", vnic->vn_id);
327 
328 	} else if ((vnic->vn_vlan) && (!ld->ld_vlan_in_packets)) {
329 		if (eib_wa_no_good_vp_flag) {
330 			ld->ld_vlan_in_packets = 1;
331 			ld->ld_vhub_id = EIB_VHUB_ID(ld->ld_gw_port_id,
332 			    ld->ld_assigned_vlan);
333 			nack = 0;
334 		} else {
335 			EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_login_ack: "
336 			    "vlan was assigned correctly, but VP flag is not "
337 			    "set for vnic id 0x%x, NACK", vnic->vn_id);
338 		}
339 	} else {
340 		ld->ld_vhub_id = EIB_VHUB_ID(ld->ld_gw_port_id,
341 		    ld->ld_assigned_vlan);
342 		nack = 0;
343 	}
344 
345 	/*
346 	 * ACK/NACK the waiter
347 	 */
348 	if (nack) {
349 		vnic->vn_state = EIB_LOGIN_NACK_RCVD;
350 	} else {
351 		bcopy(ld, &vnic->vn_login_data, sizeof (eib_login_data_t));
352 		vnic->vn_state = EIB_LOGIN_ACK_RCVD;
353 	}
354 
355 	cv_signal(&vnic->vn_cv);
356 	mutex_exit(&vnic->vn_lock);
357 }
358 
359 int
360 eib_vnic_wait_for_table(eib_t *ss, eib_vnic_t *vnic, int *err)
361 {
362 	clock_t deadline;
363 	int ret = EIB_E_SUCCESS;
364 
365 	/*
366 	 * The EoIB spec does not detail exactly within what time a vhub table
367 	 * request is expected to be answered.  However, it does mention that
368 	 * in the worst case, the vhub update messages from the gateway must
369 	 * be seen at least once in 2.5 * GW_KA_PERIOD (already saved in
370 	 * pp_gw_ka_ticks), so we'll settle for that limit.
371 	 */
372 	deadline = ddi_get_lbolt() + ss->ei_gw_props->pp_gw_ka_ticks;
373 
374 	/*
375 	 * Wait for vhub table to be constructed. If we wake up with a
376 	 * vhub table construction failure, record the reason.
377 	 */
378 	mutex_enter(&vnic->vn_lock);
379 	while (vnic->vn_state == EIB_LOGIN_TBL_WAIT) {
380 		if (cv_timedwait(&vnic->vn_cv, &vnic->vn_lock,
381 		    deadline) == -1) {
382 			if (vnic->vn_state == EIB_LOGIN_TBL_WAIT)
383 				vnic->vn_state = EIB_LOGIN_TIMED_OUT;
384 		}
385 	}
386 
387 	if (vnic->vn_state != EIB_LOGIN_TBL_DONE) {
388 		ret = EIB_E_FAILURE;
389 		*err =  (vnic->vn_state == EIB_LOGIN_TIMED_OUT) ?
390 		    ETIME : ECANCELED;
391 	}
392 	mutex_exit(&vnic->vn_lock);
393 
394 	return (ret);
395 }
396 
397 void
398 eib_vnic_vhub_table_done(eib_vnic_t *vnic, uint_t result_state)
399 {
400 	ASSERT(result_state == EIB_LOGIN_TBL_DONE ||
401 	    result_state == EIB_LOGIN_TBL_FAILED);
402 
403 	/*
404 	 * Construction of vhub table for the vnic is done one way or
405 	 * the other.  Set the login wait state appropriately and signal
406 	 * the waiter. If it's a vhub table failure, we shouldn't parse
407 	 * any more vhub table or vhub update packets until the vnic state
408 	 * is changed.
409 	 */
410 	mutex_enter(&vnic->vn_lock);
411 	vnic->vn_state = result_state;
412 	cv_signal(&vnic->vn_cv);
413 	mutex_exit(&vnic->vn_lock);
414 }
415 
416 int
417 eib_vnic_join_data_mcg(eib_t *ss, eib_vnic_t *vnic, uint8_t *mcast_mac,
418     boolean_t rejoin, int *err)
419 {
420 	eib_chan_t *chan = vnic->vn_data_chan;
421 	eib_login_data_t *ld = &vnic->vn_login_data;
422 	eib_mcg_t *mcg;
423 	eib_mcg_t *elem;
424 	eib_mcg_t *tail;
425 	ibt_mcg_info_t *mcg_info;
426 	ibt_mcg_attr_t mcg_attr;
427 	ibt_status_t ret;
428 
429 	/*
430 	 * Compose the multicast MGID to join
431 	 */
432 	bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));
433 
434 	eib_vnic_make_vhub_mgid(ld->ld_gw_mgid_prefix,
435 	    (uint8_t)EIB_MGID_VHUB_DATA, mcast_mac, ld->ld_n_mac_mcgid, 0,
436 	    ld->ld_vhub_id, &(mcg_attr.mc_mgid));
437 	mcg_attr.mc_pkey = (ib_pkey_t)ld->ld_vhub_pkey;
438 	mcg_attr.mc_qkey = (ib_qkey_t)EIB_DATA_QKEY;
439 
440 	/*
441 	 * Allocate for and prepare the mcg to add to our list
442 	 */
443 	mcg_info = kmem_zalloc(sizeof (ibt_mcg_info_t), KM_NOSLEEP);
444 	if (mcg_info == NULL) {
445 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_join_data_mcg: "
446 		    "no memory, failed to join mcg (mac=%x:%x:%x:%x:%x:%x)",
447 		    mcast_mac[0], mcast_mac[1], mcast_mac[2],
448 		    mcast_mac[3], mcast_mac[4], mcast_mac[5]);
449 
450 		*err = ENOMEM;
451 		goto vnic_join_data_mcg_fail;
452 	}
453 	mcg = kmem_zalloc(sizeof (eib_mcg_t), KM_NOSLEEP);
454 	if (mcg == NULL) {
455 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_join_data_mcg: "
456 		    "no memory, failed to join mcg (mac=%x:%x:%x:%x:%x:%x)",
457 		    mcast_mac[0], mcast_mac[1], mcast_mac[2],
458 		    mcast_mac[3], mcast_mac[4], mcast_mac[5]);
459 
460 		*err = ENOMEM;
461 		goto vnic_join_data_mcg_fail;
462 	}
463 	mcg->mg_next = NULL;
464 	mcg->mg_rgid = ss->ei_props->ep_sgid;
465 	mcg->mg_mgid = mcg_attr.mc_mgid;
466 	mcg->mg_join_state = IB_MC_JSTATE_FULL;
467 	mcg->mg_mcginfo = mcg_info;
468 	bcopy(mcast_mac, mcg->mg_mac, ETHERADDRL);
469 
470 	/*
471 	 * Join the multicast group
472 	 *
473 	 * Should we query for the mcg and join instead of attempting to
474 	 * join directly ?
475 	 */
476 	mcg_attr.mc_join_state = mcg->mg_join_state;
477 	mcg_attr.mc_flow = 0;
478 	mcg_attr.mc_tclass = 0;
479 	mcg_attr.mc_sl = 0;
480 	mcg_attr.mc_scope = 0;	/* IB_MC_SCOPE_SUBNET_LOCAL perhaps ? */
481 
482 	ret = ibt_join_mcg(mcg->mg_rgid, &mcg_attr, mcg_info, NULL, NULL);
483 	if (ret != IBT_SUCCESS) {
484 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_join_data_mcg: "
485 		    "ibt_join_mcg(mgid=%llx.%llx, pkey=0x%x, qkey=0x%lx, "
486 		    "jstate=0x%x) failed, ret=%d", mcg_attr.mc_mgid.gid_prefix,
487 		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey,
488 		    mcg_attr.mc_qkey, mcg_attr.mc_join_state, ret);
489 
490 		*err = EINVAL;
491 		goto vnic_join_data_mcg_fail;
492 	}
493 
494 	/*
495 	 * Attach to the group to receive multicast messages
496 	 */
497 	ret = ibt_attach_mcg(chan->ch_chan, mcg_info);
498 	if (ret != IBT_SUCCESS) {
499 		*err = EINVAL;
500 
501 		ret = ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
502 		    eib_reserved_gid, mcg->mg_join_state);
503 		if (ret != IBT_SUCCESS) {
504 			EIB_DPRINTF_WARN(ss->ei_instance,
505 			    "eib_vnic_join_data_mcg: "
506 			    "ibt_leave_mcg(mgid=%llx.%llx, jstate=0x%x) "
507 			    "failed, ret=%d", mcg->mg_mgid.gid_prefix,
508 			    mcg->mg_mgid.gid_guid, mcg->mg_join_state, ret);
509 		}
510 
511 		goto vnic_join_data_mcg_fail;
512 	}
513 
514 	mutex_enter(&chan->ch_vhub_lock);
515 
516 	tail = NULL;
517 	for (elem = chan->ch_vhub_data; elem != NULL; elem = elem->mg_next) {
518 		if ((elem->mg_mgid.gid_prefix == mcg_attr.mc_mgid.gid_prefix) &&
519 		    (elem->mg_mgid.gid_guid == mcg_attr.mc_mgid.gid_guid)) {
520 			break;
521 		}
522 		tail = elem;
523 	}
524 
525 	/*
526 	 * If we hadn't already joined this mcg, add the newly joined mcg
527 	 * to the tail and return success
528 	 */
529 	if (elem == NULL) {
530 		if (tail)
531 			tail->mg_next = mcg;
532 		else
533 			chan->ch_vhub_data = mcg;
534 		mutex_exit(&chan->ch_vhub_lock);
535 		return (EIB_E_SUCCESS);
536 	}
537 
538 	/*
539 	 * Duplicate.  We need to leave one of the two joins.  If "rejoin"
540 	 * was requested, leave the old join, otherwise leave the new join.
541 	 *
542 	 * Note that we must not detach the qp from the mcg, since if this
543 	 * was a dup, a second ibt_attach_mcg() above would've simply been
544 	 * a nop.
545 	 *
546 	 * Note also that the leave may not be successful here if our presence
547 	 * has been removed by the SM, but we need to do this to prevent leaks
548 	 * in ibtf.
549 	 */
550 	if (rejoin) {
551 		ASSERT(elem->mg_mcginfo != NULL);
552 		kmem_free(elem->mg_mcginfo, sizeof (ibt_mcg_info_t));
553 		(void) ibt_leave_mcg(elem->mg_rgid, elem->mg_mgid,
554 		    eib_reserved_gid, elem->mg_join_state);
555 		/*
556 		 * Copy the new mcg over the old one (including the new
557 		 * mg_mcginfo), but preserve the link to the next element
558 		 * on the list
559 		 */
560 		mcg->mg_next = elem->mg_next;
561 		bcopy(mcg, elem, sizeof (eib_mcg_t));
562 	} else {
563 		ASSERT(mcg->mg_mcginfo != NULL);
564 		kmem_free(mcg->mg_mcginfo, sizeof (ibt_mcg_info_t));
565 		(void) ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
566 		    eib_reserved_gid, mcg->mg_join_state);
567 	}
568 	mutex_exit(&chan->ch_vhub_lock);
569 
570 	kmem_free(mcg, sizeof (eib_mcg_t));
571 	return (EIB_E_SUCCESS);
572 
573 vnic_join_data_mcg_fail:
574 	if (mcg) {
575 		kmem_free(mcg, sizeof (eib_mcg_t));
576 	}
577 	if (mcg_info) {
578 		kmem_free(mcg_info, sizeof (ibt_mcg_info_t));
579 	}
580 	return (EIB_E_FAILURE);
581 }
582 
583 int
584 eib_vnic_setup_dest(eib_vnic_t *vnic, eib_wqe_t *swqe, uint8_t *dmac,
585     uint16_t vlan)
586 {
587 	eib_t *ss = vnic->vn_ss;
588 	eib_stats_t *stats = ss->ei_stats;
589 	eib_avect_t *av;
590 	eib_vhub_map_t ucast;
591 	ibt_mcg_info_t mcast;
592 	ibt_status_t ret;
593 	int dtype;
594 	int rv;
595 
596 	/*
597 	 * Lookup the destination in the vhub table or in our mcg list
598 	 */
599 	rv = eib_vnic_lookup_dest(vnic, dmac, vlan, &ucast, &mcast, &dtype);
600 	if (rv != EIB_E_SUCCESS) {
601 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_setup_dest: "
602 		    "eib_vnic_lookup_dest(dmac=%x:%x:%x:%x:%x:%x, vlan=0x%x) "
603 		    "failed", dmac[0], dmac[1], dmac[2], dmac[3], dmac[4],
604 		    dmac[5], vlan);
605 
606 		return (EIB_E_FAILURE);
607 	}
608 
609 	/*
610 	 * If we found a unicast address, get an address vector for the lid
611 	 * and sl, modify the ud dest based on the address vector and return.
612 	 * If we found a multicast address, use the address vector in the
613 	 * mcg info to modify the ud dest and return.
614 	 */
615 	if (dtype == EIB_TX_UNICAST) {
616 		if ((av = eib_ibt_hold_avect(ss, ucast.mp_lid,
617 		    ucast.mp_sl)) == NULL) {
618 			EIB_DPRINTF_WARN(ss->ei_instance,
619 			    "eib_vnic_setup_dest: "
620 			    "eib_ibt_hold_avect(lid=0x%x, sl=0x%x) failed",
621 			    ucast.mp_lid, ucast.mp_sl);
622 
623 			return (EIB_E_FAILURE);
624 		}
625 		ret = ibt_modify_ud_dest(swqe->qe_dest, EIB_DATA_QKEY,
626 		    ucast.mp_qpn, &av->av_vect);
627 
628 		eib_ibt_release_avect(ss, av);
629 
630 		if (ret != IBT_SUCCESS) {
631 			EIB_DPRINTF_WARN(ss->ei_instance,
632 			    "eib_vnic_setup_dest: "
633 			    "ibt_modify_ud_dest(qpn=0x%lx, qkey=0x%lx) "
634 			    "failed, ret=%d", ucast.mp_qpn, EIB_DATA_QKEY, ret);
635 			return (EIB_E_FAILURE);
636 		}
637 	} else {
638 		ret = ibt_modify_ud_dest(swqe->qe_dest, EIB_DATA_QKEY,
639 		    IB_MC_QPN, &(mcast.mc_adds_vect));
640 
641 		if (dtype == EIB_TX_BROADCAST)
642 			EIB_INCR_COUNTER(&stats->st_brdcstxmit);
643 		else
644 			EIB_INCR_COUNTER(&stats->st_multixmit);
645 
646 		if (ret != IBT_SUCCESS) {
647 			EIB_DPRINTF_WARN(ss->ei_instance,
648 			    "eib_vnic_setup_dest: "
649 			    "ibt_modify_ud_dest(mc_qpn=0x%lx, qkey=0x%lx) "
650 			    "failed, ret=%d", IB_MC_QPN, EIB_DATA_QKEY, ret);
651 			return (EIB_E_FAILURE);
652 		}
653 	}
654 
655 	return (EIB_E_SUCCESS);
656 }
657 
658 void
659 eib_vnic_leave_data_mcg(eib_t *ss, eib_vnic_t *vnic, uint8_t *mcast_mac)
660 {
661 	eib_rb_vnic_join_data_mcg(ss, vnic, mcast_mac);
662 }
663 
664 /*ARGSUSED*/
665 void
666 eib_vnic_init_tables(eib_t *ss, eib_vnic_t *vnic)
667 {
668 	eib_vhub_table_t *tbl;
669 	eib_vhub_update_t *upd;
670 
671 	tbl = kmem_zalloc(sizeof (eib_vhub_table_t), KM_SLEEP);
672 	mutex_init(&tbl->tb_lock, NULL, MUTEX_DRIVER, NULL);
673 	tbl->tb_eport_state = FIP_EPORT_UP;
674 
675 	upd = kmem_zalloc(sizeof (eib_vhub_update_t), KM_SLEEP);
676 	mutex_init(&upd->up_lock, NULL, MUTEX_DRIVER, NULL);
677 
678 	mutex_enter(&vnic->vn_lock);
679 	vnic->vn_vhub_table = tbl;
680 	vnic->vn_vhub_update = upd;
681 	mutex_exit(&vnic->vn_lock);
682 }
683 
684 /*ARGSUSED*/
685 void
686 eib_vnic_fini_tables(eib_t *ss, eib_vnic_t *vnic, boolean_t clobber)
687 {
688 	eib_vhub_update_t *upd;
689 	eib_vhub_table_t *tbl;
690 	eib_vhub_map_t *elem;
691 	eib_vhub_map_t *nxt;
692 	int i;
693 
694 	/*
695 	 * We come here only when we've either completely detached from
696 	 * the vhub multicast groups and so cannot receive anymore table
697 	 * or update control messages, or we've had a recent vhub table
698 	 * construction failure and the vnic state is currently
699 	 * EIB_LOGIN_TBL_FAILED and so won't parse any table or update
700 	 * control messages.  Also, since we haven't completed the vnic
701 	 * creation, no one from the tx path will be accessing the
702 	 * vn_vhub_table entries either.  All said, we're free to play
703 	 * around with the vnic's vn_vhub_table and vn_vhub_update here.
704 	 */
705 
706 	mutex_enter(&vnic->vn_lock);
707 	upd = vnic->vn_vhub_update;
708 	tbl = vnic->vn_vhub_table;
709 	if (clobber) {
710 		vnic->vn_vhub_update = NULL;
711 		vnic->vn_vhub_table = NULL;
712 	}
713 	mutex_exit(&vnic->vn_lock);
714 
715 	/*
716 	 * Destroy the vhub update entries if any
717 	 */
718 	if (upd) {
719 		/*
720 		 * Wipe clean the list of vnic entries accumulated via
721 		 * vhub updates so far.  Release eib_vhub_update_t only
722 		 * if explicitly asked to do so
723 		 */
724 		mutex_enter(&upd->up_lock);
725 		for (elem = upd->up_vnic_entry; elem != NULL; elem = nxt) {
726 			nxt = elem->mp_next;
727 			kmem_free(elem, sizeof (eib_vhub_map_t));
728 		}
729 		upd->up_vnic_entry = NULL;
730 		upd->up_tusn = 0;
731 		upd->up_eport_state = 0;
732 		mutex_exit(&upd->up_lock);
733 
734 		if (clobber) {
735 			mutex_destroy(&upd->up_lock);
736 			kmem_free(upd, sizeof (eib_vhub_update_t));
737 		}
738 	}
739 
740 	/*
741 	 * Destroy the vhub table entries
742 	 */
743 	if (tbl == NULL)
744 		return;
745 
746 	/*
747 	 * Wipe clean the list of entries in the vhub table collected so
748 	 * far. Release eib_vhub_table_t only if explicitly asked to do so.
749 	 */
750 	mutex_enter(&tbl->tb_lock);
751 
752 	if (tbl->tb_gateway) {
753 		kmem_free(tbl->tb_gateway, sizeof (eib_vhub_map_t));
754 		tbl->tb_gateway = NULL;
755 	}
756 
757 	if (tbl->tb_unicast_miss) {
758 		kmem_free(tbl->tb_unicast_miss, sizeof (eib_vhub_map_t));
759 		tbl->tb_unicast_miss = NULL;
760 	}
761 
762 	if (tbl->tb_vhub_multicast) {
763 		kmem_free(tbl->tb_vhub_multicast, sizeof (eib_vhub_map_t));
764 		tbl->tb_vhub_multicast = NULL;
765 	}
766 
767 	if (!eib_wa_no_mcast_entries) {
768 		for (i = 0; i < EIB_TB_NBUCKETS; i++) {
769 			for (elem = tbl->tb_mcast_entry[i]; elem != NULL;
770 			    elem = nxt) {
771 				nxt = elem->mp_next;
772 				kmem_free(elem, sizeof (eib_vhub_map_t));
773 			}
774 			tbl->tb_mcast_entry[i] = NULL;
775 		}
776 	}
777 
778 	for (i = 0; i < EIB_TB_NBUCKETS; i++) {
779 		for (elem = tbl->tb_vnic_entry[i]; elem != NULL; elem = nxt) {
780 			nxt = elem->mp_next;
781 			kmem_free(elem, sizeof (eib_vhub_map_t));
782 		}
783 		tbl->tb_vnic_entry[i] = NULL;
784 	}
785 
786 	tbl->tb_tusn = 0;
787 	tbl->tb_eport_state = 0;
788 	tbl->tb_entries_seen = 0;
789 	tbl->tb_entries_in_table = 0;
790 	tbl->tb_checksum = 0;
791 
792 	mutex_exit(&tbl->tb_lock);
793 
794 	/*
795 	 * Don't throw away space created for holding vhub table if we haven't
796 	 * been explicitly asked to do so
797 	 */
798 	if (clobber) {
799 		mutex_destroy(&tbl->tb_lock);
800 		kmem_free(tbl, sizeof (eib_vhub_table_t));
801 	}
802 }
803 
804 eib_chan_t *
805 eib_vnic_get_data_chan(eib_t *ss, int vinst)
806 {
807 	eib_vnic_t *vnic;
808 	eib_chan_t *chan = NULL;
809 
810 	if (vinst >= 0 && vinst < EIB_MAX_VNICS) {
811 		mutex_enter(&ss->ei_vnic_lock);
812 		if ((vnic = ss->ei_vnic[vinst]) != NULL)
813 			chan = vnic->vn_data_chan;
814 		mutex_exit(&ss->ei_vnic_lock);
815 	}
816 
817 	return (chan);
818 }
819 
820 void
821 eib_vnic_need_new(eib_t *ss, uint8_t *mac, uint16_t vlan)
822 {
823 	eib_vnic_req_t *vrq;
824 
825 	EIB_INCR_COUNTER(&ss->ei_stats->st_noxmitbuf);
826 
827 	/*
828 	 * Create a new vnic request for this {mac,vlan} tuple
829 	 */
830 	vrq = kmem_zalloc(sizeof (eib_vnic_req_t), KM_NOSLEEP);
831 	if (vrq == NULL) {
832 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_need_new: "
833 		    "no memory, failed to queue new vnic creation request");
834 		return;
835 	}
836 	vrq->vr_next = NULL;
837 	vrq->vr_req = EIB_CR_REQ_NEW_VNIC;
838 	bcopy(mac, vrq->vr_mac, ETHERADDRL);
839 	vrq->vr_vlan = vlan;
840 
841 	eib_vnic_enqueue_req(ss, vrq);
842 }
843 
844 void
845 eib_vnic_enqueue_req(eib_t *ss, eib_vnic_req_t *vrq)
846 {
847 	eib_vnic_req_t *elem = NULL;
848 	uint8_t *m;
849 
850 	/*
851 	 * Enqueue this new vnic request with the vnic creator and
852 	 * signal it.
853 	 */
854 	m = vrq->vr_mac;
855 	EIB_DPRINTF_DEBUG(ss->ei_instance, "eib_vnic_enqueue_req: "
856 	    "BEGIN file request for creation of %x:%x:%x:%x:%x:%x, 0x%x",
857 	    m[0], m[1], m[2], m[3], m[4], m[5], vrq->vr_vlan);
858 
859 
860 	mutex_enter(&ss->ei_vnic_req_lock);
861 
862 	/*
863 	 * Death request has the highest priority.  If we've already been asked
864 	 * to die, we don't entertain any more requests.
865 	 */
866 	if (ss->ei_vnic_req) {
867 		if (ss->ei_vnic_req->vr_req == EIB_CR_REQ_DIE) {
868 			mutex_exit(&ss->ei_vnic_req_lock);
869 			kmem_free(vrq, sizeof (eib_vnic_req_t));
870 			return;
871 		}
872 	}
873 
874 	if (vrq->vr_req == EIB_CR_REQ_DIE || vrq->vr_req == EIB_CR_REQ_FLUSH) {
875 		vrq->vr_next = ss->ei_vnic_req;
876 		ss->ei_vnic_req = vrq;
877 	} else {
878 		/*
879 		 * If there's already a creation request for this vnic that's
880 		 * being processed, return immediately without adding a new
881 		 * request.
882 		 */
883 		if ((elem = ss->ei_pending_vnic_req) != NULL) {
884 			EIB_DPRINTF_DEBUG(ss->ei_instance,
885 			    "eib_vnic_enqueue_req: "
886 			    "ei_pending_vnic_req not NULL");
887 
888 			if ((elem->vr_vlan == vrq->vr_vlan) &&
889 			    (bcmp(elem->vr_mac, vrq->vr_mac,
890 			    ETHERADDRL) == 0)) {
891 				EIB_DPRINTF_DEBUG(ss->ei_instance,
892 				    "eib_vnic_enqueue_req: "
893 				    "pending request already present for "
894 				    "%x:%x:%x:%x:%x:%x, 0x%x", m[0], m[1], m[2],
895 				    m[3], m[4], m[5], vrq->vr_vlan);
896 
897 				mutex_exit(&ss->ei_vnic_req_lock);
898 				kmem_free(vrq, sizeof (eib_vnic_req_t));
899 
900 				EIB_DPRINTF_DEBUG(ss->ei_instance,
901 				    "eib_vnic_enqueue_req: "
902 				    "END file request");
903 				return;
904 			}
905 
906 			EIB_DPRINTF_DEBUG(ss->ei_instance,
907 			    "eib_vnic_enqueue_req: "
908 			    "NO pending request for %x:%x:%x:%x:%x:%x, 0x%x",
909 			    m[0], m[1], m[2], m[3], m[4], m[5], vrq->vr_vlan);
910 		}
911 
912 		/*
913 		 * Or if there's one waiting in the queue for processing, do
914 		 * the same thing
915 		 */
916 		for (elem = ss->ei_vnic_req; elem; elem = elem->vr_next) {
917 			/*
918 			 * If there's already a create request for this vnic
919 			 * waiting in the queue, return immediately
920 			 */
921 			if (elem->vr_req == EIB_CR_REQ_NEW_VNIC) {
922 				if ((elem->vr_vlan == vrq->vr_vlan) &&
923 				    (bcmp(elem->vr_mac, vrq->vr_mac,
924 				    ETHERADDRL) == 0)) {
925 
926 					EIB_DPRINTF_DEBUG(ss->ei_instance,
927 					    "eib_vnic_enqueue_req: "
928 					    "request already present for "
929 					    "%x:%x:%x:%x:%x:%x, 0x%x", m[0],
930 					    m[1], m[2], m[3], m[4], m[5],
931 					    vrq->vr_vlan);
932 
933 					mutex_exit(&ss->ei_vnic_req_lock);
934 					kmem_free(vrq, sizeof (eib_vnic_req_t));
935 
936 					EIB_DPRINTF_DEBUG(ss->ei_instance,
937 					    "eib_vnic_enqueue_req: "
938 					    "END file request");
939 					return;
940 				}
941 			}
942 
943 			if (elem->vr_next == NULL) {
944 				EIB_DPRINTF_DEBUG(ss->ei_instance,
945 				    "eib_vnic_enqueue_req: "
946 				    "request not found, filing afresh");
947 				break;
948 			}
949 		}
950 
951 		/*
952 		 * Otherwise queue up this new creation request and signal the
953 		 * service thread.
954 		 */
955 		if (elem) {
956 			elem->vr_next = vrq;
957 		} else {
958 			ss->ei_vnic_req = vrq;
959 		}
960 	}
961 
962 	cv_signal(&ss->ei_vnic_req_cv);
963 	mutex_exit(&ss->ei_vnic_req_lock);
964 
965 	EIB_DPRINTF_DEBUG(ss->ei_instance,
966 	    "eib_vnic_enqueue_req: END file request");
967 }
968 
969 void
970 eib_vnic_update_failed_macs(eib_t *ss, uint8_t *old_mac, uint16_t old_vlan,
971     uint8_t *new_mac, uint16_t new_vlan)
972 {
973 	eib_vnic_req_t *vrq;
974 	eib_vnic_req_t *elem;
975 	eib_vnic_req_t *prev;
976 
977 	vrq = kmem_zalloc(sizeof (eib_vnic_req_t), KM_NOSLEEP);
978 	if (vrq == NULL) {
979 		EIB_DPRINTF_WARN(ss->ei_instance,
980 		    "eib_vnic_update_failed_macs: "
981 		    "no memory, failed to drop old mac");
982 	} else {
983 		vrq->vr_next = NULL;
984 		vrq->vr_req = 0;	/* unused */
985 		bcopy(old_mac, vrq->vr_mac, ETHERADDRL);
986 		vrq->vr_vlan = old_vlan;
987 	}
988 
989 	mutex_enter(&ss->ei_vnic_req_lock);
990 
991 	/*
992 	 * We'll search the failed vnics list to see if the new {mac,vlan}
993 	 * tuple is in there and remove it if present (since the new address
994 	 * is no longer "failed").
995 	 */
996 	prev = NULL;
997 	for (elem = ss->ei_failed_vnic_req; elem; elem = elem->vr_next) {
998 		if ((bcmp(elem->vr_mac, new_mac, ETHERADDRL) == 0) &&
999 		    (elem->vr_vlan == new_vlan)) {
1000 			if (prev) {
1001 				prev->vr_next = elem->vr_next;
1002 			} else {
1003 				ss->ei_failed_vnic_req = elem->vr_next;
1004 			}
1005 			elem->vr_next = NULL;
1006 			break;
1007 		}
1008 	}
1009 	if (elem) {
1010 		kmem_free(elem, sizeof (eib_vnic_req_t));
1011 	}
1012 
1013 	/*
1014 	 * We'll also insert the old {mac,vlan} tuple to the "failed vnic req"
1015 	 * We'll also insert the old {mac,vlan} tuple into the "failed vnic req"
1016 	 * the vnic we just explicitly discarded.
1017 	 */
1018 	if (vrq) {
1019 		vrq->vr_next = ss->ei_failed_vnic_req;
1020 		ss->ei_failed_vnic_req = vrq;
1021 	}
1022 
1023 	mutex_exit(&ss->ei_vnic_req_lock);
1024 }
1025 
1026 void
1027 eib_vnic_resurrect_zombies(eib_t *ss, uint8_t *vn0_mac)
1028 {
1029 	int inst;
1030 
1031 	/*
1032 	 * We want to restart/relogin each vnic instance with the gateway,
1033 	 * but with the same vnic id and instance as before.
1034 	 */
1035 	while ((inst = EIB_FIND_LSB_SET(ss->ei_zombie_vnics)) != -1) {
1036 		EIB_DPRINTF_DEBUG(ss->ei_instance,
1037 		    "eib_vnic_resurrect_zombies: "
1038 		    "calling eib_vnic_restart(vn_inst=%d)", inst);
1039 
1040 		eib_vnic_restart(ss, inst, vn0_mac);
1041 
1042 		EIB_DPRINTF_DEBUG(ss->ei_instance,
1043 		    "eib_vnic_resurrect_zombies: "
1044 		    "eib_vnic_restart(vn_inst=%d) done", inst);
1045 	}
1046 }
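/*
 * Note that this loop terminates only because eib_vnic_restart()
 * clears the instance's bit in ei_zombie_vnics; if a bit could ever
 * be set in ei_zombie_vnics without a matching ss->ei_vnic[] entry,
 * it would never be cleared and we would spin here forever.
 */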
1047 
1048 void
1049 eib_vnic_restart(eib_t *ss, int inst, uint8_t *vn0_mac)
1050 {
1051 	eib_vnic_t *vnic;
1052 	eib_login_data_t *ld;
1053 	uint8_t old_mac[ETHERADDRL];
1054 	int ret;
1055 	int err;
1056 
1057 	if (inst < 0 || inst >= EIB_MAX_VNICS) {
1058 		EIB_DPRINTF_WARN(ss->ei_instance,
1059 		    "eib_vnic_restart: "
1060 		    "vnic instance (%d) invalid", inst);
1061 		return;
1062 	}
1063 
1064 	eib_vnic_modify_enter(ss, EIB_VN_BEING_MODIFIED);
1065 	if ((vnic = ss->ei_vnic[inst]) != NULL) {
1066 		/*
1067 		 * Remember what mac was allocated for this vnic last time
1068 		 */
1069 		bcopy(vnic->vn_login_data.ld_assigned_mac, old_mac, ETHERADDRL);
1070 
1071 		/*
1072 		 * Tear down and restart this vnic instance
1073 		 */
1074 		eib_rb_vnic_create_common(ss, vnic, ~0);
1075 		ret = eib_vnic_create_common(ss, vnic, &err);
1076 		if (ret != EIB_E_SUCCESS) {
1077 			EIB_DPRINTF_WARN(ss->ei_instance,
1078 			    "eib_vnic_restart: "
1079 			    "eib_vnic_create_common(vnic_inst=%d) failed, "
1080 			    "ret=%d", inst, err);
1081 		}
1082 
1083 		/*
1084 		 * If this is vnic instance 0 and if our current assigned mac is
1085 		 * different from what was assigned last time, we need to pass
1086 		 * this information back to the caller, so the mac layer can be
1087 		 * appropriately informed. We will also queue up the old mac
1088 		 * and vlan in the "failed vnic req" list, so any future packets
1089 		 * to this address on this interface will be dropped.
1090 		 */
1091 		ld = &vnic->vn_login_data;
1092 		if ((inst == 0) &&
1093 		    (bcmp(ld->ld_assigned_mac, old_mac, ETHERADDRL) != 0)) {
1094 			uint8_t *m = ld->ld_assigned_mac;
1095 
1096 			if (vn0_mac != NULL) {
1097 				bcopy(ld->ld_assigned_mac, vn0_mac,
1098 				    ETHERADDRL);
1099 			}
1100 
1101 			EIB_DPRINTF_VERBOSE(ss->ei_instance,
1102 			    "eib_vnic_restart: updating failed macs list "
1103 			    "old=%x:%x:%x:%x:%x:%x, new=%x:%x:%x:%x:%x:%x, "
1104 			    "vlan=0x%x", old_mac[0], old_mac[1], old_mac[2],
1105 			    old_mac[3], old_mac[4], old_mac[5], m[0], m[1],
1106 			    m[2], m[3], m[4], m[5], vnic->vn_vlan);
1107 
1108 			eib_vnic_update_failed_macs(ss, old_mac, vnic->vn_vlan,
1109 			    ld->ld_assigned_mac, vnic->vn_vlan);
1110 		}
1111 
1112 		/*
1113 	 * No longer a zombie; no need to rejoin mcgs either
1114 		 */
1115 		mutex_enter(&ss->ei_vnic_lock);
1116 		ss->ei_zombie_vnics &= (~((uint64_t)1 << inst));
1117 		ss->ei_rejoin_vnics &= (~((uint64_t)1 << inst));
1118 		mutex_exit(&ss->ei_vnic_lock);
1119 	}
1120 	eib_vnic_modify_exit(ss, EIB_VN_BEING_MODIFIED);
1121 }
1122 
1123 void
1124 eib_vnic_rejoin_mcgs(eib_t *ss)
1125 {
1126 	eib_vnic_t *vnic;
1127 	int inst;
1128 
1129 	/*
1130 	 * For each vnic that still requires re-join, go through the
1131 	 * control channels and data channel and reattach/rejoin mcgs.
1132 	 */
1133 	mutex_enter(&ss->ei_vnic_lock);
1134 	while ((inst = EIB_FIND_LSB_SET(ss->ei_rejoin_vnics)) != -1) {
1135 		if ((vnic = ss->ei_vnic[inst]) != NULL) {
1136 			eib_vnic_reattach_ctl_mcgs(ss, vnic);
1137 			eib_vnic_rejoin_data_mcgs(ss, vnic);
1138 		}
1139 		ss->ei_rejoin_vnics &= (~((uint64_t)1 << inst));
1140 	}
1141 	mutex_exit(&ss->ei_vnic_lock);
1142 }
1143 
1144 void
1145 eib_rb_vnic_create(eib_t *ss, eib_vnic_t *vnic, uint_t progress)
1146 {
1147 	if (progress & EIB_VNIC_CREATE_COMMON_DONE) {
1148 		eib_rb_vnic_create_common(ss, vnic, ~0);
1149 	}
1150 
1151 	if (progress & EIB_VNIC_GOT_INSTANCE) {
1152 		eib_vnic_ret_instance(ss, vnic->vn_instance);
1153 		vnic->vn_instance = -1;
1154 	}
1155 
1156 	if (progress & EIB_VNIC_STRUCT_ALLOCD) {
1157 		cv_destroy(&vnic->vn_cv);
1158 		mutex_destroy(&vnic->vn_lock);
1159 		kmem_free(vnic, sizeof (eib_vnic_t));
1160 	}
1161 }
1162 
1163 /*
1164  * Currently, we only allow 64 vnics per eoib device instance, for
1165  * reasons described in eib.h (see EIB_VNIC_ID() definition), so we
1166  * could use a simple bitmap to assign the vnic instance numbers.
1167  * Once we start allowing more vnics per device instance, this
1168  * allocation scheme will need to be changed.
1169  */
1170 static int
1171 eib_vnic_get_instance(eib_t *ss, int *vinst)
1172 {
1173 	int bitpos;
1174 	uint64_t nval;
1175 
1176 	mutex_enter(&ss->ei_vnic_lock);
1177 
1178 	/*
1179 	 * What we have is the active vnics list --  the in-use vnics are
1180 	 * indicated by a 1 in the bit position, and the free ones are
1181 	 * indicated by 0.  We need to find the least significant '0' bit
1182 	 * to get the first free vnic instance.  Or we could bit-reverse
1183 	 * the active list and locate the least significant '1'.
1184 	 */
1185 	nval = ~(ss->ei_active_vnics);
1186 
1187 	/*
1188 	 * The single bit-position values in a 64-bit integer are relatively
1189 	 * prime with 67, so performing a modulus division with 67 guarantees
1190 	 * a unique number between 0 and 63 for each value (setbit_mod67[]).
1191 	 */
1192 	bitpos = (nval == 0) ? -1 : EIB_FIND_LSB_SET(nval);
1193 	if (bitpos == -1) {
1194 		mutex_exit(&ss->ei_vnic_lock);
1195 		return (EIB_E_FAILURE);
1196 	}
1197 
1198 	ss->ei_active_vnics |= ((uint64_t)1 << bitpos);
1199 	*vinst = bitpos;
1200 
1201 	mutex_exit(&ss->ei_vnic_lock);
1202 
1203 	return (EIB_E_SUCCESS);
1204 }
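/*
 * For example, with ei_active_vnics == 0x0b (vnics 0, 1 and 3 in
 * use), nval == ~0x0b has its least significant set bit at position
 * 2, so instance 2 is handed out and ei_active_vnics becomes 0x0f.
 */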
1205 
1206 static void
1207 eib_vnic_ret_instance(eib_t *ss, int vinst)
1208 {
1209 	mutex_enter(&ss->ei_vnic_lock);
1210 
1211 	if (vinst >= EIB_MAX_VNICS) {
1212 		EIB_DPRINTF_WARN(ss->ei_instance,
1213 		    "eib_vnic_ret_instance: "
1214 		    "vnic instance (%d) invalid", vinst);
1215 	} else if ((ss->ei_active_vnics & ((uint64_t)1 << vinst)) == 0) {
1216 		EIB_DPRINTF_WARN(ss->ei_instance,
1217 		    "eib_vnic_ret_instance: "
1218 		    "vnic instance (%d) not active!", vinst);
1219 	} else {
1220 		ss->ei_active_vnics &= (~((uint64_t)1 << vinst));
1221 	}
1222 
1223 	mutex_exit(&ss->ei_vnic_lock);
1224 }
1225 
1226 static void
1227 eib_vnic_modify_enter(eib_t *ss, uint_t op)
1228 {
1229 	mutex_enter(&ss->ei_vnic_lock);
1230 	while (ss->ei_vnic_state & EIB_VN_BEING_MODIFIED)
1231 		cv_wait(&ss->ei_vnic_cv, &ss->ei_vnic_lock);
1232 
1233 	ss->ei_vnic_state |= op;
1234 	mutex_exit(&ss->ei_vnic_lock);
1235 }
1236 
1237 static void
1238 eib_vnic_modify_exit(eib_t *ss, uint_t op)
1239 {
1240 	mutex_enter(&ss->ei_vnic_lock);
1241 	ss->ei_vnic_state &= (~op);
1242 	cv_broadcast(&ss->ei_vnic_cv);
1243 	mutex_exit(&ss->ei_vnic_lock);
1244 }
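/*
 * Together, eib_vnic_modify_enter() and eib_vnic_modify_exit()
 * serialize vnic create/delete/restart operations.  For the gate to
 * work, each EIB_VN_BEING_* value passed as 'op' must include the
 * EIB_VN_BEING_MODIFIED bit (an assumption here; the flag values are
 * defined in eib.h), so a second caller blocks until the first one
 * exits.
 */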
1245 
1246 static int
1247 eib_vnic_create_common(eib_t *ss, eib_vnic_t *vnic, int *err)
1248 {
1249 	uint_t progress = 0;
1250 
1251 	/*
1252 	 * When we receive login acks within this vnic creation
1253 	 * routine we need a way to retrieve the vnic structure
1254 	 * from the vnic instance, so store it in ei_vnic_pending. Note
1255 	 * that there can be only one outstanding vnic creation
1256 	 * at any point of time, so we only need one vnic struct.
1257 	 */
1258 	mutex_enter(&ss->ei_vnic_lock);
1259 	ASSERT(ss->ei_vnic_pending == NULL);
1260 	ss->ei_vnic_pending = vnic;
1261 	mutex_exit(&ss->ei_vnic_lock);
1262 
1263 	/*
1264 	 * Create a control qp for this vnic
1265 	 */
1266 	if (eib_ctl_create_qp(ss, vnic, err) != EIB_E_SUCCESS) {
1267 		EIB_DPRINTF_WARN(ss->ei_instance,
1268 		    "eib_vnic_create_common: "
1269 		    "eib_ctl_create_qp(vn_id=0x%x) failed, ret=%d",
1270 		    vnic->vn_id, *err);
1271 		goto vnic_create_common_fail;
1272 	}
1273 	progress |= EIB_VNIC_CTLQP_CREATED;
1274 
1275 	/*
1276 	 * Create a data qp for this vnic
1277 	 */
1278 	if (eib_data_create_qp(ss, vnic, err) != EIB_E_SUCCESS) {
1279 		EIB_DPRINTF_WARN(ss->ei_instance,
1280 		    "eib_vnic_create_common: "
1281 		    "eib_data_create_qp(vn_id=0x%x) failed, ret=%d",
1282 		    vnic->vn_id, *err);
1283 		goto vnic_create_common_fail;
1284 	}
1285 	progress |= EIB_VNIC_DATAQP_CREATED;
1286 
1287 	/*
1288 	 * Login to the gateway with this vnic's parameters
1289 	 */
1290 	if (eib_fip_login(ss, vnic, err) != EIB_E_SUCCESS) {
1291 		EIB_DPRINTF_WARN(ss->ei_instance,
1292 		    "eib_vnic_create_common: "
1293 		    "eib_fip_login(vn_id=0x%x) failed, ret=%d",
1294 		    vnic->vn_id, *err);
1295 		goto vnic_create_common_fail;
1296 	}
1297 	progress |= EIB_VNIC_LOGIN_DONE;
1298 
1299 	/*
1300 	 * Associate the control and data qps for the vnic with the
1301 	 * vHUB partition
1302 	 */
1303 	if (eib_vnic_set_partition(ss, vnic, err) != EIB_E_SUCCESS) {
1304 		EIB_DPRINTF_WARN(ss->ei_instance,
1305 		    "eib_vnic_create_common: "
1306 		    "eib_vnic_set_partition(vn_id=0x%x) failed, ret=%d",
1307 		    vnic->vn_id, *err);
1308 		goto vnic_create_common_fail;
1309 	}
1310 	progress |= EIB_VNIC_PARTITION_SET;
1311 
1312 	/*
1313 	 * Post initial set of rx buffers on the control qp to the HCA
1314 	 */
1315 	if (eib_chan_post_rx(ss, vnic->vn_ctl_chan, NULL) != EIB_E_SUCCESS) {
1316 		*err = ENOMEM;
1317 		EIB_DPRINTF_WARN(ss->ei_instance,
1318 		    "eib_vnic_create_common: "
1319 		    "eib_chan_post_rx(vn_id=0x%x, CTL_QP) failed, ret=%d",
1320 		    vnic->vn_id, *err);
1321 
1322 		goto vnic_create_common_fail;
1323 	}
1324 	progress |= EIB_VNIC_RX_POSTED_TO_CTLQP;
1325 
1326 	/*
1327 	 * Post initial set of rx buffers on the data qp to the HCA
1328 	 */
1329 	if (eib_chan_post_rx(ss, vnic->vn_data_chan, NULL) != EIB_E_SUCCESS) {
1330 		*err = ENOMEM;
1331 		EIB_DPRINTF_WARN(ss->ei_instance,
1332 		    "eib_vnic_create_common: "
1333 		    "eib_chan_post_rx(vn_id=0x%x, DATA_QP) failed, ret=%d",
1334 		    vnic->vn_id, *err);
1335 
1336 		goto vnic_create_common_fail;
1337 	}
1338 	progress |= EIB_VNIC_RX_POSTED_TO_DATAQP;
1339 
1340 	/*
1341 	 * Attach to the vHUB table and vHUB update multicast groups
1342 	 */
1343 	if (eib_vnic_attach_ctl_mcgs(ss, vnic, err) != EIB_E_SUCCESS) {
1344 		EIB_DPRINTF_WARN(ss->ei_instance,
1345 		    "eib_vnic_create_common: "
1346 		    "eib_vnic_attach_ctl_mcgs(vn_id=0x%x) failed, ret=%d",
1347 		    vnic->vn_id, *err);
1348 		goto vnic_create_common_fail;
1349 	}
1350 	progress |= EIB_VNIC_ATTACHED_TO_CTL_MCGS;
1351 
1352 	/*
1353 	 * Send the vHUB table request and construct the vhub table
1354 	 */
1355 	if (eib_fip_vhub_table(ss, vnic, err) != EIB_E_SUCCESS) {
1356 		EIB_DPRINTF_WARN(ss->ei_instance,
1357 		    "eib_vnic_create_common: "
1358 		    "eib_fip_vhub_table(vn_id=0x%x) failed, ret=%d",
1359 		    vnic->vn_id, *err);
1360 		goto vnic_create_common_fail;
1361 	}
1362 	progress |= EIB_VNIC_GOT_VHUB_TABLE;
1363 
1364 	/*
1365 	 * Start the keepalives for this vnic, then detach from the vHUB
1366 	 * table mcg (we no longer need the vHUB table messages).
1367 	 */
1368 	eib_vnic_start_keepalives(ss, vnic);
1369 	eib_rb_vnic_attach_vhub_table(ss, vnic);
1370 
1371 	progress |= EIB_VNIC_KEEPALIVES_STARTED;
1372 
1373 	/*
1374 	 * All ethernet vnics are automatically members of the broadcast
1375 	 * group for the vlan they are participating in, so join the
1376 	 * ethernet broadcast group.  Note that when we restart vnics,
1377 	 * we rejoin the mcgs, so we pass B_TRUE to eib_vnic_join_data_mcg().
1378 	 */
1379 	if (eib_vnic_join_data_mcg(ss, vnic, eib_broadcast_mac, B_TRUE,
1380 	    err) != EIB_E_SUCCESS) {
1381 		EIB_DPRINTF_WARN(ss->ei_instance,
1382 		    "eib_vnic_create_common: "
1383 		    "eib_vnic_join_data_mcg(vn_id=0x%x, BCAST_GROUP) failed, "
1384 		    "ret=%d", vnic->vn_id, *err);
1385 		goto vnic_create_common_fail;
1386 	}
1387 	progress |= EIB_VNIC_BROADCAST_JOINED;
1388 
1389 	mutex_enter(&ss->ei_vnic_lock);
1390 	if (ss->ei_vnic[vnic->vn_instance] == NULL) {
1391 		ss->ei_vnic[vnic->vn_instance] = vnic;
1392 	}
1393 	ss->ei_vnic_pending = NULL;
1394 	mutex_exit(&ss->ei_vnic_lock);
1395 
1396 	return (EIB_E_SUCCESS);
1397 
1398 vnic_create_common_fail:
1399 	eib_rb_vnic_create_common(ss, vnic, progress);
1400 	return (EIB_E_FAILURE);
1401 }
1402 
1403 static int
1404 eib_vnic_set_partition(eib_t *ss, eib_vnic_t *vnic, int *err)
1405 {
1406 	int ret;
1407 
1408 	/*
1409 	 * Associate the control channel with the vhub partition
1410 	 */
1411 	ret = eib_ibt_modify_chan_pkey(ss, vnic->vn_ctl_chan,
1412 	    vnic->vn_login_data.ld_vhub_pkey);
1413 	if (ret != EIB_E_SUCCESS) {
1414 		EIB_DPRINTF_WARN(ss->ei_instance,
1415 		    "eib_vnic_set_partition: "
1416 		    "eib_ibt_modify_chan_pkey(vn_id=0x%x, CTL_CHAN, "
1417 		    "vhub_pkey=0x%x) failed", vnic->vn_id,
1418 		    vnic->vn_login_data.ld_vhub_pkey);
1419 		*err = EINVAL;
1420 		return (EIB_E_FAILURE);
1421 	}
1422 
1423 	/*
1424 	 * Now, do the same thing for the data channel. Note that if a
1425 	 * failure happens, the channel state(s) are left as-is, since
1426 	 * it is pointless to try to change them back using the same
1427 	 * interfaces that have just failed.
1428 	 */
1429 	ret = eib_ibt_modify_chan_pkey(ss, vnic->vn_data_chan,
1430 	    vnic->vn_login_data.ld_vhub_pkey);
1431 	if (ret != EIB_E_SUCCESS) {
1432 		EIB_DPRINTF_WARN(ss->ei_instance,
1433 		    "eib_vnic_set_partition: "
1434 		    "eib_ibt_modify_chan_pkey(vn_id=0x%x, DATA_CHAN, "
1435 		    "vhub_pkey=0x%x) failed", vnic->vn_id,
1436 		    vnic->vn_login_data.ld_vhub_pkey);
1437 		*err = EINVAL;
1438 		return (EIB_E_FAILURE);
1439 	}
1440 
1441 	return (EIB_E_SUCCESS);
1442 }
1443 
1444 static void
1445 eib_vnic_make_vhub_mgid(uint8_t *mg_prefix, uint8_t mg_type,
1446     uint8_t *mcast_mac, uint8_t n_mac, uint8_t rss_hash, uint32_t vhub_id,
1447     ib_gid_t *mgid)
1448 {
1449 	eib_mgid_t em;
1450 	uint64_t dmac_mask;
1451 	uint64_t dmac = 0;
1452 	uint8_t *dmac_str = (uint8_t *)&dmac;
1453 	uint_t	vhub_id_nw;
1454 	uint8_t *vhub_id_str = (uint8_t *)&vhub_id_nw;
1455 
1456 	/*
1457 	 * Copy mgid prefix and type
1458 	 */
1459 	bcopy(mg_prefix, em.gd_spec.sp_mgid_prefix, FIP_MGID_PREFIX_LEN);
1460 	em.gd_spec.sp_type = mg_type;
1461 
1462 	/*
1463 	 * Take n_mac bits from mcast_mac and copy dmac
1464 	 */
1465 	bcopy(mcast_mac, dmac_str + 2, ETHERADDRL);
1466 	dmac_mask = ((uint64_t)1 << n_mac) - 1;
1467 	dmac_mask = htonll(dmac_mask);
1468 	dmac &= dmac_mask;
1469 	bcopy(dmac_str + 2, em.gd_spec.sp_dmac, ETHERADDRL);
1470 
1471 	/*
1472 	 * Copy rss hash and prepare vhub id from gw port id and vlan
1473 	 */
1474 	em.gd_spec.sp_rss_hash = rss_hash;
1475 
1476 	vhub_id_nw = htonl(vhub_id);
1477 	bcopy(vhub_id_str + 1, em.gd_spec.sp_vhub_id, FIP_VHUBID_LEN);
1478 
1479 	/*
1480 	 * Ok, now we've assembled the mgid as per EoIB spec. We now have to
1481 	 * represent it in the way Solaris IBTF wants it and return (sigh).
1482 	 */
1483 	mgid->gid_prefix = ntohll(em.gd_sol.gid_prefix);
1484 	mgid->gid_guid = ntohll(em.gd_sol.gid_guid);
1485 }
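/*
 * A worked example (layout per the EoIB spec, values hypothetical):
 * for the broadcast mac ff:ff:ff:ff:ff:ff with n_mac == 0, the dmac
 * bytes are masked to all-zero, so the assembled mgid is roughly
 *
 *	| mgid prefix | type | 00:00:00:00:00:00 | rss | vhub id |
 *
 * where the vhub id field carries the low-order FIP_VHUBID_LEN bytes
 * of EIB_VHUB_ID(gw_port_id, vlan) in network order.
 */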
1486 
1487 static int
1488 eib_vnic_attach_ctl_mcgs(eib_t *ss, eib_vnic_t *vnic, int *err)
1489 {
1490 	/*
1491 	 * Get tb_vhub_table and tb_vhub_update allocated and ready before
1492 	 * attaching to the vhub table and vhub update mcgs
1493 	 */
1494 	eib_vnic_init_tables(ss, vnic);
1495 
1496 	if (eib_vnic_attach_vhub_update(ss, vnic) != EIB_E_SUCCESS) {
1497 		EIB_DPRINTF_WARN(ss->ei_instance,
1498 		    "eib_vnic_attach_ctl_mcgs: "
1499 		    "eib_vnic_attach_vhub_update(vn_id=0x%x) failed",
1500 		    vnic->vn_id);
1501 
1502 		*err = EINVAL;
1503 		eib_vnic_fini_tables(ss, vnic, B_TRUE);
1504 		return (EIB_E_FAILURE);
1505 	}
1506 
1507 	if (eib_vnic_attach_vhub_table(ss, vnic) != EIB_E_SUCCESS) {
1508 		EIB_DPRINTF_WARN(ss->ei_instance,
1509 		    "eib_vnic_attach_ctl_mcgs: "
1510 		    "eib_vnic_attach_vhub_table(vn_id=0x%x) failed",
1511 		    vnic->vn_id);
1512 
1513 		*err = EINVAL;
1514 		eib_rb_vnic_attach_vhub_update(ss, vnic);
1515 		eib_vnic_fini_tables(ss, vnic, B_TRUE);
1516 		return (EIB_E_FAILURE);
1517 	}
1518 
1519 	return (EIB_E_SUCCESS);
1520 }
1521 
1522 static int
1523 eib_vnic_attach_vhub_table(eib_t *ss, eib_vnic_t *vnic)
1524 {
1525 	eib_chan_t *chan = vnic->vn_ctl_chan;
1526 	eib_login_data_t *ld = &vnic->vn_login_data;
1527 	eib_mcg_t *mcg;
1528 	ibt_mcg_info_t *tbl_mcginfo;
1529 	ibt_mcg_attr_t mcg_attr;
1530 	ibt_status_t ret;
1531 	uint_t entries;
1532 
1533 	/*
1534 	 * Compose the MGID for receiving VHUB table
1535 	 */
1536 	bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));
1537 
1538 	eib_vnic_make_vhub_mgid(ld->ld_gw_mgid_prefix,
1539 	    (uint8_t)EIB_MGID_VHUB_TABLE, eib_broadcast_mac, ld->ld_n_mac_mcgid,
1540 	    0, ld->ld_vhub_id, &(mcg_attr.mc_mgid));
1541 	mcg_attr.mc_pkey = (ib_pkey_t)ld->ld_vhub_pkey;
1542 	mcg_attr.mc_qkey = (ib_qkey_t)EIB_FIP_QKEY;
1543 
1544 	/*
1545 	 * Locate the multicast group for receiving vhub table
1546 	 */
1547 	ret = ibt_query_mcg(ss->ei_props->ep_sgid, &mcg_attr, 1,
1548 	    &tbl_mcginfo, &entries);
1549 	if (ret != IBT_SUCCESS) {
1550 		EIB_DPRINTF_WARN(ss->ei_instance,
1551 		    "eib_vnic_attach_vhub_table: "
1552 		    "ibt_query_mcg(mgid=%llx.%llx, pkey=0x%x) failed, "
1553 		    "ret=%d", mcg_attr.mc_mgid.gid_prefix,
1554 		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey, ret);
1555 		return (EIB_E_FAILURE);
1556 	}
1557 
1558 	/*
1559 	 * Allocate for and prepare the mcg to add to our list
1560 	 */
1561 	mcg = kmem_zalloc(sizeof (eib_mcg_t), KM_NOSLEEP);
1562 	if (mcg == NULL) {
1563 		EIB_DPRINTF_WARN(ss->ei_instance,
1564 		    "eib_vnic_attach_vhub_table: "
1565 		    "no memory, failed to attach to vhub table "
1566 		    "(mgid=%llx.%llx, pkey=0x%x)", mcg_attr.mc_mgid.gid_prefix,
1567 		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey);
1568 		ibt_free_mcg_info(tbl_mcginfo, 1);
1569 		return (EIB_E_FAILURE);
1570 	}
1571 
1572 	mcg->mg_next = NULL;
1573 	mcg->mg_rgid = ss->ei_props->ep_sgid;
1574 	mcg->mg_mgid = mcg_attr.mc_mgid;
1575 	mcg->mg_join_state = IB_MC_JSTATE_FULL;
1576 	mcg->mg_mcginfo = tbl_mcginfo;
1577 	bcopy(eib_broadcast_mac, mcg->mg_mac, ETHERADDRL);
1578 
1579 	/*
1580 	 * Join the multicast group
1581 	 */
1582 	mcg_attr.mc_join_state = mcg->mg_join_state;
1583 	mcg_attr.mc_flow = tbl_mcginfo->mc_adds_vect.av_flow;
1584 	mcg_attr.mc_tclass = tbl_mcginfo->mc_adds_vect.av_tclass;
1585 	mcg_attr.mc_sl = tbl_mcginfo->mc_adds_vect.av_srvl;
1586 	mcg_attr.mc_scope = 0;	/* IB_MC_SCOPE_SUBNET_LOCAL perhaps ? */
1587 
1588 	ret = ibt_join_mcg(mcg->mg_rgid, &mcg_attr, tbl_mcginfo, NULL, NULL);
1589 	if (ret != IBT_SUCCESS) {
1590 		EIB_DPRINTF_WARN(ss->ei_instance,
1591 		    "eib_vnic_attach_vhub_table: "
1592 		    "ibt_join_mcg(mgid=%llx.%llx, pkey=0x%x, jstate=0x%x) "
1593 		    "failed, ret=%d", mcg_attr.mc_mgid.gid_prefix,
1594 		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey,
1595 		    mcg_attr.mc_join_state, ret);
1596 
1597 		kmem_free(mcg, sizeof (eib_mcg_t));
1598 		ibt_free_mcg_info(tbl_mcginfo, 1);
1599 		return (EIB_E_FAILURE);
1600 	}
1601 
1602 	/*
1603 	 * Attach to the multicast group to receive tbl multicasts
1604 	 */
1605 	ret = ibt_attach_mcg(chan->ch_chan, tbl_mcginfo);
1606 	if (ret != IBT_SUCCESS) {
1607 		EIB_DPRINTF_WARN(ss->ei_instance,
1608 		    "eib_vnic_attach_vhub_table: "
1609 		    "ibt_attach_mcg(mgid=%llx.%llx, pkey=0x%x) "
1610 		    "failed, ret=%d", mcg_attr.mc_mgid.gid_prefix,
1611 		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey, ret);
1612 
1613 		(void) ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
1614 		    eib_reserved_gid, mcg->mg_join_state);
1615 		kmem_free(mcg, sizeof (eib_mcg_t));
1616 		ibt_free_mcg_info(tbl_mcginfo, 1);
1617 		return (EIB_E_FAILURE);
1618 	}
1619 
1620 	mutex_enter(&chan->ch_vhub_lock);
1621 	chan->ch_vhub_table = mcg;
1622 	mutex_exit(&chan->ch_vhub_lock);
1623 
1624 	return (EIB_E_SUCCESS);
1625 }
1626 
1627 static int
1628 eib_vnic_attach_vhub_update(eib_t *ss, eib_vnic_t *vnic)
1629 {
1630 	eib_chan_t *chan = vnic->vn_ctl_chan;
1631 	eib_login_data_t *ld = &vnic->vn_login_data;
1632 	eib_mcg_t *mcg;
1633 	ibt_mcg_info_t *upd_mcginfo;
1634 	ibt_mcg_attr_t mcg_attr;
1635 	ibt_status_t ret;
1636 	uint_t entries;
1637 
1638 	/*
1639 	 * Compose the MGID for receiving VHUB updates
1640 	 */
1641 	bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));
1642 
1643 	eib_vnic_make_vhub_mgid(ld->ld_gw_mgid_prefix,
1644 	    (uint8_t)EIB_MGID_VHUB_UPDATE, eib_broadcast_mac,
1645 	    ld->ld_n_mac_mcgid, 0, ld->ld_vhub_id, &(mcg_attr.mc_mgid));
1646 	mcg_attr.mc_pkey = (ib_pkey_t)ld->ld_vhub_pkey;
1647 	mcg_attr.mc_qkey = (ib_qkey_t)EIB_FIP_QKEY;
1648 
1649 	/*
1650 	 * Locate the multicast group for receiving vhub updates
1651 	 */
1652 	ret = ibt_query_mcg(ss->ei_props->ep_sgid, &mcg_attr, 1,
1653 	    &upd_mcginfo, &entries);
1654 	if (ret != IBT_SUCCESS) {
1655 		EIB_DPRINTF_WARN(ss->ei_instance,
1656 		    "eib_vnic_attach_vhub_update: "
1657 		    "ibt_query_mcg(mgid=%llx.%llx, pkey=0x%x) failed, "
1658 		    "ret=%d", mcg_attr.mc_mgid.gid_prefix,
1659 		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey, ret);
1660 		return (EIB_E_FAILURE);
1661 	}
1662 
1663 	/*
1664 	 * Allocate for and prepare the mcg to add to our list
1665 	 */
1666 	mcg = kmem_zalloc(sizeof (eib_mcg_t), KM_NOSLEEP);
1667 	if (mcg == NULL) {
1668 		EIB_DPRINTF_WARN(ss->ei_instance,
1669 		    "eib_vnic_attach_vhub_update: "
1670 		    "no memory, failed to attach to vhub update "
1671 		    "(mgid=%llx.%llx, pkey=0x%x)", mcg_attr.mc_mgid.gid_prefix,
1672 		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey);
1673 
1674 		ibt_free_mcg_info(upd_mcginfo, 1);
1675 		return (EIB_E_FAILURE);
1676 	}
1677 
1678 	mcg->mg_next = NULL;
1679 	mcg->mg_rgid = ss->ei_props->ep_sgid;
1680 	mcg->mg_mgid = mcg_attr.mc_mgid;
1681 	mcg->mg_join_state = IB_MC_JSTATE_FULL;
1682 	mcg->mg_mcginfo = upd_mcginfo;
1683 	bcopy(eib_broadcast_mac, mcg->mg_mac, ETHERADDRL);
1684 
1685 	/*
1686 	 * Join the multicast group
1687 	 */
1688 	mcg_attr.mc_join_state = mcg->mg_join_state;
1689 	mcg_attr.mc_flow = upd_mcginfo->mc_adds_vect.av_flow;
1690 	mcg_attr.mc_tclass = upd_mcginfo->mc_adds_vect.av_tclass;
1691 	mcg_attr.mc_sl = upd_mcginfo->mc_adds_vect.av_srvl;
1692 	mcg_attr.mc_scope = 0;	/* IB_MC_SCOPE_SUBNET_LOCAL perhaps ? */
1693 
1694 	ret = ibt_join_mcg(mcg->mg_rgid, &mcg_attr, upd_mcginfo, NULL, NULL);
1695 	if (ret != IBT_SUCCESS) {
1696 		EIB_DPRINTF_WARN(ss->ei_instance,
1697 		    "eib_vnic_attach_vhub_update: "
1698 		    "ibt_join_mcg(mgid=%llx.%llx, pkey=0x%x, jstate=0x%x) "
1699 		    "failed, ret=%d", mcg_attr.mc_mgid.gid_prefix,
1700 		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey,
1701 		    mcg_attr.mc_join_state, ret);
1702 
1703 		kmem_free(mcg, sizeof (eib_mcg_t));
1704 		ibt_free_mcg_info(upd_mcginfo, 1);
1705 		return (EIB_E_FAILURE);
1706 	}
1707 
1708 	/*
1709 	 * Attach to the multicast group to receive upd multicasts
1710 	 */
1711 	ret = ibt_attach_mcg(chan->ch_chan, upd_mcginfo);
1712 	if (ret != IBT_SUCCESS) {
1713 		EIB_DPRINTF_WARN(ss->ei_instance,
1714 		    "eib_vnic_attach_vhub_update: "
1715 		    "ibt_attach_mcg(mgid=%llx.%llx, pkey=0x%x) "
1716 		    "failed, ret=%d", mcg_attr.mc_mgid.gid_prefix,
1717 		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey, ret);
1718 
1719 		(void) ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
1720 		    eib_reserved_gid, mcg->mg_join_state);
1721 		kmem_free(mcg, sizeof (eib_mcg_t));
1722 		ibt_free_mcg_info(upd_mcginfo, 1);
1723 		return (EIB_E_FAILURE);
1724 	}
1725 
1726 	mutex_enter(&chan->ch_vhub_lock);
1727 	chan->ch_vhub_update = mcg;
1728 	mutex_exit(&chan->ch_vhub_lock);
1729 
1730 	return (EIB_E_SUCCESS);
1731 }
1732 
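/*
 * Send the first keepalive for this vnic and then add it to the tail
 * of the list of vnics managed by the keepalives manager, which is
 * expected to send the periodic keepalives from here on.
 */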
1733 static void
1734 eib_vnic_start_keepalives(eib_t *ss, eib_vnic_t *vnic)
1735 {
1736 	eib_ka_vnics_t *kav;
1737 	eib_ka_vnics_t *elem;
1738 	int err;
1739 
1740 	kav = kmem_zalloc(sizeof (eib_ka_vnics_t), KM_SLEEP);
1741 	kav->ka_vnic = vnic;
1742 	kav->ka_next = NULL;
1743 
1744 	/*
1745 	 * Send the first keepalive and then queue this vnic up with
1746 	 * the keepalives manager
1747 	 */
1748 	(void) eib_fip_heartbeat(ss, vnic, &err);
1749 
1750 	mutex_enter(&ss->ei_ka_vnics_lock);
1751 	for (elem = ss->ei_ka_vnics; elem; elem = elem->ka_next) {
1752 		if (elem->ka_next == NULL)
1753 			break;
1754 	}
1755 	if (elem) {
1756 		elem->ka_next = kav;
1757 	} else {
1758 		ss->ei_ka_vnics = kav;
1759 	}
1760 	mutex_exit(&ss->ei_ka_vnics_lock);
1761 }
1762 
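/*
 * Resolve the destination for a tx dmac: for a unicast dmac, copy the
 * matching vhub table entry (or the gateway entry if there's no match)
 * into ucast; for a multicast or broadcast dmac, copy the mcginfo of
 * the data mcg we've joined into mcast.  The destination type is
 * returned via dtype.
 */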
1763 /*ARGSUSED*/
1764 static int
1765 eib_vnic_lookup_dest(eib_vnic_t *vnic, uint8_t *dmac, uint16_t vlan,
1766     eib_vhub_map_t *ucast, ibt_mcg_info_t *mcast, int *dtype)
1767 {
1768 	eib_t *ss = vnic->vn_ss;
1769 	eib_vhub_map_t *elem;
1770 	eib_mcg_t *mcg;
1771 	eib_chan_t *chan = vnic->vn_data_chan;
1772 	eib_login_data_t *ld = &vnic->vn_login_data;
1773 	eib_vhub_map_t *gw;
1774 	eib_vhub_table_t *tbl;
1775 	uint8_t bkt = (dmac[ETHERADDRL-1]) % EIB_TB_NBUCKETS;
1776 	ib_gid_t mgid;
1777 
1778 	/*
1779 	 * If this is a unicast dmac, locate the vhub entry matching it
1780 	 * in our vhub table.  If it's not found, fall back to the
1781 	 * gateway entry.
1782 	 */
1783 	if (EIB_UNICAST_MAC(dmac)) {
1784 
1785 		mutex_enter(&vnic->vn_lock);
1786 		if ((tbl = vnic->vn_vhub_table) == NULL) {
1787 			mutex_exit(&vnic->vn_lock);
1788 			return (EIB_E_FAILURE);
1789 		}
1790 
1791 		mutex_enter(&tbl->tb_lock);
1792 		gw = tbl->tb_gateway;
1793 		for (elem = tbl->tb_vnic_entry[bkt]; elem != NULL;
1794 		    elem = elem->mp_next) {
1795 			if (bcmp(elem->mp_mac, dmac, ETHERADDRL) == 0)
1796 				break;
1797 		}
1798 		mutex_exit(&tbl->tb_lock);
1799 
1800 		if ((elem == NULL) && (gw == NULL)) {
1801 			mutex_exit(&vnic->vn_lock);
1802 			return (EIB_E_FAILURE);
1803 		}
1804 
1805 		*dtype = EIB_TX_UNICAST;
1806 		if (elem) {
1807 			bcopy(elem, ucast, sizeof (eib_vhub_map_t));
1808 		} else {
1809 			bcopy(gw, ucast, sizeof (eib_vhub_map_t));
1810 		}
1811 		mutex_exit(&vnic->vn_lock);
1812 
1813 		return (EIB_E_SUCCESS);
1814 	}
1815 
1816 	/*
1817 	 * Is it a broadcast ?
1818 	 */
1819 	*dtype = (bcmp(dmac, eib_broadcast_mac, ETHERADDRL) == 0) ?
1820 	    EIB_TX_BROADCAST : EIB_TX_MULTICAST;
1821 
1822 	/*
1823 	 * If this was a multicast dmac, prepare the mgid and look for it
1824 	 * in the list of mcgs we've joined and use the address vector from
1825 	 * the mcginfo stored there.
1826 	 *
1827 	 * Note that since we don't have a way to associate each vlan with
1828 	 * the mcg (see eib_m_multicast()), we'll prepare the mgid to use
1829 	 * the broadcast channel all the time.
1830 	 */
1831 	eib_vnic_make_vhub_mgid(ld->ld_gw_mgid_prefix,
1832 	    (uint8_t)EIB_MGID_VHUB_DATA, eib_broadcast_mac, ld->ld_n_mac_mcgid,
1833 	    0, ld->ld_vhub_id, &mgid);
1834 
1835 	mutex_enter(&chan->ch_vhub_lock);
1836 	for (mcg = chan->ch_vhub_data; mcg; mcg = mcg->mg_next) {
1837 		if ((mcg->mg_mgid.gid_prefix == mgid.gid_prefix) &&
1838 		    (mcg->mg_mgid.gid_guid == mgid.gid_guid)) {
1839 			break;
1840 		}
1841 	}
1842 	if (mcg == NULL) {
1843 		mutex_exit(&chan->ch_vhub_lock);
1844 
1845 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_lookup_dest: "
1846 		    "could not find mgid %llx.%llx",
1847 		    mgid.gid_prefix, mgid.gid_guid);
1848 
1849 		return (EIB_E_FAILURE);
1850 	}
1851 
1852 	bcopy(mcg->mg_mcginfo, mcast, sizeof (ibt_mcg_info_t));
1853 	mutex_exit(&chan->ch_vhub_lock);
1854 
1855 	return (EIB_E_SUCCESS);
1856 }
1857 
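/*
 * Pull the entire ch_vhub_data mcg chain off the data channel, then
 * detach the qp from each mcg, leave the group and free the entry.
 */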
1858 /*ARGSUSED*/
1859 static void
1860 eib_vnic_leave_all_data_mcgs(eib_t *ss, eib_vnic_t *vnic)
1861 {
1862 	eib_chan_t *chan = vnic->vn_data_chan;
1863 	eib_mcg_t *mcglist;
1864 	eib_mcg_t *mcg;
1865 	eib_mcg_t *nxt = NULL;
1866 	ibt_status_t ret;
1867 
1868 	/*
1869 	 * First, take the ch_vhub_data mcg chain out of chan
1870 	 */
1871 	mutex_enter(&chan->ch_vhub_lock);
1872 	mcglist = chan->ch_vhub_data;
1873 	chan->ch_vhub_data = NULL;
1874 	mutex_exit(&chan->ch_vhub_lock);
1875 
1876 	/*
1877 	 * Go through the chain of mcgs we've joined, detach the qp from the
1878 	 * mcg, leave the group and free all associated stuff
1879 	 */
1880 	for (mcg = mcglist; mcg != NULL; mcg = nxt) {
1881 		nxt = mcg->mg_next;
1882 
1883 		ret = ibt_detach_mcg(chan->ch_chan, mcg->mg_mcginfo);
1884 		if (ret != IBT_SUCCESS) {
1885 			EIB_DPRINTF_WARN(ss->ei_instance,
1886 			    "eib_vnic_leave_all_data_mcgs: "
1887 			    "ibt_detach_mcg(chan_hdl=0x%llx, mcinfo=0x%llx, "
1888 			    "mgid=%llx.%llx) failed, ret=%d", chan->ch_chan,
1889 			    mcg->mg_mcginfo, mcg->mg_mgid.gid_prefix,
1890 			    mcg->mg_mgid.gid_guid, ret);
1891 		}
1892 
1893 		ret = ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
1894 		    eib_reserved_gid, mcg->mg_join_state);
1895 		if (ret != IBT_SUCCESS) {
1896 			EIB_DPRINTF_WARN(ss->ei_instance,
1897 			    "eib_vnic_leave_all_data_mcgs: "
1898 			    "ibt_leave_mcg(mgid=%llx.%llx, jstate=0x%x) "
1899 			    "failed, ret=%d", mcg->mg_mgid.gid_prefix,
1900 			    mcg->mg_mgid.gid_guid, mcg->mg_join_state, ret);
1901 		}
1902 
1903 		if (mcg->mg_mcginfo)
1904 			kmem_free(mcg->mg_mcginfo, sizeof (ibt_mcg_info_t));
1905 
1906 		kmem_free(mcg, sizeof (eib_mcg_t));
1907 	}
1908 }
1909 
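/*
 * Drop and rejoin every data mcg on this vnic's data channel.  Each
 * old membership is explicitly left before calling
 * eib_vnic_join_data_mcg() to join afresh.
 */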
1910 static void
1911 eib_vnic_rejoin_data_mcgs(eib_t *ss, eib_vnic_t *vnic)
1912 {
1913 	eib_chan_t *chan = vnic->vn_data_chan;
1914 	eib_mcg_t *mcglist;
1915 	eib_mcg_t *mcg;
1916 	eib_mcg_t *next;
1917 	int err;
1918 
1919 	/*
1920 	 * Grab the current list of mcgs
1921 	 */
1922 	mutex_enter(&chan->ch_vhub_lock);
1923 	mcglist = chan->ch_vhub_data;
1924 	chan->ch_vhub_data = NULL;
1925 	mutex_exit(&chan->ch_vhub_lock);
1926 
1927 	/*
1928 	 * By the time this routine is called to rejoin the data mcgs, we
1929 	 * may not even be marked as joined in the SM's records.  But we
1930 	 * still have to leave each old mcg first to prevent leaks in ibtf.
1931 	 */
1932 	for (mcg = mcglist; mcg != NULL; mcg = next) {
1933 		next = mcg->mg_next;
1934 		mcg->mg_next = NULL;
1935 
1936 		(void) ibt_detach_mcg(chan->ch_chan, mcg->mg_mcginfo);
1937 		(void) ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
1938 		    eib_reserved_gid, mcg->mg_join_state);
1939 
1940 		if (eib_vnic_join_data_mcg(ss, vnic, mcg->mg_mac, B_TRUE,
1941 		    &err) != EIB_E_SUCCESS) {
1942 			uint8_t *m;
1943 
1944 			m = mcg->mg_mac;
1945 			EIB_DPRINTF_WARN(ss->ei_instance,
1946 			    "eib_vnic_rejoin_data_mcgs: "
1947 			    "eib_vnic_join_data_mcg(mcmac=%x:%x:%x:%x:%x:%x) "
1948 			    "failed, ret=%d", m[0], m[1], m[2], m[3],
1949 			    m[4], m[5], err);
1950 		}
1951 		if (mcg->mg_mcginfo) {
1952 			kmem_free(mcg->mg_mcginfo, sizeof (ibt_mcg_info_t));
1953 		}
1954 		kmem_free(mcg, sizeof (eib_mcg_t));
1955 	}
1956 }
1957 
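/*
 * Reattach this vnic to the vhub table and vhub update control mcgs,
 * leaving the vhub state we've already constructed intact.
 */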
1958 static void
1959 eib_vnic_reattach_ctl_mcgs(eib_t *ss, eib_vnic_t *vnic)
1960 {
1961 	/*
1962 	 * For reattaching to control mcgs, we will not reinitialize the
1963 	 * vhub table/vhub update we've constructed.  We'll simply detach
1964 	 * from the vhub table and vhub update mcgs and reattach to them.
1965 	 * Hopefully, we won't have missed any updates and won't have to
1966 	 * restart the vnic.
1967 	 */
1968 	eib_rb_vnic_attach_vhub_table(ss, vnic);
1969 	eib_rb_vnic_attach_vhub_update(ss, vnic);
1970 
1971 	if (eib_vnic_attach_vhub_update(ss, vnic) != EIB_E_SUCCESS) {
1972 		EIB_DPRINTF_WARN(ss->ei_instance,
1973 		    "eib_vnic_reattach_ctl_mcgs: "
1974 		    "eib_vnic_attach_vhub_update(vn_id=0x%x) failed",
1975 		    vnic->vn_id);
1976 	}
1977 
1978 	if (eib_vnic_attach_vhub_table(ss, vnic) != EIB_E_SUCCESS) {
1979 		EIB_DPRINTF_WARN(ss->ei_instance,
1980 		    "eib_vnic_reattach_ctl_mcgs: "
1981 		    "eib_vnic_attach_vhub_table(vn_id=0x%x) failed",
1982 		    vnic->vn_id);
1983 
1984 		eib_rb_vnic_attach_vhub_update(ss, vnic);
1985 	}
1986 }
1987 
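/*
 * Roll back the steps of common vnic creation recorded in the progress
 * flags, undoing them in the reverse order of their setup.
 */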
1988 static void
1989 eib_rb_vnic_create_common(eib_t *ss, eib_vnic_t *vnic, uint_t progress)
1990 {
1991 	int err;
1992 
1993 	mutex_enter(&ss->ei_vnic_lock);
1994 	ss->ei_vnic[vnic->vn_instance] = NULL;
1995 	ss->ei_vnic_pending = NULL;
1996 	mutex_exit(&ss->ei_vnic_lock);
1997 
1998 	if (progress & EIB_VNIC_BROADCAST_JOINED) {
1999 		eib_vnic_leave_all_data_mcgs(ss, vnic);
2000 	}
2001 
2002 	if (progress & EIB_VNIC_KEEPALIVES_STARTED) {
2003 		eib_rb_vnic_start_keepalives(ss, vnic);
2004 	}
2005 
2006 	if (progress & EIB_VNIC_ATTACHED_TO_CTL_MCGS) {
2007 		eib_rb_vnic_attach_ctl_mcgs(ss, vnic);
2008 	}
2009 
2010 	if (progress & EIB_VNIC_LOGIN_DONE) {
2011 		(void) eib_fip_logout(ss, vnic, &err);
2012 	}
2013 
2014 	if (progress & EIB_VNIC_DATAQP_CREATED) {
2015 		eib_rb_data_create_qp(ss, vnic);
2016 	}
2017 
2018 	if (progress & EIB_VNIC_CTLQP_CREATED) {
2019 		eib_rb_ctl_create_qp(ss, vnic);
2020 	}
2021 }
2022 
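/*
 * Undo the control mcg attaches: detach from the vhub table and vhub
 * update mcgs, then release the vhub table state.
 */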
2023 static void
2024 eib_rb_vnic_attach_ctl_mcgs(eib_t *ss, eib_vnic_t *vnic)
2025 {
2026 	/*
2027 	 * Detach from the vhub table and vhub update mcgs before blowing
2028 	 * up vn_vhub_table and vn_vhub_update, since the control cq
2029 	 * handler assumes these are available.
2030 	 */
2031 	eib_rb_vnic_attach_vhub_table(ss, vnic);
2032 	eib_rb_vnic_attach_vhub_update(ss, vnic);
2033 	eib_vnic_fini_tables(ss, vnic, B_TRUE);
2034 }
2035 
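/*
 * Undo eib_vnic_attach_vhub_table(): unhook the vhub table mcg from
 * the control channel, detach the qp, leave the group and free the
 * mcg entry and its mcginfo.
 */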
2036 /*ARGSUSED*/
2037 static void
2038 eib_rb_vnic_attach_vhub_table(eib_t *ss, eib_vnic_t *vnic)
2039 {
2040 	eib_chan_t *chan = vnic->vn_ctl_chan;
2041 	eib_mcg_t *mcg;
2042 	ibt_channel_hdl_t chan_hdl;
2043 	ibt_status_t ret;
2044 
2045 	if (chan == NULL)
2046 		return;
2047 
2048 	mutex_enter(&chan->ch_vhub_lock);
2049 	chan_hdl = chan->ch_chan;
2050 	mcg = chan->ch_vhub_table;
2051 	chan->ch_vhub_table = NULL;
2052 	mutex_exit(&chan->ch_vhub_lock);
2053 
2054 	if (chan_hdl && mcg) {
2055 		ret = ibt_detach_mcg(chan_hdl, mcg->mg_mcginfo);
2056 		if (ret != IBT_SUCCESS) {
2057 			EIB_DPRINTF_WARN(ss->ei_instance,
2058 			    "eib_rb_vnic_attach_vhub_table: "
2059 			    "ibt_detach_mcg(chan_hdl=0x%llx, mcinfo=0x%llx, "
2060 			    "mgid=%llx.%llx) failed, ret=%d", chan_hdl,
2061 			    mcg->mg_mcginfo, mcg->mg_mgid.gid_prefix,
2062 			    mcg->mg_mgid.gid_guid, ret);
2063 		}
2064 
2065 		ret = ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
2066 		    eib_reserved_gid, mcg->mg_join_state);
2067 		if (ret != IBT_SUCCESS) {
2068 			EIB_DPRINTF_WARN(ss->ei_instance,
2069 			    "eib_rb_vnic_attach_vhub_table: "
2070 			    "ibt_leave_mcg(mgid=%llx.%llx, jstate=0x%x) "
2071 			    "failed, ret=%d", mcg->mg_mgid.gid_prefix,
2072 			    mcg->mg_mgid.gid_guid, mcg->mg_join_state, ret);
2073 		}
2074 
2075 		if (mcg->mg_mcginfo) {
2076 			ibt_free_mcg_info(mcg->mg_mcginfo, 1);
2077 		}
2078 		kmem_free(mcg, sizeof (eib_mcg_t));
2079 	}
2080 }
2081 
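/*
 * Undo eib_vnic_attach_vhub_update(): unhook the vhub update mcg from
 * the control channel, detach the qp, leave the group and free the
 * mcg entry and its mcginfo.
 */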
2082 /*ARGSUSED*/
2083 static void
2084 eib_rb_vnic_attach_vhub_update(eib_t *ss, eib_vnic_t *vnic)
2085 {
2086 	eib_chan_t *chan = vnic->vn_ctl_chan;
2087 	eib_mcg_t *mcg;
2088 	ibt_channel_hdl_t chan_hdl;
2089 	ibt_status_t ret;
2090 
2091 	if (chan == NULL)
2092 		return;
2093 
2094 	mutex_enter(&chan->ch_vhub_lock);
2095 	chan_hdl = chan->ch_chan;
2096 	mcg = chan->ch_vhub_update;
2097 	chan->ch_vhub_update = NULL;
2098 	mutex_exit(&chan->ch_vhub_lock);
2099 
2100 	if (chan_hdl && mcg) {
2101 		ret = ibt_detach_mcg(chan_hdl, mcg->mg_mcginfo);
2102 		if (ret != IBT_SUCCESS) {
2103 			EIB_DPRINTF_WARN(ss->ei_instance,
2104 			    "eib_rb_vnic_attach_vhub_update: "
2105 			    "ibt_detach_mcg(chan_hdl=0x%llx, mcinfo=0x%llx, "
2106 			    "mgid=%llx.%llx) failed, ret=%d", chan_hdl,
2107 			    mcg->mg_mcginfo, mcg->mg_mgid.gid_prefix,
2108 			    mcg->mg_mgid.gid_guid, ret);
2109 		}
2110 
2111 		ret = ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
2112 		    eib_reserved_gid, mcg->mg_join_state);
2113 		if (ret != IBT_SUCCESS) {
2114 			EIB_DPRINTF_WARN(ss->ei_instance,
2115 			    "eib_rb_vnic_attach_vhub_update: "
2116 			    "ibt_leave_mcg(mgid=%llx.%llx, jstate=0x%x) "
2117 			    "failed, ret=%d", mcg->mg_mgid.gid_prefix,
2118 			    mcg->mg_mgid.gid_guid, mcg->mg_join_state, ret);
2119 		}
2120 
2121 		if (mcg->mg_mcginfo) {
2122 			ibt_free_mcg_info(mcg->mg_mcginfo, 1);
2123 		}
2124 		kmem_free(mcg, sizeof (eib_mcg_t));
2125 	}
2126 }
2127 
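/*
 * Undo eib_vnic_start_keepalives(): locate this vnic's entry in the
 * keepalives manager list, unlink it and free it.
 */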
2128 /*ARGSUSED*/
2129 static void
2130 eib_rb_vnic_start_keepalives(eib_t *ss, eib_vnic_t *vnic)
2131 {
2132 	eib_ka_vnics_t *prev;
2133 	eib_ka_vnics_t *elem;
2134 
2135 	/*
2136 	 * We only need to locate and remove the vnic entry from the
2137 	 * keepalives manager list
2138 	 */
2139 
2140 	mutex_enter(&ss->ei_ka_vnics_lock);
2141 
2142 	prev = NULL;
2143 	for (elem = ss->ei_ka_vnics; elem; elem = elem->ka_next) {
2144 		if (elem->ka_vnic == vnic)
2145 			break;
2146 
2147 		prev = elem;
2148 	}
2149 	if (elem == NULL) {
2150 		EIB_DPRINTF_DEBUG(ss->ei_instance,
2151 		    "eib_rb_vnic_start_keepalives: no keepalive element found "
2152 		    "for vnic 0x%llx (vn_inst=%d) in the keepalives manager list",
2153 		    vnic, vnic->vn_instance);
2154 	} else {
2155 		if (prev) {
2156 			prev->ka_next = elem->ka_next;
2157 		} else {
2158 			ss->ei_ka_vnics = elem->ka_next;
2159 		}
2160 		kmem_free(elem, sizeof (eib_ka_vnics_t));
2161 	}
2162 	mutex_exit(&ss->ei_ka_vnics_lock);
2163 }
2164 
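/*
 * Undo a previous join of the data mcg for mcast_mac: unlink the mcg
 * from ch_vhub_data, detach the data qp, leave the group and free the
 * mcg entry and its mcginfo.
 */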
2165 /*ARGSUSED*/
2166 static void
2167 eib_rb_vnic_join_data_mcg(eib_t *ss, eib_vnic_t *vnic, uint8_t *mcast_mac)
2168 {
2169 	eib_chan_t *chan = vnic->vn_data_chan;
2170 	eib_mcg_t *prev;
2171 	eib_mcg_t *mcg;
2172 	ibt_status_t ret;
2173 
2174 	/*
2175 	 * Search our list and remove the item if found
2176 	 */
2177 	mutex_enter(&chan->ch_vhub_lock);
2178 
2179 	prev = NULL;
2180 	for (mcg = chan->ch_vhub_data; mcg != NULL; mcg = mcg->mg_next) {
2181 		if (bcmp(mcg->mg_mac, mcast_mac, ETHERADDRL) == 0)
2182 			break;
2183 		prev = mcg;
2184 	}
2185 
2186 	if (mcg == NULL) {
2187 		mutex_exit(&chan->ch_vhub_lock);
2188 		return;
2189 	}
2190 
2191 	if (prev != NULL)
2192 		prev->mg_next = mcg->mg_next;
2193 	else
2194 		chan->ch_vhub_data = mcg->mg_next;
2195 
2196 	mcg->mg_next = NULL;
2197 
2198 	mutex_exit(&chan->ch_vhub_lock);
2199 
2200 	/*
2201 	 * Detach data channel qp from the mcg, leave the group and free
2202 	 * all associated stuff
2203 	 */
2204 	ret = ibt_detach_mcg(chan->ch_chan, mcg->mg_mcginfo);
2205 	if (ret != IBT_SUCCESS) {
2206 		EIB_DPRINTF_WARN(ss->ei_instance,
2207 		    "eib_rb_vnic_join_data_mcg: "
2208 		    "ibt_detach_mcg(chan_hdl=0x%llx, mcinfo=0x%llx, "
2209 		    "mgid=%llx.%llx) failed, ret=%d", chan->ch_chan,
2210 		    mcg->mg_mcginfo, mcg->mg_mgid.gid_prefix,
2211 		    mcg->mg_mgid.gid_guid, ret);
2212 	}
2213 
2214 	ret = ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid, eib_reserved_gid,
2215 	    mcg->mg_join_state);
2216 	if (ret != IBT_SUCCESS) {
2217 		EIB_DPRINTF_WARN(ss->ei_instance,
2218 		    "eib_rb_vnic_join_data_mcg: "
2219 		    "ibt_leave_mcg(mgid=%llx.%llx, jstate=0x%x) "
2220 		    "failed, ret=%d", mcg->mg_mgid.gid_prefix,
2221 		    mcg->mg_mgid.gid_guid, mcg->mg_join_state, ret);
2222 	}
2223 
2224 	if (mcg->mg_mcginfo)
2225 		kmem_free(mcg->mg_mcginfo, sizeof (ibt_mcg_info_t));
2226 
2227 	kmem_free(mcg, sizeof (eib_mcg_t));
2228 }
2229