xref: /titanic_52/usr/src/uts/common/io/ib/clients/eoib/eib_fip.c (revision b494511a9cf72b1fc4eb13a0e593f55c624ab829)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/ddi.h>
28 #include <sys/sunddi.h>
29 #include <sys/ksynch.h>
30 #include <sys/byteorder.h>
31 
32 #include <sys/ib/clients/eoib/eib_impl.h>
33 
34 /*
35  * Declarations private to this file
36  */
/*
 * Request builders: each one fills in the buffer of the send wqe it
 * is handed with the corresponding FIP request.
 */
static int eib_fip_make_login(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
static int eib_fip_make_update(eib_t *, eib_vnic_t *, eib_wqe_t *, int, int *);
static int eib_fip_make_table(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
static int eib_fip_make_ka(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
static int eib_fip_make_logout(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);

/*
 * Request senders: each one posts an already built send wqe and moves
 * the vnic to its next login state.
 */
static int eib_fip_send_login(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
static int eib_fip_send_update(eib_t *, eib_vnic_t *, eib_wqe_t *,
    uint_t, int *);
static int eib_fip_send_table(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
static int eib_fip_send_ka(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
static int eib_fip_send_logout(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);

/*
 * vHUB table and vHUB update handling
 */
static int eib_fip_parse_vhub_table(uint8_t *, eib_vnic_t *);
static int eib_fip_parse_vhub_update(uint8_t *, eib_vnic_t *);
static void eib_fip_update_eport_state(eib_t *, eib_vhub_table_t *,
    eib_vhub_update_t *, boolean_t, uint8_t);
static void eib_fip_queue_tbl_entry(eib_vhub_table_t *, eib_vhub_map_t *,
    uint32_t, uint8_t);
static void eib_fip_queue_upd_entry(eib_vhub_update_t *, eib_vhub_map_t *,
    uint32_t, uint8_t);
static void eib_fip_queue_gw_entry(eib_vnic_t *, eib_vhub_table_t *, uint32_t,
    uint8_t);
static int eib_fip_apply_updates(eib_t *, eib_vhub_table_t *,
    eib_vhub_update_t *);
static void eib_fip_dequeue_tbl_entry(eib_vhub_table_t *, uint8_t *, uint32_t,
    uint8_t);
static eib_vhub_map_t *eib_fip_get_vhub_map(void);
65 
66 /*
67  * Definitions private to this file
68  */
/*
 * Vendor id placed in every FIP descriptor we build: the eight ASCII
 * characters of "Mellanox", deliberately without a terminating NUL.
 */
const char eib_vendor_mellanox[] = {
	'M', 'e', 'l', 'l', 'a', 'n', 'o', 'x'
};
72 
73 /*
74  * The three requests to the gateway - request a vHUB table, request a
75  * vHUB update (aka keepalive) and vNIC logout - all need the same
76  * vnic identity descriptor to be sent with different flag settings.
77  *
78  *      vHUB table: R=1, U=0, TUSN=last, subcode=KEEPALIVE
79  *      keepalive/vHUB update: R=0, U=1, TUSN=last, subcode=KEEPALIVE
80  *      vNIC logout: R=0, U=0, TUSN=0, subcode=LOGOUT
81  */
#define	EIB_UPD_REQ_TABLE	1	/* vHUB table request: R flag set */
#define	EIB_UPD_REQ_KA		2	/* keepalive/update: U flag set */
#define	EIB_UPD_REQ_LOGOUT	3	/* vNIC logout: no flags, TUSN=0 */
85 
86 int
87 eib_fip_login(eib_t *ss, eib_vnic_t *vnic, int *err)
88 {
89 	eib_wqe_t *swqe;
90 	int ret;
91 	int ntries = 0;
92 
93 	do {
94 		if ((swqe = eib_rsrc_grab_swqe(ss, EIB_WPRI_LO)) == NULL) {
95 			EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_login: "
96 			    "no swqe available, not sending "
97 			    "vnic login request");
98 			*err = ENOMEM;
99 			return (EIB_E_FAILURE);
100 		}
101 
102 		ret = eib_fip_make_login(ss, vnic, swqe, err);
103 		if (ret != EIB_E_SUCCESS) {
104 			eib_rsrc_return_swqe(ss, swqe, NULL);
105 			return (EIB_E_FAILURE);
106 		}
107 
108 		ret = eib_fip_send_login(ss, vnic, swqe, err);
109 		if (ret != EIB_E_SUCCESS) {
110 			eib_rsrc_return_swqe(ss, swqe, NULL);
111 			return (EIB_E_FAILURE);
112 		}
113 
114 		ret = eib_vnic_wait_for_login_ack(ss, vnic, err);
115 		if (ret == EIB_E_SUCCESS)
116 			break;
117 
118 	} while ((*err == ETIME) && (ntries++ < EIB_MAX_LOGIN_ATTEMPTS));
119 
120 	return (ret);
121 }
122 
123 int
124 eib_fip_vhub_table(eib_t *ss, eib_vnic_t *vnic, int *err)
125 {
126 	eib_wqe_t *swqe;
127 	int ret;
128 	int ntries = 0;
129 
130 	do {
131 		if ((swqe = eib_rsrc_grab_swqe(ss, EIB_WPRI_LO)) == NULL) {
132 			EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_vhub_table: "
133 			    "no swqe available, not sending "
134 			    "vhub table request");
135 			*err = ENOMEM;
136 			return (EIB_E_FAILURE);
137 		}
138 
139 		ret = eib_fip_make_table(ss, vnic, swqe, err);
140 		if (ret != EIB_E_SUCCESS) {
141 			eib_rsrc_return_swqe(ss, swqe, NULL);
142 			return (EIB_E_FAILURE);
143 		}
144 
145 		ret = eib_fip_send_table(ss, vnic, swqe, err);
146 		if (ret != EIB_E_SUCCESS) {
147 			eib_rsrc_return_swqe(ss, swqe, NULL);
148 			return (EIB_E_FAILURE);
149 		}
150 
151 		ret = eib_vnic_wait_for_table(ss, vnic, err);
152 		if (ret == EIB_E_SUCCESS) {
153 			return (EIB_E_SUCCESS);
154 		}
155 
156 		/*
157 		 * If we'd failed in constructing a proper vhub table above,
158 		 * the vnic login state would be set to EIB_LOGIN_TBL_FAILED.
159 		 * We need to clean up any pending entries from the vhub
160 		 * table and vhub update structures and reset the vnic state
161 		 * to EIB_LOGIN_ACK_RCVD before we can try again.
162 		 */
163 		eib_vnic_fini_tables(ss, vnic, B_FALSE);
164 		mutex_enter(&vnic->vn_lock);
165 		vnic->vn_state = EIB_LOGIN_ACK_RCVD;
166 		mutex_exit(&vnic->vn_lock);
167 
168 	} while ((*err == ETIME) && (ntries++ < EIB_MAX_VHUB_TBL_ATTEMPTS));
169 
170 	return (EIB_E_FAILURE);
171 }
172 
173 int
174 eib_fip_heartbeat(eib_t *ss, eib_vnic_t *vnic, int *err)
175 {
176 	eib_wqe_t *swqe;
177 	int ntries = 0;
178 	int ret;
179 
180 	/*
181 	 * Even if we're running low on the wqe resource, we want to be
182 	 * able to grab a wqe to send the keepalive, to avoid getting
183 	 * logged out by the gateway, so we use EIB_WPRI_HI.
184 	 */
185 	if ((swqe = eib_rsrc_grab_swqe(ss, EIB_WPRI_HI)) == NULL) {
186 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_heartbeat: "
187 		    "no swqe available, not sending heartbeat");
188 		return (EIB_E_FAILURE);
189 	}
190 
191 	while (ntries++ < EIB_MAX_KA_ATTEMPTS) {
192 		ret = eib_fip_make_ka(ss, vnic, swqe, err);
193 		if (ret != EIB_E_SUCCESS)
194 			continue;
195 
196 		ret = eib_fip_send_ka(ss, vnic, swqe, err);
197 		if (ret == EIB_E_SUCCESS)
198 			break;
199 	}
200 
201 	if (ret != EIB_E_SUCCESS)
202 		eib_rsrc_return_swqe(ss, swqe, NULL);
203 
204 	return (ret);
205 }
206 
207 int
208 eib_fip_logout(eib_t *ss, eib_vnic_t *vnic, int *err)
209 {
210 	eib_wqe_t *swqe;
211 	int ret;
212 
213 	/*
214 	 * This routine is only called after the vnic has successfully
215 	 * logged in to the gateway. If that's really the case, there
216 	 * is nothing in terms of resources we need to release: the swqe
217 	 * that was acquired during login has already been posted, the
218 	 * work has been completed and the swqe has also been reaped back
219 	 * into the free pool. The only thing we need to rollback is the
220 	 * fact that we're logged in to the gateway at all -- and the way
221 	 * to do this is to send a logout request.
222 	 */
223 	if ((swqe = eib_rsrc_grab_swqe(ss, EIB_WPRI_LO)) == NULL) {
224 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_logout: "
225 		    "no swqe available, not sending logout");
226 		return (EIB_E_FAILURE);
227 	}
228 
229 	ret = eib_fip_make_logout(ss, vnic, swqe, err);
230 	if (ret != EIB_E_SUCCESS) {
231 		eib_rsrc_return_swqe(ss, swqe, NULL);
232 		return (EIB_E_FAILURE);
233 	}
234 
235 	ret = eib_fip_send_logout(ss, vnic, swqe, err);
236 	if (ret != EIB_E_SUCCESS) {
237 		eib_rsrc_return_swqe(ss, swqe, NULL);
238 		return (EIB_E_FAILURE);
239 	}
240 
241 	return (EIB_E_SUCCESS);
242 }
243 
244 int
245 eib_fip_parse_login_ack(eib_t *ss, uint8_t *pkt, eib_login_data_t *ld)
246 {
247 	fip_login_ack_t *ack;
248 	fip_basic_hdr_t *hdr;
249 	fip_desc_iba_t *iba;
250 	fip_desc_vnic_login_t *login;
251 	fip_desc_partition_t *partition;
252 	ib_guid_t guid;
253 	uint32_t syn_ctl_qpn;
254 	uint16_t sl_portid;
255 	uint16_t flags_vlan;
256 	uint16_t opcode;
257 	uint8_t subcode;
258 
259 	/*
260 	 * Note that 'pkt' is always atleast double-word aligned
261 	 * when it is passed to us, so we can cast it without any
262 	 * problems.
263 	 */
264 	ack = (fip_login_ack_t *)(void *)pkt;
265 	hdr = &(ack->ak_fip_header);
266 
267 	/*
268 	 * Verify that the opcode is EoIB
269 	 */
270 	if ((opcode = ntohs(hdr->hd_opcode)) != FIP_OPCODE_EOIB) {
271 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
272 		    "unsupported opcode 0x%x in login ack, ignoring",
273 		    opcode);
274 		return (EIB_E_FAILURE);
275 	}
276 
277 	/*
278 	 * The admin qp in the EoIB driver should receive only the login
279 	 * acknowledgements
280 	 */
281 	subcode = hdr->hd_subcode;
282 	if (subcode != FIP_SUBCODE_G_VNIC_LOGIN_ACK) {
283 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
284 		    "unexpected subcode 0x%x received by adm qp, ignoring",
285 		    subcode);
286 		return (EIB_E_FAILURE);
287 	}
288 
289 	/*
290 	 * Verify if the descriptor list length in the received packet is
291 	 * valid if the workaround to disable it explicitly is absent.
292 	 */
293 	if (!eib_wa_no_desc_list_len) {
294 		uint_t pkt_data_sz;
295 
296 		pkt_data_sz = (ntohs(hdr->hd_desc_list_len) + 2) << 2;
297 		if (pkt_data_sz < sizeof (fip_login_ack_t)) {
298 			EIB_DPRINTF_WARN(ss->ei_instance,
299 			    "eib_fip_parse_login_ack: "
300 			    "login ack desc list len (0x%lx) too small "
301 			    "(min 0x%lx)",
302 			    pkt_data_sz, sizeof (fip_login_ack_t));
303 			return (EIB_E_FAILURE);
304 		}
305 	}
306 
307 	/*
308 	 * Validate all the header and descriptor types and lengths
309 	 */
310 	if (hdr->hd_type != FIP_DESC_TYPE_VENDOR_ID ||
311 	    hdr->hd_len != FIP_DESC_LEN_VENDOR_ID) {
312 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
313 		    "invalid type/len in basic hdr: expected (0x%x,0x%x), "
314 		    "got (0x%x,0x%x)", FIP_DESC_TYPE_VENDOR_ID,
315 		    FIP_DESC_LEN_VENDOR_ID, hdr->hd_type, hdr->hd_len);
316 		return (EIB_E_FAILURE);
317 	}
318 	iba = &(ack->ak_iba);
319 	if (iba->ia_type != FIP_DESC_TYPE_IBA ||
320 	    iba->ia_len != FIP_DESC_LEN_IBA) {
321 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
322 		    "invalid type/len in iba desc: expected (0x%x,0x%x), "
323 		    "got (0x%x,0x%x)", FIP_DESC_TYPE_IBA, FIP_DESC_LEN_IBA,
324 		    iba->ia_type, iba->ia_len);
325 		return (EIB_E_FAILURE);
326 	}
327 	login = &(ack->ak_vnic_login);
328 	if (login->vl_type != FIP_DESC_TYPE_VNIC_LOGIN ||
329 	    login->vl_len != FIP_DESC_LEN_VNIC_LOGIN) {
330 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
331 		    "invalid type/len in login desc: expected (0x%x,0x%x), "
332 		    "got (0x%x,0x%x)", FIP_DESC_TYPE_VNIC_LOGIN,
333 		    FIP_DESC_LEN_VNIC_LOGIN, login->vl_type, login->vl_len);
334 		return (EIB_E_FAILURE);
335 	}
336 	partition = &(ack->ak_vhub_partition);
337 	if (partition->pn_type != FIP_DESC_TYPE_PARTITION ||
338 	    partition->pn_len != FIP_DESC_LEN_PARTITION) {
339 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
340 		    "invalid type/len in partition desc: expected (0x%x,0x%x), "
341 		    "got (0x%x,0x%x)", FIP_DESC_TYPE_PARTITION,
342 		    FIP_DESC_LEN_PARTITION, partition->pn_type,
343 		    partition->pn_len);
344 		return (EIB_E_FAILURE);
345 	}
346 
347 	/*
348 	 * Note that we'll return the vnic id as-is.  The msb is not actually
349 	 * part of the vnic id in our internal records, so we'll mask it out
350 	 * later before we do our searches.
351 	 */
352 	ld->ld_vnic_id = ntohs(login->vl_vnic_id);
353 
354 	syn_ctl_qpn = ntohl(login->vl_syndrome_ctl_qpn);
355 
356 	/*
357 	 * If the syndrome indicates a nack, we're done.  No need to collect
358 	 * any more information
359 	 */
360 	ld->ld_syndrome = (uint8_t)((syn_ctl_qpn & FIP_VL_SYN_MASK) >>
361 	    FIP_VL_SYN_SHIFT);
362 	if (ld->ld_syndrome) {
363 		return (EIB_E_SUCCESS);
364 	}
365 
366 	/*
367 	 * Let's get the rest of the information out of the login ack
368 	 */
369 	sl_portid = ntohs(iba->ia_sl_portid);
370 	ld->ld_gw_port_id = sl_portid & FIP_IBA_PORTID_MASK;
371 	ld->ld_gw_sl = (sl_portid & FIP_IBA_SL_MASK) >> FIP_IBA_SL_SHIFT;
372 
373 	ld->ld_gw_data_qpn = ntohl(iba->ia_qpn) & FIP_IBA_QPN_MASK;
374 	ld->ld_gw_lid = ntohs(iba->ia_lid);
375 
376 	bcopy(iba->ia_guid, &guid, sizeof (ib_guid_t));
377 	ld->ld_gw_guid = ntohll(guid);
378 	ld->ld_vhub_mtu = ntohs(login->vl_mtu);
379 	bcopy(login->vl_mac, ld->ld_assigned_mac, ETHERADDRL);
380 	bcopy(login->vl_gw_mgid_prefix, ld->ld_gw_mgid_prefix,
381 	    FIP_MGID_PREFIX_LEN);
382 	ld->ld_n_rss_mcgid = login->vl_flags_rss & FIP_VL_N_RSS_MCGID_MASK;
383 	ld->ld_n_mac_mcgid = login->vl_n_mac_mcgid & FIP_VL_N_MAC_MCGID_MASK;
384 	ld->ld_gw_ctl_qpn = (syn_ctl_qpn & FIP_VL_CTL_QPN_MASK);
385 
386 	flags_vlan = ntohs(login->vl_flags_vlan);
387 	ld->ld_assigned_vlan = flags_vlan & FIP_VL_VLAN_MASK;
388 	ld->ld_vlan_in_packets = (flags_vlan & FIP_VL_FLAGS_VP) ? 1 : 0;
389 	bcopy(login->vl_vnic_name, ld->ld_vnic_name, FIP_VNIC_NAME_LEN);
390 
391 	ld->ld_vhub_pkey = ntohs(partition->pn_pkey);
392 
393 	return (EIB_E_SUCCESS);
394 }
395 
396 int
397 eib_fip_parse_ctl_pkt(uint8_t *pkt, eib_vnic_t *vnic)
398 {
399 	eib_t *ss = vnic->vn_ss;
400 	fip_vhub_pkt_t *vhb;
401 	fip_basic_hdr_t *hdr;
402 	uint16_t opcode;
403 	uint8_t subcode;
404 	uint_t vnic_state;
405 	int ret = EIB_E_FAILURE;
406 
407 	/*
408 	 * Note that 'pkt' is always atleast double-word aligned when it is
409 	 * passed to us, so we can cast it without any problems.
410 	 */
411 	vhb = (fip_vhub_pkt_t *)(void *)pkt;
412 	hdr = &(vhb->hb_fip_header);
413 
414 	/*
415 	 * Verify that the opcode is EoIB
416 	 */
417 	if ((opcode = ntohs(hdr->hd_opcode)) != FIP_OPCODE_EOIB) {
418 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_ctl_pkt: "
419 		    "unsupported opcode 0x%x in ctl pkt, ignoring",
420 		    opcode);
421 		return (EIB_E_FAILURE);
422 	}
423 
424 	mutex_enter(&vnic->vn_lock);
425 	vnic_state = vnic->vn_state;
426 	mutex_exit(&vnic->vn_lock);
427 
428 	/*
429 	 * The ctl qp in the EoIB driver should receive only vHUB messages
430 	 */
431 	subcode = hdr->hd_subcode;
432 	if (subcode == FIP_SUBCODE_G_VHUB_UPDATE) {
433 		if (vnic_state != EIB_LOGIN_TBL_WAIT &&
434 		    vnic_state != EIB_LOGIN_TBL_INPROG &&
435 		    vnic_state != EIB_LOGIN_TBL_DONE &&
436 		    vnic_state != EIB_LOGIN_DONE) {
437 
438 			EIB_DPRINTF_WARN(ss->ei_instance,
439 			    "eib_fip_parse_ctl_pkt: unexpected vnic state "
440 			    "(0x%lx) for subcode (VHUB_UPDATE 0x%x)",
441 			    vnic_state, subcode);
442 			return (EIB_E_FAILURE);
443 		}
444 
445 		ret = eib_fip_parse_vhub_update(pkt, vnic);
446 
447 	} else if (subcode == FIP_SUBCODE_G_VHUB_TABLE) {
448 		if ((vnic_state != EIB_LOGIN_TBL_WAIT) &&
449 		    (vnic_state != EIB_LOGIN_TBL_INPROG)) {
450 
451 			EIB_DPRINTF_WARN(ss->ei_instance,
452 			    "eib_fip_parse_ctl_pkt: unexpected vnic state "
453 			    "(0x%lx) for subcode (VHUB_TABLE 0x%x)",
454 			    vnic_state, subcode);
455 			return (EIB_E_FAILURE);
456 		}
457 
458 		ret = eib_fip_parse_vhub_table(pkt, vnic);
459 
460 	} else {
461 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_ctl_pkt: "
462 		    "unexpected subcode 0x%x for ctl pkt", subcode);
463 	}
464 
465 	if (ret == EIB_E_SUCCESS) {
466 		/*
467 		 * Update last gateway heartbeat received time and
468 		 * gateway eport state.  The eport state should only
469 		 * be updated if the vnic's vhub table has been fully
470 		 * constructed.
471 		 */
472 		mutex_enter(&ss->ei_vnic_lock);
473 		ss->ei_gw_last_heartbeat = ddi_get_lbolt64();
474 		if (vnic_state == EIB_LOGIN_TBL_DONE ||
475 		    vnic_state == EIB_LOGIN_DONE) {
476 			ss->ei_gw_eport_state =
477 			    vnic->vn_vhub_table->tb_eport_state;
478 		}
479 		mutex_exit(&ss->ei_vnic_lock);
480 	}
481 
482 	return (ret);
483 }
484 
485 static int
486 eib_fip_make_login(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
487 {
488 	fip_login_t *login;
489 	fip_proto_t *proto;
490 	fip_basic_hdr_t *hdr;
491 	fip_desc_iba_t *iba;
492 	fip_desc_vnic_login_t *vlg;
493 	ib_gid_t port_gid;
494 	ib_guid_t port_guid;
495 	uint16_t sl_portid;
496 	uint16_t flags_vlan;
497 
498 	uint16_t gw_portid = ss->ei_gw_props->pp_gw_portid;
499 	uint16_t sl = ss->ei_gw_props->pp_gw_sl;
500 	uint8_t *pkt = (uint8_t *)(uintptr_t)(swqe->qe_sgl.ds_va);
501 	uint_t pktsz = swqe->qe_sgl.ds_len;
502 	uint_t login_sz = sizeof (fip_login_t);
503 
504 	if (pktsz < login_sz) {
505 		*err = EINVAL;
506 
507 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_make_login: "
508 		    "send buffer size (0x%lx) too small to send"
509 		    "login request (min 0x%lx)",
510 		    pktsz, login_sz);
511 		return (EIB_E_FAILURE);
512 	}
513 
514 	/*
515 	 * Lint complains that there may be an alignment issue here,
516 	 * but we know that the "pkt" is atleast double-word aligned,
517 	 * so it's ok.
518 	 */
519 	login = (fip_login_t *)(void *)pkt;
520 	bzero(pkt, login_sz);
521 
522 	/*
523 	 * Fill in the FIP protocol version
524 	 */
525 	proto = &login->lg_proto_version;
526 	proto->pr_version = FIP_PROTO_VERSION;
527 
528 	/*
529 	 * Fill in the basic header
530 	 */
531 	hdr = &login->lg_fip_header;
532 	hdr->hd_opcode = htons(FIP_OPCODE_EOIB);
533 	hdr->hd_subcode = FIP_SUBCODE_H_VNIC_LOGIN;
534 	hdr->hd_desc_list_len = htons((login_sz >> 2) - 2);
535 	hdr->hd_flags = 0;
536 	hdr->hd_type = FIP_DESC_TYPE_VENDOR_ID;
537 	hdr->hd_len = FIP_DESC_LEN_VENDOR_ID;
538 	bcopy(eib_vendor_mellanox, hdr->hd_vendor_id, FIP_VENDOR_LEN);
539 
540 	/*
541 	 * Fill in the Infiniband Address descriptor
542 	 */
543 	iba = &login->lg_iba;
544 	iba->ia_type = FIP_DESC_TYPE_IBA;
545 	iba->ia_len = FIP_DESC_LEN_IBA;
546 	bcopy(eib_vendor_mellanox, iba->ia_vendor_id, FIP_VENDOR_LEN);
547 	iba->ia_qpn = htonl(vnic->vn_data_chan->ch_qpn);
548 
549 	sl_portid = (gw_portid & FIP_IBA_PORTID_MASK) |
550 	    ((sl << FIP_IBA_SL_SHIFT) & FIP_IBA_SL_MASK);
551 	iba->ia_sl_portid = htons(sl_portid);
552 
553 	iba->ia_lid = htons(ss->ei_props->ep_blid);
554 
555 	port_gid = ss->ei_props->ep_sgid;
556 	port_guid = htonll(port_gid.gid_guid);
557 	bcopy(&port_guid, iba->ia_guid, FIP_GUID_LEN);
558 
559 	/*
560 	 * Now, fill in the vNIC Login descriptor
561 	 */
562 
563 	vlg = &login->lg_vnic_login;
564 	vlg->vl_type = FIP_DESC_TYPE_VNIC_LOGIN;
565 	vlg->vl_len = FIP_DESC_LEN_VNIC_LOGIN;
566 	bcopy(eib_vendor_mellanox, vlg->vl_vendor_id, FIP_VENDOR_LEN);
567 
568 	/*
569 	 * Only for the physlink instance 0, we ask the gateway to assign
570 	 * the mac address and a VLAN (tagless, actually).  For this vnic
571 	 * only, we do not set the H bit. All other vnics are created by
572 	 * Solaris admin and will have the H bit set. Note also that we
573 	 * need to clear the vnic id's most significant bit for those that
574 	 * are administered by the gateway, so vnic0's vnic_id's msb should
575 	 * be 0 as well.
576 	 */
577 	if (vnic->vn_instance == 0) {
578 		vlg->vl_vnic_id = htons(vnic->vn_id);
579 		flags_vlan = vnic->vn_vlan & FIP_VL_VLAN_MASK;
580 	} else {
581 		vlg->vl_vnic_id = htons(vnic->vn_id | FIP_VL_VNIC_ID_MSBIT);
582 		flags_vlan = (vnic->vn_vlan & FIP_VL_VLAN_MASK) |
583 		    FIP_VL_FLAGS_H | FIP_VL_FLAGS_M;
584 
585 		if (vnic->vn_vlan & FIP_VL_VLAN_MASK)
586 			flags_vlan |= (FIP_VL_FLAGS_V | FIP_VL_FLAGS_VP);
587 	}
588 
589 	vlg->vl_flags_vlan = htons(flags_vlan);
590 	bcopy(vnic->vn_macaddr, vlg->vl_mac, ETHERADDRL);
591 
592 	/*
593 	 * We aren't ready to enable rss, so we set the RSS bit and
594 	 * the n_rss_mcgid field to 0.  Set the mac mcgid to 0 as well.
595 	 */
596 	vlg->vl_flags_rss = 0;
597 	vlg->vl_n_mac_mcgid = 0;
598 
599 	/*
600 	 * Set the syndrome to 0 and pass the control qpn
601 	 */
602 	vlg->vl_syndrome_ctl_qpn =
603 	    htonl(vnic->vn_ctl_chan->ch_qpn & FIP_VL_CTL_QPN_MASK);
604 
605 	/*
606 	 * Try to set as unique a name as possible for this vnic
607 	 */
608 	(void) snprintf((char *)(vlg->vl_vnic_name), FIP_VNIC_NAME_LEN,
609 	    "eoib_%02x_%02x", ss->ei_instance, vnic->vn_instance);
610 
611 	/*
612 	 * Adjust the ds_len in the sgl to indicate the size of this
613 	 * request before returning
614 	 */
615 	swqe->qe_sgl.ds_len = login_sz;
616 
617 	return (EIB_E_SUCCESS);
618 }
619 
620 static int
621 eib_fip_make_update(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int req,
622     int *err)
623 {
624 	fip_keep_alive_t *ka;
625 	fip_proto_t *proto;
626 	fip_basic_hdr_t *hdr;
627 	fip_desc_vnic_identity_t *vid;
628 	ib_gid_t port_gid;
629 	ib_guid_t port_guid;
630 	uint32_t flags_vhub_id;
631 
632 	uint8_t *pkt = (uint8_t *)(uintptr_t)(swqe->qe_sgl.ds_va);
633 	uint_t pktsz = swqe->qe_sgl.ds_len;
634 	uint_t ka_sz = sizeof (fip_keep_alive_t);
635 
636 	if (pktsz < ka_sz) {
637 		*err = EINVAL;
638 
639 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_make_update: "
640 		    "send buffer size (0x%lx) too small to send"
641 		    "keepalive/update request (min 0x%lx)",
642 		    pktsz, ka_sz);
643 		return (EIB_E_FAILURE);
644 	}
645 
646 	/*
647 	 * Lint complains that there may be an alignment issue here,
648 	 * but we know that the "pkt" is atleast double-word aligned,
649 	 * so it's ok.
650 	 */
651 	ka = (fip_keep_alive_t *)(void *)pkt;
652 	bzero(pkt, ka_sz);
653 
654 	/*
655 	 * Fill in the FIP protocol version
656 	 */
657 	proto = &ka->ka_proto_version;
658 	proto->pr_version = FIP_PROTO_VERSION;
659 
660 	/*
661 	 * Fill in the basic header
662 	 */
663 	hdr = &ka->ka_fip_header;
664 	hdr->hd_opcode = htons(FIP_OPCODE_EOIB);
665 	hdr->hd_subcode = (req == EIB_UPD_REQ_LOGOUT) ?
666 	    FIP_SUBCODE_H_VNIC_LOGOUT : FIP_SUBCODE_H_KEEP_ALIVE;
667 	hdr->hd_desc_list_len = htons((ka_sz >> 2) - 2);
668 	hdr->hd_flags = 0;
669 	hdr->hd_type = FIP_DESC_TYPE_VENDOR_ID;
670 	hdr->hd_len = FIP_DESC_LEN_VENDOR_ID;
671 	bcopy(eib_vendor_mellanox, hdr->hd_vendor_id, FIP_VENDOR_LEN);
672 
673 	/*
674 	 * Fill in the vNIC Identity descriptor
675 	 */
676 	vid = &ka->ka_vnic_identity;
677 
678 	vid->vi_type = FIP_DESC_TYPE_VNIC_IDENTITY;
679 	vid->vi_len = FIP_DESC_LEN_VNIC_IDENTITY;
680 	bcopy(eib_vendor_mellanox, vid->vi_vendor_id, FIP_VENDOR_LEN);
681 
682 	flags_vhub_id = vnic->vn_login_data.ld_vhub_id;
683 	if (vnic->vn_login_data.ld_vlan_in_packets) {
684 		flags_vhub_id |= FIP_VI_FLAG_VP;
685 	}
686 	if (req == EIB_UPD_REQ_TABLE) {
687 		flags_vhub_id |= FIP_VI_FLAG_R;
688 	} else if (req == EIB_UPD_REQ_KA) {
689 		flags_vhub_id |= FIP_VI_FLAG_U;
690 	}
691 	vid->vi_flags_vhub_id = htonl(flags_vhub_id);
692 
693 	vid->vi_tusn = (req != EIB_UPD_REQ_LOGOUT) ?
694 	    htonl(vnic->vn_vhub_table->tb_tusn) : 0;
695 
696 	vid->vi_vnic_id = htons(vnic->vn_login_data.ld_vnic_id);
697 	bcopy(vnic->vn_login_data.ld_assigned_mac, vid->vi_mac, ETHERADDRL);
698 
699 	port_gid = ss->ei_props->ep_sgid;
700 	port_guid = htonll(port_gid.gid_guid);
701 	bcopy(&port_guid, vid->vi_port_guid, FIP_GUID_LEN);
702 	bcopy(vnic->vn_login_data.ld_vnic_name, vid->vi_vnic_name,
703 	    FIP_VNIC_NAME_LEN);
704 
705 	/*
706 	 * Adjust the ds_len in the sgl to indicate the size of this
707 	 * request before returning
708 	 */
709 	swqe->qe_sgl.ds_len = ka_sz;
710 
711 	return (EIB_E_SUCCESS);
712 }
713 
714 static int
715 eib_fip_make_table(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
716 {
717 	return (eib_fip_make_update(ss, vnic, swqe, EIB_UPD_REQ_TABLE, err));
718 }
719 
720 static int
721 eib_fip_make_ka(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
722 {
723 	return (eib_fip_make_update(ss, vnic, swqe, EIB_UPD_REQ_KA, err));
724 }
725 
726 static int
727 eib_fip_make_logout(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
728 {
729 	return (eib_fip_make_update(ss, vnic, swqe, EIB_UPD_REQ_LOGOUT, err));
730 }
731 
732 static int
733 eib_fip_send_login(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
734 {
735 	eib_avect_t *av;
736 	eib_chan_t *chan = ss->ei_admin_chan;
737 	ibt_status_t ret;
738 
739 	/*
740 	 * Get an address vector for this destination
741 	 */
742 	if ((av = eib_ibt_hold_avect(ss, ss->ei_gw_props->pp_gw_lid,
743 	    ss->ei_gw_props->pp_gw_sl)) == NULL) {
744 		*err = ENOMEM;
745 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_login: "
746 		    "eib_ibt_hold_avect(gw_lid=0x%x, sl=0x%x) failed",
747 		    ss->ei_gw_props->pp_gw_lid, ss->ei_gw_props->pp_gw_sl);
748 		return (EIB_E_FAILURE);
749 	}
750 
751 	/*
752 	 * Modify the UD destination handle to the gateway
753 	 */
754 	ret = ibt_modify_ud_dest(swqe->qe_dest, EIB_FIP_QKEY,
755 	    ss->ei_gw_props->pp_gw_ctrl_qpn, &av->av_vect);
756 
757 	eib_ibt_release_avect(ss, av);
758 	if (ret != IBT_SUCCESS) {
759 		*err = EINVAL;
760 
761 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_login: "
762 		    "ibt_modify_ud_dest(gw_ctl_qpn=0x%lx, qkey=0x%lx) failed, "
763 		    "ret=%d", ss->ei_gw_props->pp_gw_ctrl_qpn,
764 		    EIB_FIP_QKEY, ret);
765 		return (EIB_E_FAILURE);
766 	}
767 
768 	/*
769 	 * Send the login packet to the destination gateway. Posting
770 	 * the login and setting the login state to wait-for-ack should
771 	 * ideally be atomic to avoid race.
772 	 */
773 	mutex_enter(&vnic->vn_lock);
774 	ret = ibt_post_send(chan->ch_chan, &(swqe->qe_wr.send), 1, NULL);
775 	if (ret != IBT_SUCCESS) {
776 		mutex_exit(&vnic->vn_lock);
777 		*err = EINVAL;
778 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_login: "
779 		    "ibt_post_send() failed for vnic id 0x%x, ret=%d",
780 		    vnic->vn_id, ret);
781 		return (EIB_E_FAILURE);
782 	}
783 	vnic->vn_state = EIB_LOGIN_ACK_WAIT;
784 
785 	mutex_enter(&chan->ch_tx_lock);
786 	chan->ch_tx_posted++;
787 	mutex_exit(&chan->ch_tx_lock);
788 
789 	mutex_exit(&vnic->vn_lock);
790 
791 	return (EIB_E_SUCCESS);
792 }
793 
794 static int
795 eib_fip_send_update(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe,
796     uint_t nxt_state, int *err)
797 {
798 	eib_login_data_t *ld = &vnic->vn_login_data;
799 	eib_chan_t *chan = vnic->vn_ctl_chan;
800 	eib_avect_t *av;
801 	ibt_status_t ret;
802 
803 	/*
804 	 * Get an address vector for this destination
805 	 */
806 	if ((av = eib_ibt_hold_avect(ss, ld->ld_gw_lid,
807 	    ld->ld_gw_sl)) == NULL) {
808 		*err = ENOMEM;
809 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_update: "
810 		    "eib_ibt_hold_avect(gw_lid=0x%x, sl=0x%x) failed",
811 		    ld->ld_gw_lid, ld->ld_gw_sl);
812 		return (EIB_E_FAILURE);
813 	}
814 
815 	/*
816 	 * Modify the UD destination handle to the destination appropriately
817 	 */
818 	ret = ibt_modify_ud_dest(swqe->qe_dest, EIB_FIP_QKEY,
819 	    ld->ld_gw_ctl_qpn, &av->av_vect);
820 
821 	eib_ibt_release_avect(ss, av);
822 	if (ret != IBT_SUCCESS) {
823 		*err = EINVAL;
824 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_update: "
825 		    "ibt_modify_ud_dest(gw_ctl_qpn=0x%lx, qkey=0x%lx) failed, "
826 		    "ret=%d", ld->ld_gw_ctl_qpn, EIB_FIP_QKEY, ret);
827 		return (EIB_E_FAILURE);
828 	}
829 
830 	/*
831 	 * Send the update packet to the destination. Posting the update request
832 	 * and setting the login state to wait-for-vhub_table needs to be atomic
833 	 * to avoid race.
834 	 */
835 	mutex_enter(&vnic->vn_lock);
836 	ret = ibt_post_send(chan->ch_chan, &(swqe->qe_wr.send), 1, NULL);
837 	if (ret != IBT_SUCCESS) {
838 		mutex_exit(&vnic->vn_lock);
839 		*err = EINVAL;
840 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_update: "
841 		    "ibt_post_send() failed for vnic id 0x%x, ret=%d",
842 		    vnic->vn_id, ret);
843 		return (EIB_E_FAILURE);
844 	}
845 	vnic->vn_state = nxt_state;
846 
847 	mutex_enter(&chan->ch_tx_lock);
848 	chan->ch_tx_posted++;
849 	mutex_exit(&chan->ch_tx_lock);
850 
851 	mutex_exit(&vnic->vn_lock);
852 
853 	return (EIB_E_SUCCESS);
854 }
855 
856 static int
857 eib_fip_send_table(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
858 {
859 	return (eib_fip_send_update(ss, vnic, swqe, EIB_LOGIN_TBL_WAIT, err));
860 }
861 
862 static int
863 eib_fip_send_ka(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
864 {
865 	return (eib_fip_send_update(ss, vnic, swqe, EIB_LOGIN_DONE, err));
866 }
867 
868 static int
869 eib_fip_send_logout(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
870 {
871 	return (eib_fip_send_update(ss, vnic, swqe, EIB_LOGOUT_DONE, err));
872 }
873 
874 static int
875 eib_fip_parse_vhub_table(uint8_t *pkt, eib_vnic_t *vnic)
876 {
877 	fip_vhub_table_t *tbl;
878 	fip_desc_vhub_table_t *desc_tbl;
879 	fip_vhub_table_entry_t *entry;
880 	fip_basic_hdr_t *hdr;
881 	eib_t *ss = vnic->vn_ss;
882 	eib_login_data_t *ld = &vnic->vn_login_data;
883 	eib_vhub_table_t *etbl = vnic->vn_vhub_table;
884 	eib_vhub_update_t *eupd = vnic->vn_vhub_update;
885 	eib_vhub_map_t *newmap;
886 
887 	uint32_t *ipkt;
888 	uint32_t init_checksum = 0;
889 	uint32_t tusn;
890 	uint32_t vhub_id;
891 	uint_t entries_in_pkt;
892 	uint_t ndx;
893 	uint_t i;
894 
895 	/*
896 	 * If we're here receiving vhub table messages, we certainly should
897 	 * have the vhub table structure allocated and present at this point.
898 	 */
899 	if (etbl == NULL) {
900 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
901 		    "vhub table missing for vnic id 0x%x", vnic->vn_id);
902 		return (EIB_E_FAILURE);
903 	}
904 
905 	/*
906 	 * Note that 'pkt' is always atleast double-word aligned when it is
907 	 * passed to us, so we can cast it without any problems.
908 	 */
909 	ipkt = (uint32_t *)(void *)pkt;
910 	tbl = (fip_vhub_table_t *)(void *)pkt;
911 	hdr = &(tbl->vt_fip_header);
912 
913 	/*
914 	 * Validate all the header and descriptor types and lengths
915 	 */
916 	if (hdr->hd_type != FIP_DESC_TYPE_VENDOR_ID ||
917 	    hdr->hd_len != FIP_DESC_LEN_VENDOR_ID) {
918 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
919 		    "invalid type/len in fip basic header, "
920 		    "exp (0x%x,0x%x), got (0x%x,0x%x)",
921 		    FIP_DESC_TYPE_VENDOR_ID, FIP_DESC_LEN_VENDOR_ID,
922 		    hdr->hd_type, hdr->hd_len);
923 		return (EIB_E_FAILURE);
924 	}
925 	desc_tbl = &(tbl->vt_vhub_table);
926 	if (desc_tbl->tb_type != FIP_DESC_TYPE_VHUB_TABLE) {
927 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
928 		    "invalid type in vhub desc, exp 0x%x, got 0x%x",
929 		    FIP_DESC_TYPE_VHUB_TABLE, desc_tbl->tb_type);
930 		return (EIB_E_FAILURE);
931 	}
932 
933 	/*
934 	 * Verify that the vhub id is ok for this vnic
935 	 */
936 	vhub_id = ntohl(desc_tbl->tb_flags_vhub_id) & FIP_TB_VHUB_ID_MASK;
937 	if (vhub_id != ld->ld_vhub_id) {
938 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
939 		    "invalid vhub id in vhub table pkt: exp 0x%x, got 0x%x",
940 		    ld->ld_vhub_id, vhub_id);
941 		return (EIB_E_FAILURE);
942 	}
943 
944 	/*
945 	 * Count the number of vhub table entries in this packet
946 	 */
947 	entries_in_pkt = (desc_tbl->tb_len - FIP_DESC_VHUB_TABLE_WORDS) /
948 	    FIP_VHUB_TABLE_ENTRY_WORDS;
949 
950 	/*
951 	 * While we're here, also compute the 32-bit 2's complement carry-
952 	 * discarded checksum of the vHUB table descriptor in this packet
953 	 * till the first vhub table entry.
954 	 */
955 	for (i = 0; i < FIP_DESC_VHUB_TABLE_WORDS; i++)
956 		init_checksum += ipkt[i];
957 
958 	/*
959 	 * Initialize the vhub's Table Update Sequence Number (tusn),
960 	 * checksum and record the total number of entries in in the table
961 	 * if this is the first pkt of the table.
962 	 */
963 	tusn = ntohl(desc_tbl->tb_tusn);
964 	if (desc_tbl->tb_hdr & FIP_TB_HDR_FIRST) {
965 		etbl->tb_entries_in_table = ntohs(desc_tbl->tb_table_size);
966 		etbl->tb_tusn = tusn;
967 		etbl->tb_checksum = 0;
968 
969 		mutex_enter(&vnic->vn_lock);
970 		vnic->vn_state = EIB_LOGIN_TBL_INPROG;
971 		mutex_exit(&vnic->vn_lock);
972 	}
973 
974 	/*
975 	 * First, middle or last, the current table TUSN we have must match this
976 	 * packet's TUSN.
977 	 */
978 	if (etbl->tb_tusn != tusn) {
979 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
980 		    "unexpected TUSN (0x%lx) during vhub table construction, "
981 		    "expected 0x%lx", etbl->tb_tusn, tusn);
982 		goto vhub_table_fail;
983 	}
984 
985 	/*
986 	 * See if we've overrun/underrun our original entries count
987 	 */
988 	if ((etbl->tb_entries_seen + entries_in_pkt) >
989 	    etbl->tb_entries_in_table) {
990 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
991 		    "vhub table overrun, total_exp=%d, so_far=%d, this_pkt=%d",
992 		    etbl->tb_entries_in_table, etbl->tb_entries_seen,
993 		    entries_in_pkt);
994 		goto vhub_table_fail;
995 	} else if (((etbl->tb_entries_seen + entries_in_pkt) <
996 	    etbl->tb_entries_in_table) &&
997 	    (desc_tbl->tb_hdr & FIP_TB_HDR_LAST)) {
998 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
999 		    "vhub table underrun, total_exp=%d, so_far=%d, last_pkt=%d",
1000 		    etbl->tb_entries_in_table, etbl->tb_entries_seen,
1001 		    entries_in_pkt);
1002 		goto vhub_table_fail;
1003 	}
1004 
1005 	/*
1006 	 * Process and add the entries we have in this packet
1007 	 */
1008 	etbl->tb_checksum += init_checksum;
1009 	entry = (fip_vhub_table_entry_t *)(void *)
1010 	    ((uint8_t *)desc_tbl + FIP_DESC_VHUB_TABLE_SZ);
1011 
1012 	for (ndx = 0; ndx < entries_in_pkt; ndx++, entry++) {
1013 		/*
1014 		 * Allocate a eib_vhub_map_t, copy the current entry details
1015 		 * and chain it to the appropriate queue.
1016 		 */
1017 		if ((newmap = eib_fip_get_vhub_map()) == NULL) {
1018 			EIB_DPRINTF_WARN(ss->ei_instance,
1019 			    "eib_fip_parse_vhub_table: no memory for vhub "
1020 			    "table entry, ignoring this vhub table packet");
1021 			goto vhub_table_fail;
1022 		}
1023 
1024 		ASSERT((entry->te_v_rss_type & FIP_TE_VALID) == FIP_TE_VALID);
1025 		newmap->mp_v_rss_type = entry->te_v_rss_type;
1026 		bcopy(entry->te_mac, newmap->mp_mac, ETHERADDRL);
1027 		newmap->mp_qpn = (ntohl(entry->te_qpn) & FIP_TE_QPN_MASK);
1028 		newmap->mp_sl = (entry->te_sl & FIP_TE_SL_MASK);
1029 		newmap->mp_lid = ntohs(entry->te_lid);
1030 		newmap->mp_tusn = tusn;
1031 		newmap->mp_next = NULL;
1032 
1033 		/*
1034 		 * The vhub table messages do not provide status on eport
1035 		 * state, so we'll simply assume that the eport is up.
1036 		 */
1037 		eib_fip_queue_tbl_entry(etbl, newmap, tusn, FIP_EPORT_UP);
1038 
1039 		/*
1040 		 * Update table checksum with this entry's computed checksum
1041 		 */
1042 		ipkt = (uint32_t *)entry;
1043 		for (i = 0; i < FIP_VHUB_TABLE_ENTRY_WORDS; i++)
1044 			etbl->tb_checksum += ipkt[i];
1045 	}
1046 	etbl->tb_entries_seen += entries_in_pkt;
1047 
1048 	/*
1049 	 * If this is the last packet of this vhub table, complete vhub
1050 	 * table by verifying checksum and applying all the vhub updates
1051 	 * that may have come in while we were constructing this table.
1052 	 */
1053 	if (desc_tbl->tb_hdr & FIP_TB_HDR_LAST) {
1054 
1055 		ipkt = (uint32_t *)entry;
1056 		if (!eib_wa_no_good_vhub_cksum) {
1057 			if (*ipkt != etbl->tb_checksum) {
1058 				EIB_DPRINTF_VERBOSE(ss->ei_instance,
1059 				    "eib_fip_parse_vhub_table: "
1060 				    "vhub table checksum invalid, "
1061 				    "computed=0x%lx, found=0x%lx",
1062 				    etbl->tb_checksum, *ipkt);
1063 			}
1064 		}
1065 
1066 		/*
1067 		 * Per the EoIB specification, the gateway is supposed to
1068 		 * include its address information for data messages in the
1069 		 * vhub table.  But we've observed that it doesn't do this
1070 		 * (with the current version). If this is the case, we'll
1071 		 * hand-create and add a vhub map for the gateway from the
1072 		 * information we got in login ack.
1073 		 */
1074 		if (etbl->tb_gateway == NULL)
1075 			eib_fip_queue_gw_entry(vnic, etbl, tusn, FIP_EPORT_UP);
1076 
1077 		/*
1078 		 * Apply pending vhub updates and reset table counters needed
1079 		 * during table construction.
1080 		 */
1081 		if (eib_fip_apply_updates(ss, etbl, eupd) != EIB_E_SUCCESS)
1082 			goto vhub_table_fail;
1083 
1084 		etbl->tb_entries_seen = 0;
1085 		etbl->tb_entries_in_table = 0;
1086 
1087 		eib_vnic_vhub_table_done(vnic, EIB_LOGIN_TBL_DONE);
1088 	}
1089 
1090 	return (EIB_E_SUCCESS);
1091 
1092 vhub_table_fail:
1093 	eib_vnic_vhub_table_done(vnic, EIB_LOGIN_TBL_FAILED);
1094 	return (EIB_E_FAILURE);
1095 }
1096 
1097 static int
1098 eib_fip_parse_vhub_update(uint8_t *pkt, eib_vnic_t *vnic)
1099 {
1100 	fip_vhub_update_t *upd;
1101 	fip_desc_vhub_update_t *desc_upd;
1102 	fip_vhub_table_entry_t *entry;
1103 	fip_basic_hdr_t *hdr;
1104 	eib_t *ss = vnic->vn_ss;
1105 	eib_login_data_t *ld = &vnic->vn_login_data;
1106 	eib_vhub_table_t *etbl = vnic->vn_vhub_table;
1107 	eib_vhub_update_t *eupd = vnic->vn_vhub_update;
1108 	eib_vhub_map_t *newmap;
1109 	boolean_t vhub_tbl_done;
1110 	uint32_t eport_vp_vhub_id;
1111 	uint32_t vhub_id;
1112 	uint32_t tusn;
1113 	uint32_t prev_tusn;
1114 	uint8_t eport_state;
1115 
1116 	/*
1117 	 * We should have the vhub table allocated as long as we're receiving
1118 	 * vhub control messages.
1119 	 */
1120 	if (etbl == NULL) {
1121 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: "
1122 		    "vhub table missing for vnic id 0x%x", vnic->vn_id);
1123 		return (EIB_E_FAILURE);
1124 	}
1125 
1126 	mutex_enter(&vnic->vn_lock);
1127 	vhub_tbl_done = ((vnic->vn_state == EIB_LOGIN_TBL_DONE) ||
1128 	    (vnic->vn_state == EIB_LOGIN_DONE)) ? B_TRUE : B_FALSE;
1129 	mutex_exit(&vnic->vn_lock);
1130 
1131 	/*
1132 	 * Note that 'pkt' is always atleast double-word aligned when it is
1133 	 * passed to us, so we can cast it without any problems.
1134 	 */
1135 	upd = (fip_vhub_update_t *)(void *)pkt;
1136 	hdr = &(upd->vu_fip_header);
1137 
1138 	/*
1139 	 * Validate all the header and descriptor types and lengths
1140 	 */
1141 	if (hdr->hd_type != FIP_DESC_TYPE_VENDOR_ID ||
1142 	    hdr->hd_len != FIP_DESC_LEN_VENDOR_ID) {
1143 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: "
1144 		    "invalid type/len in fip basic header, "
1145 		    "exp (0x%x,0x%x), got (0x%x,0x%x)",
1146 		    FIP_DESC_TYPE_VENDOR_ID, FIP_DESC_LEN_VENDOR_ID,
1147 		    hdr->hd_type, hdr->hd_len);
1148 		return (EIB_E_FAILURE);
1149 	}
1150 	desc_upd = &(upd->vu_vhub_update);
1151 	if (desc_upd->up_type != FIP_DESC_TYPE_VHUB_UPDATE ||
1152 	    desc_upd->up_len != FIP_DESC_LEN_VHUB_UPDATE) {
1153 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: "
1154 		    "invalid type/len in vhub update desc: "
1155 		    "exp (0x%x,0x%x), got (0x%x,0x%x)",
1156 		    FIP_DESC_TYPE_VHUB_UPDATE, FIP_DESC_LEN_VHUB_UPDATE,
1157 		    desc_upd->up_type, desc_upd->up_len);
1158 		return (EIB_E_FAILURE);
1159 	}
1160 
1161 	/*
1162 	 * Verify that the vhub id is ok for this vnic and save the eport state
1163 	 */
1164 	eport_vp_vhub_id = ntohl(desc_upd->up_eport_vp_vhub_id);
1165 
1166 	vhub_id = eport_vp_vhub_id & FIP_UP_VHUB_ID_MASK;
1167 	if (vhub_id != ld->ld_vhub_id) {
1168 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: "
1169 		    "invalid vhub id in vhub update pkt: exp 0x%x, got 0x%x",
1170 		    ld->ld_vhub_id, vhub_id);
1171 		return (EIB_E_FAILURE);
1172 	}
1173 	eport_state = (uint8_t)((eport_vp_vhub_id >> FIP_UP_EPORT_STATE_SHIFT) &
1174 	    FIP_UP_EPORT_STATE_MASK);
1175 
1176 	/*
1177 	 * If this is the first update we receive, any tusn is ok.  Otherwise,
1178 	 * make sure the tusn we see in the packet is appropriate.
1179 	 */
1180 	tusn = ntohl(desc_upd->up_tusn);
1181 	prev_tusn = vhub_tbl_done ? etbl->tb_tusn : eupd->up_tusn;
1182 
1183 	if (prev_tusn != 0) {
1184 		if (tusn == prev_tusn) {
1185 			eib_fip_update_eport_state(ss, etbl, eupd,
1186 			    vhub_tbl_done, eport_state);
1187 			return (EIB_E_SUCCESS);
1188 		}
1189 		if (tusn != (prev_tusn + 1)) {
1190 			EIB_DPRINTF_WARN(ss->ei_instance,
1191 			    "eib_fip_parse_vhub_update: "
1192 			    "out of order TUSN received (exp 0x%lx, "
1193 			    "got 0x%lx), dropping pkt", prev_tusn + 1, tusn);
1194 			return (EIB_E_FAILURE);
1195 		}
1196 	}
1197 
1198 	/*
1199 	 * EoIB expects only type 0 (vnic address) entries to maintain the
1200 	 * context table
1201 	 */
1202 	entry = &(desc_upd->up_tbl_entry);
1203 	ASSERT((entry->te_v_rss_type & FIP_TE_TYPE_MASK) == FIP_TE_TYPE_VNIC);
1204 
1205 	/*
1206 	 * If the vHUB table has already been fully constructed and if we've
1207 	 * now received a notice to remove a vnic entry from it, do it.
1208 	 */
1209 	if ((vhub_tbl_done) &&
1210 	    ((entry->te_v_rss_type & FIP_TE_VALID) == 0)) {
1211 		eib_fip_dequeue_tbl_entry(etbl, entry->te_mac,
1212 		    tusn, eport_state);
1213 
1214 		if (bcmp(entry->te_mac, ld->ld_assigned_mac, ETHERADDRL) == 0) {
1215 			uint8_t *mymac;
1216 
1217 			mymac = entry->te_mac;
1218 			EIB_DPRINTF_WARN(ss->ei_instance,
1219 			    "eib_fip_parse_vhub_update: "
1220 			    "vhub update pkt received to kill self "
1221 			    "(%x:%x:%x:%x:%x:%x)", mymac[0], mymac[1], mymac[2],
1222 			    mymac[3], mymac[4], mymac[5]);
1223 
1224 			return (EIB_E_FAILURE);
1225 		}
1226 		return (EIB_E_SUCCESS);
1227 	}
1228 
1229 	/*
1230 	 * Otherwise, allocate a new eib_vhub_map_t and fill it in with
1231 	 * the details of the new entry
1232 	 */
1233 	if ((newmap = eib_fip_get_vhub_map()) == NULL) {
1234 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: "
1235 		    "no memory for vhub update entry, will be ignoring"
1236 		    "this vhub update packet");
1237 		return (EIB_E_FAILURE);
1238 	}
1239 
1240 	newmap->mp_v_rss_type = entry->te_v_rss_type;
1241 	bcopy(entry->te_mac, newmap->mp_mac, ETHERADDRL);
1242 	newmap->mp_qpn = (ntohl(entry->te_qpn) & FIP_TE_QPN_MASK);
1243 	newmap->mp_sl = (entry->te_sl & FIP_TE_SL_MASK);
1244 	newmap->mp_lid = ntohs(entry->te_lid);
1245 	newmap->mp_tusn = tusn;
1246 	newmap->mp_next = NULL;
1247 
1248 	/*
1249 	 * Update the full vhub table or chain it to the list of pending
1250 	 * updates depending on if the vhub table construction is over
1251 	 * or not.
1252 	 */
1253 	if (vhub_tbl_done) {
1254 		eib_fip_queue_tbl_entry(etbl, newmap, tusn, eport_state);
1255 	} else {
1256 		eib_fip_queue_upd_entry(eupd, newmap, tusn, eport_state);
1257 	}
1258 
1259 	return (EIB_E_SUCCESS);
1260 }
1261 
1262 static void
1263 eib_fip_update_eport_state(eib_t *ss, eib_vhub_table_t *tbl,
1264     eib_vhub_update_t *upd, boolean_t tbl_done, uint8_t eport_state)
1265 {
1266 	if (tbl_done) {
1267 		mutex_enter(&tbl->tb_lock);
1268 		if (tbl->tb_eport_state != eport_state) {
1269 			EIB_DPRINTF_DEBUG(ss->ei_instance,
1270 			    "eib_fip_update_eport_state: "
1271 			    "eport state changing from %d to %d",
1272 			    tbl->tb_eport_state, eport_state);
1273 			tbl->tb_eport_state = eport_state;
1274 		}
1275 		mutex_exit(&tbl->tb_lock);
1276 	} else {
1277 		mutex_enter(&upd->up_lock);
1278 		if (upd->up_eport_state != eport_state) {
1279 			EIB_DPRINTF_DEBUG(ss->ei_instance,
1280 			    "eib_fip_update_eport_state: "
1281 			    "eport state changing from %d to %d",
1282 			    upd->up_eport_state, eport_state);
1283 			upd->up_eport_state = eport_state;
1284 		}
1285 		mutex_exit(&upd->up_lock);
1286 	}
1287 }
1288 
1289 static void
1290 eib_fip_queue_tbl_entry(eib_vhub_table_t *tbl, eib_vhub_map_t *map,
1291     uint32_t tusn, uint8_t eport_state)
1292 {
1293 	uint8_t bkt;
1294 
1295 	mutex_enter(&tbl->tb_lock);
1296 
1297 	switch (map->mp_v_rss_type & FIP_TE_TYPE_MASK) {
1298 	case FIP_TE_TYPE_GATEWAY:
1299 		if (tbl->tb_gateway) {
1300 			kmem_free(tbl->tb_gateway,
1301 			    sizeof (eib_vhub_map_t));
1302 		}
1303 		tbl->tb_gateway = map;
1304 		break;
1305 
1306 	case FIP_TE_TYPE_UNICAST_MISS:
1307 		if (tbl->tb_unicast_miss) {
1308 			kmem_free(tbl->tb_unicast_miss,
1309 			    sizeof (eib_vhub_map_t));
1310 		}
1311 		tbl->tb_unicast_miss = map;
1312 		break;
1313 
1314 	case FIP_TE_TYPE_VHUB_MULTICAST:
1315 		if (tbl->tb_vhub_multicast) {
1316 			kmem_free(tbl->tb_vhub_multicast,
1317 			    sizeof (eib_vhub_map_t));
1318 		}
1319 		tbl->tb_vhub_multicast = map;
1320 		break;
1321 
1322 	case FIP_TE_TYPE_MULTICAST_ENTRY:
1323 		/*
1324 		 * If multicast entry types are not to be specially
1325 		 * processed, treat them like regular vnic addresses.
1326 		 */
1327 		if (!eib_wa_no_mcast_entries) {
1328 			bkt = (map->mp_mac[ETHERADDRL-1]) % EIB_TB_NBUCKETS;
1329 			map->mp_next = tbl->tb_mcast_entry[bkt];
1330 			tbl->tb_mcast_entry[bkt] = map;
1331 			break;
1332 		}
1333 		/*FALLTHROUGH*/
1334 
1335 	case FIP_TE_TYPE_VNIC:
1336 		bkt = (map->mp_mac[ETHERADDRL-1]) % EIB_TB_NBUCKETS;
1337 		map->mp_next = tbl->tb_vnic_entry[bkt];
1338 		tbl->tb_vnic_entry[bkt] = map;
1339 		break;
1340 	}
1341 
1342 	tbl->tb_tusn = tusn;
1343 	tbl->tb_eport_state = eport_state;
1344 
1345 	mutex_exit(&tbl->tb_lock);
1346 }
1347 
1348 static void
1349 eib_fip_queue_upd_entry(eib_vhub_update_t *upd, eib_vhub_map_t *map,
1350     uint32_t tusn, uint8_t eport_state)
1351 {
1352 	eib_vhub_map_t *tail;
1353 
1354 	/*
1355 	 * The eib_vhub_update_t list is only touched/traversed when the
1356 	 * control cq handler is parsing either update or table message,
1357 	 * or by the table cleanup routine when we aren't attached to any
1358 	 * control mcgs.  Bottom line is that this list traversal is always
1359 	 * single-threaded and we could probably do away with the lock.
1360 	 */
1361 	mutex_enter(&upd->up_lock);
1362 	for (tail = upd->up_vnic_entry;  tail != NULL; tail = tail->mp_next) {
1363 		if (tail->mp_next == NULL)
1364 			break;
1365 	}
1366 	if (tail) {
1367 		tail->mp_next = map;
1368 	} else {
1369 		upd->up_vnic_entry = map;
1370 	}
1371 
1372 	upd->up_tusn = tusn;
1373 	upd->up_eport_state = eport_state;
1374 
1375 	mutex_exit(&upd->up_lock);
1376 }
1377 
1378 static void
1379 eib_fip_queue_gw_entry(eib_vnic_t *vnic, eib_vhub_table_t *tbl, uint32_t tusn,
1380     uint8_t eport_state)
1381 {
1382 	eib_t *ss = vnic->vn_ss;
1383 	eib_vhub_map_t *newmap;
1384 	eib_login_data_t *ld = &vnic->vn_login_data;
1385 
1386 	if ((newmap = eib_fip_get_vhub_map()) == NULL) {
1387 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_queue_gw_entry: "
1388 		    "no memory to queue gw entry, transactions could fail");
1389 		return;
1390 	}
1391 
1392 	newmap->mp_v_rss_type = FIP_TE_VALID | FIP_TE_TYPE_GATEWAY;
1393 	bcopy(eib_zero_mac, newmap->mp_mac, ETHERADDRL);
1394 	newmap->mp_qpn = ld->ld_gw_data_qpn;
1395 	newmap->mp_sl = ld->ld_gw_sl;
1396 	newmap->mp_lid = ld->ld_gw_lid;
1397 	newmap->mp_tusn = tusn;
1398 	newmap->mp_next = NULL;
1399 
1400 	eib_fip_queue_tbl_entry(tbl, newmap, tusn, eport_state);
1401 }
1402 
1403 static int
1404 eib_fip_apply_updates(eib_t *ss, eib_vhub_table_t *tbl, eib_vhub_update_t *upd)
1405 {
1406 	eib_vhub_map_t *list;
1407 	eib_vhub_map_t *map;
1408 	eib_vhub_map_t *nxt;
1409 	uint32_t tbl_tusn = tbl->tb_tusn;
1410 
1411 	/*
1412 	 * Take the update list out
1413 	 */
1414 	mutex_enter(&upd->up_lock);
1415 	list = upd->up_vnic_entry;
1416 	upd->up_vnic_entry = NULL;
1417 	mutex_exit(&upd->up_lock);
1418 
1419 	/*
1420 	 * Skip any updates with older/same tusn as our vhub table
1421 	 */
1422 	nxt = NULL;
1423 	for (map = list; (map) && (map->mp_tusn <= tbl_tusn); map = nxt) {
1424 		nxt = map->mp_next;
1425 		kmem_free(map, sizeof (eib_vhub_map_t));
1426 	}
1427 
1428 	if (map == NULL)
1429 		return (EIB_E_SUCCESS);
1430 
1431 	/*
1432 	 * If we missed any updates between table tusn and the first
1433 	 * update tusn we got, we need to fail.
1434 	 */
1435 	if (map->mp_tusn > (tbl_tusn + 1)) {
1436 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_apply_updates: "
1437 		    "vhub update missed tusn(s), expected=0x%lx, got=0x%lx",
1438 		    (tbl_tusn + 1), map->mp_tusn);
1439 		for (; map != NULL; map = nxt) {
1440 			nxt = map->mp_next;
1441 			kmem_free(map, sizeof (eib_vhub_map_t));
1442 		}
1443 		return (EIB_E_FAILURE);
1444 	}
1445 
1446 	/*
1447 	 * If everything is fine, apply all the updates we received
1448 	 */
1449 	for (; map != NULL; map = nxt) {
1450 		nxt = map->mp_next;
1451 		map->mp_next = NULL;
1452 
1453 		if (map->mp_v_rss_type & FIP_TE_VALID) {
1454 			eib_fip_queue_tbl_entry(tbl, map, upd->up_tusn,
1455 			    upd->up_eport_state);
1456 		} else {
1457 			eib_fip_dequeue_tbl_entry(tbl, map->mp_mac,
1458 			    upd->up_tusn, upd->up_eport_state);
1459 			kmem_free(map, sizeof (eib_vhub_map_t));
1460 		}
1461 	}
1462 
1463 	return (EIB_E_SUCCESS);
1464 }
1465 
1466 static void
1467 eib_fip_dequeue_tbl_entry(eib_vhub_table_t *tbl, uint8_t *mac, uint32_t tusn,
1468     uint8_t eport_state)
1469 {
1470 	uint8_t bkt;
1471 	eib_vhub_map_t *prev;
1472 	eib_vhub_map_t *elem;
1473 
1474 	bkt = (mac[ETHERADDRL-1]) % EIB_TB_NBUCKETS;
1475 
1476 	mutex_enter(&tbl->tb_lock);
1477 
1478 	/*
1479 	 * Note that for EoIB, the vhub table is maintained using only
1480 	 * vnic entry updates
1481 	 */
1482 	prev = NULL;
1483 	for (elem = tbl->tb_vnic_entry[bkt]; elem; elem = elem->mp_next) {
1484 		if (bcmp(elem->mp_mac, mac, ETHERADDRL) == 0)
1485 			break;
1486 		prev = elem;
1487 	}
1488 
1489 	if (prev && elem) {
1490 		prev->mp_next = elem->mp_next;
1491 		kmem_free(elem, sizeof (eib_vhub_map_t));
1492 	}
1493 
1494 	tbl->tb_tusn = tusn;
1495 	tbl->tb_eport_state = eport_state;
1496 
1497 	mutex_exit(&tbl->tb_lock);
1498 }
1499 
1500 static eib_vhub_map_t *
1501 eib_fip_get_vhub_map(void)
1502 {
1503 	return (kmem_zalloc(sizeof (eib_vhub_map_t), KM_NOSLEEP));
1504 }
1505