1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 #include <sys/types.h>
27 #include <sys/ddi.h>
28 #include <sys/sunddi.h>
29 #include <sys/ksynch.h>
30 #include <sys/byteorder.h>
31
32 #include <sys/ib/clients/eoib/eib_impl.h>
33
34 /*
35 * Declarations private to this file
36 */
37 static int eib_fip_make_login(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
38 static int eib_fip_make_update(eib_t *, eib_vnic_t *, eib_wqe_t *, int, int *);
39 static int eib_fip_make_table(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
40 static int eib_fip_make_ka(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
41 static int eib_fip_make_logout(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
42
43 static int eib_fip_send_login(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
44 static int eib_fip_send_update(eib_t *, eib_vnic_t *, eib_wqe_t *,
45 uint_t, int *);
46 static int eib_fip_send_table(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
47 static int eib_fip_send_ka(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
48 static int eib_fip_send_logout(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
49
50 static int eib_fip_parse_vhub_table(uint8_t *, eib_vnic_t *);
51 static int eib_fip_parse_vhub_update(uint8_t *, eib_vnic_t *);
52 static void eib_fip_update_eport_state(eib_t *, eib_vhub_table_t *,
53 eib_vhub_update_t *, boolean_t, uint8_t);
54 static void eib_fip_queue_tbl_entry(eib_vhub_table_t *, eib_vhub_map_t *,
55 uint32_t, uint8_t);
56 static void eib_fip_queue_upd_entry(eib_vhub_update_t *, eib_vhub_map_t *,
57 uint32_t, uint8_t);
58 static void eib_fip_queue_gw_entry(eib_vnic_t *, eib_vhub_table_t *, uint32_t,
59 uint8_t);
60 static int eib_fip_apply_updates(eib_t *, eib_vhub_table_t *,
61 eib_vhub_update_t *);
62 static void eib_fip_dequeue_tbl_entry(eib_vhub_table_t *, uint8_t *, uint32_t,
63 uint8_t);
64 static eib_vhub_map_t *eib_fip_get_vhub_map(void);
65
66 /*
67 * Definitions private to this file
68 */
/*
 * Vendor id bytes copied into every FIP header/descriptor built in this
 * file: the eight ASCII characters "Mellanox", with no terminating NUL.
 */
const char eib_vendor_mellanox[] = {
	'M', 'e', 'l', 'l', 'a', 'n', 'o', 'x'
};
72
/*
 * Three different requests to the gateway are built from the same vnic
 * identity descriptor; only the flag settings, the TUSN and the FIP
 * subcode differ:
 *
 *	request			R  U  TUSN	subcode
 *	vHUB table		1  0  last	KEEPALIVE
 *	keepalive/vHUB update	0  1  last	KEEPALIVE
 *	vNIC logout		0  0  0		LOGOUT
 */
enum {
	EIB_UPD_REQ_TABLE = 1,
	EIB_UPD_REQ_KA = 2,
	EIB_UPD_REQ_LOGOUT = 3
};
85
86 int
eib_fip_login(eib_t * ss,eib_vnic_t * vnic,int * err)87 eib_fip_login(eib_t *ss, eib_vnic_t *vnic, int *err)
88 {
89 eib_wqe_t *swqe;
90 int ret;
91 int ntries = 0;
92
93 do {
94 if ((swqe = eib_rsrc_grab_swqe(ss, EIB_WPRI_LO)) == NULL) {
95 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_login: "
96 "no swqe available, not sending "
97 "vnic login request");
98 *err = ENOMEM;
99 return (EIB_E_FAILURE);
100 }
101
102 ret = eib_fip_make_login(ss, vnic, swqe, err);
103 if (ret != EIB_E_SUCCESS) {
104 eib_rsrc_return_swqe(ss, swqe, NULL);
105 return (EIB_E_FAILURE);
106 }
107
108 ret = eib_fip_send_login(ss, vnic, swqe, err);
109 if (ret != EIB_E_SUCCESS) {
110 eib_rsrc_return_swqe(ss, swqe, NULL);
111 return (EIB_E_FAILURE);
112 }
113
114 ret = eib_vnic_wait_for_login_ack(ss, vnic, err);
115 if (ret == EIB_E_SUCCESS)
116 break;
117
118 } while ((*err == ETIME) && (ntries++ < EIB_MAX_LOGIN_ATTEMPTS));
119
120 return (ret);
121 }
122
123 int
eib_fip_vhub_table(eib_t * ss,eib_vnic_t * vnic,int * err)124 eib_fip_vhub_table(eib_t *ss, eib_vnic_t *vnic, int *err)
125 {
126 eib_wqe_t *swqe;
127 int ret;
128 int ntries = 0;
129
130 do {
131 if ((swqe = eib_rsrc_grab_swqe(ss, EIB_WPRI_LO)) == NULL) {
132 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_vhub_table: "
133 "no swqe available, not sending "
134 "vhub table request");
135 *err = ENOMEM;
136 return (EIB_E_FAILURE);
137 }
138
139 ret = eib_fip_make_table(ss, vnic, swqe, err);
140 if (ret != EIB_E_SUCCESS) {
141 eib_rsrc_return_swqe(ss, swqe, NULL);
142 return (EIB_E_FAILURE);
143 }
144
145 ret = eib_fip_send_table(ss, vnic, swqe, err);
146 if (ret != EIB_E_SUCCESS) {
147 eib_rsrc_return_swqe(ss, swqe, NULL);
148 return (EIB_E_FAILURE);
149 }
150
151 ret = eib_vnic_wait_for_table(ss, vnic, err);
152 if (ret == EIB_E_SUCCESS) {
153 return (EIB_E_SUCCESS);
154 }
155
156 /*
157 * If we'd failed in constructing a proper vhub table above,
158 * the vnic login state would be set to EIB_LOGIN_TBL_FAILED.
159 * We need to clean up any pending entries from the vhub
160 * table and vhub update structures and reset the vnic state
161 * to EIB_LOGIN_ACK_RCVD before we can try again.
162 */
163 eib_vnic_fini_tables(ss, vnic, B_FALSE);
164 mutex_enter(&vnic->vn_lock);
165 vnic->vn_state = EIB_LOGIN_ACK_RCVD;
166 mutex_exit(&vnic->vn_lock);
167
168 } while ((*err == ETIME) && (ntries++ < EIB_MAX_VHUB_TBL_ATTEMPTS));
169
170 return (EIB_E_FAILURE);
171 }
172
173 int
eib_fip_heartbeat(eib_t * ss,eib_vnic_t * vnic,int * err)174 eib_fip_heartbeat(eib_t *ss, eib_vnic_t *vnic, int *err)
175 {
176 eib_wqe_t *swqe;
177 int ntries = 0;
178 int ret;
179
180 /*
181 * Even if we're running low on the wqe resource, we want to be
182 * able to grab a wqe to send the keepalive, to avoid getting
183 * logged out by the gateway, so we use EIB_WPRI_HI.
184 */
185 if ((swqe = eib_rsrc_grab_swqe(ss, EIB_WPRI_HI)) == NULL) {
186 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_heartbeat: "
187 "no swqe available, not sending heartbeat");
188 return (EIB_E_FAILURE);
189 }
190
191 while (ntries++ < EIB_MAX_KA_ATTEMPTS) {
192 ret = eib_fip_make_ka(ss, vnic, swqe, err);
193 if (ret != EIB_E_SUCCESS)
194 continue;
195
196 ret = eib_fip_send_ka(ss, vnic, swqe, err);
197 if (ret == EIB_E_SUCCESS)
198 break;
199 }
200
201 if (ret != EIB_E_SUCCESS)
202 eib_rsrc_return_swqe(ss, swqe, NULL);
203
204 return (ret);
205 }
206
207 int
eib_fip_logout(eib_t * ss,eib_vnic_t * vnic,int * err)208 eib_fip_logout(eib_t *ss, eib_vnic_t *vnic, int *err)
209 {
210 eib_wqe_t *swqe;
211 int ret;
212
213 /*
214 * This routine is only called after the vnic has successfully
215 * logged in to the gateway. If that's really the case, there
216 * is nothing in terms of resources we need to release: the swqe
217 * that was acquired during login has already been posted, the
218 * work has been completed and the swqe has also been reaped back
219 * into the free pool. The only thing we need to rollback is the
220 * fact that we're logged in to the gateway at all -- and the way
221 * to do this is to send a logout request.
222 */
223 if ((swqe = eib_rsrc_grab_swqe(ss, EIB_WPRI_LO)) == NULL) {
224 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_logout: "
225 "no swqe available, not sending logout");
226 return (EIB_E_FAILURE);
227 }
228
229 ret = eib_fip_make_logout(ss, vnic, swqe, err);
230 if (ret != EIB_E_SUCCESS) {
231 eib_rsrc_return_swqe(ss, swqe, NULL);
232 return (EIB_E_FAILURE);
233 }
234
235 ret = eib_fip_send_logout(ss, vnic, swqe, err);
236 if (ret != EIB_E_SUCCESS) {
237 eib_rsrc_return_swqe(ss, swqe, NULL);
238 return (EIB_E_FAILURE);
239 }
240
241 return (EIB_E_SUCCESS);
242 }
243
244 int
eib_fip_parse_login_ack(eib_t * ss,uint8_t * pkt,eib_login_data_t * ld)245 eib_fip_parse_login_ack(eib_t *ss, uint8_t *pkt, eib_login_data_t *ld)
246 {
247 fip_login_ack_t *ack;
248 fip_basic_hdr_t *hdr;
249 fip_desc_iba_t *iba;
250 fip_desc_vnic_login_t *login;
251 fip_desc_partition_t *partition;
252 ib_guid_t guid;
253 uint32_t syn_ctl_qpn;
254 uint16_t sl_portid;
255 uint16_t flags_vlan;
256 uint16_t opcode;
257 uint8_t subcode;
258
259 /*
260 * Note that 'pkt' is always atleast double-word aligned
261 * when it is passed to us, so we can cast it without any
262 * problems.
263 */
264 ack = (fip_login_ack_t *)(void *)pkt;
265 hdr = &(ack->ak_fip_header);
266
267 /*
268 * Verify that the opcode is EoIB
269 */
270 if ((opcode = ntohs(hdr->hd_opcode)) != FIP_OPCODE_EOIB) {
271 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
272 "unsupported opcode 0x%x in login ack, ignoring",
273 opcode);
274 return (EIB_E_FAILURE);
275 }
276
277 /*
278 * The admin qp in the EoIB driver should receive only the login
279 * acknowledgements
280 */
281 subcode = hdr->hd_subcode;
282 if (subcode != FIP_SUBCODE_G_VNIC_LOGIN_ACK) {
283 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
284 "unexpected subcode 0x%x received by adm qp, ignoring",
285 subcode);
286 return (EIB_E_FAILURE);
287 }
288
289 /*
290 * Verify if the descriptor list length in the received packet is
291 * valid if the workaround to disable it explicitly is absent.
292 */
293 if (!eib_wa_no_desc_list_len) {
294 uint_t pkt_data_sz;
295
296 pkt_data_sz = (ntohs(hdr->hd_desc_list_len) + 2) << 2;
297 if (pkt_data_sz < sizeof (fip_login_ack_t)) {
298 EIB_DPRINTF_WARN(ss->ei_instance,
299 "eib_fip_parse_login_ack: "
300 "login ack desc list len (0x%lx) too small "
301 "(min 0x%lx)",
302 pkt_data_sz, sizeof (fip_login_ack_t));
303 return (EIB_E_FAILURE);
304 }
305 }
306
307 /*
308 * Validate all the header and descriptor types and lengths
309 */
310 if (hdr->hd_type != FIP_DESC_TYPE_VENDOR_ID ||
311 hdr->hd_len != FIP_DESC_LEN_VENDOR_ID) {
312 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
313 "invalid type/len in basic hdr: expected (0x%x,0x%x), "
314 "got (0x%x,0x%x)", FIP_DESC_TYPE_VENDOR_ID,
315 FIP_DESC_LEN_VENDOR_ID, hdr->hd_type, hdr->hd_len);
316 return (EIB_E_FAILURE);
317 }
318 iba = &(ack->ak_iba);
319 if (iba->ia_type != FIP_DESC_TYPE_IBA ||
320 iba->ia_len != FIP_DESC_LEN_IBA) {
321 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
322 "invalid type/len in iba desc: expected (0x%x,0x%x), "
323 "got (0x%x,0x%x)", FIP_DESC_TYPE_IBA, FIP_DESC_LEN_IBA,
324 iba->ia_type, iba->ia_len);
325 return (EIB_E_FAILURE);
326 }
327 login = &(ack->ak_vnic_login);
328 if (login->vl_type != FIP_DESC_TYPE_VNIC_LOGIN ||
329 login->vl_len != FIP_DESC_LEN_VNIC_LOGIN) {
330 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
331 "invalid type/len in login desc: expected (0x%x,0x%x), "
332 "got (0x%x,0x%x)", FIP_DESC_TYPE_VNIC_LOGIN,
333 FIP_DESC_LEN_VNIC_LOGIN, login->vl_type, login->vl_len);
334 return (EIB_E_FAILURE);
335 }
336 partition = &(ack->ak_vhub_partition);
337 if (partition->pn_type != FIP_DESC_TYPE_PARTITION ||
338 partition->pn_len != FIP_DESC_LEN_PARTITION) {
339 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
340 "invalid type/len in partition desc: expected (0x%x,0x%x), "
341 "got (0x%x,0x%x)", FIP_DESC_TYPE_PARTITION,
342 FIP_DESC_LEN_PARTITION, partition->pn_type,
343 partition->pn_len);
344 return (EIB_E_FAILURE);
345 }
346
347 /*
348 * Note that we'll return the vnic id as-is. The msb is not actually
349 * part of the vnic id in our internal records, so we'll mask it out
350 * later before we do our searches.
351 */
352 ld->ld_vnic_id = ntohs(login->vl_vnic_id);
353
354 syn_ctl_qpn = ntohl(login->vl_syndrome_ctl_qpn);
355
356 /*
357 * If the syndrome indicates a nack, we're done. No need to collect
358 * any more information
359 */
360 ld->ld_syndrome = (uint8_t)((syn_ctl_qpn & FIP_VL_SYN_MASK) >>
361 FIP_VL_SYN_SHIFT);
362 if (ld->ld_syndrome) {
363 return (EIB_E_SUCCESS);
364 }
365
366 /*
367 * Let's get the rest of the information out of the login ack
368 */
369 sl_portid = ntohs(iba->ia_sl_portid);
370 ld->ld_gw_port_id = sl_portid & FIP_IBA_PORTID_MASK;
371 ld->ld_gw_sl = (sl_portid & FIP_IBA_SL_MASK) >> FIP_IBA_SL_SHIFT;
372
373 ld->ld_gw_data_qpn = ntohl(iba->ia_qpn) & FIP_IBA_QPN_MASK;
374 ld->ld_gw_lid = ntohs(iba->ia_lid);
375
376 bcopy(iba->ia_guid, &guid, sizeof (ib_guid_t));
377 ld->ld_gw_guid = ntohll(guid);
378 ld->ld_vhub_mtu = ntohs(login->vl_mtu);
379 bcopy(login->vl_mac, ld->ld_assigned_mac, ETHERADDRL);
380 bcopy(login->vl_gw_mgid_prefix, ld->ld_gw_mgid_prefix,
381 FIP_MGID_PREFIX_LEN);
382 ld->ld_n_rss_mcgid = login->vl_flags_rss & FIP_VL_N_RSS_MCGID_MASK;
383 ld->ld_n_mac_mcgid = login->vl_n_mac_mcgid & FIP_VL_N_MAC_MCGID_MASK;
384 ld->ld_gw_ctl_qpn = (syn_ctl_qpn & FIP_VL_CTL_QPN_MASK);
385
386 flags_vlan = ntohs(login->vl_flags_vlan);
387 ld->ld_assigned_vlan = flags_vlan & FIP_VL_VLAN_MASK;
388 ld->ld_vlan_in_packets = (flags_vlan & FIP_VL_FLAGS_VP) ? 1 : 0;
389 bcopy(login->vl_vnic_name, ld->ld_vnic_name, FIP_VNIC_NAME_LEN);
390
391 ld->ld_vhub_pkey = ntohs(partition->pn_pkey);
392
393 return (EIB_E_SUCCESS);
394 }
395
396 int
eib_fip_parse_ctl_pkt(uint8_t * pkt,eib_vnic_t * vnic)397 eib_fip_parse_ctl_pkt(uint8_t *pkt, eib_vnic_t *vnic)
398 {
399 eib_t *ss = vnic->vn_ss;
400 fip_vhub_pkt_t *vhb;
401 fip_basic_hdr_t *hdr;
402 uint16_t opcode;
403 uint8_t subcode;
404 uint_t vnic_state;
405 int ret = EIB_E_FAILURE;
406
407 /*
408 * Note that 'pkt' is always atleast double-word aligned when it is
409 * passed to us, so we can cast it without any problems.
410 */
411 vhb = (fip_vhub_pkt_t *)(void *)pkt;
412 hdr = &(vhb->hb_fip_header);
413
414 /*
415 * Verify that the opcode is EoIB
416 */
417 if ((opcode = ntohs(hdr->hd_opcode)) != FIP_OPCODE_EOIB) {
418 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_ctl_pkt: "
419 "unsupported opcode 0x%x in ctl pkt, ignoring",
420 opcode);
421 return (EIB_E_FAILURE);
422 }
423
424 mutex_enter(&vnic->vn_lock);
425 vnic_state = vnic->vn_state;
426 mutex_exit(&vnic->vn_lock);
427
428 /*
429 * The ctl qp in the EoIB driver should receive only vHUB messages
430 */
431 subcode = hdr->hd_subcode;
432 if (subcode == FIP_SUBCODE_G_VHUB_UPDATE) {
433 if (vnic_state != EIB_LOGIN_TBL_WAIT &&
434 vnic_state != EIB_LOGIN_TBL_INPROG &&
435 vnic_state != EIB_LOGIN_TBL_DONE &&
436 vnic_state != EIB_LOGIN_DONE) {
437
438 EIB_DPRINTF_WARN(ss->ei_instance,
439 "eib_fip_parse_ctl_pkt: unexpected vnic state "
440 "(0x%lx) for subcode (VHUB_UPDATE 0x%x)",
441 vnic_state, subcode);
442 return (EIB_E_FAILURE);
443 }
444
445 ret = eib_fip_parse_vhub_update(pkt, vnic);
446
447 } else if (subcode == FIP_SUBCODE_G_VHUB_TABLE) {
448 if ((vnic_state != EIB_LOGIN_TBL_WAIT) &&
449 (vnic_state != EIB_LOGIN_TBL_INPROG)) {
450
451 EIB_DPRINTF_WARN(ss->ei_instance,
452 "eib_fip_parse_ctl_pkt: unexpected vnic state "
453 "(0x%lx) for subcode (VHUB_TABLE 0x%x)",
454 vnic_state, subcode);
455 return (EIB_E_FAILURE);
456 }
457
458 ret = eib_fip_parse_vhub_table(pkt, vnic);
459
460 } else {
461 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_ctl_pkt: "
462 "unexpected subcode 0x%x for ctl pkt", subcode);
463 }
464
465 if (ret == EIB_E_SUCCESS) {
466 /*
467 * Update last gateway heartbeat received time and
468 * gateway eport state. The eport state should only
469 * be updated if the vnic's vhub table has been fully
470 * constructed.
471 */
472 mutex_enter(&ss->ei_vnic_lock);
473 ss->ei_gw_last_heartbeat = ddi_get_lbolt64();
474 if (vnic_state == EIB_LOGIN_TBL_DONE ||
475 vnic_state == EIB_LOGIN_DONE) {
476 ss->ei_gw_eport_state =
477 vnic->vn_vhub_table->tb_eport_state;
478 }
479 mutex_exit(&ss->ei_vnic_lock);
480 }
481
482 return (ret);
483 }
484
485 static int
eib_fip_make_login(eib_t * ss,eib_vnic_t * vnic,eib_wqe_t * swqe,int * err)486 eib_fip_make_login(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
487 {
488 fip_login_t *login;
489 fip_proto_t *proto;
490 fip_basic_hdr_t *hdr;
491 fip_desc_iba_t *iba;
492 fip_desc_vnic_login_t *vlg;
493 ib_gid_t port_gid;
494 ib_guid_t port_guid;
495 uint16_t sl_portid;
496 uint16_t flags_vlan;
497
498 uint16_t gw_portid = ss->ei_gw_props->pp_gw_portid;
499 uint16_t sl = ss->ei_gw_props->pp_gw_sl;
500 uint8_t *pkt = (uint8_t *)(uintptr_t)(swqe->qe_sgl.ds_va);
501 uint_t pktsz = swqe->qe_sgl.ds_len;
502 uint_t login_sz = sizeof (fip_login_t);
503
504 if (pktsz < login_sz) {
505 *err = EINVAL;
506
507 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_make_login: "
508 "send buffer size (0x%lx) too small to send"
509 "login request (min 0x%lx)",
510 pktsz, login_sz);
511 return (EIB_E_FAILURE);
512 }
513
514 /*
515 * Lint complains that there may be an alignment issue here,
516 * but we know that the "pkt" is atleast double-word aligned,
517 * so it's ok.
518 */
519 login = (fip_login_t *)(void *)pkt;
520 bzero(pkt, login_sz);
521
522 /*
523 * Fill in the FIP protocol version
524 */
525 proto = &login->lg_proto_version;
526 proto->pr_version = FIP_PROTO_VERSION;
527
528 /*
529 * Fill in the basic header
530 */
531 hdr = &login->lg_fip_header;
532 hdr->hd_opcode = htons(FIP_OPCODE_EOIB);
533 hdr->hd_subcode = FIP_SUBCODE_H_VNIC_LOGIN;
534 hdr->hd_desc_list_len = htons((login_sz >> 2) - 2);
535 hdr->hd_flags = 0;
536 hdr->hd_type = FIP_DESC_TYPE_VENDOR_ID;
537 hdr->hd_len = FIP_DESC_LEN_VENDOR_ID;
538 bcopy(eib_vendor_mellanox, hdr->hd_vendor_id, FIP_VENDOR_LEN);
539
540 /*
541 * Fill in the Infiniband Address descriptor
542 */
543 iba = &login->lg_iba;
544 iba->ia_type = FIP_DESC_TYPE_IBA;
545 iba->ia_len = FIP_DESC_LEN_IBA;
546 bcopy(eib_vendor_mellanox, iba->ia_vendor_id, FIP_VENDOR_LEN);
547 iba->ia_qpn = htonl(vnic->vn_data_chan->ch_qpn);
548
549 sl_portid = (gw_portid & FIP_IBA_PORTID_MASK) |
550 ((sl << FIP_IBA_SL_SHIFT) & FIP_IBA_SL_MASK);
551 iba->ia_sl_portid = htons(sl_portid);
552
553 iba->ia_lid = htons(ss->ei_props->ep_blid);
554
555 port_gid = ss->ei_props->ep_sgid;
556 port_guid = htonll(port_gid.gid_guid);
557 bcopy(&port_guid, iba->ia_guid, FIP_GUID_LEN);
558
559 /*
560 * Now, fill in the vNIC Login descriptor
561 */
562
563 vlg = &login->lg_vnic_login;
564 vlg->vl_type = FIP_DESC_TYPE_VNIC_LOGIN;
565 vlg->vl_len = FIP_DESC_LEN_VNIC_LOGIN;
566 bcopy(eib_vendor_mellanox, vlg->vl_vendor_id, FIP_VENDOR_LEN);
567
568 /*
569 * Only for the physlink instance 0, we ask the gateway to assign
570 * the mac address and a VLAN (tagless, actually). For this vnic
571 * only, we do not set the H bit. All other vnics are created by
572 * Solaris admin and will have the H bit set. Note also that we
573 * need to clear the vnic id's most significant bit for those that
574 * are administered by the gateway, so vnic0's vnic_id's msb should
575 * be 0 as well.
576 */
577 if (vnic->vn_instance == 0) {
578 vlg->vl_vnic_id = htons(vnic->vn_id);
579 flags_vlan = vnic->vn_vlan & FIP_VL_VLAN_MASK;
580 } else {
581 vlg->vl_vnic_id = htons(vnic->vn_id | FIP_VL_VNIC_ID_MSBIT);
582 flags_vlan = (vnic->vn_vlan & FIP_VL_VLAN_MASK) |
583 FIP_VL_FLAGS_H | FIP_VL_FLAGS_M;
584
585 if (vnic->vn_vlan & FIP_VL_VLAN_MASK)
586 flags_vlan |= (FIP_VL_FLAGS_V | FIP_VL_FLAGS_VP);
587 }
588
589 vlg->vl_flags_vlan = htons(flags_vlan);
590 bcopy(vnic->vn_macaddr, vlg->vl_mac, ETHERADDRL);
591
592 /*
593 * We aren't ready to enable rss, so we set the RSS bit and
594 * the n_rss_mcgid field to 0. Set the mac mcgid to 0 as well.
595 */
596 vlg->vl_flags_rss = 0;
597 vlg->vl_n_mac_mcgid = 0;
598
599 /*
600 * Set the syndrome to 0 and pass the control qpn
601 */
602 vlg->vl_syndrome_ctl_qpn =
603 htonl(vnic->vn_ctl_chan->ch_qpn & FIP_VL_CTL_QPN_MASK);
604
605 /*
606 * Try to set as unique a name as possible for this vnic
607 */
608 (void) snprintf((char *)(vlg->vl_vnic_name), FIP_VNIC_NAME_LEN,
609 "eoib_%02x_%02x", ss->ei_instance, vnic->vn_instance);
610
611 /*
612 * Adjust the ds_len in the sgl to indicate the size of this
613 * request before returning
614 */
615 swqe->qe_sgl.ds_len = login_sz;
616
617 return (EIB_E_SUCCESS);
618 }
619
620 static int
eib_fip_make_update(eib_t * ss,eib_vnic_t * vnic,eib_wqe_t * swqe,int req,int * err)621 eib_fip_make_update(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int req,
622 int *err)
623 {
624 fip_keep_alive_t *ka;
625 fip_proto_t *proto;
626 fip_basic_hdr_t *hdr;
627 fip_desc_vnic_identity_t *vid;
628 ib_gid_t port_gid;
629 ib_guid_t port_guid;
630 uint32_t flags_vhub_id;
631
632 uint8_t *pkt = (uint8_t *)(uintptr_t)(swqe->qe_sgl.ds_va);
633 uint_t pktsz = swqe->qe_sgl.ds_len;
634 uint_t ka_sz = sizeof (fip_keep_alive_t);
635
636 if (pktsz < ka_sz) {
637 *err = EINVAL;
638
639 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_make_update: "
640 "send buffer size (0x%lx) too small to send"
641 "keepalive/update request (min 0x%lx)",
642 pktsz, ka_sz);
643 return (EIB_E_FAILURE);
644 }
645
646 /*
647 * Lint complains that there may be an alignment issue here,
648 * but we know that the "pkt" is atleast double-word aligned,
649 * so it's ok.
650 */
651 ka = (fip_keep_alive_t *)(void *)pkt;
652 bzero(pkt, ka_sz);
653
654 /*
655 * Fill in the FIP protocol version
656 */
657 proto = &ka->ka_proto_version;
658 proto->pr_version = FIP_PROTO_VERSION;
659
660 /*
661 * Fill in the basic header
662 */
663 hdr = &ka->ka_fip_header;
664 hdr->hd_opcode = htons(FIP_OPCODE_EOIB);
665 hdr->hd_subcode = (req == EIB_UPD_REQ_LOGOUT) ?
666 FIP_SUBCODE_H_VNIC_LOGOUT : FIP_SUBCODE_H_KEEP_ALIVE;
667 hdr->hd_desc_list_len = htons((ka_sz >> 2) - 2);
668 hdr->hd_flags = 0;
669 hdr->hd_type = FIP_DESC_TYPE_VENDOR_ID;
670 hdr->hd_len = FIP_DESC_LEN_VENDOR_ID;
671 bcopy(eib_vendor_mellanox, hdr->hd_vendor_id, FIP_VENDOR_LEN);
672
673 /*
674 * Fill in the vNIC Identity descriptor
675 */
676 vid = &ka->ka_vnic_identity;
677
678 vid->vi_type = FIP_DESC_TYPE_VNIC_IDENTITY;
679 vid->vi_len = FIP_DESC_LEN_VNIC_IDENTITY;
680 bcopy(eib_vendor_mellanox, vid->vi_vendor_id, FIP_VENDOR_LEN);
681
682 flags_vhub_id = vnic->vn_login_data.ld_vhub_id;
683 if (vnic->vn_login_data.ld_vlan_in_packets) {
684 flags_vhub_id |= FIP_VI_FLAG_VP;
685 }
686 if (req == EIB_UPD_REQ_TABLE) {
687 flags_vhub_id |= FIP_VI_FLAG_R;
688 } else if (req == EIB_UPD_REQ_KA) {
689 flags_vhub_id |= FIP_VI_FLAG_U;
690 }
691 vid->vi_flags_vhub_id = htonl(flags_vhub_id);
692
693 vid->vi_tusn = (req != EIB_UPD_REQ_LOGOUT) ?
694 htonl(vnic->vn_vhub_table->tb_tusn) : 0;
695
696 vid->vi_vnic_id = htons(vnic->vn_login_data.ld_vnic_id);
697 bcopy(vnic->vn_login_data.ld_assigned_mac, vid->vi_mac, ETHERADDRL);
698
699 port_gid = ss->ei_props->ep_sgid;
700 port_guid = htonll(port_gid.gid_guid);
701 bcopy(&port_guid, vid->vi_port_guid, FIP_GUID_LEN);
702 bcopy(vnic->vn_login_data.ld_vnic_name, vid->vi_vnic_name,
703 FIP_VNIC_NAME_LEN);
704
705 /*
706 * Adjust the ds_len in the sgl to indicate the size of this
707 * request before returning
708 */
709 swqe->qe_sgl.ds_len = ka_sz;
710
711 return (EIB_E_SUCCESS);
712 }
713
714 static int
eib_fip_make_table(eib_t * ss,eib_vnic_t * vnic,eib_wqe_t * swqe,int * err)715 eib_fip_make_table(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
716 {
717 return (eib_fip_make_update(ss, vnic, swqe, EIB_UPD_REQ_TABLE, err));
718 }
719
720 static int
eib_fip_make_ka(eib_t * ss,eib_vnic_t * vnic,eib_wqe_t * swqe,int * err)721 eib_fip_make_ka(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
722 {
723 return (eib_fip_make_update(ss, vnic, swqe, EIB_UPD_REQ_KA, err));
724 }
725
726 static int
eib_fip_make_logout(eib_t * ss,eib_vnic_t * vnic,eib_wqe_t * swqe,int * err)727 eib_fip_make_logout(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
728 {
729 return (eib_fip_make_update(ss, vnic, swqe, EIB_UPD_REQ_LOGOUT, err));
730 }
731
732 static int
eib_fip_send_login(eib_t * ss,eib_vnic_t * vnic,eib_wqe_t * swqe,int * err)733 eib_fip_send_login(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
734 {
735 eib_avect_t *av;
736 eib_chan_t *chan = ss->ei_admin_chan;
737 ibt_status_t ret;
738
739 /*
740 * Get an address vector for this destination
741 */
742 if ((av = eib_ibt_hold_avect(ss, ss->ei_gw_props->pp_gw_lid,
743 ss->ei_gw_props->pp_gw_sl)) == NULL) {
744 *err = ENOMEM;
745 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_login: "
746 "eib_ibt_hold_avect(gw_lid=0x%x, sl=0x%x) failed",
747 ss->ei_gw_props->pp_gw_lid, ss->ei_gw_props->pp_gw_sl);
748 return (EIB_E_FAILURE);
749 }
750
751 /*
752 * Modify the UD destination handle to the gateway
753 */
754 ret = ibt_modify_ud_dest(swqe->qe_dest, EIB_FIP_QKEY,
755 ss->ei_gw_props->pp_gw_ctrl_qpn, &av->av_vect);
756
757 eib_ibt_release_avect(ss, av);
758 if (ret != IBT_SUCCESS) {
759 *err = EINVAL;
760
761 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_login: "
762 "ibt_modify_ud_dest(gw_ctl_qpn=0x%lx, qkey=0x%lx) failed, "
763 "ret=%d", ss->ei_gw_props->pp_gw_ctrl_qpn,
764 EIB_FIP_QKEY, ret);
765 return (EIB_E_FAILURE);
766 }
767
768 /*
769 * Send the login packet to the destination gateway. Posting
770 * the login and setting the login state to wait-for-ack should
771 * ideally be atomic to avoid race.
772 */
773 mutex_enter(&vnic->vn_lock);
774 ret = ibt_post_send(chan->ch_chan, &(swqe->qe_wr.send), 1, NULL);
775 if (ret != IBT_SUCCESS) {
776 mutex_exit(&vnic->vn_lock);
777 *err = EINVAL;
778 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_login: "
779 "ibt_post_send() failed for vnic id 0x%x, ret=%d",
780 vnic->vn_id, ret);
781 return (EIB_E_FAILURE);
782 }
783 vnic->vn_state = EIB_LOGIN_ACK_WAIT;
784
785 mutex_enter(&chan->ch_tx_lock);
786 chan->ch_tx_posted++;
787 mutex_exit(&chan->ch_tx_lock);
788
789 mutex_exit(&vnic->vn_lock);
790
791 return (EIB_E_SUCCESS);
792 }
793
794 static int
eib_fip_send_update(eib_t * ss,eib_vnic_t * vnic,eib_wqe_t * swqe,uint_t nxt_state,int * err)795 eib_fip_send_update(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe,
796 uint_t nxt_state, int *err)
797 {
798 eib_login_data_t *ld = &vnic->vn_login_data;
799 eib_chan_t *chan = vnic->vn_ctl_chan;
800 eib_avect_t *av;
801 ibt_status_t ret;
802
803 /*
804 * Get an address vector for this destination
805 */
806 if ((av = eib_ibt_hold_avect(ss, ld->ld_gw_lid,
807 ld->ld_gw_sl)) == NULL) {
808 *err = ENOMEM;
809 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_update: "
810 "eib_ibt_hold_avect(gw_lid=0x%x, sl=0x%x) failed",
811 ld->ld_gw_lid, ld->ld_gw_sl);
812 return (EIB_E_FAILURE);
813 }
814
815 /*
816 * Modify the UD destination handle to the destination appropriately
817 */
818 ret = ibt_modify_ud_dest(swqe->qe_dest, EIB_FIP_QKEY,
819 ld->ld_gw_ctl_qpn, &av->av_vect);
820
821 eib_ibt_release_avect(ss, av);
822 if (ret != IBT_SUCCESS) {
823 *err = EINVAL;
824 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_update: "
825 "ibt_modify_ud_dest(gw_ctl_qpn=0x%lx, qkey=0x%lx) failed, "
826 "ret=%d", ld->ld_gw_ctl_qpn, EIB_FIP_QKEY, ret);
827 return (EIB_E_FAILURE);
828 }
829
830 /*
831 * Send the update packet to the destination. Posting the update request
832 * and setting the login state to wait-for-vhub_table needs to be atomic
833 * to avoid race.
834 */
835 mutex_enter(&vnic->vn_lock);
836 ret = ibt_post_send(chan->ch_chan, &(swqe->qe_wr.send), 1, NULL);
837 if (ret != IBT_SUCCESS) {
838 mutex_exit(&vnic->vn_lock);
839 *err = EINVAL;
840 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_update: "
841 "ibt_post_send() failed for vnic id 0x%x, ret=%d",
842 vnic->vn_id, ret);
843 return (EIB_E_FAILURE);
844 }
845 vnic->vn_state = nxt_state;
846
847 mutex_enter(&chan->ch_tx_lock);
848 chan->ch_tx_posted++;
849 mutex_exit(&chan->ch_tx_lock);
850
851 mutex_exit(&vnic->vn_lock);
852
853 return (EIB_E_SUCCESS);
854 }
855
856 static int
eib_fip_send_table(eib_t * ss,eib_vnic_t * vnic,eib_wqe_t * swqe,int * err)857 eib_fip_send_table(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
858 {
859 return (eib_fip_send_update(ss, vnic, swqe, EIB_LOGIN_TBL_WAIT, err));
860 }
861
862 static int
eib_fip_send_ka(eib_t * ss,eib_vnic_t * vnic,eib_wqe_t * swqe,int * err)863 eib_fip_send_ka(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
864 {
865 return (eib_fip_send_update(ss, vnic, swqe, EIB_LOGIN_DONE, err));
866 }
867
868 static int
eib_fip_send_logout(eib_t * ss,eib_vnic_t * vnic,eib_wqe_t * swqe,int * err)869 eib_fip_send_logout(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
870 {
871 return (eib_fip_send_update(ss, vnic, swqe, EIB_LOGOUT_DONE, err));
872 }
873
874 static int
eib_fip_parse_vhub_table(uint8_t * pkt,eib_vnic_t * vnic)875 eib_fip_parse_vhub_table(uint8_t *pkt, eib_vnic_t *vnic)
876 {
877 fip_vhub_table_t *tbl;
878 fip_desc_vhub_table_t *desc_tbl;
879 fip_vhub_table_entry_t *entry;
880 fip_basic_hdr_t *hdr;
881 eib_t *ss = vnic->vn_ss;
882 eib_login_data_t *ld = &vnic->vn_login_data;
883 eib_vhub_table_t *etbl = vnic->vn_vhub_table;
884 eib_vhub_update_t *eupd = vnic->vn_vhub_update;
885 eib_vhub_map_t *newmap;
886
887 uint32_t *ipkt;
888 uint32_t init_checksum = 0;
889 uint32_t tusn;
890 uint32_t vhub_id;
891 uint_t entries_in_pkt;
892 uint_t ndx;
893 uint_t i;
894
895 /*
896 * If we're here receiving vhub table messages, we certainly should
897 * have the vhub table structure allocated and present at this point.
898 */
899 if (etbl == NULL) {
900 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
901 "vhub table missing for vnic id 0x%x", vnic->vn_id);
902 return (EIB_E_FAILURE);
903 }
904
905 /*
906 * Note that 'pkt' is always atleast double-word aligned when it is
907 * passed to us, so we can cast it without any problems.
908 */
909 ipkt = (uint32_t *)(void *)pkt;
910 tbl = (fip_vhub_table_t *)(void *)pkt;
911 hdr = &(tbl->vt_fip_header);
912
913 /*
914 * Validate all the header and descriptor types and lengths
915 */
916 if (hdr->hd_type != FIP_DESC_TYPE_VENDOR_ID ||
917 hdr->hd_len != FIP_DESC_LEN_VENDOR_ID) {
918 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
919 "invalid type/len in fip basic header, "
920 "exp (0x%x,0x%x), got (0x%x,0x%x)",
921 FIP_DESC_TYPE_VENDOR_ID, FIP_DESC_LEN_VENDOR_ID,
922 hdr->hd_type, hdr->hd_len);
923 return (EIB_E_FAILURE);
924 }
925 desc_tbl = &(tbl->vt_vhub_table);
926 if (desc_tbl->tb_type != FIP_DESC_TYPE_VHUB_TABLE) {
927 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
928 "invalid type in vhub desc, exp 0x%x, got 0x%x",
929 FIP_DESC_TYPE_VHUB_TABLE, desc_tbl->tb_type);
930 return (EIB_E_FAILURE);
931 }
932
933 /*
934 * Verify that the vhub id is ok for this vnic
935 */
936 vhub_id = ntohl(desc_tbl->tb_flags_vhub_id) & FIP_TB_VHUB_ID_MASK;
937 if (vhub_id != ld->ld_vhub_id) {
938 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
939 "invalid vhub id in vhub table pkt: exp 0x%x, got 0x%x",
940 ld->ld_vhub_id, vhub_id);
941 return (EIB_E_FAILURE);
942 }
943
944 /*
945 * Count the number of vhub table entries in this packet
946 */
947 entries_in_pkt = (desc_tbl->tb_len - FIP_DESC_VHUB_TABLE_WORDS) /
948 FIP_VHUB_TABLE_ENTRY_WORDS;
949
950 /*
951 * While we're here, also compute the 32-bit 2's complement carry-
952 * discarded checksum of the vHUB table descriptor in this packet
953 * till the first vhub table entry.
954 */
955 for (i = 0; i < FIP_DESC_VHUB_TABLE_WORDS; i++)
956 init_checksum += ipkt[i];
957
958 /*
959 * Initialize the vhub's Table Update Sequence Number (tusn),
960 * checksum and record the total number of entries in in the table
961 * if this is the first pkt of the table.
962 */
963 tusn = ntohl(desc_tbl->tb_tusn);
964 if (desc_tbl->tb_hdr & FIP_TB_HDR_FIRST) {
965 etbl->tb_entries_in_table = ntohs(desc_tbl->tb_table_size);
966 etbl->tb_tusn = tusn;
967 etbl->tb_checksum = 0;
968
969 mutex_enter(&vnic->vn_lock);
970 vnic->vn_state = EIB_LOGIN_TBL_INPROG;
971 mutex_exit(&vnic->vn_lock);
972 }
973
974 /*
975 * First, middle or last, the current table TUSN we have must match this
976 * packet's TUSN.
977 */
978 if (etbl->tb_tusn != tusn) {
979 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
980 "unexpected TUSN (0x%lx) during vhub table construction, "
981 "expected 0x%lx", etbl->tb_tusn, tusn);
982 goto vhub_table_fail;
983 }
984
985 /*
986 * See if we've overrun/underrun our original entries count
987 */
988 if ((etbl->tb_entries_seen + entries_in_pkt) >
989 etbl->tb_entries_in_table) {
990 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
991 "vhub table overrun, total_exp=%d, so_far=%d, this_pkt=%d",
992 etbl->tb_entries_in_table, etbl->tb_entries_seen,
993 entries_in_pkt);
994 goto vhub_table_fail;
995 } else if (((etbl->tb_entries_seen + entries_in_pkt) <
996 etbl->tb_entries_in_table) &&
997 (desc_tbl->tb_hdr & FIP_TB_HDR_LAST)) {
998 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
999 "vhub table underrun, total_exp=%d, so_far=%d, last_pkt=%d",
1000 etbl->tb_entries_in_table, etbl->tb_entries_seen,
1001 entries_in_pkt);
1002 goto vhub_table_fail;
1003 }
1004
1005 /*
1006 * Process and add the entries we have in this packet
1007 */
1008 etbl->tb_checksum += init_checksum;
1009 entry = (fip_vhub_table_entry_t *)(void *)
1010 ((uint8_t *)desc_tbl + FIP_DESC_VHUB_TABLE_SZ);
1011
1012 for (ndx = 0; ndx < entries_in_pkt; ndx++, entry++) {
1013 /*
1014 * Allocate a eib_vhub_map_t, copy the current entry details
1015 * and chain it to the appropriate queue.
1016 */
1017 if ((newmap = eib_fip_get_vhub_map()) == NULL) {
1018 EIB_DPRINTF_WARN(ss->ei_instance,
1019 "eib_fip_parse_vhub_table: no memory for vhub "
1020 "table entry, ignoring this vhub table packet");
1021 goto vhub_table_fail;
1022 }
1023
1024 ASSERT((entry->te_v_rss_type & FIP_TE_VALID) == FIP_TE_VALID);
1025 newmap->mp_v_rss_type = entry->te_v_rss_type;
1026 bcopy(entry->te_mac, newmap->mp_mac, ETHERADDRL);
1027 newmap->mp_qpn = (ntohl(entry->te_qpn) & FIP_TE_QPN_MASK);
1028 newmap->mp_sl = (entry->te_sl & FIP_TE_SL_MASK);
1029 newmap->mp_lid = ntohs(entry->te_lid);
1030 newmap->mp_tusn = tusn;
1031 newmap->mp_next = NULL;
1032
1033 /*
1034 * The vhub table messages do not provide status on eport
1035 * state, so we'll simply assume that the eport is up.
1036 */
1037 eib_fip_queue_tbl_entry(etbl, newmap, tusn, FIP_EPORT_UP);
1038
1039 /*
1040 * Update table checksum with this entry's computed checksum
1041 */
1042 ipkt = (uint32_t *)entry;
1043 for (i = 0; i < FIP_VHUB_TABLE_ENTRY_WORDS; i++)
1044 etbl->tb_checksum += ipkt[i];
1045 }
1046 etbl->tb_entries_seen += entries_in_pkt;
1047
1048 /*
1049 * If this is the last packet of this vhub table, complete vhub
1050 * table by verifying checksum and applying all the vhub updates
1051 * that may have come in while we were constructing this table.
1052 */
1053 if (desc_tbl->tb_hdr & FIP_TB_HDR_LAST) {
1054
1055 ipkt = (uint32_t *)entry;
1056 if (!eib_wa_no_good_vhub_cksum) {
1057 if (*ipkt != etbl->tb_checksum) {
1058 EIB_DPRINTF_VERBOSE(ss->ei_instance,
1059 "eib_fip_parse_vhub_table: "
1060 "vhub table checksum invalid, "
1061 "computed=0x%lx, found=0x%lx",
1062 etbl->tb_checksum, *ipkt);
1063 }
1064 }
1065
1066 /*
1067 * Per the EoIB specification, the gateway is supposed to
1068 * include its address information for data messages in the
1069 * vhub table. But we've observed that it doesn't do this
1070 * (with the current version). If this is the case, we'll
1071 * hand-create and add a vhub map for the gateway from the
1072 * information we got in login ack.
1073 */
1074 if (etbl->tb_gateway == NULL)
1075 eib_fip_queue_gw_entry(vnic, etbl, tusn, FIP_EPORT_UP);
1076
1077 /*
1078 * Apply pending vhub updates and reset table counters needed
1079 * during table construction.
1080 */
1081 if (eib_fip_apply_updates(ss, etbl, eupd) != EIB_E_SUCCESS)
1082 goto vhub_table_fail;
1083
1084 etbl->tb_entries_seen = 0;
1085 etbl->tb_entries_in_table = 0;
1086
1087 eib_vnic_vhub_table_done(vnic, EIB_LOGIN_TBL_DONE);
1088 }
1089
1090 return (EIB_E_SUCCESS);
1091
1092 vhub_table_fail:
1093 eib_vnic_vhub_table_done(vnic, EIB_LOGIN_TBL_FAILED);
1094 return (EIB_E_FAILURE);
1095 }
1096
1097 static int
eib_fip_parse_vhub_update(uint8_t * pkt,eib_vnic_t * vnic)1098 eib_fip_parse_vhub_update(uint8_t *pkt, eib_vnic_t *vnic)
1099 {
1100 fip_vhub_update_t *upd;
1101 fip_desc_vhub_update_t *desc_upd;
1102 fip_vhub_table_entry_t *entry;
1103 fip_basic_hdr_t *hdr;
1104 eib_t *ss = vnic->vn_ss;
1105 eib_login_data_t *ld = &vnic->vn_login_data;
1106 eib_vhub_table_t *etbl = vnic->vn_vhub_table;
1107 eib_vhub_update_t *eupd = vnic->vn_vhub_update;
1108 eib_vhub_map_t *newmap;
1109 boolean_t vhub_tbl_done;
1110 uint32_t eport_vp_vhub_id;
1111 uint32_t vhub_id;
1112 uint32_t tusn;
1113 uint32_t prev_tusn;
1114 uint8_t eport_state;
1115
1116 /*
1117 * We should have the vhub table allocated as long as we're receiving
1118 * vhub control messages.
1119 */
1120 if (etbl == NULL) {
1121 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: "
1122 "vhub table missing for vnic id 0x%x", vnic->vn_id);
1123 return (EIB_E_FAILURE);
1124 }
1125
1126 mutex_enter(&vnic->vn_lock);
1127 vhub_tbl_done = ((vnic->vn_state == EIB_LOGIN_TBL_DONE) ||
1128 (vnic->vn_state == EIB_LOGIN_DONE)) ? B_TRUE : B_FALSE;
1129 mutex_exit(&vnic->vn_lock);
1130
1131 /*
1132 * Note that 'pkt' is always atleast double-word aligned when it is
1133 * passed to us, so we can cast it without any problems.
1134 */
1135 upd = (fip_vhub_update_t *)(void *)pkt;
1136 hdr = &(upd->vu_fip_header);
1137
1138 /*
1139 * Validate all the header and descriptor types and lengths
1140 */
1141 if (hdr->hd_type != FIP_DESC_TYPE_VENDOR_ID ||
1142 hdr->hd_len != FIP_DESC_LEN_VENDOR_ID) {
1143 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: "
1144 "invalid type/len in fip basic header, "
1145 "exp (0x%x,0x%x), got (0x%x,0x%x)",
1146 FIP_DESC_TYPE_VENDOR_ID, FIP_DESC_LEN_VENDOR_ID,
1147 hdr->hd_type, hdr->hd_len);
1148 return (EIB_E_FAILURE);
1149 }
1150 desc_upd = &(upd->vu_vhub_update);
1151 if (desc_upd->up_type != FIP_DESC_TYPE_VHUB_UPDATE ||
1152 desc_upd->up_len != FIP_DESC_LEN_VHUB_UPDATE) {
1153 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: "
1154 "invalid type/len in vhub update desc: "
1155 "exp (0x%x,0x%x), got (0x%x,0x%x)",
1156 FIP_DESC_TYPE_VHUB_UPDATE, FIP_DESC_LEN_VHUB_UPDATE,
1157 desc_upd->up_type, desc_upd->up_len);
1158 return (EIB_E_FAILURE);
1159 }
1160
1161 /*
1162 * Verify that the vhub id is ok for this vnic and save the eport state
1163 */
1164 eport_vp_vhub_id = ntohl(desc_upd->up_eport_vp_vhub_id);
1165
1166 vhub_id = eport_vp_vhub_id & FIP_UP_VHUB_ID_MASK;
1167 if (vhub_id != ld->ld_vhub_id) {
1168 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: "
1169 "invalid vhub id in vhub update pkt: exp 0x%x, got 0x%x",
1170 ld->ld_vhub_id, vhub_id);
1171 return (EIB_E_FAILURE);
1172 }
1173 eport_state = (uint8_t)((eport_vp_vhub_id >> FIP_UP_EPORT_STATE_SHIFT) &
1174 FIP_UP_EPORT_STATE_MASK);
1175
1176 /*
1177 * If this is the first update we receive, any tusn is ok. Otherwise,
1178 * make sure the tusn we see in the packet is appropriate.
1179 */
1180 tusn = ntohl(desc_upd->up_tusn);
1181 prev_tusn = vhub_tbl_done ? etbl->tb_tusn : eupd->up_tusn;
1182
1183 if (prev_tusn != 0) {
1184 if (tusn == prev_tusn) {
1185 eib_fip_update_eport_state(ss, etbl, eupd,
1186 vhub_tbl_done, eport_state);
1187 return (EIB_E_SUCCESS);
1188 }
1189 if (tusn != (prev_tusn + 1)) {
1190 EIB_DPRINTF_WARN(ss->ei_instance,
1191 "eib_fip_parse_vhub_update: "
1192 "out of order TUSN received (exp 0x%lx, "
1193 "got 0x%lx), dropping pkt", prev_tusn + 1, tusn);
1194 return (EIB_E_FAILURE);
1195 }
1196 }
1197
1198 /*
1199 * EoIB expects only type 0 (vnic address) entries to maintain the
1200 * context table
1201 */
1202 entry = &(desc_upd->up_tbl_entry);
1203 ASSERT((entry->te_v_rss_type & FIP_TE_TYPE_MASK) == FIP_TE_TYPE_VNIC);
1204
1205 /*
1206 * If the vHUB table has already been fully constructed and if we've
1207 * now received a notice to remove a vnic entry from it, do it.
1208 */
1209 if ((vhub_tbl_done) &&
1210 ((entry->te_v_rss_type & FIP_TE_VALID) == 0)) {
1211 eib_fip_dequeue_tbl_entry(etbl, entry->te_mac,
1212 tusn, eport_state);
1213
1214 if (bcmp(entry->te_mac, ld->ld_assigned_mac, ETHERADDRL) == 0) {
1215 uint8_t *mymac;
1216
1217 mymac = entry->te_mac;
1218 EIB_DPRINTF_WARN(ss->ei_instance,
1219 "eib_fip_parse_vhub_update: "
1220 "vhub update pkt received to kill self "
1221 "(%x:%x:%x:%x:%x:%x)", mymac[0], mymac[1], mymac[2],
1222 mymac[3], mymac[4], mymac[5]);
1223
1224 return (EIB_E_FAILURE);
1225 }
1226 return (EIB_E_SUCCESS);
1227 }
1228
1229 /*
1230 * Otherwise, allocate a new eib_vhub_map_t and fill it in with
1231 * the details of the new entry
1232 */
1233 if ((newmap = eib_fip_get_vhub_map()) == NULL) {
1234 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: "
1235 "no memory for vhub update entry, will be ignoring"
1236 "this vhub update packet");
1237 return (EIB_E_FAILURE);
1238 }
1239
1240 newmap->mp_v_rss_type = entry->te_v_rss_type;
1241 bcopy(entry->te_mac, newmap->mp_mac, ETHERADDRL);
1242 newmap->mp_qpn = (ntohl(entry->te_qpn) & FIP_TE_QPN_MASK);
1243 newmap->mp_sl = (entry->te_sl & FIP_TE_SL_MASK);
1244 newmap->mp_lid = ntohs(entry->te_lid);
1245 newmap->mp_tusn = tusn;
1246 newmap->mp_next = NULL;
1247
1248 /*
1249 * Update the full vhub table or chain it to the list of pending
1250 * updates depending on if the vhub table construction is over
1251 * or not.
1252 */
1253 if (vhub_tbl_done) {
1254 eib_fip_queue_tbl_entry(etbl, newmap, tusn, eport_state);
1255 } else {
1256 eib_fip_queue_upd_entry(eupd, newmap, tusn, eport_state);
1257 }
1258
1259 return (EIB_E_SUCCESS);
1260 }
1261
1262 static void
eib_fip_update_eport_state(eib_t * ss,eib_vhub_table_t * tbl,eib_vhub_update_t * upd,boolean_t tbl_done,uint8_t eport_state)1263 eib_fip_update_eport_state(eib_t *ss, eib_vhub_table_t *tbl,
1264 eib_vhub_update_t *upd, boolean_t tbl_done, uint8_t eport_state)
1265 {
1266 if (tbl_done) {
1267 mutex_enter(&tbl->tb_lock);
1268 if (tbl->tb_eport_state != eport_state) {
1269 EIB_DPRINTF_DEBUG(ss->ei_instance,
1270 "eib_fip_update_eport_state: "
1271 "eport state changing from %d to %d",
1272 tbl->tb_eport_state, eport_state);
1273 tbl->tb_eport_state = eport_state;
1274 }
1275 mutex_exit(&tbl->tb_lock);
1276 } else {
1277 mutex_enter(&upd->up_lock);
1278 if (upd->up_eport_state != eport_state) {
1279 EIB_DPRINTF_DEBUG(ss->ei_instance,
1280 "eib_fip_update_eport_state: "
1281 "eport state changing from %d to %d",
1282 upd->up_eport_state, eport_state);
1283 upd->up_eport_state = eport_state;
1284 }
1285 mutex_exit(&upd->up_lock);
1286 }
1287 }
1288
1289 static void
eib_fip_queue_tbl_entry(eib_vhub_table_t * tbl,eib_vhub_map_t * map,uint32_t tusn,uint8_t eport_state)1290 eib_fip_queue_tbl_entry(eib_vhub_table_t *tbl, eib_vhub_map_t *map,
1291 uint32_t tusn, uint8_t eport_state)
1292 {
1293 uint8_t bkt;
1294
1295 mutex_enter(&tbl->tb_lock);
1296
1297 switch (map->mp_v_rss_type & FIP_TE_TYPE_MASK) {
1298 case FIP_TE_TYPE_GATEWAY:
1299 if (tbl->tb_gateway) {
1300 kmem_free(tbl->tb_gateway,
1301 sizeof (eib_vhub_map_t));
1302 }
1303 tbl->tb_gateway = map;
1304 break;
1305
1306 case FIP_TE_TYPE_UNICAST_MISS:
1307 if (tbl->tb_unicast_miss) {
1308 kmem_free(tbl->tb_unicast_miss,
1309 sizeof (eib_vhub_map_t));
1310 }
1311 tbl->tb_unicast_miss = map;
1312 break;
1313
1314 case FIP_TE_TYPE_VHUB_MULTICAST:
1315 if (tbl->tb_vhub_multicast) {
1316 kmem_free(tbl->tb_vhub_multicast,
1317 sizeof (eib_vhub_map_t));
1318 }
1319 tbl->tb_vhub_multicast = map;
1320 break;
1321
1322 case FIP_TE_TYPE_MULTICAST_ENTRY:
1323 /*
1324 * If multicast entry types are not to be specially
1325 * processed, treat them like regular vnic addresses.
1326 */
1327 if (!eib_wa_no_mcast_entries) {
1328 bkt = (map->mp_mac[ETHERADDRL-1]) % EIB_TB_NBUCKETS;
1329 map->mp_next = tbl->tb_mcast_entry[bkt];
1330 tbl->tb_mcast_entry[bkt] = map;
1331 break;
1332 }
1333 /*FALLTHROUGH*/
1334
1335 case FIP_TE_TYPE_VNIC:
1336 bkt = (map->mp_mac[ETHERADDRL-1]) % EIB_TB_NBUCKETS;
1337 map->mp_next = tbl->tb_vnic_entry[bkt];
1338 tbl->tb_vnic_entry[bkt] = map;
1339 break;
1340 }
1341
1342 tbl->tb_tusn = tusn;
1343 tbl->tb_eport_state = eport_state;
1344
1345 mutex_exit(&tbl->tb_lock);
1346 }
1347
1348 static void
eib_fip_queue_upd_entry(eib_vhub_update_t * upd,eib_vhub_map_t * map,uint32_t tusn,uint8_t eport_state)1349 eib_fip_queue_upd_entry(eib_vhub_update_t *upd, eib_vhub_map_t *map,
1350 uint32_t tusn, uint8_t eport_state)
1351 {
1352 eib_vhub_map_t *tail;
1353
1354 /*
1355 * The eib_vhub_update_t list is only touched/traversed when the
1356 * control cq handler is parsing either update or table message,
1357 * or by the table cleanup routine when we aren't attached to any
1358 * control mcgs. Bottom line is that this list traversal is always
1359 * single-threaded and we could probably do away with the lock.
1360 */
1361 mutex_enter(&upd->up_lock);
1362 for (tail = upd->up_vnic_entry; tail != NULL; tail = tail->mp_next) {
1363 if (tail->mp_next == NULL)
1364 break;
1365 }
1366 if (tail) {
1367 tail->mp_next = map;
1368 } else {
1369 upd->up_vnic_entry = map;
1370 }
1371
1372 upd->up_tusn = tusn;
1373 upd->up_eport_state = eport_state;
1374
1375 mutex_exit(&upd->up_lock);
1376 }
1377
1378 static void
eib_fip_queue_gw_entry(eib_vnic_t * vnic,eib_vhub_table_t * tbl,uint32_t tusn,uint8_t eport_state)1379 eib_fip_queue_gw_entry(eib_vnic_t *vnic, eib_vhub_table_t *tbl, uint32_t tusn,
1380 uint8_t eport_state)
1381 {
1382 eib_t *ss = vnic->vn_ss;
1383 eib_vhub_map_t *newmap;
1384 eib_login_data_t *ld = &vnic->vn_login_data;
1385
1386 if ((newmap = eib_fip_get_vhub_map()) == NULL) {
1387 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_queue_gw_entry: "
1388 "no memory to queue gw entry, transactions could fail");
1389 return;
1390 }
1391
1392 newmap->mp_v_rss_type = FIP_TE_VALID | FIP_TE_TYPE_GATEWAY;
1393 bcopy(eib_zero_mac, newmap->mp_mac, ETHERADDRL);
1394 newmap->mp_qpn = ld->ld_gw_data_qpn;
1395 newmap->mp_sl = ld->ld_gw_sl;
1396 newmap->mp_lid = ld->ld_gw_lid;
1397 newmap->mp_tusn = tusn;
1398 newmap->mp_next = NULL;
1399
1400 eib_fip_queue_tbl_entry(tbl, newmap, tusn, eport_state);
1401 }
1402
1403 static int
eib_fip_apply_updates(eib_t * ss,eib_vhub_table_t * tbl,eib_vhub_update_t * upd)1404 eib_fip_apply_updates(eib_t *ss, eib_vhub_table_t *tbl, eib_vhub_update_t *upd)
1405 {
1406 eib_vhub_map_t *list;
1407 eib_vhub_map_t *map;
1408 eib_vhub_map_t *nxt;
1409 uint32_t tbl_tusn = tbl->tb_tusn;
1410
1411 /*
1412 * Take the update list out
1413 */
1414 mutex_enter(&upd->up_lock);
1415 list = upd->up_vnic_entry;
1416 upd->up_vnic_entry = NULL;
1417 mutex_exit(&upd->up_lock);
1418
1419 /*
1420 * Skip any updates with older/same tusn as our vhub table
1421 */
1422 nxt = NULL;
1423 for (map = list; (map) && (map->mp_tusn <= tbl_tusn); map = nxt) {
1424 nxt = map->mp_next;
1425 kmem_free(map, sizeof (eib_vhub_map_t));
1426 }
1427
1428 if (map == NULL)
1429 return (EIB_E_SUCCESS);
1430
1431 /*
1432 * If we missed any updates between table tusn and the first
1433 * update tusn we got, we need to fail.
1434 */
1435 if (map->mp_tusn > (tbl_tusn + 1)) {
1436 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_apply_updates: "
1437 "vhub update missed tusn(s), expected=0x%lx, got=0x%lx",
1438 (tbl_tusn + 1), map->mp_tusn);
1439 for (; map != NULL; map = nxt) {
1440 nxt = map->mp_next;
1441 kmem_free(map, sizeof (eib_vhub_map_t));
1442 }
1443 return (EIB_E_FAILURE);
1444 }
1445
1446 /*
1447 * If everything is fine, apply all the updates we received
1448 */
1449 for (; map != NULL; map = nxt) {
1450 nxt = map->mp_next;
1451 map->mp_next = NULL;
1452
1453 if (map->mp_v_rss_type & FIP_TE_VALID) {
1454 eib_fip_queue_tbl_entry(tbl, map, upd->up_tusn,
1455 upd->up_eport_state);
1456 } else {
1457 eib_fip_dequeue_tbl_entry(tbl, map->mp_mac,
1458 upd->up_tusn, upd->up_eport_state);
1459 kmem_free(map, sizeof (eib_vhub_map_t));
1460 }
1461 }
1462
1463 return (EIB_E_SUCCESS);
1464 }
1465
1466 static void
eib_fip_dequeue_tbl_entry(eib_vhub_table_t * tbl,uint8_t * mac,uint32_t tusn,uint8_t eport_state)1467 eib_fip_dequeue_tbl_entry(eib_vhub_table_t *tbl, uint8_t *mac, uint32_t tusn,
1468 uint8_t eport_state)
1469 {
1470 uint8_t bkt;
1471 eib_vhub_map_t *prev;
1472 eib_vhub_map_t *elem;
1473
1474 bkt = (mac[ETHERADDRL-1]) % EIB_TB_NBUCKETS;
1475
1476 mutex_enter(&tbl->tb_lock);
1477
1478 /*
1479 * Note that for EoIB, the vhub table is maintained using only
1480 * vnic entry updates
1481 */
1482 prev = NULL;
1483 for (elem = tbl->tb_vnic_entry[bkt]; elem; elem = elem->mp_next) {
1484 if (bcmp(elem->mp_mac, mac, ETHERADDRL) == 0)
1485 break;
1486 prev = elem;
1487 }
1488
1489 if (prev && elem) {
1490 prev->mp_next = elem->mp_next;
1491 kmem_free(elem, sizeof (eib_vhub_map_t));
1492 }
1493
1494 tbl->tb_tusn = tusn;
1495 tbl->tb_eport_state = eport_state;
1496
1497 mutex_exit(&tbl->tb_lock);
1498 }
1499
1500 static eib_vhub_map_t *
eib_fip_get_vhub_map(void)1501 eib_fip_get_vhub_map(void)
1502 {
1503 return (kmem_zalloc(sizeof (eib_vhub_map_t), KM_NOSLEEP));
1504 }
1505