1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 #include <sys/types.h>
27 #include <sys/kmem.h>
28 #include <sys/conf.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/sunndi.h>
32 #include <sys/ksynch.h>
33 #include <sys/callb.h>
34 #include <sys/ib/mgt/sm_attr.h> /* SM_INIT_TYPE_REPLY_... */
35
36 #include <sys/ib/clients/eoib/enx_impl.h>
37
38 /*
39 * Static function declarations
40 */
41 static void eibnx_gw_is_alive(eibnx_gw_info_t *);
42 static void eibnx_gw_is_aware(eibnx_thr_info_t *, eibnx_gw_info_t *, boolean_t);
43 static void eibnx_process_rx(eibnx_thr_info_t *, ibt_wc_t *, eibnx_wqe_t *);
44 static void eibnx_handle_wcerr(uint8_t, eibnx_wqe_t *, eibnx_thr_info_t *);
45 static void eibnx_handle_login_ack(eibnx_thr_info_t *, uint8_t *);
46 static void eibnx_handle_gw_rebirth(eibnx_thr_info_t *, uint16_t);
47 static void eibnx_handle_gw_info_update(eibnx_thr_info_t *, uint16_t, void *);
48 static int eibnx_replace_portinfo(eibnx_thr_info_t *, ibt_hca_portinfo_t *,
49 uint_t);
50 static void eibnx_handle_port_events(ibt_hca_hdl_t, uint8_t);
51 static void eibnx_handle_hca_attach(ib_guid_t);
52 static void eibnx_handle_hca_detach(ib_guid_t);
53
/*
 * NDI event handle, defined outside this file. In this file it is only
 * passed to ndi_event_retrieve_cookie() to look up the cookie for the
 * login-ack, gw-available and gw-info-update events before they are
 * posted to an EoIB child node.
 */
extern ndi_event_hdl_t enx_ndi_event_hdl;
58
/*
 * Predicates on the SM's init type reply flags ("itr"). Each one is
 * non-zero (1) when the corresponding SM_INIT_TYPE_* bit is clear in
 * the supplied value and 0 when that bit is set.
 */
#define	ENX_PORT_ATTR_LOADED(itr)		\
	(!((itr) & SM_INIT_TYPE_REPLY_NO_LOAD_REPLY))
#define	ENX_PORT_ATTR_NOT_PRESERVED(itr)	\
	(!((itr) & SM_INIT_TYPE_PRESERVE_CONTENT_REPLY))
#define	ENX_PORT_PRES_NOT_PRESERVED(itr)	\
	(!((itr) & SM_INIT_TYPE_PRESERVE_PRESENCE_REPLY))
68
/*
 * Port monitor progress flags, one bit per completed setup step (all
 * flag values should be non-zero). The port monitor ORs a flag into
 * ti_progress after the matching step succeeds and clears it again
 * when that step is rolled back on exit.
 */
#define	ENX_MON_LINKSTATE_UP		(1 << 0)
#define	ENX_MON_FOUND_MCGS		(1 << 1)
#define	ENX_MON_SETUP_CQ		(1 << 2)
#define	ENX_MON_SETUP_UD_CHAN		(1 << 3)
#define	ENX_MON_SETUP_BUFS		(1 << 4)
#define	ENX_MON_SETUP_CQ_HDLR		(1 << 5)
#define	ENX_MON_JOINED_MCGS		(1 << 6)
#define	ENX_MON_MULTICAST_SLCT		(1 << 7)
#define	ENX_MON_MAX			0xFF
81
82 /*
83 * Per-port thread to solicit, monitor and discover EoIB gateways
84 * and create the corresponding EoIB driver instances on the host.
85 */
86 void
eibnx_port_monitor(eibnx_thr_info_t * info)87 eibnx_port_monitor(eibnx_thr_info_t *info)
88 {
89 clock_t solicit_period_ticks;
90 clock_t deadline;
91 kmutex_t ci_lock;
92 callb_cpr_t ci;
93 char thr_name[MAXNAMELEN];
94
95 (void) snprintf(thr_name, MAXNAMELEN, ENX_PORT_MONITOR,
96 info->ti_pi->p_port_num);
97
98 mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
99 CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, thr_name);
100
101 info->ti_progress = 0;
102
103 /*
104 * If the port is not active yet, wait for a port up event. The
105 * async handler, when it sees a port-up event, is expected to
106 * update the port_monitor's portinfo structure's p_linkstate
107 * and wake us up with ENX_EVENT_LINK_UP.
108 */
109 while (info->ti_pi->p_linkstate != IBT_PORT_ACTIVE) {
110 mutex_enter(&info->ti_event_lock);
111 while ((info->ti_event &
112 (ENX_EVENT_LINK_UP | ENX_EVENT_DIE)) == 0) {
113 mutex_enter(&ci_lock);
114 CALLB_CPR_SAFE_BEGIN(&ci);
115 mutex_exit(&ci_lock);
116
117 cv_wait(&info->ti_event_cv, &info->ti_event_lock);
118
119 mutex_enter(&ci_lock);
120 CALLB_CPR_SAFE_END(&ci, &ci_lock);
121 mutex_exit(&ci_lock);
122 }
123 if (info->ti_event & ENX_EVENT_DIE) {
124 mutex_exit(&info->ti_event_lock);
125 goto port_monitor_exit;
126 }
127 info->ti_event &= (~ENX_EVENT_LINK_UP);
128 mutex_exit(&info->ti_event_lock);
129 }
130 info->ti_progress |= ENX_MON_LINKSTATE_UP;
131
132 /*
133 * Locate the multicast groups for sending solicit requests
134 * to the GW and receiving advertisements from the GW. If
135 * either of the mcg is not present, wait for them to be
136 * created by the GW.
137 */
138 while (eibnx_find_mgroups(info) != ENX_E_SUCCESS) {
139 mutex_enter(&info->ti_event_lock);
140 while ((info->ti_event &
141 (ENX_EVENT_MCGS_AVAILABLE | ENX_EVENT_DIE)) == 0) {
142 mutex_enter(&ci_lock);
143 CALLB_CPR_SAFE_BEGIN(&ci);
144 mutex_exit(&ci_lock);
145
146 cv_wait(&info->ti_event_cv, &info->ti_event_lock);
147
148 mutex_enter(&ci_lock);
149 CALLB_CPR_SAFE_END(&ci, &ci_lock);
150 mutex_exit(&ci_lock);
151 }
152 if (info->ti_event & ENX_EVENT_DIE) {
153 mutex_exit(&info->ti_event_lock);
154 goto port_monitor_exit;
155 }
156 info->ti_event &= (~ENX_EVENT_MCGS_AVAILABLE);
157 mutex_exit(&info->ti_event_lock);
158 }
159 info->ti_progress |= ENX_MON_FOUND_MCGS;
160
161 /*
162 * Setup a shared CQ
163 */
164 if (eibnx_setup_cq(info) != ENX_E_SUCCESS) {
165 ENX_DPRINTF_ERR("eibnx_setup_cq() failed, terminating "
166 "port monitor for (hca_guid=0x%llx, port_num=0x%x)",
167 info->ti_hca_guid, info->ti_pi->p_port_num);
168 goto port_monitor_exit;
169 }
170 info->ti_progress |= ENX_MON_SETUP_CQ;
171
172 /*
173 * Setup UD channel
174 */
175 if (eibnx_setup_ud_channel(info) != ENX_E_SUCCESS) {
176 ENX_DPRINTF_ERR("eibnx_setup_ud_channel() failed, terminating "
177 "port monitor for (hca_guid=0x%llx, port_num=0x%x)",
178 info->ti_hca_guid, info->ti_pi->p_port_num);
179 goto port_monitor_exit;
180 }
181 info->ti_progress |= ENX_MON_SETUP_UD_CHAN;
182
183 /*
184 * Allocate/initialize any tx/rx buffers
185 */
186 if (eibnx_setup_bufs(info) != ENX_E_SUCCESS) {
187 ENX_DPRINTF_ERR("eibnx_setup_bufs() failed, terminating "
188 "port monitor for (hca_guid=0x%llx, port_num=0x%x)",
189 info->ti_hca_guid, info->ti_pi->p_port_num);
190 goto port_monitor_exit;
191 }
192 info->ti_progress |= ENX_MON_SETUP_BUFS;
193
194 /*
195 * Setup completion handler
196 */
197 if (eibnx_setup_cq_handler(info) != ENX_E_SUCCESS) {
198 ENX_DPRINTF_ERR("eibnx_setup_cq_handler() failed, terminating "
199 "port monitor for (hca_guid=0x%llx, port_num=0x%x)",
200 info->ti_hca_guid, info->ti_pi->p_port_num);
201 goto port_monitor_exit;
202 }
203 info->ti_progress |= ENX_MON_SETUP_CQ_HDLR;
204
205 /*
206 * Join EoIB multicast groups
207 */
208 if (eibnx_join_mcgs(info) != ENX_E_SUCCESS) {
209 ENX_DPRINTF_ERR("eibnx_join_mcgs() failed, terminating ",
210 "port monitor for (hca_guid=0x%llx, port_num=0x%x)",
211 info->ti_hca_guid, info->ti_pi->p_port_num);
212 goto port_monitor_exit;
213 }
214 info->ti_progress |= ENX_MON_JOINED_MCGS;
215
216 /*
217 * Send SOLICIT pkt to the EoIB multicast group
218 */
219 if (eibnx_fip_solicit_mcast(info) != ENX_E_SUCCESS) {
220 ENX_DPRINTF_ERR("eibnx_fip_solicit_mcast() failed, terminating "
221 "port monitor for (hca_guid=0x%llx, port_num=0x%x)",
222 info->ti_hca_guid, info->ti_pi->p_port_num);
223 goto port_monitor_exit;
224 }
225 info->ti_progress |= ENX_MON_MULTICAST_SLCT;
226
227 mutex_enter(&info->ti_event_lock);
228
229 solicit_period_ticks = drv_usectohz(ENX_DFL_SOLICIT_PERIOD_USEC);
230
231 periodic_solicit:
232 deadline = ddi_get_lbolt() + solicit_period_ticks;
233 while ((info->ti_event & (ENX_EVENT_TIMED_OUT | ENX_EVENT_DIE)) == 0) {
234 mutex_enter(&ci_lock);
235 CALLB_CPR_SAFE_BEGIN(&ci);
236 mutex_exit(&ci_lock);
237
238 if (cv_timedwait(&info->ti_event_cv, &info->ti_event_lock,
239 deadline) == -1) {
240 info->ti_event |= ENX_EVENT_TIMED_OUT;
241 }
242
243 mutex_enter(&ci_lock);
244 CALLB_CPR_SAFE_END(&ci, &ci_lock);
245 mutex_exit(&ci_lock);
246 }
247
248 if (info->ti_event & ENX_EVENT_DIE) {
249 mutex_exit(&info->ti_event_lock);
250 goto port_monitor_exit;
251 }
252
253 if (info->ti_event & ENX_EVENT_TIMED_OUT) {
254 if (eibnx_fip_solicit_ucast(info,
255 &solicit_period_ticks) != ENX_E_SUCCESS) {
256 ENX_DPRINTF_WARN("failed to send solicit ucast to "
257 "gateways (hca_guid=0x%llx, port_num=0x%x)",
258 info->ti_hca_guid, info->ti_pi->p_port_num);
259 }
260 info->ti_event &= ~ENX_EVENT_TIMED_OUT;
261 }
262
263 goto periodic_solicit;
264
265 port_monitor_exit:
266 if (info->ti_progress & ENX_MON_MULTICAST_SLCT) {
267 eibnx_cleanup_port_nodes(info);
268 info->ti_progress &= (~ENX_MON_MULTICAST_SLCT);
269 }
270 if (info->ti_progress & ENX_MON_JOINED_MCGS) {
271 eibnx_rb_join_mcgs(info);
272 info->ti_progress &= (~ENX_MON_JOINED_MCGS);
273 }
274 if (info->ti_progress & ENX_MON_SETUP_CQ_HDLR) {
275 eibnx_rb_setup_cq_handler(info);
276 info->ti_progress &= (~ENX_MON_SETUP_CQ_HDLR);
277 }
278 if (info->ti_progress & ENX_MON_SETUP_BUFS) {
279 eibnx_rb_setup_bufs(info);
280 info->ti_progress &= (~ENX_MON_SETUP_BUFS);
281 }
282 if (info->ti_progress & ENX_MON_SETUP_UD_CHAN) {
283 eibnx_rb_setup_ud_channel(info);
284 info->ti_progress &= (~ENX_MON_SETUP_UD_CHAN);
285 }
286 if (info->ti_progress & ENX_MON_SETUP_CQ) {
287 eibnx_rb_setup_cq(info);
288 info->ti_progress &= (~ENX_MON_SETUP_CQ);
289 }
290 if (info->ti_progress & ENX_MON_FOUND_MCGS) {
291 eibnx_rb_find_mgroups(info);
292 info->ti_progress &= (~ENX_MON_FOUND_MCGS);
293 }
294
295 mutex_enter(&ci_lock);
296 CALLB_CPR_EXIT(&ci);
297 mutex_destroy(&ci_lock);
298 }
299
300 /*
301 * Async subnet notices handler registered with IBTF
302 */
303 /*ARGSUSED*/
304 void
eibnx_subnet_notices_handler(void * arg,ib_gid_t gid,ibt_subnet_event_code_t sn_evcode,ibt_subnet_event_t * sn_event)305 eibnx_subnet_notices_handler(void *arg, ib_gid_t gid,
306 ibt_subnet_event_code_t sn_evcode, ibt_subnet_event_t *sn_event)
307 {
308 eibnx_t *ss = enx_global_ss;
309 eibnx_thr_info_t *ti;
310 ib_gid_t notice_gid;
311
312 switch (sn_evcode) {
313 case IBT_SM_EVENT_MCG_CREATED:
314 notice_gid = sn_event->sm_notice_gid;
315
316 if ((notice_gid.gid_prefix == enx_solicit_mgid.gid_prefix &&
317 notice_gid.gid_guid == enx_solicit_mgid.gid_guid) ||
318 (notice_gid.gid_prefix == enx_advertise_mgid.gid_prefix &&
319 notice_gid.gid_guid == enx_advertise_mgid.gid_guid)) {
320
321 mutex_enter(&ss->nx_lock);
322 for (ti = ss->nx_thr_info; ti; ti = ti->ti_next) {
323 mutex_enter(&ti->ti_event_lock);
324 ti->ti_event |= ENX_EVENT_MCGS_AVAILABLE;
325 cv_broadcast(&ti->ti_event_cv);
326 mutex_exit(&ti->ti_event_lock);
327 }
328 mutex_exit(&ss->nx_lock);
329 }
330 break;
331
332 case IBT_SM_EVENT_MCG_DELETED:
333 break;
334
335 default:
336 break;
337 }
338 }
339
340 /*
341 * Async event handler registered with IBTF
342 */
343 /*ARGSUSED*/
344 void
eibnx_async_handler(void * clnt_pvt,ibt_hca_hdl_t hca,ibt_async_code_t code,ibt_async_event_t * event)345 eibnx_async_handler(void *clnt_pvt, ibt_hca_hdl_t hca,
346 ibt_async_code_t code, ibt_async_event_t *event)
347 {
348 switch (code) {
349 case IBT_ERROR_CATASTROPHIC_CHAN:
350 case IBT_ERROR_INVALID_REQUEST_CHAN:
351 case IBT_ERROR_ACCESS_VIOLATION_CHAN:
352 case IBT_ERROR_CQ:
353 case IBT_ERROR_CATASTROPHIC_SRQ:
354 ENX_DPRINTF_ERR("ibt ERROR event 0x%x received "
355 "(hca_guid=0x%llx)", code, event->ev_hca_guid);
356 break;
357
358 case IBT_ERROR_PORT_DOWN:
359 ENX_DPRINTF_WARN("ibt PORT_DOWN event received "
360 "(hca_guid=0x%llx, port_num=0x%x)",
361 event->ev_hca_guid, event->ev_port);
362 break;
363
364 case IBT_EVENT_PORT_UP:
365 ENX_DPRINTF_WARN("ibt PORT_UP event received "
366 "(hca_guid=0x%llx, port_num=0x%x)",
367 event->ev_hca_guid, event->ev_port);
368 eibnx_handle_port_events(hca, event->ev_port);
369 break;
370
371 case IBT_PORT_CHANGE_EVENT:
372 ENX_DPRINTF_WARN("ibt PORT_CHANGE event received "
373 "(hca_guid=0x%llx, port_num=0x%x)",
374 event->ev_hca_guid, event->ev_port);
375 eibnx_handle_port_events(hca, event->ev_port);
376 break;
377
378 case IBT_CLNT_REREG_EVENT:
379 ENX_DPRINTF_WARN("ibt CLNT_REREG event received "
380 "(hca_guid=0x%llx, port_num=0x%x)",
381 event->ev_hca_guid, event->ev_port);
382 eibnx_handle_port_events(hca, event->ev_port);
383 break;
384
385 case IBT_HCA_ATTACH_EVENT:
386 ENX_DPRINTF_VERBOSE("ibt HCA_ATTACH event received "
387 "(new hca_guid=0x%llx)", event->ev_hca_guid);
388 eibnx_handle_hca_attach(event->ev_hca_guid);
389 break;
390
391 case IBT_HCA_DETACH_EVENT:
392 ENX_DPRINTF_VERBOSE("ibt HCA_DETACH event received "
393 "(target hca_guid=0x%llx)", event->ev_hca_guid);
394 eibnx_handle_hca_detach(event->ev_hca_guid);
395 break;
396
397 default:
398 ENX_DPRINTF_VERBOSE("ibt UNSUPPORTED event 0x%x received "
399 "(hca_guid=0x%llx)", code, event->ev_hca_guid);
400 break;
401 }
402 }
403
404 boolean_t
eibnx_is_gw_dead(eibnx_gw_info_t * gwi)405 eibnx_is_gw_dead(eibnx_gw_info_t *gwi)
406 {
407 int64_t cur_lbolt;
408
409 cur_lbolt = ddi_get_lbolt64();
410
411 mutex_enter(&gwi->gw_adv_lock);
412 if ((cur_lbolt - gwi->gw_adv_last_lbolt) > gwi->gw_adv_timeout_ticks) {
413 gwi->gw_adv_flag = ENX_GW_DEAD;
414 mutex_exit(&gwi->gw_adv_lock);
415 return (B_TRUE);
416 }
417 mutex_exit(&gwi->gw_adv_lock);
418
419 return (B_FALSE);
420 }
421
422 static void
eibnx_gw_is_alive(eibnx_gw_info_t * gwi)423 eibnx_gw_is_alive(eibnx_gw_info_t *gwi)
424 {
425 /*
426 * We've just received a multicast advertisement from this
427 * gateway. Multicast or unicast, this means that the gateway
428 * is alive. Record this timestamp (in ticks).
429 */
430 mutex_enter(&gwi->gw_adv_lock);
431 gwi->gw_adv_last_lbolt = ddi_get_lbolt64();
432 if (gwi->gw_adv_flag == ENX_GW_DEAD) {
433 gwi->gw_adv_flag = ENX_GW_ALIVE;
434 }
435 mutex_exit(&gwi->gw_adv_lock);
436 }
437
438 static void
eibnx_gw_is_aware(eibnx_thr_info_t * info,eibnx_gw_info_t * gwi,boolean_t gwi_changed)439 eibnx_gw_is_aware(eibnx_thr_info_t *info, eibnx_gw_info_t *gwi,
440 boolean_t gwi_changed)
441 {
442 eib_gw_info_t eib_gwi;
443 boolean_t post_rebirth_event = B_FALSE;
444
445 /*
446 * We're here when we receive a unicast advertisement from a
447 * gateway. If this gateway was discovered earlier but was in
448 * a dead state, this means it has come back alive and become
449 * aware of us. We may need to inform any EoIB children
450 * waiting for notification. Note that if this gateway is
451 * being discovered for the first time now, we wouldn't have
452 * created the binding eoib node for it (we will do that when
453 * we return from this routine), so the "rebirth" and "gw info
454 * update" event postings will be NOPs.
455 */
456 mutex_enter(&gwi->gw_adv_lock);
457 gwi->gw_adv_last_lbolt = ddi_get_lbolt64();
458 if (gwi->gw_adv_flag != ENX_GW_AWARE) {
459 post_rebirth_event = B_TRUE;
460 }
461 gwi->gw_adv_flag = ENX_GW_AWARE;
462 mutex_exit(&gwi->gw_adv_lock);
463
464 /*
465 * If we have a gateway information update event, we post that
466 * first, so any rebirth event processed later will have the
467 * correct gateway information.
468 */
469 if (gwi_changed) {
470 eib_gwi.gi_system_guid = gwi->gw_system_guid;
471 eib_gwi.gi_guid = gwi->gw_guid;
472 eib_gwi.gi_sn_prefix = gwi->gw_addr.ga_gid.gid_prefix;
473 eib_gwi.gi_adv_period = gwi->gw_adv_period;
474 eib_gwi.gi_ka_period = gwi->gw_ka_period;
475 eib_gwi.gi_vnic_ka_period = gwi->gw_vnic_ka_period;
476 eib_gwi.gi_ctrl_qpn = gwi->gw_ctrl_qpn;
477 eib_gwi.gi_lid = gwi->gw_lid;
478 eib_gwi.gi_portid = gwi->gw_portid;
479 eib_gwi.gi_num_net_vnics = gwi->gw_num_net_vnics;
480 eib_gwi.gi_flag_available = gwi->gw_flag_available;
481 eib_gwi.gi_is_host_adm_vnics = gwi->gw_is_host_adm_vnics;
482 eib_gwi.gi_sl = gwi->gw_sl;
483 eib_gwi.gi_n_rss_qpn = gwi->gw_n_rss_qpn;
484 bcopy(gwi->gw_system_name, eib_gwi.gi_system_name,
485 EIB_GW_SYSNAME_LEN);
486 bcopy(gwi->gw_port_name, eib_gwi.gi_port_name,
487 EIB_GW_PORTNAME_LEN);
488 bcopy(gwi->gw_vendor_id, eib_gwi.gi_vendor_id,
489 EIB_GW_VENDOR_LEN);
490
491 eibnx_handle_gw_info_update(info, eib_gwi.gi_portid, &eib_gwi);
492 }
493 if (post_rebirth_event) {
494 eibnx_handle_gw_rebirth(info, gwi->gw_portid);
495 }
496 }
497
498 /*
499 * Thread to create eoib nodes and online instances
500 */
501 void
eibnx_create_eoib_node(void)502 eibnx_create_eoib_node(void)
503 {
504 eibnx_t *ss = enx_global_ss;
505 eibnx_nodeq_t *node;
506 kmutex_t ci_lock;
507 callb_cpr_t ci;
508
509 mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL);
510 CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, ENX_NODE_CREATOR);
511
512 wait_for_node_to_create:
513 mutex_enter(&ss->nx_nodeq_lock);
514
515 while ((ss->nx_nodeq == NULL) && (ss->nx_nodeq_thr_die == 0)) {
516 mutex_enter(&ci_lock);
517 CALLB_CPR_SAFE_BEGIN(&ci);
518 mutex_exit(&ci_lock);
519
520 cv_wait(&ss->nx_nodeq_cv, &ss->nx_nodeq_lock);
521
522 mutex_enter(&ci_lock);
523 CALLB_CPR_SAFE_END(&ci, &ci_lock);
524 mutex_exit(&ci_lock);
525 }
526
527 /*
528 * If this is not really a work item, but a request for us to
529 * die, throwaway all pending work requests and just die.
530 */
531 if (ss->nx_nodeq_thr_die) {
532 while (ss->nx_nodeq) {
533 node = ss->nx_nodeq;
534 ss->nx_nodeq = node->nc_next;
535 node->nc_next = NULL;
536
537 kmem_free(node, sizeof (eibnx_nodeq_t));
538 }
539 mutex_exit(&ss->nx_nodeq_lock);
540
541 mutex_enter(&ci_lock);
542 CALLB_CPR_EXIT(&ci);
543 mutex_destroy(&ci_lock);
544
545 return;
546 }
547
548 /*
549 * Grab the first node entry from the queue
550 */
551 ASSERT(ss->nx_nodeq != NULL);
552 node = ss->nx_nodeq;
553 ss->nx_nodeq = node->nc_next;
554 node->nc_next = NULL;
555
556 mutex_exit(&ss->nx_nodeq_lock);
557
558 (void) eibnx_configure_node(node->nc_info, node->nc_gwi, NULL);
559
560 kmem_free(node, sizeof (eibnx_nodeq_t));
561 goto wait_for_node_to_create;
562
563 /*NOTREACHED*/
564 }
565
566 /*
567 * Tx and Rx completion interrupt handler. Guaranteed to be single
568 * threaded and nonreentrant for this CQ.
569 */
570 void
eibnx_comp_intr(ibt_cq_hdl_t cq_hdl,void * arg)571 eibnx_comp_intr(ibt_cq_hdl_t cq_hdl, void *arg)
572 {
573 eibnx_thr_info_t *info = arg;
574
575 if (info->ti_cq_hdl != cq_hdl) {
576 ENX_DPRINTF_DEBUG("eibnx_comp_intr: "
577 "cq_hdl(0x%llx) != info->ti_cq_hdl(0x%llx), "
578 "ignoring completion", cq_hdl, info->ti_cq_hdl);
579 return;
580 }
581
582 ASSERT(info->ti_softint_hdl != NULL);
583
584 (void) ddi_intr_trigger_softint(info->ti_softint_hdl, NULL);
585 }
586
587 /*
588 * Send and Receive completion handler functions for EoIB nexus
589 */
590
591 /*ARGSUSED*/
592 uint_t
eibnx_comp_handler(caddr_t arg1,caddr_t arg2)593 eibnx_comp_handler(caddr_t arg1, caddr_t arg2)
594 {
595 eibnx_thr_info_t *info = (eibnx_thr_info_t *)arg1;
596 ibt_wc_t *wc;
597 eibnx_wqe_t *wqe;
598 ibt_status_t ret;
599 uint_t polled;
600 int i;
601
602 /*
603 * Make sure the port monitor isn't killed if we're in the completion
604 * handler. If the port monitor thread is already being killed, we'll
605 * stop processing completions.
606 */
607 mutex_enter(&info->ti_event_lock);
608 if (info->ti_event & (ENX_EVENT_DIE | ENX_EVENT_COMPLETION)) {
609 mutex_exit(&info->ti_event_lock);
610 return ((uint_t)ENX_E_SUCCESS);
611 }
612 info->ti_event |= ENX_EVENT_COMPLETION;
613 mutex_exit(&info->ti_event_lock);
614
615 /*
616 * Re-arm the notification callback before we start polling
617 * the completion queue. There's nothing much we can do if the
618 * enable_cq_notify fails - we issue a warning and move on.
619 */
620 ret = ibt_enable_cq_notify(info->ti_cq_hdl, IBT_NEXT_COMPLETION);
621 if (ret != IBT_SUCCESS) {
622 ENX_DPRINTF_WARN("ibt_enable_cq_notify(cq_hdl=0x%llx) "
623 "failed, ret=%d", info->ti_cq_hdl, ret);
624 }
625
626 /*
627 * Handle tx and rx completions
628 */
629 while ((ret = ibt_poll_cq(info->ti_cq_hdl, info->ti_wc, info->ti_cq_sz,
630 &polled)) == IBT_SUCCESS) {
631 for (wc = info->ti_wc, i = 0; i < polled; i++, wc++) {
632 wqe = (eibnx_wqe_t *)(uintptr_t)wc->wc_id;
633 if (wc->wc_status != IBT_WC_SUCCESS) {
634 eibnx_handle_wcerr(wc->wc_status, wqe, info);
635 } else if (wqe->qe_type == ENX_QETYP_RWQE) {
636 eibnx_process_rx(info, wc, wqe);
637 eibnx_return_rwqe(info, wqe);
638 } else {
639 eibnx_return_swqe(wqe);
640 }
641 }
642 }
643
644 /*
645 * On the way out, make sure we wake up any pending death requestor
646 * for the port-monitor thread. Note that we need to do a cv_broadcast()
647 * here since there could be multiple threads sleeping on the event cv
648 * and we want to make sure all waiters get a chance to see if it's
649 * their turn.
650 */
651 mutex_enter(&info->ti_event_lock);
652 info->ti_event &= (~ENX_EVENT_COMPLETION);
653 cv_broadcast(&info->ti_event_cv);
654 mutex_exit(&info->ti_event_lock);
655
656 return (DDI_INTR_CLAIMED);
657 }
658
659 /*
660 * Rx processing code
661 */
662 static void
eibnx_process_rx(eibnx_thr_info_t * info,ibt_wc_t * wc,eibnx_wqe_t * wqe)663 eibnx_process_rx(eibnx_thr_info_t *info, ibt_wc_t *wc, eibnx_wqe_t *wqe)
664 {
665 eibnx_gw_msg_t msg;
666 eibnx_gw_info_t *gwi;
667 eibnx_gw_info_t *orig_gwi;
668 eibnx_gw_info_t *new_gwi;
669 uint_t orig_gw_state;
670 uint8_t *pkt = (uint8_t *)(uintptr_t)(wqe->qe_sgl.ds_va);
671 boolean_t gwi_changed;
672
673 /*
674 * We'll simply drop any packet (including broadcast advertisements
675 * from gws) we receive before we've done our solicitation broadcast.
676 */
677 if (info->ti_mcast_done == 0) {
678 return;
679 }
680
681 /*
682 * Skip the GRH and parse the message in the packet
683 */
684 if (eibnx_fip_parse_pkt(pkt + ENX_GRH_SZ, &msg) != ENX_E_SUCCESS) {
685 return;
686 }
687
688 /*
689 * If it was a login ack for one of our children, we need to pass
690 * it on to the child
691 */
692 if (msg.gm_type == FIP_VNIC_LOGIN_ACK) {
693 eibnx_handle_login_ack(info, pkt);
694 return;
695 }
696
697 /*
698 * Other than that, we only handle gateway advertisements
699 */
700 if (msg.gm_type != FIP_GW_ADVERTISE_MCAST &&
701 msg.gm_type != FIP_GW_ADVERTISE_UCAST) {
702 return;
703 }
704
705 gwi = &msg.u.gm_info;
706
707 /*
708 * State machine to create eoib instances. Whether this advertisement
709 * is from a new gateway or an old gateway that we already know about,
710 * if this was a unicast response to our earlier solicitation and it's
711 * the first time we're receiving it from this gateway, we're ready to
712 * login, so we create the EoIB instance for it.
713 */
714 orig_gwi = eibnx_find_gw_in_gwlist(info, gwi);
715 if (orig_gwi == NULL) {
716 if (gwi->gw_flag_available == 0) {
717 gwi->gw_state = ENX_GW_STATE_UNAVAILABLE;
718 gwi->gw_adv_flag = ENX_GW_ALIVE;
719 (void) eibnx_add_gw_to_gwlist(info, gwi, wc, pkt);
720 } else if (gwi->gw_flag_ucast_advt == 0) {
721 gwi->gw_state = ENX_GW_STATE_AVAILABLE;
722 gwi->gw_adv_flag = ENX_GW_ALIVE;
723 (void) eibnx_add_gw_to_gwlist(info, gwi, wc, pkt);
724 } else {
725 gwi->gw_state = ENX_GW_STATE_READY_TO_LOGIN;
726 gwi->gw_adv_flag = ENX_GW_AWARE;
727 if ((new_gwi = eibnx_add_gw_to_gwlist(info, gwi,
728 wc, pkt)) != NULL) {
729 eibnx_queue_for_creation(info, new_gwi);
730 }
731 }
732 } else {
733 orig_gw_state = orig_gwi->gw_state;
734 if (gwi->gw_flag_available == 0) {
735 gwi->gw_state = ENX_GW_STATE_UNAVAILABLE;
736 eibnx_replace_gw_in_gwlist(info, orig_gwi, gwi,
737 wc, pkt, NULL);
738 eibnx_gw_is_alive(orig_gwi);
739
740 } else if (gwi->gw_flag_ucast_advt == 0) {
741 if (orig_gw_state == ENX_GW_STATE_UNAVAILABLE) {
742 gwi->gw_state = ENX_GW_STATE_AVAILABLE;
743 } else {
744 gwi->gw_state = orig_gw_state;
745 }
746 eibnx_replace_gw_in_gwlist(info, orig_gwi, gwi,
747 wc, pkt, NULL);
748 eibnx_gw_is_alive(orig_gwi);
749
750 } else {
751 gwi->gw_state = ENX_GW_STATE_READY_TO_LOGIN;
752 eibnx_replace_gw_in_gwlist(info, orig_gwi, gwi,
753 wc, pkt, &gwi_changed);
754 eibnx_gw_is_aware(info, orig_gwi, gwi_changed);
755
756 if (orig_gw_state != ENX_GW_STATE_READY_TO_LOGIN)
757 eibnx_queue_for_creation(info, orig_gwi);
758 }
759 }
760 }
761
762 /*ARGSUSED*/
763 static void
eibnx_handle_wcerr(uint8_t wcerr,eibnx_wqe_t * wqe,eibnx_thr_info_t * info)764 eibnx_handle_wcerr(uint8_t wcerr, eibnx_wqe_t *wqe, eibnx_thr_info_t *info)
765 {
766 /*
767 * Currently, all we do is report
768 */
769 switch (wcerr) {
770 case IBT_WC_WR_FLUSHED_ERR:
771 ENX_DPRINTF_VERBOSE("IBT_WC_WR_FLUSHED_ERR seen "
772 "(hca_guid=0x%llx, port_num=0x%x, wqe_type=0x%x)",
773 info->ti_hca_guid, info->ti_pi->p_port_num, wqe->qe_type);
774 break;
775
776 case IBT_WC_LOCAL_CHAN_OP_ERR:
777 ENX_DPRINTF_ERR("IBT_WC_LOCAL_CHAN_OP_ERR seen "
778 "(hca_guid=0x%llx, port_num=0x%x, wqe_type=0x%x)",
779 info->ti_hca_guid, info->ti_pi->p_port_num, wqe->qe_type);
780 break;
781
782 case IBT_WC_LOCAL_PROTECT_ERR:
783 ENX_DPRINTF_ERR("IBT_WC_LOCAL_PROTECT_ERR seen "
784 "(hca_guid=0x%llx, port_num=0x%x, wqe_type=0x%x)",
785 info->ti_hca_guid, info->ti_pi->p_port_num, wqe->qe_type);
786 break;
787 }
788 }
789
790 static void
eibnx_handle_login_ack(eibnx_thr_info_t * info,uint8_t * pkt)791 eibnx_handle_login_ack(eibnx_thr_info_t *info, uint8_t *pkt)
792 {
793 eibnx_t *ss = enx_global_ss;
794 fip_login_ack_t *ack;
795 fip_desc_vnic_login_t *login;
796 ddi_eventcookie_t cookie;
797 dev_info_t *rdip;
798 uint16_t vnic_id;
799 uint16_t inst;
800 int ret;
801
802 /*
803 * When we get login acknowledgements, we simply invoke the
804 * appropriate EoIB driver callback to process it on behalf
805 * of the driver instance. We will let the callback do error
806 * checks.
807 */
808 ack = (fip_login_ack_t *)(pkt + ENX_GRH_SZ);
809 login = &(ack->ak_vnic_login);
810 vnic_id = ntohs(login->vl_vnic_id);
811 inst = EIB_DEVI_INSTANCE(vnic_id);
812
813 if ((rdip = eibnx_find_child_dip_by_inst(info, inst)) == NULL) {
814 ENX_DPRINTF_DEBUG("no eoib child with instance 0x%x found "
815 "for (hca_guid=0x%llx, port_num=0x%x)", inst,
816 info->ti_hca_guid, info->ti_pi->p_port_num);
817 return;
818 }
819
820 ret = ndi_event_retrieve_cookie(enx_ndi_event_hdl, rdip,
821 EIB_NDI_EVENT_LOGIN_ACK, &cookie, NDI_EVENT_NOPASS);
822 if (ret != NDI_SUCCESS) {
823 ENX_DPRINTF_WARN("no login-ack cookie for (hca_guid=0x%llx, "
824 "port_num=0x%x, eoib_inst=0x%x), ret=%d", info->ti_hca_guid,
825 info->ti_pi->p_port_num, inst, ret);
826 return;
827 }
828
829 (void) ndi_post_event(ss->nx_dip, rdip, cookie, (void *)pkt);
830 }
831
832 static void
eibnx_handle_gw_rebirth(eibnx_thr_info_t * info,uint16_t portid)833 eibnx_handle_gw_rebirth(eibnx_thr_info_t *info, uint16_t portid)
834 {
835 eibnx_t *ss = enx_global_ss;
836 ddi_eventcookie_t cookie;
837 dev_info_t *rdip;
838 int ret;
839
840 if ((rdip = eibnx_find_child_dip_by_gw(info, portid)) == NULL) {
841 ENX_DPRINTF_WARN("no eoib child bound to gw portid 0x%x "
842 "found for (hca_guid=0x%llx, port_num=0x%x)",
843 portid, info->ti_hca_guid, info->ti_pi->p_port_num);
844 return;
845 }
846
847 ret = ndi_event_retrieve_cookie(enx_ndi_event_hdl, rdip,
848 EIB_NDI_EVENT_GW_AVAILABLE, &cookie, NDI_EVENT_NOPASS);
849 if (ret != NDI_SUCCESS) {
850 ENX_DPRINTF_WARN("no gw-available cookie for (hca_guid=0x%llx, "
851 "port_num=0x%x, gw_portid=0x%x), ret=%d", info->ti_hca_guid,
852 info->ti_pi->p_port_num, portid, ret);
853 return;
854 }
855
856 (void) ndi_post_event(ss->nx_dip, rdip, cookie, NULL);
857 }
858
859 static void
eibnx_handle_gw_info_update(eibnx_thr_info_t * info,uint16_t portid,void * new_gw_info)860 eibnx_handle_gw_info_update(eibnx_thr_info_t *info, uint16_t portid,
861 void *new_gw_info)
862 {
863 eibnx_t *ss = enx_global_ss;
864 ddi_eventcookie_t cookie;
865 dev_info_t *rdip;
866 int ret;
867
868 if ((rdip = eibnx_find_child_dip_by_gw(info, portid)) == NULL) {
869 ENX_DPRINTF_WARN("no eoib child bound to gw portid 0x%x "
870 "found for (hca_guid=0x%llx, port_num=0x%x)",
871 portid, info->ti_hca_guid, info->ti_pi->p_port_num);
872 return;
873 }
874
875 ret = ndi_event_retrieve_cookie(enx_ndi_event_hdl, rdip,
876 EIB_NDI_EVENT_GW_INFO_UPDATE, &cookie, NDI_EVENT_NOPASS);
877 if (ret != NDI_SUCCESS) {
878 ENX_DPRINTF_WARN("no gw-info-update cookie for "
879 "(hca_guid=0x%llx, port_num=0x%x, gw_portid=0x%x), "
880 "ret=%d", info->ti_hca_guid, info->ti_pi->p_port_num,
881 portid, ret);
882 return;
883 }
884
885 (void) ndi_post_event(ss->nx_dip, rdip, cookie, new_gw_info);
886 }
887
888 static int
eibnx_replace_portinfo(eibnx_thr_info_t * ti,ibt_hca_portinfo_t * new_pi,uint_t new_size_pi)889 eibnx_replace_portinfo(eibnx_thr_info_t *ti, ibt_hca_portinfo_t *new_pi,
890 uint_t new_size_pi)
891 {
892 eibnx_t *ss = enx_global_ss;
893 eibnx_hca_t *hca;
894 eibnx_port_t *port;
895
896 mutex_enter(&ss->nx_lock);
897
898 for (hca = ss->nx_hca; hca; hca = hca->hc_next) {
899 if (hca->hc_hdl == ti->ti_hca)
900 break;
901 }
902
903 if (hca == NULL) {
904 ENX_DPRINTF_WARN("hca hdl (0x%llx) not found in hca list",
905 ti->ti_hca);
906 mutex_exit(&ss->nx_lock);
907 return (ENX_E_FAILURE);
908 }
909
910 for (port = hca->hc_port; port; port = port->po_next) {
911 if (port->po_pi == ti->ti_pi) {
912 ibt_free_portinfo(port->po_pi, port->po_pi_size);
913 port->po_pi = new_pi;
914 port->po_pi_size = new_size_pi;
915 ti->ti_pi = port->po_pi;
916 break;
917 }
918 }
919
920 if (port == NULL) {
921 ENX_DPRINTF_WARN("portinfo (0x%llx) not found in hca list",
922 ti->ti_pi);
923 mutex_exit(&ss->nx_lock);
924 return (ENX_E_FAILURE);
925 }
926
927 mutex_exit(&ss->nx_lock);
928
929 return (ENX_E_SUCCESS);
930 }
931
932 static void
eibnx_handle_port_events(ibt_hca_hdl_t ev_hca,uint8_t ev_portnum)933 eibnx_handle_port_events(ibt_hca_hdl_t ev_hca, uint8_t ev_portnum)
934 {
935 eibnx_t *ss = enx_global_ss;
936 eibnx_thr_info_t *ti;
937 ibt_hca_portinfo_t *pi;
938 ibt_status_t ret;
939 uint_t num_pi;
940 uint_t size_pi;
941 uint8_t itr;
942
943 /*
944 * Find the port monitor thread that matches the event hca and
945 * portnum
946 */
947 mutex_enter(&ss->nx_lock);
948 for (ti = ss->nx_thr_info; ti; ti = ti->ti_next) {
949 if ((ti->ti_hca == ev_hca) &&
950 (ti->ti_pi->p_port_num == ev_portnum)) {
951 break;
952 }
953 }
954 mutex_exit(&ss->nx_lock);
955
956 if (ti == NULL)
957 return;
958
959 /*
960 * See if we need to rejoin the mcgs for this port and do so if true
961 */
962 ret = ibt_query_hca_ports(ev_hca, ev_portnum, &pi, &num_pi, &size_pi);
963 if (ret != IBT_SUCCESS) {
964 ENX_DPRINTF_WARN("ibt_query_hca_ports() failed with %d", ret);
965 return;
966 } else if (num_pi != 1 || pi->p_linkstate != IBT_PORT_ACTIVE) {
967 ENX_DPRINTF_WARN("ibt_query_hca_ports(port_num=%d) failed, "
968 "num_pi=%d, linkstate=0x%x", ev_portnum, num_pi,
969 pi->p_linkstate);
970 ibt_free_portinfo(pi, size_pi);
971 return;
972 }
973
974 itr = pi->p_init_type_reply;
975 if (ENX_PORT_ATTR_LOADED(itr) && ENX_PORT_ATTR_NOT_PRESERVED(itr)) {
976 /*
977 * If our port's base lid has changed, we need to replace
978 * the saved portinfo in our lists with the new one before
979 * going further.
980 */
981 if (ti->ti_pi->p_base_lid != pi->p_base_lid) {
982 if (eibnx_replace_portinfo(ti, pi, size_pi) ==
983 ENX_E_SUCCESS) {
984 pi = NULL;
985 size_pi = 0;
986 }
987 }
988 }
989
990 /*
991 * If the port monitor was stuck waiting for the link to come up,
992 * let it know that it is up now.
993 */
994 mutex_enter(&ti->ti_event_lock);
995 if ((ti->ti_progress & ENX_MON_LINKSTATE_UP) != ENX_MON_LINKSTATE_UP) {
996 ti->ti_pi->p_linkstate = IBT_PORT_ACTIVE;
997 ti->ti_event |= ENX_EVENT_LINK_UP;
998 cv_broadcast(&ti->ti_event_cv);
999 }
1000 mutex_exit(&ti->ti_event_lock);
1001
1002 if (ENX_PORT_PRES_NOT_PRESERVED(itr)) {
1003 if (ti->ti_progress & ENX_MON_JOINED_MCGS)
1004 (void) eibnx_rejoin_mcgs(ti);
1005 }
1006
1007 if (pi != NULL)
1008 ibt_free_portinfo(pi, size_pi);
1009 }
1010
1011 static void
eibnx_handle_hca_attach(ib_guid_t new_hca_guid)1012 eibnx_handle_hca_attach(ib_guid_t new_hca_guid)
1013 {
1014 eibnx_t *ss = enx_global_ss;
1015 eibnx_thr_info_t *ti;
1016 eibnx_hca_t *hca;
1017 eibnx_port_t *port;
1018
1019 /*
1020 * All we need to do is to start a port monitor for all the ports
1021 * on the new HCA. To do this, go through our current port monitors
1022 * and see if we already have a monitor for this HCA - if so, print
1023 * a warning and return.
1024 */
1025 mutex_enter(&ss->nx_lock);
1026 for (ti = ss->nx_thr_info; ti; ti = ti->ti_next) {
1027 if (ti->ti_hca_guid == new_hca_guid) {
1028 ENX_DPRINTF_VERBOSE("hca (guid=0x%llx) already "
1029 "attached", new_hca_guid);
1030 mutex_exit(&ss->nx_lock);
1031 return;
1032 }
1033 }
1034 mutex_exit(&ss->nx_lock);
1035
1036 /*
1037 * If we don't have it in our list, process the HCA and start the
1038 * port monitors
1039 */
1040 if ((hca = eibnx_prepare_hca(new_hca_guid)) != NULL) {
1041 mutex_enter(&ss->nx_lock);
1042
1043 hca->hc_next = ss->nx_hca;
1044 ss->nx_hca = hca;
1045
1046 for (port = hca->hc_port; port; port = port->po_next) {
1047 ti = eibnx_start_port_monitor(hca, port);
1048
1049 ti->ti_next = ss->nx_thr_info;
1050 ss->nx_thr_info = ti;
1051 }
1052 mutex_exit(&ss->nx_lock);
1053 }
1054 }
1055
1056 static void
eibnx_handle_hca_detach(ib_guid_t del_hca_guid)1057 eibnx_handle_hca_detach(ib_guid_t del_hca_guid)
1058 {
1059 eibnx_t *ss = enx_global_ss;
1060 eibnx_thr_info_t *ti;
1061 eibnx_thr_info_t *ti_stop_list = NULL;
1062 eibnx_thr_info_t *ti_prev;
1063 eibnx_thr_info_t *ti_next;
1064 eibnx_hca_t *hca;
1065 eibnx_hca_t *hca_prev;
1066
1067 /*
1068 * We need to locate all monitor threads for this HCA and stop them
1069 */
1070 mutex_enter(&ss->nx_lock);
1071 ti_prev = NULL;
1072 for (ti = ss->nx_thr_info; ti; ti = ti_next) {
1073 ti_next = ti->ti_next;
1074
1075 if (ti->ti_hca_guid != del_hca_guid) {
1076 ti_prev = ti;
1077 } else {
1078 /*
1079 * Take it out from the good list
1080 */
1081 if (ti_prev)
1082 ti_prev->ti_next = ti_next;
1083 else
1084 ss->nx_thr_info = ti_next;
1085
1086 /*
1087 * And put it in the to-stop list
1088 */
1089 ti->ti_next = ti_stop_list;
1090 ti_stop_list = ti;
1091 }
1092 }
1093 mutex_exit(&ss->nx_lock);
1094
1095 /*
1096 * Ask all the port_monitor threads to die.
1097 */
1098 for (ti = ti_stop_list; ti; ti = ti_next) {
1099 ti_next = ti->ti_next;
1100 eibnx_stop_port_monitor(ti);
1101 }
1102
1103 /*
1104 * Now, locate the HCA in our list and release all HCA related
1105 * resources.
1106 */
1107 mutex_enter(&ss->nx_lock);
1108 hca_prev = NULL;
1109 for (hca = ss->nx_hca; hca; hca = hca->hc_next) {
1110 if (hca->hc_guid != del_hca_guid) {
1111 hca_prev = hca;
1112 } else {
1113 if (hca_prev) {
1114 hca_prev->hc_next = hca->hc_next;
1115 } else {
1116 ss->nx_hca = hca->hc_next;
1117 }
1118 hca->hc_next = NULL;
1119 break;
1120 }
1121 }
1122 mutex_exit(&ss->nx_lock);
1123
1124 if (hca) {
1125 (void) eibnx_cleanup_hca(hca);
1126 }
1127 }
1128