xref: /illumos-gate/usr/src/uts/common/io/sfxge/sfxge.c (revision 7d0b359ca572cd04474eb1f2ceec5a8ff39e36c9)
1 /*
2  * Copyright (c) 2008-2016 Solarflare Communications Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  *    this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright notice,
11  *    this list of conditions and the following disclaimer in the documentation
12  *    and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are
27  * those of the authors and should not be interpreted as representing official
28  * policies, either expressed or implied, of the FreeBSD Project.
29  */
30 
31 #include <sys/types.h>
32 #include <sys/ddi.h>
33 #include <sys/sunddi.h>
34 #include <sys/modctl.h>
35 #include <sys/conf.h>
36 #include <sys/ethernet.h>
37 #include <sys/pci.h>
38 #include <sys/stream.h>
39 #include <sys/strsun.h>
40 #include <sys/processor.h>
41 #include <sys/cpuvar.h>
42 #include <sys/pghw.h>
43 
44 #include "sfxge.h"
45 #include "sfxge_version.h"
46 #include "efsys.h"
47 #include "efx.h"
48 
#ifdef	DEBUG
/* Global flag that only exists in DEBUG builds; it starts out as B_FALSE. */
boolean_t sfxge_aask = B_FALSE;
#endif

/*
 * Receive queue TRIM default polling interval (in microseconds).
 * sfxge_create() uses this value when the "rxq_poll_usec" property is absent
 * or not positive.
 */
#define	SFXGE_RX_QPOLL_USEC	(5000000)

/* Broadcast address (six 0xff octets) */
uint8_t	sfxge_brdcst[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

/*
 * By default modinfo will display lines truncated to 80 characters and so just
 * show 32 characters of our sfxge_ident string.
 *
 * sfxge_ident is the description string placed in sfxge_modldrv;
 * sfxge_version is published through the "version" entry of the
 * configuration kstat.
 */
const char sfxge_ident[] = "Solarflare 10Gb/40Gb Ethernet";
const char sfxge_version[] = SFXGE_VERSION_STRING;
65 
66 static void
67 sfxge_cfg_build(sfxge_t *sp)
68 {
69 	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sp->s_enp);
70 	(void) snprintf(sp->s_cfg_kstat.buf.sck_mac, 64,
71 	    "%02X:%02X:%02X:%02X:%02X:%02X",
72 	    encp->enc_mac_addr[0], encp->enc_mac_addr[1],
73 	    encp->enc_mac_addr[2], encp->enc_mac_addr[3],
74 	    encp->enc_mac_addr[4], encp->enc_mac_addr[5]);
75 }
76 
77 static int
78 sfxge_create(dev_info_t *dip, sfxge_t **spp)
79 {
80 	sfxge_t *sp;
81 	efx_nic_t *enp;
82 	unsigned int rxq_size;
83 	int rxq_poll_usec;
84 	int rc;
85 
86 	/* Allocate the object */
87 	sp = kmem_zalloc(sizeof (*sp), KM_SLEEP);
88 	sp->s_dip = dip;
89 	ddi_set_driver_private(dip, sp);
90 
91 	mutex_init(&(sp->s_state_lock), NULL, MUTEX_DRIVER, NULL);
92 	sp->s_state = SFXGE_UNINITIALIZED;
93 
94 	/* Get property values */
95 	sp->s_mtu = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip,
96 	    DDI_PROP_DONTPASS, "mtu", ETHERMTU);
97 
98 	sp->s_action_on_hw_err = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip,
99 	    DDI_PROP_DONTPASS, "action_on_hw_err", SFXGE_RECOVER);
100 
101 	rxq_size = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip,
102 	    DDI_PROP_DONTPASS, "rxq_size", SFXGE_DEFAULT_RXQ_SIZE);
103 	if (!(ISP2(rxq_size)))
104 		rxq_size = SFXGE_DEFAULT_RXQ_SIZE;
105 	rxq_size = min(rxq_size, EFX_RXQ_MAXNDESCS);
106 	sp->s_rxq_size = (uint16_t)max(rxq_size, EFX_RXQ_MINNDESCS);
107 
108 	/* Configure polling interval for queue refill/trim */
109 	rxq_poll_usec = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip,
110 	    DDI_PROP_DONTPASS, "rxq_poll_usec", SFXGE_RX_QPOLL_USEC);
111 	if (rxq_poll_usec <= 0)
112 		rxq_poll_usec = SFXGE_RX_QPOLL_USEC;
113 	sp->s_rxq_poll_usec = rxq_poll_usec;
114 
115 #if EFSYS_OPT_MCDI_LOGGING
116 	sp->s_mcdi_logging = ddi_prop_get_int(DDI_DEV_T_ANY, sp->s_dip,
117 	    DDI_PROP_DONTPASS, "mcdi_logging", 0);
118 #endif
119 
120 	/* Create a taskq */
121 	sp->s_tqp = ddi_taskq_create(dip, "tq", 1, TASKQ_DEFAULTPRI, 0);
122 	if (sp->s_tqp == NULL) {
123 		rc = ENOMEM;
124 		goto fail2;
125 	}
126 
127 	/* Check and initialize PCI configuration space */
128 	if ((rc = sfxge_pci_init(sp)) != 0)
129 		goto fail3;
130 
131 	/* Map the device registers */
132 	if ((rc = sfxge_bar_init(sp)) != 0)
133 		goto fail4;
134 
135 	/* Create the NIC object */
136 	mutex_init(&(sp->s_nic_lock), NULL, MUTEX_DRIVER, NULL);
137 
138 	if ((rc = efx_nic_create(sp->s_family, (efsys_identifier_t *)sp,
139 	    &(sp->s_bar), &(sp->s_nic_lock), &enp)) != 0)
140 		goto fail5;
141 
142 	sp->s_enp = enp;
143 
144 	/* Initialize MCDI to talk to the Microcontroller */
145 	if ((rc = sfxge_mcdi_init(sp)) != 0)
146 		goto fail6;
147 
148 	/* Probe the NIC and build the configuration data area */
149 	if ((rc = efx_nic_probe(enp)) != 0)
150 		goto fail7;
151 
152 	switch (sp->s_family) {
153 	case EFX_FAMILY_HUNTINGTON:
154 		sfxge_pcie_check_link(sp, 8, 3); /* PCI 8x Gen3 */
155 		break;
156 
157 	case EFX_FAMILY_SIENA:
158 		sfxge_pcie_check_link(sp, 8, 2); /* PCI 8x Gen2 */
159 		break;
160 
161 	default:
162 		break;
163 	}
164 
165 	if ((rc = efx_nvram_init(enp)) != 0)
166 		goto fail8;
167 
168 	if ((rc = efx_vpd_init(enp)) != 0)
169 		goto fail9;
170 
171 	if ((rc = efx_nic_reset(enp)) != 0)
172 		goto fail10;
173 
174 	sfxge_sram_init(sp);
175 
176 	if ((rc = sfxge_intr_init(sp)) != 0)
177 		goto fail11;
178 
179 	if ((rc = sfxge_ev_init(sp)) != 0)
180 		goto fail12;
181 
182 	if ((rc = sfxge_mac_init(sp)) != 0)
183 		goto fail13;
184 
185 	if ((rc = sfxge_rx_init(sp)) != 0)
186 		goto fail14;
187 
188 	if ((rc = sfxge_tx_init(sp)) != 0)
189 		goto fail15;
190 
191 	if ((rc = sfxge_mon_init(sp)) != 0)
192 		goto fail16;
193 
194 	mutex_init(&(sp->s_tx_flush_lock), NULL, MUTEX_DRIVER,
195 	    DDI_INTR_PRI(sp->s_intr.si_intr_pri));
196 	cv_init(&(sp->s_tx_flush_kv), NULL, CV_DRIVER, NULL);
197 
198 	sp->s_state = SFXGE_INITIALIZED;
199 
200 	*spp = sp;
201 	return (0);
202 
203 fail16:
204 	DTRACE_PROBE(fail15);
205 	sfxge_tx_fini(sp);
206 
207 fail15:
208 	DTRACE_PROBE(fail14);
209 	sfxge_rx_fini(sp);
210 
211 fail14:
212 	DTRACE_PROBE(fail14);
213 	sfxge_mac_fini(sp);
214 
215 fail13:
216 	DTRACE_PROBE(fail13);
217 	sfxge_ev_fini(sp);
218 
219 fail12:
220 	DTRACE_PROBE(fail12);
221 	sfxge_intr_fini(sp);
222 
223 fail11:
224 	DTRACE_PROBE(fail11);
225 	sfxge_sram_fini(sp);
226 	(void) efx_nic_reset(sp->s_enp);
227 
228 fail10:
229 	DTRACE_PROBE(fail10);
230 	efx_vpd_fini(enp);
231 
232 fail9:
233 	DTRACE_PROBE(fail9);
234 	efx_nvram_fini(enp);
235 
236 fail8:
237 	DTRACE_PROBE(fail8);
238 	efx_nic_unprobe(enp);
239 
240 fail7:
241 	DTRACE_PROBE(fail7);
242 	sfxge_mcdi_fini(sp);
243 
244 fail6:
245 	DTRACE_PROBE(fail6);
246 	sp->s_enp = NULL;
247 	efx_nic_destroy(enp);
248 
249 fail5:
250 	DTRACE_PROBE(fail5);
251 	mutex_destroy(&(sp->s_nic_lock));
252 	sfxge_bar_fini(sp);
253 
254 fail4:
255 	DTRACE_PROBE(fail4);
256 	sfxge_pci_fini(sp);
257 
258 fail3:
259 	DTRACE_PROBE(fail3);
260 	ddi_taskq_destroy(sp->s_tqp);
261 	sp->s_tqp = NULL;
262 
263 fail2:
264 	DTRACE_PROBE(fail2);
265 
266 	/* Clear property values */
267 	sp->s_mtu = 0;
268 
269 	mutex_destroy(&(sp->s_state_lock));
270 
271 	/* Free the soft state */
272 	sp->s_dip = NULL;
273 
274 	SFXGE_OBJ_CHECK(sp, sfxge_t);
275 	kmem_free(sp, sizeof (*sp));
276 
277 	return (rc);
278 }
279 
280 
281 static int
282 sfxge_start_locked(sfxge_t *sp, boolean_t restart)
283 {
284 	int rc;
285 
286 	ASSERT(mutex_owned(&(sp->s_state_lock)));
287 
288 	if (sp->s_state == SFXGE_STARTED)
289 		goto done;
290 
291 	if (sp->s_state != SFXGE_REGISTERED) {
292 		rc = EINVAL;
293 		goto fail1;
294 	}
295 	sp->s_state = SFXGE_STARTING;
296 
297 	/* Start a new epoch (allow fresh MCDI requests to succeed) */
298 	efx_mcdi_new_epoch(sp->s_enp);
299 
300 	if ((rc = efx_nic_reset(sp->s_enp)) != 0)
301 		goto fail2;
302 
303 	if ((rc = efx_nic_init(sp->s_enp)) != 0)
304 		goto fail3;
305 
306 	if ((rc = efx_filter_init(sp->s_enp)) != 0)
307 		goto fail4;
308 
309 	if ((rc = sfxge_sram_start(sp)) != 0)
310 		goto fail5;
311 
312 	if ((rc = sfxge_intr_start(sp)) != 0)
313 		goto fail6;
314 
315 	if ((rc = sfxge_ev_start(sp)) != 0)
316 		goto fail7;
317 
318 	if ((rc = sfxge_mac_start(sp, restart)) != 0)
319 		goto fail8;
320 
321 	if ((rc = sfxge_rx_start(sp)) != 0)
322 		goto fail9;
323 
324 	if ((rc = sfxge_tx_start(sp)) != 0)
325 		goto fail10;
326 
327 	if ((rc = sfxge_mon_start(sp)) != 0)
328 		goto fail11;
329 
330 	ASSERT3U(sp->s_state, ==, SFXGE_STARTING);
331 	sp->s_state = SFXGE_STARTED;
332 
333 	/* Notify any change of MTU */
334 	sfxge_gld_mtu_update(sp);
335 
336 done:
337 	return (0);
338 
339 fail11:
340 	DTRACE_PROBE(fail11);
341 	sfxge_tx_stop(sp);
342 
343 fail10:
344 	DTRACE_PROBE(fail10);
345 	sfxge_rx_stop(sp);
346 
347 fail9:
348 	DTRACE_PROBE(fail9);
349 	sfxge_mac_stop(sp);
350 
351 fail8:
352 	DTRACE_PROBE(fail8);
353 	sfxge_ev_stop(sp);
354 
355 fail7:
356 	DTRACE_PROBE(fail7);
357 	sfxge_intr_stop(sp);
358 
359 fail6:
360 	DTRACE_PROBE(fail6);
361 	sfxge_sram_stop(sp);
362 
363 fail5:
364 	DTRACE_PROBE(fail5);
365 	efx_filter_fini(sp->s_enp);
366 
367 fail4:
368 	DTRACE_PROBE(fail4);
369 	efx_nic_fini(sp->s_enp);
370 
371 fail3:
372 	DTRACE_PROBE(fail3);
373 	(void) efx_nic_reset(sp->s_enp);
374 
375 fail2:
376 	DTRACE_PROBE(fail2);
377 
378 	ASSERT3U(sp->s_state, ==, SFXGE_STARTING);
379 	sp->s_state = SFXGE_REGISTERED;
380 
381 fail1:
382 	DTRACE_PROBE1(fail1, int, rc);
383 
384 	return (rc);
385 }
386 
387 
388 int
389 sfxge_start(sfxge_t *sp, boolean_t restart)
390 {
391 	int rc;
392 
393 	mutex_enter(&(sp->s_state_lock));
394 	rc = sfxge_start_locked(sp, restart);
395 	mutex_exit(&(sp->s_state_lock));
396 	return (rc);
397 }
398 
399 
400 static void
401 sfxge_stop_locked(sfxge_t *sp)
402 {
403 	ASSERT(mutex_owned(&(sp->s_state_lock)));
404 
405 	if (sp->s_state != SFXGE_STARTED) {
406 		return;
407 	}
408 	sp->s_state = SFXGE_STOPPING;
409 
410 	sfxge_mon_stop(sp);
411 	sfxge_tx_stop(sp);
412 	sfxge_rx_stop(sp);
413 	sfxge_mac_stop(sp);
414 
415 	/* Stop event processing - must be after rx_stop see sfxge_rx_qpoll() */
416 	sfxge_ev_stop(sp);
417 	sfxge_intr_stop(sp); /* cope with late flush/soft events until here */
418 	sfxge_sram_stop(sp);
419 
420 	efx_filter_fini(sp->s_enp);
421 
422 	efx_nic_fini(sp->s_enp);
423 	(void) efx_nic_reset(sp->s_enp);
424 
425 	ASSERT3U(sp->s_state, ==, SFXGE_STOPPING);
426 	sp->s_state = SFXGE_REGISTERED;
427 }
428 
429 void
430 sfxge_stop(sfxge_t *sp)
431 {
432 	mutex_enter(&(sp->s_state_lock));
433 	sfxge_stop_locked(sp);
434 	mutex_exit(&(sp->s_state_lock));
435 }
436 
437 static void
438 _sfxge_restart(void *arg)
439 {
440 	sfxge_t *sp = arg;
441 	int rc;
442 
443 	/* logging on entry is in sfxge_restart_dispatch */
444 	mutex_enter(&(sp->s_state_lock));
445 
446 	DTRACE_PROBE(_sfxge_restart);
447 	if (sp->s_state != SFXGE_STARTED)
448 		goto done;
449 
450 	/* inform the OS that the link is down - may trigger IPMP failover */
451 	if (sp->s_hw_err && sp->s_action_on_hw_err != SFXGE_INVISIBLE) {
452 		sp->s_mac.sm_link_mode = EFX_LINK_DOWN;
453 		sfxge_gld_link_update(sp);
454 	}
455 
456 	/* Stop processing */
457 	sfxge_stop_locked(sp);
458 
459 	if (sp->s_hw_err && sp->s_action_on_hw_err == SFXGE_LEAVE_DEAD) {
460 		dev_err(sp->s_dip, CE_WARN, SFXGE_CMN_ERR
461 		    "NIC error - interface is"
462 		    " being left permanently DOWN per driver config");
463 
464 		(void) atomic_swap_32(&(sp->s_nested_restarts), 0);
465 		mutex_exit(&(sp->s_state_lock));
466 		return;
467 	} else
468 		sp->s_hw_err = SFXGE_HW_OK;
469 
470 	/* Start processing */
471 	if ((rc = sfxge_start_locked(sp, B_TRUE)) != 0)
472 		goto fail1;
473 
474 done:
475 	(void) atomic_swap_32(&(sp->s_nested_restarts), 0);
476 	mutex_exit(&(sp->s_state_lock));
477 	dev_err(sp->s_dip, CE_WARN, SFXGE_CMN_ERR "NIC restart complete");
478 	return;
479 
480 fail1:
481 	DTRACE_PROBE1(fail1, int, rc);
482 	dev_err(sp->s_dip, CE_WARN,
483 	    SFXGE_CMN_ERR "FATAL ERROR: NIC restart failed rc=%d", rc);
484 
485 	(void) atomic_swap_32(&(sp->s_nested_restarts), 0);
486 	mutex_exit(&(sp->s_state_lock));
487 }
488 
489 int
490 sfxge_restart_dispatch(sfxge_t *sp, uint_t cflags, sfxge_hw_err_t hw_err,
491     const char *reason, uint32_t errval)
492 {
493 	if (hw_err == SFXGE_HW_OK)
494 		sp->s_num_restarts++;
495 	else {
496 		sp->s_hw_err = hw_err;
497 		sp->s_num_restarts_hw_err++;
498 	}
499 
500 	if (atomic_inc_32_nv(&(sp->s_nested_restarts)) > 1) {
501 		/* A restart is currently in progress */
502 		return (0);
503 	}
504 
505 	DTRACE_PROBE2(sfxge_restart_dispatch, sfxge_hw_err_t, hw_err, char *,
506 	    reason);
507 
508 	dev_err(sp->s_dip, CE_WARN, SFXGE_CMN_ERR "NIC restart due to %s:%d",
509 	    reason, errval);
510 
511 	/* If cflags == DDI_SLEEP then guaranteed to succeed */
512 	return (ddi_taskq_dispatch(sp->s_tqp, _sfxge_restart, sp, cflags));
513 }
514 
515 
516 static int
517 sfxge_can_destroy(sfxge_t *sp)
518 {
519 	int index;
520 
521 	/*
522 	 * In SFC bug 19834 it was noted that a mblk passed up to STREAMS
523 	 * could be reused for transmit and sit in the sfxge_tx_packet_cache.
524 	 * This call to empty the TX deferred packet list may result in
525 	 * rx_loaned reducing.
526 	 */
527 	index = EFX_ARRAY_SIZE(sp->s_stp);
528 	while (--index >= 0) {
529 		sfxge_txq_t *stp = sp->s_stp[index];
530 
531 		if (stp != NULL)
532 			sfxge_tx_qdpl_flush(stp);
533 	}
534 
535 	/* Need to wait for desballoc free_func callback */
536 	return (sfxge_rx_loaned(sp));
537 }
538 
539 
540 static int
541 sfxge_destroy(sfxge_t *sp)
542 {
543 	ddi_taskq_t *tqp;
544 	efx_nic_t *enp;
545 	int rc;
546 
547 	ASSERT3U(sp->s_state, ==, SFXGE_INITIALIZED);
548 	enp = sp->s_enp;
549 
550 	if (sfxge_can_destroy(sp) != 0) {
551 		rc = EBUSY;
552 		goto fail1;
553 	}
554 
555 	sp->s_state = SFXGE_UNINITIALIZED;
556 
557 	cv_destroy(&(sp->s_tx_flush_kv));
558 	mutex_destroy(&(sp->s_tx_flush_lock));
559 
560 	sfxge_mon_fini(sp);
561 	sfxge_tx_fini(sp);
562 	sfxge_rx_fini(sp);
563 	sfxge_mac_fini(sp);
564 	sfxge_ev_fini(sp);
565 	sfxge_intr_fini(sp);
566 	sfxge_sram_fini(sp);
567 	(void) efx_nic_reset(enp);
568 
569 	efx_vpd_fini(enp);
570 	efx_nvram_fini(enp);
571 	efx_nic_unprobe(enp);
572 	sfxge_mcdi_fini(sp);
573 
574 	/* Destroy the NIC object */
575 	sp->s_enp = NULL;
576 	efx_nic_destroy(enp);
577 
578 	mutex_destroy(&(sp->s_nic_lock));
579 
580 	/* Unmap the device registers */
581 	sfxge_bar_fini(sp);
582 
583 	/* Tear down PCI configuration space */
584 	sfxge_pci_fini(sp);
585 
586 	/* Destroy the taskq */
587 	tqp = sp->s_tqp;
588 	sp->s_tqp = NULL;
589 	ddi_taskq_destroy(tqp);
590 
591 	mutex_destroy(&(sp->s_state_lock));
592 
593 	/* Clear property values */
594 	sp->s_mtu = 0;
595 
596 	/* Free the soft state */
597 	sp->s_dip = NULL;
598 
599 	SFXGE_OBJ_CHECK(sp, sfxge_t);
600 	kmem_free(sp, sizeof (*sp));
601 
602 	return (0);
603 
604 fail1:
605 	DTRACE_PROBE1(fail1, int, rc);
606 
607 	return (rc);
608 }
609 
610 void
611 sfxge_ioctl(sfxge_t *sp, queue_t *wq, mblk_t *mp)
612 {
613 	struct iocblk *iocp;
614 	int rc, taskq_wait = 0;
615 	size_t ioclen = 0;
616 
617 	/*
618 	 * single concurrent IOCTL
619 	 * serialized from sfxge_create, _destroy, _(re)start, _stop
620 	 */
621 	mutex_enter(&(sp->s_state_lock));
622 
623 	/*LINTED*/
624 	iocp = (struct iocblk *)mp->b_rptr;
625 
626 	switch (iocp->ioc_cmd) {
627 	case SFXGE_NVRAM_IOC:
628 		ioclen = sizeof (sfxge_nvram_ioc_t);
629 		break;
630 	case SFXGE_MCDI_IOC:
631 		ioclen = sizeof (sfxge_mcdi_ioc_t);
632 		break;
633 	case SFXGE_MCDI2_IOC:
634 		ioclen = sizeof (sfxge_mcdi2_ioc_t);
635 		break;
636 	case SFXGE_VPD_IOC:
637 		ioclen = sizeof (sfxge_vpd_ioc_t);
638 		break;
639 	case SFXGE_NIC_RESET_IOC:
640 		break;
641 	default:
642 		rc = ENOTSUP;
643 		goto fail1;
644 	}
645 
646 	if (iocp->ioc_count != ioclen) {
647 		rc = EINVAL;
648 		goto fail2;
649 	}
650 
651 	/* if in multiple fragments pull it up to one linear buffer */
652 	if ((rc = miocpullup(mp, ioclen)) != 0) {
653 		goto fail3;
654 	}
655 
656 	switch (iocp->ioc_cmd) {
657 	case SFXGE_NVRAM_IOC: {
658 		sfxge_nvram_ioc_t *snip =
659 		    (sfxge_nvram_ioc_t *)mp->b_cont->b_rptr;
660 
661 		if ((rc = sfxge_nvram_ioctl(sp, snip)) != 0)
662 			goto fail4;
663 
664 		break;
665 	}
666 	case SFXGE_MCDI_IOC: {
667 		sfxge_mcdi_ioc_t *smip = (sfxge_mcdi_ioc_t *)mp->b_cont->b_rptr;
668 
669 		if ((rc = sfxge_mcdi_ioctl(sp, smip)) != 0)
670 			goto fail4;
671 		taskq_wait = 1;
672 
673 		break;
674 	}
675 	case SFXGE_MCDI2_IOC: {
676 		sfxge_mcdi2_ioc_t *smip =
677 		    (sfxge_mcdi2_ioc_t *)mp->b_cont->b_rptr;
678 
679 		if ((rc = sfxge_mcdi2_ioctl(sp, smip)) != 0)
680 			goto fail4;
681 		taskq_wait = 1;
682 
683 		break;
684 	}
685 	case SFXGE_NIC_RESET_IOC: {
686 		DTRACE_PROBE(nic_reset_ioc);
687 
688 		/* sp->s_state_lock held */
689 		(void) sfxge_restart_dispatch(sp, DDI_SLEEP, SFXGE_HW_OK,
690 		    "NIC_RESET_IOC", 0);
691 		taskq_wait = 1;
692 
693 		break;
694 	}
695 	case SFXGE_VPD_IOC: {
696 		sfxge_vpd_ioc_t *svip = (sfxge_vpd_ioc_t *)mp->b_cont->b_rptr;
697 
698 		if ((rc = sfxge_vpd_ioctl(sp, svip)) != 0)
699 			goto fail4;
700 
701 		break;
702 	}
703 	default:
704 		ASSERT(0);
705 	}
706 
707 	mutex_exit(&(sp->s_state_lock));
708 
709 	if (taskq_wait) {
710 		/*
711 		 * Wait for any tasks that may be accessing GLD functions
712 		 * This may end up waiting for multiple nic_resets
713 		 * as it needs to be outside of s_state_lock for sfxge_restart()
714 		 */
715 		ddi_taskq_wait(sp->s_tqp);
716 	}
717 
718 	/* The entire structure is the acknowledgement */
719 	miocack(wq, mp, iocp->ioc_count, 0);
720 
721 	return;
722 
723 fail4:
724 	DTRACE_PROBE(fail4);
725 fail3:
726 	DTRACE_PROBE(fail3);
727 fail2:
728 	DTRACE_PROBE(fail2);
729 fail1:
730 	DTRACE_PROBE1(fail1, int, rc);
731 
732 	mutex_exit(&(sp->s_state_lock));
733 
734 	/* no data returned */
735 	miocnak(wq, mp, 0, rc);
736 }
737 
738 static int
739 sfxge_register(sfxge_t *sp)
740 {
741 	int rc;
742 
743 	ASSERT3U(sp->s_state, ==, SFXGE_INITIALIZED);
744 
745 	if ((rc = sfxge_gld_register(sp)) != 0)
746 		goto fail1;
747 
748 	sp->s_state = SFXGE_REGISTERED;
749 
750 	return (0);
751 
752 fail1:
753 	DTRACE_PROBE1(fail1, int, rc);
754 
755 	return (rc);
756 }
757 
758 static int
759 sfxge_unregister(sfxge_t *sp)
760 {
761 	int rc;
762 
763 	ASSERT3U(sp->s_state, ==, SFXGE_REGISTERED);
764 
765 	/* Wait for any tasks that may be accessing GLD functions */
766 	ddi_taskq_wait(sp->s_tqp);
767 
768 	if ((rc = sfxge_gld_unregister(sp)) != 0)
769 		goto fail1;
770 
771 	sp->s_state = SFXGE_INITIALIZED;
772 
773 	return (0);
774 
775 fail1:
776 	DTRACE_PROBE1(fail1, int, rc);
777 
778 	return (rc);
779 }
780 
781 static void
782 _sfxge_vpd_kstat_init(sfxge_t *sp, caddr_t vpd, size_t size, efx_vpd_tag_t tag,
783     const char *keyword, sfxge_vpd_type_t type)
784 {
785 	static const char unknown[] = "?";
786 	efx_nic_t *enp = sp->s_enp;
787 	sfxge_vpd_kstat_t *svkp = &(sp->s_vpd_kstat);
788 	kstat_named_t *knp;
789 	efx_vpd_value_t *evvp;
790 
791 	evvp = svkp->svk_vv + type;
792 	evvp->evv_tag = tag;
793 	evvp->evv_keyword = EFX_VPD_KEYWORD(keyword[0], keyword[1]);
794 
795 	if (efx_vpd_get(enp, vpd, size, evvp) != 0) {
796 		evvp->evv_length = strlen(unknown) + 1;
797 		bcopy(unknown, evvp->evv_value, evvp->evv_length);
798 	}
799 
800 	knp = &(svkp->svk_stat[type]);
801 
802 	kstat_named_init(knp, (char *)keyword, KSTAT_DATA_STRING);
803 	kstat_named_setstr(knp, (char *)evvp->evv_value);
804 	svkp->svk_ksp->ks_data_size += sizeof (*evvp);
805 }
806 
807 static int
808 sfxge_vpd_kstat_init(sfxge_t *sp)
809 {
810 	efx_nic_t *enp = sp->s_enp;
811 	sfxge_vpd_kstat_t *svkp = &(sp->s_vpd_kstat);
812 	dev_info_t *dip = sp->s_dip;
813 	char name[MAXNAMELEN];
814 	kstat_t *ksp;
815 	caddr_t vpd;
816 	size_t size;
817 	int rc;
818 
819 	SFXGE_OBJ_CHECK(svkp, sfxge_vpd_kstat_t);
820 	(void) snprintf(name, MAXNAMELEN - 1, "%s_vpd", ddi_driver_name(dip));
821 
822 	/* Get a copy of the VPD space */
823 	if ((rc = efx_vpd_size(enp, &size)) != 0)
824 		goto fail1;
825 
826 	if ((vpd = kmem_zalloc(size, KM_NOSLEEP)) == NULL) {
827 		rc = ENOMEM;
828 		goto fail2;
829 	}
830 
831 	if ((svkp->svk_vv = kmem_zalloc(sizeof (efx_vpd_value_t) *
832 	    SFXGE_VPD_MAX, KM_NOSLEEP)) == NULL) {
833 		rc = ENOMEM;
834 		goto fail3;
835 	}
836 
837 	if ((rc = efx_vpd_read(enp, vpd, size)) != 0)
838 		goto fail4;
839 
840 	if ((ksp = kstat_create((char *)ddi_driver_name(dip),
841 	    ddi_get_instance(dip), name, "vpd", KSTAT_TYPE_NAMED, SFXGE_VPD_MAX,
842 	    KSTAT_FLAG_VIRTUAL)) == NULL) {
843 		rc = ENOMEM;
844 		goto fail5;
845 	}
846 	svkp->svk_ksp = ksp;
847 	ksp->ks_data = &(svkp->svk_stat);
848 
849 	_sfxge_vpd_kstat_init(sp, vpd, size, EFX_VPD_ID, "ID", SFXGE_VPD_ID);
850 	_sfxge_vpd_kstat_init(sp, vpd, size, EFX_VPD_RO, "PN", SFXGE_VPD_PN);
851 	_sfxge_vpd_kstat_init(sp, vpd, size, EFX_VPD_RO, "SN", SFXGE_VPD_SN);
852 	_sfxge_vpd_kstat_init(sp, vpd, size, EFX_VPD_RO, "EC", SFXGE_VPD_EC);
853 	_sfxge_vpd_kstat_init(sp, vpd, size, EFX_VPD_RO, "MN", SFXGE_VPD_MN);
854 	_sfxge_vpd_kstat_init(sp, vpd, size, EFX_VPD_RO, "VD", SFXGE_VPD_VD);
855 	_sfxge_vpd_kstat_init(sp, vpd, size, EFX_VPD_RO, "VE", SFXGE_VPD_VE);
856 
857 	kstat_install(ksp);
858 	kmem_free(vpd, size);
859 
860 	return (0);
861 
862 fail5:
863 	DTRACE_PROBE(fail5);
864 fail4:
865 	DTRACE_PROBE(fail4);
866 	kmem_free(svkp->svk_vv, sizeof (efx_vpd_value_t) * SFXGE_VPD_MAX);
867 fail3:
868 	DTRACE_PROBE(fail3);
869 	kmem_free(vpd, size);
870 fail2:
871 	DTRACE_PROBE(fail2);
872 fail1:
873 	DTRACE_PROBE1(fail1, int, rc);
874 	SFXGE_OBJ_CHECK(svkp, sfxge_vpd_kstat_t);
875 
876 	return (rc);
877 }
878 
879 static void
880 sfxge_vpd_kstat_fini(sfxge_t *sp)
881 {
882 	sfxge_vpd_kstat_t *svkp = &(sp->s_vpd_kstat);
883 
884 	/* NOTE: VPD support is optional, so kstats might not be registered */
885 	if (svkp->svk_ksp != NULL) {
886 
887 		kstat_delete(svkp->svk_ksp);
888 
889 		kmem_free(svkp->svk_vv,
890 		    sizeof (efx_vpd_value_t) * SFXGE_VPD_MAX);
891 
892 		bzero(svkp->svk_stat,
893 		    sizeof (kstat_named_t) * SFXGE_VPD_MAX);
894 
895 		svkp->svk_ksp = NULL;
896 	}
897 
898 	SFXGE_OBJ_CHECK(svkp, sfxge_vpd_kstat_t);
899 }
900 
901 static int
902 sfxge_cfg_kstat_init(sfxge_t *sp)
903 {
904 	dev_info_t *dip = sp->s_dip;
905 	char name[MAXNAMELEN];
906 	kstat_t *ksp;
907 	sfxge_cfg_kstat_t *sckp;
908 	int rc;
909 
910 	sfxge_cfg_build(sp);
911 
912 	/* Create the set */
913 	(void) snprintf(name, MAXNAMELEN - 1, "%s_cfg", ddi_driver_name(dip));
914 
915 	if ((ksp = kstat_create((char *)ddi_driver_name(dip),
916 	    ddi_get_instance(dip), name, "cfg", KSTAT_TYPE_NAMED,
917 	    sizeof (sckp->kstat) / sizeof (kstat_named_t),
918 	    KSTAT_FLAG_VIRTUAL)) == NULL) {
919 		rc = ENOMEM;
920 		goto fail1;
921 	}
922 
923 	sp->s_cfg_ksp = ksp;
924 
925 	ksp->ks_data = sckp = &(sp->s_cfg_kstat);
926 
927 	kstat_named_init(&(sckp->kstat.sck_mac), "mac", KSTAT_DATA_STRING);
928 	kstat_named_setstr(&(sckp->kstat.sck_mac), sckp->buf.sck_mac);
929 	ksp->ks_data_size += sizeof (sckp->buf.sck_mac);
930 
931 	kstat_named_init(&(sckp->kstat.sck_version), "version",
932 	    KSTAT_DATA_STRING);
933 	kstat_named_setstr(&(sckp->kstat.sck_version), sfxge_version);
934 	ksp->ks_data_size += sizeof (sfxge_version);
935 
936 	kstat_install(ksp);
937 	return (0);
938 
939 fail1:
940 	DTRACE_PROBE1(fail1, int, rc);
941 
942 	return (rc);
943 }
944 
945 static void
946 sfxge_cfg_kstat_fini(sfxge_t *sp)
947 {
948 	if (sp->s_cfg_ksp == NULL)
949 		return;
950 
951 	kstat_delete(sp->s_cfg_ksp);
952 	sp->s_cfg_ksp = NULL;
953 
954 	bzero(&(sp->s_cfg_kstat), sizeof (sfxge_cfg_kstat_t));
955 }
956 
957 static int
958 sfxge_resume(sfxge_t *sp)
959 {
960 	int rc;
961 
962 	/* Start processing */
963 	if ((rc = sfxge_start(sp, B_FALSE)) != 0)
964 		goto fail1;
965 
966 	return (DDI_SUCCESS);
967 
968 fail1:
969 	DTRACE_PROBE1(fail1, int, rc);
970 
971 	return (DDI_FAILURE);
972 }
973 
974 static int
975 sfxge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
976 {
977 	sfxge_t *sp;
978 	int rc;
979 
980 	switch (cmd) {
981 	case DDI_ATTACH:
982 		break;
983 
984 	case DDI_RESUME:
985 		if ((sp = ddi_get_driver_private(dip)) == NULL)
986 			return (DDI_FAILURE);
987 		return (sfxge_resume(sp));
988 
989 	default:
990 		return (DDI_FAILURE);
991 	}
992 
993 	/* Create the soft state */
994 	if ((rc = sfxge_create(dip, &sp)) != 0)
995 		goto fail1;
996 
997 	/* Create the configuration kstats */
998 	if ((rc = sfxge_cfg_kstat_init(sp)) != 0)
999 		goto fail2;
1000 
1001 	/* Create the VPD kstats */
1002 	if ((rc = sfxge_vpd_kstat_init(sp)) != 0) {
1003 		if (rc != ENOTSUP)
1004 			goto fail3;
1005 	}
1006 
1007 	/* Register the interface */
1008 	if ((rc = sfxge_register(sp)) != 0)
1009 		goto fail4;
1010 
1011 	/* Announce ourselves in the system log */
1012 	ddi_report_dev(dip);
1013 
1014 	return (DDI_SUCCESS);
1015 
1016 fail4:
1017 	DTRACE_PROBE(fail4);
1018 
1019 	/* Destroy the VPD kstats */
1020 	sfxge_vpd_kstat_fini(sp);
1021 
1022 fail3:
1023 	DTRACE_PROBE(fail3);
1024 
1025 	/* Destroy the configuration kstats */
1026 	sfxge_cfg_kstat_fini(sp);
1027 
1028 fail2:
1029 	DTRACE_PROBE(fail2);
1030 
1031 	/* Destroy the soft state */
1032 	(void) sfxge_destroy(sp);
1033 
1034 fail1:
1035 	DTRACE_PROBE1(fail1, int, rc);
1036 
1037 	return (DDI_FAILURE);
1038 }
1039 
1040 static int
1041 sfxge_suspend(sfxge_t *sp)
1042 {
1043 	/* Stop processing */
1044 	sfxge_stop(sp);
1045 
1046 	return (DDI_SUCCESS);
1047 }
1048 
1049 static int
1050 sfxge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1051 {
1052 	sfxge_t *sp = ddi_get_driver_private(dip);
1053 	int rc;
1054 
1055 	switch (cmd) {
1056 	case DDI_DETACH:
1057 		if (sp == NULL)
1058 			return (DDI_FAILURE);
1059 		break;
1060 
1061 	case DDI_SUSPEND:
1062 		if (sp == NULL)
1063 			return (DDI_FAILURE);
1064 		return (sfxge_suspend(sp));
1065 
1066 	default:
1067 		return (DDI_FAILURE);
1068 	}
1069 
1070 	ASSERT(sp != NULL);
1071 
1072 	/* Wait for any pending restarts to complete */
1073 	ddi_taskq_wait(sp->s_tqp);
1074 
1075 	/*
1076 	 * IOCTLs from utilites can cause GLD mc_start() (SFXGE_STARTED state)
1077 	 * And mc_stop() may not occur until detach time and race. SFC bug 19855
1078 	 * Holding the lock seems to be enough - the log message is not seen
1079 	 */
1080 	mutex_enter(&(sp->s_state_lock));
1081 	if (sp->s_state == SFXGE_STARTED) {
1082 		dev_err(dip, CE_WARN, SFXGE_CMN_ERR
1083 		    "STREAMS detach when STARTED");
1084 		sfxge_stop_locked(sp);
1085 		ASSERT3U(sp->s_state, ==, SFXGE_REGISTERED);
1086 	}
1087 	mutex_exit(&(sp->s_state_lock));
1088 
1089 	ASSERT(sp->s_state == SFXGE_REGISTERED ||
1090 	    sp->s_state == SFXGE_INITIALIZED);
1091 
1092 	if (sp->s_state != SFXGE_REGISTERED)
1093 		goto destroy;
1094 
1095 	/* Unregister the interface */
1096 	if ((rc = sfxge_unregister(sp)) != 0)
1097 		goto fail1;
1098 
1099 destroy:
1100 	/* Destroy the VPD kstats */
1101 	sfxge_vpd_kstat_fini(sp);
1102 
1103 	/* Destroy the configuration kstats */
1104 	sfxge_cfg_kstat_fini(sp);
1105 
1106 	/*
1107 	 * Destroy the soft state - this might fail until rx_loaned packets that
1108 	 * have been passed up the STREAMS stack are returned
1109 	 */
1110 	if ((rc = sfxge_destroy(sp)) != 0)
1111 		goto fail2;
1112 
1113 	return (DDI_SUCCESS);
1114 
1115 fail2:
1116 	DTRACE_PROBE(fail2);
1117 fail1:
1118 	DTRACE_PROBE1(fail1, int, rc);
1119 
1120 	return (DDI_FAILURE);
1121 }
1122 
/*
 * modlinkage
 */

/*
 * Device operations for the driver: sfxge_attach and sfxge_detach are the
 * attach and detach entry points.
 */
DDI_DEFINE_STREAM_OPS(sfxge_dev_ops, nulldev, nulldev, sfxge_attach,
    sfxge_detach, nulldev, NULL, D_MP, NULL, NULL);

/* Driver linkage: ties sfxge_ident and sfxge_dev_ops to mod_driverops. */
static struct modldrv		sfxge_modldrv = {
	&mod_driverops,
	(char *)sfxge_ident,
	&sfxge_dev_ops,
};

/* Module linkage handed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage	sfxge_modlinkage = {
	MODREV_1,
	{ &sfxge_modldrv, NULL }
};

/* Driver-wide lock; initialized in _init() and destroyed in _fini(). */
kmutex_t	sfxge_global_lock;
/* Array of NCPU counters; allocated in _init() and freed in _fini(). */
unsigned int	*sfxge_cpu;
1143 
1144 int
1145 _init(void)
1146 {
1147 	int rc;
1148 
1149 	mutex_init(&sfxge_global_lock, NULL, MUTEX_DRIVER, NULL);
1150 
1151 	/* Create tables for CPU, core, cache and chip counts */
1152 	sfxge_cpu = kmem_zalloc(sizeof (unsigned int) * NCPU, KM_SLEEP);
1153 
1154 	mac_init_ops(&sfxge_dev_ops, SFXGE_DRIVER_NAME);
1155 
1156 	if ((rc = mod_install(&sfxge_modlinkage)) != 0)
1157 		goto fail1;
1158 
1159 	return (0);
1160 
1161 fail1:
1162 	DTRACE_PROBE(fail2);
1163 
1164 	mac_fini_ops(&sfxge_dev_ops);
1165 
1166 	kmem_free(sfxge_cpu, sizeof (unsigned int) * NCPU);
1167 	mutex_destroy(&sfxge_global_lock);
1168 
1169 	return (rc);
1170 }
1171 
1172 int
1173 _fini(void)
1174 {
1175 	int rc;
1176 
1177 	if ((rc = mod_remove(&sfxge_modlinkage)) != 0)
1178 		return (rc);
1179 
1180 	mac_fini_ops(&sfxge_dev_ops);
1181 
1182 	/* Destroy tables */
1183 	kmem_free(sfxge_cpu, sizeof (unsigned int) * NCPU);
1184 
1185 	mutex_destroy(&sfxge_global_lock);
1186 
1187 	return (0);
1188 }
1189 
1190 int
1191 _info(struct modinfo *mip)
1192 {
1193 	return (mod_info(&sfxge_modlinkage, mip));
1194 }
1195