xref: /illumos-gate/usr/src/uts/common/io/igb/igb_main.c (revision 4d9fdb46b215739778ebc12079842c9905586999)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2007-2012 Intel Corporation. All rights reserved.
24  */
25 
26 /*
27  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Copyright 2013, Nexenta Systems, Inc. All rights reserved.
29  * Copyright 2016 Joyent, Inc.
30  * Copyright 2020 Oxide Computer Company
31  */
32 
33 #include "igb_sw.h"
34 
/* Module identification strings: 'ident' is the modldrv description; */
/* igb_version is logged once at the end of a successful attach. */
static char ident[] = "Intel 1Gb Ethernet";
static char igb_version[] = "igb 2.3.8-ish";
37 
38 /*
 * Local function prototypes
40  */
41 static int igb_register_mac(igb_t *);
42 static int igb_identify_hardware(igb_t *);
43 static int igb_regs_map(igb_t *);
44 static void igb_init_properties(igb_t *);
45 static int igb_init_driver_settings(igb_t *);
46 static void igb_init_locks(igb_t *);
47 static void igb_destroy_locks(igb_t *);
48 static int igb_init_mac_address(igb_t *);
49 static int igb_init(igb_t *);
50 static int igb_init_adapter(igb_t *);
51 static void igb_stop_adapter(igb_t *);
52 static int igb_reset(igb_t *);
53 static void igb_tx_clean(igb_t *);
54 static boolean_t igb_tx_drain(igb_t *);
55 static boolean_t igb_rx_drain(igb_t *);
56 static int igb_alloc_rings(igb_t *);
57 static int igb_alloc_rx_data(igb_t *);
58 static void igb_free_rx_data(igb_t *);
59 static void igb_free_rings(igb_t *);
60 static void igb_setup_rings(igb_t *);
61 static void igb_setup_rx(igb_t *);
62 static void igb_setup_tx(igb_t *);
63 static void igb_setup_rx_ring(igb_rx_ring_t *);
64 static void igb_setup_tx_ring(igb_tx_ring_t *);
65 static void igb_setup_rss(igb_t *);
66 static void igb_setup_mac_rss_classify(igb_t *);
67 static void igb_setup_mac_classify(igb_t *);
68 static void igb_init_unicst(igb_t *);
69 static void igb_setup_multicst(igb_t *);
70 static void igb_get_phy_state(igb_t *);
71 static void igb_param_sync(igb_t *);
72 static void igb_get_conf(igb_t *);
73 static int igb_get_prop(igb_t *, char *, int, int, int);
74 static boolean_t igb_is_link_up(igb_t *);
75 static boolean_t igb_link_check(igb_t *);
76 static void igb_local_timer(void *);
77 static void igb_link_timer(void *);
78 static void igb_arm_watchdog_timer(igb_t *);
79 static void igb_start_watchdog_timer(igb_t *);
80 static void igb_restart_watchdog_timer(igb_t *);
81 static void igb_stop_watchdog_timer(igb_t *);
82 static void igb_start_link_timer(igb_t *);
83 static void igb_stop_link_timer(igb_t *);
84 static void igb_disable_adapter_interrupts(igb_t *);
85 static void igb_enable_adapter_interrupts_82575(igb_t *);
86 static void igb_enable_adapter_interrupts_82576(igb_t *);
87 static void igb_enable_adapter_interrupts_82580(igb_t *);
88 static boolean_t is_valid_mac_addr(uint8_t *);
89 static boolean_t igb_stall_check(igb_t *);
90 static boolean_t igb_set_loopback_mode(igb_t *, uint32_t);
91 static void igb_set_external_loopback(igb_t *);
92 static void igb_set_internal_phy_loopback(igb_t *);
93 static void igb_set_internal_serdes_loopback(igb_t *);
94 static boolean_t igb_find_mac_address(igb_t *);
95 static int igb_alloc_intrs(igb_t *);
96 static int igb_alloc_intr_handles(igb_t *, int);
97 static int igb_add_intr_handlers(igb_t *);
98 static void igb_rem_intr_handlers(igb_t *);
99 static void igb_rem_intrs(igb_t *);
100 static int igb_enable_intrs(igb_t *);
101 static int igb_disable_intrs(igb_t *);
102 static void igb_setup_msix_82575(igb_t *);
103 static void igb_setup_msix_82576(igb_t *);
104 static void igb_setup_msix_82580(igb_t *);
105 static uint_t igb_intr_legacy(void *, void *);
106 static uint_t igb_intr_msi(void *, void *);
107 static uint_t igb_intr_rx(void *, void *);
108 static uint_t igb_intr_tx(void *, void *);
109 static uint_t igb_intr_tx_other(void *, void *);
110 static void igb_intr_rx_work(igb_rx_ring_t *);
111 static void igb_intr_tx_work(igb_tx_ring_t *);
112 static void igb_intr_link_work(igb_t *);
113 static void igb_get_driver_control(struct e1000_hw *);
114 static void igb_release_driver_control(struct e1000_hw *);
115 
116 static int igb_attach(dev_info_t *, ddi_attach_cmd_t);
117 static int igb_detach(dev_info_t *, ddi_detach_cmd_t);
118 static int igb_resume(dev_info_t *);
119 static int igb_suspend(dev_info_t *);
120 static int igb_quiesce(dev_info_t *);
121 static void igb_unconfigure(dev_info_t *, igb_t *);
122 static int igb_fm_error_cb(dev_info_t *, ddi_fm_error_t *,
123     const void *);
124 static void igb_fm_init(igb_t *);
125 static void igb_fm_fini(igb_t *);
126 static void igb_release_multicast(igb_t *);
127 static int igb_ufm_fill_image(ddi_ufm_handle_t *, void *arg, uint_t,
128     ddi_ufm_image_t *);
129 static int igb_ufm_fill_slot(ddi_ufm_handle_t *, void *, uint_t, uint_t,
130     ddi_ufm_slot_t *);
131 static int igb_ufm_getcaps(ddi_ufm_handle_t *, void *, ddi_ufm_cap_t *);
132 static int igb_ufm_readimg(ddi_ufm_handle_t *, void *, uint_t, uint_t,
133     uint64_t, uint64_t, void *, uint64_t *);
134 
/*
 * Driver-private MAC properties published through m_priv_props in
 * igb_register_mac(); the array must be NULL-terminated.
 */
char *igb_priv_props[] = {
	"_eee_support",
	"_tx_copy_thresh",
	"_tx_recycle_thresh",
	"_tx_overload_thresh",
	"_tx_resched_thresh",
	"_rx_copy_thresh",
	"_rx_limit_per_intr",
	"_intr_throttling",
	"_adv_pause_cap",
	"_adv_asym_pause_cap",
	NULL
};
148 
/*
 * Character/block device entry points. All entries are stubs: this
 * driver is accessed through the MAC layer, not via a device node.
 */
static struct cb_ops igb_cb_ops = {
	nulldev,		/* cb_open */
	nulldev,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	nodev,			/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_MP | D_HOTPLUG,	/* cb_flag */
	CB_REV,			/* cb_rev */
	nodev,			/* cb_aread */
	nodev			/* cb_awrite */
};
169 
/*
 * Device operations vector; passed to mac_init_ops() in _init() so the
 * MAC framework can fill in its pieces before mod_install().
 */
static struct dev_ops igb_dev_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	NULL,			/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	igb_attach,		/* devo_attach */
	igb_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&igb_cb_ops,		/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	ddi_power,		/* devo_power */
	igb_quiesce,	/* devo_quiesce */
};
184 
static struct modldrv igb_modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	ident,			/* Description string */
	&igb_dev_ops,		/* driver ops */
};
190 
/* Module linkage; consumed by _init()/_fini()/_info() below */
static struct modlinkage igb_modlinkage = {
	MODREV_1, &igb_modldrv, NULL
};
194 
/* Access attributes for register mapping */
ddi_device_acc_attr_t igb_regs_acc_attr = {
	DDI_DEVICE_ATTR_V1,
	DDI_STRUCTURE_LE_ACC,	/* device registers are little-endian */
	DDI_STRICTORDER_ACC,
	DDI_FLAGERR_ACC		/* FM-aware: access faults are flagged */
};
202 
#define	IGB_M_CALLBACK_FLAGS \
	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO)

/*
 * GLDv3 callback vector, registered via igb_register_mac(). The NULL
 * entries are optional callbacks this driver does not provide here.
 */
static mac_callbacks_t igb_m_callbacks = {
	IGB_M_CALLBACK_FLAGS,
	igb_m_stat,
	igb_m_start,
	igb_m_stop,
	igb_m_promisc,
	igb_m_multicst,
	NULL,
	NULL,
	NULL,
	igb_m_ioctl,
	igb_m_getcapab,
	NULL,
	NULL,
	igb_m_setprop,
	igb_m_getprop,
	igb_m_propinfo
};
224 
/*
 * Initialize capabilities of each supported adapter type.
 *
 * Per-chipset queue limits, throttle-rate bounds, interrupt setup
 * routines and capability flags; one of these tables is selected in
 * igb_identify_hardware() based on the detected MAC type.
 */
static adapter_info_t igb_82575_cap = {
	/* limits */
	4,		/* maximum number of rx queues */
	1,		/* minimum number of rx queues */
	4,		/* default number of rx queues */
	4,		/* maximum number of tx queues */
	1,		/* minimum number of tx queues */
	4,		/* default number of tx queues */
	65535,		/* maximum interrupt throttle rate */
	0,		/* minimum interrupt throttle rate */
	200,		/* default interrupt throttle rate */

	/* function pointers */
	igb_enable_adapter_interrupts_82575,
	igb_setup_msix_82575,

	/* capabilities */
	(IGB_FLAG_HAS_DCA |	/* capability flags */
	IGB_FLAG_VMDQ_POOL),

	0xffc00000		/* mask for RXDCTL register */
};
250 
/* 82576: up to 16 rx/tx queues; adds IGB_FLAG_NEED_CTX_IDX over 82575 */
static adapter_info_t igb_82576_cap = {
	/* limits */
	16,		/* maximum number of rx queues */
	1,		/* minimum number of rx queues */
	4,		/* default number of rx queues */
	16,		/* maximum number of tx queues */
	1,		/* minimum number of tx queues */
	4,		/* default number of tx queues */
	65535,		/* maximum interrupt throttle rate */
	0,		/* minimum interrupt throttle rate */
	200,		/* default interrupt throttle rate */

	/* function pointers */
	igb_enable_adapter_interrupts_82576,
	igb_setup_msix_82576,

	/* capabilities */
	(IGB_FLAG_HAS_DCA |	/* capability flags */
	IGB_FLAG_VMDQ_POOL |
	IGB_FLAG_NEED_CTX_IDX),

	0xffe00000		/* mask for RXDCTL register */
};
274 
/* 82580: up to 8 rx/tx queues; its interrupt routines are shared by later chips */
static adapter_info_t igb_82580_cap = {
	/* limits */
	8,		/* maximum number of rx queues */
	1,		/* minimum number of rx queues */
	4,		/* default number of rx queues */
	8,		/* maximum number of tx queues */
	1,		/* minimum number of tx queues */
	4,		/* default number of tx queues */
	65535,		/* maximum interrupt throttle rate */
	0,		/* minimum interrupt throttle rate */
	200,		/* default interrupt throttle rate */

	/* function pointers */
	igb_enable_adapter_interrupts_82580,
	igb_setup_msix_82580,

	/* capabilities */
	(IGB_FLAG_HAS_DCA |	/* capability flags */
	IGB_FLAG_VMDQ_POOL |
	IGB_FLAG_NEED_CTX_IDX),

	0xffe00000		/* mask for RXDCTL register */
};
298 
/* i350: up to 8 rx/tx queues; reuses the 82580 interrupt/MSI-X routines */
static adapter_info_t igb_i350_cap = {
	/* limits */
	8,		/* maximum number of rx queues */
	1,		/* minimum number of rx queues */
	4,		/* default number of rx queues */
	8,		/* maximum number of tx queues */
	1,		/* minimum number of tx queues */
	4,		/* default number of tx queues */
	65535,		/* maximum interrupt throttle rate */
	0,		/* minimum interrupt throttle rate */
	200,		/* default interrupt throttle rate */

	/* function pointers */
	igb_enable_adapter_interrupts_82580,
	igb_setup_msix_82580,

	/* capabilities */
	(IGB_FLAG_HAS_DCA |	/* capability flags */
	IGB_FLAG_VMDQ_POOL |
	IGB_FLAG_NEED_CTX_IDX),

	0xffe00000		/* mask for RXDCTL register */
};
322 
/* i210 (also used for i211): up to 4 rx/tx queues; 82580-style interrupts */
static adapter_info_t igb_i210_cap = {
	/* limits */
	4,		/* maximum number of rx queues */
	1,		/* minimum number of rx queues */
	4,		/* default number of rx queues */
	4,		/* maximum number of tx queues */
	1,		/* minimum number of tx queues */
	4,		/* default number of tx queues */
	65535,		/* maximum interrupt throttle rate */
	0,		/* minimum interrupt throttle rate */
	200,		/* default interrupt throttle rate */

	/* function pointers */
	igb_enable_adapter_interrupts_82580,
	igb_setup_msix_82580,

	/* capabilities */
	(IGB_FLAG_HAS_DCA |	/* capability flags */
	IGB_FLAG_VMDQ_POOL |
	IGB_FLAG_NEED_CTX_IDX),

	0xfff00000		/* mask for RXDCTL register */
};
346 
/* i354: up to 8 rx/tx queues; reuses the 82580 interrupt/MSI-X routines */
static adapter_info_t igb_i354_cap = {
	/* limits */
	8,		/* maximum number of rx queues */
	1,		/* minimum number of rx queues */
	4,		/* default number of rx queues */
	8,		/* maximum number of tx queues */
	1,		/* minimum number of tx queues */
	4,		/* default number of tx queues */
	65535,		/* maximum interrupt throttle rate */
	0,		/* minimum interrupt throttle rate */
	200,		/* default interrupt throttle rate */

	/* function pointers */
	igb_enable_adapter_interrupts_82580,
	igb_setup_msix_82580,

	/* capabilities */
	(IGB_FLAG_HAS_DCA |	/* capability flags */
	IGB_FLAG_VMDQ_POOL |
	IGB_FLAG_NEED_CTX_IDX),

	0xfff00000		/* mask for RXDCTL register */
};
370 
/* UFM entry points; registered in igb_attach() on function 0 only */
static ddi_ufm_ops_t igb_ufm_ops = {
	.ddi_ufm_op_fill_image = igb_ufm_fill_image,
	.ddi_ufm_op_fill_slot = igb_ufm_fill_slot,
	.ddi_ufm_op_getcaps = igb_ufm_getcaps,
	.ddi_ufm_op_readimg = igb_ufm_readimg
};
377 
378 /*
379  * Module Initialization Functions
380  */
381 
382 int
383 _init(void)
384 {
385 	int status;
386 
387 	mac_init_ops(&igb_dev_ops, MODULE_NAME);
388 
389 	status = mod_install(&igb_modlinkage);
390 
391 	if (status != DDI_SUCCESS) {
392 		mac_fini_ops(&igb_dev_ops);
393 	}
394 
395 	return (status);
396 }
397 
398 int
399 _fini(void)
400 {
401 	int status;
402 
403 	status = mod_remove(&igb_modlinkage);
404 
405 	if (status == DDI_SUCCESS) {
406 		mac_fini_ops(&igb_dev_ops);
407 	}
408 
409 	return (status);
410 
411 }
412 
413 int
414 _info(struct modinfo *modinfop)
415 {
416 	int status;
417 
418 	status = mod_info(&igb_modlinkage, modinfop);
419 
420 	return (status);
421 }
422 
/*
 * igb_attach - driver attach
 *
 * This function is the device specific initialization entry
 * point. This entry point is required and must be written.
 * The DDI_ATTACH command must be provided in the attach entry
 * point. When attach() is called with cmd set to DDI_ATTACH,
 * all normal kernel services (such as kmem_alloc(9F)) are
 * available for use by the driver.
 *
 * The attach() function will be called once for each instance
 * of  the  device  on  the  system with cmd set to DDI_ATTACH.
 * Until attach() succeeds, the only driver entry points which
 * may be called are open(9E) and getinfo(9E).
 *
 * Each completed setup step is recorded as a bit in
 * igb->attach_progress so that the failure path (and later
 * igb_detach()) can tear down exactly what was set up via
 * igb_unconfigure().
 */
static int
igb_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
{
	igb_t *igb;
	struct igb_osdep *osdep;
	struct e1000_hw *hw;
	int instance;

	/*
	 * Check the command and perform corresponding operations
	 */
	switch (cmd) {
	default:
		return (DDI_FAILURE);

	case DDI_RESUME:
		return (igb_resume(devinfo));

	case DDI_ATTACH:
		break;
	}

	/* Get the device instance */
	instance = ddi_get_instance(devinfo);

	/* Allocate memory for the instance data structure */
	igb = kmem_zalloc(sizeof (igb_t), KM_SLEEP);

	igb->dip = devinfo;
	igb->instance = instance;

	/* Link the shared-code hw state and the OS-dependent state */
	hw = &igb->hw;
	osdep = &igb->osdep;
	hw->back = osdep;
	osdep->igb = igb;

	/* Attach the instance pointer to the dev_info data structure */
	ddi_set_driver_private(devinfo, igb);


	/*
	 * Initialize for fma support. The "fm-capable" property (default:
	 * all four capabilities enabled) controls what igb_fm_init() sets up.
	 */
	igb->fm_capabilities = igb_get_prop(igb, "fm-capable",
	    0, 0x0f,
	    DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE |
	    DDI_FM_DMACHK_CAPABLE | DDI_FM_ERRCB_CAPABLE);
	igb_fm_init(igb);
	igb->attach_progress |= ATTACH_PROGRESS_FMINIT;

	/*
	 * Map PCI config space registers
	 */
	if (pci_config_setup(devinfo, &osdep->cfg_handle) != DDI_SUCCESS) {
		igb_log(igb, IGB_LOG_ERROR, "Failed to map PCI configurations");
		goto attach_fail;
	}
	igb->attach_progress |= ATTACH_PROGRESS_PCI_CONFIG;

	/*
	 * Identify the chipset family
	 */
	if (igb_identify_hardware(igb) != IGB_SUCCESS) {
		igb_log(igb, IGB_LOG_ERROR, "Failed to identify hardware");
		goto attach_fail;
	}

	/*
	 * Map device registers
	 */
	if (igb_regs_map(igb) != IGB_SUCCESS) {
		igb_log(igb, IGB_LOG_ERROR, "Failed to map device registers");
		goto attach_fail;
	}
	igb->attach_progress |= ATTACH_PROGRESS_REGS_MAP;

	/*
	 * Initialize driver parameters
	 */
	igb_init_properties(igb);
	igb->attach_progress |= ATTACH_PROGRESS_PROPS;

	/*
	 * Allocate interrupts
	 */
	if (igb_alloc_intrs(igb) != IGB_SUCCESS) {
		igb_log(igb, IGB_LOG_ERROR, "Failed to allocate interrupts");
		goto attach_fail;
	}
	igb->attach_progress |= ATTACH_PROGRESS_ALLOC_INTR;

	/*
	 * Allocate rx/tx rings based on the ring numbers.
	 * The actual numbers of rx/tx rings are decided by the number of
	 * allocated interrupt vectors, so we should allocate the rings after
	 * interrupts are allocated.
	 */
	if (igb_alloc_rings(igb) != IGB_SUCCESS) {
		igb_log(igb, IGB_LOG_ERROR,
		    "Failed to allocate rx/tx rings or groups");
		goto attach_fail;
	}
	igb->attach_progress |= ATTACH_PROGRESS_ALLOC_RINGS;

	/*
	 * Add interrupt handlers
	 */
	if (igb_add_intr_handlers(igb) != IGB_SUCCESS) {
		igb_log(igb, IGB_LOG_ERROR, "Failed to add interrupt handlers");
		goto attach_fail;
	}
	igb->attach_progress |= ATTACH_PROGRESS_ADD_INTR;

	/*
	 * Initialize driver parameters
	 */
	if (igb_init_driver_settings(igb) != IGB_SUCCESS) {
		igb_log(igb, IGB_LOG_ERROR,
		    "Failed to initialize driver settings");
		goto attach_fail;
	}

	/* Verify config-space accesses so far were fault-free */
	if (igb_check_acc_handle(igb->osdep.cfg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
		goto attach_fail;
	}

	/*
	 * Initialize mutexes for this device.
	 * Do this before enabling the interrupt handler and
	 * register the softint to avoid the condition where
	 * interrupt handler can try using uninitialized mutex
	 */
	igb_init_locks(igb);
	igb->attach_progress |= ATTACH_PROGRESS_LOCKS;

	/*
	 * Initialize the adapter
	 */
	if (igb_init(igb) != IGB_SUCCESS) {
		igb_log(igb, IGB_LOG_ERROR, "Failed to initialize adapter");
		goto attach_fail;
	}
	igb->attach_progress |= ATTACH_PROGRESS_INIT_ADAPTER;

	/*
	 * Initialize statistics
	 */
	if (igb_init_stats(igb) != IGB_SUCCESS) {
		igb_log(igb, IGB_LOG_ERROR, "Failed to initialize statistics");
		goto attach_fail;
	}
	igb->attach_progress |= ATTACH_PROGRESS_STATS;

	/*
	 * Register the driver to the MAC
	 */
	if (igb_register_mac(igb) != IGB_SUCCESS) {
		igb_log(igb, IGB_LOG_ERROR, "Failed to register MAC");
		goto attach_fail;
	}
	igb->attach_progress |= ATTACH_PROGRESS_MAC;

	/*
	 * Now that mutex locks are initialized, and the chip is also
	 * initialized, enable interrupts.
	 */
	if (igb_enable_intrs(igb) != IGB_SUCCESS) {
		igb_log(igb, IGB_LOG_ERROR, "Failed to enable DDI interrupts");
		goto attach_fail;
	}
	igb->attach_progress |= ATTACH_PROGRESS_ENABLE_INTR;

	/*
	 * Only enable UFM support on function zero of the device as the images
	 * are always device wide.
	 */
	if (igb->hw.bus.func == 0) {
		if (ddi_ufm_init(devinfo, DDI_UFM_CURRENT_VERSION, &igb_ufm_ops,
		    &igb->igb_ufmh, igb) != 0) {
			igb_log(igb, IGB_LOG_ERROR, "Failed to enable DDI UFM "
			    "support");
			goto attach_fail;
		}
		igb->attach_progress |= ATTACH_PROGRESS_UFM;
		ddi_ufm_update(igb->igb_ufmh);
	}

	igb_log(igb, IGB_LOG_INFO, "%s", igb_version);
	atomic_or_32(&igb->igb_state, IGB_INITIALIZED);

	/*
	 * Newer models have Energy Efficient Ethernet, let's disable this by
	 * default.
	 */
	if (igb->hw.mac.type == e1000_i350)
		(void) e1000_set_eee_i350(&igb->hw, B_FALSE, B_FALSE);
	else if (igb->hw.mac.type == e1000_i354)
		(void) e1000_set_eee_i354(&igb->hw, B_FALSE, B_FALSE);

	return (DDI_SUCCESS);

attach_fail:
	/* Roll back whatever attach_progress records as completed */
	igb_unconfigure(devinfo, igb);
	return (DDI_FAILURE);
}
642 
/*
 * igb_detach - driver detach
 *
 * The detach() function is the complement of the attach routine.
 * If cmd is set to DDI_DETACH, detach() is used to remove  the
 * state  associated  with  a  given  instance of a device node
 * prior to the removal of that instance from the system.
 *
 * The detach() function will be called once for each  instance
 * of the device for which there has been a successful attach()
 * once there are no longer  any  opens  on  the  device.
 *
 * Interrupt routines are disabled, and all memory allocated by
 * this driver is freed.
 */
static int
igb_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
{
	igb_t *igb;

	/*
	 * Check detach command
	 */
	switch (cmd) {
	default:
		return (DDI_FAILURE);

	case DDI_SUSPEND:
		return (igb_suspend(devinfo));

	case DDI_DETACH:
		break;
	}


	/*
	 * Get the pointer to the driver private data structure
	 */
	igb = (igb_t *)ddi_get_driver_private(devinfo);
	if (igb == NULL)
		return (DDI_FAILURE);

	/*
	 * Unregister MAC. If failed, we have to fail the detach
	 */
	if (mac_unregister(igb->mac_hdl) != 0) {
		igb_log(igb, IGB_LOG_ERROR, "Failed to unregister MAC");
		return (DDI_FAILURE);
	}
	igb->attach_progress &= ~ATTACH_PROGRESS_MAC;

	/*
	 * If the device is still running, it needs to be stopped first.
	 * This check is necessary because under some specific circumstances,
	 * the detach routine can be called without stopping the interface
	 * first.
	 */
	mutex_enter(&igb->gen_lock);
	if (igb->igb_state & IGB_STARTED) {
		atomic_and_32(&igb->igb_state, ~IGB_STARTED);
		igb_stop(igb, B_TRUE);
		mutex_exit(&igb->gen_lock);
		/* Disable and stop the watchdog timer */
		igb_disable_watchdog_timer(igb);
	} else
		mutex_exit(&igb->gen_lock);

	/*
	 * Check if there are still rx buffers held by the upper layer.
	 * If so, fail the detach.
	 */
	if (!igb_rx_drain(igb))
		return (DDI_FAILURE);

	/*
	 * Do the remaining unconfigure routines
	 */
	igb_unconfigure(devinfo, igb);

	return (DDI_SUCCESS);
}
724 
/*
 * quiesce(9E) entry point.
 *
 * This function is called when the system is single-threaded at high
 * PIL with preemption disabled. Therefore, this function must not be
 * blocked. Accordingly no locks are taken and no blocking kernel
 * services are used here.
 *
 * This function returns DDI_SUCCESS on success, or DDI_FAILURE on failure.
 * DDI_FAILURE indicates an error condition and should almost never happen.
 */
static int
igb_quiesce(dev_info_t *devinfo)
{
	igb_t *igb;
	struct e1000_hw *hw;

	igb = (igb_t *)ddi_get_driver_private(devinfo);

	if (igb == NULL)
		return (DDI_FAILURE);

	hw = &igb->hw;

	/*
	 * Disable the adapter interrupts
	 */
	igb_disable_adapter_interrupts(igb);

	/* Tell firmware driver is no longer in control */
	igb_release_driver_control(hw);

	/*
	 * Reset the chipset
	 */
	(void) e1000_reset_hw(hw);

	/*
	 * Reset PHY if possible (the PHY reset may be fuse-blocked)
	 */
	if (e1000_check_reset_block(hw) == E1000_SUCCESS)
		(void) e1000_phy_hw_reset(hw);

	return (DDI_SUCCESS);
}
769 
/*
 * igb_unconfigure - release all resources held by this instance
 *
 * Tears down, roughly in reverse attach order, every setup step whose
 * ATTACH_PROGRESS_* bit is set, so it is safe to call with a partially
 * attached instance (e.g. from the igb_attach() failure path).
 */
static void
igb_unconfigure(dev_info_t *devinfo, igb_t *igb)
{
	if (igb->attach_progress & ATTACH_PROGRESS_UFM) {
		ddi_ufm_fini(igb->igb_ufmh);
	}

	/*
	 * Disable interrupt
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_ENABLE_INTR) {
		(void) igb_disable_intrs(igb);
	}

	/*
	 * Unregister MAC (a no-op when igb_detach() already cleared the bit)
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_MAC) {
		(void) mac_unregister(igb->mac_hdl);
	}

	/*
	 * Free statistics
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_STATS) {
		kstat_delete((kstat_t *)igb->igb_ks);
	}

	/*
	 * Remove interrupt handlers
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_ADD_INTR) {
		igb_rem_intr_handlers(igb);
	}

	/*
	 * Remove interrupts
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_ALLOC_INTR) {
		igb_rem_intrs(igb);
	}

	/*
	 * Remove driver properties
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_PROPS) {
		(void) ddi_prop_remove_all(devinfo);
	}

	/*
	 * Stop the adapter
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_INIT_ADAPTER) {
		mutex_enter(&igb->gen_lock);
		igb_stop_adapter(igb);
		mutex_exit(&igb->gen_lock);
		if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
			ddi_fm_service_impact(igb->dip, DDI_SERVICE_UNAFFECTED);
	}

	/*
	 * Free multicast table
	 */
	igb_release_multicast(igb);

	/*
	 * Free register handle
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_REGS_MAP) {
		if (igb->osdep.reg_handle != NULL)
			ddi_regs_map_free(&igb->osdep.reg_handle);
	}

	/*
	 * Free PCI config handle
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_PCI_CONFIG) {
		if (igb->osdep.cfg_handle != NULL)
			pci_config_teardown(&igb->osdep.cfg_handle);
	}

	/*
	 * Free locks
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_LOCKS) {
		igb_destroy_locks(igb);
	}

	/*
	 * Free the rx/tx rings
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_ALLOC_RINGS) {
		igb_free_rings(igb);
	}

	/*
	 * Remove FMA
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_FMINIT) {
		igb_fm_fini(igb);
	}

	/*
	 * Free the driver data structure
	 */
	kmem_free(igb, sizeof (igb_t));

	ddi_set_driver_private(devinfo, NULL);
}
882 
883 /*
884  * igb_register_mac - Register the driver and its function pointers with
885  * the GLD interface
886  */
887 static int
888 igb_register_mac(igb_t *igb)
889 {
890 	struct e1000_hw *hw = &igb->hw;
891 	mac_register_t *mac;
892 	int status;
893 
894 	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
895 		return (IGB_FAILURE);
896 
897 	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
898 	mac->m_driver = igb;
899 	mac->m_dip = igb->dip;
900 	mac->m_src_addr = hw->mac.addr;
901 	mac->m_callbacks = &igb_m_callbacks;
902 	mac->m_min_sdu = 0;
903 	mac->m_max_sdu = igb->max_frame_size -
904 	    sizeof (struct ether_vlan_header) - ETHERFCSL;
905 	mac->m_margin = VLAN_TAGSZ;
906 	mac->m_priv_props = igb_priv_props;
907 	mac->m_v12n = MAC_VIRT_LEVEL1;
908 
909 	status = mac_register(mac, &igb->mac_hdl);
910 
911 	mac_free(mac);
912 
913 	return ((status == 0) ? IGB_SUCCESS : IGB_FAILURE);
914 }
915 
916 /*
917  * igb_identify_hardware - Identify the type of the chipset
918  */
919 static int
920 igb_identify_hardware(igb_t *igb)
921 {
922 	struct e1000_hw *hw = &igb->hw;
923 	struct igb_osdep *osdep = &igb->osdep;
924 
925 	/*
926 	 * Get the device id
927 	 */
928 	hw->vendor_id =
929 	    pci_config_get16(osdep->cfg_handle, PCI_CONF_VENID);
930 	hw->device_id =
931 	    pci_config_get16(osdep->cfg_handle, PCI_CONF_DEVID);
932 	hw->revision_id =
933 	    pci_config_get8(osdep->cfg_handle, PCI_CONF_REVID);
934 	hw->subsystem_device_id =
935 	    pci_config_get16(osdep->cfg_handle, PCI_CONF_SUBSYSID);
936 	hw->subsystem_vendor_id =
937 	    pci_config_get16(osdep->cfg_handle, PCI_CONF_SUBVENID);
938 
939 	/*
940 	 * Set the mac type of the adapter based on the device id
941 	 */
942 	if (e1000_set_mac_type(hw) != E1000_SUCCESS) {
943 		return (IGB_FAILURE);
944 	}
945 
946 	/*
947 	 * Install adapter capabilities based on mac type
948 	 */
949 	switch (hw->mac.type) {
950 	case e1000_82575:
951 		igb->capab = &igb_82575_cap;
952 		break;
953 	case e1000_82576:
954 		igb->capab = &igb_82576_cap;
955 		break;
956 	case e1000_82580:
957 		igb->capab = &igb_82580_cap;
958 		break;
959 	case e1000_i350:
960 		igb->capab = &igb_i350_cap;
961 		break;
962 	case e1000_i210:
963 	case e1000_i211:
964 		igb->capab = &igb_i210_cap;
965 		break;
966 	case e1000_i354:
967 		igb->capab = &igb_i354_cap;
968 		break;
969 	default:
970 		return (IGB_FAILURE);
971 	}
972 
973 	return (IGB_SUCCESS);
974 }
975 
976 /*
977  * igb_regs_map - Map the device registers
978  */
979 static int
980 igb_regs_map(igb_t *igb)
981 {
982 	dev_info_t *devinfo = igb->dip;
983 	struct e1000_hw *hw = &igb->hw;
984 	struct igb_osdep *osdep = &igb->osdep;
985 	off_t mem_size;
986 
987 	/*
988 	 * First get the size of device registers to be mapped.
989 	 */
990 	if (ddi_dev_regsize(devinfo, IGB_ADAPTER_REGSET, &mem_size) !=
991 	    DDI_SUCCESS) {
992 		return (IGB_FAILURE);
993 	}
994 
995 	/*
996 	 * Call ddi_regs_map_setup() to map registers
997 	 */
998 	if ((ddi_regs_map_setup(devinfo, IGB_ADAPTER_REGSET,
999 	    (caddr_t *)&hw->hw_addr, 0,
1000 	    mem_size, &igb_regs_acc_attr,
1001 	    &osdep->reg_handle)) != DDI_SUCCESS) {
1002 		return (IGB_FAILURE);
1003 	}
1004 
1005 	return (IGB_SUCCESS);
1006 }
1007 
/*
 * igb_init_properties - Initialize driver properties
 *
 * Properties added here are removed again by ddi_prop_remove_all()
 * in igb_unconfigure().
 */
static void
igb_init_properties(igb_t *igb)
{
	/*
	 * Get conf file properties, including link settings
	 * jumbo frames, ring number, descriptor number, etc.
	 */
	igb_get_conf(igb);
}
1020 
1021 /*
1022  * igb_init_driver_settings - Initialize driver settings
1023  *
1024  * The settings include hardware function pointers, bus information,
1025  * rx/tx rings settings, link state, and any other parameters that
1026  * need to be setup during driver initialization.
1027  */
1028 static int
1029 igb_init_driver_settings(igb_t *igb)
1030 {
1031 	struct e1000_hw *hw = &igb->hw;
1032 	igb_rx_ring_t *rx_ring;
1033 	igb_tx_ring_t *tx_ring;
1034 	uint32_t rx_size;
1035 	uint32_t tx_size;
1036 	int i;
1037 
1038 	/*
1039 	 * Initialize chipset specific hardware function pointers
1040 	 */
1041 	if (e1000_setup_init_funcs(hw, B_TRUE) != E1000_SUCCESS) {
1042 		return (IGB_FAILURE);
1043 	}
1044 
1045 	/*
1046 	 * Get bus information
1047 	 */
1048 	if (e1000_get_bus_info(hw) != E1000_SUCCESS) {
1049 		return (IGB_FAILURE);
1050 	}
1051 
1052 	/*
1053 	 * Get the system page size
1054 	 */
1055 	igb->page_size = ddi_ptob(igb->dip, (ulong_t)1);
1056 
1057 	/*
1058 	 * Set rx buffer size
1059 	 * The IP header alignment room is counted in the calculation.
1060 	 * The rx buffer size is in unit of 1K that is required by the
1061 	 * chipset hardware.
1062 	 */
1063 	rx_size = igb->max_frame_size + IPHDR_ALIGN_ROOM;
1064 	igb->rx_buf_size = ((rx_size >> 10) +
1065 	    ((rx_size & (((uint32_t)1 << 10) - 1)) > 0 ? 1 : 0)) << 10;
1066 
1067 	/*
1068 	 * Set tx buffer size
1069 	 */
1070 	tx_size = igb->max_frame_size;
1071 	igb->tx_buf_size = ((tx_size >> 10) +
1072 	    ((tx_size & (((uint32_t)1 << 10) - 1)) > 0 ? 1 : 0)) << 10;
1073 
1074 	/*
1075 	 * Initialize rx/tx rings parameters
1076 	 */
1077 	for (i = 0; i < igb->num_rx_rings; i++) {
1078 		rx_ring = &igb->rx_rings[i];
1079 		rx_ring->index = i;
1080 		rx_ring->igb = igb;
1081 	}
1082 
1083 	for (i = 0; i < igb->num_tx_rings; i++) {
1084 		tx_ring = &igb->tx_rings[i];
1085 		tx_ring->index = i;
1086 		tx_ring->igb = igb;
1087 		if (igb->tx_head_wb_enable)
1088 			tx_ring->tx_recycle = igb_tx_recycle_head_wb;
1089 		else
1090 			tx_ring->tx_recycle = igb_tx_recycle_legacy;
1091 
1092 		tx_ring->ring_size = igb->tx_ring_size;
1093 		tx_ring->free_list_size = igb->tx_ring_size +
1094 		    (igb->tx_ring_size >> 1);
1095 	}
1096 
1097 	/*
1098 	 * Initialize values of interrupt throttling rates
1099 	 */
1100 	for (i = 1; i < MAX_NUM_EITR; i++)
1101 		igb->intr_throttling[i] = igb->intr_throttling[0];
1102 
1103 	/*
1104 	 * The initial link state should be "unknown"
1105 	 */
1106 	igb->link_state = LINK_STATE_UNKNOWN;
1107 
1108 	return (IGB_SUCCESS);
1109 }
1110 
1111 /*
1112  * igb_init_locks - Initialize locks
1113  */
1114 static void
1115 igb_init_locks(igb_t *igb)
1116 {
1117 	igb_rx_ring_t *rx_ring;
1118 	igb_tx_ring_t *tx_ring;
1119 	int i;
1120 
1121 	for (i = 0; i < igb->num_rx_rings; i++) {
1122 		rx_ring = &igb->rx_rings[i];
1123 		mutex_init(&rx_ring->rx_lock, NULL,
1124 		    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1125 	}
1126 
1127 	for (i = 0; i < igb->num_tx_rings; i++) {
1128 		tx_ring = &igb->tx_rings[i];
1129 		mutex_init(&tx_ring->tx_lock, NULL,
1130 		    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1131 		mutex_init(&tx_ring->recycle_lock, NULL,
1132 		    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1133 		mutex_init(&tx_ring->tcb_head_lock, NULL,
1134 		    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1135 		mutex_init(&tx_ring->tcb_tail_lock, NULL,
1136 		    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1137 	}
1138 
1139 	mutex_init(&igb->gen_lock, NULL,
1140 	    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1141 
1142 	mutex_init(&igb->watchdog_lock, NULL,
1143 	    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1144 
1145 	mutex_init(&igb->link_lock, NULL,
1146 	    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1147 }
1148 
1149 /*
1150  * igb_destroy_locks - Destroy locks
1151  */
1152 static void
1153 igb_destroy_locks(igb_t *igb)
1154 {
1155 	igb_rx_ring_t *rx_ring;
1156 	igb_tx_ring_t *tx_ring;
1157 	int i;
1158 
1159 	for (i = 0; i < igb->num_rx_rings; i++) {
1160 		rx_ring = &igb->rx_rings[i];
1161 		mutex_destroy(&rx_ring->rx_lock);
1162 	}
1163 
1164 	for (i = 0; i < igb->num_tx_rings; i++) {
1165 		tx_ring = &igb->tx_rings[i];
1166 		mutex_destroy(&tx_ring->tx_lock);
1167 		mutex_destroy(&tx_ring->recycle_lock);
1168 		mutex_destroy(&tx_ring->tcb_head_lock);
1169 		mutex_destroy(&tx_ring->tcb_tail_lock);
1170 	}
1171 
1172 	mutex_destroy(&igb->gen_lock);
1173 	mutex_destroy(&igb->watchdog_lock);
1174 	mutex_destroy(&igb->link_lock);
1175 }
1176 
/*
 * igb_resume - DDI_RESUME handler
 *
 * Re-enables the DDI interrupts and, if the interface was running when
 * it was suspended (IGB_STARTED set), restarts the chipset and the
 * watchdog timer.  Clears IGB_SUSPENDED on success.
 */
static int
igb_resume(dev_info_t *devinfo)
{
	igb_t *igb;

	igb = (igb_t *)ddi_get_driver_private(devinfo);
	if (igb == NULL)
		return (DDI_FAILURE);

	mutex_enter(&igb->gen_lock);

	/*
	 * Enable interrupts
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_ENABLE_INTR) {
		if (igb_enable_intrs(igb) != IGB_SUCCESS) {
			igb_log(igb, IGB_LOG_ERROR,
			    "Failed to enable DDI interrupts");
			mutex_exit(&igb->gen_lock);
			return (DDI_FAILURE);
		}
	}

	/* Restart the chipset only if it was running at suspend time */
	if (igb->igb_state & IGB_STARTED) {
		/* B_FALSE: rx/tx DMA buffers were kept across the suspend */
		if (igb_start(igb, B_FALSE) != IGB_SUCCESS) {
			mutex_exit(&igb->gen_lock);
			return (DDI_FAILURE);
		}

		/*
		 * Enable and start the watchdog timer
		 */
		igb_enable_watchdog_timer(igb);
	}

	atomic_and_32(&igb->igb_state, ~IGB_SUSPENDED);

	mutex_exit(&igb->gen_lock);

	return (DDI_SUCCESS);
}
1218 
/*
 * igb_suspend - DDI_SUSPEND handler
 *
 * Marks the instance IGB_SUSPENDED, disables interrupts and, if the
 * interface is running, stops the chipset (keeping the DMA buffers for
 * igb_resume()) and the watchdog timer.
 */
static int
igb_suspend(dev_info_t *devinfo)
{
	igb_t *igb;

	igb = (igb_t *)ddi_get_driver_private(devinfo);
	if (igb == NULL)
		return (DDI_FAILURE);

	mutex_enter(&igb->gen_lock);

	/* Set the suspend flag first so other paths see it under gen_lock */
	atomic_or_32(&igb->igb_state, IGB_SUSPENDED);

	/*
	 * Disable interrupts
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_ENABLE_INTR) {
		(void) igb_disable_intrs(igb);
	}

	/* Nothing more to do if the interface was not started */
	if (!(igb->igb_state & IGB_STARTED)) {
		mutex_exit(&igb->gen_lock);
		return (DDI_SUCCESS);
	}

	/* B_FALSE: keep the rx/tx DMA buffers so resume can reuse them */
	igb_stop(igb, B_FALSE);

	mutex_exit(&igb->gen_lock);

	/*
	 * Disable and stop the watchdog timer
	 */
	igb_disable_watchdog_timer(igb);

	return (DDI_SUCCESS);
}
1255 
1256 static int
1257 igb_init(igb_t *igb)
1258 {
1259 	mutex_enter(&igb->gen_lock);
1260 
1261 	/*
1262 	 * Initilize the adapter
1263 	 */
1264 	if (igb_init_adapter(igb) != IGB_SUCCESS) {
1265 		mutex_exit(&igb->gen_lock);
1266 		igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
1267 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
1268 		return (IGB_FAILURE);
1269 	}
1270 
1271 	mutex_exit(&igb->gen_lock);
1272 
1273 	return (IGB_SUCCESS);
1274 }
1275 
1276 /*
1277  * igb_init_mac_address - Initialize the default MAC address
1278  *
1279  * On success, the MAC address is entered in the igb->hw.mac.addr
1280  * and hw->mac.perm_addr fields and the adapter's RAR(0) receive
1281  * address register.
1282  *
1283  * Important side effects:
1284  * 1. adapter is reset - this is required to put it in a known state.
1285  * 2. all of non-volatile memory (NVM) is read & checksummed - NVM is where
1286  * MAC address and all default settings are stored, so a valid checksum
1287  * is required.
1288  */
1289 static int
1290 igb_init_mac_address(igb_t *igb)
1291 {
1292 	struct e1000_hw *hw = &igb->hw;
1293 
1294 	ASSERT(mutex_owned(&igb->gen_lock));
1295 
1296 	/*
1297 	 * Reset chipset to put the hardware in a known state
1298 	 * before we try to get MAC address from NVM.
1299 	 */
1300 	if (e1000_reset_hw(hw) != E1000_SUCCESS) {
1301 		igb_log(igb, IGB_LOG_ERROR, "Adapter reset failed.");
1302 		goto init_mac_fail;
1303 	}
1304 
1305 	/*
1306 	 * NVM validation
1307 	 */
1308 	if (((igb->hw.mac.type != e1000_i210) &&
1309 	    (igb->hw.mac.type != e1000_i211)) &&
1310 	    (e1000_validate_nvm_checksum(hw) < 0)) {
1311 		/*
1312 		 * Some PCI-E parts fail the first check due to
1313 		 * the link being in sleep state.  Call it again,
1314 		 * if it fails a second time its a real issue.
1315 		 */
1316 		if (e1000_validate_nvm_checksum(hw) < 0) {
1317 			igb_log(igb, IGB_LOG_ERROR,
1318 			    "Invalid NVM checksum. Please contact "
1319 			    "the vendor to update the NVM.");
1320 			goto init_mac_fail;
1321 		}
1322 	}
1323 
1324 	/*
1325 	 * Get the mac address
1326 	 * This function should handle SPARC case correctly.
1327 	 */
1328 	if (!igb_find_mac_address(igb)) {
1329 		igb_log(igb, IGB_LOG_ERROR, "Failed to get the mac address");
1330 		goto init_mac_fail;
1331 	}
1332 
1333 	/* Validate mac address */
1334 	if (!is_valid_mac_addr(hw->mac.addr)) {
1335 		igb_log(igb, IGB_LOG_ERROR, "Invalid mac address");
1336 		goto init_mac_fail;
1337 	}
1338 
1339 	return (IGB_SUCCESS);
1340 
1341 init_mac_fail:
1342 	return (IGB_FAILURE);
1343 }
1344 
1345 /*
1346  * igb_init_adapter - Initialize the adapter
1347  */
1348 static int
1349 igb_init_adapter(igb_t *igb)
1350 {
1351 	struct e1000_hw *hw = &igb->hw;
1352 	uint32_t pba;
1353 	int oemid[2];
1354 	uint16_t nvmword;
1355 	uint32_t hwm;
1356 	uint32_t default_mtu;
1357 	u8 pbanum[E1000_PBANUM_LENGTH];
1358 	char eepromver[5];	/* f.ff */
1359 	int i;
1360 
1361 	ASSERT(mutex_owned(&igb->gen_lock));
1362 
1363 	/*
1364 	 * In order to obtain the default MAC address, this will reset the
1365 	 * adapter and validate the NVM that the address and many other
1366 	 * default settings come from.
1367 	 */
1368 	if (igb_init_mac_address(igb) != IGB_SUCCESS) {
1369 		igb_log(igb, IGB_LOG_ERROR, "Failed to initialize MAC address");
1370 		goto init_adapter_fail;
1371 	}
1372 
1373 	/*
1374 	 * Packet Buffer Allocation (PBA)
1375 	 * Writing PBA sets the receive portion of the buffer
1376 	 * the remainder is used for the transmit buffer.
1377 	 */
1378 	switch (hw->mac.type) {
1379 	case e1000_82575:
1380 		pba = E1000_PBA_32K;
1381 		break;
1382 	case e1000_82576:
1383 		pba = E1000_READ_REG(hw, E1000_RXPBS);
1384 		pba &= E1000_RXPBS_SIZE_MASK_82576;
1385 		break;
1386 	case e1000_82580:
1387 	case e1000_i350:
1388 	case e1000_i354:
1389 		pba = E1000_READ_REG(hw, E1000_RXPBS);
1390 		pba = e1000_rxpbs_adjust_82580(pba);
1391 		break;
1392 	case e1000_i210:
1393 	case e1000_i211:
1394 		pba = E1000_PBA_34K;
1395 	default:
1396 		break;
1397 	}
1398 
1399 	/* Special needs in case of Jumbo frames */
1400 	default_mtu = igb_get_prop(igb, PROP_DEFAULT_MTU,
1401 	    MIN_MTU, MAX_MTU, DEFAULT_MTU);
1402 	if ((hw->mac.type == e1000_82575) && (default_mtu > ETHERMTU)) {
1403 		u32 tx_space, min_tx, min_rx;
1404 		pba = E1000_READ_REG(hw, E1000_PBA);
1405 		tx_space = pba >> 16;
1406 		pba &= 0xffff;
1407 		min_tx = (igb->max_frame_size +
1408 		    sizeof (struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
1409 		min_tx = roundup(min_tx, 1024);
1410 		min_tx >>= 10;
1411 		min_rx = igb->max_frame_size;
1412 		min_rx = roundup(min_rx, 1024);
1413 		min_rx >>= 10;
1414 		if (tx_space < min_tx &&
1415 		    ((min_tx - tx_space) < pba)) {
1416 			pba = pba - (min_tx - tx_space);
1417 			/*
1418 			 * if short on rx space, rx wins
1419 			 * and must trump tx adjustment
1420 			 */
1421 			if (pba < min_rx)
1422 				pba = min_rx;
1423 		}
1424 		E1000_WRITE_REG(hw, E1000_PBA, pba);
1425 	}
1426 
1427 	DEBUGOUT1("igb_init: pba=%dK", pba);
1428 
1429 	/*
1430 	 * These parameters control the automatic generation (Tx) and
1431 	 * response (Rx) to Ethernet PAUSE frames.
1432 	 * - High water mark should allow for at least two frames to be
1433 	 *   received after sending an XOFF.
1434 	 * - Low water mark works best when it is very near the high water mark.
1435 	 *   This allows the receiver to restart by sending XON when it has
1436 	 *   drained a bit.
1437 	 */
1438 	hwm = min(((pba << 10) * 9 / 10),
1439 	    ((pba << 10) - 2 * igb->max_frame_size));
1440 
1441 	if (hw->mac.type < e1000_82576) {
1442 		hw->fc.high_water = hwm & 0xFFF8;  /* 8-byte granularity */
1443 		hw->fc.low_water = hw->fc.high_water - 8;
1444 	} else {
1445 		hw->fc.high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1446 		hw->fc.low_water = hw->fc.high_water - 16;
1447 	}
1448 
1449 	hw->fc.pause_time = E1000_FC_PAUSE_TIME;
1450 	hw->fc.send_xon = B_TRUE;
1451 
1452 	(void) e1000_validate_mdi_setting(hw);
1453 
1454 	/*
1455 	 * Reset the chipset hardware the second time to put PBA settings
1456 	 * into effect.
1457 	 */
1458 	if (e1000_reset_hw(hw) != E1000_SUCCESS) {
1459 		igb_log(igb, IGB_LOG_ERROR, "Second reset failed");
1460 		goto init_adapter_fail;
1461 	}
1462 
1463 	/*
1464 	 * Don't wait for auto-negotiation to complete
1465 	 */
1466 	hw->phy.autoneg_wait_to_complete = B_FALSE;
1467 
1468 	/*
1469 	 * Copper options
1470 	 */
1471 	if (hw->phy.media_type == e1000_media_type_copper) {
1472 		hw->phy.mdix = 0;	/* AUTO_ALL_MODES */
1473 		hw->phy.disable_polarity_correction = B_FALSE;
1474 		hw->phy.ms_type = e1000_ms_hw_default; /* E1000_MASTER_SLAVE */
1475 	}
1476 
1477 	/*
1478 	 * Initialize link settings
1479 	 */
1480 	(void) igb_setup_link(igb, B_FALSE);
1481 
1482 	/*
1483 	 * Configure/Initialize hardware
1484 	 */
1485 	if (e1000_init_hw(hw) != E1000_SUCCESS) {
1486 		igb_log(igb, IGB_LOG_ERROR, "Failed to initialize hardware");
1487 		goto init_adapter_fail;
1488 	}
1489 
1490 	/*
1491 	 *  Start the link setup timer
1492 	 */
1493 	igb_start_link_timer(igb);
1494 
1495 	/*
1496 	 * Disable wakeup control by default
1497 	 */
1498 	E1000_WRITE_REG(hw, E1000_WUC, 0);
1499 
1500 	/*
1501 	 * Record phy info in hw struct
1502 	 */
1503 	(void) e1000_get_phy_info(hw);
1504 
1505 	/*
1506 	 * Make sure driver has control
1507 	 */
1508 	igb_get_driver_control(hw);
1509 
1510 	/*
1511 	 * Restore LED settings to the default from EEPROM
1512 	 * to meet the standard for Sun platforms.
1513 	 */
1514 	(void) e1000_cleanup_led(hw);
1515 
1516 	/*
1517 	 * Setup MSI-X interrupts
1518 	 */
1519 	if (igb->intr_type == DDI_INTR_TYPE_MSIX)
1520 		igb->capab->setup_msix(igb);
1521 
1522 	/*
1523 	 * Initialize unicast addresses.
1524 	 */
1525 	igb_init_unicst(igb);
1526 
1527 	/*
1528 	 * Setup and initialize the mctable structures.
1529 	 */
1530 	igb_setup_multicst(igb);
1531 
1532 	/*
1533 	 * Set interrupt throttling rate
1534 	 */
1535 	for (i = 0; i < igb->intr_cnt; i++)
1536 		E1000_WRITE_REG(hw, E1000_EITR(i), igb->intr_throttling[i]);
1537 
1538 	/*
1539 	 * Read identifying information and place in devinfo.
1540 	 */
1541 	nvmword = 0xffff;
1542 	(void) e1000_read_nvm(&igb->hw, NVM_OEM_OFFSET_0, 1, &nvmword);
1543 	oemid[0] = (int)nvmword;
1544 	(void) e1000_read_nvm(&igb->hw, NVM_OEM_OFFSET_1, 1, &nvmword);
1545 	oemid[1] = (int)nvmword;
1546 	(void) ddi_prop_update_int_array(DDI_DEV_T_NONE, igb->dip,
1547 	    "oem-identifier", oemid, 2);
1548 
1549 	pbanum[0] = '\0';
1550 	(void) e1000_read_pba_string(&igb->hw, pbanum, sizeof (pbanum));
1551 	if (*pbanum != '\0') {
1552 		(void) ddi_prop_update_string(DDI_DEV_T_NONE, igb->dip,
1553 		    "printed-board-assembly", (char *)pbanum);
1554 	}
1555 
1556 	nvmword = 0xffff;
1557 	(void) e1000_read_nvm(&igb->hw, NVM_VERSION, 1, &nvmword);
1558 	if ((nvmword & 0xf00) == 0) {
1559 		(void) snprintf(eepromver, sizeof (eepromver), "%x.%x",
1560 		    (nvmword & 0xf000) >> 12, (nvmword & 0xff));
1561 		(void) ddi_prop_update_string(DDI_DEV_T_NONE, igb->dip,
1562 		    "nvm-version", eepromver);
1563 	}
1564 
1565 	/*
1566 	 * Save the state of the phy
1567 	 */
1568 	igb_get_phy_state(igb);
1569 
1570 	igb_param_sync(igb);
1571 
1572 	return (IGB_SUCCESS);
1573 
1574 init_adapter_fail:
1575 	/*
1576 	 * Reset PHY if possible
1577 	 */
1578 	if (e1000_check_reset_block(hw) == E1000_SUCCESS)
1579 		(void) e1000_phy_hw_reset(hw);
1580 
1581 	return (IGB_FAILURE);
1582 }
1583 
1584 /*
1585  * igb_stop_adapter - Stop the adapter
1586  */
static void
igb_stop_adapter(igb_t *igb)
{
	struct e1000_hw *hw = &igb->hw;

	/* Caller must hold gen_lock (see igb_reset()/igb_stop()) */
	ASSERT(mutex_owned(&igb->gen_lock));

	/* Stop the link setup timer */
	igb_stop_link_timer(igb);

	/* Tell firmware driver is no longer in control */
	igb_release_driver_control(hw);

	/*
	 * Reset the chipset
	 */
	if (e1000_reset_hw(hw) != E1000_SUCCESS) {
		/* Report the failed reset to FMA; the stop itself proceeds */
		igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
	}

	/*
	 * e1000_phy_hw_reset is not needed here, MAC reset above is sufficient
	 */
}
1612 
1613 /*
1614  * igb_reset - Reset the chipset and restart the driver.
1615  *
1616  * It involves stopping and re-starting the chipset,
1617  * and re-configuring the rx/tx rings.
1618  */
static int
igb_reset(igb_t *igb)
{
	int i;

	mutex_enter(&igb->gen_lock);

	/* Only a started interface may be reset; clear STARTED during it */
	ASSERT(igb->igb_state & IGB_STARTED);
	atomic_and_32(&igb->igb_state, ~IGB_STARTED);

	/*
	 * Disable the adapter interrupts to stop any rx/tx activities
	 * before draining pending data and resetting hardware.
	 */
	igb_disable_adapter_interrupts(igb);

	/*
	 * Drain the pending transmit packets
	 */
	(void) igb_tx_drain(igb);

	/*
	 * Lock ordering: gen_lock, then all rx ring locks, then all tx
	 * ring locks, each in ascending ring index; released in reverse.
	 */
	for (i = 0; i < igb->num_rx_rings; i++)
		mutex_enter(&igb->rx_rings[i].rx_lock);
	for (i = 0; i < igb->num_tx_rings; i++)
		mutex_enter(&igb->tx_rings[i].tx_lock);

	/*
	 * Stop the adapter
	 */
	igb_stop_adapter(igb);

	/*
	 * Clean the pending tx data/resources
	 */
	igb_tx_clean(igb);

	/*
	 * Start the adapter
	 */
	if (igb_init_adapter(igb) != IGB_SUCCESS) {
		igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
		goto reset_failure;
	}

	/*
	 * Setup the rx/tx rings
	 */
	igb->tx_ring_init = B_FALSE;
	igb_setup_rings(igb);

	/* A successful re-init clears any error/stall indication */
	atomic_and_32(&igb->igb_state, ~(IGB_ERROR | IGB_STALL));

	/*
	 * Enable adapter interrupts
	 * The interrupts must be enabled after the driver state is START
	 */
	igb->capab->enable_intr(igb);

	/* Verify register/config access handles before declaring success */
	if (igb_check_acc_handle(igb->osdep.cfg_handle) != DDI_FM_OK)
		goto reset_failure;

	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
		goto reset_failure;

	for (i = igb->num_tx_rings - 1; i >= 0; i--)
		mutex_exit(&igb->tx_rings[i].tx_lock);
	for (i = igb->num_rx_rings - 1; i >= 0; i--)
		mutex_exit(&igb->rx_rings[i].rx_lock);

	atomic_or_32(&igb->igb_state, IGB_STARTED);

	mutex_exit(&igb->gen_lock);

	return (IGB_SUCCESS);

reset_failure:
	/* Drop the ring locks and gen_lock in reverse acquisition order */
	for (i = igb->num_tx_rings - 1; i >= 0; i--)
		mutex_exit(&igb->tx_rings[i].tx_lock);
	for (i = igb->num_rx_rings - 1; i >= 0; i--)
		mutex_exit(&igb->rx_rings[i].rx_lock);

	mutex_exit(&igb->gen_lock);

	ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);

	return (IGB_FAILURE);
}
1706 
1707 /*
1708  * igb_tx_clean - Clean the pending transmit packets and DMA resources
1709  */
static void
igb_tx_clean(igb_t *igb)
{
	igb_tx_ring_t *tx_ring;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	uint32_t desc_num;
	int i, j;

	LINK_LIST_INIT(&pending_list);

	for (i = 0; i < igb->num_tx_rings; i++) {
		tx_ring = &igb->tx_rings[i];

		mutex_enter(&tx_ring->recycle_lock);

		/*
		 * Clean the pending tx data - the pending packets in the
		 * work_list that have no chances to be transmitted again.
		 *
		 * We must ensure the chipset is stopped or the link is down
		 * before cleaning the transmit packets.
		 */
		desc_num = 0;
		for (j = 0; j < tx_ring->ring_size; j++) {
			tcb = tx_ring->work_list[j];
			if (tcb != NULL) {
				/* Account for descriptors this tcb consumed */
				desc_num += tcb->desc_num;

				tx_ring->work_list[j] = NULL;

				/*
				 * NOTE(review): igb_free_tcb() is assumed to
				 * release only the resources the tcb holds,
				 * not the tcb structure itself, since the tcb
				 * is queued on pending_list right after --
				 * confirm against igb_free_tcb()'s definition.
				 */
				igb_free_tcb(tcb);

				LIST_PUSH_TAIL(&pending_list, &tcb->link);
			}
		}

		if (desc_num > 0) {
			/* Return the reclaimed descriptors to the ring */
			atomic_add_32(&tx_ring->tbd_free, desc_num);
			ASSERT(tx_ring->tbd_free == tx_ring->ring_size);

			/*
			 * Reset the head and tail pointers of the tbd ring;
			 * Reset the head write-back if it is enabled.
			 */
			tx_ring->tbd_head = 0;
			tx_ring->tbd_tail = 0;
			if (igb->tx_head_wb_enable)
				*tx_ring->tbd_head_wb = 0;

			E1000_WRITE_REG(&igb->hw, E1000_TDH(tx_ring->index), 0);
			E1000_WRITE_REG(&igb->hw, E1000_TDT(tx_ring->index), 0);
		}

		mutex_exit(&tx_ring->recycle_lock);

		/*
		 * Add the tx control blocks in the pending list to
		 * the free list.
		 */
		igb_put_free_list(tx_ring, &pending_list);
	}
}
1773 
1774 /*
1775  * igb_tx_drain - Drain the tx rings to allow pending packets to be transmitted
1776  */
1777 static boolean_t
1778 igb_tx_drain(igb_t *igb)
1779 {
1780 	igb_tx_ring_t *tx_ring;
1781 	boolean_t done;
1782 	int i, j;
1783 
1784 	/*
1785 	 * Wait for a specific time to allow pending tx packets
1786 	 * to be transmitted.
1787 	 *
1788 	 * Check the counter tbd_free to see if transmission is done.
1789 	 * No lock protection is needed here.
1790 	 *
1791 	 * Return B_TRUE if all pending packets have been transmitted;
1792 	 * Otherwise return B_FALSE;
1793 	 */
1794 	for (i = 0; i < TX_DRAIN_TIME; i++) {
1795 
1796 		done = B_TRUE;
1797 		for (j = 0; j < igb->num_tx_rings; j++) {
1798 			tx_ring = &igb->tx_rings[j];
1799 			done = done &&
1800 			    (tx_ring->tbd_free == tx_ring->ring_size);
1801 		}
1802 
1803 		if (done)
1804 			break;
1805 
1806 		msec_delay(1);
1807 	}
1808 
1809 	return (done);
1810 }
1811 
1812 /*
1813  * igb_rx_drain - Wait for all rx buffers to be released by upper layer
1814  */
1815 static boolean_t
1816 igb_rx_drain(igb_t *igb)
1817 {
1818 	boolean_t done;
1819 	int i;
1820 
1821 	/*
1822 	 * Polling the rx free list to check if those rx buffers held by
1823 	 * the upper layer are released.
1824 	 *
1825 	 * Check the counter rcb_free to see if all pending buffers are
1826 	 * released. No lock protection is needed here.
1827 	 *
1828 	 * Return B_TRUE if all pending buffers have been released;
1829 	 * Otherwise return B_FALSE;
1830 	 */
1831 	for (i = 0; i < RX_DRAIN_TIME; i++) {
1832 		done = (igb->rcb_pending == 0);
1833 
1834 		if (done)
1835 			break;
1836 
1837 		msec_delay(1);
1838 	}
1839 
1840 	return (done);
1841 }
1842 
1843 /*
1844  * igb_start - Start the driver/chipset
1845  */
int
igb_start(igb_t *igb, boolean_t alloc_buffer)
{
	int i;

	/* Caller must hold gen_lock (see igb_resume() and igb_init()) */
	ASSERT(mutex_owned(&igb->gen_lock));

	if (alloc_buffer) {
		/* First start (or start after full stop): build rx data */
		if (igb_alloc_rx_data(igb) != IGB_SUCCESS) {
			igb_log(igb, IGB_LOG_ERROR,
			    "Failed to allocate software receive rings");
			return (IGB_FAILURE);
		}

		/* Allocate buffers for all the rx/tx rings */
		if (igb_alloc_dma(igb) != IGB_SUCCESS) {
			igb_log(igb, IGB_LOG_ERROR,
			    "Failed to allocate DMA resource");
			return (IGB_FAILURE);
		}

		igb->tx_ring_init = B_TRUE;
	} else {
		/* Resume path: buffers survived the suspend */
		igb->tx_ring_init = B_FALSE;
	}

	/*
	 * Lock ordering: rx ring locks then tx ring locks, ascending
	 * index; released in reverse below.
	 */
	for (i = 0; i < igb->num_rx_rings; i++)
		mutex_enter(&igb->rx_rings[i].rx_lock);
	for (i = 0; i < igb->num_tx_rings; i++)
		mutex_enter(&igb->tx_rings[i].tx_lock);

	/*
	 * Start the adapter
	 */
	if ((igb->attach_progress & ATTACH_PROGRESS_INIT_ADAPTER) == 0) {
		if (igb_init_adapter(igb) != IGB_SUCCESS) {
			igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
			goto start_failure;
		}
		igb->attach_progress |= ATTACH_PROGRESS_INIT_ADAPTER;
	}

	/*
	 * Setup the rx/tx rings
	 */
	igb_setup_rings(igb);

	/*
	 * Enable adapter interrupts
	 * The interrupts must be enabled after the driver state is START
	 */
	igb->capab->enable_intr(igb);

	/* Verify register/config access handles before declaring success */
	if (igb_check_acc_handle(igb->osdep.cfg_handle) != DDI_FM_OK)
		goto start_failure;

	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
		goto start_failure;

	/* Disable Energy-Efficient Ethernet on the parts that support it */
	if (igb->hw.mac.type == e1000_i350)
		(void) e1000_set_eee_i350(&igb->hw, B_FALSE, B_FALSE);
	else if (igb->hw.mac.type == e1000_i354)
		(void) e1000_set_eee_i354(&igb->hw, B_FALSE, B_FALSE);

	for (i = igb->num_tx_rings - 1; i >= 0; i--)
		mutex_exit(&igb->tx_rings[i].tx_lock);
	for (i = igb->num_rx_rings - 1; i >= 0; i--)
		mutex_exit(&igb->rx_rings[i].rx_lock);

	return (IGB_SUCCESS);

start_failure:
	/* Drop the ring locks in reverse acquisition order */
	for (i = igb->num_tx_rings - 1; i >= 0; i--)
		mutex_exit(&igb->tx_rings[i].tx_lock);
	for (i = igb->num_rx_rings - 1; i >= 0; i--)
		mutex_exit(&igb->rx_rings[i].rx_lock);

	ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);

	return (IGB_FAILURE);
}
1927 
1928 /*
1929  * igb_stop - Stop the driver/chipset
1930  */
void
igb_stop(igb_t *igb, boolean_t free_buffer)
{
	int i;

	/* Caller must hold gen_lock (see igb_suspend()) */
	ASSERT(mutex_owned(&igb->gen_lock));

	/* The adapter must be re-initialized by the next igb_start() */
	igb->attach_progress &= ~ATTACH_PROGRESS_INIT_ADAPTER;

	/*
	 * Disable the adapter interrupts
	 */
	igb_disable_adapter_interrupts(igb);

	/*
	 * Drain the pending tx packets
	 */
	(void) igb_tx_drain(igb);

	/*
	 * Lock ordering: rx ring locks then tx ring locks, ascending
	 * index; released in reverse below.
	 */
	for (i = 0; i < igb->num_rx_rings; i++)
		mutex_enter(&igb->rx_rings[i].rx_lock);
	for (i = 0; i < igb->num_tx_rings; i++)
		mutex_enter(&igb->tx_rings[i].tx_lock);

	/*
	 * Stop the adapter
	 */
	igb_stop_adapter(igb);

	/*
	 * Clean the pending tx data/resources
	 */
	igb_tx_clean(igb);

	for (i = igb->num_tx_rings - 1; i >= 0; i--)
		mutex_exit(&igb->tx_rings[i].tx_lock);
	for (i = igb->num_rx_rings - 1; i >= 0; i--)
		mutex_exit(&igb->rx_rings[i].rx_lock);

	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);

	/* Tell MAC the link is no longer known to be up */
	if (igb->link_state == LINK_STATE_UP) {
		igb->link_state = LINK_STATE_UNKNOWN;
		mac_link_update(igb->mac_hdl, igb->link_state);
	}

	if (free_buffer) {
		/*
		 * Release the DMA/memory resources of rx/tx rings
		 */
		igb_free_dma(igb);
		igb_free_rx_data(igb);
	}
}
1986 
1987 /*
1988  * igb_alloc_rings - Allocate memory space for rx/tx rings
1989  */
1990 static int
1991 igb_alloc_rings(igb_t *igb)
1992 {
1993 	/*
1994 	 * Allocate memory space for rx rings
1995 	 */
1996 	igb->rx_rings = kmem_zalloc(
1997 	    sizeof (igb_rx_ring_t) * igb->num_rx_rings,
1998 	    KM_NOSLEEP);
1999 
2000 	if (igb->rx_rings == NULL) {
2001 		return (IGB_FAILURE);
2002 	}
2003 
2004 	/*
2005 	 * Allocate memory space for tx rings
2006 	 */
2007 	igb->tx_rings = kmem_zalloc(
2008 	    sizeof (igb_tx_ring_t) * igb->num_tx_rings,
2009 	    KM_NOSLEEP);
2010 
2011 	if (igb->tx_rings == NULL) {
2012 		kmem_free(igb->rx_rings,
2013 		    sizeof (igb_rx_ring_t) * igb->num_rx_rings);
2014 		igb->rx_rings = NULL;
2015 		return (IGB_FAILURE);
2016 	}
2017 
2018 	/*
2019 	 * Allocate memory space for rx ring groups
2020 	 */
2021 	igb->rx_groups = kmem_zalloc(
2022 	    sizeof (igb_rx_group_t) * igb->num_rx_groups,
2023 	    KM_NOSLEEP);
2024 
2025 	if (igb->rx_groups == NULL) {
2026 		kmem_free(igb->rx_rings,
2027 		    sizeof (igb_rx_ring_t) * igb->num_rx_rings);
2028 		kmem_free(igb->tx_rings,
2029 		    sizeof (igb_tx_ring_t) * igb->num_tx_rings);
2030 		igb->rx_rings = NULL;
2031 		igb->tx_rings = NULL;
2032 		return (IGB_FAILURE);
2033 	}
2034 
2035 	return (IGB_SUCCESS);
2036 }
2037 
2038 /*
2039  * igb_free_rings - Free the memory space of rx/tx rings.
2040  */
2041 static void
2042 igb_free_rings(igb_t *igb)
2043 {
2044 	if (igb->rx_rings != NULL) {
2045 		kmem_free(igb->rx_rings,
2046 		    sizeof (igb_rx_ring_t) * igb->num_rx_rings);
2047 		igb->rx_rings = NULL;
2048 	}
2049 
2050 	if (igb->tx_rings != NULL) {
2051 		kmem_free(igb->tx_rings,
2052 		    sizeof (igb_tx_ring_t) * igb->num_tx_rings);
2053 		igb->tx_rings = NULL;
2054 	}
2055 
2056 	if (igb->rx_groups != NULL) {
2057 		kmem_free(igb->rx_groups,
2058 		    sizeof (igb_rx_group_t) * igb->num_rx_groups);
2059 		igb->rx_groups = NULL;
2060 	}
2061 }
2062 
2063 static int
2064 igb_alloc_rx_data(igb_t *igb)
2065 {
2066 	igb_rx_ring_t *rx_ring;
2067 	int i;
2068 
2069 	for (i = 0; i < igb->num_rx_rings; i++) {
2070 		rx_ring = &igb->rx_rings[i];
2071 		if (igb_alloc_rx_ring_data(rx_ring) != IGB_SUCCESS)
2072 			goto alloc_rx_rings_failure;
2073 	}
2074 	return (IGB_SUCCESS);
2075 
2076 alloc_rx_rings_failure:
2077 	igb_free_rx_data(igb);
2078 	return (IGB_FAILURE);
2079 }
2080 
static void
igb_free_rx_data(igb_t *igb)
{
	igb_rx_ring_t *rx_ring;
	igb_rx_data_t *rx_data;
	int i;

	for (i = 0; i < igb->num_rx_rings; i++) {
		rx_ring = &igb->rx_rings[i];

		/* rx_pending_lock serializes with buffers loaned upstream */
		mutex_enter(&igb->rx_pending_lock);
		rx_data = rx_ring->rx_data;

		if (rx_data != NULL) {
			/* Mark the ring stopped so no new buffers go out */
			rx_data->flag |= IGB_RX_STOPPED;

			/*
			 * Free immediately only when the upper layer holds
			 * no buffers; otherwise the free is deferred until
			 * the last loaned buffer comes back (the path that
			 * drops rcb_pending to zero sees IGB_RX_STOPPED).
			 */
			if (rx_data->rcb_pending == 0) {
				igb_free_rx_ring_data(rx_data);
				rx_ring->rx_data = NULL;
			}
		}

		mutex_exit(&igb->rx_pending_lock);
	}
}
2106 
2107 /*
2108  * igb_setup_rings - Setup rx/tx rings
2109  */
static void
igb_setup_rings(igb_t *igb)
{
	/*
	 * Setup the rx/tx rings, including the following:
	 *
	 * 1. Setup the descriptor ring and the control block buffers;
	 * 2. Initialize necessary registers for receive/transmit;
	 * 3. Initialize software pointers/parameters for receive/transmit;
	 *
	 * Callers (igb_start()/igb_reset()) hold gen_lock and all of the
	 * per-ring locks, as asserted by igb_setup_rx_ring().
	 */
	igb_setup_rx(igb);

	igb_setup_tx(igb);
}
2124 
/*
 * igb_setup_rx_ring - Program one rx descriptor ring into the hardware
 *
 * Fills the descriptor ring with the DMA addresses of the work-list
 * buffers and programs the per-queue base/length/SRRCTL/RXDCTL
 * registers.  Caller holds both the ring's rx_lock and gen_lock.
 */
static void
igb_setup_rx_ring(igb_rx_ring_t *rx_ring)
{
	igb_t *igb = rx_ring->igb;
	igb_rx_data_t *rx_data = rx_ring->rx_data;
	struct e1000_hw *hw = &igb->hw;
	rx_control_block_t *rcb;
	union e1000_adv_rx_desc	*rbd;
	uint32_t size;
	uint32_t buf_low;
	uint32_t buf_high;
	uint32_t rxdctl;
	int i;

	ASSERT(mutex_owned(&rx_ring->rx_lock));
	ASSERT(mutex_owned(&igb->gen_lock));

	/*
	 * Initialize descriptor ring with buffer addresses
	 */
	for (i = 0; i < igb->rx_ring_size; i++) {
		rcb = rx_data->work_list[i];
		rbd = &rx_data->rbd_ring[i];

		rbd->read.pkt_addr = rcb->rx_buf.dma_address;
		rbd->read.hdr_addr = 0;
	}

	/*
	 * Initialize the base address registers (64-bit DMA address
	 * split across RDBAH/RDBAL)
	 */
	buf_low = (uint32_t)rx_data->rbd_area.dma_address;
	buf_high = (uint32_t)(rx_data->rbd_area.dma_address >> 32);
	E1000_WRITE_REG(hw, E1000_RDBAH(rx_ring->index), buf_high);
	E1000_WRITE_REG(hw, E1000_RDBAL(rx_ring->index), buf_low);

	/*
	 * Initialize the length register (ring size in bytes)
	 */
	size = rx_data->ring_size * sizeof (union e1000_adv_rx_desc);
	E1000_WRITE_REG(hw, E1000_RDLEN(rx_ring->index), size);

	/*
	 * Initialize buffer size & descriptor type
	 */
	E1000_WRITE_REG(hw, E1000_SRRCTL(rx_ring->index),
	    ((igb->rx_buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) |
	    E1000_SRRCTL_DESCTYPE_ADV_ONEBUF));

	/*
	 * Setup the Receive Descriptor Control Register (RXDCTL):
	 * enable the queue and set the prefetch/host/write-back
	 * threshold fields.
	 */
	rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(rx_ring->index));
	rxdctl &= igb->capab->rxdctl_mask;
	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
	rxdctl |= 16;		/* pthresh */
	rxdctl |= 8 << 8;	/* hthresh */
	rxdctl |= 1 << 16;	/* wthresh */
	E1000_WRITE_REG(hw, E1000_RXDCTL(rx_ring->index), rxdctl);

	/* Software consumes descriptors starting from slot 0 */
	rx_data->rbd_next = 0;
}
2187 
/*
 * igb_setup_rx - Setup the whole receive path
 *
 * Programs the Receive Control Register (RCTL), sets up every rx
 * descriptor ring and maps rings to groups, configures the long packet
 * maximum length, the hardware rx checksum offload, and the multi-queue
 * classification mode (RSS and/or VMDq MAC classification).  The
 * receive unit is only enabled (RCTL written back) after all of the
 * above, and the ring head/tail pointers are initialized last.
 */
static void
igb_setup_rx(igb_t *igb)
{
	igb_rx_ring_t *rx_ring;
	igb_rx_data_t *rx_data;
	igb_rx_group_t *rx_group;
	struct e1000_hw *hw = &igb->hw;
	uint32_t rctl, rxcsum;
	uint32_t ring_per_group;
	int i;

	/*
	 * Setup the Receive Control Register (RCTL), and enable the
	 * receiver. The initial configuration is to: enable the receiver,
	 * accept broadcasts, discard bad packets, accept long packets,
	 * disable VLAN filter checking, and set receive buffer size to
	 * 2k.  For 82575, also set the receive descriptor minimum
	 * threshold size to 1/2 the ring.
	 */
	rctl = E1000_READ_REG(hw, E1000_RCTL);

	/*
	 * Clear the field used for wakeup control.  This driver doesn't do
	 * wakeup but leave this here for completeness.
	 */
	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);

	rctl |= (E1000_RCTL_EN |	/* Enable Receive Unit */
	    E1000_RCTL_BAM |		/* Accept Broadcast Packets */
	    E1000_RCTL_LPE |		/* Large Packet Enable */
					/* Multicast filter offset */
	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT) |
	    E1000_RCTL_RDMTS_HALF |	/* rx descriptor threshold */
	    E1000_RCTL_SECRC);		/* Strip Ethernet CRC */

	/* Bind each rx group to this adapter and record its own index */
	for (i = 0; i < igb->num_rx_groups; i++) {
		rx_group = &igb->rx_groups[i];
		rx_group->index = i;
		rx_group->igb = igb;
	}

	/*
	 * Set up all rx descriptor rings - must be called before receive unit
	 * enabled.
	 */
	ring_per_group = igb->num_rx_rings / igb->num_rx_groups;
	for (i = 0; i < igb->num_rx_rings; i++) {
		rx_ring = &igb->rx_rings[i];
		igb_setup_rx_ring(rx_ring);

		/*
		 * Map a ring to a group by assigning a group index
		 */
		rx_ring->group_index = i / ring_per_group;
	}

	/*
	 * Setup the Rx Long Packet Max Length register
	 */
	E1000_WRITE_REG(hw, E1000_RLPML, igb->max_frame_size);

	/*
	 * Hardware checksum settings
	 */
	if (igb->rx_hcksum_enable) {
		rxcsum =
		    E1000_RXCSUM_TUOFL |	/* TCP/UDP checksum */
		    E1000_RXCSUM_IPOFL;		/* IP checksum */

		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
	}

	/*
	 * Setup classify and RSS for multiple receive queues
	 */
	switch (igb->vmdq_mode) {
	case E1000_VMDQ_OFF:
		/*
		 * One ring group, only RSS is needed when more than
		 * one ring enabled.
		 */
		if (igb->num_rx_rings > 1)
			igb_setup_rss(igb);
		break;
	case E1000_VMDQ_MAC:
		/*
		 * Multiple groups, each group has one ring,
		 * only the MAC classification is needed.
		 */
		igb_setup_mac_classify(igb);
		break;
	case E1000_VMDQ_MAC_RSS:
		/*
		 * Multiple groups and multiple rings, both
		 * MAC classification and RSS are needed.
		 */
		igb_setup_mac_rss_classify(igb);
		break;
	}

	/*
	 * Enable the receive unit - must be done after all
	 * the rx setup above.
	 */
	E1000_WRITE_REG(hw, E1000_RCTL, rctl);

	/*
	 * Initialize all adapter ring head & tail pointers - must
	 * be done after receive unit is enabled
	 */
	for (i = 0; i < igb->num_rx_rings; i++) {
		rx_ring = &igb->rx_rings[i];
		rx_data = rx_ring->rx_data;
		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
		E1000_WRITE_REG(hw, E1000_RDT(i), rx_data->ring_size - 1);
	}

	/*
	 * 82575 with manageability enabled needs a special flush to make
	 * sure the fifos start clean.
	 */
	if ((hw->mac.type == e1000_82575) &&
	    (E1000_READ_REG(hw, E1000_MANC) & E1000_MANC_RCV_TCO_EN)) {
		e1000_rx_fifo_flush_82575(hw);
	}
}
2315 
/*
 * igb_setup_tx_ring - Setup one transmit descriptor ring
 *
 * Programs the ring's length, base address and head/tail registers,
 * optionally enables the head write-back area (carved out of the extra
 * descriptor slot past the end of the ring), resets the software ring
 * state, and enables the queue via TXDCTL.
 *
 * Caller must hold both the ring's tx_lock and the adapter gen_lock.
 */
static void
igb_setup_tx_ring(igb_tx_ring_t *tx_ring)
{
	igb_t *igb = tx_ring->igb;
	struct e1000_hw *hw = &igb->hw;
	uint32_t size;
	uint32_t buf_low;
	uint32_t buf_high;
	uint32_t reg_val;

	ASSERT(mutex_owned(&tx_ring->tx_lock));
	ASSERT(mutex_owned(&igb->gen_lock));


	/*
	 * Initialize the length register
	 */
	size = tx_ring->ring_size * sizeof (union e1000_adv_tx_desc);
	E1000_WRITE_REG(hw, E1000_TDLEN(tx_ring->index), size);

	/*
	 * Initialize the base address registers
	 */
	buf_low = (uint32_t)tx_ring->tbd_area.dma_address;
	buf_high = (uint32_t)(tx_ring->tbd_area.dma_address >> 32);
	E1000_WRITE_REG(hw, E1000_TDBAL(tx_ring->index), buf_low);
	E1000_WRITE_REG(hw, E1000_TDBAH(tx_ring->index), buf_high);

	/*
	 * Setup head & tail pointers
	 */
	E1000_WRITE_REG(hw, E1000_TDH(tx_ring->index), 0);
	E1000_WRITE_REG(hw, E1000_TDT(tx_ring->index), 0);

	/*
	 * Setup head write-back
	 */
	if (igb->tx_head_wb_enable) {
		/*
		 * The memory of the head write-back is allocated using
		 * the extra tbd beyond the tail of the tbd ring.
		 */
		tx_ring->tbd_head_wb = (uint32_t *)
		    ((uintptr_t)tx_ring->tbd_area.address + size);
		*tx_ring->tbd_head_wb = 0;

		buf_low = (uint32_t)
		    (tx_ring->tbd_area.dma_address + size);
		buf_high = (uint32_t)
		    ((tx_ring->tbd_area.dma_address + size) >> 32);

		/* Set the head write-back enable bit */
		buf_low |= E1000_TX_HEAD_WB_ENABLE;

		E1000_WRITE_REG(hw, E1000_TDWBAL(tx_ring->index), buf_low);
		E1000_WRITE_REG(hw, E1000_TDWBAH(tx_ring->index), buf_high);

		/*
		 * Turn off relaxed ordering for head write back or it will
		 * cause problems with the tx recycling
		 */
		reg_val = E1000_READ_REG(hw,
		    E1000_DCA_TXCTRL(tx_ring->index));
		reg_val &= ~E1000_DCA_TXCTRL_TX_WB_RO_EN;
		E1000_WRITE_REG(hw,
		    E1000_DCA_TXCTRL(tx_ring->index), reg_val);
	} else {
		tx_ring->tbd_head_wb = NULL;
	}

	/* Reset the software view of the descriptor ring */
	tx_ring->tbd_head = 0;
	tx_ring->tbd_tail = 0;
	tx_ring->tbd_free = tx_ring->ring_size;

	/* Only reset the tx control block free list on a fresh ring init */
	if (igb->tx_ring_init == B_TRUE) {
		tx_ring->tcb_head = 0;
		tx_ring->tcb_tail = 0;
		tx_ring->tcb_free = tx_ring->free_list_size;
	}

	/*
	 * Enable TXDCTL per queue
	 */
	reg_val = E1000_READ_REG(hw, E1000_TXDCTL(tx_ring->index));
	reg_val |= E1000_TXDCTL_QUEUE_ENABLE;
	E1000_WRITE_REG(hw, E1000_TXDCTL(tx_ring->index), reg_val);

	/*
	 * Initialize hardware checksum offload settings
	 */
	bzero(&tx_ring->tx_context, sizeof (tx_context_t));
}
2408 
2409 static void
2410 igb_setup_tx(igb_t *igb)
2411 {
2412 	igb_tx_ring_t *tx_ring;
2413 	struct e1000_hw *hw = &igb->hw;
2414 	uint32_t reg_val;
2415 	int i;
2416 
2417 	for (i = 0; i < igb->num_tx_rings; i++) {
2418 		tx_ring = &igb->tx_rings[i];
2419 		igb_setup_tx_ring(tx_ring);
2420 	}
2421 
2422 	/*
2423 	 * Setup the Transmit Control Register (TCTL)
2424 	 */
2425 	reg_val = E1000_READ_REG(hw, E1000_TCTL);
2426 	reg_val &= ~E1000_TCTL_CT;
2427 	reg_val |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2428 	    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2429 
2430 	/* Enable transmits */
2431 	reg_val |= E1000_TCTL_EN;
2432 
2433 	E1000_WRITE_REG(hw, E1000_TCTL, reg_val);
2434 }
2435 
/*
 * igb_setup_rss - Setup receive-side scaling feature
 *
 * Programs the 128-entry Redirection Table (RETA) to spread flows over
 * all rx rings, seeds the RSS hash key registers (RSSRK) with
 * pseudo-random data, enables 4-queue RSS in MRQC with the supported
 * IPv4/IPv6 hash fields, and disables the hardware Packet Checksum,
 * which cannot coexist with RSS.
 */
static void
igb_setup_rss(igb_t *igb)
{
	struct e1000_hw *hw = &igb->hw;
	uint32_t i, mrqc, rxcsum;
	int shift = 0;
	uint32_t random;
	union e1000_reta {
		uint32_t	dword;
		uint8_t		bytes[4];
	} reta;

	/*
	 * Setup the Redirection Table; the per-entry queue index is
	 * left-shifted by a mac-type dependent amount.
	 */
	if (hw->mac.type == e1000_82576) {
		shift = 3;
	} else if (hw->mac.type == e1000_82575) {
		shift = 6;
	}
	for (i = 0; i < (32 * 4); i++) {
		/* Accumulate four byte-entries, then write one dword */
		reta.bytes[i & 3] = (i % igb->num_rx_rings) << shift;
		if ((i & 3) == 3) {
			E1000_WRITE_REG(hw,
			    (E1000_RETA(0) + (i & ~3)), reta.dword);
		}
	}

	/* Fill out hash function seeds */
	for (i = 0; i < 10; i++) {
		(void) random_get_pseudo_bytes((uint8_t *)&random,
		    sizeof (uint32_t));
		E1000_WRITE_REG(hw, E1000_RSSRK(i), random);
	}

	/* Setup the Multiple Receive Queue Control register */
	mrqc = E1000_MRQC_ENABLE_RSS_4Q;
	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
	    E1000_MRQC_RSS_FIELD_IPV4_TCP |
	    E1000_MRQC_RSS_FIELD_IPV6 |
	    E1000_MRQC_RSS_FIELD_IPV6_TCP |
	    E1000_MRQC_RSS_FIELD_IPV4_UDP |
	    E1000_MRQC_RSS_FIELD_IPV6_UDP |
	    E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
	    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);

	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);

	/*
	 * Disable Packet Checksum to enable RSS for multiple receive queues.
	 *
	 * The Packet Checksum is not ethernet CRC. It is another kind of
	 * checksum offloading provided by the 82575 chipset besides the IP
	 * header checksum offloading and the TCP/UDP checksum offloading.
	 * The Packet Checksum is by default computed over the entire packet
	 * from the first byte of the DA through the last byte of the CRC,
	 * including the Ethernet and IP headers.
	 *
	 * It is a hardware limitation that Packet Checksum is mutually
	 * exclusive with RSS.
	 */
	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
	rxcsum |= E1000_RXCSUM_PCSD;
	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
}
2502 
/*
 * igb_setup_mac_rss_classify - Setup MAC classification and rss
 *
 * Used when there are multiple rx groups AND multiple rings per group:
 * VMDq steers packets to a group by destination MAC address, and RSS
 * spreads them over the rings within the group.  The shared
 * Redirection Table encodes both groups' ring indices in each entry.
 */
static void
igb_setup_mac_rss_classify(igb_t *igb)
{
	struct e1000_hw *hw = &igb->hw;
	uint32_t i, mrqc, vmdctl, rxcsum;
	uint32_t ring_per_group;
	int shift_group0, shift_group1;
	uint32_t random;
	union e1000_reta {
		uint32_t	dword;
		uint8_t		bytes[4];
	} reta;

	ring_per_group = igb->num_rx_rings / igb->num_rx_groups;

	/* Setup the Redirection Table, it is shared between two groups */
	shift_group0 = 2;
	shift_group1 = 6;
	for (i = 0; i < (32 * 4); i++) {
		/* Each byte entry carries a ring index for both groups */
		reta.bytes[i & 3] = ((i % ring_per_group) << shift_group0) |
		    ((ring_per_group + (i % ring_per_group)) << shift_group1);
		if ((i & 3) == 3) {
			E1000_WRITE_REG(hw,
			    (E1000_RETA(0) + (i & ~3)), reta.dword);
		}
	}

	/* Fill out hash function seeds */
	for (i = 0; i < 10; i++) {
		(void) random_get_pseudo_bytes((uint8_t *)&random,
		    sizeof (uint32_t));
		E1000_WRITE_REG(hw, E1000_RSSRK(i), random);
	}

	/*
	 * Setup the Multiple Receive Queue Control register,
	 * enable VMDq based on packet destination MAC address and RSS.
	 */
	mrqc = E1000_MRQC_ENABLE_VMDQ_MAC_RSS_GROUP;
	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
	    E1000_MRQC_RSS_FIELD_IPV4_TCP |
	    E1000_MRQC_RSS_FIELD_IPV6 |
	    E1000_MRQC_RSS_FIELD_IPV6_TCP |
	    E1000_MRQC_RSS_FIELD_IPV4_UDP |
	    E1000_MRQC_RSS_FIELD_IPV6_UDP |
	    E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
	    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);

	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);


	/* Define the default group and default queues */
	vmdctl = E1000_VMDQ_MAC_GROUP_DEFAULT_QUEUE;
	E1000_WRITE_REG(hw, E1000_VT_CTL, vmdctl);

	/*
	 * Disable Packet Checksum to enable RSS for multiple receive queues.
	 *
	 * The Packet Checksum is not ethernet CRC. It is another kind of
	 * checksum offloading provided by the 82575 chipset besides the IP
	 * header checksum offloading and the TCP/UDP checksum offloading.
	 * The Packet Checksum is by default computed over the entire packet
	 * from the first byte of the DA through the last byte of the CRC,
	 * including the Ethernet and IP headers.
	 *
	 * It is a hardware limitation that Packet Checksum is mutually
	 * exclusive with RSS.
	 */
	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
	rxcsum |= E1000_RXCSUM_PCSD;
	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
}
2578 
2579 /*
2580  * igb_setup_mac_classify - Setup MAC classification feature
2581  */
2582 static void
2583 igb_setup_mac_classify(igb_t *igb)
2584 {
2585 	struct e1000_hw *hw = &igb->hw;
2586 	uint32_t mrqc, rxcsum;
2587 
2588 	/*
2589 	 * Setup the Multiple Receive Queue Control register,
2590 	 * enable VMDq based on packet destination MAC address.
2591 	 */
2592 	mrqc = E1000_MRQC_ENABLE_VMDQ_MAC_GROUP;
2593 	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2594 
2595 	/*
2596 	 * Disable Packet Checksum to enable RSS for multiple receive queues.
2597 	 *
2598 	 * The Packet Checksum is not ethernet CRC. It is another kind of
2599 	 * checksum offloading provided by the 82575 chipset besides the IP
2600 	 * header checksum offloading and the TCP/UDP checksum offloading.
2601 	 * The Packet Checksum is by default computed over the entire packet
2602 	 * from the first byte of the DA through the last byte of the CRC,
2603 	 * including the Ethernet and IP headers.
2604 	 *
2605 	 * It is a hardware limitation that Packet Checksum is mutually
2606 	 * exclusive with RSS.
2607 	 */
2608 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
2609 	rxcsum |= E1000_RXCSUM_PCSD;
2610 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
2611 
2612 }
2613 
/*
 * igb_init_unicst - Initialize the unicast addresses
 *
 * On first initialization the software unicast table is cleared; on a
 * chip reset the previously saved addresses are reprogrammed into the
 * RAR registers.  In both cases RAR0 is cleared first.
 */
static void
igb_init_unicst(igb_t *igb)
{
	struct e1000_hw *hw = &igb->hw;
	int slot;

	/*
	 * Here we should consider two situations:
	 *
	 * 1. Chipset is initialized the first time
	 *    Initialize the multiple unicast addresses, and
	 *    save the default MAC address.
	 *
	 * 2. Chipset is reset
	 *    Recover the multiple unicast addresses from the
	 *    software data structure to the RAR registers.
	 */

	/*
	 * Clear the default MAC address in the RAR0 register,
	 * which is loaded from EEPROM when system boot or chipreset,
	 * this will cause the conflicts with add_mac/rem_mac entry
	 * points when VMDq is enabled. For this reason, the RAR0
	 * must be cleared for both cases mentioned above.
	 */
	e1000_rar_clear(hw, 0);

	if (!igb->unicst_init) {

		/* Initialize the multiple unicast addresses */
		igb->unicst_total = MAX_NUM_UNICAST_ADDRESSES;
		igb->unicst_avail = igb->unicst_total;

		/* Mark every slot unused */
		for (slot = 0; slot < igb->unicst_total; slot++)
			igb->unicst_addr[slot].mac.set = 0;

		igb->unicst_init = B_TRUE;
	} else {
		/* Re-configure the RAR registers */
		for (slot = 0; slot < igb->unicst_total; slot++) {
			(void) e1000_rar_set_vmdq(hw,
			    igb->unicst_addr[slot].mac.addr,
			    slot, igb->vmdq_mode,
			    igb->unicst_addr[slot].mac.group_index);
		}
	}
}
2664 
2665 /*
2666  * igb_unicst_find - Find the slot for the specified unicast address
2667  */
2668 int
2669 igb_unicst_find(igb_t *igb, const uint8_t *mac_addr)
2670 {
2671 	int slot;
2672 
2673 	ASSERT(mutex_owned(&igb->gen_lock));
2674 
2675 	for (slot = 0; slot < igb->unicst_total; slot++) {
2676 		if (bcmp(igb->unicst_addr[slot].mac.addr,
2677 		    mac_addr, ETHERADDRL) == 0)
2678 			return (slot);
2679 	}
2680 
2681 	return (-1);
2682 }
2683 
/*
 * igb_unicst_set - Set the unicast address to the specified slot
 *
 * Saves the address into the software table and programs the matching
 * RAR register.  Returns 0 on success, or EIO if the register access
 * check fails afterwards.  Caller must hold gen_lock.
 */
int
igb_unicst_set(igb_t *igb, const uint8_t *mac_addr,
    int slot)
{
	struct e1000_hw *hw = &igb->hw;

	ASSERT(mutex_owned(&igb->gen_lock));

	/*
	 * Save the unicast address in the software data structure
	 */
	bcopy(mac_addr, igb->unicst_addr[slot].mac.addr, ETHERADDRL);

	/*
	 * Set the unicast address to the RAR register
	 */
	(void) e1000_rar_set(hw, (uint8_t *)mac_addr, slot);

	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		return (EIO);
	}

	return (0);
}
2712 
/*
 * igb_multicst_add - Add a multicst address
 *
 * Validates the address (the multicast bit must be set), grows the
 * kmem-allocated software table by MCAST_ALLOC_COUNT entries when it is
 * full, appends the address, and reprograms the hardware MTA registers.
 * Returns 0 on success, EINVAL (not a multicast address), ENOENT (the
 * configured maximum was reached), ENOMEM, or EIO.  Caller must hold
 * gen_lock.
 */
int
igb_multicst_add(igb_t *igb, const uint8_t *multiaddr)
{
	struct ether_addr *new_table;
	size_t new_len;
	size_t old_len;

	ASSERT(mutex_owned(&igb->gen_lock));

	/* The group/multicast bit is the LSB of the first octet */
	if ((multiaddr[0] & 01) == 0) {
		igb_log(igb, IGB_LOG_ERROR, "Illegal multicast address");
		return (EINVAL);
	}

	if (igb->mcast_count >= igb->mcast_max_num) {
		igb_log(igb, IGB_LOG_ERROR,
		    "Adapter requested more than %d mcast addresses",
		    igb->mcast_max_num);
		return (ENOENT);
	}

	/* Grow the table when every allocated slot is in use */
	if (igb->mcast_count == igb->mcast_alloc_count) {
		old_len = igb->mcast_alloc_count *
		    sizeof (struct ether_addr);
		new_len = (igb->mcast_alloc_count + MCAST_ALLOC_COUNT) *
		    sizeof (struct ether_addr);

		new_table = kmem_alloc(new_len, KM_NOSLEEP);
		if (new_table == NULL) {
			igb_log(igb, IGB_LOG_ERROR,
			    "Not enough memory to alloc mcast table");
			return (ENOMEM);
		}

		/* Copy over the old entries before freeing the old table */
		if (igb->mcast_table != NULL) {
			bcopy(igb->mcast_table, new_table, old_len);
			kmem_free(igb->mcast_table, old_len);
		}
		igb->mcast_alloc_count += MCAST_ALLOC_COUNT;
		igb->mcast_table = new_table;
	}

	bcopy(multiaddr,
	    &igb->mcast_table[igb->mcast_count], ETHERADDRL);
	igb->mcast_count++;

	/*
	 * Update the multicast table in the hardware
	 */
	igb_setup_multicst(igb);

	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		return (EIO);
	}

	return (0);
}
2774 
/*
 * igb_multicst_remove - Remove a multicst address
 *
 * Removes the address from the software table (compacting the entries
 * after it), shrinks the table by MCAST_ALLOC_COUNT entries when more
 * than one chunk of slack has accumulated, and reprograms the hardware
 * MTA registers.  Returns 0 on success or EIO if the register access
 * check fails.  Caller must hold gen_lock.
 */
int
igb_multicst_remove(igb_t *igb, const uint8_t *multiaddr)
{
	struct ether_addr *new_table;
	size_t new_len;
	size_t old_len;
	int i;

	ASSERT(mutex_owned(&igb->gen_lock));

	/* Find the address and shift the remaining entries down */
	for (i = 0; i < igb->mcast_count; i++) {
		if (bcmp(multiaddr, &igb->mcast_table[i],
		    ETHERADDRL) == 0) {
			for (i++; i < igb->mcast_count; i++) {
				igb->mcast_table[i - 1] =
				    igb->mcast_table[i];
			}
			igb->mcast_count--;
			break;
		}
	}

	/*
	 * Shrink the table when enough slack exists; the guard ensures
	 * the new, smaller table still holds all live entries.  A failed
	 * allocation here is harmless - we just keep the bigger table.
	 */
	if ((igb->mcast_alloc_count - igb->mcast_count) >
	    MCAST_ALLOC_COUNT) {
		old_len = igb->mcast_alloc_count *
		    sizeof (struct ether_addr);
		new_len = (igb->mcast_alloc_count - MCAST_ALLOC_COUNT) *
		    sizeof (struct ether_addr);

		new_table = kmem_alloc(new_len, KM_NOSLEEP);
		if (new_table != NULL) {
			bcopy(igb->mcast_table, new_table, new_len);
			kmem_free(igb->mcast_table, old_len);
			igb->mcast_alloc_count -= MCAST_ALLOC_COUNT;
			igb->mcast_table = new_table;
		}
	}

	/*
	 * Update the multicast table in the hardware
	 */
	igb_setup_multicst(igb);

	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		return (EIO);
	}

	return (0);
}
2828 
2829 static void
2830 igb_release_multicast(igb_t *igb)
2831 {
2832 	if (igb->mcast_table != NULL) {
2833 		kmem_free(igb->mcast_table,
2834 		    igb->mcast_alloc_count * sizeof (struct ether_addr));
2835 		igb->mcast_table = NULL;
2836 	}
2837 }
2838 
/*
 * igb_setup_multicast - setup multicast data structures
 *
 * This routine initializes all of the multicast related structures
 * and save them in the hardware registers.
 *
 * Caller must hold gen_lock; the software table is pushed into the
 * hardware MTA registers as-is.
 */
static void
igb_setup_multicst(igb_t *igb)
{
	uint8_t *mc_addr_list;
	uint32_t mc_addr_count;
	struct e1000_hw *hw = &igb->hw;

	ASSERT(mutex_owned(&igb->gen_lock));
	ASSERT(igb->mcast_count <= igb->mcast_max_num);

	mc_addr_list = (uint8_t *)igb->mcast_table;
	mc_addr_count = igb->mcast_count;

	/*
	 * Update the multicast addresses to the MTA registers
	 */
	e1000_update_mc_addr_list(hw, mc_addr_list, mc_addr_count);
}
2863 
/*
 * igb_get_conf - Get driver configurations set in driver.conf
 *
 * This routine gets user-configured values out of the configuration
 * file igb.conf.
 *
 * For each configurable value, there is a minimum, a maximum, and a
 * default.
 * If user does not configure a value, use the default.
 * If user configures below the minimum, use the minimum.
 * If user configures above the maximum, use the maximum.
 */
static void
igb_get_conf(igb_t *igb)
{
	struct e1000_hw *hw = &igb->hw;
	uint32_t default_mtu;
	uint32_t flow_control;
	uint32_t ring_per_group;
	int i;

	/*
	 * igb driver supports the following user configurations:
	 *
	 * Link configurations:
	 *    adv_autoneg_cap
	 *    adv_1000fdx_cap
	 *    adv_100fdx_cap
	 *    adv_100hdx_cap
	 *    adv_10fdx_cap
	 *    adv_10hdx_cap
	 * Note: 1000hdx is not supported.
	 *
	 * Jumbo frame configuration:
	 *    default_mtu
	 *
	 * Ethernet flow control configuration:
	 *    flow_control
	 *
	 * Multiple rings configurations:
	 *    tx_queue_number
	 *    tx_ring_size
	 *    rx_queue_number
	 *    rx_ring_size
	 *
	 * Call igb_get_prop() to get the value for a specific
	 * configuration parameter.
	 */

	/*
	 * Link configurations
	 */
	igb->param_adv_autoneg_cap = igb_get_prop(igb,
	    PROP_ADV_AUTONEG_CAP, 0, 1, 1);
	igb->param_adv_1000fdx_cap = igb_get_prop(igb,
	    PROP_ADV_1000FDX_CAP, 0, 1, 1);
	igb->param_adv_100fdx_cap = igb_get_prop(igb,
	    PROP_ADV_100FDX_CAP, 0, 1, 1);
	igb->param_adv_100hdx_cap = igb_get_prop(igb,
	    PROP_ADV_100HDX_CAP, 0, 1, 1);
	igb->param_adv_10fdx_cap = igb_get_prop(igb,
	    PROP_ADV_10FDX_CAP, 0, 1, 1);
	igb->param_adv_10hdx_cap = igb_get_prop(igb,
	    PROP_ADV_10HDX_CAP, 0, 1, 1);

	/*
	 * Jumbo frame configurations
	 */
	default_mtu = igb_get_prop(igb, PROP_DEFAULT_MTU,
	    MIN_MTU, MAX_MTU, DEFAULT_MTU);

	/* Frame size = MTU + VLAN-tagged ethernet header + FCS */
	igb->max_frame_size = default_mtu +
	    sizeof (struct ether_vlan_header) + ETHERFCSL;

	/*
	 * Ethernet flow control configuration
	 */
	flow_control = igb_get_prop(igb, PROP_FLOW_CONTROL,
	    e1000_fc_none, 4, e1000_fc_full);
	/* The out-of-enum value 4 selects the hardware default mode */
	if (flow_control == 4)
		flow_control = e1000_fc_default;

	hw->fc.requested_mode = flow_control;

	/*
	 * Multiple rings configurations
	 */
	igb->tx_ring_size = igb_get_prop(igb, PROP_TX_RING_SIZE,
	    MIN_TX_RING_SIZE, MAX_TX_RING_SIZE, DEFAULT_TX_RING_SIZE);
	igb->rx_ring_size = igb_get_prop(igb, PROP_RX_RING_SIZE,
	    MIN_RX_RING_SIZE, MAX_RX_RING_SIZE, DEFAULT_RX_RING_SIZE);

	igb->mr_enable = igb_get_prop(igb, PROP_MR_ENABLE, 0, 1, 0);
	igb->num_rx_groups = igb_get_prop(igb, PROP_RX_GROUP_NUM,
	    MIN_RX_GROUP_NUM, MAX_RX_GROUP_NUM, DEFAULT_RX_GROUP_NUM);
	/*
	 * Currently we do not support VMDq for 82576 and 82580.
	 * If it is e1000_82576, set num_rx_groups to 1.
	 */
	if (hw->mac.type >= e1000_82576)
		igb->num_rx_groups = 1;

	if (igb->mr_enable) {
		igb->num_tx_rings = igb->capab->def_tx_que_num;
		igb->num_rx_rings = igb->capab->def_rx_que_num;
	} else {
		igb->num_tx_rings = 1;
		igb->num_rx_rings = 1;

		/* Multiple groups are meaningless without multiple rings */
		if (igb->num_rx_groups > 1) {
			igb_log(igb, IGB_LOG_ERROR,
			    "Invalid rx groups number. Please enable multiple "
			    "rings first");
			igb->num_rx_groups = 1;
		}
	}

	/*
	 * Check the divisibility between rx rings and rx groups.
	 * Find the largest group count that evenly divides the rings.
	 */
	for (i = igb->num_rx_groups; i > 0; i--) {
		if ((igb->num_rx_rings % i) == 0)
			break;
	}
	if (i != igb->num_rx_groups) {
		igb_log(igb, IGB_LOG_ERROR,
		    "Invalid rx groups number. Downgrade the rx group "
		    "number to %d.", i);
		igb->num_rx_groups = i;
	}

	/*
	 * Get the ring number per group.
	 */
	ring_per_group = igb->num_rx_rings / igb->num_rx_groups;

	if (igb->num_rx_groups == 1) {
		/*
		 * One rx ring group, the rx ring number is num_rx_rings.
		 */
		igb->vmdq_mode = E1000_VMDQ_OFF;
	} else if (ring_per_group == 1) {
		/*
		 * Multiple rx groups, each group has one rx ring.
		 */
		igb->vmdq_mode = E1000_VMDQ_MAC;
	} else {
		/*
		 * Multiple groups and multiple rings.
		 */
		igb->vmdq_mode = E1000_VMDQ_MAC_RSS;
	}

	/*
	 * Tunable used to force an interrupt type. The only use is
	 * for testing of the lesser interrupt types.
	 * 0 = don't force interrupt type
	 * 1 = force interrupt type MSIX
	 * 2 = force interrupt type MSI
	 * 3 = force interrupt type Legacy
	 */
	igb->intr_force = igb_get_prop(igb, PROP_INTR_FORCE,
	    IGB_INTR_NONE, IGB_INTR_LEGACY, IGB_INTR_NONE);

	igb->tx_hcksum_enable = igb_get_prop(igb, PROP_TX_HCKSUM_ENABLE,
	    0, 1, 1);
	igb->rx_hcksum_enable = igb_get_prop(igb, PROP_RX_HCKSUM_ENABLE,
	    0, 1, 1);
	igb->lso_enable = igb_get_prop(igb, PROP_LSO_ENABLE,
	    0, 1, 1);
	igb->tx_head_wb_enable = igb_get_prop(igb, PROP_TX_HEAD_WB_ENABLE,
	    0, 1, 1);

	/*
	 * igb LSO needs the tx h/w checksum support.
	 * Here LSO will be disabled if tx h/w checksum has been disabled.
	 */
	if (igb->tx_hcksum_enable == B_FALSE)
		igb->lso_enable = B_FALSE;

	igb->tx_copy_thresh = igb_get_prop(igb, PROP_TX_COPY_THRESHOLD,
	    MIN_TX_COPY_THRESHOLD, MAX_TX_COPY_THRESHOLD,
	    DEFAULT_TX_COPY_THRESHOLD);
	igb->tx_recycle_thresh = igb_get_prop(igb, PROP_TX_RECYCLE_THRESHOLD,
	    MIN_TX_RECYCLE_THRESHOLD, MAX_TX_RECYCLE_THRESHOLD,
	    DEFAULT_TX_RECYCLE_THRESHOLD);
	igb->tx_overload_thresh = igb_get_prop(igb, PROP_TX_OVERLOAD_THRESHOLD,
	    MIN_TX_OVERLOAD_THRESHOLD, MAX_TX_OVERLOAD_THRESHOLD,
	    DEFAULT_TX_OVERLOAD_THRESHOLD);
	igb->tx_resched_thresh = igb_get_prop(igb, PROP_TX_RESCHED_THRESHOLD,
	    MIN_TX_RESCHED_THRESHOLD,
	    MIN(igb->tx_ring_size, MAX_TX_RESCHED_THRESHOLD),
	    igb->tx_ring_size > DEFAULT_TX_RESCHED_THRESHOLD ?
	    DEFAULT_TX_RESCHED_THRESHOLD : DEFAULT_TX_RESCHED_THRESHOLD_LOW);

	igb->rx_copy_thresh = igb_get_prop(igb, PROP_RX_COPY_THRESHOLD,
	    MIN_RX_COPY_THRESHOLD, MAX_RX_COPY_THRESHOLD,
	    DEFAULT_RX_COPY_THRESHOLD);
	igb->rx_limit_per_intr = igb_get_prop(igb, PROP_RX_LIMIT_PER_INTR,
	    MIN_RX_LIMIT_PER_INTR, MAX_RX_LIMIT_PER_INTR,
	    DEFAULT_RX_LIMIT_PER_INTR);

	igb->intr_throttling[0] = igb_get_prop(igb, PROP_INTR_THROTTLING,
	    igb->capab->min_intr_throttle,
	    igb->capab->max_intr_throttle,
	    igb->capab->def_intr_throttle);

	/*
	 * Max number of multicast addresses
	 */
	igb->mcast_max_num =
	    igb_get_prop(igb, PROP_MCAST_MAX_NUM,
	    MIN_MCAST_NUM, MAX_MCAST_NUM, DEFAULT_MCAST_NUM);
}
3078 
3079 /*
3080  * igb_get_prop - Get a property value out of the configuration file igb.conf
3081  *
3082  * Caller provides the name of the property, a default value, a minimum
3083  * value, and a maximum value.
3084  *
3085  * Return configured value of the property, with default, minimum and
3086  * maximum properly applied.
3087  */
3088 static int
3089 igb_get_prop(igb_t *igb,
3090     char *propname,	/* name of the property */
3091     int minval,		/* minimum acceptable value */
3092     int maxval,		/* maximim acceptable value */
3093     int defval)		/* default value */
3094 {
3095 	int value;
3096 
3097 	/*
3098 	 * Call ddi_prop_get_int() to read the conf settings
3099 	 */
3100 	value = ddi_prop_get_int(DDI_DEV_T_ANY, igb->dip,
3101 	    DDI_PROP_DONTPASS, propname, defval);
3102 
3103 	if (value > maxval)
3104 		value = maxval;
3105 
3106 	if (value < minval)
3107 		value = minval;
3108 
3109 	return (value);
3110 }
3111 
/*
 * igb_setup_link - Using the link properties to setup the link
 *
 * Translates the user-configured adv_*_cap parameters into either an
 * autonegotiation advertisement mask or a forced speed/duplex setting.
 * An empty or inconsistent configuration falls back to autonegotiation
 * with all supported capabilities.  When setup_hw is set, the settings
 * are also pushed to the hardware via e1000_setup_link().
 */
int
igb_setup_link(igb_t *igb, boolean_t setup_hw)
{
	struct e1000_mac_info *mac;
	struct e1000_phy_info *phy;
	boolean_t invalid;

	mac = &igb->hw.mac;
	phy = &igb->hw.phy;
	invalid = B_FALSE;

	if (igb->param_adv_autoneg_cap == 1) {
		mac->autoneg = B_TRUE;
		phy->autoneg_advertised = 0;

		/*
		 * 1000hdx is not supported for autonegotiation
		 */
		if (igb->param_adv_1000fdx_cap == 1)
			phy->autoneg_advertised |= ADVERTISE_1000_FULL;

		if (igb->param_adv_100fdx_cap == 1)
			phy->autoneg_advertised |= ADVERTISE_100_FULL;

		if (igb->param_adv_100hdx_cap == 1)
			phy->autoneg_advertised |= ADVERTISE_100_HALF;

		if (igb->param_adv_10fdx_cap == 1)
			phy->autoneg_advertised |= ADVERTISE_10_FULL;

		if (igb->param_adv_10hdx_cap == 1)
			phy->autoneg_advertised |= ADVERTISE_10_HALF;

		/* Autonegotiation with nothing advertised is invalid */
		if (phy->autoneg_advertised == 0)
			invalid = B_TRUE;
	} else {
		mac->autoneg = B_FALSE;

		/*
		 * 1000fdx and 1000hdx are not supported for forced link
		 */
		if (igb->param_adv_100fdx_cap == 1)
			mac->forced_speed_duplex = ADVERTISE_100_FULL;
		else if (igb->param_adv_100hdx_cap == 1)
			mac->forced_speed_duplex = ADVERTISE_100_HALF;
		else if (igb->param_adv_10fdx_cap == 1)
			mac->forced_speed_duplex = ADVERTISE_10_FULL;
		else if (igb->param_adv_10hdx_cap == 1)
			mac->forced_speed_duplex = ADVERTISE_10_HALF;
		else
			invalid = B_TRUE;
	}

	if (invalid) {
		igb_log(igb, IGB_LOG_INFO, "Invalid link settings. Setup "
		    "link to autonegotiation with full link capabilities.");
		mac->autoneg = B_TRUE;
		phy->autoneg_advertised = ADVERTISE_1000_FULL |
		    ADVERTISE_100_FULL | ADVERTISE_100_HALF |
		    ADVERTISE_10_FULL | ADVERTISE_10_HALF;
	}

	if (setup_hw) {
		if (e1000_setup_link(&igb->hw) != E1000_SUCCESS)
			return (IGB_FAILURE);
	}

	return (IGB_SUCCESS);
}
3184 
3185 
/*
 * igb_is_link_up - Check if the link is up
 *
 * Queries link state in a media-type specific way: copper via
 * get_link_status, fiber via the STATUS register, serdes via
 * serdes_has_link.  Caller must hold gen_lock.
 */
static boolean_t
igb_is_link_up(igb_t *igb)
{
	struct e1000_hw *hw = &igb->hw;
	boolean_t link_up = B_FALSE;

	ASSERT(mutex_owned(&igb->gen_lock));

	/*
	 * get_link_status is set in the interrupt handler on link-status-change
	 * or rx sequence error interrupt.  get_link_status will stay
	 * false until the e1000_check_for_link establishes link only
	 * for copper adapters.
	 */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			(void) e1000_check_for_link(hw);
			/* e1000_check_for_link clears the flag on link-up */
			link_up = !hw->mac.get_link_status;
		} else {
			link_up = B_TRUE;
		}
		break;
	case e1000_media_type_fiber:
		(void) e1000_check_for_link(hw);
		link_up = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU);
		break;
	case e1000_media_type_internal_serdes:
		(void) e1000_check_for_link(hw);
		link_up = hw->mac.serdes_has_link;
		break;
	}

	return (link_up);
}
3224 
/*
 * igb_link_check - Link status processing
 *
 * Compares the current hardware link state against the cached state,
 * updating the cached speed/duplex/state on a transition.  Returns
 * B_TRUE if the state changed (so the caller should notify MAC), or
 * B_FALSE otherwise (including on a register access failure, which
 * degrades FM service).  Caller must hold gen_lock.
 */
static boolean_t
igb_link_check(igb_t *igb)
{
	struct e1000_hw *hw = &igb->hw;
	uint16_t speed = 0, duplex = 0;
	boolean_t link_changed = B_FALSE;

	ASSERT(mutex_owned(&igb->gen_lock));

	if (igb_is_link_up(igb)) {
		/*
		 * The Link is up, check whether it was marked as down earlier
		 */
		if (igb->link_state != LINK_STATE_UP) {
			(void) e1000_get_speed_and_duplex(hw, &speed, &duplex);
			igb->link_speed = speed;
			igb->link_duplex = duplex;
			igb->link_state = LINK_STATE_UP;
			link_changed = B_TRUE;
			/* Link came up before the setup timer expired */
			if (!igb->link_complete)
				igb_stop_link_timer(igb);
		}
	} else if (igb->link_complete) {
		/* Only report link-down once link setup has completed */
		if (igb->link_state != LINK_STATE_DOWN) {
			igb->link_speed = 0;
			igb->link_duplex = 0;
			igb->link_state = LINK_STATE_DOWN;
			link_changed = B_TRUE;
		}
	}

	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		return (B_FALSE);
	}

	return (link_changed);
}
3266 
/*
 * igb_local_timer - driver watchdog function
 *
 * This function will handle the hardware stall check, link status
 * check and other routines.
 *
 * It resets the adapter on a pending error or detected tx stall, and
 * otherwise polls for link changes, notifying MAC when one occurs.
 * It always re-arms itself via igb_restart_watchdog_timer().
 */
static void
igb_local_timer(void *arg)
{
	igb_t *igb = (igb_t *)arg;
	boolean_t link_changed = B_FALSE;

	/* A prior fault was recorded: attempt a full reset */
	if (igb->igb_state & IGB_ERROR) {
		igb->reset_count++;
		if (igb_reset(igb) == IGB_SUCCESS)
			ddi_fm_service_impact(igb->dip, DDI_SERVICE_RESTORED);

		igb_restart_watchdog_timer(igb);
		return;
	}

	/* Transmit stall: report to FM and reset the adapter */
	if (igb_stall_check(igb) || (igb->igb_state & IGB_STALL)) {
		igb_fm_ereport(igb, DDI_FM_DEVICE_STALL);
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
		igb->reset_count++;
		if (igb_reset(igb) == IGB_SUCCESS)
			ddi_fm_service_impact(igb->dip, DDI_SERVICE_RESTORED);

		igb_restart_watchdog_timer(igb);
		return;
	}

	mutex_enter(&igb->gen_lock);
	if (!(igb->igb_state & IGB_SUSPENDED) && (igb->igb_state & IGB_STARTED))
		link_changed = igb_link_check(igb);
	mutex_exit(&igb->gen_lock);

	/* Notify the MAC layer outside of gen_lock */
	if (link_changed)
		mac_link_update(igb->mac_hdl, igb->link_state);

	igb_restart_watchdog_timer(igb);
}
3309 
3310 /*
3311  * igb_link_timer - link setup timer function
3312  *
3313  * It is called when the timer for link setup is expired, which indicates
3314  * the completion of the link setup. The link state will not be updated
3315  * until the link setup is completed. And the link state will not be sent
3316  * to the upper layer through mac_link_update() in this function. It will
3317  * be updated in the local timer routine or the interrupts service routine
3318  * after the interface is started (plumbed).
3319  */
3320 static void
3321 igb_link_timer(void *arg)
3322 {
3323 	igb_t *igb = (igb_t *)arg;
3324 
3325 	mutex_enter(&igb->link_lock);
3326 	igb->link_complete = B_TRUE;
3327 	igb->link_tid = 0;
3328 	mutex_exit(&igb->link_lock);
3329 }
3330 /*
3331  * igb_stall_check - check for transmit stall
3332  *
3333  * This function checks if the adapter is stalled (in transmit).
3334  *
3335  * It is called each time the watchdog timeout is invoked.
3336  * If the transmit descriptor reclaim continuously fails,
3337  * the watchdog value will increment by 1. If the watchdog
3338  * value exceeds the threshold, the igb is assumed to
3339  * have stalled and need to be reset.
3340  */
3341 static boolean_t
3342 igb_stall_check(igb_t *igb)
3343 {
3344 	igb_tx_ring_t *tx_ring;
3345 	struct e1000_hw *hw = &igb->hw;
3346 	boolean_t result;
3347 	int i;
3348 
3349 	if (igb->link_state != LINK_STATE_UP)
3350 		return (B_FALSE);
3351 
3352 	/*
3353 	 * If any tx ring is stalled, we'll reset the chipset
3354 	 */
3355 	result = B_FALSE;
3356 	for (i = 0; i < igb->num_tx_rings; i++) {
3357 		tx_ring = &igb->tx_rings[i];
3358 
3359 		if (tx_ring->recycle_fail > 0)
3360 			tx_ring->stall_watchdog++;
3361 		else
3362 			tx_ring->stall_watchdog = 0;
3363 
3364 		if (tx_ring->stall_watchdog >= STALL_WATCHDOG_TIMEOUT) {
3365 			result = B_TRUE;
3366 			if (hw->mac.type == e1000_82580) {
3367 				hw->dev_spec._82575.global_device_reset
3368 				    = B_TRUE;
3369 			}
3370 			break;
3371 		}
3372 	}
3373 
3374 	if (result) {
3375 		tx_ring->stall_watchdog = 0;
3376 		tx_ring->recycle_fail = 0;
3377 	}
3378 
3379 	return (result);
3380 }
3381 
3382 
3383 /*
3384  * is_valid_mac_addr - Check if the mac address is valid
3385  */
3386 static boolean_t
3387 is_valid_mac_addr(uint8_t *mac_addr)
3388 {
3389 	const uint8_t addr_test1[6] = { 0, 0, 0, 0, 0, 0 };
3390 	const uint8_t addr_test2[6] =
3391 	    { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
3392 
3393 	if (!(bcmp(addr_test1, mac_addr, ETHERADDRL)) ||
3394 	    !(bcmp(addr_test2, mac_addr, ETHERADDRL)))
3395 		return (B_FALSE);
3396 
3397 	return (B_TRUE);
3398 }
3399 
/*
 * igb_find_mac_address - Determine the device MAC address
 *
 * On SPARC, OBP properties may override the factory address, with
 * precedence (lowest to highest): "local-mac-address", the system
 * address when "local-mac-address? = false", and finally "mac-address"
 * (set when netbooted from this interface).  On other platforms, or if
 * no property matched, the address is read from the EEPROM.  Returns
 * B_FALSE only when the EEPROM read fails.
 */
static boolean_t
igb_find_mac_address(igb_t *igb)
{
	struct e1000_hw *hw = &igb->hw;
#ifdef __sparc
	uchar_t *bytes;
	struct ether_addr sysaddr;
	uint_t nelts;
	int err;
	boolean_t found = B_FALSE;

	/*
	 * The "vendor's factory-set address" may already have
	 * been extracted from the chip, but if the property
	 * "local-mac-address" is set we use that instead.
	 *
	 * We check whether it looks like an array of 6
	 * bytes (which it should, if OBP set it).  If we can't
	 * make sense of it this way, we'll ignore it.
	 */
	err = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, igb->dip,
	    DDI_PROP_DONTPASS, "local-mac-address", &bytes, &nelts);
	if (err == DDI_PROP_SUCCESS) {
		if (nelts == ETHERADDRL) {
			while (nelts--)
				hw->mac.addr[nelts] = bytes[nelts];
			found = B_TRUE;
		}
		ddi_prop_free(bytes);
	}

	/*
	 * Look up the OBP property "local-mac-address?". If the user has set
	 * 'local-mac-address? = false', use "the system address" instead.
	 */
	if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, igb->dip, 0,
	    "local-mac-address?", &bytes, &nelts) == DDI_PROP_SUCCESS) {
		if (strncmp("false", (caddr_t)bytes, (size_t)nelts) == 0) {
			if (localetheraddr(NULL, &sysaddr) != 0) {
				bcopy(&sysaddr, hw->mac.addr, ETHERADDRL);
				found = B_TRUE;
			}
		}
		ddi_prop_free(bytes);
	}

	/*
	 * Finally(!), if there's a valid "mac-address" property (created
	 * if we netbooted from this interface), we must use this instead
	 * of any of the above to ensure that the NFS/install server doesn't
	 * get confused by the address changing as Solaris takes over!
	 */
	err = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, igb->dip,
	    DDI_PROP_DONTPASS, "mac-address", &bytes, &nelts);
	if (err == DDI_PROP_SUCCESS) {
		if (nelts == ETHERADDRL) {
			while (nelts--)
				hw->mac.addr[nelts] = bytes[nelts];
			found = B_TRUE;
		}
		ddi_prop_free(bytes);
	}

	/* A property override also becomes the "permanent" address. */
	if (found) {
		bcopy(hw->mac.addr, hw->mac.perm_addr, ETHERADDRL);
		return (B_TRUE);
	}
#endif

	/*
	 * Read the device MAC address from the EEPROM
	 */
	if (e1000_read_mac_addr(hw) != E1000_SUCCESS)
		return (B_FALSE);

	return (B_TRUE);
}
3477 
3478 #pragma inline(igb_arm_watchdog_timer)
3479 
3480 static void
3481 igb_arm_watchdog_timer(igb_t *igb)
3482 {
3483 	/*
3484 	 * Fire a watchdog timer
3485 	 */
3486 	igb->watchdog_tid =
3487 	    timeout(igb_local_timer,
3488 	    (void *)igb, 1 * drv_usectohz(1000000));
3489 
3490 }
3491 
3492 /*
3493  * igb_enable_watchdog_timer - Enable and start the driver watchdog timer
3494  */
3495 void
3496 igb_enable_watchdog_timer(igb_t *igb)
3497 {
3498 	mutex_enter(&igb->watchdog_lock);
3499 
3500 	if (!igb->watchdog_enable) {
3501 		igb->watchdog_enable = B_TRUE;
3502 		igb->watchdog_start = B_TRUE;
3503 		igb_arm_watchdog_timer(igb);
3504 	}
3505 
3506 	mutex_exit(&igb->watchdog_lock);
3507 
3508 }
3509 
3510 /*
3511  * igb_disable_watchdog_timer - Disable and stop the driver watchdog timer
3512  */
3513 void
3514 igb_disable_watchdog_timer(igb_t *igb)
3515 {
3516 	timeout_id_t tid;
3517 
3518 	mutex_enter(&igb->watchdog_lock);
3519 
3520 	igb->watchdog_enable = B_FALSE;
3521 	igb->watchdog_start = B_FALSE;
3522 	tid = igb->watchdog_tid;
3523 	igb->watchdog_tid = 0;
3524 
3525 	mutex_exit(&igb->watchdog_lock);
3526 
3527 	if (tid != 0)
3528 		(void) untimeout(tid);
3529 
3530 }
3531 
3532 /*
3533  * igb_start_watchdog_timer - Start the driver watchdog timer
3534  */
3535 static void
3536 igb_start_watchdog_timer(igb_t *igb)
3537 {
3538 	mutex_enter(&igb->watchdog_lock);
3539 
3540 	if (igb->watchdog_enable) {
3541 		if (!igb->watchdog_start) {
3542 			igb->watchdog_start = B_TRUE;
3543 			igb_arm_watchdog_timer(igb);
3544 		}
3545 	}
3546 
3547 	mutex_exit(&igb->watchdog_lock);
3548 }
3549 
/*
 * igb_restart_watchdog_timer - Restart the driver watchdog timer
 *
 * Re-arms the one-second timeout, but only while the watchdog is still
 * marked as started; a concurrent stop leaves it cancelled.
 */
static void
igb_restart_watchdog_timer(igb_t *igb)
{
	mutex_enter(&igb->watchdog_lock);

	if (igb->watchdog_start)
		igb_arm_watchdog_timer(igb);

	mutex_exit(&igb->watchdog_lock);
}
3563 
3564 /*
3565  * igb_stop_watchdog_timer - Stop the driver watchdog timer
3566  */
3567 static void
3568 igb_stop_watchdog_timer(igb_t *igb)
3569 {
3570 	timeout_id_t tid;
3571 
3572 	mutex_enter(&igb->watchdog_lock);
3573 
3574 	igb->watchdog_start = B_FALSE;
3575 	tid = igb->watchdog_tid;
3576 	igb->watchdog_tid = 0;
3577 
3578 	mutex_exit(&igb->watchdog_lock);
3579 
3580 	if (tid != 0)
3581 		(void) untimeout(tid);
3582 }
3583 
3584 /*
3585  * igb_start_link_timer - Start the link setup timer
3586  */
3587 static void
3588 igb_start_link_timer(struct igb *igb)
3589 {
3590 	struct e1000_hw *hw = &igb->hw;
3591 	clock_t link_timeout;
3592 
3593 	if (hw->mac.autoneg)
3594 		link_timeout = PHY_AUTO_NEG_LIMIT *
3595 		    drv_usectohz(100000);
3596 	else
3597 		link_timeout = PHY_FORCE_LIMIT * drv_usectohz(100000);
3598 
3599 	mutex_enter(&igb->link_lock);
3600 	if (hw->phy.autoneg_wait_to_complete) {
3601 		igb->link_complete = B_TRUE;
3602 	} else {
3603 		igb->link_complete = B_FALSE;
3604 		igb->link_tid = timeout(igb_link_timer, (void *)igb,
3605 		    link_timeout);
3606 	}
3607 	mutex_exit(&igb->link_lock);
3608 }
3609 
3610 /*
3611  * igb_stop_link_timer - Stop the link setup timer
3612  */
3613 static void
3614 igb_stop_link_timer(struct igb *igb)
3615 {
3616 	timeout_id_t tid;
3617 
3618 	mutex_enter(&igb->link_lock);
3619 	igb->link_complete = B_TRUE;
3620 	tid = igb->link_tid;
3621 	igb->link_tid = 0;
3622 	mutex_exit(&igb->link_lock);
3623 
3624 	if (tid != 0)
3625 		(void) untimeout(tid);
3626 }
3627 
/*
 * igb_disable_adapter_interrupts - Clear/disable all hardware interrupts
 */
static void
igb_disable_adapter_interrupts(igb_t *igb)
{
	struct e1000_hw *hw = &igb->hw;

	/*
	 * Set the IMC register to mask all the interrupts,
	 * including the tx interrupts.
	 */
	E1000_WRITE_REG(hw, E1000_IMC, ~0);
	/* Turn off interrupt auto-masking as well. */
	E1000_WRITE_REG(hw, E1000_IAM, 0);

	/*
	 * Additional disabling for MSI-X
	 */
	if (igb->intr_type == DDI_INTR_TYPE_MSIX) {
		/* Mask all extended causes; clear auto-clear/auto-mask. */
		E1000_WRITE_REG(hw, E1000_EIMC, ~0);
		E1000_WRITE_REG(hw, E1000_EIAC, 0);
		E1000_WRITE_REG(hw, E1000_EIAM, 0);
	}

	/* Push the writes out to the hardware before returning. */
	E1000_WRITE_FLUSH(hw);
}
3654 
/*
 * igb_enable_adapter_interrupts_82580 - Enable NIC interrupts for 82580
 *
 * Programs the interrupt mask registers according to the interrupt
 * type in use.  On 82580 the device-reset-asserted cause (DRSTA) is
 * always enabled in addition to the usual causes.
 */
static void
igb_enable_adapter_interrupts_82580(igb_t *igb)
{
	struct e1000_hw *hw = &igb->hw;

	/* Clear any pending interrupts */
	(void) E1000_READ_REG(hw, E1000_ICR);
	igb->ims_mask |= E1000_IMS_DRSTA;

	if (igb->intr_type == DDI_INTR_TYPE_MSIX) {

		/* Interrupt enabling for MSI-X */
		E1000_WRITE_REG(hw, E1000_EIMS, igb->eims_mask);
		E1000_WRITE_REG(hw, E1000_EIAC, igb->eims_mask);
		/* Only link-status-change and DRSTA arrive via ICR. */
		igb->ims_mask = (E1000_IMS_LSC | E1000_IMS_DRSTA);
		E1000_WRITE_REG(hw, E1000_IMS, igb->ims_mask);
	} else { /* Interrupt enabling for MSI and legacy */
		E1000_WRITE_REG(hw, E1000_IVAR0, E1000_IVAR_VALID);
		igb->ims_mask = IMS_ENABLE_MASK | E1000_IMS_TXQE;
		igb->ims_mask |= E1000_IMS_DRSTA;
		E1000_WRITE_REG(hw, E1000_IMS, igb->ims_mask);
	}

	/* Disable auto-mask for ICR interrupt bits */
	E1000_WRITE_REG(hw, E1000_IAM, 0);

	E1000_WRITE_FLUSH(hw);
}
3686 
/*
 * igb_enable_adapter_interrupts_82576 - Enable NIC interrupts for 82576
 *
 * Programs the interrupt mask registers according to the interrupt
 * type in use; with MSI-X, rx/tx causes go through EICR and only
 * link-status-change is taken through ICR.
 */
static void
igb_enable_adapter_interrupts_82576(igb_t *igb)
{
	struct e1000_hw *hw = &igb->hw;

	/* Clear any pending interrupts */
	(void) E1000_READ_REG(hw, E1000_ICR);

	if (igb->intr_type == DDI_INTR_TYPE_MSIX) {

		/* Interrupt enabling for MSI-X */
		E1000_WRITE_REG(hw, E1000_EIMS, igb->eims_mask);
		E1000_WRITE_REG(hw, E1000_EIAC, igb->eims_mask);
		igb->ims_mask = E1000_IMS_LSC;
		E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_LSC);
	} else {
		/* Interrupt enabling for MSI and legacy */
		E1000_WRITE_REG(hw, E1000_IVAR0, E1000_IVAR_VALID);
		igb->ims_mask = IMS_ENABLE_MASK | E1000_IMS_TXQE;
		E1000_WRITE_REG(hw, E1000_IMS,
		    (IMS_ENABLE_MASK | E1000_IMS_TXQE));
	}

	/* Disable auto-mask for ICR interrupt bits */
	E1000_WRITE_REG(hw, E1000_IAM, 0);

	E1000_WRITE_FLUSH(hw);
}
3718 
/*
 * igb_enable_adapter_interrupts_82575 - Enable NIC interrupts for 82575
 *
 * Programs the interrupt mask registers according to the interrupt
 * type in use; with MSI-X, PBA support and non-selective
 * clear-on-read are also enabled in CTRL_EXT.
 */
static void
igb_enable_adapter_interrupts_82575(igb_t *igb)
{
	struct e1000_hw *hw = &igb->hw;
	uint32_t reg;

	/* Clear any pending interrupts */
	(void) E1000_READ_REG(hw, E1000_ICR);

	if (igb->intr_type == DDI_INTR_TYPE_MSIX) {
		/* Interrupt enabling for MSI-X */
		E1000_WRITE_REG(hw, E1000_EIMS, igb->eims_mask);
		E1000_WRITE_REG(hw, E1000_EIAC, igb->eims_mask);
		igb->ims_mask = E1000_IMS_LSC;
		E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_LSC);

		/* Enable MSI-X PBA support */
		reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
		reg |= E1000_CTRL_EXT_PBA_CLR;

		/* Non-selective interrupt clear-on-read */
		reg |= E1000_CTRL_EXT_IRCA;	/* Called NSICR in the EAS */

		E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg);
	} else {
		/* Interrupt enabling for MSI and legacy */
		igb->ims_mask = IMS_ENABLE_MASK;
		E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK);
	}

	E1000_WRITE_FLUSH(hw);
}
3754 
/*
 * Loopback Support
 *
 * Tables of the loopback modes advertised to upper layers through the
 * LB_GET_INFO ioctl.  Which of lb_phy/lb_serdes is reported depends on
 * the device's media type (see igb_loopback_ioctl()).
 */
static lb_property_t lb_normal =
	{ normal,	"normal",	IGB_LB_NONE		};
static lb_property_t lb_external =
	{ external,	"External",	IGB_LB_EXTERNAL		};
static lb_property_t lb_phy =
	{ internal,	"PHY",		IGB_LB_INTERNAL_PHY	};
static lb_property_t lb_serdes =
	{ internal,	"SerDes",	IGB_LB_INTERNAL_SERDES	};
3766 
/*
 * igb_loopback_ioctl - Handle the LB_* loopback control ioctls
 *
 * Supports querying the size of the loopback-mode table
 * (LB_GET_INFO_SIZE), fetching the table itself (LB_GET_INFO), and
 * getting/setting the current mode (LB_GET_MODE/LB_SET_MODE).  Each
 * command validates iocp->ioc_count against the expected payload size
 * and returns IOC_INVAL on any mismatch or failure.
 */
enum ioc_reply
igb_loopback_ioctl(igb_t *igb, struct iocblk *iocp, mblk_t *mp)
{
	lb_info_sz_t *lbsp;
	lb_property_t *lbpp;
	struct e1000_hw *hw;
	uint32_t *lbmp;
	uint32_t size;
	uint32_t value;

	hw = &igb->hw;

	/* The payload travels in the continuation block. */
	if (mp->b_cont == NULL)
		return (IOC_INVAL);

	switch (iocp->ioc_cmd) {
	default:
		return (IOC_INVAL);

	case LB_GET_INFO_SIZE:
		size = sizeof (lb_info_sz_t);
		if (iocp->ioc_count != size)
			return (IOC_INVAL);

		/*
		 * Three modes are always offered: normal, external, and
		 * one internal mode matching the media type.
		 */
		value = sizeof (lb_normal);
		if (hw->phy.media_type == e1000_media_type_copper)
			value += sizeof (lb_phy);
		else
			value += sizeof (lb_serdes);
		value += sizeof (lb_external);

		lbsp = (lb_info_sz_t *)(uintptr_t)mp->b_cont->b_rptr;
		*lbsp = value;
		break;

	case LB_GET_INFO:
		/* Same size computation as LB_GET_INFO_SIZE above. */
		value = sizeof (lb_normal);
		if (hw->phy.media_type == e1000_media_type_copper)
			value += sizeof (lb_phy);
		else
			value += sizeof (lb_serdes);
		value += sizeof (lb_external);

		size = value;
		if (iocp->ioc_count != size)
			return (IOC_INVAL);

		/* Copy the applicable mode descriptors into the payload. */
		value = 0;
		lbpp = (lb_property_t *)(uintptr_t)mp->b_cont->b_rptr;

		lbpp[value++] = lb_normal;
		if (hw->phy.media_type == e1000_media_type_copper)
			lbpp[value++] = lb_phy;
		else
			lbpp[value++] = lb_serdes;
		lbpp[value++] = lb_external;
		break;

	case LB_GET_MODE:
		size = sizeof (uint32_t);
		if (iocp->ioc_count != size)
			return (IOC_INVAL);

		lbmp = (uint32_t *)(uintptr_t)mp->b_cont->b_rptr;
		*lbmp = igb->loopback_mode;
		break;

	case LB_SET_MODE:
		/* No data is returned for a set. */
		size = 0;
		if (iocp->ioc_count != sizeof (uint32_t))
			return (IOC_INVAL);

		lbmp = (uint32_t *)(uintptr_t)mp->b_cont->b_rptr;
		if (!igb_set_loopback_mode(igb, *lbmp))
			return (IOC_INVAL);
		break;
	}

	iocp->ioc_count = size;
	iocp->ioc_error = 0;

	/* Register access during the ioctl may have failed; check FMA. */
	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		return (IOC_INVAL);
	}

	return (IOC_REPLY);
}
3855 
/*
 * igb_set_loopback_mode - Setup loopback based on the loopback mode
 *
 * Returning to IGB_LB_NONE resets the chip.  For IGB_LB_EXTERNAL the
 * routine waits up to ~1000ms for the link to come up; if it does not,
 * the mode is rolled back to IGB_LB_NONE (with another chip reset) and
 * B_FALSE is returned.
 */
static boolean_t
igb_set_loopback_mode(igb_t *igb, uint32_t mode)
{
	struct e1000_hw *hw;
	int i;

	/* Nothing to do when the mode is unchanged. */
	if (mode == igb->loopback_mode)
		return (B_TRUE);

	hw = &igb->hw;

	igb->loopback_mode = mode;

	if (mode == IGB_LB_NONE) {
		/* Reset the chip */
		hw->phy.autoneg_wait_to_complete = B_TRUE;
		(void) igb_reset(igb);
		hw->phy.autoneg_wait_to_complete = B_FALSE;
		return (B_TRUE);
	}

	mutex_enter(&igb->gen_lock);

	switch (mode) {
	default:
		mutex_exit(&igb->gen_lock);
		return (B_FALSE);

	case IGB_LB_EXTERNAL:
		igb_set_external_loopback(igb);
		break;

	case IGB_LB_INTERNAL_PHY:
		igb_set_internal_phy_loopback(igb);
		break;

	case IGB_LB_INTERNAL_SERDES:
		igb_set_internal_serdes_loopback(igb);
		break;
	}

	mutex_exit(&igb->gen_lock);

	/*
	 * When external loopback is set, wait up to 1000ms to get the link up.
	 * According to test, 1000ms can work and it's an experimental value.
	 */
	if (mode == IGB_LB_EXTERNAL) {
		for (i = 0; i <= 10; i++) {
			mutex_enter(&igb->gen_lock);
			(void) igb_link_check(igb);
			mutex_exit(&igb->gen_lock);

			if (igb->link_state == LINK_STATE_UP)
				break;

			msec_delay(100);
		}

		if (igb->link_state != LINK_STATE_UP) {
			/*
			 * Does not support external loopback.
			 * Reset driver to loopback none.
			 */
			igb->loopback_mode = IGB_LB_NONE;

			/* Reset the chip */
			hw->phy.autoneg_wait_to_complete = B_TRUE;
			(void) igb_reset(igb);
			hw->phy.autoneg_wait_to_complete = B_FALSE;

			igb_log(igb, IGB_LOG_INFO, "Set external loopback "
			    "failed, reset to loopback none.");

			return (B_FALSE);
		}
	}

	return (B_TRUE);
}
3939 
/*
 * igb_set_external_loopback - Set the external loopback mode
 */
static void
igb_set_external_loopback(igb_t *igb)
{
	struct e1000_hw *hw;
	uint32_t ctrl_ext;

	hw = &igb->hw;

	/* Set link mode to PHY (00b) in the Extended Control register */
	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
	ctrl_ext &= ~E1000_CTRL_EXT_LINK_MODE_MASK;
	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);

	/*
	 * Write PHY control (reg 0) = 0x0140 (1000Mb/s, full duplex per
	 * the MII control-register bit layout), then program
	 * vendor-specific PHY registers for external loopback.
	 * NOTE(review): 0x1f37 exceeds the 5-bit MII register space, so
	 * it presumably relies on the PHY layer's extended/paged register
	 * addressing -- confirm against the PHY datasheet.
	 */
	(void) e1000_write_phy_reg(hw, 0x0, 0x0140);
	(void) e1000_write_phy_reg(hw, 0x9, 0x1a00);
	(void) e1000_write_phy_reg(hw, 0x12, 0x1610);
	(void) e1000_write_phy_reg(hw, 0x1f37, 0x3f1c);
}
3961 
/*
 * igb_set_internal_phy_loopback - Set the internal PHY loopback mode
 *
 * Routes traffic back at the PHY: forces 1000Mb/s full duplex with
 * loopback enabled on the PHY, then disables the external link via the
 * PHY's Port Configuration register.
 */
static void
igb_set_internal_phy_loopback(igb_t *igb)
{
	struct e1000_hw *hw;
	uint32_t ctrl_ext;
	uint16_t phy_ctrl;
	uint16_t phy_pconf;

	hw = &igb->hw;

	/* Set link mode to PHY (00b) in the Extended Control register */
	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
	ctrl_ext &= ~E1000_CTRL_EXT_LINK_MODE_MASK;
	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);

	/*
	 * Set PHY control register (0x4140):
	 *    Set full duplex mode
	 *    Set loopback bit
	 *    Clear auto-neg enable bit
	 *    Set PHY speed
	 */
	phy_ctrl = MII_CR_FULL_DUPLEX | MII_CR_SPEED_1000 | MII_CR_LOOPBACK;
	(void) e1000_write_phy_reg(hw, PHY_CONTROL, phy_ctrl);

	/* Set the link disable bit in the Port Configuration register */
	(void) e1000_read_phy_reg(hw, 0x10, &phy_pconf);
	phy_pconf |= (uint16_t)1 << 14;
	(void) e1000_write_phy_reg(hw, 0x10, phy_pconf);
}
3995 
/*
 * igb_set_internal_serdes_loopback - Set the internal SerDes loopback mode
 *
 * Switches the link mode to SerDes, loops the SerDes back via SCTL,
 * forces link up at full duplex, and disables PCS autonegotiation and
 * the copper/fiber energy-source switch-over.
 */
static void
igb_set_internal_serdes_loopback(igb_t *igb)
{
	struct e1000_hw *hw;
	uint32_t ctrl_ext;
	uint32_t ctrl;
	uint32_t pcs_lctl;
	uint32_t connsw;

	hw = &igb->hw;

	/* Set link mode to SerDes (11b) in the Extended Control register */
	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
	ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);

	/* Configure the SerDes to loopback */
	E1000_WRITE_REG(hw, E1000_SCTL, 0x410);

	/* Set Device Control register */
	ctrl = E1000_READ_REG(hw, E1000_CTRL);
	ctrl |= (E1000_CTRL_FD |	/* Force full duplex */
	    E1000_CTRL_SLU);		/* Force link up */
	ctrl &= ~(E1000_CTRL_RFCE |	/* Disable receive flow control */
	    E1000_CTRL_TFCE |		/* Disable transmit flow control */
	    E1000_CTRL_LRST);		/* Clear link reset */
	E1000_WRITE_REG(hw, E1000_CTRL, ctrl);

	/* Set PCS Link Control register */
	pcs_lctl = E1000_READ_REG(hw, E1000_PCS_LCTL);
	pcs_lctl |= (E1000_PCS_LCTL_FORCE_LINK |
	    E1000_PCS_LCTL_FSD |
	    E1000_PCS_LCTL_FDV_FULL |
	    E1000_PCS_LCTL_FLV_LINK_UP);
	pcs_lctl &= ~E1000_PCS_LCTL_AN_ENABLE;
	E1000_WRITE_REG(hw, E1000_PCS_LCTL, pcs_lctl);

	/* Set the Copper/Fiber Switch Control - CONNSW register */
	connsw = E1000_READ_REG(hw, E1000_CONNSW);
	connsw &= ~E1000_CONNSW_ENRGSRC;
	E1000_WRITE_REG(hw, E1000_CONNSW, connsw);
}
4041 
4042 #pragma inline(igb_intr_rx_work)
4043 /*
4044  * igb_intr_rx_work - rx processing of ISR
4045  */
4046 static void
4047 igb_intr_rx_work(igb_rx_ring_t *rx_ring)
4048 {
4049 	mblk_t *mp;
4050 
4051 	mutex_enter(&rx_ring->rx_lock);
4052 	mp = igb_rx(rx_ring, IGB_NO_POLL);
4053 	mutex_exit(&rx_ring->rx_lock);
4054 
4055 	if (mp != NULL)
4056 		mac_rx_ring(rx_ring->igb->mac_hdl, rx_ring->ring_handle, mp,
4057 		    rx_ring->ring_gen_num);
4058 }
4059 
4060 #pragma inline(igb_intr_tx_work)
4061 /*
4062  * igb_intr_tx_work - tx processing of ISR
4063  */
4064 static void
4065 igb_intr_tx_work(igb_tx_ring_t *tx_ring)
4066 {
4067 	igb_t *igb = tx_ring->igb;
4068 
4069 	/* Recycle the tx descriptors */
4070 	tx_ring->tx_recycle(tx_ring);
4071 
4072 	/* Schedule the re-transmit */
4073 	if (tx_ring->reschedule &&
4074 	    (tx_ring->tbd_free >= igb->tx_resched_thresh)) {
4075 		tx_ring->reschedule = B_FALSE;
4076 		mac_tx_ring_update(tx_ring->igb->mac_hdl, tx_ring->ring_handle);
4077 		IGB_DEBUG_STAT(tx_ring->stat_reschedule);
4078 	}
4079 }
4080 
#pragma inline(igb_intr_link_work)
/*
 * igb_intr_link_work - link-status-change processing of ISR
 *
 * Stops the watchdog while the link state is re-evaluated under
 * gen_lock, reports any change to MAC outside the lock, then starts
 * the watchdog again.
 */
static void
igb_intr_link_work(igb_t *igb)
{
	boolean_t link_changed;

	igb_stop_watchdog_timer(igb);

	mutex_enter(&igb->gen_lock);

	/*
	 * Because we got a link-status-change interrupt, force
	 * e1000_check_for_link() to look at phy
	 */
	igb->hw.mac.get_link_status = B_TRUE;

	/* igb_link_check takes care of link status change */
	link_changed = igb_link_check(igb);

	/* Get new phy state */
	igb_get_phy_state(igb);

	mutex_exit(&igb->gen_lock);

	if (link_changed)
		mac_link_update(igb->mac_hdl, igb->link_state);

	igb_start_watchdog_timer(igb);
}
4113 
/*
 * igb_intr_legacy - Interrupt handler for legacy interrupts
 *
 * Legacy interrupts may be shared with other devices, so the interrupt
 * is claimed only when ICR has INT_ASSERTED set.  All cause processing
 * happens under gen_lock; the resulting MAC upcalls (rx delivery, tx
 * resume, link update) are deferred until after the lock is dropped.
 */
static uint_t
igb_intr_legacy(void *arg1, void *arg2)
{
	igb_t *igb = (igb_t *)arg1;
	igb_tx_ring_t *tx_ring;
	uint32_t icr;
	mblk_t *mp;
	boolean_t tx_reschedule;
	boolean_t link_changed;
	uint_t result;

	_NOTE(ARGUNUSED(arg2));

	mutex_enter(&igb->gen_lock);

	/* While suspended, the interrupt cannot be ours. */
	if (igb->igb_state & IGB_SUSPENDED) {
		mutex_exit(&igb->gen_lock);
		return (DDI_INTR_UNCLAIMED);
	}

	mp = NULL;
	tx_reschedule = B_FALSE;
	link_changed = B_FALSE;
	/* Reading ICR also clears the latched causes. */
	icr = E1000_READ_REG(&igb->hw, E1000_ICR);

	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
		mutex_exit(&igb->gen_lock);
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&igb->igb_state, IGB_ERROR);
		return (DDI_INTR_UNCLAIMED);
	}

	if (icr & E1000_ICR_INT_ASSERTED) {
		/*
		 * E1000_ICR_INT_ASSERTED bit was set:
		 * Read(Clear) the ICR, claim this interrupt,
		 * look for work to do.
		 */
		ASSERT(igb->num_rx_rings == 1);
		ASSERT(igb->num_tx_rings == 1);

		/* Make sure all interrupt causes cleared */
		(void) E1000_READ_REG(&igb->hw, E1000_EICR);

		if (icr & E1000_ICR_RXT0) {
			/* Collect received frames; delivered after unlock. */
			mp = igb_rx(&igb->rx_rings[0], IGB_NO_POLL);
		}

		if (icr & E1000_ICR_TXDW) {
			tx_ring = &igb->tx_rings[0];

			/* Recycle the tx descriptors */
			tx_ring->tx_recycle(tx_ring);

			/* Schedule the re-transmit */
			tx_reschedule = (tx_ring->reschedule &&
			    (tx_ring->tbd_free >= igb->tx_resched_thresh));
		}

		if (icr & E1000_ICR_LSC) {
			/*
			 * Because we got a link-status-change interrupt, force
			 * e1000_check_for_link() to look at phy
			 */
			igb->hw.mac.get_link_status = B_TRUE;

			/* igb_link_check takes care of link status change */
			link_changed = igb_link_check(igb);

			/* Get new phy state */
			igb_get_phy_state(igb);
		}

		if (icr & E1000_ICR_DRSTA) {
			/* 82580 Full Device Reset needed */
			atomic_or_32(&igb->igb_state, IGB_STALL);
		}

		result = DDI_INTR_CLAIMED;
	} else {
		/*
		 * E1000_ICR_INT_ASSERTED bit was not set:
		 * Don't claim this interrupt.
		 */
		result = DDI_INTR_UNCLAIMED;
	}

	mutex_exit(&igb->gen_lock);

	/*
	 * Do the following work outside of the gen_lock
	 */
	if (mp != NULL)
		mac_rx(igb->mac_hdl, NULL, mp);

	if (tx_reschedule)  {
		tx_ring->reschedule = B_FALSE;
		mac_tx_ring_update(igb->mac_hdl, tx_ring->ring_handle);
		IGB_DEBUG_STAT(tx_ring->stat_reschedule);
	}

	if (link_changed)
		mac_link_update(igb->mac_hdl, igb->link_state);

	return (result);
}
4223 
/*
 * igb_intr_msi - Interrupt handler for MSI
 *
 * With MSI there is a single vector, so all causes (rx, tx, link,
 * device reset) are dispatched from here.  MSI is never shared, so the
 * interrupt is always claimed.
 */
static uint_t
igb_intr_msi(void *arg1, void *arg2)
{
	igb_t *igb = (igb_t *)arg1;
	uint32_t icr;

	_NOTE(ARGUNUSED(arg2));

	/* Reading ICR also clears the latched causes. */
	icr = E1000_READ_REG(&igb->hw, E1000_ICR);

	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&igb->igb_state, IGB_ERROR);
		return (DDI_INTR_CLAIMED);
	}

	/* Make sure all interrupt causes cleared */
	(void) E1000_READ_REG(&igb->hw, E1000_EICR);

	/*
	 * For MSI interrupt, we have only one vector,
	 * so we have only one rx ring and one tx ring enabled.
	 */
	ASSERT(igb->num_rx_rings == 1);
	ASSERT(igb->num_tx_rings == 1);

	if (icr & E1000_ICR_RXT0) {
		igb_intr_rx_work(&igb->rx_rings[0]);
	}

	if (icr & E1000_ICR_TXDW) {
		igb_intr_tx_work(&igb->tx_rings[0]);
	}

	if (icr & E1000_ICR_LSC) {
		igb_intr_link_work(igb);
	}

	if (icr & E1000_ICR_DRSTA) {
		/* 82580 Full Device Reset needed */
		atomic_or_32(&igb->igb_state, IGB_STALL);
	}

	return (DDI_INTR_CLAIMED);
}
4272 
4273 /*
4274  * igb_intr_rx - Interrupt handler for rx
4275  */
4276 static uint_t
4277 igb_intr_rx(void *arg1, void *arg2)
4278 {
4279 	igb_rx_ring_t *rx_ring = (igb_rx_ring_t *)arg1;
4280 
4281 	_NOTE(ARGUNUSED(arg2));
4282 
4283 	/*
4284 	 * Only used via MSI-X vector so don't check cause bits
4285 	 * and only clean the given ring.
4286 	 */
4287 	igb_intr_rx_work(rx_ring);
4288 
4289 	return (DDI_INTR_CLAIMED);
4290 }
4291 
4292 /*
4293  * igb_intr_tx - Interrupt handler for tx
4294  */
4295 static uint_t
4296 igb_intr_tx(void *arg1, void *arg2)
4297 {
4298 	igb_tx_ring_t *tx_ring = (igb_tx_ring_t *)arg1;
4299 
4300 	_NOTE(ARGUNUSED(arg2));
4301 
4302 	/*
4303 	 * Only used via MSI-X vector so don't check cause bits
4304 	 * and only clean the given ring.
4305 	 */
4306 	igb_intr_tx_work(tx_ring);
4307 
4308 	return (DDI_INTR_CLAIMED);
4309 }
4310 
/*
 * igb_intr_tx_other - Interrupt handler for both tx and other
 *
 * Used when the tx ring shares its MSI-X vector with the "other"
 * causes (link status change, DMA out-of-sync, device reset).
 */
static uint_t
igb_intr_tx_other(void *arg1, void *arg2)
{
	igb_t *igb = (igb_t *)arg1;
	uint32_t icr;

	_NOTE(ARGUNUSED(arg2));

	/* Reading ICR also clears the latched causes. */
	icr = E1000_READ_REG(&igb->hw, E1000_ICR);

	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&igb->igb_state, IGB_ERROR);
		return (DDI_INTR_CLAIMED);
	}

	/*
	 * Look for tx reclaiming work first. Remember, in the
	 * case of only interrupt sharing, only one tx ring is
	 * used
	 */
	igb_intr_tx_work(&igb->tx_rings[0]);

	/*
	 * Check for "other" causes.
	 */
	if (icr & E1000_ICR_LSC) {
		igb_intr_link_work(igb);
	}

	/*
	 * The DOUTSYNC bit indicates a tx packet dropped because
	 * DMA engine gets "out of sync". There isn't a real fix
	 * for this. The Intel recommendation is to count the number
	 * of occurrences so user can detect when it is happening.
	 * The issue is non-fatal and there's no recovery action
	 * available.
	 */
	if (icr & E1000_ICR_DOUTSYNC) {
		IGB_STAT(igb->dout_sync);
	}

	if (icr & E1000_ICR_DRSTA) {
		/* 82580 Full Device Reset needed */
		atomic_or_32(&igb->igb_state, IGB_STALL);
	}

	return (DDI_INTR_CLAIMED);
}
4364 
4365 /*
4366  * igb_alloc_intrs - Allocate interrupts for the driver
4367  *
4368  * Normal sequence is to try MSI-X; if not sucessful, try MSI;
4369  * if not successful, try Legacy.
4370  * igb->intr_force can be used to force sequence to start with
4371  * any of the 3 types.
4372  * If MSI-X is not used, number of tx/rx rings is forced to 1.
4373  */
4374 static int
4375 igb_alloc_intrs(igb_t *igb)
4376 {
4377 	dev_info_t *devinfo;
4378 	int intr_types;
4379 	int rc;
4380 
4381 	devinfo = igb->dip;
4382 
4383 	/* Get supported interrupt types */
4384 	rc = ddi_intr_get_supported_types(devinfo, &intr_types);
4385 
4386 	if (rc != DDI_SUCCESS) {
4387 		igb_log(igb, IGB_LOG_ERROR,
4388 		    "Get supported interrupt types failed: %d", rc);
4389 		return (IGB_FAILURE);
4390 	}
4391 	igb_log(igb, IGB_LOG_INFO, "Supported interrupt types: %x",
4392 	    intr_types);
4393 
4394 	igb->intr_type = 0;
4395 
4396 	/* Install MSI-X interrupts */
4397 	if ((intr_types & DDI_INTR_TYPE_MSIX) &&
4398 	    (igb->intr_force <= IGB_INTR_MSIX)) {
4399 		rc = igb_alloc_intr_handles(igb, DDI_INTR_TYPE_MSIX);
4400 
4401 		if (rc == IGB_SUCCESS)
4402 			return (IGB_SUCCESS);
4403 
4404 		igb_log(igb, IGB_LOG_INFO,
4405 		    "Allocate MSI-X failed, trying MSI interrupts...");
4406 	}
4407 
4408 	/* MSI-X not used, force rings to 1 */
4409 	igb->num_rx_rings = 1;
4410 	igb->num_tx_rings = 1;
4411 	igb_log(igb, IGB_LOG_INFO,
4412 	    "MSI-X not used, force rx and tx queue number to 1");
4413 
4414 	/* Install MSI interrupts */
4415 	if ((intr_types & DDI_INTR_TYPE_MSI) &&
4416 	    (igb->intr_force <= IGB_INTR_MSI)) {
4417 		rc = igb_alloc_intr_handles(igb, DDI_INTR_TYPE_MSI);
4418 
4419 		if (rc == IGB_SUCCESS)
4420 			return (IGB_SUCCESS);
4421 
4422 		igb_log(igb, IGB_LOG_INFO,
4423 		    "Allocate MSI failed, trying Legacy interrupts...");
4424 	}
4425 
4426 	/* Install legacy interrupts */
4427 	if (intr_types & DDI_INTR_TYPE_FIXED) {
4428 		rc = igb_alloc_intr_handles(igb, DDI_INTR_TYPE_FIXED);
4429 
4430 		if (rc == IGB_SUCCESS)
4431 			return (IGB_SUCCESS);
4432 
4433 		igb_log(igb, IGB_LOG_INFO,
4434 		    "Allocate Legacy interrupts failed");
4435 	}
4436 
4437 	/* If none of the 3 types succeeded, return failure */
4438 	return (IGB_FAILURE);
4439 }
4440 
4441 /*
4442  * igb_alloc_intr_handles - Allocate interrupt handles.
4443  *
4444  * For legacy and MSI, only 1 handle is needed.  For MSI-X,
4445  * if fewer than 2 handles are available, return failure.
4446  * Upon success, this sets the number of Rx rings to a number that
4447  * matches the handles available for Rx interrupts.
4448  */
static int
igb_alloc_intr_handles(igb_t *igb, int intr_type)
{
	dev_info_t *devinfo;
	int orig, request, count, avail, actual;
	int diff, minimum;
	int rc;

	devinfo = igb->dip;

	/*
	 * Decide how many handles to request and the fewest we can accept.
	 * Only MSI-X ever asks for more than one.
	 */
	switch (intr_type) {
	case DDI_INTR_TYPE_FIXED:
		request = 1;	/* Request 1 legacy interrupt handle */
		minimum = 1;
		igb_log(igb, IGB_LOG_INFO, "interrupt type: legacy");
		break;

	case DDI_INTR_TYPE_MSI:
		request = 1;	/* Request 1 MSI interrupt handle */
		minimum = 1;
		igb_log(igb, IGB_LOG_INFO, "interrupt type: MSI");
		break;

	case DDI_INTR_TYPE_MSIX:
		/*
		 * Number of vectors for the adapter is
		 * # rx rings + # tx rings
		 * One of tx vectors is for tx & other
		 */
		request = igb->num_rx_rings + igb->num_tx_rings;
		/* Remember the full request so ring counts can be trimmed */
		orig = request;
		minimum = 2;
		igb_log(igb, IGB_LOG_INFO, "interrupt type: MSI-X");
		break;

	default:
		igb_log(igb, IGB_LOG_INFO,
		    "invalid call to igb_alloc_intr_handles(): %d\n",
		    intr_type);
		return (IGB_FAILURE);
	}
	igb_log(igb, IGB_LOG_INFO,
	    "interrupt handles requested: %d  minimum: %d",
	    request, minimum);

	/*
	 * Get number of supported interrupts
	 */
	rc = ddi_intr_get_nintrs(devinfo, intr_type, &count);
	if ((rc != DDI_SUCCESS) || (count < minimum)) {
		igb_log(igb, IGB_LOG_INFO,
		    "Get supported interrupt number failed. "
		    "Return: %d, count: %d", rc, count);
		return (IGB_FAILURE);
	}
	igb_log(igb, IGB_LOG_INFO, "interrupts supported: %d", count);

	/*
	 * Get number of available interrupts
	 */
	rc = ddi_intr_get_navail(devinfo, intr_type, &avail);
	if ((rc != DDI_SUCCESS) || (avail < minimum)) {
		igb_log(igb, IGB_LOG_INFO,
		    "Get available interrupt number failed. "
		    "Return: %d, available: %d", rc, avail);
		return (IGB_FAILURE);
	}
	igb_log(igb, IGB_LOG_INFO, "interrupts available: %d", avail);

	/* Scale the request down to what the system can actually provide */
	if (avail < request) {
		igb_log(igb, IGB_LOG_INFO,
		    "Request %d handles, %d available",
		    request, avail);
		request = avail;
	}

	actual = 0;
	igb->intr_cnt = 0;

	/*
	 * Allocate an array of interrupt handles
	 */
	igb->intr_size = request * sizeof (ddi_intr_handle_t);
	igb->htable = kmem_alloc(igb->intr_size, KM_SLEEP);

	rc = ddi_intr_alloc(devinfo, igb->htable, intr_type, 0,
	    request, &actual, DDI_INTR_ALLOC_NORMAL);
	if (rc != DDI_SUCCESS) {
		igb_log(igb, IGB_LOG_INFO, "Allocate interrupts failed. "
		    "return: %d, request: %d, actual: %d",
		    rc, request, actual);
		goto alloc_handle_fail;
	}
	igb_log(igb, IGB_LOG_INFO, "interrupts actually allocated: %d", actual);

	/*
	 * intr_cnt records how many vectors were really allocated; the
	 * failure path (igb_rem_intrs) relies on it to free them.
	 */
	igb->intr_cnt = actual;

	if (actual < minimum) {
		igb_log(igb, IGB_LOG_INFO,
		    "Insufficient interrupt handles allocated: %d",
		    actual);
		goto alloc_handle_fail;
	}

	/*
	 * For MSI-X, actual might force us to reduce number of tx & rx rings
	 */
	if ((intr_type == DDI_INTR_TYPE_MSIX) && (orig > actual)) {
		diff = orig - actual;
		if (diff < igb->num_tx_rings) {
			/* Absorb the shortfall entirely from tx rings */
			igb_log(igb, IGB_LOG_INFO,
			    "MSI-X vectors force Tx queue number to %d",
			    igb->num_tx_rings - diff);
			igb->num_tx_rings -= diff;
		} else {
			/* Drop to 1 tx ring; the remaining vectors serve rx */
			igb_log(igb, IGB_LOG_INFO,
			    "MSI-X vectors force Tx queue number to 1");
			igb->num_tx_rings = 1;

			igb_log(igb, IGB_LOG_INFO,
			    "MSI-X vectors force Rx queue number to %d",
			    actual - 1);
			igb->num_rx_rings = actual - 1;
		}
	}

	/*
	 * Get priority for first vector, assume remaining are all the same
	 */
	rc = ddi_intr_get_pri(igb->htable[0], &igb->intr_pri);
	if (rc != DDI_SUCCESS) {
		igb_log(igb, IGB_LOG_INFO,
		    "Get interrupt priority failed: %d", rc);
		goto alloc_handle_fail;
	}

	rc = ddi_intr_get_cap(igb->htable[0], &igb->intr_cap);
	if (rc != DDI_SUCCESS) {
		igb_log(igb, IGB_LOG_INFO,
		    "Get interrupt cap failed: %d", rc);
		goto alloc_handle_fail;
	}

	igb->intr_type = intr_type;

	return (IGB_SUCCESS);

alloc_handle_fail:
	/* Frees any allocated vectors and the handle array itself */
	igb_rem_intrs(igb);

	return (IGB_FAILURE);
}
4601 
4602 /*
4603  * igb_add_intr_handlers - Add interrupt handlers based on the interrupt type
4604  *
4605  * Before adding the interrupt handlers, the interrupt vectors have
4606  * been allocated, and the rx/tx rings have also been allocated.
4607  */
static int
igb_add_intr_handlers(igb_t *igb)
{
	igb_rx_ring_t *rx_ring;
	igb_tx_ring_t *tx_ring;
	int vector;
	int rc;
	int i;

	/*
	 * Vector assignment order (tx0+other first, then rx rings, then
	 * the remaining tx rings) must match the IVAR/MSIXBM programming
	 * in the igb_setup_msix_*() routines.
	 */
	vector = 0;

	switch (igb->intr_type) {
	case DDI_INTR_TYPE_MSIX:
		/* Add interrupt handler for tx + other */
		tx_ring = &igb->tx_rings[0];
		rc = ddi_intr_add_handler(igb->htable[vector],
		    (ddi_intr_handler_t *)igb_intr_tx_other,
		    (void *)igb, NULL);

		if (rc != DDI_SUCCESS) {
			igb_log(igb, IGB_LOG_INFO,
			    "Add tx/other interrupt handler failed: %d", rc);
			return (IGB_FAILURE);
		}
		tx_ring->intr_vector = vector;
		vector++;

		/* Add interrupt handler for each rx ring */
		for (i = 0; i < igb->num_rx_rings; i++) {
			rx_ring = &igb->rx_rings[i];

			rc = ddi_intr_add_handler(igb->htable[vector],
			    (ddi_intr_handler_t *)igb_intr_rx,
			    (void *)rx_ring, NULL);

			if (rc != DDI_SUCCESS) {
				igb_log(igb, IGB_LOG_INFO,
				    "Add rx interrupt handler failed. "
				    "return: %d, rx ring: %d", rc, i);
				/* Unwind the handlers installed so far */
				for (vector--; vector >= 0; vector--) {
					(void) ddi_intr_remove_handler(
					    igb->htable[vector]);
				}
				return (IGB_FAILURE);
			}

			rx_ring->intr_vector = vector;

			vector++;
		}

		/* Add interrupt handler for each tx ring from 2nd ring */
		for (i = 1; i < igb->num_tx_rings; i++) {
			tx_ring = &igb->tx_rings[i];

			rc = ddi_intr_add_handler(igb->htable[vector],
			    (ddi_intr_handler_t *)igb_intr_tx,
			    (void *)tx_ring, NULL);

			if (rc != DDI_SUCCESS) {
				igb_log(igb, IGB_LOG_INFO,
				    "Add tx interrupt handler failed. "
				    "return: %d, tx ring: %d", rc, i);
				/* Unwind the handlers installed so far */
				for (vector--; vector >= 0; vector--) {
					(void) ddi_intr_remove_handler(
					    igb->htable[vector]);
				}
				return (IGB_FAILURE);
			}

			tx_ring->intr_vector = vector;

			vector++;
		}

		break;

	case DDI_INTR_TYPE_MSI:
		/* Add interrupt handlers for the only vector */
		rc = ddi_intr_add_handler(igb->htable[vector],
		    (ddi_intr_handler_t *)igb_intr_msi,
		    (void *)igb, NULL);

		if (rc != DDI_SUCCESS) {
			igb_log(igb, IGB_LOG_INFO,
			    "Add MSI interrupt handler failed: %d", rc);
			return (IGB_FAILURE);
		}

		rx_ring = &igb->rx_rings[0];
		rx_ring->intr_vector = vector;

		vector++;
		break;

	case DDI_INTR_TYPE_FIXED:
		/* Add interrupt handlers for the only vector */
		rc = ddi_intr_add_handler(igb->htable[vector],
		    (ddi_intr_handler_t *)igb_intr_legacy,
		    (void *)igb, NULL);

		if (rc != DDI_SUCCESS) {
			igb_log(igb, IGB_LOG_INFO,
			    "Add legacy interrupt handler failed: %d", rc);
			return (IGB_FAILURE);
		}

		rx_ring = &igb->rx_rings[0];
		rx_ring->intr_vector = vector;

		vector++;
		break;

	default:
		return (IGB_FAILURE);
	}

	/* Every allocated vector must now have a handler attached */
	ASSERT(vector == igb->intr_cnt);

	return (IGB_SUCCESS);
}
4728 }
4729 
4730 /*
4731  * igb_setup_msix_82575 - setup 82575 adapter to use MSI-X interrupts
4732  *
4733  * For each vector enabled on the adapter, Set the MSIXBM register accordingly
4734  */
static void
igb_setup_msix_82575(igb_t *igb)
{
	uint32_t eims = 0;
	int i, vector;
	struct e1000_hw *hw = &igb->hw;

	/*
	 * Set vector for tx ring 0 and other causes.
	 * NOTE assumption that it is vector 0.
	 */
	vector = 0;

	/* Tx queue 0 and the "other" causes share the first vector */
	igb->eims_mask = E1000_EICR_TX_QUEUE0 | E1000_EICR_OTHER;
	E1000_WRITE_REG(hw, E1000_MSIXBM(vector), igb->eims_mask);
	vector++;

	for (i = 0; i < igb->num_rx_rings; i++) {
		/*
		 * Set vector for each rx ring
		 */
		eims = (E1000_EICR_RX_QUEUE0 << i);
		E1000_WRITE_REG(hw, E1000_MSIXBM(vector), eims);

		/*
		 * Accumulate bits to enable in
		 * igb_enable_adapter_interrupts_82575()
		 */
		igb->eims_mask |= eims;

		vector++;
	}

	for (i = 1; i < igb->num_tx_rings; i++) {
		/*
		 * Set vector for each tx ring from 2nd tx ring
		 */
		eims = (E1000_EICR_TX_QUEUE0 << i);
		E1000_WRITE_REG(hw, E1000_MSIXBM(vector), eims);

		/*
		 * Accumulate bits to enable in
		 * igb_enable_adapter_interrupts_82575()
		 */
		igb->eims_mask |= eims;

		vector++;
	}

	/* Every allocated vector must be assigned a cause bitmap */
	ASSERT(vector == igb->intr_cnt);

	/*
	 * Disable IAM for ICR interrupt bits
	 */
	E1000_WRITE_REG(hw, E1000_IAM, 0);
	E1000_WRITE_FLUSH(hw);
}
4792 
4793 /*
4794  * igb_setup_msix_82576 - setup 82576 adapter to use MSI-X interrupts
4795  *
4796  * 82576 uses a table based method for assigning vectors.  Each queue has a
4797  * single entry in the table to which we write a vector number along with a
4798  * "valid" bit.  The entry is a single byte in a 4-byte register.  Vectors
4799  * take a different position in the 4-byte register depending on whether
4800  * they are numbered above or below 8.
4801  */
static void
igb_setup_msix_82576(igb_t *igb)
{
	struct e1000_hw *hw = &igb->hw;
	uint32_t ivar, index, vector;
	int i;

	/* must enable msi-x capability before IVAR settings */
	E1000_WRITE_REG(hw, E1000_GPIE,
	    (E1000_GPIE_MSIX_MODE | E1000_GPIE_PBA | E1000_GPIE_NSICR));

	/*
	 * Set vector for tx ring 0 and other causes.
	 * NOTE assumption that it is vector 0.
	 * This is also interdependent with installation of interrupt service
	 * routines in igb_add_intr_handlers().
	 */

	/* assign "other" causes to vector 0 */
	vector = 0;
	ivar = ((vector | E1000_IVAR_VALID) << 8);
	E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);

	/* assign tx ring 0 to vector 0 */
	ivar = ((vector | E1000_IVAR_VALID) << 8);
	E1000_WRITE_REG(hw, E1000_IVAR0, ivar);

	/* prepare to enable tx & other interrupt causes */
	igb->eims_mask = (1 << vector);

	vector ++;
	for (i = 0; i < igb->num_rx_rings; i++) {
		/*
		 * Set vector for each rx ring
		 */
		/* queues 0-7 and 8-15 share IVAR registers, different bytes */
		index = (i & 0x7);
		ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);

		if (i < 8) {
			/* vector goes into low byte of register */
			ivar = ivar & 0xFFFFFF00;
			ivar |= (vector | E1000_IVAR_VALID);
		} else {
			/* vector goes into third byte of register */
			ivar = ivar & 0xFF00FFFF;
			ivar |= ((vector | E1000_IVAR_VALID) << 16);
		}
		E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);

		/* Accumulate interrupt-cause bits to enable */
		igb->eims_mask |= (1 << vector);

		vector ++;
	}

	for (i = 1; i < igb->num_tx_rings; i++) {
		/*
		 * Set vector for each tx ring from 2nd tx ring.
		 * Note assumption that tx vectors numerically follow rx
		 * vectors.
		 */
		index = (i & 0x7);
		ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);

		if (i < 8) {
			/* vector goes into second byte of register */
			ivar = ivar & 0xFFFF00FF;
			ivar |= ((vector | E1000_IVAR_VALID) << 8);
		} else {
			/* vector goes into fourth byte of register */
			ivar = ivar & 0x00FFFFFF;
			ivar |= (vector | E1000_IVAR_VALID) << 24;
		}
		E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);

		/* Accumulate interrupt-cause bits to enable */
		igb->eims_mask |= (1 << vector);

		vector ++;
	}

	/* Every allocated vector must be mapped to a queue or cause */
	ASSERT(vector == igb->intr_cnt);
}
4884 
4885 /*
4886  * igb_setup_msix_82580 - setup 82580 adapter to use MSI-X interrupts
4887  *
 * 82580 uses the same table approach as 82576 but has fewer entries.  Each
 * queue has a single entry in the table to which we write a vector number
 * along with a "valid" bit.  Vectors take a different position in the
 * register depending on whether they are numbered above or below 4.
4892  */
static void
igb_setup_msix_82580(igb_t *igb)
{
	struct e1000_hw *hw = &igb->hw;
	uint32_t ivar, index, vector;
	int i;

	/* must enable msi-x capability before IVAR settings */
	E1000_WRITE_REG(hw, E1000_GPIE, (E1000_GPIE_MSIX_MODE |
	    E1000_GPIE_PBA | E1000_GPIE_NSICR | E1000_GPIE_EIAME));
	/*
	 * Set vector for tx ring 0 and other causes.
	 * NOTE assumption that it is vector 0.
	 * This is also interdependent with installation of interrupt service
	 * routines in igb_add_intr_handlers().
	 */

	/* assign "other" causes to vector 0 */
	vector = 0;
	ivar = ((vector | E1000_IVAR_VALID) << 8);
	E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);

	/* assign tx ring 0 to vector 0 */
	ivar = ((vector | E1000_IVAR_VALID) << 8);
	E1000_WRITE_REG(hw, E1000_IVAR0, ivar);

	/* prepare to enable tx & other interrupt causes */
	igb->eims_mask = (1 << vector);

	vector ++;

	for (i = 0; i < igb->num_rx_rings; i++) {
		/*
		 * Set vector for each rx ring
		 */
		/* two queues share each IVAR register (even/odd split) */
		index = (i >> 1);
		ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);

		if (i & 1) {
			/* vector goes into third byte of register */
			ivar = ivar & 0xFF00FFFF;
			ivar |= ((vector | E1000_IVAR_VALID) << 16);
		} else {
			/* vector goes into low byte of register */
			ivar = ivar & 0xFFFFFF00;
			ivar |= (vector | E1000_IVAR_VALID);
		}
		E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);

		/* Accumulate interrupt-cause bits to enable */
		igb->eims_mask |= (1 << vector);

		vector ++;
	}

	for (i = 1; i < igb->num_tx_rings; i++) {
		/*
		 * Set vector for each tx ring from 2nd tx ring.
		 * Note assumption that tx vectors numerically follow rx
		 * vectors.
		 */
		index = (i >> 1);
		ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);

		if (i & 1) {
			/* vector goes into high byte of register */
			ivar = ivar & 0x00FFFFFF;
			ivar |= ((vector | E1000_IVAR_VALID) << 24);
		} else {
			/* vector goes into second byte of register */
			ivar = ivar & 0xFFFF00FF;
			ivar |= (vector | E1000_IVAR_VALID) << 8;
		}
		E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);

		/* Accumulate interrupt-cause bits to enable */
		igb->eims_mask |= (1 << vector);

		vector ++;
	}
	/* Every allocated vector must be mapped to a queue or cause */
	ASSERT(vector == igb->intr_cnt);
}
4974 
4975 /*
4976  * igb_rem_intr_handlers - remove the interrupt handlers
4977  */
4978 static void
4979 igb_rem_intr_handlers(igb_t *igb)
4980 {
4981 	int i;
4982 	int rc;
4983 
4984 	for (i = 0; i < igb->intr_cnt; i++) {
4985 		rc = ddi_intr_remove_handler(igb->htable[i]);
4986 		if (rc != DDI_SUCCESS) {
4987 			igb_log(igb, IGB_LOG_INFO,
4988 			    "Remove intr handler failed: %d", rc);
4989 		}
4990 	}
4991 }
4992 
4993 /*
4994  * igb_rem_intrs - remove the allocated interrupts
4995  */
4996 static void
4997 igb_rem_intrs(igb_t *igb)
4998 {
4999 	int i;
5000 	int rc;
5001 
5002 	for (i = 0; i < igb->intr_cnt; i++) {
5003 		rc = ddi_intr_free(igb->htable[i]);
5004 		if (rc != DDI_SUCCESS) {
5005 			igb_log(igb, IGB_LOG_INFO,
5006 			    "Free intr failed: %d", rc);
5007 		}
5008 	}
5009 
5010 	kmem_free(igb->htable, igb->intr_size);
5011 	igb->htable = NULL;
5012 }
5013 
5014 /*
5015  * igb_enable_intrs - enable all the ddi interrupts
5016  */
5017 static int
5018 igb_enable_intrs(igb_t *igb)
5019 {
5020 	int i;
5021 	int rc;
5022 
5023 	/* Enable interrupts */
5024 	if (igb->intr_cap & DDI_INTR_FLAG_BLOCK) {
5025 		/* Call ddi_intr_block_enable() for MSI */
5026 		rc = ddi_intr_block_enable(igb->htable, igb->intr_cnt);
5027 		if (rc != DDI_SUCCESS) {
5028 			igb_log(igb, IGB_LOG_ERROR,
5029 			    "Enable block intr failed: %d", rc);
5030 			return (IGB_FAILURE);
5031 		}
5032 	} else {
5033 		/* Call ddi_intr_enable() for Legacy/MSI non block enable */
5034 		for (i = 0; i < igb->intr_cnt; i++) {
5035 			rc = ddi_intr_enable(igb->htable[i]);
5036 			if (rc != DDI_SUCCESS) {
5037 				igb_log(igb, IGB_LOG_ERROR,
5038 				    "Enable intr failed: %d", rc);
5039 				return (IGB_FAILURE);
5040 			}
5041 		}
5042 	}
5043 
5044 	return (IGB_SUCCESS);
5045 }
5046 
5047 /*
5048  * igb_disable_intrs - disable all the ddi interrupts
5049  */
5050 static int
5051 igb_disable_intrs(igb_t *igb)
5052 {
5053 	int i;
5054 	int rc;
5055 
5056 	/* Disable all interrupts */
5057 	if (igb->intr_cap & DDI_INTR_FLAG_BLOCK) {
5058 		rc = ddi_intr_block_disable(igb->htable, igb->intr_cnt);
5059 		if (rc != DDI_SUCCESS) {
5060 			igb_log(igb, IGB_LOG_ERROR,
5061 			    "Disable block intr failed: %d", rc);
5062 			return (IGB_FAILURE);
5063 		}
5064 	} else {
5065 		for (i = 0; i < igb->intr_cnt; i++) {
5066 			rc = ddi_intr_disable(igb->htable[i]);
5067 			if (rc != DDI_SUCCESS) {
5068 				igb_log(igb, IGB_LOG_ERROR,
5069 				    "Disable intr failed: %d", rc);
5070 				return (IGB_FAILURE);
5071 			}
5072 		}
5073 	}
5074 
5075 	return (IGB_SUCCESS);
5076 }
5077 
5078 /*
5079  * igb_get_phy_state - Get and save the parameters read from PHY registers
5080  */
static void
igb_get_phy_state(igb_t *igb)
{
	struct e1000_hw *hw = &igb->hw;
	uint16_t phy_ctrl;
	uint16_t phy_status;
	uint16_t phy_an_adv;
	uint16_t phy_an_exp;
	uint16_t phy_ext_status;
	uint16_t phy_1000t_ctrl;
	uint16_t phy_1000t_status;
	uint16_t phy_lp_able;

	ASSERT(mutex_owned(&igb->gen_lock));

	if (hw->phy.media_type == e1000_media_type_copper) {
		/* Snapshot the standard MII/GMII PHY registers */
		(void) e1000_read_phy_reg(hw, PHY_CONTROL, &phy_ctrl);
		(void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status);
		(void) e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, &phy_an_adv);
		(void) e1000_read_phy_reg(hw, PHY_AUTONEG_EXP, &phy_an_exp);
		(void) e1000_read_phy_reg(hw, PHY_EXT_STATUS, &phy_ext_status);
		(void) e1000_read_phy_reg(hw, PHY_1000T_CTRL, &phy_1000t_ctrl);
		(void) e1000_read_phy_reg(hw,
		    PHY_1000T_STATUS, &phy_1000t_status);
		(void) e1000_read_phy_reg(hw, PHY_LP_ABILITY, &phy_lp_able);

		/* Capabilities the local PHY reports */
		igb->param_autoneg_cap =
		    (phy_status & MII_SR_AUTONEG_CAPS) ? 1 : 0;
		igb->param_pause_cap =
		    (phy_an_adv & NWAY_AR_PAUSE) ? 1 : 0;
		igb->param_asym_pause_cap =
		    (phy_an_adv & NWAY_AR_ASM_DIR) ? 1 : 0;
		igb->param_1000fdx_cap =
		    ((phy_ext_status & IEEE_ESR_1000T_FD_CAPS) ||
		    (phy_ext_status & IEEE_ESR_1000X_FD_CAPS)) ? 1 : 0;
		igb->param_1000hdx_cap =
		    ((phy_ext_status & IEEE_ESR_1000T_HD_CAPS) ||
		    (phy_ext_status & IEEE_ESR_1000X_HD_CAPS)) ? 1 : 0;
		igb->param_100t4_cap =
		    (phy_status & MII_SR_100T4_CAPS) ? 1 : 0;
		igb->param_100fdx_cap = ((phy_status & MII_SR_100X_FD_CAPS) ||
		    (phy_status & MII_SR_100T2_FD_CAPS)) ? 1 : 0;
		igb->param_100hdx_cap = ((phy_status & MII_SR_100X_HD_CAPS) ||
		    (phy_status & MII_SR_100T2_HD_CAPS)) ? 1 : 0;
		igb->param_10fdx_cap =
		    (phy_status & MII_SR_10T_FD_CAPS) ? 1 : 0;
		igb->param_10hdx_cap =
		    (phy_status & MII_SR_10T_HD_CAPS) ? 1 : 0;
		igb->param_rem_fault =
		    (phy_status & MII_SR_REMOTE_FAULT) ? 1 : 0;

		/* What this link is currently advertising */
		igb->param_adv_autoneg_cap = hw->mac.autoneg;
		igb->param_adv_pause_cap =
		    (phy_an_adv & NWAY_AR_PAUSE) ? 1 : 0;
		igb->param_adv_asym_pause_cap =
		    (phy_an_adv & NWAY_AR_ASM_DIR) ? 1 : 0;
		igb->param_adv_1000hdx_cap =
		    (phy_1000t_ctrl & CR_1000T_HD_CAPS) ? 1 : 0;
		igb->param_adv_100t4_cap =
		    (phy_an_adv & NWAY_AR_100T4_CAPS) ? 1 : 0;
		igb->param_adv_rem_fault =
		    (phy_an_adv & NWAY_AR_REMOTE_FAULT) ? 1 : 0;
		/* Speed/duplex advertisements apply only when autoneg is on */
		if (igb->param_adv_autoneg_cap == 1) {
			igb->param_adv_1000fdx_cap =
			    (phy_1000t_ctrl & CR_1000T_FD_CAPS) ? 1 : 0;
			igb->param_adv_100fdx_cap =
			    (phy_an_adv & NWAY_AR_100TX_FD_CAPS) ? 1 : 0;
			igb->param_adv_100hdx_cap =
			    (phy_an_adv & NWAY_AR_100TX_HD_CAPS) ? 1 : 0;
			igb->param_adv_10fdx_cap =
			    (phy_an_adv & NWAY_AR_10T_FD_CAPS) ? 1 : 0;
			igb->param_adv_10hdx_cap =
			    (phy_an_adv & NWAY_AR_10T_HD_CAPS) ? 1 : 0;
		}

		/* What the link partner advertises */
		igb->param_lp_autoneg_cap =
		    (phy_an_exp & NWAY_ER_LP_NWAY_CAPS) ? 1 : 0;
		igb->param_lp_pause_cap =
		    (phy_lp_able & NWAY_LPAR_PAUSE) ? 1 : 0;
		igb->param_lp_asym_pause_cap =
		    (phy_lp_able & NWAY_LPAR_ASM_DIR) ? 1 : 0;
		igb->param_lp_1000fdx_cap =
		    (phy_1000t_status & SR_1000T_LP_FD_CAPS) ? 1 : 0;
		igb->param_lp_1000hdx_cap =
		    (phy_1000t_status & SR_1000T_LP_HD_CAPS) ? 1 : 0;
		igb->param_lp_100t4_cap =
		    (phy_lp_able & NWAY_LPAR_100T4_CAPS) ? 1 : 0;
		igb->param_lp_100fdx_cap =
		    (phy_lp_able & NWAY_LPAR_100TX_FD_CAPS) ? 1 : 0;
		igb->param_lp_100hdx_cap =
		    (phy_lp_able & NWAY_LPAR_100TX_HD_CAPS) ? 1 : 0;
		igb->param_lp_10fdx_cap =
		    (phy_lp_able & NWAY_LPAR_10T_FD_CAPS) ? 1 : 0;
		igb->param_lp_10hdx_cap =
		    (phy_lp_able & NWAY_LPAR_10T_HD_CAPS) ? 1 : 0;
		igb->param_lp_rem_fault =
		    (phy_lp_able & NWAY_LPAR_REMOTE_FAULT) ? 1 : 0;
	} else {
		/*
		 * 1Gig Fiber adapter only offers 1Gig Full Duplex.
		 */
		igb->param_autoneg_cap = 0;
		igb->param_pause_cap = 1;
		igb->param_asym_pause_cap = 1;
		igb->param_1000fdx_cap = 1;
		igb->param_1000hdx_cap = 0;
		igb->param_100t4_cap = 0;
		igb->param_100fdx_cap = 0;
		igb->param_100hdx_cap = 0;
		igb->param_10fdx_cap = 0;
		igb->param_10hdx_cap = 0;

		igb->param_adv_autoneg_cap = 0;
		igb->param_adv_pause_cap = 1;
		igb->param_adv_asym_pause_cap = 1;
		igb->param_adv_1000fdx_cap = 1;
		igb->param_adv_1000hdx_cap = 0;
		igb->param_adv_100t4_cap = 0;
		igb->param_adv_100fdx_cap = 0;
		igb->param_adv_100hdx_cap = 0;
		igb->param_adv_10fdx_cap = 0;
		igb->param_adv_10hdx_cap = 0;

		/* No link-partner information is available on fiber */
		igb->param_lp_autoneg_cap = 0;
		igb->param_lp_pause_cap = 0;
		igb->param_lp_asym_pause_cap = 0;
		igb->param_lp_1000fdx_cap = 0;
		igb->param_lp_1000hdx_cap = 0;
		igb->param_lp_100t4_cap = 0;
		igb->param_lp_100fdx_cap = 0;
		igb->param_lp_100hdx_cap = 0;
		igb->param_lp_10fdx_cap = 0;
		igb->param_lp_10hdx_cap = 0;
		igb->param_lp_rem_fault = 0;
	}
}
5217 
5218 /*
5219  * synchronize the adv* and en* parameters.
5220  *
5221  * See comments in <sys/dld.h> for details of the *_en_*
5222  * parameters. The usage of ndd for setting adv parameters will
 * synchronize all the en parameters with the igb parameters,
5224  * implicitly disabling any settings made via dladm.
5225  */
5226 static void
5227 igb_param_sync(igb_t *igb)
5228 {
5229 	igb->param_en_1000fdx_cap = igb->param_adv_1000fdx_cap;
5230 	igb->param_en_1000hdx_cap = igb->param_adv_1000hdx_cap;
5231 	igb->param_en_100t4_cap = igb->param_adv_100t4_cap;
5232 	igb->param_en_100fdx_cap = igb->param_adv_100fdx_cap;
5233 	igb->param_en_100hdx_cap = igb->param_adv_100hdx_cap;
5234 	igb->param_en_10fdx_cap = igb->param_adv_10fdx_cap;
5235 	igb->param_en_10hdx_cap = igb->param_adv_10hdx_cap;
5236 }
5237 
5238 /*
5239  * igb_get_driver_control
5240  */
5241 static void
5242 igb_get_driver_control(struct e1000_hw *hw)
5243 {
5244 	uint32_t ctrl_ext;
5245 
5246 	/* Notify firmware that driver is in control of device */
5247 	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
5248 	ctrl_ext |= E1000_CTRL_EXT_DRV_LOAD;
5249 	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
5250 }
5251 
5252 /*
5253  * igb_release_driver_control
5254  */
5255 static void
5256 igb_release_driver_control(struct e1000_hw *hw)
5257 {
5258 	uint32_t ctrl_ext;
5259 
5260 	/* Notify firmware that driver is no longer in control of device */
5261 	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
5262 	ctrl_ext &= ~E1000_CTRL_EXT_DRV_LOAD;
5263 	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
5264 }
5265 
5266 /*
5267  * igb_atomic_reserve - Atomic decrease operation
5268  */
5269 int
5270 igb_atomic_reserve(uint32_t *count_p, uint32_t n)
5271 {
5272 	uint32_t oldval;
5273 	uint32_t newval;
5274 
5275 	/* ATOMICALLY */
5276 	do {
5277 		oldval = *count_p;
5278 		if (oldval < n)
5279 			return (-1);
5280 		newval = oldval - n;
5281 	} while (atomic_cas_32(count_p, oldval, newval) != oldval);
5282 
5283 	return (newval);
5284 }
5285 
5286 /*
5287  * FMA support
5288  */
5289 
5290 int
5291 igb_check_acc_handle(ddi_acc_handle_t handle)
5292 {
5293 	ddi_fm_error_t de;
5294 
5295 	ddi_fm_acc_err_get(handle, &de, DDI_FME_VERSION);
5296 	ddi_fm_acc_err_clear(handle, DDI_FME_VERSION);
5297 	return (de.fme_status);
5298 }
5299 
5300 int
5301 igb_check_dma_handle(ddi_dma_handle_t handle)
5302 {
5303 	ddi_fm_error_t de;
5304 
5305 	ddi_fm_dma_err_get(handle, &de, DDI_FME_VERSION);
5306 	return (de.fme_status);
5307 }
5308 
5309 /*
5310  * The IO fault service error handling callback function
5311  */
5312 /*ARGSUSED*/
5313 static int
5314 igb_fm_error_cb(dev_info_t *dip, ddi_fm_error_t *err, const void *impl_data)
5315 {
5316 	/*
5317 	 * as the driver can always deal with an error in any dma or
5318 	 * access handle, we can just return the fme_status value.
5319 	 */
5320 	pci_ereport_post(dip, err, NULL);
5321 	return (err->fme_status);
5322 }
5323 
5324 static void
5325 igb_fm_init(igb_t *igb)
5326 {
5327 	ddi_iblock_cookie_t iblk;
5328 	int fma_dma_flag;
5329 
5330 	/* Only register with IO Fault Services if we have some capability */
5331 	if (igb->fm_capabilities & DDI_FM_ACCCHK_CAPABLE) {
5332 		igb_regs_acc_attr.devacc_attr_access = DDI_FLAGERR_ACC;
5333 	} else {
5334 		igb_regs_acc_attr.devacc_attr_access = DDI_DEFAULT_ACC;
5335 	}
5336 
5337 	if (igb->fm_capabilities & DDI_FM_DMACHK_CAPABLE) {
5338 		fma_dma_flag = 1;
5339 	} else {
5340 		fma_dma_flag = 0;
5341 	}
5342 
5343 	(void) igb_set_fma_flags(fma_dma_flag);
5344 
5345 	if (igb->fm_capabilities) {
5346 
5347 		/* Register capabilities with IO Fault Services */
5348 		ddi_fm_init(igb->dip, &igb->fm_capabilities, &iblk);
5349 
5350 		/*
5351 		 * Initialize pci ereport capabilities if ereport capable
5352 		 */
5353 		if (DDI_FM_EREPORT_CAP(igb->fm_capabilities) ||
5354 		    DDI_FM_ERRCB_CAP(igb->fm_capabilities))
5355 			pci_ereport_setup(igb->dip);
5356 
5357 		/*
5358 		 * Register error callback if error callback capable
5359 		 */
5360 		if (DDI_FM_ERRCB_CAP(igb->fm_capabilities))
5361 			ddi_fm_handler_register(igb->dip,
5362 			    igb_fm_error_cb, (void*) igb);
5363 	}
5364 }
5365 
5366 static void
5367 igb_fm_fini(igb_t *igb)
5368 {
5369 	/* Only unregister FMA capabilities if we registered some */
5370 	if (igb->fm_capabilities) {
5371 
5372 		/*
5373 		 * Release any resources allocated by pci_ereport_setup()
5374 		 */
5375 		if (DDI_FM_EREPORT_CAP(igb->fm_capabilities) ||
5376 		    DDI_FM_ERRCB_CAP(igb->fm_capabilities))
5377 			pci_ereport_teardown(igb->dip);
5378 
5379 		/*
5380 		 * Un-register error callback if error callback capable
5381 		 */
5382 		if (DDI_FM_ERRCB_CAP(igb->fm_capabilities))
5383 			ddi_fm_handler_unregister(igb->dip);
5384 
5385 		/* Unregister from IO Fault Services */
5386 		ddi_fm_fini(igb->dip);
5387 	}
5388 }
5389 
5390 void
5391 igb_fm_ereport(igb_t *igb, char *detail)
5392 {
5393 	uint64_t ena;
5394 	char buf[FM_MAX_CLASS];
5395 
5396 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s", DDI_FM_DEVICE, detail);
5397 	ena = fm_ena_generate(0, FM_ENA_FMT1);
5398 	if (DDI_FM_EREPORT_CAP(igb->fm_capabilities)) {
5399 		ddi_fm_ereport_post(igb->dip, buf, ena, DDI_NOSLEEP,
5400 		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0, NULL);
5401 	}
5402 }
5403 
5404 static int
5405 igb_ufm_fill_image(ddi_ufm_handle_t *ufmh, void *arg, uint_t imgno,
5406     ddi_ufm_image_t *imgp)
5407 {
5408 	igb_t *igb = arg;
5409 	const char *type;
5410 
5411 	if (imgno != 0) {
5412 		return (EINVAL);
5413 	}
5414 
5415 	ddi_ufm_image_set_desc(imgp, "NVM");
5416 	ddi_ufm_image_set_nslots(imgp, 1);
5417 	switch (igb->hw.nvm.type) {
5418 	case e1000_nvm_eeprom_spi:
5419 		type = "SPI EEPROM";
5420 		break;
5421 	case e1000_nvm_eeprom_microwire:
5422 		type = "Microwire EEPROM";
5423 		break;
5424 	case e1000_nvm_invm:
5425 		type = "Internal NVM";
5426 		break;
5427 	case e1000_nvm_flash_hw:
5428 	case e1000_nvm_flash_sw:
5429 		type = "Flash";
5430 		break;
5431 	default:
5432 		type = NULL;
5433 		break;
5434 	}
5435 
5436 	if (type != NULL) {
5437 		nvlist_t *nvl;
5438 
5439 		nvl = fnvlist_alloc();
5440 		fnvlist_add_string(nvl, "image-type", type);
5441 		/*
5442 		 * The DDI takes ownership of the nvlist_t at this point.
5443 		 */
5444 		ddi_ufm_image_set_misc(imgp, nvl);
5445 	}
5446 
5447 	return (0);
5448 }
5449 
5450 static int
5451 igb_ufm_fill_slot(ddi_ufm_handle_t *ufmh, void *arg, uint_t imgno,
5452     uint_t slotno, ddi_ufm_slot_t *slotp)
5453 {
5454 	igb_t *igb = arg;
5455 	char *ver;
5456 
5457 	if (imgno != 0 || slotno != 0) {
5458 		return (EINVAL);
5459 	}
5460 
5461 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, igb->dip, DDI_PROP_DONTPASS,
5462 	    "nvm-version", &ver) == 0) {
5463 		ddi_ufm_slot_set_version(slotp, ver);
5464 		ddi_prop_free(ver);
5465 	} else {
5466 		ddi_ufm_slot_set_version(slotp, "unknown");
5467 	}
5468 
5469 	ddi_ufm_slot_set_attrs(slotp, DDI_UFM_ATTR_ACTIVE |
5470 	    DDI_UFM_ATTR_READABLE | DDI_UFM_ATTR_WRITEABLE);
5471 	ddi_ufm_slot_set_imgsize(slotp, igb->hw.nvm.word_size * 2);
5472 	return (0);
5473 }
5474 
5475 static int
5476 igb_ufm_getcaps(ddi_ufm_handle_t *ufmh, void *arg, ddi_ufm_cap_t *caps)
5477 {
5478 	igb_t *igb = arg;
5479 
5480 	*caps = 0;
5481 	if (igb->hw.nvm.type != e1000_nvm_none &&
5482 	    igb->hw.nvm.type != e1000_nvm_unknown) {
5483 		*caps |= DDI_UFM_CAP_REPORT;
5484 
5485 		if (igb->hw.nvm.ops.read != NULL) {
5486 			*caps |= DDI_UFM_CAP_READIMG;
5487 		}
5488 	}
5489 
5490 	return (0);
5491 }
5492 
static int
igb_ufm_readimg(ddi_ufm_handle_t *ufmh, void *arg, uint_t imgno, uint_t slotno,
    uint64_t len, uint64_t offset, void *buf, uint64_t *nread)
{
	igb_t *igb = arg;
	uint16_t wordoff, nwords, *buf16 = buf;
	uint32_t imgsize = igb->hw.nvm.word_size * 2;
	int ret;

	/* Only image 0, slot 0 exists */
	if (imgno != 0 || slotno != 0) {
		return (EINVAL);
	}

	/*
	 * The individual len/offset checks ensure both values are at most
	 * imgsize before the sum is formed, so len + offset cannot wrap.
	 */
	if (len > imgsize || offset > imgsize || len + offset > imgsize) {
		return (EINVAL);
	}

	if (igb->hw.nvm.ops.read == NULL) {
		return (ENOTSUP);
	}

	/*
	 * Hardware provides us a means to read 16-bit words. For the time
	 * being, restrict offset and length to be 2 byte aligned. We should
	 * probably reduce this restriction. We could probably just use a bounce
	 * buffer.
	 */
	if ((offset % 2) != 0 || (len % 2) != 0) {
		return (EINVAL);
	}

	/* Convert byte offset/length into NVM word units */
	wordoff = offset >> 1;
	nwords = len >> 1;
	/* gen_lock serializes access to the shared NVM interface */
	mutex_enter(&igb->gen_lock);
	ret = e1000_read_nvm(&igb->hw, wordoff, nwords, buf16);
	mutex_exit(&igb->gen_lock);

	if (ret == 0) {
		uint16_t i;
		*nread = len;
		/* Convert each word with LE_16 for a consistent byte order */
		for (i = 0; i < nwords; i++) {
			buf16[i] = LE_16(buf16[i]);
		}
	} else {
		/* Collapse any hardware-level failure into a generic EIO */
		ret = EIO;
	}

	return (ret);
}
5542