xref: /titanic_50/usr/src/uts/common/io/igb/igb_main.c (revision c3a558e7c77127215b010652905be7916ec5a080)
1 /*
2  * CDDL HEADER START
3  *
4  * Copyright(c) 2007-2009 Intel Corporation. All rights reserved.
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 #include "igb_sw.h"
29 
30 static char ident[] = "Intel 1Gb Ethernet";
31 static char igb_version[] = "igb 1.1.13";
32 
/*
 * Local function prototypes
 */
36 static int igb_register_mac(igb_t *);
37 static int igb_identify_hardware(igb_t *);
38 static int igb_regs_map(igb_t *);
39 static void igb_init_properties(igb_t *);
40 static int igb_init_driver_settings(igb_t *);
41 static void igb_init_locks(igb_t *);
42 static void igb_destroy_locks(igb_t *);
43 static int igb_init_mac_address(igb_t *);
44 static int igb_init(igb_t *);
45 static int igb_init_adapter(igb_t *);
46 static void igb_stop_adapter(igb_t *);
47 static int igb_reset(igb_t *);
48 static void igb_tx_clean(igb_t *);
49 static boolean_t igb_tx_drain(igb_t *);
50 static boolean_t igb_rx_drain(igb_t *);
51 static int igb_alloc_rings(igb_t *);
52 static int igb_alloc_rx_data(igb_t *);
53 static void igb_free_rx_data(igb_t *);
54 static void igb_free_rings(igb_t *);
55 static void igb_setup_rings(igb_t *);
56 static void igb_setup_rx(igb_t *);
57 static void igb_setup_tx(igb_t *);
58 static void igb_setup_rx_ring(igb_rx_ring_t *);
59 static void igb_setup_tx_ring(igb_tx_ring_t *);
60 static void igb_setup_rss(igb_t *);
61 static void igb_setup_mac_rss_classify(igb_t *);
62 static void igb_setup_mac_classify(igb_t *);
63 static void igb_init_unicst(igb_t *);
64 static void igb_setup_multicst(igb_t *);
65 static void igb_get_phy_state(igb_t *);
66 static void igb_param_sync(igb_t *);
67 static void igb_get_conf(igb_t *);
68 static int igb_get_prop(igb_t *, char *, int, int, int);
69 static boolean_t igb_is_link_up(igb_t *);
70 static boolean_t igb_link_check(igb_t *);
71 static void igb_local_timer(void *);
72 static void igb_link_timer(void *);
73 static void igb_arm_watchdog_timer(igb_t *);
74 static void igb_start_watchdog_timer(igb_t *);
75 static void igb_restart_watchdog_timer(igb_t *);
76 static void igb_stop_watchdog_timer(igb_t *);
77 static void igb_start_link_timer(igb_t *);
78 static void igb_stop_link_timer(igb_t *);
79 static void igb_disable_adapter_interrupts(igb_t *);
80 static void igb_enable_adapter_interrupts_82575(igb_t *);
81 static void igb_enable_adapter_interrupts_82576(igb_t *);
82 static void igb_enable_adapter_interrupts_82580(igb_t *);
83 static boolean_t is_valid_mac_addr(uint8_t *);
84 static boolean_t igb_stall_check(igb_t *);
85 static boolean_t igb_set_loopback_mode(igb_t *, uint32_t);
86 static void igb_set_external_loopback(igb_t *);
87 static void igb_set_internal_phy_loopback(igb_t *);
88 static void igb_set_internal_serdes_loopback(igb_t *);
89 static boolean_t igb_find_mac_address(igb_t *);
90 static int igb_alloc_intrs(igb_t *);
91 static int igb_alloc_intr_handles(igb_t *, int);
92 static int igb_add_intr_handlers(igb_t *);
93 static void igb_rem_intr_handlers(igb_t *);
94 static void igb_rem_intrs(igb_t *);
95 static int igb_enable_intrs(igb_t *);
96 static int igb_disable_intrs(igb_t *);
97 static void igb_setup_msix_82575(igb_t *);
98 static void igb_setup_msix_82576(igb_t *);
99 static void igb_setup_msix_82580(igb_t *);
100 static uint_t igb_intr_legacy(void *, void *);
101 static uint_t igb_intr_msi(void *, void *);
102 static uint_t igb_intr_rx(void *, void *);
103 static uint_t igb_intr_tx(void *, void *);
104 static uint_t igb_intr_tx_other(void *, void *);
105 static void igb_intr_rx_work(igb_rx_ring_t *);
106 static void igb_intr_tx_work(igb_tx_ring_t *);
107 static void igb_intr_link_work(igb_t *);
108 static void igb_get_driver_control(struct e1000_hw *);
109 static void igb_release_driver_control(struct e1000_hw *);
110 
111 static int igb_attach(dev_info_t *, ddi_attach_cmd_t);
112 static int igb_detach(dev_info_t *, ddi_detach_cmd_t);
113 static int igb_resume(dev_info_t *);
114 static int igb_suspend(dev_info_t *);
115 static int igb_quiesce(dev_info_t *);
116 static void igb_unconfigure(dev_info_t *, igb_t *);
117 static int igb_fm_error_cb(dev_info_t *, ddi_fm_error_t *,
118     const void *);
119 static void igb_fm_init(igb_t *);
120 static void igb_fm_fini(igb_t *);
121 static void igb_release_multicast(igb_t *);
122 
/*
 * Driver-private MAC property names exported through the
 * m_setprop/m_getprop/m_propinfo callbacks; NULL-terminated list.
 */
char *igb_priv_props[] = {
	"_tx_copy_thresh",
	"_tx_recycle_thresh",
	"_tx_overload_thresh",
	"_tx_resched_thresh",
	"_rx_copy_thresh",
	"_rx_limit_per_intr",
	"_intr_throttling",
	"_adv_pause_cap",
	"_adv_asym_pause_cap",
	NULL
};
135 
/*
 * Character/block entry points.  As a GLDv3 NIC driver, igb has no real
 * cdev entry points of its own; everything goes through the MAC layer,
 * so these are all stubbed out with nulldev/nodev.
 */
static struct cb_ops igb_cb_ops = {
	nulldev,		/* cb_open */
	nulldev,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	nodev,			/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_MP | D_HOTPLUG,	/* cb_flag */
	CB_REV,			/* cb_rev */
	nodev,			/* cb_aread */
	nodev			/* cb_awrite */
};
156 
/*
 * Device operations vector.  devo_getinfo is filled in later by
 * mac_init_ops() in _init(); attach/detach/quiesce are the entry
 * points this driver actually implements.
 */
static struct dev_ops igb_dev_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	NULL,			/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	igb_attach,		/* devo_attach */
	igb_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&igb_cb_ops,		/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	ddi_power,		/* devo_power */
	igb_quiesce,	/* devo_quiesce */
};
171 
static struct modldrv igb_modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	ident,			/* Description string */
	&igb_dev_ops,		/* driver ops */
};
177 
/* Module linkage passed to mod_install()/mod_remove()/mod_info() */
static struct modlinkage igb_modlinkage = {
	MODREV_1, &igb_modldrv, NULL
};
181 
182 /* Access attributes for register mapping */
/*
 * Access attributes for register mapping: little-endian, strictly
 * ordered, with DDI_FLAGERR_ACC so register access faults are reported
 * through the FMA framework rather than panicking.
 */
ddi_device_acc_attr_t igb_regs_acc_attr = {
	DDI_DEVICE_ATTR_V1,
	DDI_STRUCTURE_LE_ACC,
	DDI_STRICTORDER_ACC,
	DDI_FLAGERR_ACC
};
189 
190 #define	IGB_M_CALLBACK_FLAGS \
191 	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO)
192 
/*
 * GLDv3 MAC callbacks.  The NULL slots are callbacks this driver does
 * not implement directly (unicast/tx are handled via the ring-capable
 * MAC interfaces advertised through igb_m_getcapab()).
 */
static mac_callbacks_t igb_m_callbacks = {
	IGB_M_CALLBACK_FLAGS,
	igb_m_stat,
	igb_m_start,
	igb_m_stop,
	igb_m_promisc,
	igb_m_multicst,
	NULL,
	NULL,
	NULL,
	igb_m_ioctl,
	igb_m_getcapab,
	NULL,
	NULL,
	igb_m_setprop,
	igb_m_getprop,
	igb_m_propinfo
};
211 
212 /*
213  * Initialize capabilities of each supported adapter type
214  */
/* 82575: up to 4 rx and 4 tx queues; DCA and VMDq pool support */
static adapter_info_t igb_82575_cap = {
	/* limits */
	4,		/* maximum number of rx queues */
	1,		/* minimum number of rx queues */
	4,		/* default number of rx queues */
	4,		/* maximum number of tx queues */
	1,		/* minimum number of tx queues */
	4,		/* default number of tx queues */
	65535,		/* maximum interrupt throttle rate */
	0,		/* minimum interrupt throttle rate */
	200,		/* default interrupt throttle rate */

	/* function pointers */
	igb_enable_adapter_interrupts_82575,
	igb_setup_msix_82575,

	/* capabilities */
	(IGB_FLAG_HAS_DCA |	/* capability flags */
	IGB_FLAG_VMDQ_POOL),

	0xffc00000		/* mask for RXDCTL register */
};
237 
/* 82576: up to 16 rx and 16 tx queues; also needs per-packet ctx index */
static adapter_info_t igb_82576_cap = {
	/* limits */
	16,		/* maximum number of rx queues */
	1,		/* minimum number of rx queues */
	4,		/* default number of rx queues */
	16,		/* maximum number of tx queues */
	1,		/* minimum number of tx queues */
	4,		/* default number of tx queues */
	65535,		/* maximum interrupt throttle rate */
	0,		/* minimum interrupt throttle rate */
	200,		/* default interrupt throttle rate */

	/* function pointers */
	igb_enable_adapter_interrupts_82576,
	igb_setup_msix_82576,

	/* capabilities */
	(IGB_FLAG_HAS_DCA |	/* capability flags */
	IGB_FLAG_VMDQ_POOL |
	IGB_FLAG_NEED_CTX_IDX),

	0xffe00000		/* mask for RXDCTL register */
};
261 
/* 82580: up to 8 rx and 8 tx queues; also needs per-packet ctx index */
static adapter_info_t igb_82580_cap = {
	/* limits */
	8,		/* maximum number of rx queues */
	1,		/* minimum number of rx queues */
	4,		/* default number of rx queues */
	8,		/* maximum number of tx queues */
	1,		/* minimum number of tx queues */
	4,		/* default number of tx queues */
	65535,		/* maximum interrupt throttle rate */
	0,		/* minimum interrupt throttle rate */
	200,		/* default interrupt throttle rate */

	/* function pointers */
	igb_enable_adapter_interrupts_82580,
	igb_setup_msix_82580,

	/* capabilities */
	(IGB_FLAG_HAS_DCA |	/* capability flags */
	IGB_FLAG_VMDQ_POOL |
	IGB_FLAG_NEED_CTX_IDX),

	0xffe00000		/* mask for RXDCTL register */
};
285 
286 /*
287  * Module Initialization Functions
288  */
289 
290 int
291 _init(void)
292 {
293 	int status;
294 
295 	mac_init_ops(&igb_dev_ops, MODULE_NAME);
296 
297 	status = mod_install(&igb_modlinkage);
298 
299 	if (status != DDI_SUCCESS) {
300 		mac_fini_ops(&igb_dev_ops);
301 	}
302 
303 	return (status);
304 }
305 
306 int
307 _fini(void)
308 {
309 	int status;
310 
311 	status = mod_remove(&igb_modlinkage);
312 
313 	if (status == DDI_SUCCESS) {
314 		mac_fini_ops(&igb_dev_ops);
315 	}
316 
317 	return (status);
318 
319 }
320 
321 int
322 _info(struct modinfo *modinfop)
323 {
324 	int status;
325 
326 	status = mod_info(&igb_modlinkage, modinfop);
327 
328 	return (status);
329 }
330 
331 /*
332  * igb_attach - driver attach
333  *
334  * This function is the device specific initialization entry
335  * point. This entry point is required and must be written.
336  * The DDI_ATTACH command must be provided in the attach entry
337  * point. When attach() is called with cmd set to DDI_ATTACH,
338  * all normal kernel services (such as kmem_alloc(9F)) are
339  * available for use by the driver.
340  *
341  * The attach() function will be called once for each instance
342  * of  the  device  on  the  system with cmd set to DDI_ATTACH.
343  * Until attach() succeeds, the only driver entry points which
344  * may be called are open(9E) and getinfo(9E).
345  */
/*
 * NOTE: each successful step below sets a bit in igb->attach_progress;
 * on any failure igb_unconfigure() uses those bits to tear down only
 * what was actually set up.  The ordering of the steps matters (e.g.
 * rings depend on the number of allocated interrupt vectors, locks
 * must exist before interrupts are enabled).
 */
static int
igb_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
{
	igb_t *igb;
	struct igb_osdep *osdep;
	struct e1000_hw *hw;
	int instance;

	/*
	 * Check the command and perform corresponding operations
	 */
	switch (cmd) {
	default:
		return (DDI_FAILURE);

	case DDI_RESUME:
		return (igb_resume(devinfo));

	case DDI_ATTACH:
		break;
	}

	/* Get the device instance */
	instance = ddi_get_instance(devinfo);

	/* Allocate memory for the instance data structure (zeroed) */
	igb = kmem_zalloc(sizeof (igb_t), KM_SLEEP);

	igb->dip = devinfo;
	igb->instance = instance;

	/* Cross-link the shared-code hw struct with the osdep wrapper */
	hw = &igb->hw;
	osdep = &igb->osdep;
	hw->back = osdep;
	osdep->igb = igb;

	/* Attach the instance pointer to the dev_info data structure */
	ddi_set_driver_private(devinfo, igb);


	/* Initialize for fma support */
	igb->fm_capabilities = igb_get_prop(igb, "fm-capable",
	    0, 0x0f,
	    DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE |
	    DDI_FM_DMACHK_CAPABLE | DDI_FM_ERRCB_CAPABLE);
	igb_fm_init(igb);
	igb->attach_progress |= ATTACH_PROGRESS_FMINIT;

	/*
	 * Map PCI config space registers
	 */
	if (pci_config_setup(devinfo, &osdep->cfg_handle) != DDI_SUCCESS) {
		igb_error(igb, "Failed to map PCI configurations");
		goto attach_fail;
	}
	igb->attach_progress |= ATTACH_PROGRESS_PCI_CONFIG;

	/*
	 * Identify the chipset family
	 */
	if (igb_identify_hardware(igb) != IGB_SUCCESS) {
		igb_error(igb, "Failed to identify hardware");
		goto attach_fail;
	}

	/*
	 * Map device registers
	 */
	if (igb_regs_map(igb) != IGB_SUCCESS) {
		igb_error(igb, "Failed to map device registers");
		goto attach_fail;
	}
	igb->attach_progress |= ATTACH_PROGRESS_REGS_MAP;

	/*
	 * Initialize driver parameters (from .conf properties)
	 */
	igb_init_properties(igb);
	igb->attach_progress |= ATTACH_PROGRESS_PROPS;

	/*
	 * Allocate interrupts
	 */
	if (igb_alloc_intrs(igb) != IGB_SUCCESS) {
		igb_error(igb, "Failed to allocate interrupts");
		goto attach_fail;
	}
	igb->attach_progress |= ATTACH_PROGRESS_ALLOC_INTR;

	/*
	 * Allocate rx/tx rings based on the ring numbers.
	 * The actual numbers of rx/tx rings are decided by the number of
	 * allocated interrupt vectors, so we should allocate the rings after
	 * interrupts are allocated.
	 */
	if (igb_alloc_rings(igb) != IGB_SUCCESS) {
		igb_error(igb, "Failed to allocate rx/tx rings or groups");
		goto attach_fail;
	}
	igb->attach_progress |= ATTACH_PROGRESS_ALLOC_RINGS;

	/*
	 * Add interrupt handlers
	 */
	if (igb_add_intr_handlers(igb) != IGB_SUCCESS) {
		igb_error(igb, "Failed to add interrupt handlers");
		goto attach_fail;
	}
	igb->attach_progress |= ATTACH_PROGRESS_ADD_INTR;

	/*
	 * Initialize driver parameters
	 */
	if (igb_init_driver_settings(igb) != IGB_SUCCESS) {
		igb_error(igb, "Failed to initialize driver settings");
		goto attach_fail;
	}

	/* Fail attach if config-space accesses tripped an FMA error */
	if (igb_check_acc_handle(igb->osdep.cfg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
		goto attach_fail;
	}

	/*
	 * Initialize mutexes for this device.
	 * Do this before enabling the interrupt handler and
	 * register the softint to avoid the condition where
	 * interrupt handler can try using uninitialized mutex
	 */
	igb_init_locks(igb);
	igb->attach_progress |= ATTACH_PROGRESS_LOCKS;

	/*
	 * Initialize the adapter
	 */
	if (igb_init(igb) != IGB_SUCCESS) {
		igb_error(igb, "Failed to initialize adapter");
		goto attach_fail;
	}
	igb->attach_progress |= ATTACH_PROGRESS_INIT_ADAPTER;

	/*
	 * Initialize statistics
	 */
	if (igb_init_stats(igb) != IGB_SUCCESS) {
		igb_error(igb, "Failed to initialize statistics");
		goto attach_fail;
	}
	igb->attach_progress |= ATTACH_PROGRESS_STATS;

	/*
	 * Register the driver to the MAC
	 */
	if (igb_register_mac(igb) != IGB_SUCCESS) {
		igb_error(igb, "Failed to register MAC");
		goto attach_fail;
	}
	igb->attach_progress |= ATTACH_PROGRESS_MAC;

	/*
	 * Now that mutex locks are initialized, and the chip is also
	 * initialized, enable interrupts.
	 */
	if (igb_enable_intrs(igb) != IGB_SUCCESS) {
		igb_error(igb, "Failed to enable DDI interrupts");
		goto attach_fail;
	}
	igb->attach_progress |= ATTACH_PROGRESS_ENABLE_INTR;

	igb_log(igb, "%s", igb_version);
	atomic_or_32(&igb->igb_state, IGB_INITIALIZED);

	return (DDI_SUCCESS);

attach_fail:
	/* Tear down only what attach_progress says was set up */
	igb_unconfigure(devinfo, igb);
	return (DDI_FAILURE);
}
524 
525 /*
526  * igb_detach - driver detach
527  *
528  * The detach() function is the complement of the attach routine.
529  * If cmd is set to DDI_DETACH, detach() is used to remove  the
530  * state  associated  with  a  given  instance of a device node
531  * prior to the removal of that instance from the system.
532  *
533  * The detach() function will be called once for each  instance
534  * of the device for which there has been a successful attach()
535  * once there are no longer  any  opens  on  the  device.
536  *
 * Interrupt routines are disabled, and all memory allocated by this
 * driver is freed.
539  */
static int
igb_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
{
	igb_t *igb;

	/*
	 * Check detach command
	 */
	switch (cmd) {
	default:
		return (DDI_FAILURE);

	case DDI_SUSPEND:
		return (igb_suspend(devinfo));

	case DDI_DETACH:
		break;
	}


	/*
	 * Get the pointer to the driver private data structure
	 */
	igb = (igb_t *)ddi_get_driver_private(devinfo);
	if (igb == NULL)
		return (DDI_FAILURE);

	/*
	 * Unregister MAC. If failed, we have to fail the detach.
	 * This is done first so no new GLDv3 calls can come in while
	 * we tear down.
	 */
	if (mac_unregister(igb->mac_hdl) != 0) {
		igb_error(igb, "Failed to unregister MAC");
		return (DDI_FAILURE);
	}
	/* Clear the bit so igb_unconfigure() won't unregister again */
	igb->attach_progress &= ~ATTACH_PROGRESS_MAC;

	/*
	 * If the device is still running, it needs to be stopped first.
	 * This check is necessary because under some specific circumstances,
	 * the detach routine can be called without stopping the interface
	 * first.
	 */
	mutex_enter(&igb->gen_lock);
	if (igb->igb_state & IGB_STARTED) {
		atomic_and_32(&igb->igb_state, ~IGB_STARTED);
		igb_stop(igb, B_TRUE);
		mutex_exit(&igb->gen_lock);
		/* Disable and stop the watchdog timer */
		igb_disable_watchdog_timer(igb);
	} else
		mutex_exit(&igb->gen_lock);

	/*
	 * Check if there are still rx buffers held by the upper layer.
	 * If so, fail the detach.
	 */
	if (!igb_rx_drain(igb))
		return (DDI_FAILURE);

	/*
	 * Do the remaining unconfigure routines
	 */
	igb_unconfigure(devinfo, igb);

	return (DDI_SUCCESS);
}
606 
607 /*
608  * quiesce(9E) entry point.
609  *
610  * This function is called when the system is single-threaded at high
611  * PIL with preemption disabled. Therefore, this function must not be
612  * blocked.
613  *
614  * This function returns DDI_SUCCESS on success, or DDI_FAILURE on failure.
615  * DDI_FAILURE indicates an error condition and should almost never happen.
616  */
static int
igb_quiesce(dev_info_t *devinfo)
{
	igb_t *igb;
	struct e1000_hw *hw;

	igb = (igb_t *)ddi_get_driver_private(devinfo);

	if (igb == NULL)
		return (DDI_FAILURE);

	hw = &igb->hw;

	/*
	 * Disable the adapter interrupts.
	 * No locks are taken here: quiesce(9E) runs single-threaded at
	 * high PIL and must not block.
	 */
	igb_disable_adapter_interrupts(igb);

	/* Tell firmware driver is no longer in control */
	igb_release_driver_control(hw);

	/*
	 * Reset the chipset
	 */
	(void) e1000_reset_hw(hw);

	/*
	 * Reset PHY if possible (i.e. reset is not blocked by manageability)
	 */
	if (e1000_check_reset_block(hw) == E1000_SUCCESS)
		(void) e1000_phy_hw_reset(hw);

	return (DDI_SUCCESS);
}
651 
652 /*
653  * igb_unconfigure - release all resources held by this instance
654  */
/*
 * igb_unconfigure - release all resources held by this instance
 *
 * Teardown proceeds roughly in the reverse order of igb_attach(); each
 * step is guarded by the corresponding ATTACH_PROGRESS_* bit so this is
 * safe to call after a partially-failed attach.  The igb_t itself is
 * freed at the end, so the caller must not touch it afterwards.
 */
static void
igb_unconfigure(dev_info_t *devinfo, igb_t *igb)
{
	/*
	 * Disable interrupt
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_ENABLE_INTR) {
		(void) igb_disable_intrs(igb);
	}

	/*
	 * Unregister MAC
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_MAC) {
		(void) mac_unregister(igb->mac_hdl);
	}

	/*
	 * Free statistics
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_STATS) {
		kstat_delete((kstat_t *)igb->igb_ks);
	}

	/*
	 * Remove interrupt handlers
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_ADD_INTR) {
		igb_rem_intr_handlers(igb);
	}

	/*
	 * Remove interrupts
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_ALLOC_INTR) {
		igb_rem_intrs(igb);
	}

	/*
	 * Remove driver properties
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_PROPS) {
		(void) ddi_prop_remove_all(devinfo);
	}

	/*
	 * Stop the adapter
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_INIT_ADAPTER) {
		mutex_enter(&igb->gen_lock);
		igb_stop_adapter(igb);
		mutex_exit(&igb->gen_lock);
		if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
			ddi_fm_service_impact(igb->dip, DDI_SERVICE_UNAFFECTED);
	}

	/*
	 * Free multicast table
	 */
	igb_release_multicast(igb);

	/*
	 * Free register handle
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_REGS_MAP) {
		if (igb->osdep.reg_handle != NULL)
			ddi_regs_map_free(&igb->osdep.reg_handle);
	}

	/*
	 * Free PCI config handle
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_PCI_CONFIG) {
		if (igb->osdep.cfg_handle != NULL)
			pci_config_teardown(&igb->osdep.cfg_handle);
	}

	/*
	 * Free locks (only after interrupts/adapter are fully stopped)
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_LOCKS) {
		igb_destroy_locks(igb);
	}

	/*
	 * Free the rx/tx rings
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_ALLOC_RINGS) {
		igb_free_rings(igb);
	}

	/*
	 * Remove FMA
	 */
	if (igb->attach_progress & ATTACH_PROGRESS_FMINIT) {
		igb_fm_fini(igb);
	}

	/*
	 * Free the driver data structure
	 */
	kmem_free(igb, sizeof (igb_t));

	ddi_set_driver_private(devinfo, NULL);
}
760 
761 /*
762  * igb_register_mac - Register the driver and its function pointers with
763  * the GLD interface
764  */
765 static int
766 igb_register_mac(igb_t *igb)
767 {
768 	struct e1000_hw *hw = &igb->hw;
769 	mac_register_t *mac;
770 	int status;
771 
772 	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
773 		return (IGB_FAILURE);
774 
775 	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
776 	mac->m_driver = igb;
777 	mac->m_dip = igb->dip;
778 	mac->m_src_addr = hw->mac.addr;
779 	mac->m_callbacks = &igb_m_callbacks;
780 	mac->m_min_sdu = 0;
781 	mac->m_max_sdu = igb->max_frame_size -
782 	    sizeof (struct ether_vlan_header) - ETHERFCSL;
783 	mac->m_margin = VLAN_TAGSZ;
784 	mac->m_priv_props = igb_priv_props;
785 	mac->m_v12n = MAC_VIRT_LEVEL1;
786 
787 	status = mac_register(mac, &igb->mac_hdl);
788 
789 	mac_free(mac);
790 
791 	return ((status == 0) ? IGB_SUCCESS : IGB_FAILURE);
792 }
793 
794 /*
795  * igb_identify_hardware - Identify the type of the chipset
796  */
797 static int
798 igb_identify_hardware(igb_t *igb)
799 {
800 	struct e1000_hw *hw = &igb->hw;
801 	struct igb_osdep *osdep = &igb->osdep;
802 
803 	/*
804 	 * Get the device id
805 	 */
806 	hw->vendor_id =
807 	    pci_config_get16(osdep->cfg_handle, PCI_CONF_VENID);
808 	hw->device_id =
809 	    pci_config_get16(osdep->cfg_handle, PCI_CONF_DEVID);
810 	hw->revision_id =
811 	    pci_config_get8(osdep->cfg_handle, PCI_CONF_REVID);
812 	hw->subsystem_device_id =
813 	    pci_config_get16(osdep->cfg_handle, PCI_CONF_SUBSYSID);
814 	hw->subsystem_vendor_id =
815 	    pci_config_get16(osdep->cfg_handle, PCI_CONF_SUBVENID);
816 
817 	/*
818 	 * Set the mac type of the adapter based on the device id
819 	 */
820 	if (e1000_set_mac_type(hw) != E1000_SUCCESS) {
821 		return (IGB_FAILURE);
822 	}
823 
824 	/*
825 	 * Install adapter capabilities based on mac type
826 	 */
827 	switch (hw->mac.type) {
828 	case e1000_82575:
829 		igb->capab = &igb_82575_cap;
830 		break;
831 	case e1000_82576:
832 		igb->capab = &igb_82576_cap;
833 		break;
834 	case e1000_82580:
835 		igb->capab = &igb_82580_cap;
836 		break;
837 	default:
838 		return (IGB_FAILURE);
839 	}
840 
841 	return (IGB_SUCCESS);
842 }
843 
844 /*
845  * igb_regs_map - Map the device registers
846  */
847 static int
848 igb_regs_map(igb_t *igb)
849 {
850 	dev_info_t *devinfo = igb->dip;
851 	struct e1000_hw *hw = &igb->hw;
852 	struct igb_osdep *osdep = &igb->osdep;
853 	off_t mem_size;
854 
855 	/*
856 	 * First get the size of device registers to be mapped.
857 	 */
858 	if (ddi_dev_regsize(devinfo, IGB_ADAPTER_REGSET, &mem_size) !=
859 	    DDI_SUCCESS) {
860 		return (IGB_FAILURE);
861 	}
862 
863 	/*
864 	 * Call ddi_regs_map_setup() to map registers
865 	 */
866 	if ((ddi_regs_map_setup(devinfo, IGB_ADAPTER_REGSET,
867 	    (caddr_t *)&hw->hw_addr, 0,
868 	    mem_size, &igb_regs_acc_attr,
869 	    &osdep->reg_handle)) != DDI_SUCCESS) {
870 		return (IGB_FAILURE);
871 	}
872 
873 	return (IGB_SUCCESS);
874 }
875 
876 /*
877  * igb_init_properties - Initialize driver properties
878  */
879 static void
880 igb_init_properties(igb_t *igb)
881 {
882 	/*
883 	 * Get conf file properties, including link settings
884 	 * jumbo frames, ring number, descriptor number, etc.
885 	 */
886 	igb_get_conf(igb);
887 }
888 
889 /*
890  * igb_init_driver_settings - Initialize driver settings
891  *
892  * The settings include hardware function pointers, bus information,
893  * rx/tx rings settings, link state, and any other parameters that
894  * need to be setup during driver initialization.
895  */
896 static int
897 igb_init_driver_settings(igb_t *igb)
898 {
899 	struct e1000_hw *hw = &igb->hw;
900 	igb_rx_ring_t *rx_ring;
901 	igb_tx_ring_t *tx_ring;
902 	uint32_t rx_size;
903 	uint32_t tx_size;
904 	int i;
905 
906 	/*
907 	 * Initialize chipset specific hardware function pointers
908 	 */
909 	if (e1000_setup_init_funcs(hw, B_TRUE) != E1000_SUCCESS) {
910 		return (IGB_FAILURE);
911 	}
912 
913 	/*
914 	 * Get bus information
915 	 */
916 	if (e1000_get_bus_info(hw) != E1000_SUCCESS) {
917 		return (IGB_FAILURE);
918 	}
919 
920 	/*
921 	 * Get the system page size
922 	 */
923 	igb->page_size = ddi_ptob(igb->dip, (ulong_t)1);
924 
925 	/*
926 	 * Set rx buffer size
927 	 * The IP header alignment room is counted in the calculation.
928 	 * The rx buffer size is in unit of 1K that is required by the
929 	 * chipset hardware.
930 	 */
931 	rx_size = igb->max_frame_size + IPHDR_ALIGN_ROOM;
932 	igb->rx_buf_size = ((rx_size >> 10) +
933 	    ((rx_size & (((uint32_t)1 << 10) - 1)) > 0 ? 1 : 0)) << 10;
934 
935 	/*
936 	 * Set tx buffer size
937 	 */
938 	tx_size = igb->max_frame_size;
939 	igb->tx_buf_size = ((tx_size >> 10) +
940 	    ((tx_size & (((uint32_t)1 << 10) - 1)) > 0 ? 1 : 0)) << 10;
941 
942 	/*
943 	 * Initialize rx/tx rings parameters
944 	 */
945 	for (i = 0; i < igb->num_rx_rings; i++) {
946 		rx_ring = &igb->rx_rings[i];
947 		rx_ring->index = i;
948 		rx_ring->igb = igb;
949 	}
950 
951 	for (i = 0; i < igb->num_tx_rings; i++) {
952 		tx_ring = &igb->tx_rings[i];
953 		tx_ring->index = i;
954 		tx_ring->igb = igb;
955 		if (igb->tx_head_wb_enable)
956 			tx_ring->tx_recycle = igb_tx_recycle_head_wb;
957 		else
958 			tx_ring->tx_recycle = igb_tx_recycle_legacy;
959 
960 		tx_ring->ring_size = igb->tx_ring_size;
961 		tx_ring->free_list_size = igb->tx_ring_size +
962 		    (igb->tx_ring_size >> 1);
963 	}
964 
965 	/*
966 	 * Initialize values of interrupt throttling rates
967 	 */
968 	for (i = 1; i < MAX_NUM_EITR; i++)
969 		igb->intr_throttling[i] = igb->intr_throttling[0];
970 
971 	/*
972 	 * The initial link state should be "unknown"
973 	 */
974 	igb->link_state = LINK_STATE_UNKNOWN;
975 
976 	return (IGB_SUCCESS);
977 }
978 
979 /*
980  * igb_init_locks - Initialize locks
981  */
982 static void
983 igb_init_locks(igb_t *igb)
984 {
985 	igb_rx_ring_t *rx_ring;
986 	igb_tx_ring_t *tx_ring;
987 	int i;
988 
989 	for (i = 0; i < igb->num_rx_rings; i++) {
990 		rx_ring = &igb->rx_rings[i];
991 		mutex_init(&rx_ring->rx_lock, NULL,
992 		    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
993 	}
994 
995 	for (i = 0; i < igb->num_tx_rings; i++) {
996 		tx_ring = &igb->tx_rings[i];
997 		mutex_init(&tx_ring->tx_lock, NULL,
998 		    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
999 		mutex_init(&tx_ring->recycle_lock, NULL,
1000 		    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1001 		mutex_init(&tx_ring->tcb_head_lock, NULL,
1002 		    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1003 		mutex_init(&tx_ring->tcb_tail_lock, NULL,
1004 		    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1005 	}
1006 
1007 	mutex_init(&igb->gen_lock, NULL,
1008 	    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1009 
1010 	mutex_init(&igb->watchdog_lock, NULL,
1011 	    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1012 
1013 	mutex_init(&igb->link_lock, NULL,
1014 	    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
1015 }
1016 
1017 /*
1018  * igb_destroy_locks - Destroy locks
1019  */
1020 static void
1021 igb_destroy_locks(igb_t *igb)
1022 {
1023 	igb_rx_ring_t *rx_ring;
1024 	igb_tx_ring_t *tx_ring;
1025 	int i;
1026 
1027 	for (i = 0; i < igb->num_rx_rings; i++) {
1028 		rx_ring = &igb->rx_rings[i];
1029 		mutex_destroy(&rx_ring->rx_lock);
1030 	}
1031 
1032 	for (i = 0; i < igb->num_tx_rings; i++) {
1033 		tx_ring = &igb->tx_rings[i];
1034 		mutex_destroy(&tx_ring->tx_lock);
1035 		mutex_destroy(&tx_ring->recycle_lock);
1036 		mutex_destroy(&tx_ring->tcb_head_lock);
1037 		mutex_destroy(&tx_ring->tcb_tail_lock);
1038 	}
1039 
1040 	mutex_destroy(&igb->gen_lock);
1041 	mutex_destroy(&igb->watchdog_lock);
1042 	mutex_destroy(&igb->link_lock);
1043 }
1044 
/*
 * igb_resume - DDI_RESUME handler
 *
 * Restarts the hardware if the interface was started at suspend time,
 * then clears the IGB_SUSPENDED state bit.
 */
static int
igb_resume(dev_info_t *devinfo)
{
	igb_t *igb;

	igb = (igb_t *)ddi_get_driver_private(devinfo);
	if (igb == NULL)
		return (DDI_FAILURE);

	mutex_enter(&igb->gen_lock);

	if (igb->igb_state & IGB_STARTED) {
		if (igb_start(igb, B_FALSE) != IGB_SUCCESS) {
			mutex_exit(&igb->gen_lock);
			return (DDI_FAILURE);
		}

		/*
		 * Enable and start the watchdog timer
		 */
		igb_enable_watchdog_timer(igb);
	}

	atomic_and_32(&igb->igb_state, ~IGB_SUSPENDED);

	mutex_exit(&igb->gen_lock);

	return (DDI_SUCCESS);
}
1074 
/*
 * igb_suspend - DDI_SUSPEND handler
 *
 * Marks the instance suspended and, if it was running, stops the
 * hardware and the watchdog timer.  IGB_STARTED is deliberately left
 * set so igb_resume() knows to restart the interface.
 */
static int
igb_suspend(dev_info_t *devinfo)
{
	igb_t *igb;

	igb = (igb_t *)ddi_get_driver_private(devinfo);
	if (igb == NULL)
		return (DDI_FAILURE);

	mutex_enter(&igb->gen_lock);

	atomic_or_32(&igb->igb_state, IGB_SUSPENDED);

	if (!(igb->igb_state & IGB_STARTED)) {
		mutex_exit(&igb->gen_lock);
		return (DDI_SUCCESS);
	}

	igb_stop(igb, B_FALSE);

	mutex_exit(&igb->gen_lock);

	/*
	 * Disable and stop the watchdog timer
	 */
	igb_disable_watchdog_timer(igb);

	return (DDI_SUCCESS);
}
1104 
/*
 * igb_init - Initialize the adapter under gen_lock, reporting any
 * failure through the FMA framework.
 */
static int
igb_init(igb_t *igb)
{
	mutex_enter(&igb->gen_lock);

	/*
	 * Initialize the adapter
	 */
	if (igb_init_adapter(igb) != IGB_SUCCESS) {
		mutex_exit(&igb->gen_lock);
		igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
		return (IGB_FAILURE);
	}

	mutex_exit(&igb->gen_lock);

	return (IGB_SUCCESS);
}
1124 
1125 /*
1126  * igb_init_mac_address - Initialize the default MAC address
1127  *
1128  * On success, the MAC address is entered in the igb->hw.mac.addr
1129  * and hw->mac.perm_addr fields and the adapter's RAR(0) receive
1130  * address register.
1131  *
1132  * Important side effects:
1133  * 1. adapter is reset - this is required to put it in a known state.
1134  * 2. all of non-volatile memory (NVM) is read & checksummed - NVM is where
1135  * MAC address and all default settings are stored, so a valid checksum
1136  * is required.
1137  */
1138 static int
1139 igb_init_mac_address(igb_t *igb)
1140 {
1141 	struct e1000_hw *hw = &igb->hw;
1142 
1143 	ASSERT(mutex_owned(&igb->gen_lock));
1144 
1145 	/*
1146 	 * Reset chipset to put the hardware in a known state
1147 	 * before we try to get MAC address from NVM.
1148 	 */
1149 	if (e1000_reset_hw(hw) != E1000_SUCCESS) {
1150 		igb_error(igb, "Adapter reset failed.");
1151 		goto init_mac_fail;
1152 	}
1153 
1154 	/*
1155 	 * NVM validation
1156 	 */
1157 	if (e1000_validate_nvm_checksum(hw) < 0) {
1158 		/*
1159 		 * Some PCI-E parts fail the first check due to
1160 		 * the link being in sleep state.  Call it again,
1161 		 * if it fails a second time its a real issue.
1162 		 */
1163 		if (e1000_validate_nvm_checksum(hw) < 0) {
1164 			igb_error(igb,
1165 			    "Invalid NVM checksum. Please contact "
1166 			    "the vendor to update the NVM.");
1167 			goto init_mac_fail;
1168 		}
1169 	}
1170 
1171 	/*
1172 	 * Get the mac address
1173 	 * This function should handle SPARC case correctly.
1174 	 */
1175 	if (!igb_find_mac_address(igb)) {
1176 		igb_error(igb, "Failed to get the mac address");
1177 		goto init_mac_fail;
1178 	}
1179 
1180 	/* Validate mac address */
1181 	if (!is_valid_mac_addr(hw->mac.addr)) {
1182 		igb_error(igb, "Invalid mac address");
1183 		goto init_mac_fail;
1184 	}
1185 
1186 	return (IGB_SUCCESS);
1187 
1188 init_mac_fail:
1189 	return (IGB_FAILURE);
1190 }
1191 
1192 /*
1193  * igb_init_adapter - Initialize the adapter
1194  */
1195 static int
1196 igb_init_adapter(igb_t *igb)
1197 {
1198 	struct e1000_hw *hw = &igb->hw;
1199 	uint32_t pba;
1200 	uint32_t high_water;
1201 	int i;
1202 
1203 	ASSERT(mutex_owned(&igb->gen_lock));
1204 
1205 	/*
1206 	 * In order to obtain the default MAC address, this will reset the
1207 	 * adapter and validate the NVM that the address and many other
1208 	 * default settings come from.
1209 	 */
1210 	if (igb_init_mac_address(igb) != IGB_SUCCESS) {
1211 		igb_error(igb, "Failed to initialize MAC address");
1212 		goto init_adapter_fail;
1213 	}
1214 
1215 	/*
1216 	 * Setup flow control
1217 	 *
1218 	 * These parameters set thresholds for the adapter's generation(Tx)
1219 	 * and response(Rx) to Ethernet PAUSE frames.  These are just threshold
1220 	 * settings.  Flow control is enabled or disabled in the configuration
1221 	 * file.
1222 	 * High-water mark is set down from the top of the rx fifo (not
1223 	 * sensitive to max_frame_size) and low-water is set just below
1224 	 * high-water mark.
1225 	 * The high water mark must be low enough to fit one full frame above
1226 	 * it in the rx FIFO.  Should be the lower of:
1227 	 * 90% of the Rx FIFO size, or the full Rx FIFO size minus one full
1228 	 * frame.
1229 	 */
1230 	/*
1231 	 * The default setting of PBA is correct for 82575 and other supported
1232 	 * adapters do not have the E1000_PBA register, so PBA value is only
1233 	 * used for calculation here and is never written to the adapter.
1234 	 */
1235 	if (hw->mac.type == e1000_82575) {
1236 		pba = E1000_PBA_34K;
1237 	} else {
1238 		pba = E1000_PBA_64K;
1239 	}
1240 
1241 	high_water = min(((pba << 10) * 9 / 10),
1242 	    ((pba << 10) - igb->max_frame_size));
1243 
1244 	if (hw->mac.type == e1000_82575) {
1245 		/* 8-byte granularity */
1246 		hw->fc.high_water = high_water & 0xFFF8;
1247 		hw->fc.low_water = hw->fc.high_water - 8;
1248 	} else {
1249 		/* 16-byte granularity */
1250 		hw->fc.high_water = high_water & 0xFFF0;
1251 		hw->fc.low_water = hw->fc.high_water - 16;
1252 	}
1253 
1254 	hw->fc.pause_time = E1000_FC_PAUSE_TIME;
1255 	hw->fc.send_xon = B_TRUE;
1256 
1257 	(void) e1000_validate_mdi_setting(hw);
1258 
1259 	/*
1260 	 * Reset the chipset hardware the second time to put PBA settings
1261 	 * into effect.
1262 	 */
1263 	if (e1000_reset_hw(hw) != E1000_SUCCESS) {
1264 		igb_error(igb, "Second reset failed");
1265 		goto init_adapter_fail;
1266 	}
1267 
1268 	/*
1269 	 * Don't wait for auto-negotiation to complete
1270 	 */
1271 	hw->phy.autoneg_wait_to_complete = B_FALSE;
1272 
1273 	/*
1274 	 * Copper options
1275 	 */
1276 	if (hw->phy.media_type == e1000_media_type_copper) {
1277 		hw->phy.mdix = 0;	/* AUTO_ALL_MODES */
1278 		hw->phy.disable_polarity_correction = B_FALSE;
1279 		hw->phy.ms_type = e1000_ms_hw_default; /* E1000_MASTER_SLAVE */
1280 	}
1281 
1282 	/*
1283 	 * Initialize link settings
1284 	 */
1285 	(void) igb_setup_link(igb, B_FALSE);
1286 
1287 	/*
1288 	 * Configure/Initialize hardware
1289 	 */
1290 	if (e1000_init_hw(hw) != E1000_SUCCESS) {
1291 		igb_error(igb, "Failed to initialize hardware");
1292 		goto init_adapter_fail;
1293 	}
1294 
1295 	/*
1296 	 *  Start the link setup timer
1297 	 */
1298 	igb_start_link_timer(igb);
1299 
1300 	/*
1301 	 * Disable wakeup control by default
1302 	 */
1303 	E1000_WRITE_REG(hw, E1000_WUC, 0);
1304 
1305 	/*
1306 	 * Record phy info in hw struct
1307 	 */
1308 	(void) e1000_get_phy_info(hw);
1309 
1310 	/*
1311 	 * Make sure driver has control
1312 	 */
1313 	igb_get_driver_control(hw);
1314 
1315 	/*
1316 	 * Restore LED settings to the default from EEPROM
1317 	 * to meet the standard for Sun platforms.
1318 	 */
1319 	(void) e1000_cleanup_led(hw);
1320 
1321 	/*
1322 	 * Setup MSI-X interrupts
1323 	 */
1324 	if (igb->intr_type == DDI_INTR_TYPE_MSIX)
1325 		igb->capab->setup_msix(igb);
1326 
1327 	/*
1328 	 * Initialize unicast addresses.
1329 	 */
1330 	igb_init_unicst(igb);
1331 
1332 	/*
1333 	 * Setup and initialize the mctable structures.
1334 	 */
1335 	igb_setup_multicst(igb);
1336 
1337 	/*
1338 	 * Set interrupt throttling rate
1339 	 */
1340 	for (i = 0; i < igb->intr_cnt; i++)
1341 		E1000_WRITE_REG(hw, E1000_EITR(i), igb->intr_throttling[i]);
1342 
1343 	/*
1344 	 * Save the state of the phy
1345 	 */
1346 	igb_get_phy_state(igb);
1347 
1348 	igb_param_sync(igb);
1349 
1350 	return (IGB_SUCCESS);
1351 
1352 init_adapter_fail:
1353 	/*
1354 	 * Reset PHY if possible
1355 	 */
1356 	if (e1000_check_reset_block(hw) == E1000_SUCCESS)
1357 		(void) e1000_phy_hw_reset(hw);
1358 
1359 	return (IGB_FAILURE);
1360 }
1361 
1362 /*
1363  * igb_stop_adapter - Stop the adapter
1364  */
1365 static void
1366 igb_stop_adapter(igb_t *igb)
1367 {
1368 	struct e1000_hw *hw = &igb->hw;
1369 
1370 	ASSERT(mutex_owned(&igb->gen_lock));
1371 
1372 	/* Stop the link setup timer */
1373 	igb_stop_link_timer(igb);
1374 
1375 	/* Tell firmware driver is no longer in control */
1376 	igb_release_driver_control(hw);
1377 
1378 	/*
1379 	 * Reset the chipset
1380 	 */
1381 	if (e1000_reset_hw(hw) != E1000_SUCCESS) {
1382 		igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
1383 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
1384 	}
1385 
1386 	/*
1387 	 * e1000_phy_hw_reset is not needed here, MAC reset above is sufficient
1388 	 */
1389 }
1390 
1391 /*
1392  * igb_reset - Reset the chipset and restart the driver.
1393  *
1394  * It involves stopping and re-starting the chipset,
1395  * and re-configuring the rx/tx rings.
1396  */
1397 static int
1398 igb_reset(igb_t *igb)
1399 {
1400 	int i;
1401 
1402 	mutex_enter(&igb->gen_lock);
1403 
1404 	ASSERT(igb->igb_state & IGB_STARTED);
1405 	atomic_and_32(&igb->igb_state, ~IGB_STARTED);
1406 
1407 	/*
1408 	 * Disable the adapter interrupts to stop any rx/tx activities
1409 	 * before draining pending data and resetting hardware.
1410 	 */
1411 	igb_disable_adapter_interrupts(igb);
1412 
1413 	/*
1414 	 * Drain the pending transmit packets
1415 	 */
1416 	(void) igb_tx_drain(igb);
1417 
1418 	for (i = 0; i < igb->num_rx_rings; i++)
1419 		mutex_enter(&igb->rx_rings[i].rx_lock);
1420 	for (i = 0; i < igb->num_tx_rings; i++)
1421 		mutex_enter(&igb->tx_rings[i].tx_lock);
1422 
1423 	/*
1424 	 * Stop the adapter
1425 	 */
1426 	igb_stop_adapter(igb);
1427 
1428 	/*
1429 	 * Clean the pending tx data/resources
1430 	 */
1431 	igb_tx_clean(igb);
1432 
1433 	/*
1434 	 * Start the adapter
1435 	 */
1436 	if (igb_init_adapter(igb) != IGB_SUCCESS) {
1437 		igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
1438 		goto reset_failure;
1439 	}
1440 
1441 	/*
1442 	 * Setup the rx/tx rings
1443 	 */
1444 	igb->tx_ring_init = B_FALSE;
1445 	igb_setup_rings(igb);
1446 
1447 	atomic_and_32(&igb->igb_state, ~(IGB_ERROR | IGB_STALL));
1448 
1449 	/*
1450 	 * Enable adapter interrupts
1451 	 * The interrupts must be enabled after the driver state is START
1452 	 */
1453 	igb->capab->enable_intr(igb);
1454 
1455 	if (igb_check_acc_handle(igb->osdep.cfg_handle) != DDI_FM_OK)
1456 		goto reset_failure;
1457 
1458 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
1459 		goto reset_failure;
1460 
1461 	for (i = igb->num_tx_rings - 1; i >= 0; i--)
1462 		mutex_exit(&igb->tx_rings[i].tx_lock);
1463 	for (i = igb->num_rx_rings - 1; i >= 0; i--)
1464 		mutex_exit(&igb->rx_rings[i].rx_lock);
1465 
1466 	atomic_or_32(&igb->igb_state, IGB_STARTED);
1467 
1468 	mutex_exit(&igb->gen_lock);
1469 
1470 	return (IGB_SUCCESS);
1471 
1472 reset_failure:
1473 	for (i = igb->num_tx_rings - 1; i >= 0; i--)
1474 		mutex_exit(&igb->tx_rings[i].tx_lock);
1475 	for (i = igb->num_rx_rings - 1; i >= 0; i--)
1476 		mutex_exit(&igb->rx_rings[i].rx_lock);
1477 
1478 	mutex_exit(&igb->gen_lock);
1479 
1480 	ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
1481 
1482 	return (IGB_FAILURE);
1483 }
1484 
1485 /*
1486  * igb_tx_clean - Clean the pending transmit packets and DMA resources
1487  */
1488 static void
1489 igb_tx_clean(igb_t *igb)
1490 {
1491 	igb_tx_ring_t *tx_ring;
1492 	tx_control_block_t *tcb;
1493 	link_list_t pending_list;
1494 	uint32_t desc_num;
1495 	int i, j;
1496 
1497 	LINK_LIST_INIT(&pending_list);
1498 
1499 	for (i = 0; i < igb->num_tx_rings; i++) {
1500 		tx_ring = &igb->tx_rings[i];
1501 
1502 		mutex_enter(&tx_ring->recycle_lock);
1503 
1504 		/*
1505 		 * Clean the pending tx data - the pending packets in the
1506 		 * work_list that have no chances to be transmitted again.
1507 		 *
1508 		 * We must ensure the chipset is stopped or the link is down
1509 		 * before cleaning the transmit packets.
1510 		 */
1511 		desc_num = 0;
1512 		for (j = 0; j < tx_ring->ring_size; j++) {
1513 			tcb = tx_ring->work_list[j];
1514 			if (tcb != NULL) {
1515 				desc_num += tcb->desc_num;
1516 
1517 				tx_ring->work_list[j] = NULL;
1518 
1519 				igb_free_tcb(tcb);
1520 
1521 				LIST_PUSH_TAIL(&pending_list, &tcb->link);
1522 			}
1523 		}
1524 
1525 		if (desc_num > 0) {
1526 			atomic_add_32(&tx_ring->tbd_free, desc_num);
1527 			ASSERT(tx_ring->tbd_free == tx_ring->ring_size);
1528 
1529 			/*
1530 			 * Reset the head and tail pointers of the tbd ring;
1531 			 * Reset the head write-back if it is enabled.
1532 			 */
1533 			tx_ring->tbd_head = 0;
1534 			tx_ring->tbd_tail = 0;
1535 			if (igb->tx_head_wb_enable)
1536 				*tx_ring->tbd_head_wb = 0;
1537 
1538 			E1000_WRITE_REG(&igb->hw, E1000_TDH(tx_ring->index), 0);
1539 			E1000_WRITE_REG(&igb->hw, E1000_TDT(tx_ring->index), 0);
1540 		}
1541 
1542 		mutex_exit(&tx_ring->recycle_lock);
1543 
1544 		/*
1545 		 * Add the tx control blocks in the pending list to
1546 		 * the free list.
1547 		 */
1548 		igb_put_free_list(tx_ring, &pending_list);
1549 	}
1550 }
1551 
1552 /*
1553  * igb_tx_drain - Drain the tx rings to allow pending packets to be transmitted
1554  */
1555 static boolean_t
1556 igb_tx_drain(igb_t *igb)
1557 {
1558 	igb_tx_ring_t *tx_ring;
1559 	boolean_t done;
1560 	int i, j;
1561 
1562 	/*
1563 	 * Wait for a specific time to allow pending tx packets
1564 	 * to be transmitted.
1565 	 *
1566 	 * Check the counter tbd_free to see if transmission is done.
1567 	 * No lock protection is needed here.
1568 	 *
1569 	 * Return B_TRUE if all pending packets have been transmitted;
1570 	 * Otherwise return B_FALSE;
1571 	 */
1572 	for (i = 0; i < TX_DRAIN_TIME; i++) {
1573 
1574 		done = B_TRUE;
1575 		for (j = 0; j < igb->num_tx_rings; j++) {
1576 			tx_ring = &igb->tx_rings[j];
1577 			done = done &&
1578 			    (tx_ring->tbd_free == tx_ring->ring_size);
1579 		}
1580 
1581 		if (done)
1582 			break;
1583 
1584 		msec_delay(1);
1585 	}
1586 
1587 	return (done);
1588 }
1589 
1590 /*
1591  * igb_rx_drain - Wait for all rx buffers to be released by upper layer
1592  */
1593 static boolean_t
1594 igb_rx_drain(igb_t *igb)
1595 {
1596 	boolean_t done;
1597 	int i;
1598 
1599 	/*
1600 	 * Polling the rx free list to check if those rx buffers held by
1601 	 * the upper layer are released.
1602 	 *
1603 	 * Check the counter rcb_free to see if all pending buffers are
1604 	 * released. No lock protection is needed here.
1605 	 *
1606 	 * Return B_TRUE if all pending buffers have been released;
1607 	 * Otherwise return B_FALSE;
1608 	 */
1609 	for (i = 0; i < RX_DRAIN_TIME; i++) {
1610 		done = (igb->rcb_pending == 0);
1611 
1612 		if (done)
1613 			break;
1614 
1615 		msec_delay(1);
1616 	}
1617 
1618 	return (done);
1619 }
1620 
1621 /*
1622  * igb_start - Start the driver/chipset
1623  */
1624 int
1625 igb_start(igb_t *igb, boolean_t alloc_buffer)
1626 {
1627 	int i;
1628 
1629 	ASSERT(mutex_owned(&igb->gen_lock));
1630 
1631 	if (alloc_buffer) {
1632 		if (igb_alloc_rx_data(igb) != IGB_SUCCESS) {
1633 			igb_error(igb,
1634 			    "Failed to allocate software receive rings");
1635 			return (IGB_FAILURE);
1636 		}
1637 
1638 		/* Allocate buffers for all the rx/tx rings */
1639 		if (igb_alloc_dma(igb) != IGB_SUCCESS) {
1640 			igb_error(igb, "Failed to allocate DMA resource");
1641 			return (IGB_FAILURE);
1642 		}
1643 
1644 		igb->tx_ring_init = B_TRUE;
1645 	} else {
1646 		igb->tx_ring_init = B_FALSE;
1647 	}
1648 
1649 	for (i = 0; i < igb->num_rx_rings; i++)
1650 		mutex_enter(&igb->rx_rings[i].rx_lock);
1651 	for (i = 0; i < igb->num_tx_rings; i++)
1652 		mutex_enter(&igb->tx_rings[i].tx_lock);
1653 
1654 	/*
1655 	 * Start the adapter
1656 	 */
1657 	if ((igb->attach_progress & ATTACH_PROGRESS_INIT_ADAPTER) == 0) {
1658 		if (igb_init_adapter(igb) != IGB_SUCCESS) {
1659 			igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
1660 			goto start_failure;
1661 		}
1662 		igb->attach_progress |= ATTACH_PROGRESS_INIT_ADAPTER;
1663 	}
1664 
1665 	/*
1666 	 * Setup the rx/tx rings
1667 	 */
1668 	igb_setup_rings(igb);
1669 
1670 	/*
1671 	 * Enable adapter interrupts
1672 	 * The interrupts must be enabled after the driver state is START
1673 	 */
1674 	igb->capab->enable_intr(igb);
1675 
1676 	if (igb_check_acc_handle(igb->osdep.cfg_handle) != DDI_FM_OK)
1677 		goto start_failure;
1678 
1679 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
1680 		goto start_failure;
1681 
1682 	for (i = igb->num_tx_rings - 1; i >= 0; i--)
1683 		mutex_exit(&igb->tx_rings[i].tx_lock);
1684 	for (i = igb->num_rx_rings - 1; i >= 0; i--)
1685 		mutex_exit(&igb->rx_rings[i].rx_lock);
1686 
1687 	return (IGB_SUCCESS);
1688 
1689 start_failure:
1690 	for (i = igb->num_tx_rings - 1; i >= 0; i--)
1691 		mutex_exit(&igb->tx_rings[i].tx_lock);
1692 	for (i = igb->num_rx_rings - 1; i >= 0; i--)
1693 		mutex_exit(&igb->rx_rings[i].rx_lock);
1694 
1695 	ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
1696 
1697 	return (IGB_FAILURE);
1698 }
1699 
1700 /*
1701  * igb_stop - Stop the driver/chipset
1702  */
1703 void
1704 igb_stop(igb_t *igb, boolean_t free_buffer)
1705 {
1706 	int i;
1707 
1708 	ASSERT(mutex_owned(&igb->gen_lock));
1709 
1710 	igb->attach_progress &= ~ATTACH_PROGRESS_INIT_ADAPTER;
1711 
1712 	/*
1713 	 * Disable the adapter interrupts
1714 	 */
1715 	igb_disable_adapter_interrupts(igb);
1716 
1717 	/*
1718 	 * Drain the pending tx packets
1719 	 */
1720 	(void) igb_tx_drain(igb);
1721 
1722 	for (i = 0; i < igb->num_rx_rings; i++)
1723 		mutex_enter(&igb->rx_rings[i].rx_lock);
1724 	for (i = 0; i < igb->num_tx_rings; i++)
1725 		mutex_enter(&igb->tx_rings[i].tx_lock);
1726 
1727 	/*
1728 	 * Stop the adapter
1729 	 */
1730 	igb_stop_adapter(igb);
1731 
1732 	/*
1733 	 * Clean the pending tx data/resources
1734 	 */
1735 	igb_tx_clean(igb);
1736 
1737 	for (i = igb->num_tx_rings - 1; i >= 0; i--)
1738 		mutex_exit(&igb->tx_rings[i].tx_lock);
1739 	for (i = igb->num_rx_rings - 1; i >= 0; i--)
1740 		mutex_exit(&igb->rx_rings[i].rx_lock);
1741 
1742 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
1743 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
1744 
1745 	if (igb->link_state == LINK_STATE_UP) {
1746 		igb->link_state = LINK_STATE_UNKNOWN;
1747 		mac_link_update(igb->mac_hdl, igb->link_state);
1748 	}
1749 
1750 	if (free_buffer) {
1751 		/*
1752 		 * Release the DMA/memory resources of rx/tx rings
1753 		 */
1754 		igb_free_dma(igb);
1755 		igb_free_rx_data(igb);
1756 	}
1757 }
1758 
1759 /*
1760  * igb_alloc_rings - Allocate memory space for rx/tx rings
1761  */
1762 static int
1763 igb_alloc_rings(igb_t *igb)
1764 {
1765 	/*
1766 	 * Allocate memory space for rx rings
1767 	 */
1768 	igb->rx_rings = kmem_zalloc(
1769 	    sizeof (igb_rx_ring_t) * igb->num_rx_rings,
1770 	    KM_NOSLEEP);
1771 
1772 	if (igb->rx_rings == NULL) {
1773 		return (IGB_FAILURE);
1774 	}
1775 
1776 	/*
1777 	 * Allocate memory space for tx rings
1778 	 */
1779 	igb->tx_rings = kmem_zalloc(
1780 	    sizeof (igb_tx_ring_t) * igb->num_tx_rings,
1781 	    KM_NOSLEEP);
1782 
1783 	if (igb->tx_rings == NULL) {
1784 		kmem_free(igb->rx_rings,
1785 		    sizeof (igb_rx_ring_t) * igb->num_rx_rings);
1786 		igb->rx_rings = NULL;
1787 		return (IGB_FAILURE);
1788 	}
1789 
1790 	/*
1791 	 * Allocate memory space for rx ring groups
1792 	 */
1793 	igb->rx_groups = kmem_zalloc(
1794 	    sizeof (igb_rx_group_t) * igb->num_rx_groups,
1795 	    KM_NOSLEEP);
1796 
1797 	if (igb->rx_groups == NULL) {
1798 		kmem_free(igb->rx_rings,
1799 		    sizeof (igb_rx_ring_t) * igb->num_rx_rings);
1800 		kmem_free(igb->tx_rings,
1801 		    sizeof (igb_tx_ring_t) * igb->num_tx_rings);
1802 		igb->rx_rings = NULL;
1803 		igb->tx_rings = NULL;
1804 		return (IGB_FAILURE);
1805 	}
1806 
1807 	return (IGB_SUCCESS);
1808 }
1809 
1810 /*
1811  * igb_free_rings - Free the memory space of rx/tx rings.
1812  */
1813 static void
1814 igb_free_rings(igb_t *igb)
1815 {
1816 	if (igb->rx_rings != NULL) {
1817 		kmem_free(igb->rx_rings,
1818 		    sizeof (igb_rx_ring_t) * igb->num_rx_rings);
1819 		igb->rx_rings = NULL;
1820 	}
1821 
1822 	if (igb->tx_rings != NULL) {
1823 		kmem_free(igb->tx_rings,
1824 		    sizeof (igb_tx_ring_t) * igb->num_tx_rings);
1825 		igb->tx_rings = NULL;
1826 	}
1827 
1828 	if (igb->rx_groups != NULL) {
1829 		kmem_free(igb->rx_groups,
1830 		    sizeof (igb_rx_group_t) * igb->num_rx_groups);
1831 		igb->rx_groups = NULL;
1832 	}
1833 }
1834 
1835 static int
1836 igb_alloc_rx_data(igb_t *igb)
1837 {
1838 	igb_rx_ring_t *rx_ring;
1839 	int i;
1840 
1841 	for (i = 0; i < igb->num_rx_rings; i++) {
1842 		rx_ring = &igb->rx_rings[i];
1843 		if (igb_alloc_rx_ring_data(rx_ring) != IGB_SUCCESS)
1844 			goto alloc_rx_rings_failure;
1845 	}
1846 	return (IGB_SUCCESS);
1847 
1848 alloc_rx_rings_failure:
1849 	igb_free_rx_data(igb);
1850 	return (IGB_FAILURE);
1851 }
1852 
1853 static void
1854 igb_free_rx_data(igb_t *igb)
1855 {
1856 	igb_rx_ring_t *rx_ring;
1857 	igb_rx_data_t *rx_data;
1858 	int i;
1859 
1860 	for (i = 0; i < igb->num_rx_rings; i++) {
1861 		rx_ring = &igb->rx_rings[i];
1862 
1863 		mutex_enter(&igb->rx_pending_lock);
1864 		rx_data = rx_ring->rx_data;
1865 
1866 		if (rx_data != NULL) {
1867 			rx_data->flag |= IGB_RX_STOPPED;
1868 
1869 			if (rx_data->rcb_pending == 0) {
1870 				igb_free_rx_ring_data(rx_data);
1871 				rx_ring->rx_data = NULL;
1872 			}
1873 		}
1874 
1875 		mutex_exit(&igb->rx_pending_lock);
1876 	}
1877 }
1878 
1879 /*
1880  * igb_setup_rings - Setup rx/tx rings
1881  */
1882 static void
1883 igb_setup_rings(igb_t *igb)
1884 {
1885 	/*
1886 	 * Setup the rx/tx rings, including the following:
1887 	 *
1888 	 * 1. Setup the descriptor ring and the control block buffers;
1889 	 * 2. Initialize necessary registers for receive/transmit;
1890 	 * 3. Initialize software pointers/parameters for receive/transmit;
1891 	 */
1892 	igb_setup_rx(igb);
1893 
1894 	igb_setup_tx(igb);
1895 }
1896 
1897 static void
1898 igb_setup_rx_ring(igb_rx_ring_t *rx_ring)
1899 {
1900 	igb_t *igb = rx_ring->igb;
1901 	igb_rx_data_t *rx_data = rx_ring->rx_data;
1902 	struct e1000_hw *hw = &igb->hw;
1903 	rx_control_block_t *rcb;
1904 	union e1000_adv_rx_desc	*rbd;
1905 	uint32_t size;
1906 	uint32_t buf_low;
1907 	uint32_t buf_high;
1908 	uint32_t rxdctl;
1909 	int i;
1910 
1911 	ASSERT(mutex_owned(&rx_ring->rx_lock));
1912 	ASSERT(mutex_owned(&igb->gen_lock));
1913 
1914 	/*
1915 	 * Initialize descriptor ring with buffer addresses
1916 	 */
1917 	for (i = 0; i < igb->rx_ring_size; i++) {
1918 		rcb = rx_data->work_list[i];
1919 		rbd = &rx_data->rbd_ring[i];
1920 
1921 		rbd->read.pkt_addr = rcb->rx_buf.dma_address;
1922 		rbd->read.hdr_addr = NULL;
1923 	}
1924 
1925 	/*
1926 	 * Initialize the base address registers
1927 	 */
1928 	buf_low = (uint32_t)rx_data->rbd_area.dma_address;
1929 	buf_high = (uint32_t)(rx_data->rbd_area.dma_address >> 32);
1930 	E1000_WRITE_REG(hw, E1000_RDBAH(rx_ring->index), buf_high);
1931 	E1000_WRITE_REG(hw, E1000_RDBAL(rx_ring->index), buf_low);
1932 
1933 	/*
1934 	 * Initialize the length register
1935 	 */
1936 	size = rx_data->ring_size * sizeof (union e1000_adv_rx_desc);
1937 	E1000_WRITE_REG(hw, E1000_RDLEN(rx_ring->index), size);
1938 
1939 	/*
1940 	 * Initialize buffer size & descriptor type
1941 	 */
1942 	E1000_WRITE_REG(hw, E1000_SRRCTL(rx_ring->index),
1943 	    ((igb->rx_buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) |
1944 	    E1000_SRRCTL_DESCTYPE_ADV_ONEBUF));
1945 
1946 	/*
1947 	 * Setup the Receive Descriptor Control Register (RXDCTL)
1948 	 */
1949 	rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(rx_ring->index));
1950 	rxdctl &= igb->capab->rxdctl_mask;
1951 	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
1952 	rxdctl |= 16;		/* pthresh */
1953 	rxdctl |= 8 << 8;	/* hthresh */
1954 	rxdctl |= 1 << 16;	/* wthresh */
1955 	E1000_WRITE_REG(hw, E1000_RXDCTL(rx_ring->index), rxdctl);
1956 
1957 	rx_data->rbd_next = 0;
1958 }
1959 
/*
 * igb_setup_rx - Program the entire receive side.
 *
 * Configures RCTL, every rx descriptor ring, the max frame length,
 * hardware receive checksum, and the classify/RSS mode, then enables
 * the receive unit and primes the per-ring head/tail pointers.
 * Register-write ordering matters: rings are set up before the
 * receive unit is enabled, and head/tail pointers after.
 */
static void
igb_setup_rx(igb_t *igb)
{
	igb_rx_ring_t *rx_ring;
	igb_rx_data_t *rx_data;
	igb_rx_group_t *rx_group;
	struct e1000_hw *hw = &igb->hw;
	uint32_t rctl, rxcsum;
	uint32_t ring_per_group;
	int i;

	/*
	 * Setup the Receive Control Register (RCTL), and enable the
	 * receiver. The initial configuration is to: enable the receiver,
	 * accept broadcasts, discard bad packets, accept long packets,
	 * disable VLAN filter checking, and set receive buffer size to
	 * 2k.  For 82575, also set the receive descriptor minimum
	 * threshold size to 1/2 the ring.
	 */
	rctl = E1000_READ_REG(hw, E1000_RCTL);

	/*
	 * Clear the field used for wakeup control.  This driver doesn't do
	 * wakeup but leave this here for completeness.
	 */
	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);

	rctl |= (E1000_RCTL_EN |	/* Enable Receive Unit */
	    E1000_RCTL_BAM |		/* Accept Broadcast Packets */
	    E1000_RCTL_LPE |		/* Large Packet Enable */
					/* Multicast filter offset */
	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT) |
	    E1000_RCTL_RDMTS_HALF |	/* rx descriptor threshold */
	    E1000_RCTL_SECRC);		/* Strip Ethernet CRC */

	/* Initialize the software state of each rx group */
	for (i = 0; i < igb->num_rx_groups; i++) {
		rx_group = &igb->rx_groups[i];
		rx_group->index = i;
		rx_group->igb = igb;
	}

	/*
	 * Set up all rx descriptor rings - must be called before receive unit
	 * enabled.
	 */
	ring_per_group = igb->num_rx_rings / igb->num_rx_groups;
	for (i = 0; i < igb->num_rx_rings; i++) {
		rx_ring = &igb->rx_rings[i];
		igb_setup_rx_ring(rx_ring);

		/*
		 * Map a ring to a group by assigning a group index
		 */
		rx_ring->group_index = i / ring_per_group;
	}

	/*
	 * Setup the Rx Long Packet Max Length register
	 */
	E1000_WRITE_REG(hw, E1000_RLPML, igb->max_frame_size);

	/*
	 * Hardware checksum settings (only written when enabled; the
	 * register's reset default applies otherwise)
	 */
	if (igb->rx_hcksum_enable) {
		rxcsum =
		    E1000_RXCSUM_TUOFL |	/* TCP/UDP checksum */
		    E1000_RXCSUM_IPOFL;		/* IP checksum */

		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
	}

	/*
	 * Setup classify and RSS for multiple receive queues.  Which
	 * mechanism is needed depends on the VMDq mode chosen earlier.
	 */
	switch (igb->vmdq_mode) {
	case E1000_VMDQ_OFF:
		/*
		 * One ring group, only RSS is needed when more than
		 * one ring enabled.
		 */
		if (igb->num_rx_rings > 1)
			igb_setup_rss(igb);
		break;
	case E1000_VMDQ_MAC:
		/*
		 * Multiple groups, each group has one ring,
		 * only the MAC classification is needed.
		 */
		igb_setup_mac_classify(igb);
		break;
	case E1000_VMDQ_MAC_RSS:
		/*
		 * Multiple groups and multiple rings, both
		 * MAC classification and RSS are needed.
		 */
		igb_setup_mac_rss_classify(igb);
		break;
	}

	/*
	 * Enable the receive unit - must be done after all
	 * the rx setup above.
	 */
	E1000_WRITE_REG(hw, E1000_RCTL, rctl);

	/*
	 * Initialize all adapter ring head & tail pointers - must
	 * be done after receive unit is enabled
	 */
	for (i = 0; i < igb->num_rx_rings; i++) {
		rx_ring = &igb->rx_rings[i];
		rx_data = rx_ring->rx_data;
		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
		E1000_WRITE_REG(hw, E1000_RDT(i), rx_data->ring_size - 1);
	}

	/*
	 * 82575 with manageability enabled needs a special flush to make
	 * sure the fifos start clean.
	 */
	if ((hw->mac.type == e1000_82575) &&
	    (E1000_READ_REG(hw, E1000_MANC) & E1000_MANC_RCV_TCO_EN)) {
		e1000_rx_fifo_flush_82575(hw);
	}
}
2087 
/*
 * igb_setup_tx_ring - Program one transmit descriptor ring.
 *
 * Writes the ring's length and base registers, zeroes the hardware
 * head/tail pointers, configures (or disables) head write-back,
 * resets the software ring state, enables the queue via TXDCTL, and
 * clears the cached tx checksum context.  Called with both the ring's
 * tx_lock and gen_lock held.
 */
static void
igb_setup_tx_ring(igb_tx_ring_t *tx_ring)
{
	igb_t *igb = tx_ring->igb;
	struct e1000_hw *hw = &igb->hw;
	uint32_t size;
	uint32_t buf_low;
	uint32_t buf_high;
	uint32_t reg_val;

	ASSERT(mutex_owned(&tx_ring->tx_lock));
	ASSERT(mutex_owned(&igb->gen_lock));


	/*
	 * Initialize the length register (ring size in bytes)
	 */
	size = tx_ring->ring_size * sizeof (union e1000_adv_tx_desc);
	E1000_WRITE_REG(hw, E1000_TDLEN(tx_ring->index), size);

	/*
	 * Initialize the base address registers
	 */
	buf_low = (uint32_t)tx_ring->tbd_area.dma_address;
	buf_high = (uint32_t)(tx_ring->tbd_area.dma_address >> 32);
	E1000_WRITE_REG(hw, E1000_TDBAL(tx_ring->index), buf_low);
	E1000_WRITE_REG(hw, E1000_TDBAH(tx_ring->index), buf_high);

	/*
	 * Setup head & tail pointers
	 */
	E1000_WRITE_REG(hw, E1000_TDH(tx_ring->index), 0);
	E1000_WRITE_REG(hw, E1000_TDT(tx_ring->index), 0);

	/*
	 * Setup head write-back
	 */
	if (igb->tx_head_wb_enable) {
		/*
		 * The memory of the head write-back is allocated using
		 * the extra tbd beyond the tail of the tbd ring.
		 */
		tx_ring->tbd_head_wb = (uint32_t *)
		    ((uintptr_t)tx_ring->tbd_area.address + size);
		*tx_ring->tbd_head_wb = 0;

		buf_low = (uint32_t)
		    (tx_ring->tbd_area.dma_address + size);
		buf_high = (uint32_t)
		    ((tx_ring->tbd_area.dma_address + size) >> 32);

		/* Set the head write-back enable bit */
		buf_low |= E1000_TX_HEAD_WB_ENABLE;

		E1000_WRITE_REG(hw, E1000_TDWBAL(tx_ring->index), buf_low);
		E1000_WRITE_REG(hw, E1000_TDWBAH(tx_ring->index), buf_high);

		/*
		 * Turn off relaxed ordering for head write back or it will
		 * cause problems with the tx recycling
		 */
		reg_val = E1000_READ_REG(hw,
		    E1000_DCA_TXCTRL(tx_ring->index));
		reg_val &= ~E1000_DCA_TXCTRL_TX_WB_RO_EN;
		E1000_WRITE_REG(hw,
		    E1000_DCA_TXCTRL(tx_ring->index), reg_val);
	} else {
		tx_ring->tbd_head_wb = NULL;
	}

	/* Reset the software descriptor state: all descriptors free */
	tx_ring->tbd_head = 0;
	tx_ring->tbd_tail = 0;
	tx_ring->tbd_free = tx_ring->ring_size;

	/*
	 * Only reset the tcb free-list state when the control blocks
	 * were freshly (re)allocated (see igb_start()/igb_reset())
	 */
	if (igb->tx_ring_init == B_TRUE) {
		tx_ring->tcb_head = 0;
		tx_ring->tcb_tail = 0;
		tx_ring->tcb_free = tx_ring->free_list_size;
	}

	/*
	 * Enable TXDCTL per queue
	 */
	reg_val = E1000_READ_REG(hw, E1000_TXDCTL(tx_ring->index));
	reg_val |= E1000_TXDCTL_QUEUE_ENABLE;
	E1000_WRITE_REG(hw, E1000_TXDCTL(tx_ring->index), reg_val);

	/*
	 * Initialize hardware checksum offload settings (clears the
	 * cached context so the first packet reprograms it)
	 */
	bzero(&tx_ring->tx_context, sizeof (tx_context_t));
}
2180 
2181 static void
2182 igb_setup_tx(igb_t *igb)
2183 {
2184 	igb_tx_ring_t *tx_ring;
2185 	struct e1000_hw *hw = &igb->hw;
2186 	uint32_t reg_val;
2187 	int i;
2188 
2189 	for (i = 0; i < igb->num_tx_rings; i++) {
2190 		tx_ring = &igb->tx_rings[i];
2191 		igb_setup_tx_ring(tx_ring);
2192 	}
2193 
2194 	/*
2195 	 * Setup the Transmit Control Register (TCTL)
2196 	 */
2197 	reg_val = E1000_READ_REG(hw, E1000_TCTL);
2198 	reg_val &= ~E1000_TCTL_CT;
2199 	reg_val |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2200 	    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2201 
2202 	/* Enable transmits */
2203 	reg_val |= E1000_TCTL_EN;
2204 
2205 	E1000_WRITE_REG(hw, E1000_TCTL, reg_val);
2206 }
2207 
2208 /*
2209  * igb_setup_rss - Setup receive-side scaling feature
2210  */
2211 static void
2212 igb_setup_rss(igb_t *igb)
2213 {
2214 	struct e1000_hw *hw = &igb->hw;
2215 	uint32_t i, mrqc, rxcsum;
2216 	int shift = 0;
2217 	uint32_t random;
2218 	union e1000_reta {
2219 		uint32_t	dword;
2220 		uint8_t		bytes[4];
2221 	} reta;
2222 
2223 	/* Setup the Redirection Table */
2224 	if (hw->mac.type == e1000_82576) {
2225 		shift = 3;
2226 	} else if (hw->mac.type == e1000_82575) {
2227 		shift = 6;
2228 	}
2229 	for (i = 0; i < (32 * 4); i++) {
2230 		reta.bytes[i & 3] = (i % igb->num_rx_rings) << shift;
2231 		if ((i & 3) == 3) {
2232 			E1000_WRITE_REG(hw,
2233 			    (E1000_RETA(0) + (i & ~3)), reta.dword);
2234 		}
2235 	}
2236 
2237 	/* Fill out hash function seeds */
2238 	for (i = 0; i < 10; i++) {
2239 		(void) random_get_pseudo_bytes((uint8_t *)&random,
2240 		    sizeof (uint32_t));
2241 		E1000_WRITE_REG(hw, E1000_RSSRK(i), random);
2242 	}
2243 
2244 	/* Setup the Multiple Receive Queue Control register */
2245 	mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2246 	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2247 	    E1000_MRQC_RSS_FIELD_IPV4_TCP |
2248 	    E1000_MRQC_RSS_FIELD_IPV6 |
2249 	    E1000_MRQC_RSS_FIELD_IPV6_TCP |
2250 	    E1000_MRQC_RSS_FIELD_IPV4_UDP |
2251 	    E1000_MRQC_RSS_FIELD_IPV6_UDP |
2252 	    E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2253 	    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2254 
2255 	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2256 
2257 	/*
2258 	 * Disable Packet Checksum to enable RSS for multiple receive queues.
2259 	 *
2260 	 * The Packet Checksum is not ethernet CRC. It is another kind of
2261 	 * checksum offloading provided by the 82575 chipset besides the IP
2262 	 * header checksum offloading and the TCP/UDP checksum offloading.
2263 	 * The Packet Checksum is by default computed over the entire packet
2264 	 * from the first byte of the DA through the last byte of the CRC,
2265 	 * including the Ethernet and IP headers.
2266 	 *
2267 	 * It is a hardware limitation that Packet Checksum is mutually
2268 	 * exclusive with RSS.
2269 	 */
2270 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
2271 	rxcsum |= E1000_RXCSUM_PCSD;
2272 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
2273 }
2274 
2275 /*
2276  * igb_setup_mac_rss_classify - Setup MAC classification and rss
2277  */
static void
igb_setup_mac_rss_classify(igb_t *igb)
{
	struct e1000_hw *hw = &igb->hw;
	uint32_t i, mrqc, vmdctl, rxcsum;
	uint32_t ring_per_group;
	int shift_group0, shift_group1;
	uint32_t random;
	union e1000_reta {
		uint32_t	dword;
		uint8_t		bytes[4];
	} reta;

	ring_per_group = igb->num_rx_rings / igb->num_rx_groups;

	/* Setup the Redirection Table, it is shared between two groups */
	shift_group0 = 2;
	shift_group1 = 6;
	/*
	 * Each RETA byte carries the ring selection for both groups:
	 * group 0's ring index is placed at shift_group0 and group 1's
	 * ring (ring_per_group + index) at shift_group1.  The 128-entry
	 * table is written one 32-bit dword (4 entries) at a time.
	 */
	for (i = 0; i < (32 * 4); i++) {
		reta.bytes[i & 3] = ((i % ring_per_group) << shift_group0) |
		    ((ring_per_group + (i % ring_per_group)) << shift_group1);
		if ((i & 3) == 3) {
			E1000_WRITE_REG(hw,
			    (E1000_RETA(0) + (i & ~3)), reta.dword);
		}
	}

	/* Fill out hash function seeds: ten 32-bit RSS random key words */
	for (i = 0; i < 10; i++) {
		(void) random_get_pseudo_bytes((uint8_t *)&random,
		    sizeof (uint32_t));
		E1000_WRITE_REG(hw, E1000_RSSRK(i), random);
	}

	/*
	 * Setup the Multiple Receive Queue Control register,
	 * enable VMDq based on packet destination MAC address and RSS.
	 * The RSS_FIELD bits select which header fields feed the hash.
	 */
	mrqc = E1000_MRQC_ENABLE_VMDQ_MAC_RSS_GROUP;
	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
	    E1000_MRQC_RSS_FIELD_IPV4_TCP |
	    E1000_MRQC_RSS_FIELD_IPV6 |
	    E1000_MRQC_RSS_FIELD_IPV6_TCP |
	    E1000_MRQC_RSS_FIELD_IPV4_UDP |
	    E1000_MRQC_RSS_FIELD_IPV6_UDP |
	    E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
	    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);

	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);


	/* Define the default group and default queues */
	vmdctl = E1000_VMDQ_MAC_GROUP_DEFAULT_QUEUE;
	E1000_WRITE_REG(hw, E1000_VT_CTL, vmdctl);

	/*
	 * Disable Packet Checksum to enable RSS for multiple receive queues.
	 *
	 * The Packet Checksum is not ethernet CRC. It is another kind of
	 * checksum offloading provided by the 82575 chipset besides the IP
	 * header checksum offloading and the TCP/UDP checksum offloading.
	 * The Packet Checksum is by default computed over the entire packet
	 * from the first byte of the DA through the last byte of the CRC,
	 * including the Ethernet and IP headers.
	 *
	 * It is a hardware limitation that Packet Checksum is mutually
	 * exclusive with RSS.
	 */
	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
	rxcsum |= E1000_RXCSUM_PCSD;
	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
}
2350 
2351 /*
2352  * igb_setup_mac_classify - Setup MAC classification feature
2353  */
2354 static void
2355 igb_setup_mac_classify(igb_t *igb)
2356 {
2357 	struct e1000_hw *hw = &igb->hw;
2358 	uint32_t mrqc, rxcsum;
2359 
2360 	/*
2361 	 * Setup the Multiple Receive Queue Control register,
2362 	 * enable VMDq based on packet destination MAC address.
2363 	 */
2364 	mrqc = E1000_MRQC_ENABLE_VMDQ_MAC_GROUP;
2365 	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2366 
2367 	/*
2368 	 * Disable Packet Checksum to enable RSS for multiple receive queues.
2369 	 *
2370 	 * The Packet Checksum is not ethernet CRC. It is another kind of
2371 	 * checksum offloading provided by the 82575 chipset besides the IP
2372 	 * header checksum offloading and the TCP/UDP checksum offloading.
2373 	 * The Packet Checksum is by default computed over the entire packet
2374 	 * from the first byte of the DA through the last byte of the CRC,
2375 	 * including the Ethernet and IP headers.
2376 	 *
2377 	 * It is a hardware limitation that Packet Checksum is mutually
2378 	 * exclusive with RSS.
2379 	 */
2380 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
2381 	rxcsum |= E1000_RXCSUM_PCSD;
2382 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
2383 
2384 }
2385 
2386 /*
2387  * igb_init_unicst - Initialize the unicast addresses
2388  */
2389 static void
2390 igb_init_unicst(igb_t *igb)
2391 {
2392 	struct e1000_hw *hw = &igb->hw;
2393 	int slot;
2394 
2395 	/*
2396 	 * Here we should consider two situations:
2397 	 *
2398 	 * 1. Chipset is initialized the first time
2399 	 *    Initialize the multiple unicast addresses, and
2400 	 *    save the default MAC address.
2401 	 *
2402 	 * 2. Chipset is reset
2403 	 *    Recover the multiple unicast addresses from the
2404 	 *    software data structure to the RAR registers.
2405 	 */
2406 
2407 	/*
2408 	 * Clear the default MAC address in the RAR0 rgister,
2409 	 * which is loaded from EEPROM when system boot or chipreset,
2410 	 * this will cause the conficts with add_mac/rem_mac entry
2411 	 * points when VMDq is enabled. For this reason, the RAR0
2412 	 * must be cleared for both cases mentioned above.
2413 	 */
2414 	e1000_rar_clear(hw, 0);
2415 
2416 	if (!igb->unicst_init) {
2417 
2418 		/* Initialize the multiple unicast addresses */
2419 		igb->unicst_total = MAX_NUM_UNICAST_ADDRESSES;
2420 		igb->unicst_avail = igb->unicst_total;
2421 
2422 		for (slot = 0; slot < igb->unicst_total; slot++)
2423 			igb->unicst_addr[slot].mac.set = 0;
2424 
2425 		igb->unicst_init = B_TRUE;
2426 	} else {
2427 		/* Re-configure the RAR registers */
2428 		for (slot = 0; slot < igb->unicst_total; slot++) {
2429 			e1000_rar_set_vmdq(hw, igb->unicst_addr[slot].mac.addr,
2430 			    slot, igb->vmdq_mode,
2431 			    igb->unicst_addr[slot].mac.group_index);
2432 		}
2433 	}
2434 }
2435 
2436 /*
2437  * igb_unicst_find - Find the slot for the specified unicast address
2438  */
2439 int
2440 igb_unicst_find(igb_t *igb, const uint8_t *mac_addr)
2441 {
2442 	int slot;
2443 
2444 	ASSERT(mutex_owned(&igb->gen_lock));
2445 
2446 	for (slot = 0; slot < igb->unicst_total; slot++) {
2447 		if (bcmp(igb->unicst_addr[slot].mac.addr,
2448 		    mac_addr, ETHERADDRL) == 0)
2449 			return (slot);
2450 	}
2451 
2452 	return (-1);
2453 }
2454 
2455 /*
2456  * igb_unicst_set - Set the unicast address to the specified slot
2457  */
2458 int
2459 igb_unicst_set(igb_t *igb, const uint8_t *mac_addr,
2460     int slot)
2461 {
2462 	struct e1000_hw *hw = &igb->hw;
2463 
2464 	ASSERT(mutex_owned(&igb->gen_lock));
2465 
2466 	/*
2467 	 * Save the unicast address in the software data structure
2468 	 */
2469 	bcopy(mac_addr, igb->unicst_addr[slot].mac.addr, ETHERADDRL);
2470 
2471 	/*
2472 	 * Set the unicast address to the RAR register
2473 	 */
2474 	e1000_rar_set(hw, (uint8_t *)mac_addr, slot);
2475 
2476 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
2477 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
2478 		return (EIO);
2479 	}
2480 
2481 	return (0);
2482 }
2483 
2484 /*
2485  * igb_multicst_add - Add a multicst address
2486  */
2487 int
2488 igb_multicst_add(igb_t *igb, const uint8_t *multiaddr)
2489 {
2490 	struct ether_addr *new_table;
2491 	size_t new_len;
2492 	size_t old_len;
2493 
2494 	ASSERT(mutex_owned(&igb->gen_lock));
2495 
2496 	if ((multiaddr[0] & 01) == 0) {
2497 		igb_error(igb, "Illegal multicast address");
2498 		return (EINVAL);
2499 	}
2500 
2501 	if (igb->mcast_count >= igb->mcast_max_num) {
2502 		igb_error(igb, "Adapter requested more than %d mcast addresses",
2503 		    igb->mcast_max_num);
2504 		return (ENOENT);
2505 	}
2506 
2507 	if (igb->mcast_count == igb->mcast_alloc_count) {
2508 		old_len = igb->mcast_alloc_count *
2509 		    sizeof (struct ether_addr);
2510 		new_len = (igb->mcast_alloc_count + MCAST_ALLOC_COUNT) *
2511 		    sizeof (struct ether_addr);
2512 
2513 		new_table = kmem_alloc(new_len, KM_NOSLEEP);
2514 		if (new_table == NULL) {
2515 			igb_error(igb,
2516 			    "Not enough memory to alloc mcast table");
2517 			return (ENOMEM);
2518 		}
2519 
2520 		if (igb->mcast_table != NULL) {
2521 			bcopy(igb->mcast_table, new_table, old_len);
2522 			kmem_free(igb->mcast_table, old_len);
2523 		}
2524 		igb->mcast_alloc_count += MCAST_ALLOC_COUNT;
2525 		igb->mcast_table = new_table;
2526 	}
2527 
2528 	bcopy(multiaddr,
2529 	    &igb->mcast_table[igb->mcast_count], ETHERADDRL);
2530 	igb->mcast_count++;
2531 
2532 	/*
2533 	 * Update the multicast table in the hardware
2534 	 */
2535 	igb_setup_multicst(igb);
2536 
2537 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
2538 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
2539 		return (EIO);
2540 	}
2541 
2542 	return (0);
2543 }
2544 
2545 /*
2546  * igb_multicst_remove - Remove a multicst address
2547  */
2548 int
2549 igb_multicst_remove(igb_t *igb, const uint8_t *multiaddr)
2550 {
2551 	struct ether_addr *new_table;
2552 	size_t new_len;
2553 	size_t old_len;
2554 	int i;
2555 
2556 	ASSERT(mutex_owned(&igb->gen_lock));
2557 
2558 	for (i = 0; i < igb->mcast_count; i++) {
2559 		if (bcmp(multiaddr, &igb->mcast_table[i],
2560 		    ETHERADDRL) == 0) {
2561 			for (i++; i < igb->mcast_count; i++) {
2562 				igb->mcast_table[i - 1] =
2563 				    igb->mcast_table[i];
2564 			}
2565 			igb->mcast_count--;
2566 			break;
2567 		}
2568 	}
2569 
2570 	if ((igb->mcast_alloc_count - igb->mcast_count) >
2571 	    MCAST_ALLOC_COUNT) {
2572 		old_len = igb->mcast_alloc_count *
2573 		    sizeof (struct ether_addr);
2574 		new_len = (igb->mcast_alloc_count - MCAST_ALLOC_COUNT) *
2575 		    sizeof (struct ether_addr);
2576 
2577 		new_table = kmem_alloc(new_len, KM_NOSLEEP);
2578 		if (new_table != NULL) {
2579 			bcopy(igb->mcast_table, new_table, new_len);
2580 			kmem_free(igb->mcast_table, old_len);
2581 			igb->mcast_alloc_count -= MCAST_ALLOC_COUNT;
2582 			igb->mcast_table = new_table;
2583 		}
2584 	}
2585 
2586 	/*
2587 	 * Update the multicast table in the hardware
2588 	 */
2589 	igb_setup_multicst(igb);
2590 
2591 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
2592 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
2593 		return (EIO);
2594 	}
2595 
2596 	return (0);
2597 }
2598 
2599 static void
2600 igb_release_multicast(igb_t *igb)
2601 {
2602 	if (igb->mcast_table != NULL) {
2603 		kmem_free(igb->mcast_table,
2604 		    igb->mcast_alloc_count * sizeof (struct ether_addr));
2605 		igb->mcast_table = NULL;
2606 	}
2607 }
2608 
2609 /*
2610  * igb_setup_multicast - setup multicast data structures
2611  *
2612  * This routine initializes all of the multicast related structures
2613  * and save them in the hardware registers.
2614  */
2615 static void
2616 igb_setup_multicst(igb_t *igb)
2617 {
2618 	uint8_t *mc_addr_list;
2619 	uint32_t mc_addr_count;
2620 	struct e1000_hw *hw = &igb->hw;
2621 
2622 	ASSERT(mutex_owned(&igb->gen_lock));
2623 	ASSERT(igb->mcast_count <= igb->mcast_max_num);
2624 
2625 	mc_addr_list = (uint8_t *)igb->mcast_table;
2626 	mc_addr_count = igb->mcast_count;
2627 
2628 	/*
2629 	 * Update the multicase addresses to the MTA registers
2630 	 */
2631 	e1000_update_mc_addr_list(hw, mc_addr_list, mc_addr_count);
2632 }
2633 
2634 /*
2635  * igb_get_conf - Get driver configurations set in driver.conf
2636  *
2637  * This routine gets user-configured values out of the configuration
2638  * file igb.conf.
2639  *
2640  * For each configurable value, there is a minimum, a maximum, and a
2641  * default.
2642  * If user does not configure a value, use the default.
2643  * If user configures below the minimum, use the minumum.
2644  * If user configures above the maximum, use the maxumum.
2645  */
static void
igb_get_conf(igb_t *igb)
{
	struct e1000_hw *hw = &igb->hw;
	uint32_t default_mtu;
	uint32_t flow_control;
	uint32_t ring_per_group;
	int i;

	/*
	 * igb driver supports the following user configurations:
	 *
	 * Link configurations:
	 *    adv_autoneg_cap
	 *    adv_1000fdx_cap
	 *    adv_100fdx_cap
	 *    adv_100hdx_cap
	 *    adv_10fdx_cap
	 *    adv_10hdx_cap
	 * Note: 1000hdx is not supported.
	 *
	 * Jumbo frame configuration:
	 *    default_mtu
	 *
	 * Ethernet flow control configuration:
	 *    flow_control
	 *
	 * Multiple rings configurations:
	 *    tx_queue_number
	 *    tx_ring_size
	 *    rx_queue_number
	 *    rx_ring_size
	 *
	 * Call igb_get_prop() to get the value for a specific
	 * configuration parameter.
	 */

	/*
	 * Link configurations
	 */
	igb->param_adv_autoneg_cap = igb_get_prop(igb,
	    PROP_ADV_AUTONEG_CAP, 0, 1, 1);
	igb->param_adv_1000fdx_cap = igb_get_prop(igb,
	    PROP_ADV_1000FDX_CAP, 0, 1, 1);
	igb->param_adv_100fdx_cap = igb_get_prop(igb,
	    PROP_ADV_100FDX_CAP, 0, 1, 1);
	igb->param_adv_100hdx_cap = igb_get_prop(igb,
	    PROP_ADV_100HDX_CAP, 0, 1, 1);
	igb->param_adv_10fdx_cap = igb_get_prop(igb,
	    PROP_ADV_10FDX_CAP, 0, 1, 1);
	igb->param_adv_10hdx_cap = igb_get_prop(igb,
	    PROP_ADV_10HDX_CAP, 0, 1, 1);

	/*
	 * Jumbo frame configurations
	 */
	default_mtu = igb_get_prop(igb, PROP_DEFAULT_MTU,
	    MIN_MTU, MAX_MTU, DEFAULT_MTU);

	/* Frame size = MTU + ethernet/VLAN header + frame check sequence */
	igb->max_frame_size = default_mtu +
	    sizeof (struct ether_vlan_header) + ETHERFCSL;

	/*
	 * Ethernet flow control configuration
	 * Valid values are e1000_fc_none..e1000_fc_full; the out-of-enum
	 * value 4 selects the chip's own default (e1000_fc_default).
	 */
	flow_control = igb_get_prop(igb, PROP_FLOW_CONTROL,
	    e1000_fc_none, 4, e1000_fc_full);
	if (flow_control == 4)
		flow_control = e1000_fc_default;

	hw->fc.requested_mode = flow_control;

	/*
	 * Multiple rings configurations
	 */
	igb->tx_ring_size = igb_get_prop(igb, PROP_TX_RING_SIZE,
	    MIN_TX_RING_SIZE, MAX_TX_RING_SIZE, DEFAULT_TX_RING_SIZE);
	igb->rx_ring_size = igb_get_prop(igb, PROP_RX_RING_SIZE,
	    MIN_RX_RING_SIZE, MAX_RX_RING_SIZE, DEFAULT_RX_RING_SIZE);

	igb->mr_enable = igb_get_prop(igb, PROP_MR_ENABLE, 0, 1, 0);
	igb->num_rx_groups = igb_get_prop(igb, PROP_RX_GROUP_NUM,
	    MIN_RX_GROUP_NUM, MAX_RX_GROUP_NUM, DEFAULT_RX_GROUP_NUM);
	/*
	 * VMDq is not supported on 82576 and later chipsets here, so
	 * force a single rx group for mac.type >= e1000_82576.
	 */
	if (hw->mac.type >= e1000_82576)
		igb->num_rx_groups = 1;

	if (igb->mr_enable) {
		/* Multiple rings enabled: use the chip's capability limits */
		igb->num_tx_rings = igb->capab->def_tx_que_num;
		igb->num_rx_rings = igb->capab->def_rx_que_num;
	} else {
		igb->num_tx_rings = 1;
		igb->num_rx_rings = 1;

		/* Multiple groups require multiple rings */
		if (igb->num_rx_groups > 1) {
			igb_error(igb,
			    "Invalid rx groups number. Please enable multiple "
			    "rings first");
			igb->num_rx_groups = 1;
		}
	}

	/*
	 * Check the divisibility between rx rings and rx groups.
	 * Walk down from the requested group count to the largest
	 * value that evenly divides num_rx_rings (1 always works).
	 */
	for (i = igb->num_rx_groups; i > 0; i--) {
		if ((igb->num_rx_rings % i) == 0)
			break;
	}
	if (i != igb->num_rx_groups) {
		igb_error(igb,
		    "Invalid rx groups number. Downgrade the rx group "
		    "number to %d.", i);
		igb->num_rx_groups = i;
	}

	/*
	 * Get the ring number per group.
	 */
	ring_per_group = igb->num_rx_rings / igb->num_rx_groups;

	if (igb->num_rx_groups == 1) {
		/*
		 * One rx ring group, the rx ring number is num_rx_rings.
		 */
		igb->vmdq_mode = E1000_VMDQ_OFF;
	} else if (ring_per_group == 1) {
		/*
		 * Multiple rx groups, each group has one rx ring.
		 */
		igb->vmdq_mode = E1000_VMDQ_MAC;
	} else {
		/*
		 * Multiple groups and multiple rings.
		 */
		igb->vmdq_mode = E1000_VMDQ_MAC_RSS;
	}

	/*
	 * Tunable used to force an interrupt type. The only use is
	 * for testing of the lesser interrupt types.
	 * 0 = don't force interrupt type
	 * 1 = force interrupt type MSIX
	 * 2 = force interrupt type MSI
	 * 3 = force interrupt type Legacy
	 */
	igb->intr_force = igb_get_prop(igb, PROP_INTR_FORCE,
	    IGB_INTR_NONE, IGB_INTR_LEGACY, IGB_INTR_NONE);

	/* Hardware offload capabilities, all enabled by default */
	igb->tx_hcksum_enable = igb_get_prop(igb, PROP_TX_HCKSUM_ENABLE,
	    0, 1, 1);
	igb->rx_hcksum_enable = igb_get_prop(igb, PROP_RX_HCKSUM_ENABLE,
	    0, 1, 1);
	igb->lso_enable = igb_get_prop(igb, PROP_LSO_ENABLE,
	    0, 1, 1);
	igb->tx_head_wb_enable = igb_get_prop(igb, PROP_TX_HEAD_WB_ENABLE,
	    0, 1, 1);

	/*
	 * igb LSO needs the tx h/w checksum support.
	 * Here LSO will be disabled if tx h/w checksum has been disabled.
	 */
	if (igb->tx_hcksum_enable == B_FALSE)
		igb->lso_enable = B_FALSE;

	/* Transmit-path thresholds */
	igb->tx_copy_thresh = igb_get_prop(igb, PROP_TX_COPY_THRESHOLD,
	    MIN_TX_COPY_THRESHOLD, MAX_TX_COPY_THRESHOLD,
	    DEFAULT_TX_COPY_THRESHOLD);
	igb->tx_recycle_thresh = igb_get_prop(igb, PROP_TX_RECYCLE_THRESHOLD,
	    MIN_TX_RECYCLE_THRESHOLD, MAX_TX_RECYCLE_THRESHOLD,
	    DEFAULT_TX_RECYCLE_THRESHOLD);
	igb->tx_overload_thresh = igb_get_prop(igb, PROP_TX_OVERLOAD_THRESHOLD,
	    MIN_TX_OVERLOAD_THRESHOLD, MAX_TX_OVERLOAD_THRESHOLD,
	    DEFAULT_TX_OVERLOAD_THRESHOLD);
	igb->tx_resched_thresh = igb_get_prop(igb, PROP_TX_RESCHED_THRESHOLD,
	    MIN_TX_RESCHED_THRESHOLD, MAX_TX_RESCHED_THRESHOLD,
	    DEFAULT_TX_RESCHED_THRESHOLD);

	/* Receive-path thresholds */
	igb->rx_copy_thresh = igb_get_prop(igb, PROP_RX_COPY_THRESHOLD,
	    MIN_RX_COPY_THRESHOLD, MAX_RX_COPY_THRESHOLD,
	    DEFAULT_RX_COPY_THRESHOLD);
	igb->rx_limit_per_intr = igb_get_prop(igb, PROP_RX_LIMIT_PER_INTR,
	    MIN_RX_LIMIT_PER_INTR, MAX_RX_LIMIT_PER_INTR,
	    DEFAULT_RX_LIMIT_PER_INTR);

	/* Interrupt throttling bounds come from the chip capability table */
	igb->intr_throttling[0] = igb_get_prop(igb, PROP_INTR_THROTTLING,
	    igb->capab->min_intr_throttle,
	    igb->capab->max_intr_throttle,
	    igb->capab->def_intr_throttle);

	/*
	 * Max number of multicast addresses
	 */
	igb->mcast_max_num =
	    igb_get_prop(igb, PROP_MCAST_MAX_NUM,
	    MIN_MCAST_NUM, MAX_MCAST_NUM, DEFAULT_MCAST_NUM);
}
2846 
2847 /*
2848  * igb_get_prop - Get a property value out of the configuration file igb.conf
2849  *
2850  * Caller provides the name of the property, a default value, a minimum
2851  * value, and a maximum value.
2852  *
2853  * Return configured value of the property, with default, minimum and
2854  * maximum properly applied.
2855  */
2856 static int
2857 igb_get_prop(igb_t *igb,
2858     char *propname,	/* name of the property */
2859     int minval,		/* minimum acceptable value */
2860     int maxval,		/* maximim acceptable value */
2861     int defval)		/* default value */
2862 {
2863 	int value;
2864 
2865 	/*
2866 	 * Call ddi_prop_get_int() to read the conf settings
2867 	 */
2868 	value = ddi_prop_get_int(DDI_DEV_T_ANY, igb->dip,
2869 	    DDI_PROP_DONTPASS, propname, defval);
2870 
2871 	if (value > maxval)
2872 		value = maxval;
2873 
2874 	if (value < minval)
2875 		value = minval;
2876 
2877 	return (value);
2878 }
2879 
2880 /*
2881  * igb_setup_link - Using the link properties to setup the link
2882  */
int
igb_setup_link(igb_t *igb, boolean_t setup_hw)
{
	struct e1000_mac_info *mac;
	struct e1000_phy_info *phy;
	boolean_t invalid;

	mac = &igb->hw.mac;
	phy = &igb->hw.phy;
	invalid = B_FALSE;

	if (igb->param_adv_autoneg_cap == 1) {
		/* Autonegotiate, advertising every enabled capability */
		mac->autoneg = B_TRUE;
		phy->autoneg_advertised = 0;

		/*
		 * 1000hdx is not supported for autonegotiation
		 */
		if (igb->param_adv_1000fdx_cap == 1)
			phy->autoneg_advertised |= ADVERTISE_1000_FULL;

		if (igb->param_adv_100fdx_cap == 1)
			phy->autoneg_advertised |= ADVERTISE_100_FULL;

		if (igb->param_adv_100hdx_cap == 1)
			phy->autoneg_advertised |= ADVERTISE_100_HALF;

		if (igb->param_adv_10fdx_cap == 1)
			phy->autoneg_advertised |= ADVERTISE_10_FULL;

		if (igb->param_adv_10hdx_cap == 1)
			phy->autoneg_advertised |= ADVERTISE_10_HALF;

		/* Autoneg with nothing advertised is an invalid config */
		if (phy->autoneg_advertised == 0)
			invalid = B_TRUE;
	} else {
		/* Forced link: pick the highest enabled speed/duplex */
		mac->autoneg = B_FALSE;

		/*
		 * 1000fdx and 1000hdx are not supported for forced link
		 */
		if (igb->param_adv_100fdx_cap == 1)
			mac->forced_speed_duplex = ADVERTISE_100_FULL;
		else if (igb->param_adv_100hdx_cap == 1)
			mac->forced_speed_duplex = ADVERTISE_100_HALF;
		else if (igb->param_adv_10fdx_cap == 1)
			mac->forced_speed_duplex = ADVERTISE_10_FULL;
		else if (igb->param_adv_10hdx_cap == 1)
			mac->forced_speed_duplex = ADVERTISE_10_HALF;
		else
			invalid = B_TRUE;
	}

	if (invalid) {
		/* Fall back to autonegotiation with full capabilities */
		igb_notice(igb, "Invalid link settings. Setup link to "
		    "autonegotiation with full link capabilities.");
		mac->autoneg = B_TRUE;
		phy->autoneg_advertised = ADVERTISE_1000_FULL |
		    ADVERTISE_100_FULL | ADVERTISE_100_HALF |
		    ADVERTISE_10_FULL | ADVERTISE_10_HALF;
	}

	/* Optionally push the settings down to the hardware now */
	if (setup_hw) {
		if (e1000_setup_link(&igb->hw) != E1000_SUCCESS)
			return (IGB_FAILURE);
	}

	return (IGB_SUCCESS);
}
2952 
2953 
2954 /*
2955  * igb_is_link_up - Check if the link is up
2956  */
static boolean_t
igb_is_link_up(igb_t *igb)
{
	struct e1000_hw *hw = &igb->hw;
	boolean_t link_up = B_FALSE;

	ASSERT(mutex_owned(&igb->gen_lock));

	/*
	 * get_link_status is set in the interrupt handler on link-status-change
	 * or rx sequence error interrupt.  get_link_status will stay
	 * false until the e1000_check_for_link establishes link only
	 * for copper adapters.
	 *
	 * Any media type not handled below leaves link_up at B_FALSE.
	 */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			/* Pending status change: re-query the hardware */
			(void) e1000_check_for_link(hw);
			link_up = !hw->mac.get_link_status;
		} else {
			/* No change reported since last check: still up */
			link_up = B_TRUE;
		}
		break;
	case e1000_media_type_fiber:
		/* Fiber: read Link Up directly from the STATUS register */
		(void) e1000_check_for_link(hw);
		link_up = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU);
		break;
	case e1000_media_type_internal_serdes:
		(void) e1000_check_for_link(hw);
		link_up = hw->mac.serdes_has_link;
		break;
	}

	return (link_up);
}
2992 
2993 /*
2994  * igb_link_check - Link status processing
2995  */
static boolean_t
igb_link_check(igb_t *igb)
{
	struct e1000_hw *hw = &igb->hw;
	uint16_t speed = 0, duplex = 0;
	boolean_t link_changed = B_FALSE;

	ASSERT(mutex_owned(&igb->gen_lock));

	if (igb_is_link_up(igb)) {
		/*
		 * The Link is up, check whether it was marked as down earlier
		 */
		if (igb->link_state != LINK_STATE_UP) {
			(void) e1000_get_speed_and_duplex(hw, &speed, &duplex);
			igb->link_speed = speed;
			igb->link_duplex = duplex;
			igb->link_state = LINK_STATE_UP;
			igb->link_down_timeout = 0;
			link_changed = B_TRUE;
			/* Link came up: cancel a still-pending setup timer */
			if (!igb->link_complete)
				igb_stop_link_timer(igb);
		}
	} else if (igb->link_complete) {
		/*
		 * Only report link-down after link setup has finished;
		 * a transition during setup is expected and ignored.
		 */
		if (igb->link_state != LINK_STATE_DOWN) {
			igb->link_speed = 0;
			igb->link_duplex = 0;
			igb->link_state = LINK_STATE_DOWN;
			link_changed = B_TRUE;
		}

		if (igb->igb_state & IGB_STARTED) {
			/*
			 * After MAX_LINK_DOWN_TIMEOUT consecutive ticks
			 * with link down, clean the pending tx packets
			 * exactly once (the counter then moves past the
			 * threshold so the clean is not repeated).
			 */
			if (igb->link_down_timeout < MAX_LINK_DOWN_TIMEOUT) {
				igb->link_down_timeout++;
			} else if (igb->link_down_timeout ==
			    MAX_LINK_DOWN_TIMEOUT) {
				igb_tx_clean(igb);
				igb->link_down_timeout++;
			}
		}
	}

	/* FMA: a failed register access overrides any state change */
	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		return (B_FALSE);
	}

	return (link_changed);
}
3045 
3046 /*
3047  * igb_local_timer - driver watchdog function
3048  *
3049  * This function will handle the hardware stall check, link status
3050  * check and other routines.
3051  */
static void
igb_local_timer(void *arg)
{
	igb_t *igb = (igb_t *)arg;
	boolean_t link_changed = B_FALSE;

	/*
	 * Error state: attempt a chip reset, then re-arm the watchdog
	 * and skip the normal link processing for this tick.
	 */
	if (igb->igb_state & IGB_ERROR) {
		igb->reset_count++;
		if (igb_reset(igb) == IGB_SUCCESS)
			ddi_fm_service_impact(igb->dip, DDI_SERVICE_RESTORED);

		igb_restart_watchdog_timer(igb);
		return;
	}

	/*
	 * Transmit stall detected (either freshly, or flagged earlier):
	 * report it via FMA and reset the chip.
	 */
	if (igb_stall_check(igb) || (igb->igb_state & IGB_STALL)) {
		igb_fm_ereport(igb, DDI_FM_DEVICE_STALL);
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
		igb->reset_count++;
		if (igb_reset(igb) == IGB_SUCCESS)
			ddi_fm_service_impact(igb->dip, DDI_SERVICE_RESTORED);

		igb_restart_watchdog_timer(igb);
		return;
	}

	/* Normal path: poll link state while the interface is running */
	mutex_enter(&igb->gen_lock);
	if (!(igb->igb_state & IGB_SUSPENDED) && (igb->igb_state & IGB_STARTED))
		link_changed = igb_link_check(igb);
	mutex_exit(&igb->gen_lock);

	/* Notify the MAC layer outside of gen_lock */
	if (link_changed)
		mac_link_update(igb->mac_hdl, igb->link_state);

	igb_restart_watchdog_timer(igb);
}
3088 
3089 /*
3090  * igb_link_timer - link setup timer function
3091  *
3092  * It is called when the timer for link setup is expired, which indicates
3093  * the completion of the link setup. The link state will not be updated
3094  * until the link setup is completed. And the link state will not be sent
3095  * to the upper layer through mac_link_update() in this function. It will
3096  * be updated in the local timer routine or the interrupts service routine
3097  * after the interface is started (plumbed).
3098  */
3099 static void
3100 igb_link_timer(void *arg)
3101 {
3102 	igb_t *igb = (igb_t *)arg;
3103 
3104 	mutex_enter(&igb->link_lock);
3105 	igb->link_complete = B_TRUE;
3106 	igb->link_tid = 0;
3107 	mutex_exit(&igb->link_lock);
3108 }
3109 /*
3110  * igb_stall_check - check for transmit stall
3111  *
3112  * This function checks if the adapter is stalled (in transmit).
3113  *
3114  * It is called each time the watchdog timeout is invoked.
3115  * If the transmit descriptor reclaim continuously fails,
3116  * the watchdog value will increment by 1. If the watchdog
3117  * value exceeds the threshold, the igb is assumed to
3118  * have stalled and need to be reset.
3119  */
static boolean_t
igb_stall_check(igb_t *igb)
{
	igb_tx_ring_t *tx_ring;
	struct e1000_hw *hw = &igb->hw;
	boolean_t result;
	int i;

	/* Without link, tx cannot progress; that is not a stall */
	if (igb->link_state != LINK_STATE_UP)
		return (B_FALSE);

	/*
	 * If any tx ring is stalled, we'll reset the chipset
	 */
	result = B_FALSE;
	for (i = 0; i < igb->num_tx_rings; i++) {
		tx_ring = &igb->tx_rings[i];

		/*
		 * Count consecutive ticks on which descriptor reclaim
		 * failed; any successful reclaim resets the count.
		 */
		if (tx_ring->recycle_fail > 0)
			tx_ring->stall_watchdog++;
		else
			tx_ring->stall_watchdog = 0;

		if (tx_ring->stall_watchdog >= STALL_WATCHDOG_TIMEOUT) {
			result = B_TRUE;
			/* 82580 recovers only via a global device reset */
			if (hw->mac.type == e1000_82580) {
				hw->dev_spec._82575.global_device_reset
				    = B_TRUE;
			}
			break;
		}
	}

	/*
	 * On a stall, tx_ring still points at the ring that tripped
	 * the threshold (the loop broke out on it); clear its counters
	 * before the caller resets the chip.
	 */
	if (result) {
		tx_ring->stall_watchdog = 0;
		tx_ring->recycle_fail = 0;
	}

	return (result);
}
3160 
3161 
3162 /*
3163  * is_valid_mac_addr - Check if the mac address is valid
3164  */
3165 static boolean_t
3166 is_valid_mac_addr(uint8_t *mac_addr)
3167 {
3168 	const uint8_t addr_test1[6] = { 0, 0, 0, 0, 0, 0 };
3169 	const uint8_t addr_test2[6] =
3170 	    { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
3171 
3172 	if (!(bcmp(addr_test1, mac_addr, ETHERADDRL)) ||
3173 	    !(bcmp(addr_test2, mac_addr, ETHERADDRL)))
3174 		return (B_FALSE);
3175 
3176 	return (B_TRUE);
3177 }
3178 
static boolean_t
igb_find_mac_address(igb_t *igb)
{
	struct e1000_hw *hw = &igb->hw;
#ifdef __sparc
	uchar_t *bytes;
	struct ether_addr sysaddr;
	uint_t nelts;
	int err;
	boolean_t found = B_FALSE;

	/*
	 * The "vendor's factory-set address" may already have
	 * been extracted from the chip, but if the property
	 * "local-mac-address" is set we use that instead.
	 *
	 * We check whether it looks like an array of 6
	 * bytes (which it should, if OBP set it).  If we can't
	 * make sense of it this way, we'll ignore it.
	 */
	err = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, igb->dip,
	    DDI_PROP_DONTPASS, "local-mac-address", &bytes, &nelts);
	if (err == DDI_PROP_SUCCESS) {
		if (nelts == ETHERADDRL) {
			while (nelts--)
				hw->mac.addr[nelts] = bytes[nelts];
			found = B_TRUE;
		}
		ddi_prop_free(bytes);
	}

	/*
	 * Look up the OBP property "local-mac-address?". If the user has set
	 * 'local-mac-address? = false', use "the system address" instead.
	 */
	if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, igb->dip, 0,
	    "local-mac-address?", &bytes, &nelts) == DDI_PROP_SUCCESS) {
		if (strncmp("false", (caddr_t)bytes, (size_t)nelts) == 0) {
			if (localetheraddr(NULL, &sysaddr) != 0) {
				bcopy(&sysaddr, hw->mac.addr, ETHERADDRL);
				found = B_TRUE;
			}
		}
		ddi_prop_free(bytes);
	}

	/*
	 * Finally(!), if there's a valid "mac-address" property (created
	 * if we netbooted from this interface), we must use this instead
	 * of any of the above to ensure that the NFS/install server doesn't
	 * get confused by the address changing as Solaris takes over!
	 */
	err = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, igb->dip,
	    DDI_PROP_DONTPASS, "mac-address", &bytes, &nelts);
	if (err == DDI_PROP_SUCCESS) {
		if (nelts == ETHERADDRL) {
			while (nelts--)
				hw->mac.addr[nelts] = bytes[nelts];
			found = B_TRUE;
		}
		ddi_prop_free(bytes);
	}

	/*
	 * A property-supplied address also becomes the "permanent"
	 * address, so it survives later resets of mac.addr.
	 */
	if (found) {
		bcopy(hw->mac.addr, hw->mac.perm_addr, ETHERADDRL);
		return (B_TRUE);
	}
#endif

	/*
	 * Read the device MAC address from the EEPROM
	 * (non-sparc platforms, or sparc without any of the
	 * properties above).
	 */
	if (e1000_read_mac_addr(hw) != E1000_SUCCESS)
		return (B_FALSE);

	return (B_TRUE);
}
3256 
3257 #pragma inline(igb_arm_watchdog_timer)
3258 
3259 static void
3260 igb_arm_watchdog_timer(igb_t *igb)
3261 {
3262 	/*
3263 	 * Fire a watchdog timer
3264 	 */
3265 	igb->watchdog_tid =
3266 	    timeout(igb_local_timer,
3267 	    (void *)igb, 1 * drv_usectohz(1000000));
3268 
3269 }
3270 
3271 /*
3272  * igb_enable_watchdog_timer - Enable and start the driver watchdog timer
3273  */
3274 void
3275 igb_enable_watchdog_timer(igb_t *igb)
3276 {
3277 	mutex_enter(&igb->watchdog_lock);
3278 
3279 	if (!igb->watchdog_enable) {
3280 		igb->watchdog_enable = B_TRUE;
3281 		igb->watchdog_start = B_TRUE;
3282 		igb_arm_watchdog_timer(igb);
3283 	}
3284 
3285 	mutex_exit(&igb->watchdog_lock);
3286 
3287 }
3288 
3289 /*
3290  * igb_disable_watchdog_timer - Disable and stop the driver watchdog timer
3291  */
3292 void
3293 igb_disable_watchdog_timer(igb_t *igb)
3294 {
3295 	timeout_id_t tid;
3296 
3297 	mutex_enter(&igb->watchdog_lock);
3298 
3299 	igb->watchdog_enable = B_FALSE;
3300 	igb->watchdog_start = B_FALSE;
3301 	tid = igb->watchdog_tid;
3302 	igb->watchdog_tid = 0;
3303 
3304 	mutex_exit(&igb->watchdog_lock);
3305 
3306 	if (tid != 0)
3307 		(void) untimeout(tid);
3308 
3309 }
3310 
3311 /*
3312  * igb_start_watchdog_timer - Start the driver watchdog timer
3313  */
3314 static void
3315 igb_start_watchdog_timer(igb_t *igb)
3316 {
3317 	mutex_enter(&igb->watchdog_lock);
3318 
3319 	if (igb->watchdog_enable) {
3320 		if (!igb->watchdog_start) {
3321 			igb->watchdog_start = B_TRUE;
3322 			igb_arm_watchdog_timer(igb);
3323 		}
3324 	}
3325 
3326 	mutex_exit(&igb->watchdog_lock);
3327 }
3328 
3329 /*
3330  * igb_restart_watchdog_timer - Restart the driver watchdog timer
3331  */
3332 static void
3333 igb_restart_watchdog_timer(igb_t *igb)
3334 {
3335 	mutex_enter(&igb->watchdog_lock);
3336 
3337 	if (igb->watchdog_start)
3338 		igb_arm_watchdog_timer(igb);
3339 
3340 	mutex_exit(&igb->watchdog_lock);
3341 }
3342 
3343 /*
3344  * igb_stop_watchdog_timer - Stop the driver watchdog timer
3345  */
3346 static void
3347 igb_stop_watchdog_timer(igb_t *igb)
3348 {
3349 	timeout_id_t tid;
3350 
3351 	mutex_enter(&igb->watchdog_lock);
3352 
3353 	igb->watchdog_start = B_FALSE;
3354 	tid = igb->watchdog_tid;
3355 	igb->watchdog_tid = 0;
3356 
3357 	mutex_exit(&igb->watchdog_lock);
3358 
3359 	if (tid != 0)
3360 		(void) untimeout(tid);
3361 }
3362 
3363 /*
3364  * igb_start_link_timer - Start the link setup timer
3365  */
3366 static void
3367 igb_start_link_timer(struct igb *igb)
3368 {
3369 	struct e1000_hw *hw = &igb->hw;
3370 	clock_t link_timeout;
3371 
3372 	if (hw->mac.autoneg)
3373 		link_timeout = PHY_AUTO_NEG_LIMIT *
3374 		    drv_usectohz(100000);
3375 	else
3376 		link_timeout = PHY_FORCE_LIMIT * drv_usectohz(100000);
3377 
3378 	mutex_enter(&igb->link_lock);
3379 	if (hw->phy.autoneg_wait_to_complete) {
3380 		igb->link_complete = B_TRUE;
3381 	} else {
3382 		igb->link_complete = B_FALSE;
3383 		igb->link_tid = timeout(igb_link_timer, (void *)igb,
3384 		    link_timeout);
3385 	}
3386 	mutex_exit(&igb->link_lock);
3387 }
3388 
3389 /*
3390  * igb_stop_link_timer - Stop the link setup timer
3391  */
3392 static void
3393 igb_stop_link_timer(struct igb *igb)
3394 {
3395 	timeout_id_t tid;
3396 
3397 	mutex_enter(&igb->link_lock);
3398 	igb->link_complete = B_TRUE;
3399 	tid = igb->link_tid;
3400 	igb->link_tid = 0;
3401 	mutex_exit(&igb->link_lock);
3402 
3403 	if (tid != 0)
3404 		(void) untimeout(tid);
3405 }
3406 
3407 /*
3408  * igb_disable_adapter_interrupts - Clear/disable all hardware interrupts
3409  */
3410 static void
3411 igb_disable_adapter_interrupts(igb_t *igb)
3412 {
3413 	struct e1000_hw *hw = &igb->hw;
3414 
3415 	/*
3416 	 * Set the IMC register to mask all the interrupts,
3417 	 * including the tx interrupts.
3418 	 */
3419 	E1000_WRITE_REG(hw, E1000_IMC, ~0);
3420 	E1000_WRITE_REG(hw, E1000_IAM, 0);
3421 
3422 	/*
3423 	 * Additional disabling for MSI-X
3424 	 */
3425 	if (igb->intr_type == DDI_INTR_TYPE_MSIX) {
3426 		E1000_WRITE_REG(hw, E1000_EIMC, ~0);
3427 		E1000_WRITE_REG(hw, E1000_EIAC, 0);
3428 		E1000_WRITE_REG(hw, E1000_EIAM, 0);
3429 	}
3430 
3431 	E1000_WRITE_FLUSH(hw);
3432 }
3433 
3434 /*
3435  * igb_enable_adapter_interrupts_82580 - Enable NIC interrupts for 82580
3436  */
3437 static void
3438 igb_enable_adapter_interrupts_82580(igb_t *igb)
3439 {
3440 	struct e1000_hw *hw = &igb->hw;
3441 
3442 	/* Clear any pending interrupts */
3443 	(void) E1000_READ_REG(hw, E1000_ICR);
3444 	igb->ims_mask |= E1000_IMS_DRSTA;
3445 
3446 	if (igb->intr_type == DDI_INTR_TYPE_MSIX) {
3447 
3448 		/* Interrupt enabling for MSI-X */
3449 		E1000_WRITE_REG(hw, E1000_EIMS, igb->eims_mask);
3450 		E1000_WRITE_REG(hw, E1000_EIAC, igb->eims_mask);
3451 		igb->ims_mask = (E1000_IMS_LSC | E1000_IMS_DRSTA);
3452 		E1000_WRITE_REG(hw, E1000_IMS, igb->ims_mask);
3453 	} else { /* Interrupt enabling for MSI and legacy */
3454 		E1000_WRITE_REG(hw, E1000_IVAR0, E1000_IVAR_VALID);
3455 		igb->ims_mask = IMS_ENABLE_MASK | E1000_IMS_TXQE;
3456 		igb->ims_mask |= E1000_IMS_DRSTA;
3457 		E1000_WRITE_REG(hw, E1000_IMS, igb->ims_mask);
3458 	}
3459 
3460 	/* Disable auto-mask for ICR interrupt bits */
3461 	E1000_WRITE_REG(hw, E1000_IAM, 0);
3462 
3463 	E1000_WRITE_FLUSH(hw);
3464 }
3465 
3466 /*
3467  * igb_enable_adapter_interrupts_82576 - Enable NIC interrupts for 82576
3468  */
3469 static void
3470 igb_enable_adapter_interrupts_82576(igb_t *igb)
3471 {
3472 	struct e1000_hw *hw = &igb->hw;
3473 
3474 	/* Clear any pending interrupts */
3475 	(void) E1000_READ_REG(hw, E1000_ICR);
3476 
3477 	if (igb->intr_type == DDI_INTR_TYPE_MSIX) {
3478 
3479 		/* Interrupt enabling for MSI-X */
3480 		E1000_WRITE_REG(hw, E1000_EIMS, igb->eims_mask);
3481 		E1000_WRITE_REG(hw, E1000_EIAC, igb->eims_mask);
3482 		igb->ims_mask = E1000_IMS_LSC;
3483 		E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_LSC);
3484 	} else {
3485 		/* Interrupt enabling for MSI and legacy */
3486 		E1000_WRITE_REG(hw, E1000_IVAR0, E1000_IVAR_VALID);
3487 		igb->ims_mask = IMS_ENABLE_MASK | E1000_IMS_TXQE;
3488 		E1000_WRITE_REG(hw, E1000_IMS,
3489 		    (IMS_ENABLE_MASK | E1000_IMS_TXQE));
3490 	}
3491 
3492 	/* Disable auto-mask for ICR interrupt bits */
3493 	E1000_WRITE_REG(hw, E1000_IAM, 0);
3494 
3495 	E1000_WRITE_FLUSH(hw);
3496 }
3497 
3498 /*
3499  * igb_enable_adapter_interrupts_82575 - Enable NIC interrupts for 82575
3500  */
3501 static void
3502 igb_enable_adapter_interrupts_82575(igb_t *igb)
3503 {
3504 	struct e1000_hw *hw = &igb->hw;
3505 	uint32_t reg;
3506 
3507 	/* Clear any pending interrupts */
3508 	(void) E1000_READ_REG(hw, E1000_ICR);
3509 
3510 	if (igb->intr_type == DDI_INTR_TYPE_MSIX) {
3511 		/* Interrupt enabling for MSI-X */
3512 		E1000_WRITE_REG(hw, E1000_EIMS, igb->eims_mask);
3513 		E1000_WRITE_REG(hw, E1000_EIAC, igb->eims_mask);
3514 		igb->ims_mask = E1000_IMS_LSC;
3515 		E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_LSC);
3516 
3517 		/* Enable MSI-X PBA support */
3518 		reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
3519 		reg |= E1000_CTRL_EXT_PBA_CLR;
3520 
3521 		/* Non-selective interrupt clear-on-read */
3522 		reg |= E1000_CTRL_EXT_IRCA;	/* Called NSICR in the EAS */
3523 
3524 		E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg);
3525 	} else {
3526 		/* Interrupt enabling for MSI and legacy */
3527 		igb->ims_mask = IMS_ENABLE_MASK;
3528 		E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK);
3529 	}
3530 
3531 	E1000_WRITE_FLUSH(hw);
3532 }
3533 
3534 /*
3535  * Loopback Support
3536  */
3537 static lb_property_t lb_normal =
3538 	{ normal,	"normal",	IGB_LB_NONE		};
3539 static lb_property_t lb_external =
3540 	{ external,	"External",	IGB_LB_EXTERNAL		};
3541 static lb_property_t lb_phy =
3542 	{ internal,	"PHY",		IGB_LB_INTERNAL_PHY	};
3543 static lb_property_t lb_serdes =
3544 	{ internal,	"SerDes",	IGB_LB_INTERNAL_SERDES	};
3545 
/*
 * igb_loopback_ioctl - Handle the LB_* loopback control ioctls
 *
 * Supports querying the size of the supported-mode table
 * (LB_GET_INFO_SIZE), returning the table (LB_GET_INFO), and
 * getting/setting the current mode (LB_GET_MODE/LB_SET_MODE).
 * Returns IOC_REPLY on success with iocp->ioc_count set to the reply
 * payload size, IOC_INVAL on any malformed or failed request.
 */
enum ioc_reply
igb_loopback_ioctl(igb_t *igb, struct iocblk *iocp, mblk_t *mp)
{
	lb_info_sz_t *lbsp;
	lb_property_t *lbpp;
	struct e1000_hw *hw;
	uint32_t *lbmp;
	uint32_t size;
	uint32_t value;

	hw = &igb->hw;

	/* All of these ioctls carry their payload in the continuation mblk */
	if (mp->b_cont == NULL)
		return (IOC_INVAL);

	switch (iocp->ioc_cmd) {
	default:
		return (IOC_INVAL);

	case LB_GET_INFO_SIZE:
		size = sizeof (lb_info_sz_t);
		if (iocp->ioc_count != size)
			return (IOC_INVAL);

		/*
		 * Advertised modes: normal + external + one internal mode
		 * (PHY for copper, SerDes otherwise).
		 */
		value = sizeof (lb_normal);
		if (hw->phy.media_type == e1000_media_type_copper)
			value += sizeof (lb_phy);
		else
			value += sizeof (lb_serdes);
		value += sizeof (lb_external);

		lbsp = (lb_info_sz_t *)(uintptr_t)mp->b_cont->b_rptr;
		*lbsp = value;
		break;

	case LB_GET_INFO:
		/* Recompute the table size; caller's buffer must match */
		value = sizeof (lb_normal);
		if (hw->phy.media_type == e1000_media_type_copper)
			value += sizeof (lb_phy);
		else
			value += sizeof (lb_serdes);
		value += sizeof (lb_external);

		size = value;
		if (iocp->ioc_count != size)
			return (IOC_INVAL);

		/* Fill the caller's buffer with the supported modes */
		value = 0;
		lbpp = (lb_property_t *)(uintptr_t)mp->b_cont->b_rptr;

		lbpp[value++] = lb_normal;
		if (hw->phy.media_type == e1000_media_type_copper)
			lbpp[value++] = lb_phy;
		else
			lbpp[value++] = lb_serdes;
		lbpp[value++] = lb_external;
		break;

	case LB_GET_MODE:
		size = sizeof (uint32_t);
		if (iocp->ioc_count != size)
			return (IOC_INVAL);

		lbmp = (uint32_t *)(uintptr_t)mp->b_cont->b_rptr;
		*lbmp = igb->loopback_mode;
		break;

	case LB_SET_MODE:
		/* Set requests carry no reply payload */
		size = 0;
		if (iocp->ioc_count != sizeof (uint32_t))
			return (IOC_INVAL);

		lbmp = (uint32_t *)(uintptr_t)mp->b_cont->b_rptr;
		if (!igb_set_loopback_mode(igb, *lbmp))
			return (IOC_INVAL);
		break;
	}

	iocp->ioc_count = size;
	iocp->ioc_error = 0;

	/* Fail the ioctl if register access has gone bad (FMA) */
	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		return (IOC_INVAL);
	}

	return (IOC_REPLY);
}
3634 
3635 /*
3636  * igb_set_loopback_mode - Setup loopback based on the loopback mode
3637  */
3638 static boolean_t
3639 igb_set_loopback_mode(igb_t *igb, uint32_t mode)
3640 {
3641 	struct e1000_hw *hw;
3642 	int i;
3643 
3644 	if (mode == igb->loopback_mode)
3645 		return (B_TRUE);
3646 
3647 	hw = &igb->hw;
3648 
3649 	igb->loopback_mode = mode;
3650 
3651 	if (mode == IGB_LB_NONE) {
3652 		/* Reset the chip */
3653 		hw->phy.autoneg_wait_to_complete = B_TRUE;
3654 		(void) igb_reset(igb);
3655 		hw->phy.autoneg_wait_to_complete = B_FALSE;
3656 		return (B_TRUE);
3657 	}
3658 
3659 	mutex_enter(&igb->gen_lock);
3660 
3661 	switch (mode) {
3662 	default:
3663 		mutex_exit(&igb->gen_lock);
3664 		return (B_FALSE);
3665 
3666 	case IGB_LB_EXTERNAL:
3667 		igb_set_external_loopback(igb);
3668 		break;
3669 
3670 	case IGB_LB_INTERNAL_PHY:
3671 		igb_set_internal_phy_loopback(igb);
3672 		break;
3673 
3674 	case IGB_LB_INTERNAL_SERDES:
3675 		igb_set_internal_serdes_loopback(igb);
3676 		break;
3677 	}
3678 
3679 	mutex_exit(&igb->gen_lock);
3680 
3681 	/*
3682 	 * When external loopback is set, wait up to 1000ms to get the link up.
3683 	 * According to test, 1000ms can work and it's an experimental value.
3684 	 */
3685 	if (mode == IGB_LB_EXTERNAL) {
3686 		for (i = 0; i <= 10; i++) {
3687 			mutex_enter(&igb->gen_lock);
3688 			(void) igb_link_check(igb);
3689 			mutex_exit(&igb->gen_lock);
3690 
3691 			if (igb->link_state == LINK_STATE_UP)
3692 				break;
3693 
3694 			msec_delay(100);
3695 		}
3696 
3697 		if (igb->link_state != LINK_STATE_UP) {
3698 			/*
3699 			 * Does not support external loopback.
3700 			 * Reset driver to loopback none.
3701 			 */
3702 			igb->loopback_mode = IGB_LB_NONE;
3703 
3704 			/* Reset the chip */
3705 			hw->phy.autoneg_wait_to_complete = B_TRUE;
3706 			(void) igb_reset(igb);
3707 			hw->phy.autoneg_wait_to_complete = B_FALSE;
3708 
3709 			IGB_DEBUGLOG_0(igb, "Set external loopback failed, "
3710 			    "reset to loopback none.");
3711 
3712 			return (B_FALSE);
3713 		}
3714 	}
3715 
3716 	return (B_TRUE);
3717 }
3718 
3719 /*
3720  * igb_set_external_loopback - Set the external loopback mode
3721  */
3722 static void
3723 igb_set_external_loopback(igb_t *igb)
3724 {
3725 	struct e1000_hw *hw;
3726 
3727 	hw = &igb->hw;
3728 
3729 	/* Set phy to known state */
3730 	(void) e1000_phy_hw_reset(hw);
3731 
3732 	(void) e1000_write_phy_reg(hw, 0x0, 0x0140);
3733 	(void) e1000_write_phy_reg(hw, 0x9, 0x1b00);
3734 	(void) e1000_write_phy_reg(hw, 0x12, 0x1610);
3735 	(void) e1000_write_phy_reg(hw, 0x1f37, 0x3f1c);
3736 }
3737 
3738 /*
3739  * igb_set_internal_phy_loopback - Set the internal PHY loopback mode
3740  */
3741 static void
3742 igb_set_internal_phy_loopback(igb_t *igb)
3743 {
3744 	struct e1000_hw *hw;
3745 	uint32_t ctrl_ext;
3746 	uint16_t phy_ctrl;
3747 	uint16_t phy_pconf;
3748 
3749 	hw = &igb->hw;
3750 
3751 	/* Set link mode to PHY (00b) in the Extended Control register */
3752 	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
3753 	ctrl_ext &= ~E1000_CTRL_EXT_LINK_MODE_MASK;
3754 	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
3755 
3756 	/*
3757 	 * Set PHY control register (0x4140):
3758 	 *    Set full duplex mode
3759 	 *    Set loopback bit
3760 	 *    Clear auto-neg enable bit
3761 	 *    Set PHY speed
3762 	 */
3763 	phy_ctrl = MII_CR_FULL_DUPLEX | MII_CR_SPEED_1000 | MII_CR_LOOPBACK;
3764 	(void) e1000_write_phy_reg(hw, PHY_CONTROL, phy_ctrl);
3765 
3766 	/* Set the link disable bit in the Port Configuration register */
3767 	(void) e1000_read_phy_reg(hw, 0x10, &phy_pconf);
3768 	phy_pconf |= (uint16_t)1 << 14;
3769 	(void) e1000_write_phy_reg(hw, 0x10, phy_pconf);
3770 }
3771 
3772 /*
3773  * igb_set_internal_serdes_loopback - Set the internal SerDes loopback mode
3774  */
3775 static void
3776 igb_set_internal_serdes_loopback(igb_t *igb)
3777 {
3778 	struct e1000_hw *hw;
3779 	uint32_t ctrl_ext;
3780 	uint32_t ctrl;
3781 	uint32_t pcs_lctl;
3782 	uint32_t connsw;
3783 
3784 	hw = &igb->hw;
3785 
3786 	/* Set link mode to SerDes (11b) in the Extended Control register */
3787 	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
3788 	ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
3789 	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
3790 
3791 	/* Configure the SerDes to loopback */
3792 	E1000_WRITE_REG(hw, E1000_SCTL, 0x410);
3793 
3794 	/* Set Device Control register */
3795 	ctrl = E1000_READ_REG(hw, E1000_CTRL);
3796 	ctrl |= (E1000_CTRL_FD |	/* Force full duplex */
3797 	    E1000_CTRL_SLU);		/* Force link up */
3798 	ctrl &= ~(E1000_CTRL_RFCE |	/* Disable receive flow control */
3799 	    E1000_CTRL_TFCE |		/* Disable transmit flow control */
3800 	    E1000_CTRL_LRST);		/* Clear link reset */
3801 	E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
3802 
3803 	/* Set PCS Link Control register */
3804 	pcs_lctl = E1000_READ_REG(hw, E1000_PCS_LCTL);
3805 	pcs_lctl |= (E1000_PCS_LCTL_FORCE_LINK |
3806 	    E1000_PCS_LCTL_FSD |
3807 	    E1000_PCS_LCTL_FDV_FULL |
3808 	    E1000_PCS_LCTL_FLV_LINK_UP);
3809 	pcs_lctl &= ~E1000_PCS_LCTL_AN_ENABLE;
3810 	E1000_WRITE_REG(hw, E1000_PCS_LCTL, pcs_lctl);
3811 
3812 	/* Set the Copper/Fiber Switch Control - CONNSW register */
3813 	connsw = E1000_READ_REG(hw, E1000_CONNSW);
3814 	connsw &= ~E1000_CONNSW_ENRGSRC;
3815 	E1000_WRITE_REG(hw, E1000_CONNSW, connsw);
3816 }
3817 
3818 #pragma inline(igb_intr_rx_work)
3819 /*
3820  * igb_intr_rx_work - rx processing of ISR
3821  */
3822 static void
3823 igb_intr_rx_work(igb_rx_ring_t *rx_ring)
3824 {
3825 	mblk_t *mp;
3826 
3827 	mutex_enter(&rx_ring->rx_lock);
3828 	mp = igb_rx(rx_ring, IGB_NO_POLL);
3829 	mutex_exit(&rx_ring->rx_lock);
3830 
3831 	if (mp != NULL)
3832 		mac_rx_ring(rx_ring->igb->mac_hdl, rx_ring->ring_handle, mp,
3833 		    rx_ring->ring_gen_num);
3834 }
3835 
3836 #pragma inline(igb_intr_tx_work)
3837 /*
3838  * igb_intr_tx_work - tx processing of ISR
3839  */
3840 static void
3841 igb_intr_tx_work(igb_tx_ring_t *tx_ring)
3842 {
3843 	igb_t *igb = tx_ring->igb;
3844 
3845 	/* Recycle the tx descriptors */
3846 	tx_ring->tx_recycle(tx_ring);
3847 
3848 	/* Schedule the re-transmit */
3849 	if (tx_ring->reschedule &&
3850 	    (tx_ring->tbd_free >= igb->tx_resched_thresh)) {
3851 		tx_ring->reschedule = B_FALSE;
3852 		mac_tx_ring_update(tx_ring->igb->mac_hdl, tx_ring->ring_handle);
3853 		IGB_DEBUG_STAT(tx_ring->stat_reschedule);
3854 	}
3855 }
3856 
3857 #pragma inline(igb_intr_link_work)
3858 /*
3859  * igb_intr_link_work - link-status-change processing of ISR
3860  */
3861 static void
3862 igb_intr_link_work(igb_t *igb)
3863 {
3864 	boolean_t link_changed;
3865 
3866 	igb_stop_watchdog_timer(igb);
3867 
3868 	mutex_enter(&igb->gen_lock);
3869 
3870 	/*
3871 	 * Because we got a link-status-change interrupt, force
3872 	 * e1000_check_for_link() to look at phy
3873 	 */
3874 	igb->hw.mac.get_link_status = B_TRUE;
3875 
3876 	/* igb_link_check takes care of link status change */
3877 	link_changed = igb_link_check(igb);
3878 
3879 	/* Get new phy state */
3880 	igb_get_phy_state(igb);
3881 
3882 	mutex_exit(&igb->gen_lock);
3883 
3884 	if (link_changed)
3885 		mac_link_update(igb->mac_hdl, igb->link_state);
3886 
3887 	igb_start_watchdog_timer(igb);
3888 }
3889 
3890 /*
3891  * igb_intr_legacy - Interrupt handler for legacy interrupts
3892  */
3893 static uint_t
3894 igb_intr_legacy(void *arg1, void *arg2)
3895 {
3896 	igb_t *igb = (igb_t *)arg1;
3897 	igb_tx_ring_t *tx_ring;
3898 	uint32_t icr;
3899 	mblk_t *mp;
3900 	boolean_t tx_reschedule;
3901 	boolean_t link_changed;
3902 	uint_t result;
3903 
3904 	_NOTE(ARGUNUSED(arg2));
3905 
3906 	mutex_enter(&igb->gen_lock);
3907 
3908 	if (igb->igb_state & IGB_SUSPENDED) {
3909 		mutex_exit(&igb->gen_lock);
3910 		return (DDI_INTR_UNCLAIMED);
3911 	}
3912 
3913 	mp = NULL;
3914 	tx_reschedule = B_FALSE;
3915 	link_changed = B_FALSE;
3916 	icr = E1000_READ_REG(&igb->hw, E1000_ICR);
3917 
3918 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
3919 		mutex_exit(&igb->gen_lock);
3920 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
3921 		atomic_or_32(&igb->igb_state, IGB_ERROR);
3922 		return (DDI_INTR_UNCLAIMED);
3923 	}
3924 
3925 	if (icr & E1000_ICR_INT_ASSERTED) {
3926 		/*
3927 		 * E1000_ICR_INT_ASSERTED bit was set:
3928 		 * Read(Clear) the ICR, claim this interrupt,
3929 		 * look for work to do.
3930 		 */
3931 		ASSERT(igb->num_rx_rings == 1);
3932 		ASSERT(igb->num_tx_rings == 1);
3933 
3934 		/* Make sure all interrupt causes cleared */
3935 		(void) E1000_READ_REG(&igb->hw, E1000_EICR);
3936 
3937 		if (icr & E1000_ICR_RXT0) {
3938 			mp = igb_rx(&igb->rx_rings[0], IGB_NO_POLL);
3939 		}
3940 
3941 		if (icr & E1000_ICR_TXDW) {
3942 			tx_ring = &igb->tx_rings[0];
3943 
3944 			/* Recycle the tx descriptors */
3945 			tx_ring->tx_recycle(tx_ring);
3946 
3947 			/* Schedule the re-transmit */
3948 			tx_reschedule = (tx_ring->reschedule &&
3949 			    (tx_ring->tbd_free >= igb->tx_resched_thresh));
3950 		}
3951 
3952 		if (icr & E1000_ICR_LSC) {
3953 			/*
3954 			 * Because we got a link-status-change interrupt, force
3955 			 * e1000_check_for_link() to look at phy
3956 			 */
3957 			igb->hw.mac.get_link_status = B_TRUE;
3958 
3959 			/* igb_link_check takes care of link status change */
3960 			link_changed = igb_link_check(igb);
3961 
3962 			/* Get new phy state */
3963 			igb_get_phy_state(igb);
3964 		}
3965 
3966 		if (icr & E1000_ICR_DRSTA) {
3967 			/* 82580 Full Device Reset needed */
3968 			atomic_or_32(&igb->igb_state, IGB_STALL);
3969 		}
3970 
3971 		result = DDI_INTR_CLAIMED;
3972 	} else {
3973 		/*
3974 		 * E1000_ICR_INT_ASSERTED bit was not set:
3975 		 * Don't claim this interrupt.
3976 		 */
3977 		result = DDI_INTR_UNCLAIMED;
3978 	}
3979 
3980 	mutex_exit(&igb->gen_lock);
3981 
3982 	/*
3983 	 * Do the following work outside of the gen_lock
3984 	 */
3985 	if (mp != NULL)
3986 		mac_rx(igb->mac_hdl, NULL, mp);
3987 
3988 	if (tx_reschedule)  {
3989 		tx_ring->reschedule = B_FALSE;
3990 		mac_tx_ring_update(igb->mac_hdl, tx_ring->ring_handle);
3991 		IGB_DEBUG_STAT(tx_ring->stat_reschedule);
3992 	}
3993 
3994 	if (link_changed)
3995 		mac_link_update(igb->mac_hdl, igb->link_state);
3996 
3997 	return (result);
3998 }
3999 
4000 /*
4001  * igb_intr_msi - Interrupt handler for MSI
4002  */
4003 static uint_t
4004 igb_intr_msi(void *arg1, void *arg2)
4005 {
4006 	igb_t *igb = (igb_t *)arg1;
4007 	uint32_t icr;
4008 
4009 	_NOTE(ARGUNUSED(arg2));
4010 
4011 	icr = E1000_READ_REG(&igb->hw, E1000_ICR);
4012 
4013 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
4014 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
4015 		atomic_or_32(&igb->igb_state, IGB_ERROR);
4016 		return (DDI_INTR_CLAIMED);
4017 	}
4018 
4019 	/* Make sure all interrupt causes cleared */
4020 	(void) E1000_READ_REG(&igb->hw, E1000_EICR);
4021 
4022 	/*
4023 	 * For MSI interrupt, we have only one vector,
4024 	 * so we have only one rx ring and one tx ring enabled.
4025 	 */
4026 	ASSERT(igb->num_rx_rings == 1);
4027 	ASSERT(igb->num_tx_rings == 1);
4028 
4029 	if (icr & E1000_ICR_RXT0) {
4030 		igb_intr_rx_work(&igb->rx_rings[0]);
4031 	}
4032 
4033 	if (icr & E1000_ICR_TXDW) {
4034 		igb_intr_tx_work(&igb->tx_rings[0]);
4035 	}
4036 
4037 	if (icr & E1000_ICR_LSC) {
4038 		igb_intr_link_work(igb);
4039 	}
4040 
4041 	if (icr & E1000_ICR_DRSTA) {
4042 		/* 82580 Full Device Reset needed */
4043 		atomic_or_32(&igb->igb_state, IGB_STALL);
4044 	}
4045 
4046 	return (DDI_INTR_CLAIMED);
4047 }
4048 
4049 /*
4050  * igb_intr_rx - Interrupt handler for rx
4051  */
4052 static uint_t
4053 igb_intr_rx(void *arg1, void *arg2)
4054 {
4055 	igb_rx_ring_t *rx_ring = (igb_rx_ring_t *)arg1;
4056 
4057 	_NOTE(ARGUNUSED(arg2));
4058 
4059 	/*
4060 	 * Only used via MSI-X vector so don't check cause bits
4061 	 * and only clean the given ring.
4062 	 */
4063 	igb_intr_rx_work(rx_ring);
4064 
4065 	return (DDI_INTR_CLAIMED);
4066 }
4067 
4068 /*
4069  * igb_intr_tx - Interrupt handler for tx
4070  */
4071 static uint_t
4072 igb_intr_tx(void *arg1, void *arg2)
4073 {
4074 	igb_tx_ring_t *tx_ring = (igb_tx_ring_t *)arg1;
4075 
4076 	_NOTE(ARGUNUSED(arg2));
4077 
4078 	/*
4079 	 * Only used via MSI-X vector so don't check cause bits
4080 	 * and only clean the given ring.
4081 	 */
4082 	igb_intr_tx_work(tx_ring);
4083 
4084 	return (DDI_INTR_CLAIMED);
4085 }
4086 
4087 /*
4088  * igb_intr_tx_other - Interrupt handler for both tx and other
4089  *
4090  */
4091 static uint_t
4092 igb_intr_tx_other(void *arg1, void *arg2)
4093 {
4094 	igb_t *igb = (igb_t *)arg1;
4095 	uint32_t icr;
4096 
4097 	_NOTE(ARGUNUSED(arg2));
4098 
4099 	icr = E1000_READ_REG(&igb->hw, E1000_ICR);
4100 
4101 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
4102 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
4103 		atomic_or_32(&igb->igb_state, IGB_ERROR);
4104 		return (DDI_INTR_CLAIMED);
4105 	}
4106 
4107 	/*
4108 	 * Look for tx reclaiming work first. Remember, in the
4109 	 * case of only interrupt sharing, only one tx ring is
4110 	 * used
4111 	 */
4112 	igb_intr_tx_work(&igb->tx_rings[0]);
4113 
4114 	/*
4115 	 * Check for "other" causes.
4116 	 */
4117 	if (icr & E1000_ICR_LSC) {
4118 		igb_intr_link_work(igb);
4119 	}
4120 
4121 	/*
4122 	 * The DOUTSYNC bit indicates a tx packet dropped because
4123 	 * DMA engine gets "out of sync". There isn't a real fix
4124 	 * for this. The Intel recommendation is to count the number
4125 	 * of occurrences so user can detect when it is happening.
4126 	 * The issue is non-fatal and there's no recovery action
4127 	 * available.
4128 	 */
4129 	if (icr & E1000_ICR_DOUTSYNC) {
4130 		IGB_STAT(igb->dout_sync);
4131 	}
4132 
4133 	if (icr & E1000_ICR_DRSTA) {
4134 		/* 82580 Full Device Reset needed */
4135 		atomic_or_32(&igb->igb_state, IGB_STALL);
4136 	}
4137 
4138 	return (DDI_INTR_CLAIMED);
4139 }
4140 
4141 /*
4142  * igb_alloc_intrs - Allocate interrupts for the driver
4143  *
4144  * Normal sequence is to try MSI-X; if not sucessful, try MSI;
4145  * if not successful, try Legacy.
4146  * igb->intr_force can be used to force sequence to start with
4147  * any of the 3 types.
4148  * If MSI-X is not used, number of tx/rx rings is forced to 1.
4149  */
4150 static int
4151 igb_alloc_intrs(igb_t *igb)
4152 {
4153 	dev_info_t *devinfo;
4154 	int intr_types;
4155 	int rc;
4156 
4157 	devinfo = igb->dip;
4158 
4159 	/* Get supported interrupt types */
4160 	rc = ddi_intr_get_supported_types(devinfo, &intr_types);
4161 
4162 	if (rc != DDI_SUCCESS) {
4163 		igb_log(igb,
4164 		    "Get supported interrupt types failed: %d", rc);
4165 		return (IGB_FAILURE);
4166 	}
4167 	IGB_DEBUGLOG_1(igb, "Supported interrupt types: %x", intr_types);
4168 
4169 	igb->intr_type = 0;
4170 
4171 	/* Install MSI-X interrupts */
4172 	if ((intr_types & DDI_INTR_TYPE_MSIX) &&
4173 	    (igb->intr_force <= IGB_INTR_MSIX)) {
4174 		rc = igb_alloc_intr_handles(igb, DDI_INTR_TYPE_MSIX);
4175 
4176 		if (rc == IGB_SUCCESS)
4177 			return (IGB_SUCCESS);
4178 
4179 		igb_log(igb,
4180 		    "Allocate MSI-X failed, trying MSI interrupts...");
4181 	}
4182 
4183 	/* MSI-X not used, force rings to 1 */
4184 	igb->num_rx_rings = 1;
4185 	igb->num_tx_rings = 1;
4186 	igb_log(igb,
4187 	    "MSI-X not used, force rx and tx queue number to 1");
4188 
4189 	/* Install MSI interrupts */
4190 	if ((intr_types & DDI_INTR_TYPE_MSI) &&
4191 	    (igb->intr_force <= IGB_INTR_MSI)) {
4192 		rc = igb_alloc_intr_handles(igb, DDI_INTR_TYPE_MSI);
4193 
4194 		if (rc == IGB_SUCCESS)
4195 			return (IGB_SUCCESS);
4196 
4197 		igb_log(igb,
4198 		    "Allocate MSI failed, trying Legacy interrupts...");
4199 	}
4200 
4201 	/* Install legacy interrupts */
4202 	if (intr_types & DDI_INTR_TYPE_FIXED) {
4203 		rc = igb_alloc_intr_handles(igb, DDI_INTR_TYPE_FIXED);
4204 
4205 		if (rc == IGB_SUCCESS)
4206 			return (IGB_SUCCESS);
4207 
4208 		igb_log(igb,
4209 		    "Allocate Legacy interrupts failed");
4210 	}
4211 
4212 	/* If none of the 3 types succeeded, return failure */
4213 	return (IGB_FAILURE);
4214 }
4215 
4216 /*
4217  * igb_alloc_intr_handles - Allocate interrupt handles.
4218  *
4219  * For legacy and MSI, only 1 handle is needed.  For MSI-X,
4220  * if fewer than 2 handles are available, return failure.
4221  * Upon success, this sets the number of Rx rings to a number that
4222  * matches the handles available for Rx interrupts.
4223  */
4224 static int
4225 igb_alloc_intr_handles(igb_t *igb, int intr_type)
4226 {
4227 	dev_info_t *devinfo;
4228 	int orig, request, count, avail, actual;
4229 	int diff, minimum;
4230 	int rc;
4231 
4232 	devinfo = igb->dip;
4233 
4234 	switch (intr_type) {
4235 	case DDI_INTR_TYPE_FIXED:
4236 		request = 1;	/* Request 1 legacy interrupt handle */
4237 		minimum = 1;
4238 		IGB_DEBUGLOG_0(igb, "interrupt type: legacy");
4239 		break;
4240 
4241 	case DDI_INTR_TYPE_MSI:
4242 		request = 1;	/* Request 1 MSI interrupt handle */
4243 		minimum = 1;
4244 		IGB_DEBUGLOG_0(igb, "interrupt type: MSI");
4245 		break;
4246 
4247 	case DDI_INTR_TYPE_MSIX:
4248 		/*
4249 		 * Number of vectors for the adapter is
4250 		 * # rx rings + # tx rings
4251 		 * One of tx vectors is for tx & other
4252 		 */
4253 		request = igb->num_rx_rings + igb->num_tx_rings;
4254 		orig = request;
4255 		minimum = 2;
4256 		IGB_DEBUGLOG_0(igb, "interrupt type: MSI-X");
4257 		break;
4258 
4259 	default:
4260 		igb_log(igb,
4261 		    "invalid call to igb_alloc_intr_handles(): %d\n",
4262 		    intr_type);
4263 		return (IGB_FAILURE);
4264 	}
4265 	IGB_DEBUGLOG_2(igb, "interrupt handles requested: %d  minimum: %d",
4266 	    request, minimum);
4267 
4268 	/*
4269 	 * Get number of supported interrupts
4270 	 */
4271 	rc = ddi_intr_get_nintrs(devinfo, intr_type, &count);
4272 	if ((rc != DDI_SUCCESS) || (count < minimum)) {
4273 		igb_log(igb,
4274 		    "Get supported interrupt number failed. "
4275 		    "Return: %d, count: %d", rc, count);
4276 		return (IGB_FAILURE);
4277 	}
4278 	IGB_DEBUGLOG_1(igb, "interrupts supported: %d", count);
4279 
4280 	/*
4281 	 * Get number of available interrupts
4282 	 */
4283 	rc = ddi_intr_get_navail(devinfo, intr_type, &avail);
4284 	if ((rc != DDI_SUCCESS) || (avail < minimum)) {
4285 		igb_log(igb,
4286 		    "Get available interrupt number failed. "
4287 		    "Return: %d, available: %d", rc, avail);
4288 		return (IGB_FAILURE);
4289 	}
4290 	IGB_DEBUGLOG_1(igb, "interrupts available: %d", avail);
4291 
4292 	if (avail < request) {
4293 		igb_log(igb, "Request %d handles, %d available",
4294 		    request, avail);
4295 		request = avail;
4296 	}
4297 
4298 	actual = 0;
4299 	igb->intr_cnt = 0;
4300 
4301 	/*
4302 	 * Allocate an array of interrupt handles
4303 	 */
4304 	igb->intr_size = request * sizeof (ddi_intr_handle_t);
4305 	igb->htable = kmem_alloc(igb->intr_size, KM_SLEEP);
4306 
4307 	rc = ddi_intr_alloc(devinfo, igb->htable, intr_type, 0,
4308 	    request, &actual, DDI_INTR_ALLOC_NORMAL);
4309 	if (rc != DDI_SUCCESS) {
4310 		igb_log(igb, "Allocate interrupts failed. "
4311 		    "return: %d, request: %d, actual: %d",
4312 		    rc, request, actual);
4313 		goto alloc_handle_fail;
4314 	}
4315 	IGB_DEBUGLOG_1(igb, "interrupts actually allocated: %d", actual);
4316 
4317 	igb->intr_cnt = actual;
4318 
4319 	if (actual < minimum) {
4320 		igb_log(igb, "Insufficient interrupt handles allocated: %d",
4321 		    actual);
4322 		goto alloc_handle_fail;
4323 	}
4324 
4325 	/*
4326 	 * For MSI-X, actual might force us to reduce number of tx & rx rings
4327 	 */
4328 	if ((intr_type == DDI_INTR_TYPE_MSIX) && (orig > actual)) {
4329 		diff = orig - actual;
4330 		if (diff < igb->num_tx_rings) {
4331 			igb_log(igb,
4332 			    "MSI-X vectors force Tx queue number to %d",
4333 			    igb->num_tx_rings - diff);
4334 			igb->num_tx_rings -= diff;
4335 		} else {
4336 			igb_log(igb,
4337 			    "MSI-X vectors force Tx queue number to 1");
4338 			igb->num_tx_rings = 1;
4339 
4340 			igb_log(igb,
4341 			    "MSI-X vectors force Rx queue number to %d",
4342 			    actual - 1);
4343 			igb->num_rx_rings = actual - 1;
4344 		}
4345 	}
4346 
4347 	/*
4348 	 * Get priority for first vector, assume remaining are all the same
4349 	 */
4350 	rc = ddi_intr_get_pri(igb->htable[0], &igb->intr_pri);
4351 	if (rc != DDI_SUCCESS) {
4352 		igb_log(igb,
4353 		    "Get interrupt priority failed: %d", rc);
4354 		goto alloc_handle_fail;
4355 	}
4356 
4357 	rc = ddi_intr_get_cap(igb->htable[0], &igb->intr_cap);
4358 	if (rc != DDI_SUCCESS) {
4359 		igb_log(igb,
4360 		    "Get interrupt cap failed: %d", rc);
4361 		goto alloc_handle_fail;
4362 	}
4363 
4364 	igb->intr_type = intr_type;
4365 
4366 	return (IGB_SUCCESS);
4367 
4368 alloc_handle_fail:
4369 	igb_rem_intrs(igb);
4370 
4371 	return (IGB_FAILURE);
4372 }
4373 
4374 /*
4375  * igb_add_intr_handlers - Add interrupt handlers based on the interrupt type
4376  *
4377  * Before adding the interrupt handlers, the interrupt vectors have
4378  * been allocated, and the rx/tx rings have also been allocated.
4379  */
4380 static int
4381 igb_add_intr_handlers(igb_t *igb)
4382 {
4383 	igb_rx_ring_t *rx_ring;
4384 	igb_tx_ring_t *tx_ring;
4385 	int vector;
4386 	int rc;
4387 	int i;
4388 
4389 	vector = 0;
4390 
4391 	switch (igb->intr_type) {
4392 	case DDI_INTR_TYPE_MSIX:
4393 		/* Add interrupt handler for tx + other */
4394 		tx_ring = &igb->tx_rings[0];
4395 		rc = ddi_intr_add_handler(igb->htable[vector],
4396 		    (ddi_intr_handler_t *)igb_intr_tx_other,
4397 		    (void *)igb, NULL);
4398 
4399 		if (rc != DDI_SUCCESS) {
4400 			igb_log(igb,
4401 			    "Add tx/other interrupt handler failed: %d", rc);
4402 			return (IGB_FAILURE);
4403 		}
4404 		tx_ring->intr_vector = vector;
4405 		vector++;
4406 
4407 		/* Add interrupt handler for each rx ring */
4408 		for (i = 0; i < igb->num_rx_rings; i++) {
4409 			rx_ring = &igb->rx_rings[i];
4410 
4411 			rc = ddi_intr_add_handler(igb->htable[vector],
4412 			    (ddi_intr_handler_t *)igb_intr_rx,
4413 			    (void *)rx_ring, NULL);
4414 
4415 			if (rc != DDI_SUCCESS) {
4416 				igb_log(igb,
4417 				    "Add rx interrupt handler failed. "
4418 				    "return: %d, rx ring: %d", rc, i);
4419 				for (vector--; vector >= 0; vector--) {
4420 					(void) ddi_intr_remove_handler(
4421 					    igb->htable[vector]);
4422 				}
4423 				return (IGB_FAILURE);
4424 			}
4425 
4426 			rx_ring->intr_vector = vector;
4427 
4428 			vector++;
4429 		}
4430 
4431 		/* Add interrupt handler for each tx ring from 2nd ring */
4432 		for (i = 1; i < igb->num_tx_rings; i++) {
4433 			tx_ring = &igb->tx_rings[i];
4434 
4435 			rc = ddi_intr_add_handler(igb->htable[vector],
4436 			    (ddi_intr_handler_t *)igb_intr_tx,
4437 			    (void *)tx_ring, NULL);
4438 
4439 			if (rc != DDI_SUCCESS) {
4440 				igb_log(igb,
4441 				    "Add tx interrupt handler failed. "
4442 				    "return: %d, tx ring: %d", rc, i);
4443 				for (vector--; vector >= 0; vector--) {
4444 					(void) ddi_intr_remove_handler(
4445 					    igb->htable[vector]);
4446 				}
4447 				return (IGB_FAILURE);
4448 			}
4449 
4450 			tx_ring->intr_vector = vector;
4451 
4452 			vector++;
4453 		}
4454 
4455 		break;
4456 
4457 	case DDI_INTR_TYPE_MSI:
4458 		/* Add interrupt handlers for the only vector */
4459 		rc = ddi_intr_add_handler(igb->htable[vector],
4460 		    (ddi_intr_handler_t *)igb_intr_msi,
4461 		    (void *)igb, NULL);
4462 
4463 		if (rc != DDI_SUCCESS) {
4464 			igb_log(igb,
4465 			    "Add MSI interrupt handler failed: %d", rc);
4466 			return (IGB_FAILURE);
4467 		}
4468 
4469 		rx_ring = &igb->rx_rings[0];
4470 		rx_ring->intr_vector = vector;
4471 
4472 		vector++;
4473 		break;
4474 
4475 	case DDI_INTR_TYPE_FIXED:
4476 		/* Add interrupt handlers for the only vector */
4477 		rc = ddi_intr_add_handler(igb->htable[vector],
4478 		    (ddi_intr_handler_t *)igb_intr_legacy,
4479 		    (void *)igb, NULL);
4480 
4481 		if (rc != DDI_SUCCESS) {
4482 			igb_log(igb,
4483 			    "Add legacy interrupt handler failed: %d", rc);
4484 			return (IGB_FAILURE);
4485 		}
4486 
4487 		rx_ring = &igb->rx_rings[0];
4488 		rx_ring->intr_vector = vector;
4489 
4490 		vector++;
4491 		break;
4492 
4493 	default:
4494 		return (IGB_FAILURE);
4495 	}
4496 
4497 	ASSERT(vector == igb->intr_cnt);
4498 
4499 	return (IGB_SUCCESS);
4500 }
4501 
4502 /*
4503  * igb_setup_msix_82575 - setup 82575 adapter to use MSI-X interrupts
4504  *
4505  * For each vector enabled on the adapter, Set the MSIXBM register accordingly
4506  */
4507 static void
4508 igb_setup_msix_82575(igb_t *igb)
4509 {
4510 	uint32_t eims = 0;
4511 	int i, vector;
4512 	struct e1000_hw *hw = &igb->hw;
4513 
4514 	/*
4515 	 * Set vector for tx ring 0 and other causes.
4516 	 * NOTE assumption that it is vector 0.
4517 	 */
4518 	vector = 0;
4519 
4520 	igb->eims_mask = E1000_EICR_TX_QUEUE0 | E1000_EICR_OTHER;
4521 	E1000_WRITE_REG(hw, E1000_MSIXBM(vector), igb->eims_mask);
4522 	vector++;
4523 
4524 	for (i = 0; i < igb->num_rx_rings; i++) {
4525 		/*
4526 		 * Set vector for each rx ring
4527 		 */
4528 		eims = (E1000_EICR_RX_QUEUE0 << i);
4529 		E1000_WRITE_REG(hw, E1000_MSIXBM(vector), eims);
4530 
4531 		/*
4532 		 * Accumulate bits to enable in
4533 		 * igb_enable_adapter_interrupts_82575()
4534 		 */
4535 		igb->eims_mask |= eims;
4536 
4537 		vector++;
4538 	}
4539 
4540 	for (i = 1; i < igb->num_tx_rings; i++) {
4541 		/*
4542 		 * Set vector for each tx ring from 2nd tx ring
4543 		 */
4544 		eims = (E1000_EICR_TX_QUEUE0 << i);
4545 		E1000_WRITE_REG(hw, E1000_MSIXBM(vector), eims);
4546 
4547 		/*
4548 		 * Accumulate bits to enable in
4549 		 * igb_enable_adapter_interrupts_82575()
4550 		 */
4551 		igb->eims_mask |= eims;
4552 
4553 		vector++;
4554 	}
4555 
4556 	ASSERT(vector == igb->intr_cnt);
4557 
4558 	/*
4559 	 * Disable IAM for ICR interrupt bits
4560 	 */
4561 	E1000_WRITE_REG(hw, E1000_IAM, 0);
4562 	E1000_WRITE_FLUSH(hw);
4563 }
4564 
4565 /*
4566  * igb_setup_msix_82576 - setup 82576 adapter to use MSI-X interrupts
4567  *
4568  * 82576 uses a table based method for assigning vectors.  Each queue has a
4569  * single entry in the table to which we write a vector number along with a
4570  * "valid" bit.  The entry is a single byte in a 4-byte register.  Vectors
4571  * take a different position in the 4-byte register depending on whether
4572  * they are numbered above or below 8.
4573  */
4574 static void
4575 igb_setup_msix_82576(igb_t *igb)
4576 {
4577 	struct e1000_hw *hw = &igb->hw;
4578 	uint32_t ivar, index, vector;
4579 	int i;
4580 
4581 	/* must enable msi-x capability before IVAR settings */
4582 	E1000_WRITE_REG(hw, E1000_GPIE,
4583 	    (E1000_GPIE_MSIX_MODE | E1000_GPIE_PBA | E1000_GPIE_NSICR));
4584 
4585 	/*
4586 	 * Set vector for tx ring 0 and other causes.
4587 	 * NOTE assumption that it is vector 0.
4588 	 * This is also interdependent with installation of interrupt service
4589 	 * routines in igb_add_intr_handlers().
4590 	 */
4591 
4592 	/* assign "other" causes to vector 0 */
4593 	vector = 0;
4594 	ivar = ((vector | E1000_IVAR_VALID) << 8);
4595 	E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
4596 
4597 	/* assign tx ring 0 to vector 0 */
4598 	ivar = ((vector | E1000_IVAR_VALID) << 8);
4599 	E1000_WRITE_REG(hw, E1000_IVAR0, ivar);
4600 
4601 	/* prepare to enable tx & other interrupt causes */
4602 	igb->eims_mask = (1 << vector);
4603 
4604 	vector ++;
4605 	for (i = 0; i < igb->num_rx_rings; i++) {
4606 		/*
4607 		 * Set vector for each rx ring
4608 		 */
4609 		index = (i & 0x7);
4610 		ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
4611 
4612 		if (i < 8) {
4613 			/* vector goes into low byte of register */
4614 			ivar = ivar & 0xFFFFFF00;
4615 			ivar |= (vector | E1000_IVAR_VALID);
4616 		} else {
4617 			/* vector goes into third byte of register */
4618 			ivar = ivar & 0xFF00FFFF;
4619 			ivar |= ((vector | E1000_IVAR_VALID) << 16);
4620 		}
4621 		E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
4622 
4623 		/* Accumulate interrupt-cause bits to enable */
4624 		igb->eims_mask |= (1 << vector);
4625 
4626 		vector ++;
4627 	}
4628 
4629 	for (i = 1; i < igb->num_tx_rings; i++) {
4630 		/*
4631 		 * Set vector for each tx ring from 2nd tx ring.
4632 		 * Note assumption that tx vectors numericall follow rx vectors.
4633 		 */
4634 		index = (i & 0x7);
4635 		ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
4636 
4637 		if (i < 8) {
4638 			/* vector goes into second byte of register */
4639 			ivar = ivar & 0xFFFF00FF;
4640 			ivar |= ((vector | E1000_IVAR_VALID) << 8);
4641 		} else {
4642 			/* vector goes into fourth byte of register */
4643 			ivar = ivar & 0x00FFFFFF;
4644 			ivar |= (vector | E1000_IVAR_VALID) << 24;
4645 		}
4646 		E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
4647 
4648 		/* Accumulate interrupt-cause bits to enable */
4649 		igb->eims_mask |= (1 << vector);
4650 
4651 		vector ++;
4652 	}
4653 
4654 	ASSERT(vector == igb->intr_cnt);
4655 }
4656 
4657 /*
4658  * igb_setup_msix_82580 - setup 82580 adapter to use MSI-X interrupts
4659  *
4660  * 82580 uses same table approach at 82576 but has fewer entries.  Each
4661  * queue has a single entry in the table to which we write a vector number
4662  * along with a "valid" bit.  Vectors take a different position in the
4663  * register depending on * whether * they are numbered above or below 4.
4664  */
4665 static void
4666 igb_setup_msix_82580(igb_t *igb)
4667 {
4668 	struct e1000_hw *hw = &igb->hw;
4669 	uint32_t ivar, index, vector;
4670 	int i;
4671 
4672 	/* must enable msi-x capability before IVAR settings */
4673 	E1000_WRITE_REG(hw, E1000_GPIE, (E1000_GPIE_MSIX_MODE |
4674 	    E1000_GPIE_PBA | E1000_GPIE_NSICR | E1000_GPIE_EIAME));
4675 	/*
4676 	 * Set vector for tx ring 0 and other causes.
4677 	 * NOTE assumption that it is vector 0.
4678 	 * This is also interdependent with installation of interrupt service
4679 	 * routines in igb_add_intr_handlers().
4680 	 */
4681 
4682 	/* assign "other" causes to vector 0 */
4683 	vector = 0;
4684 	ivar = ((vector | E1000_IVAR_VALID) << 8);
4685 	E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
4686 
4687 	/* assign tx ring 0 to vector 0 */
4688 	ivar = ((vector | E1000_IVAR_VALID) << 8);
4689 	E1000_WRITE_REG(hw, E1000_IVAR0, ivar);
4690 
4691 	/* prepare to enable tx & other interrupt causes */
4692 	igb->eims_mask = (1 << vector);
4693 
4694 	vector ++;
4695 
4696 	for (i = 0; i < igb->num_rx_rings; i++) {
4697 		/*
4698 		 * Set vector for each rx ring
4699 		 */
4700 		index = (i >> 1);
4701 		ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
4702 
4703 		if (i & 1) {
4704 			/* vector goes into third byte of register */
4705 			ivar = ivar & 0xFF00FFFF;
4706 			ivar |= ((vector | E1000_IVAR_VALID) << 16);
4707 		} else {
4708 			/* vector goes into low byte of register */
4709 			ivar = ivar & 0xFFFFFF00;
4710 			ivar |= (vector | E1000_IVAR_VALID);
4711 		}
4712 		E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
4713 
4714 		/* Accumulate interrupt-cause bits to enable */
4715 		igb->eims_mask |= (1 << vector);
4716 
4717 		vector ++;
4718 	}
4719 
4720 	for (i = 1; i < igb->num_tx_rings; i++) {
4721 		/*
4722 		 * Set vector for each tx ring from 2nd tx ring.
4723 		 * Note assumption that tx vectors numericall follow rx vectors.
4724 		 */
4725 		index = (i >> 1);
4726 		ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
4727 
4728 		if (i & 1) {
4729 			/* vector goes into high byte of register */
4730 			ivar = ivar & 0x00FFFFFF;
4731 			ivar |= ((vector | E1000_IVAR_VALID) << 24);
4732 		} else {
4733 			/* vector goes into second byte of register */
4734 			ivar = ivar & 0xFFFF00FF;
4735 			ivar |= (vector | E1000_IVAR_VALID) << 8;
4736 		}
4737 		E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
4738 
4739 		/* Accumulate interrupt-cause bits to enable */
4740 		igb->eims_mask |= (1 << vector);
4741 
4742 		vector ++;
4743 	}
4744 	ASSERT(vector == igb->intr_cnt);
4745 }
4746 
4747 /*
4748  * igb_rem_intr_handlers - remove the interrupt handlers
4749  */
4750 static void
4751 igb_rem_intr_handlers(igb_t *igb)
4752 {
4753 	int i;
4754 	int rc;
4755 
4756 	for (i = 0; i < igb->intr_cnt; i++) {
4757 		rc = ddi_intr_remove_handler(igb->htable[i]);
4758 		if (rc != DDI_SUCCESS) {
4759 			IGB_DEBUGLOG_1(igb,
4760 			    "Remove intr handler failed: %d", rc);
4761 		}
4762 	}
4763 }
4764 
4765 /*
4766  * igb_rem_intrs - remove the allocated interrupts
4767  */
4768 static void
4769 igb_rem_intrs(igb_t *igb)
4770 {
4771 	int i;
4772 	int rc;
4773 
4774 	for (i = 0; i < igb->intr_cnt; i++) {
4775 		rc = ddi_intr_free(igb->htable[i]);
4776 		if (rc != DDI_SUCCESS) {
4777 			IGB_DEBUGLOG_1(igb,
4778 			    "Free intr failed: %d", rc);
4779 		}
4780 	}
4781 
4782 	kmem_free(igb->htable, igb->intr_size);
4783 	igb->htable = NULL;
4784 }
4785 
4786 /*
4787  * igb_enable_intrs - enable all the ddi interrupts
4788  */
4789 static int
4790 igb_enable_intrs(igb_t *igb)
4791 {
4792 	int i;
4793 	int rc;
4794 
4795 	/* Enable interrupts */
4796 	if (igb->intr_cap & DDI_INTR_FLAG_BLOCK) {
4797 		/* Call ddi_intr_block_enable() for MSI */
4798 		rc = ddi_intr_block_enable(igb->htable, igb->intr_cnt);
4799 		if (rc != DDI_SUCCESS) {
4800 			igb_log(igb,
4801 			    "Enable block intr failed: %d", rc);
4802 			return (IGB_FAILURE);
4803 		}
4804 	} else {
4805 		/* Call ddi_intr_enable() for Legacy/MSI non block enable */
4806 		for (i = 0; i < igb->intr_cnt; i++) {
4807 			rc = ddi_intr_enable(igb->htable[i]);
4808 			if (rc != DDI_SUCCESS) {
4809 				igb_log(igb,
4810 				    "Enable intr failed: %d", rc);
4811 				return (IGB_FAILURE);
4812 			}
4813 		}
4814 	}
4815 
4816 	return (IGB_SUCCESS);
4817 }
4818 
4819 /*
4820  * igb_disable_intrs - disable all the ddi interrupts
4821  */
4822 static int
4823 igb_disable_intrs(igb_t *igb)
4824 {
4825 	int i;
4826 	int rc;
4827 
4828 	/* Disable all interrupts */
4829 	if (igb->intr_cap & DDI_INTR_FLAG_BLOCK) {
4830 		rc = ddi_intr_block_disable(igb->htable, igb->intr_cnt);
4831 		if (rc != DDI_SUCCESS) {
4832 			igb_log(igb,
4833 			    "Disable block intr failed: %d", rc);
4834 			return (IGB_FAILURE);
4835 		}
4836 	} else {
4837 		for (i = 0; i < igb->intr_cnt; i++) {
4838 			rc = ddi_intr_disable(igb->htable[i]);
4839 			if (rc != DDI_SUCCESS) {
4840 				igb_log(igb,
4841 				    "Disable intr failed: %d", rc);
4842 				return (IGB_FAILURE);
4843 			}
4844 		}
4845 	}
4846 
4847 	return (IGB_SUCCESS);
4848 }
4849 
4850 /*
4851  * igb_get_phy_state - Get and save the parameters read from PHY registers
4852  */
4853 static void
4854 igb_get_phy_state(igb_t *igb)
4855 {
4856 	struct e1000_hw *hw = &igb->hw;
4857 	uint16_t phy_ctrl;
4858 	uint16_t phy_status;
4859 	uint16_t phy_an_adv;
4860 	uint16_t phy_an_exp;
4861 	uint16_t phy_ext_status;
4862 	uint16_t phy_1000t_ctrl;
4863 	uint16_t phy_1000t_status;
4864 	uint16_t phy_lp_able;
4865 
4866 	ASSERT(mutex_owned(&igb->gen_lock));
4867 
4868 	if (hw->phy.media_type == e1000_media_type_copper) {
4869 		(void) e1000_read_phy_reg(hw, PHY_CONTROL, &phy_ctrl);
4870 		(void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status);
4871 		(void) e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, &phy_an_adv);
4872 		(void) e1000_read_phy_reg(hw, PHY_AUTONEG_EXP, &phy_an_exp);
4873 		(void) e1000_read_phy_reg(hw, PHY_EXT_STATUS, &phy_ext_status);
4874 		(void) e1000_read_phy_reg(hw, PHY_1000T_CTRL, &phy_1000t_ctrl);
4875 		(void) e1000_read_phy_reg(hw,
4876 		    PHY_1000T_STATUS, &phy_1000t_status);
4877 		(void) e1000_read_phy_reg(hw, PHY_LP_ABILITY, &phy_lp_able);
4878 
4879 		igb->param_autoneg_cap =
4880 		    (phy_status & MII_SR_AUTONEG_CAPS) ? 1 : 0;
4881 		igb->param_pause_cap =
4882 		    (phy_an_adv & NWAY_AR_PAUSE) ? 1 : 0;
4883 		igb->param_asym_pause_cap =
4884 		    (phy_an_adv & NWAY_AR_ASM_DIR) ? 1 : 0;
4885 		igb->param_1000fdx_cap =
4886 		    ((phy_ext_status & IEEE_ESR_1000T_FD_CAPS) ||
4887 		    (phy_ext_status & IEEE_ESR_1000X_FD_CAPS)) ? 1 : 0;
4888 		igb->param_1000hdx_cap =
4889 		    ((phy_ext_status & IEEE_ESR_1000T_HD_CAPS) ||
4890 		    (phy_ext_status & IEEE_ESR_1000X_HD_CAPS)) ? 1 : 0;
4891 		igb->param_100t4_cap =
4892 		    (phy_status & MII_SR_100T4_CAPS) ? 1 : 0;
4893 		igb->param_100fdx_cap = ((phy_status & MII_SR_100X_FD_CAPS) ||
4894 		    (phy_status & MII_SR_100T2_FD_CAPS)) ? 1 : 0;
4895 		igb->param_100hdx_cap = ((phy_status & MII_SR_100X_HD_CAPS) ||
4896 		    (phy_status & MII_SR_100T2_HD_CAPS)) ? 1 : 0;
4897 		igb->param_10fdx_cap =
4898 		    (phy_status & MII_SR_10T_FD_CAPS) ? 1 : 0;
4899 		igb->param_10hdx_cap =
4900 		    (phy_status & MII_SR_10T_HD_CAPS) ? 1 : 0;
4901 		igb->param_rem_fault =
4902 		    (phy_status & MII_SR_REMOTE_FAULT) ? 1 : 0;
4903 
4904 		igb->param_adv_autoneg_cap = hw->mac.autoneg;
4905 		igb->param_adv_pause_cap =
4906 		    (phy_an_adv & NWAY_AR_PAUSE) ? 1 : 0;
4907 		igb->param_adv_asym_pause_cap =
4908 		    (phy_an_adv & NWAY_AR_ASM_DIR) ? 1 : 0;
4909 		igb->param_adv_1000hdx_cap =
4910 		    (phy_1000t_ctrl & CR_1000T_HD_CAPS) ? 1 : 0;
4911 		igb->param_adv_100t4_cap =
4912 		    (phy_an_adv & NWAY_AR_100T4_CAPS) ? 1 : 0;
4913 		igb->param_adv_rem_fault =
4914 		    (phy_an_adv & NWAY_AR_REMOTE_FAULT) ? 1 : 0;
4915 		if (igb->param_adv_autoneg_cap == 1) {
4916 			igb->param_adv_1000fdx_cap =
4917 			    (phy_1000t_ctrl & CR_1000T_FD_CAPS) ? 1 : 0;
4918 			igb->param_adv_100fdx_cap =
4919 			    (phy_an_adv & NWAY_AR_100TX_FD_CAPS) ? 1 : 0;
4920 			igb->param_adv_100hdx_cap =
4921 			    (phy_an_adv & NWAY_AR_100TX_HD_CAPS) ? 1 : 0;
4922 			igb->param_adv_10fdx_cap =
4923 			    (phy_an_adv & NWAY_AR_10T_FD_CAPS) ? 1 : 0;
4924 			igb->param_adv_10hdx_cap =
4925 			    (phy_an_adv & NWAY_AR_10T_HD_CAPS) ? 1 : 0;
4926 		}
4927 
4928 		igb->param_lp_autoneg_cap =
4929 		    (phy_an_exp & NWAY_ER_LP_NWAY_CAPS) ? 1 : 0;
4930 		igb->param_lp_pause_cap =
4931 		    (phy_lp_able & NWAY_LPAR_PAUSE) ? 1 : 0;
4932 		igb->param_lp_asym_pause_cap =
4933 		    (phy_lp_able & NWAY_LPAR_ASM_DIR) ? 1 : 0;
4934 		igb->param_lp_1000fdx_cap =
4935 		    (phy_1000t_status & SR_1000T_LP_FD_CAPS) ? 1 : 0;
4936 		igb->param_lp_1000hdx_cap =
4937 		    (phy_1000t_status & SR_1000T_LP_HD_CAPS) ? 1 : 0;
4938 		igb->param_lp_100t4_cap =
4939 		    (phy_lp_able & NWAY_LPAR_100T4_CAPS) ? 1 : 0;
4940 		igb->param_lp_100fdx_cap =
4941 		    (phy_lp_able & NWAY_LPAR_100TX_FD_CAPS) ? 1 : 0;
4942 		igb->param_lp_100hdx_cap =
4943 		    (phy_lp_able & NWAY_LPAR_100TX_HD_CAPS) ? 1 : 0;
4944 		igb->param_lp_10fdx_cap =
4945 		    (phy_lp_able & NWAY_LPAR_10T_FD_CAPS) ? 1 : 0;
4946 		igb->param_lp_10hdx_cap =
4947 		    (phy_lp_able & NWAY_LPAR_10T_HD_CAPS) ? 1 : 0;
4948 		igb->param_lp_rem_fault =
4949 		    (phy_lp_able & NWAY_LPAR_REMOTE_FAULT) ? 1 : 0;
4950 	} else {
4951 		/*
4952 		 * 1Gig Fiber adapter only offers 1Gig Full Duplex.
4953 		 */
4954 		igb->param_autoneg_cap = 0;
4955 		igb->param_pause_cap = 1;
4956 		igb->param_asym_pause_cap = 1;
4957 		igb->param_1000fdx_cap = 1;
4958 		igb->param_1000hdx_cap = 0;
4959 		igb->param_100t4_cap = 0;
4960 		igb->param_100fdx_cap = 0;
4961 		igb->param_100hdx_cap = 0;
4962 		igb->param_10fdx_cap = 0;
4963 		igb->param_10hdx_cap = 0;
4964 
4965 		igb->param_adv_autoneg_cap = 0;
4966 		igb->param_adv_pause_cap = 1;
4967 		igb->param_adv_asym_pause_cap = 1;
4968 		igb->param_adv_1000fdx_cap = 1;
4969 		igb->param_adv_1000hdx_cap = 0;
4970 		igb->param_adv_100t4_cap = 0;
4971 		igb->param_adv_100fdx_cap = 0;
4972 		igb->param_adv_100hdx_cap = 0;
4973 		igb->param_adv_10fdx_cap = 0;
4974 		igb->param_adv_10hdx_cap = 0;
4975 
4976 		igb->param_lp_autoneg_cap = 0;
4977 		igb->param_lp_pause_cap = 0;
4978 		igb->param_lp_asym_pause_cap = 0;
4979 		igb->param_lp_1000fdx_cap = 0;
4980 		igb->param_lp_1000hdx_cap = 0;
4981 		igb->param_lp_100t4_cap = 0;
4982 		igb->param_lp_100fdx_cap = 0;
4983 		igb->param_lp_100hdx_cap = 0;
4984 		igb->param_lp_10fdx_cap = 0;
4985 		igb->param_lp_10hdx_cap = 0;
4986 		igb->param_lp_rem_fault = 0;
4987 	}
4988 }
4989 
4990 /*
4991  * synchronize the adv* and en* parameters.
4992  *
4993  * See comments in <sys/dld.h> for details of the *_en_*
4994  * parameters. The usage of ndd for setting adv parameters will
4995  * synchronize all the en parameters with the e1000g parameters,
4996  * implicitly disabling any settings made via dladm.
4997  */
4998 static void
4999 igb_param_sync(igb_t *igb)
5000 {
5001 	igb->param_en_1000fdx_cap = igb->param_adv_1000fdx_cap;
5002 	igb->param_en_1000hdx_cap = igb->param_adv_1000hdx_cap;
5003 	igb->param_en_100t4_cap = igb->param_adv_100t4_cap;
5004 	igb->param_en_100fdx_cap = igb->param_adv_100fdx_cap;
5005 	igb->param_en_100hdx_cap = igb->param_adv_100hdx_cap;
5006 	igb->param_en_10fdx_cap = igb->param_adv_10fdx_cap;
5007 	igb->param_en_10hdx_cap = igb->param_adv_10hdx_cap;
5008 }
5009 
5010 /*
5011  * igb_get_driver_control
5012  */
5013 static void
5014 igb_get_driver_control(struct e1000_hw *hw)
5015 {
5016 	uint32_t ctrl_ext;
5017 
5018 	/* Notify firmware that driver is in control of device */
5019 	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
5020 	ctrl_ext |= E1000_CTRL_EXT_DRV_LOAD;
5021 	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
5022 }
5023 
5024 /*
5025  * igb_release_driver_control
5026  */
5027 static void
5028 igb_release_driver_control(struct e1000_hw *hw)
5029 {
5030 	uint32_t ctrl_ext;
5031 
5032 	/* Notify firmware that driver is no longer in control of device */
5033 	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
5034 	ctrl_ext &= ~E1000_CTRL_EXT_DRV_LOAD;
5035 	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
5036 }
5037 
5038 /*
5039  * igb_atomic_reserve - Atomic decrease operation
5040  */
5041 int
5042 igb_atomic_reserve(uint32_t *count_p, uint32_t n)
5043 {
5044 	uint32_t oldval;
5045 	uint32_t newval;
5046 
5047 	/* ATOMICALLY */
5048 	do {
5049 		oldval = *count_p;
5050 		if (oldval < n)
5051 			return (-1);
5052 		newval = oldval - n;
5053 	} while (atomic_cas_32(count_p, oldval, newval) != oldval);
5054 
5055 	return (newval);
5056 }
5057 
5058 /*
5059  * FMA support
5060  */
5061 
5062 int
5063 igb_check_acc_handle(ddi_acc_handle_t handle)
5064 {
5065 	ddi_fm_error_t de;
5066 
5067 	ddi_fm_acc_err_get(handle, &de, DDI_FME_VERSION);
5068 	ddi_fm_acc_err_clear(handle, DDI_FME_VERSION);
5069 	return (de.fme_status);
5070 }
5071 
5072 int
5073 igb_check_dma_handle(ddi_dma_handle_t handle)
5074 {
5075 	ddi_fm_error_t de;
5076 
5077 	ddi_fm_dma_err_get(handle, &de, DDI_FME_VERSION);
5078 	return (de.fme_status);
5079 }
5080 
5081 /*
5082  * The IO fault service error handling callback function
5083  */
5084 /*ARGSUSED*/
5085 static int
5086 igb_fm_error_cb(dev_info_t *dip, ddi_fm_error_t *err, const void *impl_data)
5087 {
5088 	/*
5089 	 * as the driver can always deal with an error in any dma or
5090 	 * access handle, we can just return the fme_status value.
5091 	 */
5092 	pci_ereport_post(dip, err, NULL);
5093 	return (err->fme_status);
5094 }
5095 
5096 static void
5097 igb_fm_init(igb_t *igb)
5098 {
5099 	ddi_iblock_cookie_t iblk;
5100 	int fma_dma_flag;
5101 
5102 	/* Only register with IO Fault Services if we have some capability */
5103 	if (igb->fm_capabilities & DDI_FM_ACCCHK_CAPABLE) {
5104 		igb_regs_acc_attr.devacc_attr_access = DDI_FLAGERR_ACC;
5105 	} else {
5106 		igb_regs_acc_attr.devacc_attr_access = DDI_DEFAULT_ACC;
5107 	}
5108 
5109 	if (igb->fm_capabilities & DDI_FM_DMACHK_CAPABLE) {
5110 		fma_dma_flag = 1;
5111 	} else {
5112 		fma_dma_flag = 0;
5113 	}
5114 
5115 	(void) igb_set_fma_flags(fma_dma_flag);
5116 
5117 	if (igb->fm_capabilities) {
5118 
5119 		/* Register capabilities with IO Fault Services */
5120 		ddi_fm_init(igb->dip, &igb->fm_capabilities, &iblk);
5121 
5122 		/*
5123 		 * Initialize pci ereport capabilities if ereport capable
5124 		 */
5125 		if (DDI_FM_EREPORT_CAP(igb->fm_capabilities) ||
5126 		    DDI_FM_ERRCB_CAP(igb->fm_capabilities))
5127 			pci_ereport_setup(igb->dip);
5128 
5129 		/*
5130 		 * Register error callback if error callback capable
5131 		 */
5132 		if (DDI_FM_ERRCB_CAP(igb->fm_capabilities))
5133 			ddi_fm_handler_register(igb->dip,
5134 			    igb_fm_error_cb, (void*) igb);
5135 	}
5136 }
5137 
5138 static void
5139 igb_fm_fini(igb_t *igb)
5140 {
5141 	/* Only unregister FMA capabilities if we registered some */
5142 	if (igb->fm_capabilities) {
5143 
5144 		/*
5145 		 * Release any resources allocated by pci_ereport_setup()
5146 		 */
5147 		if (DDI_FM_EREPORT_CAP(igb->fm_capabilities) ||
5148 		    DDI_FM_ERRCB_CAP(igb->fm_capabilities))
5149 			pci_ereport_teardown(igb->dip);
5150 
5151 		/*
5152 		 * Un-register error callback if error callback capable
5153 		 */
5154 		if (DDI_FM_ERRCB_CAP(igb->fm_capabilities))
5155 			ddi_fm_handler_unregister(igb->dip);
5156 
5157 		/* Unregister from IO Fault Services */
5158 		ddi_fm_fini(igb->dip);
5159 	}
5160 }
5161 
5162 void
5163 igb_fm_ereport(igb_t *igb, char *detail)
5164 {
5165 	uint64_t ena;
5166 	char buf[FM_MAX_CLASS];
5167 
5168 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s", DDI_FM_DEVICE, detail);
5169 	ena = fm_ena_generate(0, FM_ENA_FMT1);
5170 	if (DDI_FM_EREPORT_CAP(igb->fm_capabilities)) {
5171 		ddi_fm_ereport_post(igb->dip, buf, ena, DDI_NOSLEEP,
5172 		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0, NULL);
5173 	}
5174 }
5175