xref: /illumos-gate/usr/src/uts/common/io/ixgbe/ixgbe_main.c (revision 1bff1300cebf1ea8e11ce928b10e208097e67f24)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright(c) 2007-2010 Intel Corporation. All rights reserved.
24  */
25 
26 /*
27  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Copyright 2020 Joyent, Inc.
29  * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
30  * Copyright (c) 2013 Saso Kiselkov. All rights reserved.
31  * Copyright (c) 2013 OSN Online Service Nuernberg GmbH. All rights reserved.
32  * Copyright 2016 OmniTI Computer Consulting, Inc. All rights reserved.
33  */
34 
35 #include "ixgbe_sw.h"
36 
/*
 * Driver identification string: used as the module description in
 * ixgbe_modldrv and logged once attach completes.
 */
static char ixgbe_ident[] = "Intel 10Gb Ethernet";
38 
39 /*
40  * Local function protoypes
41  */
42 static int ixgbe_register_mac(ixgbe_t *);
43 static int ixgbe_identify_hardware(ixgbe_t *);
44 static int ixgbe_regs_map(ixgbe_t *);
45 static void ixgbe_init_properties(ixgbe_t *);
46 static int ixgbe_init_driver_settings(ixgbe_t *);
47 static void ixgbe_init_locks(ixgbe_t *);
48 static void ixgbe_destroy_locks(ixgbe_t *);
49 static int ixgbe_init(ixgbe_t *);
50 static int ixgbe_chip_start(ixgbe_t *);
51 static void ixgbe_chip_stop(ixgbe_t *);
52 static int ixgbe_reset(ixgbe_t *);
53 static void ixgbe_tx_clean(ixgbe_t *);
54 static boolean_t ixgbe_tx_drain(ixgbe_t *);
55 static boolean_t ixgbe_rx_drain(ixgbe_t *);
56 static int ixgbe_alloc_rings(ixgbe_t *);
57 static void ixgbe_free_rings(ixgbe_t *);
58 static int ixgbe_alloc_rx_data(ixgbe_t *);
59 static void ixgbe_free_rx_data(ixgbe_t *);
60 static int ixgbe_setup_rings(ixgbe_t *);
61 static int ixgbe_setup_rx(ixgbe_t *);
62 static void ixgbe_setup_tx(ixgbe_t *);
63 static void ixgbe_setup_rx_ring(ixgbe_rx_ring_t *);
64 static void ixgbe_setup_tx_ring(ixgbe_tx_ring_t *);
65 static void ixgbe_setup_rss(ixgbe_t *);
66 static void ixgbe_setup_vmdq(ixgbe_t *);
67 static void ixgbe_setup_vmdq_rss(ixgbe_t *);
68 static void ixgbe_setup_rss_table(ixgbe_t *);
69 static void ixgbe_init_unicst(ixgbe_t *);
70 static int ixgbe_init_vlan(ixgbe_t *);
71 static int ixgbe_unicst_find(ixgbe_t *, const uint8_t *);
72 static void ixgbe_setup_multicst(ixgbe_t *);
73 static void ixgbe_get_hw_state(ixgbe_t *);
74 static void ixgbe_setup_vmdq_rss_conf(ixgbe_t *ixgbe);
75 static void ixgbe_get_conf(ixgbe_t *);
76 static void ixgbe_init_params(ixgbe_t *);
77 static int ixgbe_get_prop(ixgbe_t *, char *, int, int, int);
78 static void ixgbe_driver_link_check(ixgbe_t *);
79 static void ixgbe_sfp_check(void *);
80 static void ixgbe_overtemp_check(void *);
81 static void ixgbe_phy_check(void *);
82 static void ixgbe_link_timer(void *);
83 static void ixgbe_local_timer(void *);
84 static void ixgbe_arm_watchdog_timer(ixgbe_t *);
85 static void ixgbe_restart_watchdog_timer(ixgbe_t *);
86 static void ixgbe_disable_adapter_interrupts(ixgbe_t *);
87 static void ixgbe_enable_adapter_interrupts(ixgbe_t *);
88 static boolean_t is_valid_mac_addr(uint8_t *);
89 static boolean_t ixgbe_stall_check(ixgbe_t *);
90 static boolean_t ixgbe_set_loopback_mode(ixgbe_t *, uint32_t);
91 static void ixgbe_set_internal_mac_loopback(ixgbe_t *);
92 static boolean_t ixgbe_find_mac_address(ixgbe_t *);
93 static int ixgbe_alloc_intrs(ixgbe_t *);
94 static int ixgbe_alloc_intr_handles(ixgbe_t *, int);
95 static int ixgbe_add_intr_handlers(ixgbe_t *);
96 static void ixgbe_map_rxring_to_vector(ixgbe_t *, int, int);
97 static void ixgbe_map_txring_to_vector(ixgbe_t *, int, int);
98 static void ixgbe_setup_ivar(ixgbe_t *, uint16_t, uint8_t, int8_t);
99 static void ixgbe_enable_ivar(ixgbe_t *, uint16_t, int8_t);
100 static void ixgbe_disable_ivar(ixgbe_t *, uint16_t, int8_t);
101 static uint32_t ixgbe_get_hw_rx_index(ixgbe_t *ixgbe, uint32_t sw_rx_index);
102 static int ixgbe_map_intrs_to_vectors(ixgbe_t *);
103 static void ixgbe_setup_adapter_vector(ixgbe_t *);
104 static void ixgbe_rem_intr_handlers(ixgbe_t *);
105 static void ixgbe_rem_intrs(ixgbe_t *);
106 static int ixgbe_enable_intrs(ixgbe_t *);
107 static int ixgbe_disable_intrs(ixgbe_t *);
108 static uint_t ixgbe_intr_legacy(void *, void *);
109 static uint_t ixgbe_intr_msi(void *, void *);
110 static uint_t ixgbe_intr_msix(void *, void *);
111 static void ixgbe_intr_rx_work(ixgbe_rx_ring_t *);
112 static void ixgbe_intr_tx_work(ixgbe_tx_ring_t *);
113 static void ixgbe_intr_other_work(ixgbe_t *, uint32_t);
114 static void ixgbe_get_driver_control(struct ixgbe_hw *);
115 static int ixgbe_addmac(void *, const uint8_t *);
116 static int ixgbe_remmac(void *, const uint8_t *);
117 static int ixgbe_addvlan(mac_group_driver_t, uint16_t);
118 static int ixgbe_remvlan(mac_group_driver_t, uint16_t);
119 static void ixgbe_release_driver_control(struct ixgbe_hw *);
120 
121 static int ixgbe_attach(dev_info_t *, ddi_attach_cmd_t);
122 static int ixgbe_detach(dev_info_t *, ddi_detach_cmd_t);
123 static int ixgbe_resume(dev_info_t *);
124 static int ixgbe_suspend(dev_info_t *);
125 static int ixgbe_quiesce(dev_info_t *);
126 static void ixgbe_unconfigure(dev_info_t *, ixgbe_t *);
127 static uint8_t *ixgbe_mc_table_itr(struct ixgbe_hw *, uint8_t **, uint32_t *);
128 static int ixgbe_cbfunc(dev_info_t *, ddi_cb_action_t, void *, void *, void *);
129 static int ixgbe_intr_cb_register(ixgbe_t *);
130 static int ixgbe_intr_adjust(ixgbe_t *, ddi_cb_action_t, int);
131 
132 static int ixgbe_fm_error_cb(dev_info_t *dip, ddi_fm_error_t *err,
133     const void *impl_data);
134 static void ixgbe_fm_init(ixgbe_t *);
135 static void ixgbe_fm_fini(ixgbe_t *);
136 
/*
 * Names of the driver-private properties. The table is NULL-terminated and
 * is handed to the MAC layer through mac_register_t.m_priv_props in
 * ixgbe_register_mac().
 */
char *ixgbe_priv_props[] = {
	"_tx_copy_thresh",
	"_tx_recycle_thresh",
	"_tx_overload_thresh",
	"_tx_resched_thresh",
	"_rx_copy_thresh",
	"_rx_limit_per_intr",
	"_intr_throttling",
	"_adv_pause_cap",
	"_adv_asym_pause_cap",
	NULL
};

/*
 * Number of slots in ixgbe_priv_props[], including the terminating NULL.
 * The divisor is the array's own element size, so the count stays right
 * whatever the element type is; dividing by the size of an unrelated type
 * would yield a meaningless value.
 */
#define	IXGBE_MAX_PRIV_PROPS \
	(sizeof (ixgbe_priv_props) / sizeof (ixgbe_priv_props[0]))
152 
153 static struct cb_ops ixgbe_cb_ops = {
154 	nulldev,		/* cb_open */
155 	nulldev,		/* cb_close */
156 	nodev,			/* cb_strategy */
157 	nodev,			/* cb_print */
158 	nodev,			/* cb_dump */
159 	nodev,			/* cb_read */
160 	nodev,			/* cb_write */
161 	nodev,			/* cb_ioctl */
162 	nodev,			/* cb_devmap */
163 	nodev,			/* cb_mmap */
164 	nodev,			/* cb_segmap */
165 	nochpoll,		/* cb_chpoll */
166 	ddi_prop_op,		/* cb_prop_op */
167 	NULL,			/* cb_stream */
168 	D_MP | D_HOTPLUG,	/* cb_flag */
169 	CB_REV,			/* cb_rev */
170 	nodev,			/* cb_aread */
171 	nodev			/* cb_awrite */
172 };
173 
174 static struct dev_ops ixgbe_dev_ops = {
175 	DEVO_REV,		/* devo_rev */
176 	0,			/* devo_refcnt */
177 	NULL,			/* devo_getinfo */
178 	nulldev,		/* devo_identify */
179 	nulldev,		/* devo_probe */
180 	ixgbe_attach,		/* devo_attach */
181 	ixgbe_detach,		/* devo_detach */
182 	nodev,			/* devo_reset */
183 	&ixgbe_cb_ops,		/* devo_cb_ops */
184 	NULL,			/* devo_bus_ops */
185 	ddi_power,		/* devo_power */
186 	ixgbe_quiesce,		/* devo_quiesce */
187 };
188 
189 static struct modldrv ixgbe_modldrv = {
190 	&mod_driverops,		/* Type of module.  This one is a driver */
191 	ixgbe_ident,		/* Discription string */
192 	&ixgbe_dev_ops		/* driver ops */
193 };
194 
195 static struct modlinkage ixgbe_modlinkage = {
196 	MODREV_1, &ixgbe_modldrv, NULL
197 };
198 
199 /*
200  * Access attributes for register mapping
201  */
202 ddi_device_acc_attr_t ixgbe_regs_acc_attr = {
203 	DDI_DEVICE_ATTR_V1,
204 	DDI_STRUCTURE_LE_ACC,
205 	DDI_STRICTORDER_ACC,
206 	DDI_FLAGERR_ACC
207 };
208 
209 /*
210  * Loopback property
211  */
212 static lb_property_t lb_normal = {
213 	normal,	"normal", IXGBE_LB_NONE
214 };
215 
216 static lb_property_t lb_mac = {
217 	internal, "MAC", IXGBE_LB_INTERNAL_MAC
218 };
219 
220 static lb_property_t lb_external = {
221 	external, "External", IXGBE_LB_EXTERNAL
222 };
223 
224 #define	IXGBE_M_CALLBACK_FLAGS \
225 	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO)
226 
227 static mac_callbacks_t ixgbe_m_callbacks = {
228 	IXGBE_M_CALLBACK_FLAGS,
229 	ixgbe_m_stat,
230 	ixgbe_m_start,
231 	ixgbe_m_stop,
232 	ixgbe_m_promisc,
233 	ixgbe_m_multicst,
234 	NULL,
235 	NULL,
236 	NULL,
237 	ixgbe_m_ioctl,
238 	ixgbe_m_getcapab,
239 	NULL,
240 	NULL,
241 	ixgbe_m_setprop,
242 	ixgbe_m_getprop,
243 	ixgbe_m_propinfo
244 };
245 
246 /*
247  * Initialize capabilities of each supported adapter type
248  */
249 static adapter_info_t ixgbe_82598eb_cap = {
250 	64,		/* maximum number of rx queues */
251 	1,		/* minimum number of rx queues */
252 	64,		/* default number of rx queues */
253 	16,		/* maximum number of rx groups */
254 	1,		/* minimum number of rx groups */
255 	1,		/* default number of rx groups */
256 	32,		/* maximum number of tx queues */
257 	1,		/* minimum number of tx queues */
258 	8,		/* default number of tx queues */
259 	16366,		/* maximum MTU size */
260 	0xFFFF,		/* maximum interrupt throttle rate */
261 	0,		/* minimum interrupt throttle rate */
262 	200,		/* default interrupt throttle rate */
263 	18,		/* maximum total msix vectors */
264 	16,		/* maximum number of ring vectors */
265 	2,		/* maximum number of other vectors */
266 	IXGBE_EICR_LSC,	/* "other" interrupt types handled */
267 	0,		/* "other" interrupt types enable mask */
268 	(IXGBE_FLAG_DCA_CAPABLE	/* capability flags */
269 	| IXGBE_FLAG_RSS_CAPABLE
270 	| IXGBE_FLAG_VMDQ_CAPABLE)
271 };
272 
273 static adapter_info_t ixgbe_82599eb_cap = {
274 	128,		/* maximum number of rx queues */
275 	1,		/* minimum number of rx queues */
276 	128,		/* default number of rx queues */
277 	64,		/* maximum number of rx groups */
278 	1,		/* minimum number of rx groups */
279 	1,		/* default number of rx groups */
280 	128,		/* maximum number of tx queues */
281 	1,		/* minimum number of tx queues */
282 	8,		/* default number of tx queues */
283 	15500,		/* maximum MTU size */
284 	0xFF8,		/* maximum interrupt throttle rate */
285 	0,		/* minimum interrupt throttle rate */
286 	200,		/* default interrupt throttle rate */
287 	64,		/* maximum total msix vectors */
288 	16,		/* maximum number of ring vectors */
289 	2,		/* maximum number of other vectors */
290 	(IXGBE_EICR_LSC
291 	| IXGBE_EICR_GPI_SDP1
292 	| IXGBE_EICR_GPI_SDP2), /* "other" interrupt types handled */
293 
294 	(IXGBE_SDP1_GPIEN
295 	| IXGBE_SDP2_GPIEN), /* "other" interrupt types enable mask */
296 
297 	(IXGBE_FLAG_DCA_CAPABLE
298 	| IXGBE_FLAG_RSS_CAPABLE
299 	| IXGBE_FLAG_VMDQ_CAPABLE
300 	| IXGBE_FLAG_RSC_CAPABLE
301 	| IXGBE_FLAG_SFP_PLUG_CAPABLE) /* capability flags */
302 };
303 
304 static adapter_info_t ixgbe_X540_cap = {
305 	128,		/* maximum number of rx queues */
306 	1,		/* minimum number of rx queues */
307 	128,		/* default number of rx queues */
308 	64,		/* maximum number of rx groups */
309 	1,		/* minimum number of rx groups */
310 	1,		/* default number of rx groups */
311 	128,		/* maximum number of tx queues */
312 	1,		/* minimum number of tx queues */
313 	8,		/* default number of tx queues */
314 	15500,		/* maximum MTU size */
315 	0xFF8,		/* maximum interrupt throttle rate */
316 	0,		/* minimum interrupt throttle rate */
317 	200,		/* default interrupt throttle rate */
318 	64,		/* maximum total msix vectors */
319 	16,		/* maximum number of ring vectors */
320 	2,		/* maximum number of other vectors */
321 	(IXGBE_EICR_LSC
322 	| IXGBE_EICR_GPI_SDP1_X540
323 	| IXGBE_EICR_GPI_SDP2_X540), /* "other" interrupt types handled */
324 
325 	(IXGBE_SDP1_GPIEN_X540
326 	| IXGBE_SDP2_GPIEN_X540), /* "other" interrupt types enable mask */
327 
328 	(IXGBE_FLAG_DCA_CAPABLE
329 	| IXGBE_FLAG_RSS_CAPABLE
330 	| IXGBE_FLAG_VMDQ_CAPABLE
331 	| IXGBE_FLAG_RSC_CAPABLE) /* capability flags */
332 };
333 
334 static adapter_info_t ixgbe_X550_cap = {
335 	128,		/* maximum number of rx queues */
336 	1,		/* minimum number of rx queues */
337 	128,		/* default number of rx queues */
338 	64,		/* maximum number of rx groups */
339 	1,		/* minimum number of rx groups */
340 	1,		/* default number of rx groups */
341 	128,		/* maximum number of tx queues */
342 	1,		/* minimum number of tx queues */
343 	8,		/* default number of tx queues */
344 	15500,		/* maximum MTU size */
345 	0xFF8,		/* maximum interrupt throttle rate */
346 	0,		/* minimum interrupt throttle rate */
347 	0x200,		/* default interrupt throttle rate */
348 	64,		/* maximum total msix vectors */
349 	16,		/* maximum number of ring vectors */
350 	2,		/* maximum number of other vectors */
351 	IXGBE_EICR_LSC,	/* "other" interrupt types handled */
352 	0,		/* "other" interrupt types enable mask */
353 	(IXGBE_FLAG_RSS_CAPABLE
354 	| IXGBE_FLAG_VMDQ_CAPABLE
355 	| IXGBE_FLAG_RSC_CAPABLE) /* capability flags */
356 };
357 
358 /*
359  * Module Initialization Functions.
360  */
361 
362 int
363 _init(void)
364 {
365 	int status;
366 
367 	mac_init_ops(&ixgbe_dev_ops, MODULE_NAME);
368 
369 	status = mod_install(&ixgbe_modlinkage);
370 
371 	if (status != DDI_SUCCESS) {
372 		mac_fini_ops(&ixgbe_dev_ops);
373 	}
374 
375 	return (status);
376 }
377 
378 int
379 _fini(void)
380 {
381 	int status;
382 
383 	status = mod_remove(&ixgbe_modlinkage);
384 
385 	if (status == DDI_SUCCESS) {
386 		mac_fini_ops(&ixgbe_dev_ops);
387 	}
388 
389 	return (status);
390 }
391 
392 int
393 _info(struct modinfo *modinfop)
394 {
395 	int status;
396 
397 	status = mod_info(&ixgbe_modlinkage, modinfop);
398 
399 	return (status);
400 }
401 
402 /*
403  * ixgbe_attach - Driver attach.
404  *
405  * This function is the device specific initialization entry
406  * point. This entry point is required and must be written.
407  * The DDI_ATTACH command must be provided in the attach entry
408  * point. When attach() is called with cmd set to DDI_ATTACH,
409  * all normal kernel services (such as kmem_alloc(9F)) are
410  * available for use by the driver.
411  *
412  * The attach() function will be called once for each instance
413  * of  the  device  on  the  system with cmd set to DDI_ATTACH.
414  * Until attach() succeeds, the only driver entry points which
415  * may be called are open(9E) and getinfo(9E).
416  */
417 static int
418 ixgbe_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
419 {
420 	ixgbe_t *ixgbe;
421 	struct ixgbe_osdep *osdep;
422 	struct ixgbe_hw *hw;
423 	int instance;
424 	char taskqname[32];
425 
426 	/*
427 	 * Check the command and perform corresponding operations
428 	 */
429 	switch (cmd) {
430 	default:
431 		return (DDI_FAILURE);
432 
433 	case DDI_RESUME:
434 		return (ixgbe_resume(devinfo));
435 
436 	case DDI_ATTACH:
437 		break;
438 	}
439 
440 	/* Get the device instance */
441 	instance = ddi_get_instance(devinfo);
442 
443 	/* Allocate memory for the instance data structure */
444 	ixgbe = kmem_zalloc(sizeof (ixgbe_t), KM_SLEEP);
445 
446 	ixgbe->dip = devinfo;
447 	ixgbe->instance = instance;
448 
449 	hw = &ixgbe->hw;
450 	osdep = &ixgbe->osdep;
451 	hw->back = osdep;
452 	osdep->ixgbe = ixgbe;
453 
454 	/* Attach the instance pointer to the dev_info data structure */
455 	ddi_set_driver_private(devinfo, ixgbe);
456 
457 	/*
458 	 * Initialize for FMA support
459 	 */
460 	ixgbe->fm_capabilities = ixgbe_get_prop(ixgbe, PROP_FM_CAPABLE,
461 	    0, 0x0f, DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE |
462 	    DDI_FM_DMACHK_CAPABLE | DDI_FM_ERRCB_CAPABLE);
463 	ixgbe_fm_init(ixgbe);
464 	ixgbe->attach_progress |= ATTACH_PROGRESS_FM_INIT;
465 
466 	/*
467 	 * Map PCI config space registers
468 	 */
469 	if (pci_config_setup(devinfo, &osdep->cfg_handle) != DDI_SUCCESS) {
470 		ixgbe_error(ixgbe, "Failed to map PCI configurations");
471 		goto attach_fail;
472 	}
473 	ixgbe->attach_progress |= ATTACH_PROGRESS_PCI_CONFIG;
474 
475 	/*
476 	 * Identify the chipset family
477 	 */
478 	if (ixgbe_identify_hardware(ixgbe) != IXGBE_SUCCESS) {
479 		ixgbe_error(ixgbe, "Failed to identify hardware");
480 		goto attach_fail;
481 	}
482 
483 	/*
484 	 * Map device registers
485 	 */
486 	if (ixgbe_regs_map(ixgbe) != IXGBE_SUCCESS) {
487 		ixgbe_error(ixgbe, "Failed to map device registers");
488 		goto attach_fail;
489 	}
490 	ixgbe->attach_progress |= ATTACH_PROGRESS_REGS_MAP;
491 
492 	/*
493 	 * Initialize driver parameters
494 	 */
495 	ixgbe_init_properties(ixgbe);
496 	ixgbe->attach_progress |= ATTACH_PROGRESS_PROPS;
497 
498 	/*
499 	 * Register interrupt callback
500 	 */
501 	if (ixgbe_intr_cb_register(ixgbe) != IXGBE_SUCCESS) {
502 		ixgbe_error(ixgbe, "Failed to register interrupt callback");
503 		goto attach_fail;
504 	}
505 
506 	/*
507 	 * Allocate interrupts
508 	 */
509 	if (ixgbe_alloc_intrs(ixgbe) != IXGBE_SUCCESS) {
510 		ixgbe_error(ixgbe, "Failed to allocate interrupts");
511 		goto attach_fail;
512 	}
513 	ixgbe->attach_progress |= ATTACH_PROGRESS_ALLOC_INTR;
514 
515 	/*
516 	 * Allocate rx/tx rings based on the ring numbers.
517 	 * The actual numbers of rx/tx rings are decided by the number of
518 	 * allocated interrupt vectors, so we should allocate the rings after
519 	 * interrupts are allocated.
520 	 */
521 	if (ixgbe_alloc_rings(ixgbe) != IXGBE_SUCCESS) {
522 		ixgbe_error(ixgbe, "Failed to allocate rx and tx rings");
523 		goto attach_fail;
524 	}
525 	ixgbe->attach_progress |= ATTACH_PROGRESS_ALLOC_RINGS;
526 
527 	/*
528 	 * Map rings to interrupt vectors
529 	 */
530 	if (ixgbe_map_intrs_to_vectors(ixgbe) != IXGBE_SUCCESS) {
531 		ixgbe_error(ixgbe, "Failed to map interrupts to vectors");
532 		goto attach_fail;
533 	}
534 
535 	/*
536 	 * Add interrupt handlers
537 	 */
538 	if (ixgbe_add_intr_handlers(ixgbe) != IXGBE_SUCCESS) {
539 		ixgbe_error(ixgbe, "Failed to add interrupt handlers");
540 		goto attach_fail;
541 	}
542 	ixgbe->attach_progress |= ATTACH_PROGRESS_ADD_INTR;
543 
544 	/*
545 	 * Create a taskq for sfp-change
546 	 */
547 	(void) sprintf(taskqname, "ixgbe%d_sfp_taskq", instance);
548 	if ((ixgbe->sfp_taskq = ddi_taskq_create(devinfo, taskqname,
549 	    1, TASKQ_DEFAULTPRI, 0)) == NULL) {
550 		ixgbe_error(ixgbe, "sfp_taskq create failed");
551 		goto attach_fail;
552 	}
553 	ixgbe->attach_progress |= ATTACH_PROGRESS_SFP_TASKQ;
554 
555 	/*
556 	 * Create a taskq for over-temp
557 	 */
558 	(void) sprintf(taskqname, "ixgbe%d_overtemp_taskq", instance);
559 	if ((ixgbe->overtemp_taskq = ddi_taskq_create(devinfo, taskqname,
560 	    1, TASKQ_DEFAULTPRI, 0)) == NULL) {
561 		ixgbe_error(ixgbe, "overtemp_taskq create failed");
562 		goto attach_fail;
563 	}
564 	ixgbe->attach_progress |= ATTACH_PROGRESS_OVERTEMP_TASKQ;
565 
566 	/*
567 	 * Create a taskq for processing external PHY interrupts
568 	 */
569 	(void) sprintf(taskqname, "ixgbe%d_phy_taskq", instance);
570 	if ((ixgbe->phy_taskq = ddi_taskq_create(devinfo, taskqname,
571 	    1, TASKQ_DEFAULTPRI, 0)) == NULL) {
572 		ixgbe_error(ixgbe, "phy_taskq create failed");
573 		goto attach_fail;
574 	}
575 	ixgbe->attach_progress |= ATTACH_PROGRESS_PHY_TASKQ;
576 
577 	/*
578 	 * Initialize driver parameters
579 	 */
580 	if (ixgbe_init_driver_settings(ixgbe) != IXGBE_SUCCESS) {
581 		ixgbe_error(ixgbe, "Failed to initialize driver settings");
582 		goto attach_fail;
583 	}
584 
585 	/*
586 	 * Initialize mutexes for this device.
587 	 * Do this before enabling the interrupt handler and
588 	 * register the softint to avoid the condition where
589 	 * interrupt handler can try using uninitialized mutex.
590 	 */
591 	ixgbe_init_locks(ixgbe);
592 	ixgbe->attach_progress |= ATTACH_PROGRESS_LOCKS;
593 
594 	/*
595 	 * Initialize chipset hardware
596 	 */
597 	if (ixgbe_init(ixgbe) != IXGBE_SUCCESS) {
598 		ixgbe_error(ixgbe, "Failed to initialize adapter");
599 		goto attach_fail;
600 	}
601 	ixgbe->link_check_complete = B_FALSE;
602 	ixgbe->link_check_hrtime = gethrtime() +
603 	    (IXGBE_LINK_UP_TIME * 100000000ULL);
604 	ixgbe->attach_progress |= ATTACH_PROGRESS_INIT;
605 
606 	if (ixgbe_check_acc_handle(ixgbe->osdep.cfg_handle) != DDI_FM_OK) {
607 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_LOST);
608 		goto attach_fail;
609 	}
610 
611 	/*
612 	 * Initialize adapter capabilities
613 	 */
614 	ixgbe_init_params(ixgbe);
615 
616 	/*
617 	 * Initialize statistics
618 	 */
619 	if (ixgbe_init_stats(ixgbe) != IXGBE_SUCCESS) {
620 		ixgbe_error(ixgbe, "Failed to initialize statistics");
621 		goto attach_fail;
622 	}
623 	ixgbe->attach_progress |= ATTACH_PROGRESS_STATS;
624 
625 	/*
626 	 * Register the driver to the MAC
627 	 */
628 	if (ixgbe_register_mac(ixgbe) != IXGBE_SUCCESS) {
629 		ixgbe_error(ixgbe, "Failed to register MAC");
630 		goto attach_fail;
631 	}
632 	mac_link_update(ixgbe->mac_hdl, LINK_STATE_UNKNOWN);
633 	ixgbe->attach_progress |= ATTACH_PROGRESS_MAC;
634 
635 	ixgbe->periodic_id = ddi_periodic_add(ixgbe_link_timer, ixgbe,
636 	    IXGBE_CYCLIC_PERIOD, DDI_IPL_0);
637 	if (ixgbe->periodic_id == 0) {
638 		ixgbe_error(ixgbe, "Failed to add the link check timer");
639 		goto attach_fail;
640 	}
641 	ixgbe->attach_progress |= ATTACH_PROGRESS_LINK_TIMER;
642 
643 	/*
644 	 * Now that mutex locks are initialized, and the chip is also
645 	 * initialized, enable interrupts.
646 	 */
647 	if (ixgbe_enable_intrs(ixgbe) != IXGBE_SUCCESS) {
648 		ixgbe_error(ixgbe, "Failed to enable DDI interrupts");
649 		goto attach_fail;
650 	}
651 	ixgbe->attach_progress |= ATTACH_PROGRESS_ENABLE_INTR;
652 
653 	ixgbe_log(ixgbe, "%s", ixgbe_ident);
654 	atomic_or_32(&ixgbe->ixgbe_state, IXGBE_INITIALIZED);
655 
656 	return (DDI_SUCCESS);
657 
658 attach_fail:
659 	ixgbe_unconfigure(devinfo, ixgbe);
660 	return (DDI_FAILURE);
661 }
662 
663 /*
664  * ixgbe_detach - Driver detach.
665  *
666  * The detach() function is the complement of the attach routine.
667  * If cmd is set to DDI_DETACH, detach() is used to remove  the
668  * state  associated  with  a  given  instance of a device node
669  * prior to the removal of that instance from the system.
670  *
671  * The detach() function will be called once for each  instance
672  * of the device for which there has been a successful attach()
673  * once there are no longer  any  opens  on  the  device.
674  *
675  * Interrupts routine are disabled, All memory allocated by this
676  * driver are freed.
677  */
678 static int
679 ixgbe_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
680 {
681 	ixgbe_t *ixgbe;
682 
683 	/*
684 	 * Check detach command
685 	 */
686 	switch (cmd) {
687 	default:
688 		return (DDI_FAILURE);
689 
690 	case DDI_SUSPEND:
691 		return (ixgbe_suspend(devinfo));
692 
693 	case DDI_DETACH:
694 		break;
695 	}
696 
697 	/*
698 	 * Get the pointer to the driver private data structure
699 	 */
700 	ixgbe = (ixgbe_t *)ddi_get_driver_private(devinfo);
701 	if (ixgbe == NULL)
702 		return (DDI_FAILURE);
703 
704 	/*
705 	 * If the device is still running, it needs to be stopped first.
706 	 * This check is necessary because under some specific circumstances,
707 	 * the detach routine can be called without stopping the interface
708 	 * first.
709 	 */
710 	if (ixgbe->ixgbe_state & IXGBE_STARTED) {
711 		atomic_and_32(&ixgbe->ixgbe_state, ~IXGBE_STARTED);
712 		mutex_enter(&ixgbe->gen_lock);
713 		ixgbe_stop(ixgbe, B_TRUE);
714 		mutex_exit(&ixgbe->gen_lock);
715 		/* Disable and stop the watchdog timer */
716 		ixgbe_disable_watchdog_timer(ixgbe);
717 	}
718 
719 	/*
720 	 * Check if there are still rx buffers held by the upper layer.
721 	 * If so, fail the detach.
722 	 */
723 	if (!ixgbe_rx_drain(ixgbe))
724 		return (DDI_FAILURE);
725 
726 	/*
727 	 * Do the remaining unconfigure routines
728 	 */
729 	ixgbe_unconfigure(devinfo, ixgbe);
730 
731 	return (DDI_SUCCESS);
732 }
733 
734 /*
735  * quiesce(9E) entry point.
736  *
737  * This function is called when the system is single-threaded at high
738  * PIL with preemption disabled. Therefore, this function must not be
739  * blocked.
740  *
741  * This function returns DDI_SUCCESS on success, or DDI_FAILURE on failure.
742  * DDI_FAILURE indicates an error condition and should almost never happen.
743  */
744 static int
745 ixgbe_quiesce(dev_info_t *devinfo)
746 {
747 	ixgbe_t *ixgbe;
748 	struct ixgbe_hw *hw;
749 
750 	ixgbe = (ixgbe_t *)ddi_get_driver_private(devinfo);
751 
752 	if (ixgbe == NULL)
753 		return (DDI_FAILURE);
754 
755 	hw = &ixgbe->hw;
756 
757 	/*
758 	 * Disable the adapter interrupts
759 	 */
760 	ixgbe_disable_adapter_interrupts(ixgbe);
761 
762 	/*
763 	 * Tell firmware driver is no longer in control
764 	 */
765 	ixgbe_release_driver_control(hw);
766 
767 	/*
768 	 * Reset the chipset
769 	 */
770 	(void) ixgbe_reset_hw(hw);
771 
772 	/*
773 	 * Reset PHY
774 	 */
775 	(void) ixgbe_reset_phy(hw);
776 
777 	return (DDI_SUCCESS);
778 }
779 
780 static void
781 ixgbe_unconfigure(dev_info_t *devinfo, ixgbe_t *ixgbe)
782 {
783 	/*
784 	 * Disable interrupt
785 	 */
786 	if (ixgbe->attach_progress & ATTACH_PROGRESS_ENABLE_INTR) {
787 		(void) ixgbe_disable_intrs(ixgbe);
788 	}
789 
790 	/*
791 	 * remove the link check timer
792 	 */
793 	if (ixgbe->attach_progress & ATTACH_PROGRESS_LINK_TIMER) {
794 		if (ixgbe->periodic_id != NULL) {
795 			ddi_periodic_delete(ixgbe->periodic_id);
796 			ixgbe->periodic_id = NULL;
797 		}
798 	}
799 
800 	/*
801 	 * Unregister MAC
802 	 */
803 	if (ixgbe->attach_progress & ATTACH_PROGRESS_MAC) {
804 		(void) mac_unregister(ixgbe->mac_hdl);
805 	}
806 
807 	/*
808 	 * Free statistics
809 	 */
810 	if (ixgbe->attach_progress & ATTACH_PROGRESS_STATS) {
811 		kstat_delete((kstat_t *)ixgbe->ixgbe_ks);
812 	}
813 
814 	/*
815 	 * Remove interrupt handlers
816 	 */
817 	if (ixgbe->attach_progress & ATTACH_PROGRESS_ADD_INTR) {
818 		ixgbe_rem_intr_handlers(ixgbe);
819 	}
820 
821 	/*
822 	 * Remove taskq for sfp-status-change
823 	 */
824 	if (ixgbe->attach_progress & ATTACH_PROGRESS_SFP_TASKQ) {
825 		ddi_taskq_destroy(ixgbe->sfp_taskq);
826 	}
827 
828 	/*
829 	 * Remove taskq for over-temp
830 	 */
831 	if (ixgbe->attach_progress & ATTACH_PROGRESS_OVERTEMP_TASKQ) {
832 		ddi_taskq_destroy(ixgbe->overtemp_taskq);
833 	}
834 
835 	/*
836 	 * Remove taskq for external PHYs
837 	 */
838 	if (ixgbe->attach_progress & ATTACH_PROGRESS_PHY_TASKQ) {
839 		ddi_taskq_destroy(ixgbe->phy_taskq);
840 	}
841 
842 	/*
843 	 * Remove interrupts
844 	 */
845 	if (ixgbe->attach_progress & ATTACH_PROGRESS_ALLOC_INTR) {
846 		ixgbe_rem_intrs(ixgbe);
847 	}
848 
849 	/*
850 	 * Unregister interrupt callback handler
851 	 */
852 	if (ixgbe->cb_hdl != NULL) {
853 		(void) ddi_cb_unregister(ixgbe->cb_hdl);
854 	}
855 
856 	/*
857 	 * Remove driver properties
858 	 */
859 	if (ixgbe->attach_progress & ATTACH_PROGRESS_PROPS) {
860 		(void) ddi_prop_remove_all(devinfo);
861 	}
862 
863 	/*
864 	 * Stop the chipset
865 	 */
866 	if (ixgbe->attach_progress & ATTACH_PROGRESS_INIT) {
867 		mutex_enter(&ixgbe->gen_lock);
868 		ixgbe_chip_stop(ixgbe);
869 		mutex_exit(&ixgbe->gen_lock);
870 	}
871 
872 	/*
873 	 * Free register handle
874 	 */
875 	if (ixgbe->attach_progress & ATTACH_PROGRESS_REGS_MAP) {
876 		if (ixgbe->osdep.reg_handle != NULL)
877 			ddi_regs_map_free(&ixgbe->osdep.reg_handle);
878 	}
879 
880 	/*
881 	 * Free PCI config handle
882 	 */
883 	if (ixgbe->attach_progress & ATTACH_PROGRESS_PCI_CONFIG) {
884 		if (ixgbe->osdep.cfg_handle != NULL)
885 			pci_config_teardown(&ixgbe->osdep.cfg_handle);
886 	}
887 
888 	/*
889 	 * Free locks
890 	 */
891 	if (ixgbe->attach_progress & ATTACH_PROGRESS_LOCKS) {
892 		ixgbe_destroy_locks(ixgbe);
893 	}
894 
895 	/*
896 	 * Free the rx/tx rings
897 	 */
898 	if (ixgbe->attach_progress & ATTACH_PROGRESS_ALLOC_RINGS) {
899 		ixgbe_free_rings(ixgbe);
900 	}
901 
902 	/*
903 	 * Unregister FMA capabilities
904 	 */
905 	if (ixgbe->attach_progress & ATTACH_PROGRESS_FM_INIT) {
906 		ixgbe_fm_fini(ixgbe);
907 	}
908 
909 	/*
910 	 * Free the driver data structure
911 	 */
912 	kmem_free(ixgbe, sizeof (ixgbe_t));
913 
914 	ddi_set_driver_private(devinfo, NULL);
915 }
916 
917 /*
918  * ixgbe_register_mac - Register the driver and its function pointers with
919  * the GLD interface.
920  */
921 static int
922 ixgbe_register_mac(ixgbe_t *ixgbe)
923 {
924 	struct ixgbe_hw *hw = &ixgbe->hw;
925 	mac_register_t *mac;
926 	int status;
927 
928 	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
929 		return (IXGBE_FAILURE);
930 
931 	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
932 	mac->m_driver = ixgbe;
933 	mac->m_dip = ixgbe->dip;
934 	mac->m_src_addr = hw->mac.addr;
935 	mac->m_callbacks = &ixgbe_m_callbacks;
936 	mac->m_min_sdu = 0;
937 	mac->m_max_sdu = ixgbe->default_mtu;
938 	mac->m_margin = VLAN_TAGSZ;
939 	mac->m_priv_props = ixgbe_priv_props;
940 	mac->m_v12n = MAC_VIRT_LEVEL1;
941 
942 	status = mac_register(mac, &ixgbe->mac_hdl);
943 
944 	mac_free(mac);
945 
946 	return ((status == 0) ? IXGBE_SUCCESS : IXGBE_FAILURE);
947 }
948 
949 /*
950  * ixgbe_identify_hardware - Identify the type of the chipset.
951  */
952 static int
953 ixgbe_identify_hardware(ixgbe_t *ixgbe)
954 {
955 	struct ixgbe_hw *hw = &ixgbe->hw;
956 	struct ixgbe_osdep *osdep = &ixgbe->osdep;
957 
958 	/*
959 	 * Get the device id
960 	 */
961 	hw->vendor_id =
962 	    pci_config_get16(osdep->cfg_handle, PCI_CONF_VENID);
963 	hw->device_id =
964 	    pci_config_get16(osdep->cfg_handle, PCI_CONF_DEVID);
965 	hw->revision_id =
966 	    pci_config_get8(osdep->cfg_handle, PCI_CONF_REVID);
967 	hw->subsystem_device_id =
968 	    pci_config_get16(osdep->cfg_handle, PCI_CONF_SUBSYSID);
969 	hw->subsystem_vendor_id =
970 	    pci_config_get16(osdep->cfg_handle, PCI_CONF_SUBVENID);
971 
972 	/*
973 	 * Set the mac type of the adapter based on the device id
974 	 */
975 	if (ixgbe_set_mac_type(hw) != IXGBE_SUCCESS) {
976 		return (IXGBE_FAILURE);
977 	}
978 
979 	/*
980 	 * Install adapter capabilities
981 	 */
982 	switch (hw->mac.type) {
983 	case ixgbe_mac_82598EB:
984 		IXGBE_DEBUGLOG_0(ixgbe, "identify 82598 adapter\n");
985 		ixgbe->capab = &ixgbe_82598eb_cap;
986 
987 		if (ixgbe_get_media_type(hw) == ixgbe_media_type_copper) {
988 			ixgbe->capab->flags |= IXGBE_FLAG_FAN_FAIL_CAPABLE;
989 			ixgbe->capab->other_intr |= IXGBE_EICR_GPI_SDP1;
990 			ixgbe->capab->other_gpie |= IXGBE_SDP1_GPIEN;
991 		}
992 		break;
993 
994 	case ixgbe_mac_82599EB:
995 		IXGBE_DEBUGLOG_0(ixgbe, "identify 82599 adapter\n");
996 		ixgbe->capab = &ixgbe_82599eb_cap;
997 
998 		if (hw->device_id == IXGBE_DEV_ID_82599_T3_LOM) {
999 			ixgbe->capab->flags |= IXGBE_FLAG_TEMP_SENSOR_CAPABLE;
1000 			ixgbe->capab->other_intr |= IXGBE_EICR_GPI_SDP0;
1001 			ixgbe->capab->other_gpie |= IXGBE_SDP0_GPIEN;
1002 		}
1003 		break;
1004 
1005 	case ixgbe_mac_X540:
1006 		IXGBE_DEBUGLOG_0(ixgbe, "identify X540 adapter\n");
1007 		ixgbe->capab = &ixgbe_X540_cap;
1008 		/*
1009 		 * For now, X540 is all set in its capab structure.
1010 		 * As other X540 variants show up, things can change here.
1011 		 */
1012 		break;
1013 
1014 	case ixgbe_mac_X550:
1015 	case ixgbe_mac_X550EM_x:
1016 	case ixgbe_mac_X550EM_a:
1017 		IXGBE_DEBUGLOG_0(ixgbe, "identify X550 adapter\n");
1018 		ixgbe->capab = &ixgbe_X550_cap;
1019 
1020 		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
1021 		    hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP ||
1022 		    hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP_N ||
1023 		    hw->device_id == IXGBE_DEV_ID_X550EM_A_QSFP ||
1024 		    hw->device_id == IXGBE_DEV_ID_X550EM_A_QSFP_N) {
1025 			ixgbe->capab->flags |= IXGBE_FLAG_SFP_PLUG_CAPABLE;
1026 		}
1027 
1028 		/*
1029 		 * Link detection on X552 SFP+ and X552/X557-AT
1030 		 */
1031 		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
1032 		    hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP ||
1033 		    hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP_N ||
1034 		    hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) {
1035 			ixgbe->capab->other_intr |=
1036 			    IXGBE_EIMS_GPI_SDP0_BY_MAC(hw);
1037 		}
1038 		if (hw->phy.type == ixgbe_phy_x550em_ext_t) {
1039 			ixgbe->capab->other_gpie |= IXGBE_SDP0_GPIEN_X540;
1040 		}
1041 		break;
1042 
1043 	default:
1044 		IXGBE_DEBUGLOG_1(ixgbe,
1045 		    "adapter not supported in ixgbe_identify_hardware(): %d\n",
1046 		    hw->mac.type);
1047 		return (IXGBE_FAILURE);
1048 	}
1049 
1050 	return (IXGBE_SUCCESS);
1051 }
1052 
1053 /*
1054  * ixgbe_regs_map - Map the device registers.
1055  *
1056  */
1057 static int
1058 ixgbe_regs_map(ixgbe_t *ixgbe)
1059 {
1060 	dev_info_t *devinfo = ixgbe->dip;
1061 	struct ixgbe_hw *hw = &ixgbe->hw;
1062 	struct ixgbe_osdep *osdep = &ixgbe->osdep;
1063 	off_t mem_size;
1064 
1065 	/*
1066 	 * First get the size of device registers to be mapped.
1067 	 */
1068 	if (ddi_dev_regsize(devinfo, IXGBE_ADAPTER_REGSET, &mem_size)
1069 	    != DDI_SUCCESS) {
1070 		return (IXGBE_FAILURE);
1071 	}
1072 
1073 	/*
1074 	 * Call ddi_regs_map_setup() to map registers
1075 	 */
1076 	if ((ddi_regs_map_setup(devinfo, IXGBE_ADAPTER_REGSET,
1077 	    (caddr_t *)&hw->hw_addr, 0,
1078 	    mem_size, &ixgbe_regs_acc_attr,
1079 	    &osdep->reg_handle)) != DDI_SUCCESS) {
1080 		return (IXGBE_FAILURE);
1081 	}
1082 
1083 	return (IXGBE_SUCCESS);
1084 }
1085 
1086 /*
1087  * ixgbe_init_properties - Initialize driver properties.
1088  */
1089 static void
1090 ixgbe_init_properties(ixgbe_t *ixgbe)
1091 {
1092 	/*
1093 	 * Get conf file properties, including link settings
1094 	 * jumbo frames, ring number, descriptor number, etc.
1095 	 */
1096 	ixgbe_get_conf(ixgbe);
1097 }
1098 
1099 /*
1100  * ixgbe_init_driver_settings - Initialize driver settings.
1101  *
1102  * The settings include hardware function pointers, bus information,
1103  * rx/tx rings settings, link state, and any other parameters that
1104  * need to be setup during driver initialization.
1105  */
1106 static int
1107 ixgbe_init_driver_settings(ixgbe_t *ixgbe)
1108 {
1109 	struct ixgbe_hw *hw = &ixgbe->hw;
1110 	dev_info_t *devinfo = ixgbe->dip;
1111 	ixgbe_rx_ring_t *rx_ring;
1112 	ixgbe_rx_group_t *rx_group;
1113 	ixgbe_tx_ring_t *tx_ring;
1114 	uint32_t rx_size;
1115 	uint32_t tx_size;
1116 	uint32_t ring_per_group;
1117 	int i;
1118 
1119 	/*
1120 	 * Initialize chipset specific hardware function pointers
1121 	 */
1122 	if (ixgbe_init_shared_code(hw) != IXGBE_SUCCESS) {
1123 		return (IXGBE_FAILURE);
1124 	}
1125 
1126 	/*
1127 	 * Get the system page size
1128 	 */
1129 	ixgbe->sys_page_size = ddi_ptob(devinfo, (ulong_t)1);
1130 
1131 	/*
1132 	 * Set rx buffer size
1133 	 *
1134 	 * The IP header alignment room is counted in the calculation.
1135 	 * The rx buffer size is in unit of 1K that is required by the
1136 	 * chipset hardware.
1137 	 */
1138 	rx_size = ixgbe->max_frame_size + IPHDR_ALIGN_ROOM;
1139 	ixgbe->rx_buf_size = ((rx_size >> 10) +
1140 	    ((rx_size & (((uint32_t)1 << 10) - 1)) > 0 ? 1 : 0)) << 10;
1141 
1142 	/*
1143 	 * Set tx buffer size
1144 	 */
1145 	tx_size = ixgbe->max_frame_size;
1146 	ixgbe->tx_buf_size = ((tx_size >> 10) +
1147 	    ((tx_size & (((uint32_t)1 << 10) - 1)) > 0 ? 1 : 0)) << 10;
1148 
1149 	/*
1150 	 * Initialize rx/tx rings/groups parameters
1151 	 */
1152 	ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups;
1153 	for (i = 0; i < ixgbe->num_rx_rings; i++) {
1154 		rx_ring = &ixgbe->rx_rings[i];
1155 		rx_ring->index = i;
1156 		rx_ring->ixgbe = ixgbe;
1157 		rx_ring->group_index = i / ring_per_group;
1158 		rx_ring->hw_index = ixgbe_get_hw_rx_index(ixgbe, i);
1159 	}
1160 
1161 	for (i = 0; i < ixgbe->num_rx_groups; i++) {
1162 		rx_group = &ixgbe->rx_groups[i];
1163 		rx_group->index = i;
1164 		rx_group->ixgbe = ixgbe;
1165 		list_create(&rx_group->vlans, sizeof (ixgbe_vlan_t),
1166 		    offsetof(ixgbe_vlan_t, ixvl_link));
1167 	}
1168 
1169 	for (i = 0; i < ixgbe->num_tx_rings; i++) {
1170 		tx_ring = &ixgbe->tx_rings[i];
1171 		tx_ring->index = i;
1172 		tx_ring->ixgbe = ixgbe;
1173 		if (ixgbe->tx_head_wb_enable)
1174 			tx_ring->tx_recycle = ixgbe_tx_recycle_head_wb;
1175 		else
1176 			tx_ring->tx_recycle = ixgbe_tx_recycle_legacy;
1177 
1178 		tx_ring->ring_size = ixgbe->tx_ring_size;
1179 		tx_ring->free_list_size = ixgbe->tx_ring_size +
1180 		    (ixgbe->tx_ring_size >> 1);
1181 	}
1182 
1183 	/*
1184 	 * Initialize values of interrupt throttling rate
1185 	 */
1186 	for (i = 1; i < MAX_INTR_VECTOR; i++)
1187 		ixgbe->intr_throttling[i] = ixgbe->intr_throttling[0];
1188 
1189 	/*
1190 	 * The initial link state should be "unknown"
1191 	 */
1192 	ixgbe->link_state = LINK_STATE_UNKNOWN;
1193 
1194 	return (IXGBE_SUCCESS);
1195 }
1196 
1197 /*
1198  * ixgbe_init_locks - Initialize locks.
1199  */
1200 static void
1201 ixgbe_init_locks(ixgbe_t *ixgbe)
1202 {
1203 	ixgbe_rx_ring_t *rx_ring;
1204 	ixgbe_tx_ring_t *tx_ring;
1205 	int i;
1206 
1207 	for (i = 0; i < ixgbe->num_rx_rings; i++) {
1208 		rx_ring = &ixgbe->rx_rings[i];
1209 		mutex_init(&rx_ring->rx_lock, NULL,
1210 		    MUTEX_DRIVER, DDI_INTR_PRI(ixgbe->intr_pri));
1211 	}
1212 
1213 	for (i = 0; i < ixgbe->num_tx_rings; i++) {
1214 		tx_ring = &ixgbe->tx_rings[i];
1215 		mutex_init(&tx_ring->tx_lock, NULL,
1216 		    MUTEX_DRIVER, DDI_INTR_PRI(ixgbe->intr_pri));
1217 		mutex_init(&tx_ring->recycle_lock, NULL,
1218 		    MUTEX_DRIVER, DDI_INTR_PRI(ixgbe->intr_pri));
1219 		mutex_init(&tx_ring->tcb_head_lock, NULL,
1220 		    MUTEX_DRIVER, DDI_INTR_PRI(ixgbe->intr_pri));
1221 		mutex_init(&tx_ring->tcb_tail_lock, NULL,
1222 		    MUTEX_DRIVER, DDI_INTR_PRI(ixgbe->intr_pri));
1223 	}
1224 
1225 	mutex_init(&ixgbe->gen_lock, NULL,
1226 	    MUTEX_DRIVER, DDI_INTR_PRI(ixgbe->intr_pri));
1227 
1228 	mutex_init(&ixgbe->watchdog_lock, NULL,
1229 	    MUTEX_DRIVER, DDI_INTR_PRI(ixgbe->intr_pri));
1230 }
1231 
1232 /*
1233  * ixgbe_destroy_locks - Destroy locks.
1234  */
1235 static void
1236 ixgbe_destroy_locks(ixgbe_t *ixgbe)
1237 {
1238 	ixgbe_rx_ring_t *rx_ring;
1239 	ixgbe_tx_ring_t *tx_ring;
1240 	int i;
1241 
1242 	for (i = 0; i < ixgbe->num_rx_rings; i++) {
1243 		rx_ring = &ixgbe->rx_rings[i];
1244 		mutex_destroy(&rx_ring->rx_lock);
1245 	}
1246 
1247 	for (i = 0; i < ixgbe->num_tx_rings; i++) {
1248 		tx_ring = &ixgbe->tx_rings[i];
1249 		mutex_destroy(&tx_ring->tx_lock);
1250 		mutex_destroy(&tx_ring->recycle_lock);
1251 		mutex_destroy(&tx_ring->tcb_head_lock);
1252 		mutex_destroy(&tx_ring->tcb_tail_lock);
1253 	}
1254 
1255 	mutex_destroy(&ixgbe->gen_lock);
1256 	mutex_destroy(&ixgbe->watchdog_lock);
1257 }
1258 
1259 /*
1260  * We need to try and determine which LED index in hardware corresponds to the
1261  * link/activity LED. This is the one that'll be overwritten when we perform
1262  * GLDv3 LED activity.
1263  */
1264 static void
1265 ixgbe_led_init(ixgbe_t *ixgbe)
1266 {
1267 	uint32_t reg, i;
1268 	struct ixgbe_hw *hw = &ixgbe->hw;
1269 
1270 	reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
1271 	for (i = 0; i < 4; i++) {
1272 		if (((reg >> IXGBE_LED_MODE_SHIFT(i)) &
1273 		    IXGBE_LED_MODE_MASK_BASE) == IXGBE_LED_LINK_ACTIVE) {
1274 			ixgbe->ixgbe_led_index = i;
1275 			return;
1276 		}
1277 	}
1278 
1279 	/*
1280 	 * If we couldn't determine this, we use the default for various MACs
1281 	 * based on information Intel has inserted into other drivers over the
1282 	 * years.
1283 	 */
1284 	switch (hw->mac.type) {
1285 	case ixgbe_mac_X550EM_a:
1286 		ixgbe->ixgbe_led_index = 0;
1287 		break;
1288 	case ixgbe_mac_X550EM_x:
1289 		ixgbe->ixgbe_led_index = 1;
1290 		break;
1291 	default:
1292 		ixgbe->ixgbe_led_index = 2;
1293 		break;
1294 	}
1295 }
1296 
1297 static int
1298 ixgbe_resume(dev_info_t *devinfo)
1299 {
1300 	ixgbe_t *ixgbe;
1301 	int i;
1302 
1303 	ixgbe = (ixgbe_t *)ddi_get_driver_private(devinfo);
1304 	if (ixgbe == NULL)
1305 		return (DDI_FAILURE);
1306 
1307 	mutex_enter(&ixgbe->gen_lock);
1308 
1309 	if (ixgbe->ixgbe_state & IXGBE_STARTED) {
1310 		if (ixgbe_start(ixgbe, B_FALSE) != IXGBE_SUCCESS) {
1311 			mutex_exit(&ixgbe->gen_lock);
1312 			return (DDI_FAILURE);
1313 		}
1314 
1315 		/*
1316 		 * Enable and start the watchdog timer
1317 		 */
1318 		ixgbe_enable_watchdog_timer(ixgbe);
1319 	}
1320 
1321 	atomic_and_32(&ixgbe->ixgbe_state, ~IXGBE_SUSPENDED);
1322 
1323 	if (ixgbe->ixgbe_state & IXGBE_STARTED) {
1324 		for (i = 0; i < ixgbe->num_tx_rings; i++) {
1325 			mac_tx_ring_update(ixgbe->mac_hdl,
1326 			    ixgbe->tx_rings[i].ring_handle);
1327 		}
1328 	}
1329 
1330 	mutex_exit(&ixgbe->gen_lock);
1331 
1332 	return (DDI_SUCCESS);
1333 }
1334 
1335 static int
1336 ixgbe_suspend(dev_info_t *devinfo)
1337 {
1338 	ixgbe_t *ixgbe;
1339 
1340 	ixgbe = (ixgbe_t *)ddi_get_driver_private(devinfo);
1341 	if (ixgbe == NULL)
1342 		return (DDI_FAILURE);
1343 
1344 	mutex_enter(&ixgbe->gen_lock);
1345 
1346 	atomic_or_32(&ixgbe->ixgbe_state, IXGBE_SUSPENDED);
1347 	if (!(ixgbe->ixgbe_state & IXGBE_STARTED)) {
1348 		mutex_exit(&ixgbe->gen_lock);
1349 		return (DDI_SUCCESS);
1350 	}
1351 	ixgbe_stop(ixgbe, B_FALSE);
1352 
1353 	mutex_exit(&ixgbe->gen_lock);
1354 
1355 	/*
1356 	 * Disable and stop the watchdog timer
1357 	 */
1358 	ixgbe_disable_watchdog_timer(ixgbe);
1359 
1360 	return (DDI_SUCCESS);
1361 }
1362 
1363 /*
1364  * ixgbe_init - Initialize the device.
1365  */
1366 static int
1367 ixgbe_init(ixgbe_t *ixgbe)
1368 {
1369 	struct ixgbe_hw *hw = &ixgbe->hw;
1370 	u8 pbanum[IXGBE_PBANUM_LENGTH];
1371 	int rv;
1372 
1373 	mutex_enter(&ixgbe->gen_lock);
1374 
1375 	/*
1376 	 * Configure/Initialize hardware
1377 	 */
1378 	rv = ixgbe_init_hw(hw);
1379 	if (rv != IXGBE_SUCCESS) {
1380 		switch (rv) {
1381 
1382 		/*
1383 		 * The first three errors are not prohibitive to us progressing
1384 		 * further, and are maily advisory in nature. In the case of a
1385 		 * SFP module not being present or not deemed supported by the
1386 		 * common code, we adivse the operator of this fact but carry on
1387 		 * instead of failing hard, as SFPs can be inserted or replaced
1388 		 * while the driver is running. In the case of a unknown error,
1389 		 * we fail-hard, logging the reason and emitting a FMA event.
1390 		 */
1391 		case IXGBE_ERR_EEPROM_VERSION:
1392 			ixgbe_error(ixgbe,
1393 			    "This Intel 10Gb Ethernet device is pre-release and"
1394 			    " contains outdated firmware. Please contact your"
1395 			    " hardware vendor for a replacement.");
1396 			break;
1397 		case IXGBE_ERR_SFP_NOT_PRESENT:
1398 			ixgbe_error(ixgbe,
1399 			    "No SFP+ module detected on this interface. Please "
1400 			    "install a supported SFP+ module for this "
1401 			    "interface to become operational.");
1402 			break;
1403 		case IXGBE_ERR_SFP_NOT_SUPPORTED:
1404 			ixgbe_error(ixgbe,
1405 			    "Unsupported SFP+ module detected. Please replace "
1406 			    "it with a supported SFP+ module per Intel "
1407 			    "documentation, or bypass this check with "
1408 			    "allow_unsupported_sfp=1 in ixgbe.conf.");
1409 			break;
1410 		default:
1411 			ixgbe_error(ixgbe,
1412 			    "Failed to initialize hardware. ixgbe_init_hw "
1413 			    "returned %d", rv);
1414 			ixgbe_fm_ereport(ixgbe, DDI_FM_DEVICE_INVAL_STATE);
1415 			goto init_fail;
1416 		}
1417 	}
1418 
1419 	/*
1420 	 * Need to init eeprom before validating the checksum.
1421 	 */
1422 	if (ixgbe_init_eeprom_params(hw) < 0) {
1423 		ixgbe_error(ixgbe,
1424 		    "Unable to intitialize the eeprom interface.");
1425 		ixgbe_fm_ereport(ixgbe, DDI_FM_DEVICE_INVAL_STATE);
1426 		goto init_fail;
1427 	}
1428 
1429 	/*
1430 	 * NVM validation
1431 	 */
1432 	if (ixgbe_validate_eeprom_checksum(hw, NULL) < 0) {
1433 		/*
1434 		 * Some PCI-E parts fail the first check due to
1435 		 * the link being in sleep state.  Call it again,
1436 		 * if it fails a second time it's a real issue.
1437 		 */
1438 		if (ixgbe_validate_eeprom_checksum(hw, NULL) < 0) {
1439 			ixgbe_error(ixgbe,
1440 			    "Invalid NVM checksum. Please contact "
1441 			    "the vendor to update the NVM.");
1442 			ixgbe_fm_ereport(ixgbe, DDI_FM_DEVICE_INVAL_STATE);
1443 			goto init_fail;
1444 		}
1445 	}
1446 
1447 	/*
1448 	 * Setup default flow control thresholds - enable/disable
1449 	 * & flow control type is controlled by ixgbe.conf
1450 	 */
1451 	hw->fc.high_water[0] = DEFAULT_FCRTH;
1452 	hw->fc.low_water[0] = DEFAULT_FCRTL;
1453 	hw->fc.pause_time = DEFAULT_FCPAUSE;
1454 	hw->fc.send_xon = B_TRUE;
1455 
1456 	/*
1457 	 * Initialize flow control
1458 	 */
1459 	(void) ixgbe_start_hw(hw);
1460 
1461 	/*
1462 	 * Initialize link settings
1463 	 */
1464 	(void) ixgbe_driver_setup_link(ixgbe, B_FALSE);
1465 
1466 	/*
1467 	 * Initialize the chipset hardware
1468 	 */
1469 	if (ixgbe_chip_start(ixgbe) != IXGBE_SUCCESS) {
1470 		ixgbe_fm_ereport(ixgbe, DDI_FM_DEVICE_INVAL_STATE);
1471 		goto init_fail;
1472 	}
1473 
1474 	/*
1475 	 * Read identifying information and place in devinfo.
1476 	 */
1477 	pbanum[0] = '\0';
1478 	(void) ixgbe_read_pba_string(hw, pbanum, sizeof (pbanum));
1479 	if (*pbanum != '\0') {
1480 		(void) ddi_prop_update_string(DDI_DEV_T_NONE, ixgbe->dip,
1481 		    "printed-board-assembly", (char *)pbanum);
1482 	}
1483 
1484 	/*
1485 	 * Determine LED index.
1486 	 */
1487 	ixgbe_led_init(ixgbe);
1488 
1489 	if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) {
1490 		goto init_fail;
1491 	}
1492 
1493 	mutex_exit(&ixgbe->gen_lock);
1494 	return (IXGBE_SUCCESS);
1495 
1496 init_fail:
1497 	/*
1498 	 * Reset PHY
1499 	 */
1500 	(void) ixgbe_reset_phy(hw);
1501 
1502 	mutex_exit(&ixgbe->gen_lock);
1503 	ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_LOST);
1504 	return (IXGBE_FAILURE);
1505 }
1506 
1507 /*
1508  * ixgbe_chip_start - Initialize and start the chipset hardware.
1509  */
1510 static int
1511 ixgbe_chip_start(ixgbe_t *ixgbe)
1512 {
1513 	struct ixgbe_hw *hw = &ixgbe->hw;
1514 	int i;
1515 
1516 	ASSERT(mutex_owned(&ixgbe->gen_lock));
1517 
1518 	/*
1519 	 * Get the mac address
1520 	 * This function should handle SPARC case correctly.
1521 	 */
1522 	if (!ixgbe_find_mac_address(ixgbe)) {
1523 		ixgbe_error(ixgbe, "Failed to get the mac address");
1524 		return (IXGBE_FAILURE);
1525 	}
1526 
1527 	/*
1528 	 * Validate the mac address
1529 	 */
1530 	(void) ixgbe_init_rx_addrs(hw);
1531 	if (!is_valid_mac_addr(hw->mac.addr)) {
1532 		ixgbe_error(ixgbe, "Invalid mac address");
1533 		return (IXGBE_FAILURE);
1534 	}
1535 
1536 	/*
1537 	 * Re-enable relaxed ordering for performance.  It is disabled
1538 	 * by default in the hardware init.
1539 	 */
1540 	if (ixgbe->relax_order_enable == B_TRUE)
1541 		ixgbe_enable_relaxed_ordering(hw);
1542 
1543 	/*
1544 	 * Setup adapter interrupt vectors
1545 	 */
1546 	ixgbe_setup_adapter_vector(ixgbe);
1547 
1548 	/*
1549 	 * Initialize unicast addresses.
1550 	 */
1551 	ixgbe_init_unicst(ixgbe);
1552 
1553 	/*
1554 	 * Setup and initialize the mctable structures.
1555 	 */
1556 	ixgbe_setup_multicst(ixgbe);
1557 
1558 	/*
1559 	 * Set interrupt throttling rate
1560 	 */
1561 	for (i = 0; i < ixgbe->intr_cnt; i++) {
1562 		IXGBE_WRITE_REG(hw, IXGBE_EITR(i), ixgbe->intr_throttling[i]);
1563 	}
1564 
1565 	/*
1566 	 * Disable Wake-on-LAN
1567 	 */
1568 	IXGBE_WRITE_REG(hw, IXGBE_WUC, 0);
1569 
1570 	/*
1571 	 * Some adapters offer Energy Efficient Ethernet (EEE) support.
1572 	 * Due to issues with EEE in e1000g/igb, we disable this by default
1573 	 * as a precautionary measure.
1574 	 *
1575 	 * Currently, this is present on a number of the X550 family parts.
1576 	 */
1577 	(void) ixgbe_setup_eee(hw, B_FALSE);
1578 
1579 	/*
1580 	 * Turn on any present SFP Tx laser
1581 	 */
1582 	ixgbe_enable_tx_laser(hw);
1583 
1584 	/*
1585 	 * Power on the PHY
1586 	 */
1587 	(void) ixgbe_set_phy_power(hw, B_TRUE);
1588 
1589 	/*
1590 	 * Save the state of the PHY
1591 	 */
1592 	ixgbe_get_hw_state(ixgbe);
1593 
1594 	/*
1595 	 * Make sure driver has control
1596 	 */
1597 	ixgbe_get_driver_control(hw);
1598 
1599 	return (IXGBE_SUCCESS);
1600 }
1601 
1602 /*
1603  * ixgbe_chip_stop - Stop the chipset hardware
1604  */
1605 static void
1606 ixgbe_chip_stop(ixgbe_t *ixgbe)
1607 {
1608 	struct ixgbe_hw *hw = &ixgbe->hw;
1609 	int rv;
1610 
1611 	ASSERT(mutex_owned(&ixgbe->gen_lock));
1612 
1613 	/*
1614 	 * Stop interupt generation and disable Tx unit
1615 	 */
1616 	hw->adapter_stopped = B_FALSE;
1617 	(void) ixgbe_stop_adapter(hw);
1618 
1619 	/*
1620 	 * Reset the chipset
1621 	 */
1622 	(void) ixgbe_reset_hw(hw);
1623 
1624 	/*
1625 	 * Reset PHY
1626 	 */
1627 	(void) ixgbe_reset_phy(hw);
1628 
1629 	/*
1630 	 * Enter LPLU (Low Power, Link Up) mode, if available. Avoid resetting
1631 	 * the PHY while doing so. Else, just power down the PHY.
1632 	 */
1633 	if (hw->phy.ops.enter_lplu != NULL) {
1634 		hw->phy.reset_disable = B_TRUE;
1635 		rv = hw->phy.ops.enter_lplu(hw);
1636 		if (rv != IXGBE_SUCCESS)
1637 			ixgbe_error(ixgbe, "Error while entering LPLU: %d", rv);
1638 		hw->phy.reset_disable = B_FALSE;
1639 	} else {
1640 		(void) ixgbe_set_phy_power(hw, B_FALSE);
1641 	}
1642 
1643 	/*
1644 	 * Turn off any present SFP Tx laser
1645 	 * Expected for health and safety reasons
1646 	 */
1647 	ixgbe_disable_tx_laser(hw);
1648 
1649 	/*
1650 	 * Tell firmware driver is no longer in control
1651 	 */
1652 	ixgbe_release_driver_control(hw);
1653 
1654 }
1655 
1656 /*
1657  * ixgbe_reset - Reset the chipset and re-start the driver.
1658  *
1659  * It involves stopping and re-starting the chipset,
1660  * and re-configuring the rx/tx rings.
1661  */
1662 static int
1663 ixgbe_reset(ixgbe_t *ixgbe)
1664 {
1665 	int i;
1666 
1667 	/*
1668 	 * Disable and stop the watchdog timer
1669 	 */
1670 	ixgbe_disable_watchdog_timer(ixgbe);
1671 
1672 	mutex_enter(&ixgbe->gen_lock);
1673 
1674 	ASSERT(ixgbe->ixgbe_state & IXGBE_STARTED);
1675 	atomic_and_32(&ixgbe->ixgbe_state, ~IXGBE_STARTED);
1676 
1677 	ixgbe_stop(ixgbe, B_FALSE);
1678 
1679 	if (ixgbe_start(ixgbe, B_FALSE) != IXGBE_SUCCESS) {
1680 		mutex_exit(&ixgbe->gen_lock);
1681 		return (IXGBE_FAILURE);
1682 	}
1683 
1684 	/*
1685 	 * After resetting, need to recheck the link status.
1686 	 */
1687 	ixgbe->link_check_complete = B_FALSE;
1688 	ixgbe->link_check_hrtime = gethrtime() +
1689 	    (IXGBE_LINK_UP_TIME * 100000000ULL);
1690 
1691 	atomic_or_32(&ixgbe->ixgbe_state, IXGBE_STARTED);
1692 
1693 	if (!(ixgbe->ixgbe_state & IXGBE_SUSPENDED)) {
1694 		for (i = 0; i < ixgbe->num_tx_rings; i++) {
1695 			mac_tx_ring_update(ixgbe->mac_hdl,
1696 			    ixgbe->tx_rings[i].ring_handle);
1697 		}
1698 	}
1699 
1700 	mutex_exit(&ixgbe->gen_lock);
1701 
1702 	/*
1703 	 * Enable and start the watchdog timer
1704 	 */
1705 	ixgbe_enable_watchdog_timer(ixgbe);
1706 
1707 	return (IXGBE_SUCCESS);
1708 }
1709 
1710 /*
1711  * ixgbe_tx_clean - Clean the pending transmit packets and DMA resources.
1712  */
1713 static void
1714 ixgbe_tx_clean(ixgbe_t *ixgbe)
1715 {
1716 	ixgbe_tx_ring_t *tx_ring;
1717 	tx_control_block_t *tcb;
1718 	link_list_t pending_list;
1719 	uint32_t desc_num;
1720 	int i, j;
1721 
1722 	LINK_LIST_INIT(&pending_list);
1723 
1724 	for (i = 0; i < ixgbe->num_tx_rings; i++) {
1725 		tx_ring = &ixgbe->tx_rings[i];
1726 
1727 		mutex_enter(&tx_ring->recycle_lock);
1728 
1729 		/*
1730 		 * Clean the pending tx data - the pending packets in the
1731 		 * work_list that have no chances to be transmitted again.
1732 		 *
1733 		 * We must ensure the chipset is stopped or the link is down
1734 		 * before cleaning the transmit packets.
1735 		 */
1736 		desc_num = 0;
1737 		for (j = 0; j < tx_ring->ring_size; j++) {
1738 			tcb = tx_ring->work_list[j];
1739 			if (tcb != NULL) {
1740 				desc_num += tcb->desc_num;
1741 
1742 				tx_ring->work_list[j] = NULL;
1743 
1744 				ixgbe_free_tcb(tcb);
1745 
1746 				LIST_PUSH_TAIL(&pending_list, &tcb->link);
1747 			}
1748 		}
1749 
1750 		if (desc_num > 0) {
1751 			atomic_add_32(&tx_ring->tbd_free, desc_num);
1752 			ASSERT(tx_ring->tbd_free == tx_ring->ring_size);
1753 
1754 			/*
1755 			 * Reset the head and tail pointers of the tbd ring;
1756 			 * Reset the writeback head if it's enable.
1757 			 */
1758 			tx_ring->tbd_head = 0;
1759 			tx_ring->tbd_tail = 0;
1760 			if (ixgbe->tx_head_wb_enable)
1761 				*tx_ring->tbd_head_wb = 0;
1762 
1763 			IXGBE_WRITE_REG(&ixgbe->hw,
1764 			    IXGBE_TDH(tx_ring->index), 0);
1765 			IXGBE_WRITE_REG(&ixgbe->hw,
1766 			    IXGBE_TDT(tx_ring->index), 0);
1767 		}
1768 
1769 		mutex_exit(&tx_ring->recycle_lock);
1770 
1771 		/*
1772 		 * Add the tx control blocks in the pending list to
1773 		 * the free list.
1774 		 */
1775 		ixgbe_put_free_list(tx_ring, &pending_list);
1776 	}
1777 }
1778 
1779 /*
1780  * ixgbe_tx_drain - Drain the tx rings to allow pending packets to be
1781  * transmitted.
1782  */
1783 static boolean_t
1784 ixgbe_tx_drain(ixgbe_t *ixgbe)
1785 {
1786 	ixgbe_tx_ring_t *tx_ring;
1787 	boolean_t done;
1788 	int i, j;
1789 
1790 	/*
1791 	 * Wait for a specific time to allow pending tx packets
1792 	 * to be transmitted.
1793 	 *
1794 	 * Check the counter tbd_free to see if transmission is done.
1795 	 * No lock protection is needed here.
1796 	 *
1797 	 * Return B_TRUE if all pending packets have been transmitted;
1798 	 * Otherwise return B_FALSE;
1799 	 */
1800 	for (i = 0; i < TX_DRAIN_TIME; i++) {
1801 
1802 		done = B_TRUE;
1803 		for (j = 0; j < ixgbe->num_tx_rings; j++) {
1804 			tx_ring = &ixgbe->tx_rings[j];
1805 			done = done &&
1806 			    (tx_ring->tbd_free == tx_ring->ring_size);
1807 		}
1808 
1809 		if (done)
1810 			break;
1811 
1812 		msec_delay(1);
1813 	}
1814 
1815 	return (done);
1816 }
1817 
1818 /*
1819  * ixgbe_rx_drain - Wait for all rx buffers to be released by upper layer.
1820  */
1821 static boolean_t
1822 ixgbe_rx_drain(ixgbe_t *ixgbe)
1823 {
1824 	boolean_t done = B_TRUE;
1825 	int i;
1826 
1827 	/*
1828 	 * Polling the rx free list to check if those rx buffers held by
1829 	 * the upper layer are released.
1830 	 *
1831 	 * Check the counter rcb_free to see if all pending buffers are
1832 	 * released. No lock protection is needed here.
1833 	 *
1834 	 * Return B_TRUE if all pending buffers have been released;
1835 	 * Otherwise return B_FALSE;
1836 	 */
1837 	for (i = 0; i < RX_DRAIN_TIME; i++) {
1838 		done = (ixgbe->rcb_pending == 0);
1839 
1840 		if (done)
1841 			break;
1842 
1843 		msec_delay(1);
1844 	}
1845 
1846 	return (done);
1847 }
1848 
1849 /*
1850  * ixgbe_start - Start the driver/chipset.
1851  */
1852 int
1853 ixgbe_start(ixgbe_t *ixgbe, boolean_t alloc_buffer)
1854 {
1855 	struct ixgbe_hw *hw = &ixgbe->hw;
1856 	int i;
1857 
1858 	ASSERT(mutex_owned(&ixgbe->gen_lock));
1859 
1860 	if (alloc_buffer) {
1861 		if (ixgbe_alloc_rx_data(ixgbe) != IXGBE_SUCCESS) {
1862 			ixgbe_error(ixgbe,
1863 			    "Failed to allocate software receive rings");
1864 			return (IXGBE_FAILURE);
1865 		}
1866 
1867 		/* Allocate buffers for all the rx/tx rings */
1868 		if (ixgbe_alloc_dma(ixgbe) != IXGBE_SUCCESS) {
1869 			ixgbe_error(ixgbe, "Failed to allocate DMA resource");
1870 			return (IXGBE_FAILURE);
1871 		}
1872 
1873 		ixgbe->tx_ring_init = B_TRUE;
1874 	} else {
1875 		ixgbe->tx_ring_init = B_FALSE;
1876 	}
1877 
1878 	for (i = 0; i < ixgbe->num_rx_rings; i++)
1879 		mutex_enter(&ixgbe->rx_rings[i].rx_lock);
1880 	for (i = 0; i < ixgbe->num_tx_rings; i++)
1881 		mutex_enter(&ixgbe->tx_rings[i].tx_lock);
1882 
1883 	/*
1884 	 * Start the chipset hardware
1885 	 */
1886 	if (ixgbe_chip_start(ixgbe) != IXGBE_SUCCESS) {
1887 		ixgbe_fm_ereport(ixgbe, DDI_FM_DEVICE_INVAL_STATE);
1888 		goto start_failure;
1889 	}
1890 
1891 	/*
1892 	 * Configure link now for X550
1893 	 *
1894 	 * X550 possesses a LPLU (Low-Power Link Up) mode which keeps the
1895 	 * resting state of the adapter at a 1Gb FDX speed. Prior to the X550,
1896 	 * the resting state of the link would be the maximum speed that
1897 	 * autonegotiation will allow (usually 10Gb, infrastructure allowing)
1898 	 * so we never bothered with explicitly setting the link to 10Gb as it
1899 	 * would already be at that state on driver attach. With X550, we must
1900 	 * trigger a re-negotiation of the link in order to switch from a LPLU
1901 	 * 1Gb link to 10Gb (cable and link partner permitting.)
1902 	 */
1903 	if (hw->mac.type == ixgbe_mac_X550 ||
1904 	    hw->mac.type == ixgbe_mac_X550EM_a ||
1905 	    hw->mac.type == ixgbe_mac_X550EM_x) {
1906 		(void) ixgbe_driver_setup_link(ixgbe, B_TRUE);
1907 		ixgbe_get_hw_state(ixgbe);
1908 	}
1909 
1910 	if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) {
1911 		goto start_failure;
1912 	}
1913 
1914 	/*
1915 	 * Setup the rx/tx rings
1916 	 */
1917 	if (ixgbe_setup_rings(ixgbe) != IXGBE_SUCCESS)
1918 		goto start_failure;
1919 
1920 	/*
1921 	 * ixgbe_start() will be called when resetting, however if reset
1922 	 * happens, we need to clear the ERROR, STALL and OVERTEMP flags
1923 	 * before enabling the interrupts.
1924 	 */
1925 	atomic_and_32(&ixgbe->ixgbe_state, ~(IXGBE_ERROR
1926 	    | IXGBE_STALL| IXGBE_OVERTEMP));
1927 
1928 	/*
1929 	 * Enable adapter interrupts
1930 	 * The interrupts must be enabled after the driver state is START
1931 	 */
1932 	ixgbe_enable_adapter_interrupts(ixgbe);
1933 
1934 	for (i = ixgbe->num_tx_rings - 1; i >= 0; i--)
1935 		mutex_exit(&ixgbe->tx_rings[i].tx_lock);
1936 	for (i = ixgbe->num_rx_rings - 1; i >= 0; i--)
1937 		mutex_exit(&ixgbe->rx_rings[i].rx_lock);
1938 
1939 	return (IXGBE_SUCCESS);
1940 
1941 start_failure:
1942 	for (i = ixgbe->num_tx_rings - 1; i >= 0; i--)
1943 		mutex_exit(&ixgbe->tx_rings[i].tx_lock);
1944 	for (i = ixgbe->num_rx_rings - 1; i >= 0; i--)
1945 		mutex_exit(&ixgbe->rx_rings[i].rx_lock);
1946 
1947 	ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_LOST);
1948 
1949 	return (IXGBE_FAILURE);
1950 }
1951 
1952 /*
1953  * ixgbe_stop - Stop the driver/chipset.
1954  */
1955 void
1956 ixgbe_stop(ixgbe_t *ixgbe, boolean_t free_buffer)
1957 {
1958 	int i;
1959 
1960 	ASSERT(mutex_owned(&ixgbe->gen_lock));
1961 
1962 	/*
1963 	 * Disable the adapter interrupts
1964 	 */
1965 	ixgbe_disable_adapter_interrupts(ixgbe);
1966 
1967 	/*
1968 	 * Drain the pending tx packets
1969 	 */
1970 	(void) ixgbe_tx_drain(ixgbe);
1971 
1972 	for (i = 0; i < ixgbe->num_rx_rings; i++)
1973 		mutex_enter(&ixgbe->rx_rings[i].rx_lock);
1974 	for (i = 0; i < ixgbe->num_tx_rings; i++)
1975 		mutex_enter(&ixgbe->tx_rings[i].tx_lock);
1976 
1977 	/*
1978 	 * Stop the chipset hardware
1979 	 */
1980 	ixgbe_chip_stop(ixgbe);
1981 
1982 	if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) {
1983 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_LOST);
1984 	}
1985 
1986 	/*
1987 	 * Clean the pending tx data/resources
1988 	 */
1989 	ixgbe_tx_clean(ixgbe);
1990 
1991 	for (i = ixgbe->num_tx_rings - 1; i >= 0; i--)
1992 		mutex_exit(&ixgbe->tx_rings[i].tx_lock);
1993 	for (i = ixgbe->num_rx_rings - 1; i >= 0; i--)
1994 		mutex_exit(&ixgbe->rx_rings[i].rx_lock);
1995 
1996 	if (ixgbe->link_state == LINK_STATE_UP) {
1997 		ixgbe->link_state = LINK_STATE_UNKNOWN;
1998 		mac_link_update(ixgbe->mac_hdl, ixgbe->link_state);
1999 	}
2000 
2001 	if (free_buffer) {
2002 		/*
2003 		 * Release the DMA/memory resources of rx/tx rings
2004 		 */
2005 		ixgbe_free_dma(ixgbe);
2006 		ixgbe_free_rx_data(ixgbe);
2007 	}
2008 }
2009 
2010 /*
2011  * ixgbe_cbfunc - Driver interface for generic DDI callbacks
2012  */
2013 /* ARGSUSED */
2014 static int
2015 ixgbe_cbfunc(dev_info_t *dip, ddi_cb_action_t cbaction, void *cbarg,
2016     void *arg1, void *arg2)
2017 {
2018 	ixgbe_t *ixgbe = (ixgbe_t *)arg1;
2019 
2020 	switch (cbaction) {
2021 	/* IRM callback */
2022 	int count;
2023 	case DDI_CB_INTR_ADD:
2024 	case DDI_CB_INTR_REMOVE:
2025 		count = (int)(uintptr_t)cbarg;
2026 		ASSERT(ixgbe->intr_type == DDI_INTR_TYPE_MSIX);
2027 		DTRACE_PROBE2(ixgbe__irm__callback, int, count,
2028 		    int, ixgbe->intr_cnt);
2029 		if (ixgbe_intr_adjust(ixgbe, cbaction, count) !=
2030 		    DDI_SUCCESS) {
2031 			ixgbe_error(ixgbe,
2032 			    "IRM CB: Failed to adjust interrupts");
2033 			goto cb_fail;
2034 		}
2035 		break;
2036 	default:
2037 		IXGBE_DEBUGLOG_1(ixgbe, "DDI CB: action 0x%x NOT supported",
2038 		    cbaction);
2039 		return (DDI_ENOTSUP);
2040 	}
2041 	return (DDI_SUCCESS);
2042 cb_fail:
2043 	return (DDI_FAILURE);
2044 }
2045 
2046 /*
2047  * ixgbe_intr_adjust - Adjust interrupt to respond to IRM request.
2048  */
2049 static int
2050 ixgbe_intr_adjust(ixgbe_t *ixgbe, ddi_cb_action_t cbaction, int count)
2051 {
2052 	int i, rc, actual;
2053 
2054 	if (count == 0)
2055 		return (DDI_SUCCESS);
2056 
2057 	if ((cbaction == DDI_CB_INTR_ADD &&
2058 	    ixgbe->intr_cnt + count > ixgbe->intr_cnt_max) ||
2059 	    (cbaction == DDI_CB_INTR_REMOVE &&
2060 	    ixgbe->intr_cnt - count < ixgbe->intr_cnt_min))
2061 		return (DDI_FAILURE);
2062 
2063 	if (!(ixgbe->ixgbe_state & IXGBE_STARTED)) {
2064 		return (DDI_FAILURE);
2065 	}
2066 
2067 	for (i = 0; i < ixgbe->num_rx_rings; i++)
2068 		mac_ring_intr_set(ixgbe->rx_rings[i].ring_handle, NULL);
2069 	for (i = 0; i < ixgbe->num_tx_rings; i++)
2070 		mac_ring_intr_set(ixgbe->tx_rings[i].ring_handle, NULL);
2071 
2072 	mutex_enter(&ixgbe->gen_lock);
2073 	ixgbe->ixgbe_state &= ~IXGBE_STARTED;
2074 	ixgbe->ixgbe_state |= IXGBE_INTR_ADJUST;
2075 	ixgbe->ixgbe_state |= IXGBE_SUSPENDED;
2076 	mac_link_update(ixgbe->mac_hdl, LINK_STATE_UNKNOWN);
2077 
2078 	ixgbe_stop(ixgbe, B_FALSE);
2079 	/*
2080 	 * Disable interrupts
2081 	 */
2082 	if (ixgbe->attach_progress & ATTACH_PROGRESS_ENABLE_INTR) {
2083 		rc = ixgbe_disable_intrs(ixgbe);
2084 		ASSERT(rc == IXGBE_SUCCESS);
2085 	}
2086 	ixgbe->attach_progress &= ~ATTACH_PROGRESS_ENABLE_INTR;
2087 
2088 	/*
2089 	 * Remove interrupt handlers
2090 	 */
2091 	if (ixgbe->attach_progress & ATTACH_PROGRESS_ADD_INTR) {
2092 		ixgbe_rem_intr_handlers(ixgbe);
2093 	}
2094 	ixgbe->attach_progress &= ~ATTACH_PROGRESS_ADD_INTR;
2095 
2096 	/*
2097 	 * Clear vect_map
2098 	 */
2099 	bzero(&ixgbe->vect_map, sizeof (ixgbe->vect_map));
2100 	switch (cbaction) {
2101 	case DDI_CB_INTR_ADD:
2102 		rc = ddi_intr_alloc(ixgbe->dip, ixgbe->htable,
2103 		    DDI_INTR_TYPE_MSIX, ixgbe->intr_cnt, count, &actual,
2104 		    DDI_INTR_ALLOC_NORMAL);
2105 		if (rc != DDI_SUCCESS || actual != count) {
2106 			ixgbe_log(ixgbe, "Adjust interrupts failed."
2107 			    "return: %d, irm cb size: %d, actual: %d",
2108 			    rc, count, actual);
2109 			goto intr_adjust_fail;
2110 		}
2111 		ixgbe->intr_cnt += count;
2112 		break;
2113 
2114 	case DDI_CB_INTR_REMOVE:
2115 		for (i = ixgbe->intr_cnt - count;
2116 		    i < ixgbe->intr_cnt; i ++) {
2117 			rc = ddi_intr_free(ixgbe->htable[i]);
2118 			ixgbe->htable[i] = NULL;
2119 			if (rc != DDI_SUCCESS) {
2120 				ixgbe_log(ixgbe, "Adjust interrupts failed."
2121 				    "return: %d, irm cb size: %d, actual: %d",
2122 				    rc, count, actual);
2123 				goto intr_adjust_fail;
2124 			}
2125 		}
2126 		ixgbe->intr_cnt -= count;
2127 		break;
2128 	}
2129 
2130 	/*
2131 	 * Get priority for first vector, assume remaining are all the same
2132 	 */
2133 	rc = ddi_intr_get_pri(ixgbe->htable[0], &ixgbe->intr_pri);
2134 	if (rc != DDI_SUCCESS) {
2135 		ixgbe_log(ixgbe,
2136 		    "Get interrupt priority failed: %d", rc);
2137 		goto intr_adjust_fail;
2138 	}
2139 	rc = ddi_intr_get_cap(ixgbe->htable[0], &ixgbe->intr_cap);
2140 	if (rc != DDI_SUCCESS) {
2141 		ixgbe_log(ixgbe, "Get interrupt cap failed: %d", rc);
2142 		goto intr_adjust_fail;
2143 	}
2144 	ixgbe->attach_progress |= ATTACH_PROGRESS_ALLOC_INTR;
2145 
2146 	/*
2147 	 * Map rings to interrupt vectors
2148 	 */
2149 	if (ixgbe_map_intrs_to_vectors(ixgbe) != IXGBE_SUCCESS) {
2150 		ixgbe_error(ixgbe,
2151 		    "IRM CB: Failed to map interrupts to vectors");
2152 		goto intr_adjust_fail;
2153 	}
2154 
2155 	/*
2156 	 * Add interrupt handlers
2157 	 */
2158 	if (ixgbe_add_intr_handlers(ixgbe) != IXGBE_SUCCESS) {
2159 		ixgbe_error(ixgbe, "IRM CB: Failed to add interrupt handlers");
2160 		goto intr_adjust_fail;
2161 	}
2162 	ixgbe->attach_progress |= ATTACH_PROGRESS_ADD_INTR;
2163 
2164 	/*
2165 	 * Now that mutex locks are initialized, and the chip is also
2166 	 * initialized, enable interrupts.
2167 	 */
2168 	if (ixgbe_enable_intrs(ixgbe) != IXGBE_SUCCESS) {
2169 		ixgbe_error(ixgbe, "IRM CB: Failed to enable DDI interrupts");
2170 		goto intr_adjust_fail;
2171 	}
2172 	ixgbe->attach_progress |= ATTACH_PROGRESS_ENABLE_INTR;
2173 	if (ixgbe_start(ixgbe, B_FALSE) != IXGBE_SUCCESS) {
2174 		ixgbe_error(ixgbe, "IRM CB: Failed to start");
2175 		goto intr_adjust_fail;
2176 	}
2177 	ixgbe->ixgbe_state &= ~IXGBE_INTR_ADJUST;
2178 	ixgbe->ixgbe_state &= ~IXGBE_SUSPENDED;
2179 	ixgbe->ixgbe_state |= IXGBE_STARTED;
2180 	mutex_exit(&ixgbe->gen_lock);
2181 
2182 	for (i = 0; i < ixgbe->num_rx_rings; i++) {
2183 		mac_ring_intr_set(ixgbe->rx_rings[i].ring_handle,
2184 		    ixgbe->htable[ixgbe->rx_rings[i].intr_vector]);
2185 	}
2186 	for (i = 0; i < ixgbe->num_tx_rings; i++) {
2187 		mac_ring_intr_set(ixgbe->tx_rings[i].ring_handle,
2188 		    ixgbe->htable[ixgbe->tx_rings[i].intr_vector]);
2189 	}
2190 
2191 	/* Wakeup all Tx rings */
2192 	for (i = 0; i < ixgbe->num_tx_rings; i++) {
2193 		mac_tx_ring_update(ixgbe->mac_hdl,
2194 		    ixgbe->tx_rings[i].ring_handle);
2195 	}
2196 
2197 	IXGBE_DEBUGLOG_3(ixgbe,
2198 	    "IRM CB: interrupts new value: 0x%x(0x%x:0x%x).",
2199 	    ixgbe->intr_cnt, ixgbe->intr_cnt_min, ixgbe->intr_cnt_max);
2200 	return (DDI_SUCCESS);
2201 
2202 intr_adjust_fail:
2203 	ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_LOST);
2204 	mutex_exit(&ixgbe->gen_lock);
2205 	return (DDI_FAILURE);
2206 }
2207 
2208 /*
2209  * ixgbe_intr_cb_register - Register interrupt callback function.
2210  */
2211 static int
2212 ixgbe_intr_cb_register(ixgbe_t *ixgbe)
2213 {
2214 	if (ddi_cb_register(ixgbe->dip, DDI_CB_FLAG_INTR, ixgbe_cbfunc,
2215 	    ixgbe, NULL, &ixgbe->cb_hdl) != DDI_SUCCESS) {
2216 		return (IXGBE_FAILURE);
2217 	}
2218 	IXGBE_DEBUGLOG_0(ixgbe, "Interrupt callback function registered.");
2219 	return (IXGBE_SUCCESS);
2220 }
2221 
2222 /*
2223  * ixgbe_alloc_rings - Allocate memory space for rx/tx rings.
2224  */
2225 static int
2226 ixgbe_alloc_rings(ixgbe_t *ixgbe)
2227 {
2228 	/*
2229 	 * Allocate memory space for rx rings
2230 	 */
2231 	ixgbe->rx_rings = kmem_zalloc(
2232 	    sizeof (ixgbe_rx_ring_t) * ixgbe->num_rx_rings,
2233 	    KM_NOSLEEP);
2234 
2235 	if (ixgbe->rx_rings == NULL) {
2236 		return (IXGBE_FAILURE);
2237 	}
2238 
2239 	/*
2240 	 * Allocate memory space for tx rings
2241 	 */
2242 	ixgbe->tx_rings = kmem_zalloc(
2243 	    sizeof (ixgbe_tx_ring_t) * ixgbe->num_tx_rings,
2244 	    KM_NOSLEEP);
2245 
2246 	if (ixgbe->tx_rings == NULL) {
2247 		kmem_free(ixgbe->rx_rings,
2248 		    sizeof (ixgbe_rx_ring_t) * ixgbe->num_rx_rings);
2249 		ixgbe->rx_rings = NULL;
2250 		return (IXGBE_FAILURE);
2251 	}
2252 
2253 	/*
2254 	 * Allocate memory space for rx ring groups
2255 	 */
2256 	ixgbe->rx_groups = kmem_zalloc(
2257 	    sizeof (ixgbe_rx_group_t) * ixgbe->num_rx_groups,
2258 	    KM_NOSLEEP);
2259 
2260 	if (ixgbe->rx_groups == NULL) {
2261 		kmem_free(ixgbe->rx_rings,
2262 		    sizeof (ixgbe_rx_ring_t) * ixgbe->num_rx_rings);
2263 		kmem_free(ixgbe->tx_rings,
2264 		    sizeof (ixgbe_tx_ring_t) * ixgbe->num_tx_rings);
2265 		ixgbe->rx_rings = NULL;
2266 		ixgbe->tx_rings = NULL;
2267 		return (IXGBE_FAILURE);
2268 	}
2269 
2270 	return (IXGBE_SUCCESS);
2271 }
2272 
2273 /*
2274  * ixgbe_free_rings - Free the memory space of rx/tx rings.
2275  */
2276 static void
2277 ixgbe_free_rings(ixgbe_t *ixgbe)
2278 {
2279 	if (ixgbe->rx_rings != NULL) {
2280 		kmem_free(ixgbe->rx_rings,
2281 		    sizeof (ixgbe_rx_ring_t) * ixgbe->num_rx_rings);
2282 		ixgbe->rx_rings = NULL;
2283 	}
2284 
2285 	if (ixgbe->tx_rings != NULL) {
2286 		kmem_free(ixgbe->tx_rings,
2287 		    sizeof (ixgbe_tx_ring_t) * ixgbe->num_tx_rings);
2288 		ixgbe->tx_rings = NULL;
2289 	}
2290 
2291 	for (uint_t i = 0; i < ixgbe->num_rx_groups; i++) {
2292 		ixgbe_vlan_t *vlp;
2293 		ixgbe_rx_group_t *rx_group = &ixgbe->rx_groups[i];
2294 
2295 		while ((vlp = list_remove_head(&rx_group->vlans)) != NULL)
2296 			kmem_free(vlp, sizeof (ixgbe_vlan_t));
2297 
2298 		list_destroy(&rx_group->vlans);
2299 	}
2300 
2301 	if (ixgbe->rx_groups != NULL) {
2302 		kmem_free(ixgbe->rx_groups,
2303 		    sizeof (ixgbe_rx_group_t) * ixgbe->num_rx_groups);
2304 		ixgbe->rx_groups = NULL;
2305 	}
2306 }
2307 
2308 static int
2309 ixgbe_alloc_rx_data(ixgbe_t *ixgbe)
2310 {
2311 	ixgbe_rx_ring_t *rx_ring;
2312 	int i;
2313 
2314 	for (i = 0; i < ixgbe->num_rx_rings; i++) {
2315 		rx_ring = &ixgbe->rx_rings[i];
2316 		if (ixgbe_alloc_rx_ring_data(rx_ring) != IXGBE_SUCCESS)
2317 			goto alloc_rx_rings_failure;
2318 	}
2319 	return (IXGBE_SUCCESS);
2320 
2321 alloc_rx_rings_failure:
2322 	ixgbe_free_rx_data(ixgbe);
2323 	return (IXGBE_FAILURE);
2324 }
2325 
2326 static void
2327 ixgbe_free_rx_data(ixgbe_t *ixgbe)
2328 {
2329 	ixgbe_rx_ring_t *rx_ring;
2330 	ixgbe_rx_data_t *rx_data;
2331 	int i;
2332 
2333 	for (i = 0; i < ixgbe->num_rx_rings; i++) {
2334 		rx_ring = &ixgbe->rx_rings[i];
2335 
2336 		mutex_enter(&ixgbe->rx_pending_lock);
2337 		rx_data = rx_ring->rx_data;
2338 
2339 		if (rx_data != NULL) {
2340 			rx_data->flag |= IXGBE_RX_STOPPED;
2341 
2342 			if (rx_data->rcb_pending == 0) {
2343 				ixgbe_free_rx_ring_data(rx_data);
2344 				rx_ring->rx_data = NULL;
2345 			}
2346 		}
2347 
2348 		mutex_exit(&ixgbe->rx_pending_lock);
2349 	}
2350 }
2351 
2352 /*
2353  * ixgbe_setup_rings - Setup rx/tx rings.
2354  */
2355 static int
2356 ixgbe_setup_rings(ixgbe_t *ixgbe)
2357 {
2358 	/*
2359 	 * Setup the rx/tx rings, including the following:
2360 	 *
2361 	 * 1. Setup the descriptor ring and the control block buffers;
2362 	 * 2. Initialize necessary registers for receive/transmit;
2363 	 * 3. Initialize software pointers/parameters for receive/transmit;
2364 	 */
2365 	if (ixgbe_setup_rx(ixgbe) != IXGBE_SUCCESS)
2366 		return (IXGBE_FAILURE);
2367 
2368 	ixgbe_setup_tx(ixgbe);
2369 
2370 	return (IXGBE_SUCCESS);
2371 }
2372 
/*
 * ixgbe_setup_rx_ring - Program a single receive ring into the hardware.
 *
 * Points every receive descriptor at the DMA buffer of its control block,
 * then programs the ring's length, base address, head/tail, RXDCTL and
 * SRRCTL registers and resets the software indices rbd_next and lro_first.
 *
 * The caller must hold both rx_ring->rx_lock and ixgbe->gen_lock.
 */
static void
ixgbe_setup_rx_ring(ixgbe_rx_ring_t *rx_ring)
{
	ixgbe_t *ixgbe = rx_ring->ixgbe;
	ixgbe_rx_data_t *rx_data = rx_ring->rx_data;
	struct ixgbe_hw *hw = &ixgbe->hw;
	rx_control_block_t *rcb;
	union ixgbe_adv_rx_desc	*rbd;
	uint32_t size;
	uint32_t buf_low;
	uint32_t buf_high;
	uint32_t reg_val;
	int i;

	ASSERT(mutex_owned(&rx_ring->rx_lock));
	ASSERT(mutex_owned(&ixgbe->gen_lock));

	/*
	 * Give each descriptor the DMA address of its packet buffer; no
	 * header buffer is used.
	 *
	 * NOTE(review): the loop bound is ixgbe->rx_ring_size while the
	 * length register below uses rx_data->ring_size; presumably the two
	 * are always equal - confirm.
	 */
	for (i = 0; i < ixgbe->rx_ring_size; i++) {
		rcb = rx_data->work_list[i];
		rbd = &rx_data->rbd_ring[i];

		rbd->read.pkt_addr = rcb->rx_buf.dma_address;
		rbd->read.hdr_addr = 0;
	}

	/*
	 * Initialize the length register (size of the descriptor ring in
	 * bytes)
	 */
	size = rx_data->ring_size * sizeof (union ixgbe_adv_rx_desc);
	IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rx_ring->hw_index), size);

	/*
	 * Initialize the base address registers with the low and high
	 * 32 bits of the descriptor area's DMA address
	 */
	buf_low = (uint32_t)rx_data->rbd_area.dma_address;
	buf_high = (uint32_t)(rx_data->rbd_area.dma_address >> 32);
	IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rx_ring->hw_index), buf_high);
	IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rx_ring->hw_index), buf_low);

	/*
	 * Setup head & tail pointers: tail is the last descriptor index,
	 * head is the first
	 */
	IXGBE_WRITE_REG(hw, IXGBE_RDT(rx_ring->hw_index),
	    rx_data->ring_size - 1);
	IXGBE_WRITE_REG(hw, IXGBE_RDH(rx_ring->hw_index), 0);

	/* Software processing restarts at the beginning of the ring */
	rx_data->rbd_next = 0;
	rx_data->lro_first = 0;

	/*
	 * Setup the Receive Descriptor Control Register (RXDCTL)
	 * PTHRESH=32 descriptors (half the internal cache)
	 * HTHRESH=0 descriptors (to minimize latency on fetch)
	 * WTHRESH defaults to 1 (writeback each descriptor)
	 */
	reg_val = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rx_ring->hw_index));
	reg_val |= IXGBE_RXDCTL_ENABLE;	/* enable queue */

	/* Not a valid value for 82599, X540 or X550 */
	if (hw->mac.type == ixgbe_mac_82598EB) {
		reg_val |= 0x0020;	/* pthresh */
	}
	IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rx_ring->hw_index), reg_val);

	/*
	 * On 82599 and later parts, additionally set the CRCSTRIP and
	 * AGGDIS bits in the (device-global) RDRXCTL register.
	 */
	if (hw->mac.type == ixgbe_mac_82599EB ||
	    hw->mac.type == ixgbe_mac_X540 ||
	    hw->mac.type == ixgbe_mac_X550 ||
	    hw->mac.type == ixgbe_mac_X550EM_x ||
	    hw->mac.type == ixgbe_mac_X550EM_a) {
		reg_val = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
		reg_val |= (IXGBE_RDRXCTL_CRCSTRIP | IXGBE_RDRXCTL_AGGDIS);
		IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, reg_val);
	}

	/*
	 * Setup the Split and Replication Receive Control Register.
	 * Set the rx buffer size and the advanced descriptor type, and
	 * set the DROP_EN bit.
	 */
	reg_val = (ixgbe->rx_buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) |
	    IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
	reg_val |= IXGBE_SRRCTL_DROP_EN;
	IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rx_ring->hw_index), reg_val);
}
2456 
/*
 * ixgbe_setup_rx - Configure the receive unit and all receive rings.
 *
 * With the receive unit disabled, programs PSRTYPE, FCTRL, RXCSUM and the
 * classification mode (RSS, VMDq or both) and restores the VLAN state when
 * VLAN filtering is enabled. It then re-enables the receive unit, sets up
 * every rx ring, maps the queue statistics, and programs the max frame
 * size, the jumbo frame enable bit and (when lro_enable is set) RSC.
 *
 * Returns IXGBE_FAILURE if ixgbe_init_vlan() fails - in which case the
 * function returns before the receive unit is re-enabled - and
 * IXGBE_SUCCESS otherwise.
 */
static int
ixgbe_setup_rx(ixgbe_t *ixgbe)
{
	ixgbe_rx_ring_t *rx_ring;
	struct ixgbe_hw *hw = &ixgbe->hw;
	uint32_t reg_val;
	uint32_t i;
	uint32_t psrtype_rss_bit;

	/*
	 * Ensure that Rx is disabled while setting up
	 * the Rx unit and Rx descriptor ring(s)
	 */
	ixgbe_disable_rx(hw);

	/*
	 * PSRTYPE must be configured for 82599.
	 * Without VMDq only PSRTYPE(0) is written; with VMDq one PSRTYPE
	 * register per possible rx group (capab->max_rx_grp_num) is written,
	 * with a different high-order bit depending on whether more than 32
	 * groups are in use.
	 */
	if (ixgbe->classify_mode != IXGBE_CLASSIFY_VMDQ &&
	    ixgbe->classify_mode != IXGBE_CLASSIFY_VMDQ_RSS) {
		reg_val = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR |
		    IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR;
		reg_val |= IXGBE_PSRTYPE_L2HDR;
		reg_val |= 0x80000000;
		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), reg_val);
	} else {
		if (ixgbe->num_rx_groups > 32) {
			psrtype_rss_bit = 0x20000000;
		} else {
			psrtype_rss_bit = 0x40000000;
		}
		for (i = 0; i < ixgbe->capab->max_rx_grp_num; i++) {
			reg_val = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR |
			    IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR;
			reg_val |= IXGBE_PSRTYPE_L2HDR;
			reg_val |= psrtype_rss_bit;
			IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(i), reg_val);
		}
	}

	/*
	 * Set filter control in FCTRL to determine which types of packets
	 * are passed up to the driver.
	 * - Pass broadcast packets.
	 * - Do not pass flow control pause frames (82598-specific)
	 */
	reg_val = IXGBE_READ_REG(hw, IXGBE_FCTRL);
	reg_val |= IXGBE_FCTRL_BAM; /* Broadcast Accept Mode */
	if (hw->mac.type == ixgbe_mac_82598EB) {
		reg_val |= IXGBE_FCTRL_DPF; /* Discard Pause Frames */
	}
	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, reg_val);

	/*
	 * Hardware checksum settings
	 */
	if (ixgbe->rx_hcksum_enable) {
		reg_val = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
		reg_val |= IXGBE_RXCSUM_IPPCSE;	/* IP checksum */
		IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, reg_val);
	}

	/*
	 * Setup VMDq and RSS for multiple receive queues
	 */
	switch (ixgbe->classify_mode) {
	case IXGBE_CLASSIFY_RSS:
		/*
		 * One group, only RSS is needed when more than
		 * one ring enabled.
		 */
		ixgbe_setup_rss(ixgbe);
		break;

	case IXGBE_CLASSIFY_VMDQ:
		/*
		 * Multiple groups, each group has one ring,
		 * only VMDq is needed.
		 */
		ixgbe_setup_vmdq(ixgbe);
		break;

	case IXGBE_CLASSIFY_VMDQ_RSS:
		/*
		 * Multiple groups and multiple rings, both
		 * VMDq and RSS are needed.
		 */
		ixgbe_setup_vmdq_rss(ixgbe);
		break;

	default:
		break;
	}

	/*
	 * Initialize VLAN SW and HW state if VLAN filtering is
	 * enabled. This check comes after the VMDq setup above, which
	 * is where vlft_enabled gets set.
	 */
	if (ixgbe->vlft_enabled) {
		if (ixgbe_init_vlan(ixgbe) != IXGBE_SUCCESS)
			return (IXGBE_FAILURE);
	}

	/*
	 * Enable the receive unit.  This must be done after filter
	 * control is set in FCTRL. On 82598, we disable the descriptor monitor.
	 * 82598 is the only adapter which defines this RXCTRL option.
	 */
	reg_val = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
	if (hw->mac.type == ixgbe_mac_82598EB)
		reg_val |= IXGBE_RXCTRL_DMBYPS; /* descriptor monitor bypass */
	reg_val |= IXGBE_RXCTRL_RXEN;
	(void) ixgbe_enable_rx_dma(hw, reg_val);

	/*
	 * ixgbe_setup_rx_ring must be called after configuring RXCTRL
	 */
	for (i = 0; i < ixgbe->num_rx_rings; i++) {
		rx_ring = &ixgbe->rx_rings[i];
		ixgbe_setup_rx_ring(rx_ring);
	}

	/*
	 * The 82598 controller gives us the RNBC (Receive No Buffer
	 * Count) register to determine the number of frames dropped
	 * due to no available descriptors on the destination queue.
	 * However, this register was removed starting with 82599 and
	 * it was replaced with the RQSMR/QPRDC registers. The nice
	 * thing about the new registers is that they allow you to map
	 * groups of queues to specific stat registers. The bad thing
	 * is there are only 16 slots in the stat registers, so this
	 * won't work when we have 32 Rx groups. Instead, we map all
	 * queues to the zero slot of the stat registers, giving us a
	 * global counter at QPRDC[0] (with the equivalent semantics
	 * of RNBC). Perhaps future controllers will have more slots
	 * and we can implement per-group counters.
	 *
	 * The register is selected with hw_index >> 2, so four consecutive
	 * hardware indices address the same RQSMR register and some of the
	 * writes below are repeated.
	 */
	for (i = 0; i < ixgbe->num_rx_rings; i++) {
		uint32_t index = ixgbe->rx_rings[i].hw_index;
		IXGBE_WRITE_REG(hw, IXGBE_RQSMR(index >> 2), 0);
	}

	/*
	 * The Max Frame Size in MHADD/MAXFRS will be internally increased
	 * by four bytes if the packet has a VLAN field, so the value written
	 * includes the MTU, ethernet header and frame check sequence.
	 * Register is MAXFRS in 82599.
	 */
	reg_val = IXGBE_READ_REG(hw, IXGBE_MHADD);
	reg_val &= ~IXGBE_MHADD_MFS_MASK;
	reg_val |= (ixgbe->default_mtu + sizeof (struct ether_header)
	    + ETHERFCSL) << IXGBE_MHADD_MFS_SHIFT;
	IXGBE_WRITE_REG(hw, IXGBE_MHADD, reg_val);

	/*
	 * Setup Jumbo Frame enable bit: set when the MTU exceeds the
	 * standard ethernet MTU, cleared otherwise
	 */
	reg_val = IXGBE_READ_REG(hw, IXGBE_HLREG0);
	if (ixgbe->default_mtu > ETHERMTU)
		reg_val |= IXGBE_HLREG0_JUMBOEN;
	else
		reg_val &= ~IXGBE_HLREG0_JUMBOEN;
	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg_val);

	/*
	 * Setup RSC for multiple receive queues.
	 *
	 * NOTE(review): RSCCTL is indexed by the loop counter here, whereas
	 * the per-ring registers in ixgbe_setup_rx_ring() are indexed by
	 * rx_ring->hw_index - confirm the two always match when LRO is
	 * enabled.
	 */
	if (ixgbe->lro_enable) {
		for (i = 0; i < ixgbe->num_rx_rings; i++) {
			/*
			 * Make sure rx_buf_size * MAXDESC not greater
			 * than 65535.
			 * Intel recommends 4 for MAXDESC field value.
			 */
			reg_val = IXGBE_READ_REG(hw, IXGBE_RSCCTL(i));
			reg_val |= IXGBE_RSCCTL_RSCEN;
			if (ixgbe->rx_buf_size == IXGBE_PKG_BUF_16k)
				reg_val |= IXGBE_RSCCTL_MAXDESC_1;
			else
				reg_val |= IXGBE_RSCCTL_MAXDESC_4;
			IXGBE_WRITE_REG(hw,  IXGBE_RSCCTL(i), reg_val);
		}

		reg_val = IXGBE_READ_REG(hw, IXGBE_RSCDBU);
		reg_val |= IXGBE_RSCDBU_RSCACKDIS;
		IXGBE_WRITE_REG(hw, IXGBE_RSCDBU, reg_val);

		reg_val = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
		reg_val |= IXGBE_RDRXCTL_RSCACKC;
		reg_val |= IXGBE_RDRXCTL_FCOE_WRFIX;
		reg_val &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;

		IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, reg_val);
	}

	return (IXGBE_SUCCESS);
}
2652 
/*
 * ixgbe_setup_tx_ring - Program a single transmit ring into the hardware.
 *
 * Writes the ring's length, base address and head/tail registers,
 * configures head write-back when tx_head_wb_enable is set, and resets the
 * software descriptor indices and the cached tx context.
 *
 * The caller must hold both tx_ring->tx_lock and ixgbe->gen_lock.
 */
static void
ixgbe_setup_tx_ring(ixgbe_tx_ring_t *tx_ring)
{
	ixgbe_t *ixgbe = tx_ring->ixgbe;
	struct ixgbe_hw *hw = &ixgbe->hw;
	uint32_t size;
	uint32_t buf_low;
	uint32_t buf_high;
	uint32_t reg_val;

	ASSERT(mutex_owned(&tx_ring->tx_lock));
	ASSERT(mutex_owned(&ixgbe->gen_lock));

	/*
	 * Initialize the length register (size of the descriptor ring in
	 * bytes)
	 */
	size = tx_ring->ring_size * sizeof (union ixgbe_adv_tx_desc);
	IXGBE_WRITE_REG(hw, IXGBE_TDLEN(tx_ring->index), size);

	/*
	 * Initialize the base address registers with the low and high
	 * 32 bits of the descriptor area's DMA address
	 */
	buf_low = (uint32_t)tx_ring->tbd_area.dma_address;
	buf_high = (uint32_t)(tx_ring->tbd_area.dma_address >> 32);
	IXGBE_WRITE_REG(hw, IXGBE_TDBAL(tx_ring->index), buf_low);
	IXGBE_WRITE_REG(hw, IXGBE_TDBAH(tx_ring->index), buf_high);

	/*
	 * Setup head & tail pointers
	 */
	IXGBE_WRITE_REG(hw, IXGBE_TDH(tx_ring->index), 0);
	IXGBE_WRITE_REG(hw, IXGBE_TDT(tx_ring->index), 0);

	/*
	 * Setup head write-back
	 */
	if (ixgbe->tx_head_wb_enable) {
		/*
		 * The memory of the head write-back is allocated using
		 * the extra tbd beyond the tail of the tbd ring, i.e. it
		 * starts "size" bytes into the descriptor area.
		 */
		tx_ring->tbd_head_wb = (uint32_t *)
		    ((uintptr_t)tx_ring->tbd_area.address + size);
		*tx_ring->tbd_head_wb = 0;

		/* DMA address of the same location, split into two halves */
		buf_low = (uint32_t)
		    (tx_ring->tbd_area.dma_address + size);
		buf_high = (uint32_t)
		    ((tx_ring->tbd_area.dma_address + size) >> 32);

		/* Set the head write-back enable bit */
		buf_low |= IXGBE_TDWBAL_HEAD_WB_ENABLE;

		IXGBE_WRITE_REG(hw, IXGBE_TDWBAL(tx_ring->index), buf_low);
		IXGBE_WRITE_REG(hw, IXGBE_TDWBAH(tx_ring->index), buf_high);

		/*
		 * Turn off relaxed ordering for head write back or it will
		 * cause problems with the tx recycling. The DCA_TXCTRL
		 * register is accessed through a different macro on 82598
		 * than on the later parts.
		 */

		reg_val = (hw->mac.type == ixgbe_mac_82598EB) ?
		    IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(tx_ring->index)) :
		    IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(tx_ring->index));
		reg_val &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
		if (hw->mac.type == ixgbe_mac_82598EB) {
			IXGBE_WRITE_REG(hw,
			    IXGBE_DCA_TXCTRL(tx_ring->index), reg_val);
		} else {
			IXGBE_WRITE_REG(hw,
			    IXGBE_DCA_TXCTRL_82599(tx_ring->index), reg_val);
		}
	} else {
		tx_ring->tbd_head_wb = NULL;
	}

	/* The descriptor ring starts out empty: every descriptor is free */
	tx_ring->tbd_head = 0;
	tx_ring->tbd_tail = 0;
	tx_ring->tbd_free = tx_ring->ring_size;

	/*
	 * The control block free list is only reset when tx_ring_init is
	 * set.
	 * NOTE(review): presumably the flag is cleared when control blocks
	 * are still in use across a restart - confirm against its writers.
	 */
	if (ixgbe->tx_ring_init == B_TRUE) {
		tx_ring->tcb_head = 0;
		tx_ring->tcb_tail = 0;
		tx_ring->tcb_free = tx_ring->free_list_size;
	}

	/*
	 * Initialize the s/w context structure
	 */
	bzero(&tx_ring->tx_context, sizeof (ixgbe_tx_context_t));
}
2744 
/*
 * ixgbe_setup_tx - Configure the transmit unit and all transmit rings.
 *
 * Sets up every tx ring, maps the per-queue statistics, enables CRC
 * appending and padding, enables transmit DMA and programs MTQC on 82599
 * and later parts, and finally enables each tx queue.
 */
static void
ixgbe_setup_tx(ixgbe_t *ixgbe)
{
	struct ixgbe_hw *hw = &ixgbe->hw;
	ixgbe_tx_ring_t *tx_ring;
	uint32_t reg_val;
	int i;

	for (i = 0; i < ixgbe->num_tx_rings; i++) {
		tx_ring = &ixgbe->tx_rings[i];
		ixgbe_setup_tx_ring(tx_ring);
	}

	/*
	 * Setup the per-ring statistics mapping. We map all Tx queues
	 * to slot 0 to stay consistent with Rx. The mapping register is
	 * TQSMR on 82598 and TQSM on all other parts; it is selected with
	 * i >> 2, so four consecutive rings address the same register.
	 */
	for (i = 0; i < ixgbe->num_tx_rings; i++) {
		switch (hw->mac.type) {
		case ixgbe_mac_82598EB:
			IXGBE_WRITE_REG(hw, IXGBE_TQSMR(i >> 2), 0);
			break;

		default:
			IXGBE_WRITE_REG(hw, IXGBE_TQSM(i >> 2), 0);
			break;
		}
	}

	/*
	 * Enable CRC appending and TX padding (for short tx frames)
	 */
	reg_val = IXGBE_READ_REG(hw, IXGBE_HLREG0);
	reg_val |= IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN;
	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg_val);

	/*
	 * enable DMA for 82599, X540 and X550 parts
	 */
	if (hw->mac.type == ixgbe_mac_82599EB ||
	    hw->mac.type == ixgbe_mac_X540 ||
	    hw->mac.type == ixgbe_mac_X550 ||
	    hw->mac.type == ixgbe_mac_X550EM_x ||
	    hw->mac.type == ixgbe_mac_X550EM_a) {
		/* DMATXCTL.TE must be set after all Tx config is complete */
		reg_val = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
		reg_val |= IXGBE_DMATXCTL_TE;
		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, reg_val);

		/*
		 * Disable arbiter to set MTQC: ARBDIS is set, MTQC is
		 * written, and then ARBDIS is cleared again.
		 */
		reg_val = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
		reg_val |= IXGBE_RTTDCS_ARBDIS;
		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg_val);
		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
		reg_val &= ~IXGBE_RTTDCS_ARBDIS;
		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg_val);
	}

	/*
	 * Enabling tx queues ..
	 * For 82599 must be done after DMATXCTL.TE is set
	 */
	for (i = 0; i < ixgbe->num_tx_rings; i++) {
		tx_ring = &ixgbe->tx_rings[i];
		reg_val = IXGBE_READ_REG(hw, IXGBE_TXDCTL(tx_ring->index));
		reg_val |= IXGBE_TXDCTL_ENABLE;
		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(tx_ring->index), reg_val);
	}
}
2814 
2815 /*
2816  * ixgbe_setup_rss - Setup receive-side scaling feature.
2817  */
2818 static void
2819 ixgbe_setup_rss(ixgbe_t *ixgbe)
2820 {
2821 	struct ixgbe_hw *hw = &ixgbe->hw;
2822 	uint32_t mrqc;
2823 
2824 	/*
2825 	 * Initialize RETA/ERETA table
2826 	 */
2827 	ixgbe_setup_rss_table(ixgbe);
2828 
2829 	/*
2830 	 * Enable RSS & perform hash on these packet types
2831 	 */
2832 	mrqc = IXGBE_MRQC_RSSEN |
2833 	    IXGBE_MRQC_RSS_FIELD_IPV4 |
2834 	    IXGBE_MRQC_RSS_FIELD_IPV4_TCP |
2835 	    IXGBE_MRQC_RSS_FIELD_IPV4_UDP |
2836 	    IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP |
2837 	    IXGBE_MRQC_RSS_FIELD_IPV6_EX |
2838 	    IXGBE_MRQC_RSS_FIELD_IPV6 |
2839 	    IXGBE_MRQC_RSS_FIELD_IPV6_TCP |
2840 	    IXGBE_MRQC_RSS_FIELD_IPV6_UDP |
2841 	    IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
2842 	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2843 }
2844 
2845 /*
2846  * ixgbe_setup_vmdq - Setup MAC classification feature
2847  */
2848 static void
2849 ixgbe_setup_vmdq(ixgbe_t *ixgbe)
2850 {
2851 	struct ixgbe_hw *hw = &ixgbe->hw;
2852 	uint32_t vmdctl, i, vtctl, vlnctl;
2853 
2854 	/*
2855 	 * Setup the VMDq Control register, enable VMDq based on
2856 	 * packet destination MAC address:
2857 	 */
2858 	switch (hw->mac.type) {
2859 	case ixgbe_mac_82598EB:
2860 		/*
2861 		 * VMDq Enable = 1;
2862 		 * VMDq Filter = 0; MAC filtering
2863 		 * Default VMDq output index = 0;
2864 		 */
2865 		vmdctl = IXGBE_VMD_CTL_VMDQ_EN;
2866 		IXGBE_WRITE_REG(hw, IXGBE_VMD_CTL, vmdctl);
2867 		break;
2868 
2869 	case ixgbe_mac_82599EB:
2870 	case ixgbe_mac_X540:
2871 	case ixgbe_mac_X550:
2872 	case ixgbe_mac_X550EM_x:
2873 	case ixgbe_mac_X550EM_a:
2874 		/*
2875 		 * Enable VMDq-only.
2876 		 */
2877 		vmdctl = IXGBE_MRQC_VMDQEN;
2878 		IXGBE_WRITE_REG(hw, IXGBE_MRQC, vmdctl);
2879 
2880 		for (i = 0; i < hw->mac.num_rar_entries; i++) {
2881 			IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(i), 0);
2882 			IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(i), 0);
2883 		}
2884 
2885 		/*
2886 		 * Enable Virtualization and Replication.
2887 		 */
2888 		vtctl = IXGBE_READ_REG(hw, IXGBE_VT_CTL);
2889 		ixgbe->rx_def_group = vtctl & IXGBE_VT_CTL_POOL_MASK;
2890 		vtctl |= IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
2891 		IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vtctl);
2892 
2893 		/*
2894 		 * Enable VLAN filtering and switching (VFTA and VLVF).
2895 		 */
2896 		vlnctl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2897 		vlnctl |= IXGBE_VLNCTRL_VFE;
2898 		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctl);
2899 		ixgbe->vlft_enabled = B_TRUE;
2900 
2901 		/*
2902 		 * Enable receiving packets to all VFs
2903 		 */
2904 		IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), IXGBE_VFRE_ENABLE_ALL);
2905 		IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), IXGBE_VFRE_ENABLE_ALL);
2906 		break;
2907 
2908 	default:
2909 		break;
2910 	}
2911 }
2912 
2913 /*
2914  * ixgbe_setup_vmdq_rss - Setup both vmdq feature and rss feature.
2915  */
2916 static void
2917 ixgbe_setup_vmdq_rss(ixgbe_t *ixgbe)
2918 {
2919 	struct ixgbe_hw *hw = &ixgbe->hw;
2920 	uint32_t i, mrqc;
2921 	uint32_t vtctl, vmdctl, vlnctl;
2922 
2923 	/*
2924 	 * Initialize RETA/ERETA table
2925 	 */
2926 	ixgbe_setup_rss_table(ixgbe);
2927 
2928 	/*
2929 	 * Enable and setup RSS and VMDq
2930 	 */
2931 	switch (hw->mac.type) {
2932 	case ixgbe_mac_82598EB:
2933 		/*
2934 		 * Enable RSS & Setup RSS Hash functions
2935 		 */
2936 		mrqc = IXGBE_MRQC_RSSEN |
2937 		    IXGBE_MRQC_RSS_FIELD_IPV4 |
2938 		    IXGBE_MRQC_RSS_FIELD_IPV4_TCP |
2939 		    IXGBE_MRQC_RSS_FIELD_IPV4_UDP |
2940 		    IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP |
2941 		    IXGBE_MRQC_RSS_FIELD_IPV6_EX |
2942 		    IXGBE_MRQC_RSS_FIELD_IPV6 |
2943 		    IXGBE_MRQC_RSS_FIELD_IPV6_TCP |
2944 		    IXGBE_MRQC_RSS_FIELD_IPV6_UDP |
2945 		    IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
2946 		IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2947 
2948 		/*
2949 		 * Enable and Setup VMDq
2950 		 * VMDq Filter = 0; MAC filtering
2951 		 * Default VMDq output index = 0;
2952 		 */
2953 		vmdctl = IXGBE_VMD_CTL_VMDQ_EN;
2954 		IXGBE_WRITE_REG(hw, IXGBE_VMD_CTL, vmdctl);
2955 		break;
2956 
2957 	case ixgbe_mac_82599EB:
2958 	case ixgbe_mac_X540:
2959 	case ixgbe_mac_X550:
2960 	case ixgbe_mac_X550EM_x:
2961 	case ixgbe_mac_X550EM_a:
2962 		/*
2963 		 * Enable RSS & Setup RSS Hash functions
2964 		 */
2965 		mrqc = IXGBE_MRQC_RSS_FIELD_IPV4 |
2966 		    IXGBE_MRQC_RSS_FIELD_IPV4_TCP |
2967 		    IXGBE_MRQC_RSS_FIELD_IPV4_UDP |
2968 		    IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP |
2969 		    IXGBE_MRQC_RSS_FIELD_IPV6_EX |
2970 		    IXGBE_MRQC_RSS_FIELD_IPV6 |
2971 		    IXGBE_MRQC_RSS_FIELD_IPV6_TCP |
2972 		    IXGBE_MRQC_RSS_FIELD_IPV6_UDP |
2973 		    IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
2974 
2975 		/*
2976 		 * Enable VMDq+RSS.
2977 		 */
2978 		if (ixgbe->num_rx_groups > 32)  {
2979 			mrqc = mrqc | IXGBE_MRQC_VMDQRSS64EN;
2980 		} else {
2981 			mrqc = mrqc | IXGBE_MRQC_VMDQRSS32EN;
2982 		}
2983 
2984 		IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
2985 
2986 		for (i = 0; i < hw->mac.num_rar_entries; i++) {
2987 			IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(i), 0);
2988 			IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(i), 0);
2989 		}
2990 		break;
2991 
2992 	default:
2993 		break;
2994 
2995 	}
2996 
2997 	if (hw->mac.type == ixgbe_mac_82599EB ||
2998 	    hw->mac.type == ixgbe_mac_X540 ||
2999 	    hw->mac.type == ixgbe_mac_X550 ||
3000 	    hw->mac.type == ixgbe_mac_X550EM_x ||
3001 	    hw->mac.type == ixgbe_mac_X550EM_a) {
3002 		/*
3003 		 * Enable Virtualization and Replication.
3004 		 */
3005 		vtctl = IXGBE_READ_REG(hw, IXGBE_VT_CTL);
3006 		ixgbe->rx_def_group = vtctl & IXGBE_VT_CTL_POOL_MASK;
3007 		vtctl |= IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3008 		vtctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3009 		IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vtctl);
3010 
3011 		/*
3012 		 * Enable VLAN filtering and switching (VFTA and VLVF).
3013 		 */
3014 		vlnctl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3015 		vlnctl |= IXGBE_VLNCTRL_VFE;
3016 		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctl);
3017 		ixgbe->vlft_enabled = B_TRUE;
3018 
3019 		/*
3020 		 * Enable receiving packets to all VFs
3021 		 */
3022 		IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), IXGBE_VFRE_ENABLE_ALL);
3023 		IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), IXGBE_VFRE_ENABLE_ALL);
3024 	}
3025 }
3026 
3027 /*
3028  * ixgbe_setup_rss_table - Setup RSS table
3029  */
3030 static void
3031 ixgbe_setup_rss_table(ixgbe_t *ixgbe)
3032 {
3033 	struct ixgbe_hw *hw = &ixgbe->hw;
3034 	uint32_t i, j;
3035 	uint32_t random;
3036 	uint32_t reta;
3037 	uint32_t ring_per_group;
3038 	uint32_t ring;
3039 	uint32_t table_size;
3040 	uint32_t index_mult;
3041 	uint32_t rxcsum;
3042 
3043 	/*
3044 	 * Set multiplier for RETA setup and table size based on MAC type.
3045 	 * RETA table sizes vary by model:
3046 	 *
3047 	 * 82598, 82599, X540: 128 table entries.
3048 	 * X550: 512 table entries.
3049 	 */
3050 	index_mult = 0x1;
3051 	table_size = 128;
3052 	switch (ixgbe->hw.mac.type) {
3053 	case ixgbe_mac_82598EB:
3054 		index_mult = 0x11;
3055 		break;
3056 	case ixgbe_mac_X550:
3057 	case ixgbe_mac_X550EM_x:
3058 	case ixgbe_mac_X550EM_a:
3059 		table_size = 512;
3060 		break;
3061 	default:
3062 		break;
3063 	}
3064 
3065 	/*
3066 	 * Fill out RSS redirection table. The configuation of the indices is
3067 	 * hardware-dependent.
3068 	 *
3069 	 *  82598: 8 bits wide containing two 4 bit RSS indices
3070 	 *  82599, X540: 8 bits wide containing one 4 bit RSS index
3071 	 *  X550: 8 bits wide containing one 6 bit RSS index
3072 	 */
3073 	reta = 0;
3074 	ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups;
3075 
3076 	for (i = 0, j = 0; i < table_size; i++, j++) {
3077 		if (j == ring_per_group) j = 0;
3078 
3079 		/*
3080 		 * The low 8 bits are for hash value (n+0);
3081 		 * The next 8 bits are for hash value (n+1), etc.
3082 		 */
3083 		ring = (j * index_mult);
3084 		reta = reta >> 8;
3085 		reta = reta | (((uint32_t)ring) << 24);
3086 
3087 		if ((i & 3) == 3) {
3088 			/*
3089 			 * The first 128 table entries are programmed into the
3090 			 * RETA register, with any beyond that (eg; on X550)
3091 			 * into ERETA.
3092 			 */
3093 			if (i < 128)
3094 				IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
3095 			else
3096 				IXGBE_WRITE_REG(hw, IXGBE_ERETA((i >> 2) - 32),
3097 				    reta);
3098 			reta = 0;
3099 		}
3100 	}
3101 
3102 	/*
3103 	 * Fill out hash function seeds with a random constant
3104 	 */
3105 	for (i = 0; i < 10; i++) {
3106 		(void) random_get_pseudo_bytes((uint8_t *)&random,
3107 		    sizeof (uint32_t));
3108 		IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), random);
3109 	}
3110 
3111 	/*
3112 	 * Disable Packet Checksum to enable RSS for multiple receive queues.
3113 	 * It is an adapter hardware limitation that Packet Checksum is
3114 	 * mutually exclusive with RSS.
3115 	 */
3116 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
3117 	rxcsum |= IXGBE_RXCSUM_PCSD;
3118 	rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
3119 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
3120 }
3121 
3122 /*
3123  * ixgbe_init_unicst - Initialize the unicast addresses.
3124  */
3125 static void
3126 ixgbe_init_unicst(ixgbe_t *ixgbe)
3127 {
3128 	struct ixgbe_hw *hw = &ixgbe->hw;
3129 	uint8_t *mac_addr;
3130 	int slot;
3131 	/*
3132 	 * Here we should consider two situations:
3133 	 *
3134 	 * 1. Chipset is initialized at the first time,
3135 	 *    Clear all the multiple unicast addresses.
3136 	 *
3137 	 * 2. Chipset is reset
3138 	 *    Recover the multiple unicast addresses from the
3139 	 *    software data structure to the RAR registers.
3140 	 */
3141 	if (!ixgbe->unicst_init) {
3142 		/*
3143 		 * Initialize the multiple unicast addresses
3144 		 */
3145 		ixgbe->unicst_total = hw->mac.num_rar_entries;
3146 		ixgbe->unicst_avail = ixgbe->unicst_total;
3147 		for (slot = 0; slot < ixgbe->unicst_total; slot++) {
3148 			mac_addr = ixgbe->unicst_addr[slot].mac.addr;
3149 			bzero(mac_addr, ETHERADDRL);
3150 			(void) ixgbe_set_rar(hw, slot, mac_addr, 0, 0);
3151 			ixgbe->unicst_addr[slot].mac.set = 0;
3152 		}
3153 		ixgbe->unicst_init = B_TRUE;
3154 	} else {
3155 		/* Re-configure the RAR registers */
3156 		for (slot = 0; slot < ixgbe->unicst_total; slot++) {
3157 			mac_addr = ixgbe->unicst_addr[slot].mac.addr;
3158 			if (ixgbe->unicst_addr[slot].mac.set == 1) {
3159 				(void) ixgbe_set_rar(hw, slot, mac_addr,
3160 				    ixgbe->unicst_addr[slot].mac.group_index,
3161 				    IXGBE_RAH_AV);
3162 			} else {
3163 				bzero(mac_addr, ETHERADDRL);
3164 				(void) ixgbe_set_rar(hw, slot, mac_addr, 0, 0);
3165 			}
3166 		}
3167 	}
3168 }
3169 
3170 /*
3171  * ixgbe_unicst_find - Find the slot for the specified unicast address
3172  */
3173 int
3174 ixgbe_unicst_find(ixgbe_t *ixgbe, const uint8_t *mac_addr)
3175 {
3176 	int slot;
3177 
3178 	ASSERT(mutex_owned(&ixgbe->gen_lock));
3179 
3180 	for (slot = 0; slot < ixgbe->unicst_total; slot++) {
3181 		if (bcmp(ixgbe->unicst_addr[slot].mac.addr,
3182 		    mac_addr, ETHERADDRL) == 0)
3183 			return (slot);
3184 	}
3185 
3186 	return (-1);
3187 }
3188 
3189 /*
3190  * Restore the HW state to match the SW state during restart.
3191  */
3192 static int
3193 ixgbe_init_vlan(ixgbe_t *ixgbe)
3194 {
3195 	/*
3196 	 * The device is starting for the first time; there is nothing
3197 	 * to do.
3198 	 */
3199 	if (!ixgbe->vlft_init) {
3200 		ixgbe->vlft_init = B_TRUE;
3201 		return (IXGBE_SUCCESS);
3202 	}
3203 
3204 	for (uint_t i = 0; i < ixgbe->num_rx_groups; i++) {
3205 		int			ret;
3206 		boolean_t		vlvf_bypass;
3207 		ixgbe_rx_group_t	*rxg = &ixgbe->rx_groups[i];
3208 		struct ixgbe_hw		*hw = &ixgbe->hw;
3209 
3210 		if (rxg->aupe) {
3211 			uint32_t vml2flt;
3212 
3213 			vml2flt = IXGBE_READ_REG(hw, IXGBE_VMOLR(rxg->index));
3214 			vml2flt |= IXGBE_VMOLR_AUPE;
3215 			IXGBE_WRITE_REG(hw, IXGBE_VMOLR(rxg->index), vml2flt);
3216 		}
3217 
3218 		vlvf_bypass = (rxg->index == ixgbe->rx_def_group);
3219 		for (ixgbe_vlan_t *vlp = list_head(&rxg->vlans); vlp != NULL;
3220 		    vlp = list_next(&rxg->vlans, vlp)) {
3221 			ret = ixgbe_set_vfta(hw, vlp->ixvl_vid, rxg->index,
3222 			    B_TRUE, vlvf_bypass);
3223 
3224 			if (ret != IXGBE_SUCCESS) {
3225 				ixgbe_error(ixgbe, "Failed to program VFTA"
3226 				    " for group %u, VID: %u, ret: %d.",
3227 				    rxg->index, vlp->ixvl_vid, ret);
3228 				return (IXGBE_FAILURE);
3229 			}
3230 		}
3231 	}
3232 
3233 	return (IXGBE_SUCCESS);
3234 }
3235 
3236 /*
3237  * ixgbe_multicst_add - Add a multicst address.
3238  */
3239 int
3240 ixgbe_multicst_add(ixgbe_t *ixgbe, const uint8_t *multiaddr)
3241 {
3242 	ASSERT(mutex_owned(&ixgbe->gen_lock));
3243 
3244 	if ((multiaddr[0] & 01) == 0) {
3245 		return (EINVAL);
3246 	}
3247 
3248 	if (ixgbe->mcast_count >= MAX_NUM_MULTICAST_ADDRESSES) {
3249 		return (ENOENT);
3250 	}
3251 
3252 	bcopy(multiaddr,
3253 	    &ixgbe->mcast_table[ixgbe->mcast_count], ETHERADDRL);
3254 	ixgbe->mcast_count++;
3255 
3256 	/*
3257 	 * Update the multicast table in the hardware
3258 	 */
3259 	ixgbe_setup_multicst(ixgbe);
3260 
3261 	if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) {
3262 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
3263 		return (EIO);
3264 	}
3265 
3266 	return (0);
3267 }
3268 
3269 /*
3270  * ixgbe_multicst_remove - Remove a multicst address.
3271  */
3272 int
3273 ixgbe_multicst_remove(ixgbe_t *ixgbe, const uint8_t *multiaddr)
3274 {
3275 	int i;
3276 
3277 	ASSERT(mutex_owned(&ixgbe->gen_lock));
3278 
3279 	for (i = 0; i < ixgbe->mcast_count; i++) {
3280 		if (bcmp(multiaddr, &ixgbe->mcast_table[i],
3281 		    ETHERADDRL) == 0) {
3282 			for (i++; i < ixgbe->mcast_count; i++) {
3283 				ixgbe->mcast_table[i - 1] =
3284 				    ixgbe->mcast_table[i];
3285 			}
3286 			ixgbe->mcast_count--;
3287 			break;
3288 		}
3289 	}
3290 
3291 	/*
3292 	 * Update the multicast table in the hardware
3293 	 */
3294 	ixgbe_setup_multicst(ixgbe);
3295 
3296 	if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) {
3297 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
3298 		return (EIO);
3299 	}
3300 
3301 	return (0);
3302 }
3303 
3304 /*
3305  * ixgbe_setup_multicast - Setup multicast data structures.
3306  *
3307  * This routine initializes all of the multicast related structures
3308  * and save them in the hardware registers.
3309  */
3310 static void
3311 ixgbe_setup_multicst(ixgbe_t *ixgbe)
3312 {
3313 	uint8_t *mc_addr_list;
3314 	uint32_t mc_addr_count;
3315 	struct ixgbe_hw *hw = &ixgbe->hw;
3316 
3317 	ASSERT(mutex_owned(&ixgbe->gen_lock));
3318 
3319 	ASSERT(ixgbe->mcast_count <= MAX_NUM_MULTICAST_ADDRESSES);
3320 
3321 	mc_addr_list = (uint8_t *)ixgbe->mcast_table;
3322 	mc_addr_count = ixgbe->mcast_count;
3323 
3324 	/*
3325 	 * Update the multicast addresses to the MTA registers
3326 	 */
3327 	(void) ixgbe_update_mc_addr_list(hw, mc_addr_list, mc_addr_count,
3328 	    ixgbe_mc_table_itr, TRUE);
3329 }
3330 
3331 /*
3332  * ixgbe_setup_vmdq_rss_conf - Configure vmdq and rss (number and mode).
3333  *
3334  * Configure the rx classification mode (vmdq & rss) and vmdq & rss numbers.
3335  * Different chipsets may have different allowed configuration of vmdq and rss.
3336  */
3337 static void
3338 ixgbe_setup_vmdq_rss_conf(ixgbe_t *ixgbe)
3339 {
3340 	struct ixgbe_hw *hw = &ixgbe->hw;
3341 	uint32_t ring_per_group;
3342 
3343 	switch (hw->mac.type) {
3344 	case ixgbe_mac_82598EB:
3345 		/*
3346 		 * 82598 supports the following combination:
3347 		 * vmdq no. x rss no.
3348 		 * [5..16]  x 1
3349 		 * [1..4]   x [1..16]
3350 		 * However 8 rss queue per pool (vmdq) is sufficient for
3351 		 * most cases.
3352 		 */
3353 		ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups;
3354 		if (ixgbe->num_rx_groups > 4) {
3355 			ixgbe->num_rx_rings = ixgbe->num_rx_groups;
3356 		} else {
3357 			ixgbe->num_rx_rings = ixgbe->num_rx_groups *
3358 			    min(8, ring_per_group);
3359 		}
3360 
3361 		break;
3362 
3363 	case ixgbe_mac_82599EB:
3364 	case ixgbe_mac_X540:
3365 	case ixgbe_mac_X550:
3366 	case ixgbe_mac_X550EM_x:
3367 	case ixgbe_mac_X550EM_a:
3368 		/*
3369 		 * 82599 supports the following combination:
3370 		 * vmdq no. x rss no.
3371 		 * [33..64] x [1..2]
3372 		 * [2..32]  x [1..4]
3373 		 * 1 x [1..16]
3374 		 * However 8 rss queue per pool (vmdq) is sufficient for
3375 		 * most cases.
3376 		 *
3377 		 * For now, treat X540 and X550 like the 82599.
3378 		 */
3379 		ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups;
3380 		if (ixgbe->num_rx_groups == 1) {
3381 			ixgbe->num_rx_rings = min(8, ring_per_group);
3382 		} else if (ixgbe->num_rx_groups <= 32) {
3383 			ixgbe->num_rx_rings = ixgbe->num_rx_groups *
3384 			    min(4, ring_per_group);
3385 		} else if (ixgbe->num_rx_groups <= 64) {
3386 			ixgbe->num_rx_rings = ixgbe->num_rx_groups *
3387 			    min(2, ring_per_group);
3388 		}
3389 		break;
3390 
3391 	default:
3392 		break;
3393 	}
3394 
3395 	ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups;
3396 
3397 	if (ixgbe->num_rx_groups == 1 && ring_per_group == 1) {
3398 		ixgbe->classify_mode = IXGBE_CLASSIFY_NONE;
3399 	} else if (ixgbe->num_rx_groups != 1 && ring_per_group == 1) {
3400 		ixgbe->classify_mode = IXGBE_CLASSIFY_VMDQ;
3401 	} else if (ixgbe->num_rx_groups != 1 && ring_per_group != 1) {
3402 		ixgbe->classify_mode = IXGBE_CLASSIFY_VMDQ_RSS;
3403 	} else {
3404 		ixgbe->classify_mode = IXGBE_CLASSIFY_RSS;
3405 	}
3406 
3407 	IXGBE_DEBUGLOG_2(ixgbe, "rx group number:%d, rx ring number:%d",
3408 	    ixgbe->num_rx_groups, ixgbe->num_rx_rings);
3409 }
3410 
3411 /*
3412  * ixgbe_get_conf - Get driver configurations set in driver.conf.
3413  *
3414  * This routine gets user-configured values out of the configuration
3415  * file ixgbe.conf.
3416  *
3417  * For each configurable value, there is a minimum, a maximum, and a
3418  * default.
3419  * If user does not configure a value, use the default.
3420  * If user configures below the minimum, use the minumum.
3421  * If user configures above the maximum, use the maxumum.
3422  */
3423 static void
3424 ixgbe_get_conf(ixgbe_t *ixgbe)
3425 {
3426 	struct ixgbe_hw *hw = &ixgbe->hw;
3427 	uint32_t flow_control;
3428 
3429 	/*
3430 	 * ixgbe driver supports the following user configurations:
3431 	 *
3432 	 * Jumbo frame configuration:
3433 	 *    default_mtu
3434 	 *
3435 	 * Ethernet flow control configuration:
3436 	 *    flow_control
3437 	 *
3438 	 * Multiple rings configurations:
3439 	 *    tx_queue_number
3440 	 *    tx_ring_size
3441 	 *    rx_queue_number
3442 	 *    rx_ring_size
3443 	 *
3444 	 * Call ixgbe_get_prop() to get the value for a specific
3445 	 * configuration parameter.
3446 	 */
3447 
3448 	/*
3449 	 * Jumbo frame configuration - max_frame_size controls host buffer
3450 	 * allocation, so includes MTU, ethernet header, vlan tag and
3451 	 * frame check sequence.
3452 	 */
3453 	ixgbe->default_mtu = ixgbe_get_prop(ixgbe, PROP_DEFAULT_MTU,
3454 	    MIN_MTU, ixgbe->capab->max_mtu, DEFAULT_MTU);
3455 
3456 	ixgbe->max_frame_size = ixgbe->default_mtu +
3457 	    sizeof (struct ether_vlan_header) + ETHERFCSL;
3458 
3459 	/*
3460 	 * Ethernet flow control configuration
3461 	 */
3462 	flow_control = ixgbe_get_prop(ixgbe, PROP_FLOW_CONTROL,
3463 	    ixgbe_fc_none, 3, ixgbe_fc_none);
3464 	if (flow_control == 3)
3465 		flow_control = ixgbe_fc_default;
3466 
3467 	/*
3468 	 * fc.requested mode is what the user requests.  After autoneg,
3469 	 * fc.current_mode will be the flow_control mode that was negotiated.
3470 	 */
3471 	hw->fc.requested_mode = flow_control;
3472 
3473 	/*
3474 	 * Multiple rings configurations
3475 	 */
3476 	ixgbe->num_tx_rings = ixgbe_get_prop(ixgbe, PROP_TX_QUEUE_NUM,
3477 	    ixgbe->capab->min_tx_que_num,
3478 	    ixgbe->capab->max_tx_que_num,
3479 	    ixgbe->capab->def_tx_que_num);
3480 	ixgbe->tx_ring_size = ixgbe_get_prop(ixgbe, PROP_TX_RING_SIZE,
3481 	    MIN_TX_RING_SIZE, MAX_TX_RING_SIZE, DEFAULT_TX_RING_SIZE);
3482 
3483 	ixgbe->num_rx_rings = ixgbe_get_prop(ixgbe, PROP_RX_QUEUE_NUM,
3484 	    ixgbe->capab->min_rx_que_num,
3485 	    ixgbe->capab->max_rx_que_num,
3486 	    ixgbe->capab->def_rx_que_num);
3487 	ixgbe->rx_ring_size = ixgbe_get_prop(ixgbe, PROP_RX_RING_SIZE,
3488 	    MIN_RX_RING_SIZE, MAX_RX_RING_SIZE, DEFAULT_RX_RING_SIZE);
3489 
3490 	/*
3491 	 * Multiple groups configuration
3492 	 */
3493 	ixgbe->num_rx_groups = ixgbe_get_prop(ixgbe, PROP_RX_GROUP_NUM,
3494 	    ixgbe->capab->min_rx_grp_num, ixgbe->capab->max_rx_grp_num,
3495 	    ixgbe->capab->def_rx_grp_num);
3496 
3497 	ixgbe->mr_enable = ixgbe_get_prop(ixgbe, PROP_MR_ENABLE,
3498 	    0, 1, DEFAULT_MR_ENABLE);
3499 
3500 	if (ixgbe->mr_enable == B_FALSE) {
3501 		ixgbe->num_tx_rings = 1;
3502 		ixgbe->num_rx_rings = 1;
3503 		ixgbe->num_rx_groups = 1;
3504 		ixgbe->classify_mode = IXGBE_CLASSIFY_NONE;
3505 	} else {
3506 		ixgbe->num_rx_rings = ixgbe->num_rx_groups *
3507 		    max(ixgbe->num_rx_rings / ixgbe->num_rx_groups, 1);
3508 		/*
3509 		 * The combination of num_rx_rings and num_rx_groups
3510 		 * may be not supported by h/w. We need to adjust
3511 		 * them to appropriate values.
3512 		 */
3513 		ixgbe_setup_vmdq_rss_conf(ixgbe);
3514 	}
3515 
3516 	/*
3517 	 * Tunable used to force an interrupt type. The only use is
3518 	 * for testing of the lesser interrupt types.
3519 	 * 0 = don't force interrupt type
3520 	 * 1 = force interrupt type MSI-X
3521 	 * 2 = force interrupt type MSI
3522 	 * 3 = force interrupt type Legacy
3523 	 */
3524 	ixgbe->intr_force = ixgbe_get_prop(ixgbe, PROP_INTR_FORCE,
3525 	    IXGBE_INTR_NONE, IXGBE_INTR_LEGACY, IXGBE_INTR_NONE);
3526 
3527 	ixgbe->tx_hcksum_enable = ixgbe_get_prop(ixgbe, PROP_TX_HCKSUM_ENABLE,
3528 	    0, 1, DEFAULT_TX_HCKSUM_ENABLE);
3529 	ixgbe->rx_hcksum_enable = ixgbe_get_prop(ixgbe, PROP_RX_HCKSUM_ENABLE,
3530 	    0, 1, DEFAULT_RX_HCKSUM_ENABLE);
3531 	ixgbe->lso_enable = ixgbe_get_prop(ixgbe, PROP_LSO_ENABLE,
3532 	    0, 1, DEFAULT_LSO_ENABLE);
3533 	ixgbe->lro_enable = ixgbe_get_prop(ixgbe, PROP_LRO_ENABLE,
3534 	    0, 1, DEFAULT_LRO_ENABLE);
3535 	ixgbe->tx_head_wb_enable = ixgbe_get_prop(ixgbe, PROP_TX_HEAD_WB_ENABLE,
3536 	    0, 1, DEFAULT_TX_HEAD_WB_ENABLE);
3537 	ixgbe->relax_order_enable = ixgbe_get_prop(ixgbe,
3538 	    PROP_RELAX_ORDER_ENABLE, 0, 1, DEFAULT_RELAX_ORDER_ENABLE);
3539 
3540 	/* Head Write Back not recommended for 82599, X540 and X550 */
3541 	if (hw->mac.type == ixgbe_mac_82599EB ||
3542 	    hw->mac.type == ixgbe_mac_X540 ||
3543 	    hw->mac.type == ixgbe_mac_X550 ||
3544 	    hw->mac.type == ixgbe_mac_X550EM_x ||
3545 	    hw->mac.type == ixgbe_mac_X550EM_a) {
3546 		ixgbe->tx_head_wb_enable = B_FALSE;
3547 	}
3548 
3549 	/*
3550 	 * ixgbe LSO needs the tx h/w checksum support.
3551 	 * LSO will be disabled if tx h/w checksum is not
3552 	 * enabled.
3553 	 */
3554 	if (ixgbe->tx_hcksum_enable == B_FALSE) {
3555 		ixgbe->lso_enable = B_FALSE;
3556 	}
3557 
3558 	/*
3559 	 * ixgbe LRO needs the rx h/w checksum support.
3560 	 * LRO will be disabled if rx h/w checksum is not
3561 	 * enabled.
3562 	 */
3563 	if (ixgbe->rx_hcksum_enable == B_FALSE) {
3564 		ixgbe->lro_enable = B_FALSE;
3565 	}
3566 
3567 	/*
3568 	 * ixgbe LRO only supported by 82599, X540 and X550
3569 	 */
3570 	if (hw->mac.type == ixgbe_mac_82598EB) {
3571 		ixgbe->lro_enable = B_FALSE;
3572 	}
3573 	ixgbe->tx_copy_thresh = ixgbe_get_prop(ixgbe, PROP_TX_COPY_THRESHOLD,
3574 	    MIN_TX_COPY_THRESHOLD, MAX_TX_COPY_THRESHOLD,
3575 	    DEFAULT_TX_COPY_THRESHOLD);
3576 	ixgbe->tx_recycle_thresh = ixgbe_get_prop(ixgbe,
3577 	    PROP_TX_RECYCLE_THRESHOLD, MIN_TX_RECYCLE_THRESHOLD,
3578 	    MAX_TX_RECYCLE_THRESHOLD, DEFAULT_TX_RECYCLE_THRESHOLD);
3579 	ixgbe->tx_overload_thresh = ixgbe_get_prop(ixgbe,
3580 	    PROP_TX_OVERLOAD_THRESHOLD, MIN_TX_OVERLOAD_THRESHOLD,
3581 	    MAX_TX_OVERLOAD_THRESHOLD, DEFAULT_TX_OVERLOAD_THRESHOLD);
3582 	ixgbe->tx_resched_thresh = ixgbe_get_prop(ixgbe,
3583 	    PROP_TX_RESCHED_THRESHOLD, MIN_TX_RESCHED_THRESHOLD,
3584 	    MAX_TX_RESCHED_THRESHOLD, DEFAULT_TX_RESCHED_THRESHOLD);
3585 
3586 	ixgbe->rx_copy_thresh = ixgbe_get_prop(ixgbe, PROP_RX_COPY_THRESHOLD,
3587 	    MIN_RX_COPY_THRESHOLD, MAX_RX_COPY_THRESHOLD,
3588 	    DEFAULT_RX_COPY_THRESHOLD);
3589 	ixgbe->rx_limit_per_intr = ixgbe_get_prop(ixgbe, PROP_RX_LIMIT_PER_INTR,
3590 	    MIN_RX_LIMIT_PER_INTR, MAX_RX_LIMIT_PER_INTR,
3591 	    DEFAULT_RX_LIMIT_PER_INTR);
3592 
3593 	ixgbe->intr_throttling[0] = ixgbe_get_prop(ixgbe, PROP_INTR_THROTTLING,
3594 	    ixgbe->capab->min_intr_throttle,
3595 	    ixgbe->capab->max_intr_throttle,
3596 	    ixgbe->capab->def_intr_throttle);
3597 	/*
3598 	 * 82599, X540 and X550 require the interrupt throttling rate is
3599 	 * a multiple of 8. This is enforced by the register definiton.
3600 	 */
3601 	if (hw->mac.type == ixgbe_mac_82599EB ||
3602 	    hw->mac.type == ixgbe_mac_X540 ||
3603 	    hw->mac.type == ixgbe_mac_X550 ||
3604 	    hw->mac.type == ixgbe_mac_X550EM_x ||
3605 	    hw->mac.type == ixgbe_mac_X550EM_a)
3606 		ixgbe->intr_throttling[0] = ixgbe->intr_throttling[0] & 0xFF8;
3607 
3608 	hw->allow_unsupported_sfp = ixgbe_get_prop(ixgbe,
3609 	    PROP_ALLOW_UNSUPPORTED_SFP, 0, 1, DEFAULT_ALLOW_UNSUPPORTED_SFP);
3610 }
3611 
3612 static void
3613 ixgbe_init_params(ixgbe_t *ixgbe)
3614 {
3615 	struct ixgbe_hw *hw = &ixgbe->hw;
3616 	ixgbe_link_speed speeds_supported = 0;
3617 	boolean_t negotiate;
3618 
3619 	/*
3620 	 * Get a list of speeds the adapter supports. If the hw struct hasn't
3621 	 * been populated with this information yet, retrieve it from the
3622 	 * adapter and save it to our own variable.
3623 	 *
3624 	 * On certain adapters, such as ones which use SFPs, the contents of
3625 	 * hw->phy.speeds_supported (and hw->phy.autoneg_advertised) are not
3626 	 * updated, so we must rely on calling ixgbe_get_link_capabilities()
3627 	 * in order to ascertain the speeds which we are capable of supporting,
3628 	 * and in the case of SFP-equipped adapters, which speed we are
3629 	 * advertising. If ixgbe_get_link_capabilities() fails for some reason,
3630 	 * we'll go with a default list of speeds as a last resort.
3631 	 */
3632 	speeds_supported = hw->phy.speeds_supported;
3633 
3634 	if (speeds_supported == 0) {
3635 		if (ixgbe_get_link_capabilities(hw, &speeds_supported,
3636 		    &negotiate) != IXGBE_SUCCESS) {
3637 			if (hw->mac.type == ixgbe_mac_82598EB) {
3638 				speeds_supported =
3639 				    IXGBE_LINK_SPEED_82598_AUTONEG;
3640 			} else {
3641 				speeds_supported =
3642 				    IXGBE_LINK_SPEED_82599_AUTONEG;
3643 			}
3644 		}
3645 	}
3646 	ixgbe->speeds_supported = speeds_supported;
3647 
3648 	/*
3649 	 * By default, all supported speeds are enabled and advertised.
3650 	 */
3651 	if (speeds_supported & IXGBE_LINK_SPEED_10GB_FULL) {
3652 		ixgbe->param_en_10000fdx_cap = 1;
3653 		ixgbe->param_adv_10000fdx_cap = 1;
3654 	} else {
3655 		ixgbe->param_en_10000fdx_cap = 0;
3656 		ixgbe->param_adv_10000fdx_cap = 0;
3657 	}
3658 
3659 	if (speeds_supported & IXGBE_LINK_SPEED_5GB_FULL) {
3660 		ixgbe->param_en_5000fdx_cap = 1;
3661 		ixgbe->param_adv_5000fdx_cap = 1;
3662 	} else {
3663 		ixgbe->param_en_5000fdx_cap = 0;
3664 		ixgbe->param_adv_5000fdx_cap = 0;
3665 	}
3666 
3667 	if (speeds_supported & IXGBE_LINK_SPEED_2_5GB_FULL) {
3668 		ixgbe->param_en_2500fdx_cap = 1;
3669 		ixgbe->param_adv_2500fdx_cap = 1;
3670 	} else {
3671 		ixgbe->param_en_2500fdx_cap = 0;
3672 		ixgbe->param_adv_2500fdx_cap = 0;
3673 	}
3674 
3675 	if (speeds_supported & IXGBE_LINK_SPEED_1GB_FULL) {
3676 		ixgbe->param_en_1000fdx_cap = 1;
3677 		ixgbe->param_adv_1000fdx_cap = 1;
3678 	} else {
3679 		ixgbe->param_en_1000fdx_cap = 0;
3680 		ixgbe->param_adv_1000fdx_cap = 0;
3681 	}
3682 
3683 	if (speeds_supported & IXGBE_LINK_SPEED_100_FULL) {
3684 		ixgbe->param_en_100fdx_cap = 1;
3685 		ixgbe->param_adv_100fdx_cap = 1;
3686 	} else {
3687 		ixgbe->param_en_100fdx_cap = 0;
3688 		ixgbe->param_adv_100fdx_cap = 0;
3689 	}
3690 
3691 	ixgbe->param_pause_cap = 1;
3692 	ixgbe->param_asym_pause_cap = 1;
3693 	ixgbe->param_rem_fault = 0;
3694 
3695 	ixgbe->param_adv_autoneg_cap = 1;
3696 	ixgbe->param_adv_pause_cap = 1;
3697 	ixgbe->param_adv_asym_pause_cap = 1;
3698 	ixgbe->param_adv_rem_fault = 0;
3699 
3700 	ixgbe->param_lp_10000fdx_cap = 0;
3701 	ixgbe->param_lp_5000fdx_cap = 0;
3702 	ixgbe->param_lp_2500fdx_cap = 0;
3703 	ixgbe->param_lp_1000fdx_cap = 0;
3704 	ixgbe->param_lp_100fdx_cap = 0;
3705 	ixgbe->param_lp_autoneg_cap = 0;
3706 	ixgbe->param_lp_pause_cap = 0;
3707 	ixgbe->param_lp_asym_pause_cap = 0;
3708 	ixgbe->param_lp_rem_fault = 0;
3709 }
3710 
3711 /*
3712  * ixgbe_get_prop - Get a property value out of the configuration file
3713  * ixgbe.conf.
3714  *
3715  * Caller provides the name of the property, a default value, a minimum
3716  * value, and a maximum value.
3717  *
3718  * Return configured value of the property, with default, minimum and
3719  * maximum properly applied.
3720  */
3721 static int
3722 ixgbe_get_prop(ixgbe_t *ixgbe,
3723     char *propname,	/* name of the property */
3724     int minval,		/* minimum acceptable value */
3725     int maxval,		/* maximim acceptable value */
3726     int defval)		/* default value */
3727 {
3728 	int value;
3729 
3730 	/*
3731 	 * Call ddi_prop_get_int() to read the conf settings
3732 	 */
3733 	value = ddi_prop_get_int(DDI_DEV_T_ANY, ixgbe->dip,
3734 	    DDI_PROP_DONTPASS, propname, defval);
3735 	if (value > maxval)
3736 		value = maxval;
3737 
3738 	if (value < minval)
3739 		value = minval;
3740 
3741 	return (value);
3742 }
3743 
3744 /*
3745  * ixgbe_driver_setup_link - Using the link properties to setup the link.
3746  */
3747 int
3748 ixgbe_driver_setup_link(ixgbe_t *ixgbe, boolean_t setup_hw)
3749 {
3750 	struct ixgbe_hw *hw = &ixgbe->hw;
3751 	ixgbe_link_speed advertised = 0;
3752 
3753 	/*
3754 	 * Assemble a list of enabled speeds to auto-negotiate with.
3755 	 */
3756 	if (ixgbe->param_en_10000fdx_cap == 1)
3757 		advertised |= IXGBE_LINK_SPEED_10GB_FULL;
3758 
3759 	if (ixgbe->param_en_5000fdx_cap == 1)
3760 		advertised |= IXGBE_LINK_SPEED_5GB_FULL;
3761 
3762 	if (ixgbe->param_en_2500fdx_cap == 1)
3763 		advertised |= IXGBE_LINK_SPEED_2_5GB_FULL;
3764 
3765 	if (ixgbe->param_en_1000fdx_cap == 1)
3766 		advertised |= IXGBE_LINK_SPEED_1GB_FULL;
3767 
3768 	if (ixgbe->param_en_100fdx_cap == 1)
3769 		advertised |= IXGBE_LINK_SPEED_100_FULL;
3770 
3771 	/*
3772 	 * As a last resort, autoneg with a default list of speeds.
3773 	 */
3774 	if (ixgbe->param_adv_autoneg_cap == 1 && advertised == 0) {
3775 		ixgbe_notice(ixgbe, "Invalid link settings. Setting link "
3776 		    "to autonegotiate with full capabilities.");
3777 
3778 		if (hw->mac.type == ixgbe_mac_82598EB)
3779 			advertised = IXGBE_LINK_SPEED_82598_AUTONEG;
3780 		else
3781 			advertised = IXGBE_LINK_SPEED_82599_AUTONEG;
3782 	}
3783 
3784 	if (setup_hw) {
3785 		if (ixgbe_setup_link(&ixgbe->hw, advertised,
3786 		    ixgbe->param_adv_autoneg_cap) != IXGBE_SUCCESS) {
3787 			ixgbe_notice(ixgbe, "Setup link failed on this "
3788 			    "device.");
3789 			return (IXGBE_FAILURE);
3790 		}
3791 	}
3792 
3793 	return (IXGBE_SUCCESS);
3794 }
3795 
3796 /*
3797  * ixgbe_driver_link_check - Link status processing.
3798  *
3799  * This function can be called in both kernel context and interrupt context
3800  */
3801 static void
3802 ixgbe_driver_link_check(ixgbe_t *ixgbe)
3803 {
3804 	struct ixgbe_hw *hw = &ixgbe->hw;
3805 	ixgbe_link_speed speed = IXGBE_LINK_SPEED_UNKNOWN;
3806 	boolean_t link_up = B_FALSE;
3807 	boolean_t link_changed = B_FALSE;
3808 
3809 	ASSERT(mutex_owned(&ixgbe->gen_lock));
3810 
3811 	(void) ixgbe_check_link(hw, &speed, &link_up, B_FALSE);
3812 	if (link_up) {
3813 		ixgbe->link_check_complete = B_TRUE;
3814 
3815 		/* Link is up, enable flow control settings */
3816 		(void) ixgbe_fc_enable(hw);
3817 
3818 		/*
3819 		 * The Link is up, check whether it was marked as down earlier
3820 		 */
3821 		if (ixgbe->link_state != LINK_STATE_UP) {
3822 			switch (speed) {
3823 			case IXGBE_LINK_SPEED_10GB_FULL:
3824 				ixgbe->link_speed = SPEED_10GB;
3825 				break;
3826 			case IXGBE_LINK_SPEED_5GB_FULL:
3827 				ixgbe->link_speed = SPEED_5GB;
3828 				break;
3829 			case IXGBE_LINK_SPEED_2_5GB_FULL:
3830 				ixgbe->link_speed = SPEED_2_5GB;
3831 				break;
3832 			case IXGBE_LINK_SPEED_1GB_FULL:
3833 				ixgbe->link_speed = SPEED_1GB;
3834 				break;
3835 			case IXGBE_LINK_SPEED_100_FULL:
3836 				ixgbe->link_speed = SPEED_100;
3837 			}
3838 			ixgbe->link_duplex = LINK_DUPLEX_FULL;
3839 			ixgbe->link_state = LINK_STATE_UP;
3840 			link_changed = B_TRUE;
3841 		}
3842 	} else {
3843 		if (ixgbe->link_check_complete == B_TRUE ||
3844 		    (ixgbe->link_check_complete == B_FALSE &&
3845 		    gethrtime() >= ixgbe->link_check_hrtime)) {
3846 			/*
3847 			 * The link is really down
3848 			 */
3849 			ixgbe->link_check_complete = B_TRUE;
3850 
3851 			if (ixgbe->link_state != LINK_STATE_DOWN) {
3852 				ixgbe->link_speed = 0;
3853 				ixgbe->link_duplex = LINK_DUPLEX_UNKNOWN;
3854 				ixgbe->link_state = LINK_STATE_DOWN;
3855 				link_changed = B_TRUE;
3856 			}
3857 		}
3858 	}
3859 
3860 	/*
3861 	 * If we are in an interrupt context, need to re-enable the
3862 	 * interrupt, which was automasked
3863 	 */
3864 	if (servicing_interrupt() != 0) {
3865 		ixgbe->eims |= IXGBE_EICR_LSC;
3866 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, ixgbe->eims);
3867 	}
3868 
3869 	if (link_changed) {
3870 		mac_link_update(ixgbe->mac_hdl, ixgbe->link_state);
3871 	}
3872 }
3873 
3874 /*
3875  * ixgbe_sfp_check - sfp module processing done in taskq only for 82599.
3876  */
3877 static void
3878 ixgbe_sfp_check(void *arg)
3879 {
3880 	ixgbe_t *ixgbe = (ixgbe_t *)arg;
3881 	uint32_t eicr = ixgbe->eicr;
3882 	struct ixgbe_hw *hw = &ixgbe->hw;
3883 
3884 	mutex_enter(&ixgbe->gen_lock);
3885 	(void) hw->phy.ops.identify_sfp(hw);
3886 	if (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw)) {
3887 		/* clear the interrupt */
3888 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw));
3889 
3890 		/* if link up, do multispeed fiber setup */
3891 		(void) ixgbe_setup_link(hw, IXGBE_LINK_SPEED_82599_AUTONEG,
3892 		    B_TRUE);
3893 		ixgbe_driver_link_check(ixgbe);
3894 		ixgbe_get_hw_state(ixgbe);
3895 	} else if (eicr & IXGBE_EICR_GPI_SDP2_BY_MAC(hw)) {
3896 		/* clear the interrupt */
3897 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2_BY_MAC(hw));
3898 
3899 		/* if link up, do sfp module setup */
3900 		(void) hw->mac.ops.setup_sfp(hw);
3901 
3902 		/* do multispeed fiber setup */
3903 		(void) ixgbe_setup_link(hw, IXGBE_LINK_SPEED_82599_AUTONEG,
3904 		    B_TRUE);
3905 		ixgbe_driver_link_check(ixgbe);
3906 		ixgbe_get_hw_state(ixgbe);
3907 	}
3908 	mutex_exit(&ixgbe->gen_lock);
3909 
3910 	/*
3911 	 * We need to fully re-check the link later.
3912 	 */
3913 	ixgbe->link_check_complete = B_FALSE;
3914 	ixgbe->link_check_hrtime = gethrtime() +
3915 	    (IXGBE_LINK_UP_TIME * 100000000ULL);
3916 }
3917 
3918 /*
3919  * ixgbe_overtemp_check - overtemp module processing done in taskq
3920  *
3921  * This routine will only be called on adapters with temperature sensor.
3922  * The indication of over-temperature can be either SDP0 interrupt or the link
3923  * status change interrupt.
3924  */
3925 static void
3926 ixgbe_overtemp_check(void *arg)
3927 {
3928 	ixgbe_t *ixgbe = (ixgbe_t *)arg;
3929 	struct ixgbe_hw *hw = &ixgbe->hw;
3930 	uint32_t eicr = ixgbe->eicr;
3931 	ixgbe_link_speed speed;
3932 	boolean_t link_up;
3933 
3934 	mutex_enter(&ixgbe->gen_lock);
3935 
3936 	/* make sure we know current state of link */
3937 	(void) ixgbe_check_link(hw, &speed, &link_up, B_FALSE);
3938 
3939 	/* check over-temp condition */
3940 	if (((eicr & IXGBE_EICR_GPI_SDP0_BY_MAC(hw)) && (!link_up)) ||
3941 	    (eicr & IXGBE_EICR_LSC)) {
3942 		if (hw->phy.ops.check_overtemp(hw) == IXGBE_ERR_OVERTEMP) {
3943 			atomic_or_32(&ixgbe->ixgbe_state, IXGBE_OVERTEMP);
3944 
3945 			/*
3946 			 * Disable the adapter interrupts
3947 			 */
3948 			ixgbe_disable_adapter_interrupts(ixgbe);
3949 
3950 			/*
3951 			 * Disable Rx/Tx units
3952 			 */
3953 			(void) ixgbe_stop_adapter(hw);
3954 
3955 			ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_LOST);
3956 			ixgbe_error(ixgbe,
3957 			    "Problem: Network adapter has been stopped "
3958 			    "because it has overheated");
3959 			ixgbe_error(ixgbe,
3960 			    "Action: Restart the computer. "
3961 			    "If the problem persists, power off the system "
3962 			    "and replace the adapter");
3963 		}
3964 	}
3965 
3966 	/* write to clear the interrupt */
3967 	IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr);
3968 
3969 	mutex_exit(&ixgbe->gen_lock);
3970 }
3971 
3972 /*
3973  * ixgbe_phy_check - taskq to process interrupts from an external PHY
3974  *
3975  * This routine will only be called on adapters with external PHYs
3976  * (such as X550) that may be trying to raise our attention to some event.
3977  * Currently, this is limited to claiming PHY overtemperature and link status
3978  * change (LSC) events, however this may expand to include other things in
3979  * future adapters.
3980  */
3981 static void
3982 ixgbe_phy_check(void *arg)
3983 {
3984 	ixgbe_t *ixgbe = (ixgbe_t *)arg;
3985 	struct ixgbe_hw *hw = &ixgbe->hw;
3986 	int rv;
3987 
3988 	mutex_enter(&ixgbe->gen_lock);
3989 
3990 	/*
3991 	 * X550 baseT PHY overtemp and LSC events are handled here.
3992 	 *
3993 	 * If an overtemp event occurs, it will be reflected in the
3994 	 * return value of phy.ops.handle_lasi() and the common code will
3995 	 * automatically power off the baseT PHY. This is our cue to trigger
3996 	 * an FMA event.
3997 	 *
3998 	 * If a link status change event occurs, phy.ops.handle_lasi() will
3999 	 * automatically initiate a link setup between the integrated KR PHY
4000 	 * and the external X557 PHY to ensure that the link speed between
4001 	 * them matches the link speed of the baseT link.
4002 	 */
4003 	rv = ixgbe_handle_lasi(hw);
4004 
4005 	if (rv == IXGBE_ERR_OVERTEMP) {
4006 		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_OVERTEMP);
4007 
4008 		/*
4009 		 * Disable the adapter interrupts
4010 		 */
4011 		ixgbe_disable_adapter_interrupts(ixgbe);
4012 
4013 		/*
4014 		 * Disable Rx/Tx units
4015 		 */
4016 		(void) ixgbe_stop_adapter(hw);
4017 
4018 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_LOST);
4019 		ixgbe_error(ixgbe,
4020 		    "Problem: Network adapter has been stopped due to a "
4021 		    "overtemperature event being detected.");
4022 		ixgbe_error(ixgbe,
4023 		    "Action: Shut down or restart the computer. If the issue "
4024 		    "persists, please take action in accordance with the "
4025 		    "recommendations from your system vendor.");
4026 	}
4027 
4028 	mutex_exit(&ixgbe->gen_lock);
4029 }
4030 
4031 /*
4032  * ixgbe_link_timer - timer for link status detection
4033  */
4034 static void
4035 ixgbe_link_timer(void *arg)
4036 {
4037 	ixgbe_t *ixgbe = (ixgbe_t *)arg;
4038 
4039 	mutex_enter(&ixgbe->gen_lock);
4040 	ixgbe_driver_link_check(ixgbe);
4041 	mutex_exit(&ixgbe->gen_lock);
4042 }
4043 
4044 /*
4045  * ixgbe_local_timer - Driver watchdog function.
4046  *
4047  * This function will handle the transmit stall check and other routines.
4048  */
4049 static void
4050 ixgbe_local_timer(void *arg)
4051 {
4052 	ixgbe_t *ixgbe = (ixgbe_t *)arg;
4053 
4054 	if (ixgbe->ixgbe_state & IXGBE_OVERTEMP)
4055 		goto out;
4056 
4057 	if (ixgbe->ixgbe_state & IXGBE_ERROR) {
4058 		ixgbe->reset_count++;
4059 		if (ixgbe_reset(ixgbe) == IXGBE_SUCCESS)
4060 			ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_RESTORED);
4061 		goto out;
4062 	}
4063 
4064 	if (ixgbe_stall_check(ixgbe)) {
4065 		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_STALL);
4066 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
4067 
4068 		ixgbe->reset_count++;
4069 		if (ixgbe_reset(ixgbe) == IXGBE_SUCCESS)
4070 			ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_RESTORED);
4071 	}
4072 
4073 out:
4074 	ixgbe_restart_watchdog_timer(ixgbe);
4075 }
4076 
4077 /*
4078  * ixgbe_stall_check - Check for transmit stall.
4079  *
4080  * This function checks if the adapter is stalled (in transmit).
4081  *
4082  * It is called each time the watchdog timeout is invoked.
4083  * If the transmit descriptor reclaim continuously fails,
4084  * the watchdog value will increment by 1. If the watchdog
4085  * value exceeds the threshold, the ixgbe is assumed to
4086  * have stalled and need to be reset.
4087  */
4088 static boolean_t
4089 ixgbe_stall_check(ixgbe_t *ixgbe)
4090 {
4091 	ixgbe_tx_ring_t *tx_ring;
4092 	boolean_t result;
4093 	int i;
4094 
4095 	if (ixgbe->link_state != LINK_STATE_UP)
4096 		return (B_FALSE);
4097 
4098 	/*
4099 	 * If any tx ring is stalled, we'll reset the chipset
4100 	 */
4101 	result = B_FALSE;
4102 	for (i = 0; i < ixgbe->num_tx_rings; i++) {
4103 		tx_ring = &ixgbe->tx_rings[i];
4104 		if (tx_ring->tbd_free <= ixgbe->tx_recycle_thresh) {
4105 			tx_ring->tx_recycle(tx_ring);
4106 		}
4107 
4108 		if (tx_ring->recycle_fail > 0)
4109 			tx_ring->stall_watchdog++;
4110 		else
4111 			tx_ring->stall_watchdog = 0;
4112 
4113 		if (tx_ring->stall_watchdog >= STALL_WATCHDOG_TIMEOUT) {
4114 			result = B_TRUE;
4115 			break;
4116 		}
4117 	}
4118 
4119 	if (result) {
4120 		tx_ring->stall_watchdog = 0;
4121 		tx_ring->recycle_fail = 0;
4122 	}
4123 
4124 	return (result);
4125 }
4126 
4127 
4128 /*
4129  * is_valid_mac_addr - Check if the mac address is valid.
4130  */
4131 static boolean_t
4132 is_valid_mac_addr(uint8_t *mac_addr)
4133 {
4134 	const uint8_t addr_test1[6] = { 0, 0, 0, 0, 0, 0 };
4135 	const uint8_t addr_test2[6] =
4136 	    { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
4137 
4138 	if (!(bcmp(addr_test1, mac_addr, ETHERADDRL)) ||
4139 	    !(bcmp(addr_test2, mac_addr, ETHERADDRL)))
4140 		return (B_FALSE);
4141 
4142 	return (B_TRUE);
4143 }
4144 
4145 static boolean_t
4146 ixgbe_find_mac_address(ixgbe_t *ixgbe)
4147 {
4148 #ifdef __sparc
4149 	struct ixgbe_hw *hw = &ixgbe->hw;
4150 	uchar_t *bytes;
4151 	struct ether_addr sysaddr;
4152 	uint_t nelts;
4153 	int err;
4154 	boolean_t found = B_FALSE;
4155 
4156 	/*
4157 	 * The "vendor's factory-set address" may already have
4158 	 * been extracted from the chip, but if the property
4159 	 * "local-mac-address" is set we use that instead.
4160 	 *
4161 	 * We check whether it looks like an array of 6
4162 	 * bytes (which it should, if OBP set it).  If we can't
4163 	 * make sense of it this way, we'll ignore it.
4164 	 */
4165 	err = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, ixgbe->dip,
4166 	    DDI_PROP_DONTPASS, "local-mac-address", &bytes, &nelts);
4167 	if (err == DDI_PROP_SUCCESS) {
4168 		if (nelts == ETHERADDRL) {
4169 			while (nelts--)
4170 				hw->mac.addr[nelts] = bytes[nelts];
4171 			found = B_TRUE;
4172 		}
4173 		ddi_prop_free(bytes);
4174 	}
4175 
4176 	/*
4177 	 * Look up the OBP property "local-mac-address?". If the user has set
4178 	 * 'local-mac-address? = false', use "the system address" instead.
4179 	 */
4180 	if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, ixgbe->dip, 0,
4181 	    "local-mac-address?", &bytes, &nelts) == DDI_PROP_SUCCESS) {
4182 		if (strncmp("false", (caddr_t)bytes, (size_t)nelts) == 0) {
4183 			if (localetheraddr(NULL, &sysaddr) != 0) {
4184 				bcopy(&sysaddr, hw->mac.addr, ETHERADDRL);
4185 				found = B_TRUE;
4186 			}
4187 		}
4188 		ddi_prop_free(bytes);
4189 	}
4190 
4191 	/*
4192 	 * Finally(!), if there's a valid "mac-address" property (created
4193 	 * if we netbooted from this interface), we must use this instead
4194 	 * of any of the above to ensure that the NFS/install server doesn't
4195 	 * get confused by the address changing as illumos takes over!
4196 	 */
4197 	err = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, ixgbe->dip,
4198 	    DDI_PROP_DONTPASS, "mac-address", &bytes, &nelts);
4199 	if (err == DDI_PROP_SUCCESS) {
4200 		if (nelts == ETHERADDRL) {
4201 			while (nelts--)
4202 				hw->mac.addr[nelts] = bytes[nelts];
4203 			found = B_TRUE;
4204 		}
4205 		ddi_prop_free(bytes);
4206 	}
4207 
4208 	if (found) {
4209 		bcopy(hw->mac.addr, hw->mac.perm_addr, ETHERADDRL);
4210 		return (B_TRUE);
4211 	}
4212 #else
4213 	_NOTE(ARGUNUSED(ixgbe));
4214 #endif
4215 
4216 	return (B_TRUE);
4217 }
4218 
4219 #pragma inline(ixgbe_arm_watchdog_timer)
4220 static void
4221 ixgbe_arm_watchdog_timer(ixgbe_t *ixgbe)
4222 {
4223 	/*
4224 	 * Fire a watchdog timer
4225 	 */
4226 	ixgbe->watchdog_tid =
4227 	    timeout(ixgbe_local_timer,
4228 	    (void *)ixgbe, 1 * drv_usectohz(1000000));
4229 
4230 }
4231 
4232 /*
4233  * ixgbe_enable_watchdog_timer - Enable and start the driver watchdog timer.
4234  */
4235 void
4236 ixgbe_enable_watchdog_timer(ixgbe_t *ixgbe)
4237 {
4238 	mutex_enter(&ixgbe->watchdog_lock);
4239 
4240 	if (!ixgbe->watchdog_enable) {
4241 		ixgbe->watchdog_enable = B_TRUE;
4242 		ixgbe->watchdog_start = B_TRUE;
4243 		ixgbe_arm_watchdog_timer(ixgbe);
4244 	}
4245 
4246 	mutex_exit(&ixgbe->watchdog_lock);
4247 }
4248 
4249 /*
4250  * ixgbe_disable_watchdog_timer - Disable and stop the driver watchdog timer.
4251  */
4252 void
4253 ixgbe_disable_watchdog_timer(ixgbe_t *ixgbe)
4254 {
4255 	timeout_id_t tid;
4256 
4257 	mutex_enter(&ixgbe->watchdog_lock);
4258 
4259 	ixgbe->watchdog_enable = B_FALSE;
4260 	ixgbe->watchdog_start = B_FALSE;
4261 	tid = ixgbe->watchdog_tid;
4262 	ixgbe->watchdog_tid = 0;
4263 
4264 	mutex_exit(&ixgbe->watchdog_lock);
4265 
4266 	if (tid != 0)
4267 		(void) untimeout(tid);
4268 }
4269 
4270 /*
4271  * ixgbe_start_watchdog_timer - Start the driver watchdog timer.
4272  */
4273 void
4274 ixgbe_start_watchdog_timer(ixgbe_t *ixgbe)
4275 {
4276 	mutex_enter(&ixgbe->watchdog_lock);
4277 
4278 	if (ixgbe->watchdog_enable) {
4279 		if (!ixgbe->watchdog_start) {
4280 			ixgbe->watchdog_start = B_TRUE;
4281 			ixgbe_arm_watchdog_timer(ixgbe);
4282 		}
4283 	}
4284 
4285 	mutex_exit(&ixgbe->watchdog_lock);
4286 }
4287 
4288 /*
4289  * ixgbe_restart_watchdog_timer - Restart the driver watchdog timer.
4290  */
4291 static void
4292 ixgbe_restart_watchdog_timer(ixgbe_t *ixgbe)
4293 {
4294 	mutex_enter(&ixgbe->watchdog_lock);
4295 
4296 	if (ixgbe->watchdog_start)
4297 		ixgbe_arm_watchdog_timer(ixgbe);
4298 
4299 	mutex_exit(&ixgbe->watchdog_lock);
4300 }
4301 
4302 /*
4303  * ixgbe_stop_watchdog_timer - Stop the driver watchdog timer.
4304  */
4305 void
4306 ixgbe_stop_watchdog_timer(ixgbe_t *ixgbe)
4307 {
4308 	timeout_id_t tid;
4309 
4310 	mutex_enter(&ixgbe->watchdog_lock);
4311 
4312 	ixgbe->watchdog_start = B_FALSE;
4313 	tid = ixgbe->watchdog_tid;
4314 	ixgbe->watchdog_tid = 0;
4315 
4316 	mutex_exit(&ixgbe->watchdog_lock);
4317 
4318 	if (tid != 0)
4319 		(void) untimeout(tid);
4320 }
4321 
4322 /*
4323  * ixgbe_disable_adapter_interrupts - Disable all adapter interrupts.
4324  */
4325 static void
4326 ixgbe_disable_adapter_interrupts(ixgbe_t *ixgbe)
4327 {
4328 	struct ixgbe_hw *hw = &ixgbe->hw;
4329 
4330 	/*
4331 	 * mask all interrupts off
4332 	 */
4333 	IXGBE_WRITE_REG(hw, IXGBE_EIMC, 0xffffffff);
4334 
4335 	/*
4336 	 * for MSI-X, also disable autoclear
4337 	 */
4338 	if (ixgbe->intr_type == DDI_INTR_TYPE_MSIX) {
4339 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, 0x0);
4340 	}
4341 
4342 	IXGBE_WRITE_FLUSH(hw);
4343 }
4344 
4345 /*
4346  * ixgbe_enable_adapter_interrupts - Enable all hardware interrupts.
4347  */
4348 static void
4349 ixgbe_enable_adapter_interrupts(ixgbe_t *ixgbe)
4350 {
4351 	struct ixgbe_hw *hw = &ixgbe->hw;
4352 	uint32_t eiac, eiam;
4353 	uint32_t gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
4354 
4355 	/* interrupt types to enable */
4356 	ixgbe->eims = IXGBE_EIMS_ENABLE_MASK;	/* shared code default */
4357 	ixgbe->eims &= ~IXGBE_EIMS_TCP_TIMER;	/* minus tcp timer */
4358 	ixgbe->eims |= ixgbe->capab->other_intr; /* "other" interrupt types */
4359 
4360 	/* enable automask on "other" causes that this adapter can generate */
4361 	eiam = ixgbe->capab->other_intr;
4362 
4363 	/*
4364 	 * msi-x mode
4365 	 */
4366 	if (ixgbe->intr_type == DDI_INTR_TYPE_MSIX) {
4367 		/* enable autoclear but not on bits 29:20 */
4368 		eiac = (ixgbe->eims & ~IXGBE_OTHER_INTR);
4369 
4370 		/* general purpose interrupt enable */
4371 		gpie |= (IXGBE_GPIE_MSIX_MODE
4372 		    | IXGBE_GPIE_PBA_SUPPORT
4373 		    | IXGBE_GPIE_OCD
4374 		    | IXGBE_GPIE_EIAME);
4375 	/*
4376 	 * non-msi-x mode
4377 	 */
4378 	} else {
4379 
4380 		/* disable autoclear, leave gpie at default */
4381 		eiac = 0;
4382 
4383 		/*
4384 		 * General purpose interrupt enable.
4385 		 * For 82599, X540 and X550, extended interrupt
4386 		 * automask enable only in MSI or MSI-X mode
4387 		 */
4388 		if ((hw->mac.type == ixgbe_mac_82598EB) ||
4389 		    (ixgbe->intr_type == DDI_INTR_TYPE_MSI)) {
4390 			gpie |= IXGBE_GPIE_EIAME;
4391 		}
4392 	}
4393 
4394 	/* Enable specific "other" interrupt types */
4395 	switch (hw->mac.type) {
4396 	case ixgbe_mac_82598EB:
4397 		gpie |= ixgbe->capab->other_gpie;
4398 		break;
4399 
4400 	case ixgbe_mac_82599EB:
4401 	case ixgbe_mac_X540:
4402 	case ixgbe_mac_X550:
4403 	case ixgbe_mac_X550EM_x:
4404 	case ixgbe_mac_X550EM_a:
4405 		gpie |= ixgbe->capab->other_gpie;
4406 
4407 		/* Enable RSC Delay 8us when LRO enabled  */
4408 		if (ixgbe->lro_enable) {
4409 			gpie |= (1 << IXGBE_GPIE_RSC_DELAY_SHIFT);
4410 		}
4411 		break;
4412 
4413 	default:
4414 		break;
4415 	}
4416 
4417 	/* write to interrupt control registers */
4418 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, ixgbe->eims);
4419 	IXGBE_WRITE_REG(hw, IXGBE_EIAC, eiac);
4420 	IXGBE_WRITE_REG(hw, IXGBE_EIAM, eiam);
4421 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
4422 	IXGBE_WRITE_FLUSH(hw);
4423 }
4424 
4425 /*
4426  * ixgbe_loopback_ioctl - Loopback support.
4427  */
4428 enum ioc_reply
4429 ixgbe_loopback_ioctl(ixgbe_t *ixgbe, struct iocblk *iocp, mblk_t *mp)
4430 {
4431 	lb_info_sz_t *lbsp;
4432 	lb_property_t *lbpp;
4433 	uint32_t *lbmp;
4434 	uint32_t size;
4435 	uint32_t value;
4436 
4437 	if (mp->b_cont == NULL)
4438 		return (IOC_INVAL);
4439 
4440 	switch (iocp->ioc_cmd) {
4441 	default:
4442 		return (IOC_INVAL);
4443 
4444 	case LB_GET_INFO_SIZE:
4445 		size = sizeof (lb_info_sz_t);
4446 		if (iocp->ioc_count != size)
4447 			return (IOC_INVAL);
4448 
4449 		value = sizeof (lb_normal);
4450 		value += sizeof (lb_mac);
4451 		value += sizeof (lb_external);
4452 
4453 		lbsp = (lb_info_sz_t *)(uintptr_t)mp->b_cont->b_rptr;
4454 		*lbsp = value;
4455 		break;
4456 
4457 	case LB_GET_INFO:
4458 		value = sizeof (lb_normal);
4459 		value += sizeof (lb_mac);
4460 		value += sizeof (lb_external);
4461 
4462 		size = value;
4463 		if (iocp->ioc_count != size)
4464 			return (IOC_INVAL);
4465 
4466 		value = 0;
4467 		lbpp = (lb_property_t *)(uintptr_t)mp->b_cont->b_rptr;
4468 
4469 		lbpp[value++] = lb_normal;
4470 		lbpp[value++] = lb_mac;
4471 		lbpp[value++] = lb_external;
4472 		break;
4473 
4474 	case LB_GET_MODE:
4475 		size = sizeof (uint32_t);
4476 		if (iocp->ioc_count != size)
4477 			return (IOC_INVAL);
4478 
4479 		lbmp = (uint32_t *)(uintptr_t)mp->b_cont->b_rptr;
4480 		*lbmp = ixgbe->loopback_mode;
4481 		break;
4482 
4483 	case LB_SET_MODE:
4484 		size = 0;
4485 		if (iocp->ioc_count != sizeof (uint32_t))
4486 			return (IOC_INVAL);
4487 
4488 		lbmp = (uint32_t *)(uintptr_t)mp->b_cont->b_rptr;
4489 		if (!ixgbe_set_loopback_mode(ixgbe, *lbmp))
4490 			return (IOC_INVAL);
4491 		break;
4492 	}
4493 
4494 	iocp->ioc_count = size;
4495 	iocp->ioc_error = 0;
4496 
4497 	if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) {
4498 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
4499 		return (IOC_INVAL);
4500 	}
4501 
4502 	return (IOC_REPLY);
4503 }
4504 
4505 /*
4506  * ixgbe_set_loopback_mode - Setup loopback based on the loopback mode.
4507  */
4508 static boolean_t
4509 ixgbe_set_loopback_mode(ixgbe_t *ixgbe, uint32_t mode)
4510 {
4511 	if (mode == ixgbe->loopback_mode)
4512 		return (B_TRUE);
4513 
4514 	ixgbe->loopback_mode = mode;
4515 
4516 	if (mode == IXGBE_LB_NONE) {
4517 		/*
4518 		 * Reset the chip
4519 		 */
4520 		(void) ixgbe_reset(ixgbe);
4521 		return (B_TRUE);
4522 	}
4523 
4524 	mutex_enter(&ixgbe->gen_lock);
4525 
4526 	switch (mode) {
4527 	default:
4528 		mutex_exit(&ixgbe->gen_lock);
4529 		return (B_FALSE);
4530 
4531 	case IXGBE_LB_EXTERNAL:
4532 		break;
4533 
4534 	case IXGBE_LB_INTERNAL_MAC:
4535 		ixgbe_set_internal_mac_loopback(ixgbe);
4536 		break;
4537 	}
4538 
4539 	mutex_exit(&ixgbe->gen_lock);
4540 
4541 	return (B_TRUE);
4542 }
4543 
4544 /*
4545  * ixgbe_set_internal_mac_loopback - Set the internal MAC loopback mode.
4546  */
4547 static void
4548 ixgbe_set_internal_mac_loopback(ixgbe_t *ixgbe)
4549 {
4550 	struct ixgbe_hw *hw;
4551 	uint32_t reg;
4552 	uint8_t atlas;
4553 
4554 	hw = &ixgbe->hw;
4555 
4556 	/*
4557 	 * Setup MAC loopback
4558 	 */
4559 	reg = IXGBE_READ_REG(&ixgbe->hw, IXGBE_HLREG0);
4560 	reg |= IXGBE_HLREG0_LPBK;
4561 	IXGBE_WRITE_REG(&ixgbe->hw, IXGBE_HLREG0, reg);
4562 
4563 	reg = IXGBE_READ_REG(&ixgbe->hw, IXGBE_AUTOC);
4564 	reg &= ~IXGBE_AUTOC_LMS_MASK;
4565 	IXGBE_WRITE_REG(&ixgbe->hw, IXGBE_AUTOC, reg);
4566 
4567 	/*
4568 	 * Disable Atlas Tx lanes to keep packets in loopback and not on wire
4569 	 */
4570 	switch (hw->mac.type) {
4571 	case ixgbe_mac_82598EB:
4572 		(void) ixgbe_read_analog_reg8(&ixgbe->hw, IXGBE_ATLAS_PDN_LPBK,
4573 		    &atlas);
4574 		atlas |= IXGBE_ATLAS_PDN_TX_REG_EN;
4575 		(void) ixgbe_write_analog_reg8(&ixgbe->hw, IXGBE_ATLAS_PDN_LPBK,
4576 		    atlas);
4577 
4578 		(void) ixgbe_read_analog_reg8(&ixgbe->hw, IXGBE_ATLAS_PDN_10G,
4579 		    &atlas);
4580 		atlas |= IXGBE_ATLAS_PDN_TX_10G_QL_ALL;
4581 		(void) ixgbe_write_analog_reg8(&ixgbe->hw, IXGBE_ATLAS_PDN_10G,
4582 		    atlas);
4583 
4584 		(void) ixgbe_read_analog_reg8(&ixgbe->hw, IXGBE_ATLAS_PDN_1G,
4585 		    &atlas);
4586 		atlas |= IXGBE_ATLAS_PDN_TX_1G_QL_ALL;
4587 		(void) ixgbe_write_analog_reg8(&ixgbe->hw, IXGBE_ATLAS_PDN_1G,
4588 		    atlas);
4589 
4590 		(void) ixgbe_read_analog_reg8(&ixgbe->hw, IXGBE_ATLAS_PDN_AN,
4591 		    &atlas);
4592 		atlas |= IXGBE_ATLAS_PDN_TX_AN_QL_ALL;
4593 		(void) ixgbe_write_analog_reg8(&ixgbe->hw, IXGBE_ATLAS_PDN_AN,
4594 		    atlas);
4595 		break;
4596 
4597 	case ixgbe_mac_82599EB:
4598 	case ixgbe_mac_X540:
4599 	case ixgbe_mac_X550:
4600 	case ixgbe_mac_X550EM_x:
4601 	case ixgbe_mac_X550EM_a:
4602 		reg = IXGBE_READ_REG(&ixgbe->hw, IXGBE_AUTOC);
4603 		reg |= (IXGBE_AUTOC_FLU |
4604 		    IXGBE_AUTOC_10G_KX4);
4605 		IXGBE_WRITE_REG(&ixgbe->hw, IXGBE_AUTOC, reg);
4606 
4607 		(void) ixgbe_setup_link(&ixgbe->hw, IXGBE_LINK_SPEED_10GB_FULL,
4608 		    B_FALSE);
4609 		break;
4610 
4611 	default:
4612 		break;
4613 	}
4614 }
4615 
4616 #pragma inline(ixgbe_intr_rx_work)
4617 /*
4618  * ixgbe_intr_rx_work - RX processing of ISR.
4619  */
4620 static void
4621 ixgbe_intr_rx_work(ixgbe_rx_ring_t *rx_ring)
4622 {
4623 	mblk_t *mp;
4624 
4625 	mutex_enter(&rx_ring->rx_lock);
4626 
4627 	mp = ixgbe_ring_rx(rx_ring, IXGBE_POLL_NULL);
4628 	mutex_exit(&rx_ring->rx_lock);
4629 
4630 	if (mp != NULL)
4631 		mac_rx_ring(rx_ring->ixgbe->mac_hdl, rx_ring->ring_handle, mp,
4632 		    rx_ring->ring_gen_num);
4633 }
4634 
4635 #pragma inline(ixgbe_intr_tx_work)
4636 /*
4637  * ixgbe_intr_tx_work - TX processing of ISR.
4638  */
4639 static void
4640 ixgbe_intr_tx_work(ixgbe_tx_ring_t *tx_ring)
4641 {
4642 	ixgbe_t *ixgbe = tx_ring->ixgbe;
4643 
4644 	/*
4645 	 * Recycle the tx descriptors
4646 	 */
4647 	tx_ring->tx_recycle(tx_ring);
4648 
4649 	/*
4650 	 * Schedule the re-transmit
4651 	 */
4652 	if (tx_ring->reschedule &&
4653 	    (tx_ring->tbd_free >= ixgbe->tx_resched_thresh)) {
4654 		tx_ring->reschedule = B_FALSE;
4655 		mac_tx_ring_update(tx_ring->ixgbe->mac_hdl,
4656 		    tx_ring->ring_handle);
4657 		tx_ring->stat_reschedule++;
4658 	}
4659 }
4660 
4661 #pragma inline(ixgbe_intr_other_work)
4662 /*
4663  * ixgbe_intr_other_work - Process interrupt types other than tx/rx
4664  */
4665 static void
4666 ixgbe_intr_other_work(ixgbe_t *ixgbe, uint32_t eicr)
4667 {
4668 	struct ixgbe_hw *hw = &ixgbe->hw;
4669 
4670 	ASSERT(mutex_owned(&ixgbe->gen_lock));
4671 
4672 	/*
4673 	 * handle link status change
4674 	 */
4675 	if (eicr & IXGBE_EICR_LSC) {
4676 		ixgbe_driver_link_check(ixgbe);
4677 		ixgbe_get_hw_state(ixgbe);
4678 	}
4679 
4680 	/*
4681 	 * check for fan failure on adapters with fans
4682 	 */
4683 	if ((ixgbe->capab->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE) &&
4684 	    (eicr & IXGBE_EICR_GPI_SDP1)) {
4685 		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_OVERTEMP);
4686 
4687 		/*
4688 		 * Disable the adapter interrupts
4689 		 */
4690 		ixgbe_disable_adapter_interrupts(ixgbe);
4691 
4692 		/*
4693 		 * Disable Rx/Tx units
4694 		 */
4695 		(void) ixgbe_stop_adapter(&ixgbe->hw);
4696 
4697 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_LOST);
4698 		ixgbe_error(ixgbe,
4699 		    "Problem: Network adapter has been stopped "
4700 		    "because the fan has stopped.\n");
4701 		ixgbe_error(ixgbe,
4702 		    "Action: Replace the adapter.\n");
4703 
4704 		/* re-enable the interrupt, which was automasked */
4705 		ixgbe->eims |= IXGBE_EICR_GPI_SDP1;
4706 	}
4707 
4708 	/*
4709 	 * Do SFP check for adapters with hot-plug capability
4710 	 */
4711 	if ((ixgbe->capab->flags & IXGBE_FLAG_SFP_PLUG_CAPABLE) &&
4712 	    ((eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw)) ||
4713 	    (eicr & IXGBE_EICR_GPI_SDP2_BY_MAC(hw)))) {
4714 		ixgbe->eicr = eicr;
4715 		if ((ddi_taskq_dispatch(ixgbe->sfp_taskq,
4716 		    ixgbe_sfp_check, (void *)ixgbe,
4717 		    DDI_NOSLEEP)) != DDI_SUCCESS) {
4718 			ixgbe_log(ixgbe, "No memory available to dispatch "
4719 			    "taskq for SFP check");
4720 		}
4721 	}
4722 
4723 	/*
4724 	 * Do over-temperature check for adapters with temp sensor
4725 	 */
4726 	if ((ixgbe->capab->flags & IXGBE_FLAG_TEMP_SENSOR_CAPABLE) &&
4727 	    ((eicr & IXGBE_EICR_GPI_SDP0_BY_MAC(hw)) ||
4728 	    (eicr & IXGBE_EICR_LSC))) {
4729 		ixgbe->eicr = eicr;
4730 		if ((ddi_taskq_dispatch(ixgbe->overtemp_taskq,
4731 		    ixgbe_overtemp_check, (void *)ixgbe,
4732 		    DDI_NOSLEEP)) != DDI_SUCCESS) {
4733 			ixgbe_log(ixgbe, "No memory available to dispatch "
4734 			    "taskq for overtemp check");
4735 		}
4736 	}
4737 
4738 	/*
4739 	 * Process an external PHY interrupt
4740 	 */
4741 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
4742 	    (eicr & IXGBE_EICR_GPI_SDP0_X540)) {
4743 		ixgbe->eicr = eicr;
4744 		if ((ddi_taskq_dispatch(ixgbe->phy_taskq,
4745 		    ixgbe_phy_check, (void *)ixgbe,
4746 		    DDI_NOSLEEP)) != DDI_SUCCESS) {
4747 			ixgbe_log(ixgbe, "No memory available to dispatch "
4748 			    "taskq for PHY check");
4749 		}
4750 	}
4751 }
4752 
4753 /*
4754  * ixgbe_intr_legacy - Interrupt handler for legacy interrupts.
4755  */
4756 static uint_t
4757 ixgbe_intr_legacy(void *arg1, void *arg2)
4758 {
4759 	ixgbe_t *ixgbe = (ixgbe_t *)arg1;
4760 	struct ixgbe_hw *hw = &ixgbe->hw;
4761 	ixgbe_tx_ring_t *tx_ring;
4762 	ixgbe_rx_ring_t *rx_ring;
4763 	uint32_t eicr;
4764 	mblk_t *mp;
4765 	boolean_t tx_reschedule;
4766 	uint_t result;
4767 
4768 	_NOTE(ARGUNUSED(arg2));
4769 
4770 	mutex_enter(&ixgbe->gen_lock);
4771 	if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) {
4772 		mutex_exit(&ixgbe->gen_lock);
4773 		return (DDI_INTR_UNCLAIMED);
4774 	}
4775 
4776 	mp = NULL;
4777 	tx_reschedule = B_FALSE;
4778 
4779 	/*
4780 	 * Any bit set in eicr: claim this interrupt
4781 	 */
4782 	eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
4783 
4784 	if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) {
4785 		mutex_exit(&ixgbe->gen_lock);
4786 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
4787 		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
4788 		return (DDI_INTR_CLAIMED);
4789 	}
4790 
4791 	if (eicr) {
4792 		/*
4793 		 * For legacy interrupt, we have only one interrupt,
4794 		 * so we have only one rx ring and one tx ring enabled.
4795 		 */
4796 		ASSERT(ixgbe->num_rx_rings == 1);
4797 		ASSERT(ixgbe->num_tx_rings == 1);
4798 
4799 		/*
4800 		 * For legacy interrupt, rx rings[0] will use RTxQ[0].
4801 		 */
4802 		if (eicr & 0x1) {
4803 			ixgbe->eimc |= IXGBE_EICR_RTX_QUEUE;
4804 			IXGBE_WRITE_REG(hw, IXGBE_EIMC, ixgbe->eimc);
4805 			ixgbe->eims |= IXGBE_EICR_RTX_QUEUE;
4806 			/*
4807 			 * Clean the rx descriptors
4808 			 */
4809 			rx_ring = &ixgbe->rx_rings[0];
4810 			mp = ixgbe_ring_rx(rx_ring, IXGBE_POLL_NULL);
4811 		}
4812 
4813 		/*
4814 		 * For legacy interrupt, tx rings[0] will use RTxQ[1].
4815 		 */
4816 		if (eicr & 0x2) {
4817 			/*
4818 			 * Recycle the tx descriptors
4819 			 */
4820 			tx_ring = &ixgbe->tx_rings[0];
4821 			tx_ring->tx_recycle(tx_ring);
4822 
4823 			/*
4824 			 * Schedule the re-transmit
4825 			 */
4826 			tx_reschedule = (tx_ring->reschedule &&
4827 			    (tx_ring->tbd_free >= ixgbe->tx_resched_thresh));
4828 		}
4829 
4830 		/* any interrupt type other than tx/rx */
4831 		if (eicr & ixgbe->capab->other_intr) {
4832 			switch (hw->mac.type) {
4833 			case ixgbe_mac_82598EB:
4834 				ixgbe->eims &= ~(eicr & IXGBE_OTHER_INTR);
4835 				break;
4836 
4837 			case ixgbe_mac_82599EB:
4838 			case ixgbe_mac_X540:
4839 			case ixgbe_mac_X550:
4840 			case ixgbe_mac_X550EM_x:
4841 			case ixgbe_mac_X550EM_a:
4842 				ixgbe->eimc = IXGBE_82599_OTHER_INTR;
4843 				IXGBE_WRITE_REG(hw, IXGBE_EIMC, ixgbe->eimc);
4844 				break;
4845 
4846 			default:
4847 				break;
4848 			}
4849 			ixgbe_intr_other_work(ixgbe, eicr);
4850 			ixgbe->eims &= ~(eicr & IXGBE_OTHER_INTR);
4851 		}
4852 
4853 		mutex_exit(&ixgbe->gen_lock);
4854 
4855 		result = DDI_INTR_CLAIMED;
4856 	} else {
4857 		mutex_exit(&ixgbe->gen_lock);
4858 
4859 		/*
4860 		 * No interrupt cause bits set: don't claim this interrupt.
4861 		 */
4862 		result = DDI_INTR_UNCLAIMED;
4863 	}
4864 
4865 	/* re-enable the interrupts which were automasked */
4866 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, ixgbe->eims);
4867 
4868 	/*
4869 	 * Do the following work outside of the gen_lock
4870 	 */
4871 	if (mp != NULL) {
4872 		mac_rx_ring(rx_ring->ixgbe->mac_hdl, rx_ring->ring_handle, mp,
4873 		    rx_ring->ring_gen_num);
4874 	}
4875 
4876 	if (tx_reschedule)  {
4877 		tx_ring->reschedule = B_FALSE;
4878 		mac_tx_ring_update(ixgbe->mac_hdl, tx_ring->ring_handle);
4879 		tx_ring->stat_reschedule++;
4880 	}
4881 
4882 	return (result);
4883 }
4884 
4885 /*
4886  * ixgbe_intr_msi - Interrupt handler for MSI.
4887  */
4888 static uint_t
4889 ixgbe_intr_msi(void *arg1, void *arg2)
4890 {
4891 	ixgbe_t *ixgbe = (ixgbe_t *)arg1;
4892 	struct ixgbe_hw *hw = &ixgbe->hw;
4893 	uint32_t eicr;
4894 
4895 	_NOTE(ARGUNUSED(arg2));
4896 
4897 	eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
4898 
4899 	if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) {
4900 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
4901 		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
4902 		return (DDI_INTR_CLAIMED);
4903 	}
4904 
4905 	/*
4906 	 * For MSI interrupt, we have only one vector,
4907 	 * so we have only one rx ring and one tx ring enabled.
4908 	 */
4909 	ASSERT(ixgbe->num_rx_rings == 1);
4910 	ASSERT(ixgbe->num_tx_rings == 1);
4911 
4912 	/*
4913 	 * For MSI interrupt, rx rings[0] will use RTxQ[0].
4914 	 */
4915 	if (eicr & 0x1) {
4916 		ixgbe_intr_rx_work(&ixgbe->rx_rings[0]);
4917 	}
4918 
4919 	/*
4920 	 * For MSI interrupt, tx rings[0] will use RTxQ[1].
4921 	 */
4922 	if (eicr & 0x2) {
4923 		ixgbe_intr_tx_work(&ixgbe->tx_rings[0]);
4924 	}
4925 
4926 	/* any interrupt type other than tx/rx */
4927 	if (eicr & ixgbe->capab->other_intr) {
4928 		mutex_enter(&ixgbe->gen_lock);
4929 		switch (hw->mac.type) {
4930 		case ixgbe_mac_82598EB:
4931 			ixgbe->eims &= ~(eicr & IXGBE_OTHER_INTR);
4932 			break;
4933 
4934 		case ixgbe_mac_82599EB:
4935 		case ixgbe_mac_X540:
4936 		case ixgbe_mac_X550:
4937 		case ixgbe_mac_X550EM_x:
4938 		case ixgbe_mac_X550EM_a:
4939 			ixgbe->eimc = IXGBE_82599_OTHER_INTR;
4940 			IXGBE_WRITE_REG(hw, IXGBE_EIMC, ixgbe->eimc);
4941 			break;
4942 
4943 		default:
4944 			break;
4945 		}
4946 		ixgbe_intr_other_work(ixgbe, eicr);
4947 		ixgbe->eims &= ~(eicr & IXGBE_OTHER_INTR);
4948 		mutex_exit(&ixgbe->gen_lock);
4949 	}
4950 
4951 	/* re-enable the interrupts which were automasked */
4952 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, ixgbe->eims);
4953 
4954 	return (DDI_INTR_CLAIMED);
4955 }
4956 
4957 /*
4958  * ixgbe_intr_msix - Interrupt handler for MSI-X.
4959  */
4960 static uint_t
4961 ixgbe_intr_msix(void *arg1, void *arg2)
4962 {
4963 	ixgbe_intr_vector_t *vect = (ixgbe_intr_vector_t *)arg1;
4964 	ixgbe_t *ixgbe = vect->ixgbe;
4965 	struct ixgbe_hw *hw = &ixgbe->hw;
4966 	uint32_t eicr;
4967 	int r_idx = 0;
4968 
4969 	_NOTE(ARGUNUSED(arg2));
4970 
4971 	/*
4972 	 * Clean each rx ring that has its bit set in the map
4973 	 */
4974 	r_idx = bt_getlowbit(vect->rx_map, 0, (ixgbe->num_rx_rings - 1));
4975 	while (r_idx >= 0) {
4976 		ixgbe_intr_rx_work(&ixgbe->rx_rings[r_idx]);
4977 		r_idx = bt_getlowbit(vect->rx_map, (r_idx + 1),
4978 		    (ixgbe->num_rx_rings - 1));
4979 	}
4980 
4981 	/*
4982 	 * Clean each tx ring that has its bit set in the map
4983 	 */
4984 	r_idx = bt_getlowbit(vect->tx_map, 0, (ixgbe->num_tx_rings - 1));
4985 	while (r_idx >= 0) {
4986 		ixgbe_intr_tx_work(&ixgbe->tx_rings[r_idx]);
4987 		r_idx = bt_getlowbit(vect->tx_map, (r_idx + 1),
4988 		    (ixgbe->num_tx_rings - 1));
4989 	}
4990 
4991 
4992 	/*
4993 	 * Clean other interrupt (link change) that has its bit set in the map
4994 	 */
4995 	if (BT_TEST(vect->other_map, 0) == 1) {
4996 		eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
4997 
4998 		if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) !=
4999 		    DDI_FM_OK) {
5000 			ddi_fm_service_impact(ixgbe->dip,
5001 			    DDI_SERVICE_DEGRADED);
5002 			atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
5003 			return (DDI_INTR_CLAIMED);
5004 		}
5005 
5006 		/*
5007 		 * Check "other" cause bits: any interrupt type other than tx/rx
5008 		 */
5009 		if (eicr & ixgbe->capab->other_intr) {
5010 			mutex_enter(&ixgbe->gen_lock);
5011 			switch (hw->mac.type) {
5012 			case ixgbe_mac_82598EB:
5013 				ixgbe->eims &= ~(eicr & IXGBE_OTHER_INTR);
5014 				ixgbe_intr_other_work(ixgbe, eicr);
5015 				break;
5016 
5017 			case ixgbe_mac_82599EB:
5018 			case ixgbe_mac_X540:
5019 			case ixgbe_mac_X550:
5020 			case ixgbe_mac_X550EM_x:
5021 			case ixgbe_mac_X550EM_a:
5022 				ixgbe->eims |= IXGBE_EICR_RTX_QUEUE;
5023 				ixgbe_intr_other_work(ixgbe, eicr);
5024 				break;
5025 
5026 			default:
5027 				break;
5028 			}
5029 			mutex_exit(&ixgbe->gen_lock);
5030 		}
5031 
5032 		/* re-enable the interrupts which were automasked */
5033 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, ixgbe->eims);
5034 	}
5035 
5036 	return (DDI_INTR_CLAIMED);
5037 }
5038 
5039 /*
5040  * ixgbe_alloc_intrs - Allocate interrupts for the driver.
5041  *
5042  * Normal sequence is to try MSI-X; if not sucessful, try MSI;
5043  * if not successful, try Legacy.
5044  * ixgbe->intr_force can be used to force sequence to start with
5045  * any of the 3 types.
5046  * If MSI-X is not used, number of tx/rx rings is forced to 1.
5047  */
5048 static int
5049 ixgbe_alloc_intrs(ixgbe_t *ixgbe)
5050 {
5051 	dev_info_t *devinfo;
5052 	int intr_types;
5053 	int rc;
5054 
5055 	devinfo = ixgbe->dip;
5056 
5057 	/*
5058 	 * Get supported interrupt types
5059 	 */
5060 	rc = ddi_intr_get_supported_types(devinfo, &intr_types);
5061 
5062 	if (rc != DDI_SUCCESS) {
5063 		ixgbe_log(ixgbe,
5064 		    "Get supported interrupt types failed: %d", rc);
5065 		return (IXGBE_FAILURE);
5066 	}
5067 	IXGBE_DEBUGLOG_1(ixgbe, "Supported interrupt types: %x", intr_types);
5068 
5069 	ixgbe->intr_type = 0;
5070 
5071 	/*
5072 	 * Install MSI-X interrupts
5073 	 */
5074 	if ((intr_types & DDI_INTR_TYPE_MSIX) &&
5075 	    (ixgbe->intr_force <= IXGBE_INTR_MSIX)) {
5076 		rc = ixgbe_alloc_intr_handles(ixgbe, DDI_INTR_TYPE_MSIX);
5077 		if (rc == IXGBE_SUCCESS)
5078 			return (IXGBE_SUCCESS);
5079 
5080 		ixgbe_log(ixgbe,
5081 		    "Allocate MSI-X failed, trying MSI interrupts...");
5082 	}
5083 
5084 	/*
5085 	 * MSI-X not used, force rings and groups to 1
5086 	 */
5087 	ixgbe->num_rx_rings = 1;
5088 	ixgbe->num_rx_groups = 1;
5089 	ixgbe->num_tx_rings = 1;
5090 	ixgbe->classify_mode = IXGBE_CLASSIFY_NONE;
5091 	ixgbe_log(ixgbe,
5092 	    "MSI-X not used, force rings and groups number to 1");
5093 
5094 	/*
5095 	 * Install MSI interrupts
5096 	 */
5097 	if ((intr_types & DDI_INTR_TYPE_MSI) &&
5098 	    (ixgbe->intr_force <= IXGBE_INTR_MSI)) {
5099 		rc = ixgbe_alloc_intr_handles(ixgbe, DDI_INTR_TYPE_MSI);
5100 		if (rc == IXGBE_SUCCESS)
5101 			return (IXGBE_SUCCESS);
5102 
5103 		ixgbe_log(ixgbe,
5104 		    "Allocate MSI failed, trying Legacy interrupts...");
5105 	}
5106 
5107 	/*
5108 	 * Install legacy interrupts
5109 	 */
5110 	if (intr_types & DDI_INTR_TYPE_FIXED) {
5111 		/*
5112 		 * Disallow legacy interrupts for X550. X550 has a silicon
5113 		 * bug which prevents Shared Legacy interrupts from working.
5114 		 * For details, please reference:
5115 		 *
5116 		 * Intel Ethernet Controller X550 Specification Update rev. 2.1
5117 		 * May 2016, erratum 22: PCIe Interrupt Status Bit
5118 		 */
5119 		if (ixgbe->hw.mac.type == ixgbe_mac_X550 ||
5120 		    ixgbe->hw.mac.type == ixgbe_mac_X550EM_x ||
5121 		    ixgbe->hw.mac.type == ixgbe_mac_X550EM_a ||
5122 		    ixgbe->hw.mac.type == ixgbe_mac_X550_vf ||
5123 		    ixgbe->hw.mac.type == ixgbe_mac_X550EM_x_vf ||
5124 		    ixgbe->hw.mac.type == ixgbe_mac_X550EM_a_vf) {
5125 			ixgbe_log(ixgbe,
5126 			    "Legacy interrupts are not supported on this "
5127 			    "adapter. Please use MSI or MSI-X instead.");
5128 			return (IXGBE_FAILURE);
5129 		}
5130 		rc = ixgbe_alloc_intr_handles(ixgbe, DDI_INTR_TYPE_FIXED);
5131 		if (rc == IXGBE_SUCCESS)
5132 			return (IXGBE_SUCCESS);
5133 
5134 		ixgbe_log(ixgbe,
5135 		    "Allocate Legacy interrupts failed");
5136 	}
5137 
5138 	/*
5139 	 * If none of the 3 types succeeded, return failure
5140 	 */
5141 	return (IXGBE_FAILURE);
5142 }
5143 
5144 /*
5145  * ixgbe_alloc_intr_handles - Allocate interrupt handles.
5146  *
5147  * For legacy and MSI, only 1 handle is needed.  For MSI-X,
5148  * if fewer than 2 handles are available, return failure.
5149  * Upon success, this maps the vectors to rx and tx rings for
5150  * interrupts.
5151  */
5152 static int
5153 ixgbe_alloc_intr_handles(ixgbe_t *ixgbe, int intr_type)
5154 {
5155 	dev_info_t *devinfo;
5156 	int request, count, actual;
5157 	int minimum;
5158 	int rc;
5159 	uint32_t ring_per_group;
5160 
5161 	devinfo = ixgbe->dip;
5162 
5163 	switch (intr_type) {
5164 	case DDI_INTR_TYPE_FIXED:
5165 		request = 1;	/* Request 1 legacy interrupt handle */
5166 		minimum = 1;
5167 		IXGBE_DEBUGLOG_0(ixgbe, "interrupt type: legacy");
5168 		break;
5169 
5170 	case DDI_INTR_TYPE_MSI:
5171 		request = 1;	/* Request 1 MSI interrupt handle */
5172 		minimum = 1;
5173 		IXGBE_DEBUGLOG_0(ixgbe, "interrupt type: MSI");
5174 		break;
5175 
5176 	case DDI_INTR_TYPE_MSIX:
5177 		/*
5178 		 * Best number of vectors for the adapter is
5179 		 * (# rx rings + # tx rings), however we will
5180 		 * limit the request number.
5181 		 */
5182 		request = min(16, ixgbe->num_rx_rings + ixgbe->num_tx_rings);
5183 		if (request > ixgbe->capab->max_ring_vect)
5184 			request = ixgbe->capab->max_ring_vect;
5185 		minimum = 1;
5186 		IXGBE_DEBUGLOG_0(ixgbe, "interrupt type: MSI-X");
5187 		break;
5188 
5189 	default:
5190 		ixgbe_log(ixgbe,
5191 		    "invalid call to ixgbe_alloc_intr_handles(): %d\n",
5192 		    intr_type);
5193 		return (IXGBE_FAILURE);
5194 	}
5195 	IXGBE_DEBUGLOG_2(ixgbe, "interrupt handles requested: %d  minimum: %d",
5196 	    request, minimum);
5197 
5198 	/*
5199 	 * Get number of supported interrupts
5200 	 */
5201 	rc = ddi_intr_get_nintrs(devinfo, intr_type, &count);
5202 	if ((rc != DDI_SUCCESS) || (count < minimum)) {
5203 		ixgbe_log(ixgbe,
5204 		    "Get interrupt number failed. Return: %d, count: %d",
5205 		    rc, count);
5206 		return (IXGBE_FAILURE);
5207 	}
5208 	IXGBE_DEBUGLOG_1(ixgbe, "interrupts supported: %d", count);
5209 
5210 	actual = 0;
5211 	ixgbe->intr_cnt = 0;
5212 	ixgbe->intr_cnt_max = 0;
5213 	ixgbe->intr_cnt_min = 0;
5214 
5215 	/*
5216 	 * Allocate an array of interrupt handles
5217 	 */
5218 	ixgbe->intr_size = request * sizeof (ddi_intr_handle_t);
5219 	ixgbe->htable = kmem_alloc(ixgbe->intr_size, KM_SLEEP);
5220 
5221 	rc = ddi_intr_alloc(devinfo, ixgbe->htable, intr_type, 0,
5222 	    request, &actual, DDI_INTR_ALLOC_NORMAL);
5223 	if (rc != DDI_SUCCESS) {
5224 		ixgbe_log(ixgbe, "Allocate interrupts failed. "
5225 		    "return: %d, request: %d, actual: %d",
5226 		    rc, request, actual);
5227 		goto alloc_handle_fail;
5228 	}
5229 	IXGBE_DEBUGLOG_1(ixgbe, "interrupts actually allocated: %d", actual);
5230 
5231 	/*
5232 	 * upper/lower limit of interrupts
5233 	 */
5234 	ixgbe->intr_cnt = actual;
5235 	ixgbe->intr_cnt_max = request;
5236 	ixgbe->intr_cnt_min = minimum;
5237 
5238 	/*
5239 	 * rss number per group should not exceed the rx interrupt number,
5240 	 * else need to adjust rx ring number.
5241 	 */
5242 	ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups;
5243 	ASSERT((ixgbe->num_rx_rings % ixgbe->num_rx_groups) == 0);
5244 	if (actual < ring_per_group) {
5245 		ixgbe->num_rx_rings = ixgbe->num_rx_groups * actual;
5246 		ixgbe_setup_vmdq_rss_conf(ixgbe);
5247 	}
5248 
5249 	/*
5250 	 * Now we know the actual number of vectors.  Here we map the vector
5251 	 * to other, rx rings and tx ring.
5252 	 */
5253 	if (actual < minimum) {
5254 		ixgbe_log(ixgbe, "Insufficient interrupt handles available: %d",
5255 		    actual);
5256 		goto alloc_handle_fail;
5257 	}
5258 
5259 	/*
5260 	 * Get priority for first vector, assume remaining are all the same
5261 	 */
5262 	rc = ddi_intr_get_pri(ixgbe->htable[0], &ixgbe->intr_pri);
5263 	if (rc != DDI_SUCCESS) {
5264 		ixgbe_log(ixgbe,
5265 		    "Get interrupt priority failed: %d", rc);
5266 		goto alloc_handle_fail;
5267 	}
5268 
5269 	rc = ddi_intr_get_cap(ixgbe->htable[0], &ixgbe->intr_cap);
5270 	if (rc != DDI_SUCCESS) {
5271 		ixgbe_log(ixgbe,
5272 		    "Get interrupt cap failed: %d", rc);
5273 		goto alloc_handle_fail;
5274 	}
5275 
5276 	ixgbe->intr_type = intr_type;
5277 
5278 	return (IXGBE_SUCCESS);
5279 
5280 alloc_handle_fail:
5281 	ixgbe_rem_intrs(ixgbe);
5282 
5283 	return (IXGBE_FAILURE);
5284 }
5285 
5286 /*
5287  * ixgbe_add_intr_handlers - Add interrupt handlers based on the interrupt type.
5288  *
5289  * Before adding the interrupt handlers, the interrupt vectors have
5290  * been allocated, and the rx/tx rings have also been allocated.
5291  */
5292 static int
5293 ixgbe_add_intr_handlers(ixgbe_t *ixgbe)
5294 {
5295 	int vector = 0;
5296 	int rc;
5297 
5298 	switch (ixgbe->intr_type) {
5299 	case DDI_INTR_TYPE_MSIX:
5300 		/*
5301 		 * Add interrupt handler for all vectors
5302 		 */
5303 		for (vector = 0; vector < ixgbe->intr_cnt; vector++) {
5304 			/*
5305 			 * install pointer to vect_map[vector]
5306 			 */
5307 			rc = ddi_intr_add_handler(ixgbe->htable[vector],
5308 			    (ddi_intr_handler_t *)ixgbe_intr_msix,
5309 			    (void *)&ixgbe->vect_map[vector], NULL);
5310 
5311 			if (rc != DDI_SUCCESS) {
5312 				ixgbe_log(ixgbe,
5313 				    "Add interrupt handler failed. "
5314 				    "return: %d, vector: %d", rc, vector);
5315 				for (vector--; vector >= 0; vector--) {
5316 					(void) ddi_intr_remove_handler(
5317 					    ixgbe->htable[vector]);
5318 				}
5319 				return (IXGBE_FAILURE);
5320 			}
5321 		}
5322 
5323 		break;
5324 
5325 	case DDI_INTR_TYPE_MSI:
5326 		/*
5327 		 * Add interrupt handlers for the only vector
5328 		 */
5329 		rc = ddi_intr_add_handler(ixgbe->htable[vector],
5330 		    (ddi_intr_handler_t *)ixgbe_intr_msi,
5331 		    (void *)ixgbe, NULL);
5332 
5333 		if (rc != DDI_SUCCESS) {
5334 			ixgbe_log(ixgbe,
5335 			    "Add MSI interrupt handler failed: %d", rc);
5336 			return (IXGBE_FAILURE);
5337 		}
5338 
5339 		break;
5340 
5341 	case DDI_INTR_TYPE_FIXED:
5342 		/*
5343 		 * Add interrupt handlers for the only vector
5344 		 */
5345 		rc = ddi_intr_add_handler(ixgbe->htable[vector],
5346 		    (ddi_intr_handler_t *)ixgbe_intr_legacy,
5347 		    (void *)ixgbe, NULL);
5348 
5349 		if (rc != DDI_SUCCESS) {
5350 			ixgbe_log(ixgbe,
5351 			    "Add legacy interrupt handler failed: %d", rc);
5352 			return (IXGBE_FAILURE);
5353 		}
5354 
5355 		break;
5356 
5357 	default:
5358 		return (IXGBE_FAILURE);
5359 	}
5360 
5361 	return (IXGBE_SUCCESS);
5362 }
5363 
5364 #pragma inline(ixgbe_map_rxring_to_vector)
5365 /*
5366  * ixgbe_map_rxring_to_vector - Map given rx ring to given interrupt vector.
5367  */
5368 static void
5369 ixgbe_map_rxring_to_vector(ixgbe_t *ixgbe, int r_idx, int v_idx)
5370 {
5371 	/*
5372 	 * Set bit in map
5373 	 */
5374 	BT_SET(ixgbe->vect_map[v_idx].rx_map, r_idx);
5375 
5376 	/*
5377 	 * Count bits set
5378 	 */
5379 	ixgbe->vect_map[v_idx].rxr_cnt++;
5380 
5381 	/*
5382 	 * Remember bit position
5383 	 */
5384 	ixgbe->rx_rings[r_idx].intr_vector = v_idx;
5385 	ixgbe->rx_rings[r_idx].vect_bit = 1 << v_idx;
5386 }
5387 
5388 #pragma inline(ixgbe_map_txring_to_vector)
5389 /*
5390  * ixgbe_map_txring_to_vector - Map given tx ring to given interrupt vector.
5391  */
5392 static void
5393 ixgbe_map_txring_to_vector(ixgbe_t *ixgbe, int t_idx, int v_idx)
5394 {
5395 	/*
5396 	 * Set bit in map
5397 	 */
5398 	BT_SET(ixgbe->vect_map[v_idx].tx_map, t_idx);
5399 
5400 	/*
5401 	 * Count bits set
5402 	 */
5403 	ixgbe->vect_map[v_idx].txr_cnt++;
5404 
5405 	/*
5406 	 * Remember bit position
5407 	 */
5408 	ixgbe->tx_rings[t_idx].intr_vector = v_idx;
5409 	ixgbe->tx_rings[t_idx].vect_bit = 1 << v_idx;
5410 }
5411 
5412 /*
5413  * ixgbe_setup_ivar - Set the given entry in the given interrupt vector
5414  * allocation register (IVAR).
5415  * cause:
5416  *   -1 : other cause
5417  *    0 : rx
5418  *    1 : tx
5419  */
5420 static void
5421 ixgbe_setup_ivar(ixgbe_t *ixgbe, uint16_t intr_alloc_entry, uint8_t msix_vector,
5422     int8_t cause)
5423 {
5424 	struct ixgbe_hw *hw = &ixgbe->hw;
5425 	u32 ivar, index;
5426 
5427 	switch (hw->mac.type) {
5428 	case ixgbe_mac_82598EB:
5429 		msix_vector |= IXGBE_IVAR_ALLOC_VAL;
5430 		if (cause == -1) {
5431 			cause = 0;
5432 		}
5433 		index = (((cause * 64) + intr_alloc_entry) >> 2) & 0x1F;
5434 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
5435 		ivar &= ~(0xFF << (8 * (intr_alloc_entry & 0x3)));
5436 		ivar |= (msix_vector << (8 * (intr_alloc_entry & 0x3)));
5437 		IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
5438 		break;
5439 
5440 	case ixgbe_mac_82599EB:
5441 	case ixgbe_mac_X540:
5442 	case ixgbe_mac_X550:
5443 	case ixgbe_mac_X550EM_x:
5444 	case ixgbe_mac_X550EM_a:
5445 		if (cause == -1) {
5446 			/* other causes */
5447 			msix_vector |= IXGBE_IVAR_ALLOC_VAL;
5448 			index = (intr_alloc_entry & 1) * 8;
5449 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
5450 			ivar &= ~(0xFF << index);
5451 			ivar |= (msix_vector << index);
5452 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
5453 		} else {
5454 			/* tx or rx causes */
5455 			msix_vector |= IXGBE_IVAR_ALLOC_VAL;
5456 			index = ((16 * (intr_alloc_entry & 1)) + (8 * cause));
5457 			ivar = IXGBE_READ_REG(hw,
5458 			    IXGBE_IVAR(intr_alloc_entry >> 1));
5459 			ivar &= ~(0xFF << index);
5460 			ivar |= (msix_vector << index);
5461 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(intr_alloc_entry >> 1),
5462 			    ivar);
5463 		}
5464 		break;
5465 
5466 	default:
5467 		break;
5468 	}
5469 }
5470 
5471 /*
5472  * ixgbe_enable_ivar - Enable the given entry by setting the VAL bit of
5473  * given interrupt vector allocation register (IVAR).
5474  * cause:
5475  *   -1 : other cause
5476  *    0 : rx
5477  *    1 : tx
5478  */
5479 static void
5480 ixgbe_enable_ivar(ixgbe_t *ixgbe, uint16_t intr_alloc_entry, int8_t cause)
5481 {
5482 	struct ixgbe_hw *hw = &ixgbe->hw;
5483 	u32 ivar, index;
5484 
5485 	switch (hw->mac.type) {
5486 	case ixgbe_mac_82598EB:
5487 		if (cause == -1) {
5488 			cause = 0;
5489 		}
5490 		index = (((cause * 64) + intr_alloc_entry) >> 2) & 0x1F;
5491 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
5492 		ivar |= (IXGBE_IVAR_ALLOC_VAL << (8 *
5493 		    (intr_alloc_entry & 0x3)));
5494 		IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
5495 		break;
5496 
5497 	case ixgbe_mac_82599EB:
5498 	case ixgbe_mac_X540:
5499 	case ixgbe_mac_X550:
5500 	case ixgbe_mac_X550EM_x:
5501 	case ixgbe_mac_X550EM_a:
5502 		if (cause == -1) {
5503 			/* other causes */
5504 			index = (intr_alloc_entry & 1) * 8;
5505 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
5506 			ivar |= (IXGBE_IVAR_ALLOC_VAL << index);
5507 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
5508 		} else {
5509 			/* tx or rx causes */
5510 			index = ((16 * (intr_alloc_entry & 1)) + (8 * cause));
5511 			ivar = IXGBE_READ_REG(hw,
5512 			    IXGBE_IVAR(intr_alloc_entry >> 1));
5513 			ivar |= (IXGBE_IVAR_ALLOC_VAL << index);
5514 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(intr_alloc_entry >> 1),
5515 			    ivar);
5516 		}
5517 		break;
5518 
5519 	default:
5520 		break;
5521 	}
5522 }
5523 
5524 /*
5525  * ixgbe_disable_ivar - Disble the given entry by clearing the VAL bit of
5526  * given interrupt vector allocation register (IVAR).
5527  * cause:
5528  *   -1 : other cause
5529  *    0 : rx
5530  *    1 : tx
5531  */
5532 static void
5533 ixgbe_disable_ivar(ixgbe_t *ixgbe, uint16_t intr_alloc_entry, int8_t cause)
5534 {
5535 	struct ixgbe_hw *hw = &ixgbe->hw;
5536 	u32 ivar, index;
5537 
5538 	switch (hw->mac.type) {
5539 	case ixgbe_mac_82598EB:
5540 		if (cause == -1) {
5541 			cause = 0;
5542 		}
5543 		index = (((cause * 64) + intr_alloc_entry) >> 2) & 0x1F;
5544 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
5545 		ivar &= ~(IXGBE_IVAR_ALLOC_VAL<< (8 *
5546 		    (intr_alloc_entry & 0x3)));
5547 		IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
5548 		break;
5549 
5550 	case ixgbe_mac_82599EB:
5551 	case ixgbe_mac_X540:
5552 	case ixgbe_mac_X550:
5553 	case ixgbe_mac_X550EM_x:
5554 	case ixgbe_mac_X550EM_a:
5555 		if (cause == -1) {
5556 			/* other causes */
5557 			index = (intr_alloc_entry & 1) * 8;
5558 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
5559 			ivar &= ~(IXGBE_IVAR_ALLOC_VAL << index);
5560 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
5561 		} else {
5562 			/* tx or rx causes */
5563 			index = ((16 * (intr_alloc_entry & 1)) + (8 * cause));
5564 			ivar = IXGBE_READ_REG(hw,
5565 			    IXGBE_IVAR(intr_alloc_entry >> 1));
5566 			ivar &= ~(IXGBE_IVAR_ALLOC_VAL << index);
5567 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(intr_alloc_entry >> 1),
5568 			    ivar);
5569 		}
5570 		break;
5571 
5572 	default:
5573 		break;
5574 	}
5575 }
5576 
5577 /*
5578  * Convert the rx ring index driver maintained to the rx ring index
5579  * in h/w.
5580  */
5581 static uint32_t
5582 ixgbe_get_hw_rx_index(ixgbe_t *ixgbe, uint32_t sw_rx_index)
5583 {
5584 
5585 	struct ixgbe_hw *hw = &ixgbe->hw;
5586 	uint32_t rx_ring_per_group, hw_rx_index;
5587 
5588 	if (ixgbe->classify_mode == IXGBE_CLASSIFY_RSS ||
5589 	    ixgbe->classify_mode == IXGBE_CLASSIFY_NONE) {
5590 		return (sw_rx_index);
5591 	} else if (ixgbe->classify_mode == IXGBE_CLASSIFY_VMDQ) {
5592 		switch (hw->mac.type) {
5593 		case ixgbe_mac_82598EB:
5594 			return (sw_rx_index);
5595 
5596 		case ixgbe_mac_82599EB:
5597 		case ixgbe_mac_X540:
5598 		case ixgbe_mac_X550:
5599 		case ixgbe_mac_X550EM_x:
5600 		case ixgbe_mac_X550EM_a:
5601 			return (sw_rx_index * 2);
5602 
5603 		default:
5604 			break;
5605 		}
5606 	} else if (ixgbe->classify_mode == IXGBE_CLASSIFY_VMDQ_RSS) {
5607 		rx_ring_per_group = ixgbe->num_rx_rings / ixgbe->num_rx_groups;
5608 
5609 		switch (hw->mac.type) {
5610 		case ixgbe_mac_82598EB:
5611 			hw_rx_index = (sw_rx_index / rx_ring_per_group) *
5612 			    16 + (sw_rx_index % rx_ring_per_group);
5613 			return (hw_rx_index);
5614 
5615 		case ixgbe_mac_82599EB:
5616 		case ixgbe_mac_X540:
5617 		case ixgbe_mac_X550:
5618 		case ixgbe_mac_X550EM_x:
5619 		case ixgbe_mac_X550EM_a:
5620 			if (ixgbe->num_rx_groups > 32) {
5621 				hw_rx_index = (sw_rx_index /
5622 				    rx_ring_per_group) * 2 +
5623 				    (sw_rx_index % rx_ring_per_group);
5624 			} else {
5625 				hw_rx_index = (sw_rx_index /
5626 				    rx_ring_per_group) * 4 +
5627 				    (sw_rx_index % rx_ring_per_group);
5628 			}
5629 			return (hw_rx_index);
5630 
5631 		default:
5632 			break;
5633 		}
5634 	}
5635 
5636 	/*
5637 	 * Should never reach. Just to make compiler happy.
5638 	 */
5639 	return (sw_rx_index);
5640 }
5641 
5642 /*
5643  * ixgbe_map_intrs_to_vectors - Map different interrupts to MSI-X vectors.
5644  *
5645  * For MSI-X, here will map rx interrupt, tx interrupt and other interrupt
5646  * to vector[0 - (intr_cnt -1)].
5647  */
5648 static int
5649 ixgbe_map_intrs_to_vectors(ixgbe_t *ixgbe)
5650 {
5651 	int i, vector = 0;
5652 
5653 	/* initialize vector map */
5654 	bzero(&ixgbe->vect_map, sizeof (ixgbe->vect_map));
5655 	for (i = 0; i < ixgbe->intr_cnt; i++) {
5656 		ixgbe->vect_map[i].ixgbe = ixgbe;
5657 	}
5658 
5659 	/*
5660 	 * non-MSI-X case is very simple: rx rings[0] on RTxQ[0],
5661 	 * tx rings[0] on RTxQ[1].
5662 	 */
5663 	if (ixgbe->intr_type != DDI_INTR_TYPE_MSIX) {
5664 		ixgbe_map_rxring_to_vector(ixgbe, 0, 0);
5665 		ixgbe_map_txring_to_vector(ixgbe, 0, 1);
5666 		return (IXGBE_SUCCESS);
5667 	}
5668 
5669 	/*
5670 	 * Interrupts/vectors mapping for MSI-X
5671 	 */
5672 
5673 	/*
5674 	 * Map other interrupt to vector 0,
5675 	 * Set bit in map and count the bits set.
5676 	 */
5677 	BT_SET(ixgbe->vect_map[vector].other_map, 0);
5678 	ixgbe->vect_map[vector].other_cnt++;
5679 
5680 	/*
5681 	 * Map rx ring interrupts to vectors
5682 	 */
5683 	for (i = 0; i < ixgbe->num_rx_rings; i++) {
5684 		ixgbe_map_rxring_to_vector(ixgbe, i, vector);
5685 		vector = (vector +1) % ixgbe->intr_cnt;
5686 	}
5687 
5688 	/*
5689 	 * Map tx ring interrupts to vectors
5690 	 */
5691 	for (i = 0; i < ixgbe->num_tx_rings; i++) {
5692 		ixgbe_map_txring_to_vector(ixgbe, i, vector);
5693 		vector = (vector +1) % ixgbe->intr_cnt;
5694 	}
5695 
5696 	return (IXGBE_SUCCESS);
5697 }
5698 
5699 /*
5700  * ixgbe_setup_adapter_vector - Setup the adapter interrupt vector(s).
5701  *
5702  * This relies on ring/vector mapping already set up in the
5703  * vect_map[] structures
5704  */
5705 static void
5706 ixgbe_setup_adapter_vector(ixgbe_t *ixgbe)
5707 {
5708 	struct ixgbe_hw *hw = &ixgbe->hw;
5709 	ixgbe_intr_vector_t *vect;	/* vector bitmap */
5710 	int r_idx;	/* ring index */
5711 	int v_idx;	/* vector index */
5712 	uint32_t hw_index;
5713 
5714 	/*
5715 	 * Clear any previous entries
5716 	 */
5717 	switch (hw->mac.type) {
5718 	case ixgbe_mac_82598EB:
5719 		for (v_idx = 0; v_idx < 25; v_idx++)
5720 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(v_idx), 0);
5721 		break;
5722 
5723 	case ixgbe_mac_82599EB:
5724 	case ixgbe_mac_X540:
5725 	case ixgbe_mac_X550:
5726 	case ixgbe_mac_X550EM_x:
5727 	case ixgbe_mac_X550EM_a:
5728 		for (v_idx = 0; v_idx < 64; v_idx++)
5729 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(v_idx), 0);
5730 		IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, 0);
5731 		break;
5732 
5733 	default:
5734 		break;
5735 	}
5736 
5737 	/*
5738 	 * For non MSI-X interrupt, rx rings[0] will use RTxQ[0], and
5739 	 * tx rings[0] will use RTxQ[1].
5740 	 */
5741 	if (ixgbe->intr_type != DDI_INTR_TYPE_MSIX) {
5742 		ixgbe_setup_ivar(ixgbe, 0, 0, 0);
5743 		ixgbe_setup_ivar(ixgbe, 0, 1, 1);
5744 		return;
5745 	}
5746 
5747 	/*
5748 	 * For MSI-X interrupt, "Other" is always on vector[0].
5749 	 */
5750 	ixgbe_setup_ivar(ixgbe, IXGBE_IVAR_OTHER_CAUSES_INDEX, 0, -1);
5751 
5752 	/*
5753 	 * For each interrupt vector, populate the IVAR table
5754 	 */
5755 	for (v_idx = 0; v_idx < ixgbe->intr_cnt; v_idx++) {
5756 		vect = &ixgbe->vect_map[v_idx];
5757 
5758 		/*
5759 		 * For each rx ring bit set
5760 		 */
5761 		r_idx = bt_getlowbit(vect->rx_map, 0,
5762 		    (ixgbe->num_rx_rings - 1));
5763 
5764 		while (r_idx >= 0) {
5765 			hw_index = ixgbe->rx_rings[r_idx].hw_index;
5766 			ixgbe_setup_ivar(ixgbe, hw_index, v_idx, 0);
5767 			r_idx = bt_getlowbit(vect->rx_map, (r_idx + 1),
5768 			    (ixgbe->num_rx_rings - 1));
5769 		}
5770 
5771 		/*
5772 		 * For each tx ring bit set
5773 		 */
5774 		r_idx = bt_getlowbit(vect->tx_map, 0,
5775 		    (ixgbe->num_tx_rings - 1));
5776 
5777 		while (r_idx >= 0) {
5778 			ixgbe_setup_ivar(ixgbe, r_idx, v_idx, 1);
5779 			r_idx = bt_getlowbit(vect->tx_map, (r_idx + 1),
5780 			    (ixgbe->num_tx_rings - 1));
5781 		}
5782 	}
5783 }
5784 
5785 /*
5786  * ixgbe_rem_intr_handlers - Remove the interrupt handlers.
5787  */
5788 static void
5789 ixgbe_rem_intr_handlers(ixgbe_t *ixgbe)
5790 {
5791 	int i;
5792 	int rc;
5793 
5794 	for (i = 0; i < ixgbe->intr_cnt; i++) {
5795 		rc = ddi_intr_remove_handler(ixgbe->htable[i]);
5796 		if (rc != DDI_SUCCESS) {
5797 			IXGBE_DEBUGLOG_1(ixgbe,
5798 			    "Remove intr handler failed: %d", rc);
5799 		}
5800 	}
5801 }
5802 
5803 /*
5804  * ixgbe_rem_intrs - Remove the allocated interrupts.
5805  */
5806 static void
5807 ixgbe_rem_intrs(ixgbe_t *ixgbe)
5808 {
5809 	int i;
5810 	int rc;
5811 
5812 	for (i = 0; i < ixgbe->intr_cnt; i++) {
5813 		rc = ddi_intr_free(ixgbe->htable[i]);
5814 		if (rc != DDI_SUCCESS) {
5815 			IXGBE_DEBUGLOG_1(ixgbe,
5816 			    "Free intr failed: %d", rc);
5817 		}
5818 	}
5819 
5820 	kmem_free(ixgbe->htable, ixgbe->intr_size);
5821 	ixgbe->htable = NULL;
5822 }
5823 
5824 /*
5825  * ixgbe_enable_intrs - Enable all the ddi interrupts.
5826  */
5827 static int
5828 ixgbe_enable_intrs(ixgbe_t *ixgbe)
5829 {
5830 	int i;
5831 	int rc;
5832 
5833 	/*
5834 	 * Enable interrupts
5835 	 */
5836 	if (ixgbe->intr_cap & DDI_INTR_FLAG_BLOCK) {
5837 		/*
5838 		 * Call ddi_intr_block_enable() for MSI
5839 		 */
5840 		rc = ddi_intr_block_enable(ixgbe->htable, ixgbe->intr_cnt);
5841 		if (rc != DDI_SUCCESS) {
5842 			ixgbe_log(ixgbe,
5843 			    "Enable block intr failed: %d", rc);
5844 			return (IXGBE_FAILURE);
5845 		}
5846 	} else {
5847 		/*
5848 		 * Call ddi_intr_enable() for Legacy/MSI non block enable
5849 		 */
5850 		for (i = 0; i < ixgbe->intr_cnt; i++) {
5851 			rc = ddi_intr_enable(ixgbe->htable[i]);
5852 			if (rc != DDI_SUCCESS) {
5853 				ixgbe_log(ixgbe,
5854 				    "Enable intr failed: %d", rc);
5855 				return (IXGBE_FAILURE);
5856 			}
5857 		}
5858 	}
5859 
5860 	return (IXGBE_SUCCESS);
5861 }
5862 
5863 /*
5864  * ixgbe_disable_intrs - Disable all the interrupts.
5865  */
5866 static int
5867 ixgbe_disable_intrs(ixgbe_t *ixgbe)
5868 {
5869 	int i;
5870 	int rc;
5871 
5872 	/*
5873 	 * Disable all interrupts
5874 	 */
5875 	if (ixgbe->intr_cap & DDI_INTR_FLAG_BLOCK) {
5876 		rc = ddi_intr_block_disable(ixgbe->htable, ixgbe->intr_cnt);
5877 		if (rc != DDI_SUCCESS) {
5878 			ixgbe_log(ixgbe,
5879 			    "Disable block intr failed: %d", rc);
5880 			return (IXGBE_FAILURE);
5881 		}
5882 	} else {
5883 		for (i = 0; i < ixgbe->intr_cnt; i++) {
5884 			rc = ddi_intr_disable(ixgbe->htable[i]);
5885 			if (rc != DDI_SUCCESS) {
5886 				ixgbe_log(ixgbe,
5887 				    "Disable intr failed: %d", rc);
5888 				return (IXGBE_FAILURE);
5889 			}
5890 		}
5891 	}
5892 
5893 	return (IXGBE_SUCCESS);
5894 }
5895 
5896 /*
5897  * ixgbe_get_hw_state - Get and save parameters related to adapter hardware.
5898  */
5899 static void
5900 ixgbe_get_hw_state(ixgbe_t *ixgbe)
5901 {
5902 	struct ixgbe_hw *hw = &ixgbe->hw;
5903 	ixgbe_link_speed speed = 0;
5904 	boolean_t link_up = B_FALSE;
5905 	uint32_t pcs1g_anlp = 0;
5906 
5907 	ASSERT(mutex_owned(&ixgbe->gen_lock));
5908 	ixgbe->param_lp_1000fdx_cap = 0;
5909 	ixgbe->param_lp_100fdx_cap  = 0;
5910 
5911 	/* check for link, don't wait */
5912 	(void) ixgbe_check_link(hw, &speed, &link_up, B_FALSE);
5913 
5914 	/*
5915 	 * Update the observed Link Partner's capabilities. Not all adapters
5916 	 * can provide full information on the LP's capable speeds, so we
5917 	 * provide what we can.
5918 	 */
5919 	if (link_up) {
5920 		pcs1g_anlp = IXGBE_READ_REG(hw, IXGBE_PCS1GANLP);
5921 
5922 		ixgbe->param_lp_1000fdx_cap =
5923 		    (pcs1g_anlp & IXGBE_PCS1GANLP_LPFD) ? 1 : 0;
5924 		ixgbe->param_lp_100fdx_cap =
5925 		    (pcs1g_anlp & IXGBE_PCS1GANLP_LPFD) ? 1 : 0;
5926 	}
5927 
5928 	/*
5929 	 * Update GLD's notion of the adapter's currently advertised speeds.
5930 	 * Since the common code doesn't always record the current autonegotiate
5931 	 * settings in the phy struct for all parts (specifically, adapters with
5932 	 * SFPs) we first test to see if it is 0, and if so, we fall back to
5933 	 * using the adapter's speed capabilities which we saved during instance
5934 	 * init in ixgbe_init_params().
5935 	 *
5936 	 * Adapters with SFPs will always be shown as advertising all of their
5937 	 * supported speeds, and adapters with baseT PHYs (where the phy struct
5938 	 * is maintained by the common code) will always have a factual view of
5939 	 * their currently-advertised speeds. In the case of SFPs, this is
5940 	 * acceptable as we default to advertising all speeds that the adapter
5941 	 * claims to support, and those properties are immutable; unlike on
5942 	 * baseT (copper) PHYs, where speeds can be enabled or disabled at will.
5943 	 */
5944 	speed = hw->phy.autoneg_advertised;
5945 	if (speed == 0)
5946 		speed = ixgbe->speeds_supported;
5947 
5948 	ixgbe->param_adv_10000fdx_cap =
5949 	    (speed & IXGBE_LINK_SPEED_10GB_FULL) ? 1 : 0;
5950 	ixgbe->param_adv_5000fdx_cap =
5951 	    (speed & IXGBE_LINK_SPEED_5GB_FULL) ? 1 : 0;
5952 	ixgbe->param_adv_2500fdx_cap =
5953 	    (speed & IXGBE_LINK_SPEED_2_5GB_FULL) ? 1 : 0;
5954 	ixgbe->param_adv_1000fdx_cap =
5955 	    (speed & IXGBE_LINK_SPEED_1GB_FULL) ? 1 : 0;
5956 	ixgbe->param_adv_100fdx_cap =
5957 	    (speed & IXGBE_LINK_SPEED_100_FULL) ? 1 : 0;
5958 }
5959 
5960 /*
5961  * ixgbe_get_driver_control - Notify that driver is in control of device.
5962  */
5963 static void
5964 ixgbe_get_driver_control(struct ixgbe_hw *hw)
5965 {
5966 	uint32_t ctrl_ext;
5967 
5968 	/*
5969 	 * Notify firmware that driver is in control of device
5970 	 */
5971 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
5972 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
5973 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
5974 }
5975 
5976 /*
5977  * ixgbe_release_driver_control - Notify that driver is no longer in control
5978  * of device.
5979  */
5980 static void
5981 ixgbe_release_driver_control(struct ixgbe_hw *hw)
5982 {
5983 	uint32_t ctrl_ext;
5984 
5985 	/*
5986 	 * Notify firmware that driver is no longer in control of device
5987 	 */
5988 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
5989 	ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
5990 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
5991 }
5992 
5993 /*
5994  * ixgbe_atomic_reserve - Atomic decrease operation.
5995  */
5996 int
5997 ixgbe_atomic_reserve(uint32_t *count_p, uint32_t n)
5998 {
5999 	uint32_t oldval;
6000 	uint32_t newval;
6001 
6002 	/*
6003 	 * ATOMICALLY
6004 	 */
6005 	do {
6006 		oldval = *count_p;
6007 		if (oldval < n)
6008 			return (-1);
6009 		newval = oldval - n;
6010 	} while (atomic_cas_32(count_p, oldval, newval) != oldval);
6011 
6012 	return (newval);
6013 }
6014 
6015 /*
6016  * ixgbe_mc_table_itr - Traverse the entries in the multicast table.
6017  */
6018 static uint8_t *
6019 ixgbe_mc_table_itr(struct ixgbe_hw *hw, uint8_t **upd_ptr, uint32_t *vmdq)
6020 {
6021 	uint8_t *addr = *upd_ptr;
6022 	uint8_t *new_ptr;
6023 
6024 	_NOTE(ARGUNUSED(hw));
6025 	_NOTE(ARGUNUSED(vmdq));
6026 
6027 	new_ptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
6028 	*upd_ptr = new_ptr;
6029 	return (addr);
6030 }
6031 
6032 /*
6033  * FMA support
6034  */
6035 int
6036 ixgbe_check_acc_handle(ddi_acc_handle_t handle)
6037 {
6038 	ddi_fm_error_t de;
6039 
6040 	ddi_fm_acc_err_get(handle, &de, DDI_FME_VERSION);
6041 	ddi_fm_acc_err_clear(handle, DDI_FME_VERSION);
6042 	return (de.fme_status);
6043 }
6044 
6045 int
6046 ixgbe_check_dma_handle(ddi_dma_handle_t handle)
6047 {
6048 	ddi_fm_error_t de;
6049 
6050 	ddi_fm_dma_err_get(handle, &de, DDI_FME_VERSION);
6051 	return (de.fme_status);
6052 }
6053 
6054 /*
6055  * ixgbe_fm_error_cb - The IO fault service error handling callback function.
6056  */
6057 static int
6058 ixgbe_fm_error_cb(dev_info_t *dip, ddi_fm_error_t *err, const void *impl_data)
6059 {
6060 	_NOTE(ARGUNUSED(impl_data));
6061 	/*
6062 	 * as the driver can always deal with an error in any dma or
6063 	 * access handle, we can just return the fme_status value.
6064 	 */
6065 	pci_ereport_post(dip, err, NULL);
6066 	return (err->fme_status);
6067 }
6068 
6069 static void
6070 ixgbe_fm_init(ixgbe_t *ixgbe)
6071 {
6072 	ddi_iblock_cookie_t iblk;
6073 	int fma_dma_flag;
6074 
6075 	/*
6076 	 * Only register with IO Fault Services if we have some capability
6077 	 */
6078 	if (ixgbe->fm_capabilities & DDI_FM_ACCCHK_CAPABLE) {
6079 		ixgbe_regs_acc_attr.devacc_attr_access = DDI_FLAGERR_ACC;
6080 	} else {
6081 		ixgbe_regs_acc_attr.devacc_attr_access = DDI_DEFAULT_ACC;
6082 	}
6083 
6084 	if (ixgbe->fm_capabilities & DDI_FM_DMACHK_CAPABLE) {
6085 		fma_dma_flag = 1;
6086 	} else {
6087 		fma_dma_flag = 0;
6088 	}
6089 
6090 	ixgbe_set_fma_flags(fma_dma_flag);
6091 
6092 	if (ixgbe->fm_capabilities) {
6093 
6094 		/*
6095 		 * Register capabilities with IO Fault Services
6096 		 */
6097 		ddi_fm_init(ixgbe->dip, &ixgbe->fm_capabilities, &iblk);
6098 
6099 		/*
6100 		 * Initialize pci ereport capabilities if ereport capable
6101 		 */
6102 		if (DDI_FM_EREPORT_CAP(ixgbe->fm_capabilities) ||
6103 		    DDI_FM_ERRCB_CAP(ixgbe->fm_capabilities))
6104 			pci_ereport_setup(ixgbe->dip);
6105 
6106 		/*
6107 		 * Register error callback if error callback capable
6108 		 */
6109 		if (DDI_FM_ERRCB_CAP(ixgbe->fm_capabilities))
6110 			ddi_fm_handler_register(ixgbe->dip,
6111 			    ixgbe_fm_error_cb, (void*) ixgbe);
6112 	}
6113 }
6114 
6115 static void
6116 ixgbe_fm_fini(ixgbe_t *ixgbe)
6117 {
6118 	/*
6119 	 * Only unregister FMA capabilities if they are registered
6120 	 */
6121 	if (ixgbe->fm_capabilities) {
6122 
6123 		/*
6124 		 * Release any resources allocated by pci_ereport_setup()
6125 		 */
6126 		if (DDI_FM_EREPORT_CAP(ixgbe->fm_capabilities) ||
6127 		    DDI_FM_ERRCB_CAP(ixgbe->fm_capabilities))
6128 			pci_ereport_teardown(ixgbe->dip);
6129 
6130 		/*
6131 		 * Un-register error callback if error callback capable
6132 		 */
6133 		if (DDI_FM_ERRCB_CAP(ixgbe->fm_capabilities))
6134 			ddi_fm_handler_unregister(ixgbe->dip);
6135 
6136 		/*
6137 		 * Unregister from IO Fault Service
6138 		 */
6139 		ddi_fm_fini(ixgbe->dip);
6140 	}
6141 }
6142 
6143 void
6144 ixgbe_fm_ereport(ixgbe_t *ixgbe, char *detail)
6145 {
6146 	uint64_t ena;
6147 	char buf[FM_MAX_CLASS];
6148 
6149 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s", DDI_FM_DEVICE, detail);
6150 	ena = fm_ena_generate(0, FM_ENA_FMT1);
6151 	if (DDI_FM_EREPORT_CAP(ixgbe->fm_capabilities)) {
6152 		ddi_fm_ereport_post(ixgbe->dip, buf, ena, DDI_NOSLEEP,
6153 		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0, NULL);
6154 	}
6155 }
6156 
6157 static int
6158 ixgbe_ring_start(mac_ring_driver_t rh, uint64_t mr_gen_num)
6159 {
6160 	ixgbe_rx_ring_t *rx_ring = (ixgbe_rx_ring_t *)rh;
6161 
6162 	mutex_enter(&rx_ring->rx_lock);
6163 	rx_ring->ring_gen_num = mr_gen_num;
6164 	mutex_exit(&rx_ring->rx_lock);
6165 	return (0);
6166 }
6167 
6168 /*
6169  * Get the global ring index by a ring index within a group.
6170  */
6171 static int
6172 ixgbe_get_rx_ring_index(ixgbe_t *ixgbe, int gindex, int rindex)
6173 {
6174 	ixgbe_rx_ring_t *rx_ring;
6175 	int i;
6176 
6177 	for (i = 0; i < ixgbe->num_rx_rings; i++) {
6178 		rx_ring = &ixgbe->rx_rings[i];
6179 		if (rx_ring->group_index == gindex)
6180 			rindex--;
6181 		if (rindex < 0)
6182 			return (i);
6183 	}
6184 
6185 	return (-1);
6186 }
6187 
6188 /*
6189  * Callback funtion for MAC layer to register all rings.
6190  */
6191 /* ARGSUSED */
6192 void
6193 ixgbe_fill_ring(void *arg, mac_ring_type_t rtype, const int group_index,
6194     const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
6195 {
6196 	ixgbe_t *ixgbe = (ixgbe_t *)arg;
6197 	mac_intr_t *mintr = &infop->mri_intr;
6198 
6199 	switch (rtype) {
6200 	case MAC_RING_TYPE_RX: {
6201 		/*
6202 		 * 'index' is the ring index within the group.
6203 		 * Need to get the global ring index by searching in groups.
6204 		 */
6205 		int global_ring_index = ixgbe_get_rx_ring_index(
6206 		    ixgbe, group_index, ring_index);
6207 
6208 		ASSERT(global_ring_index >= 0);
6209 
6210 		ixgbe_rx_ring_t *rx_ring = &ixgbe->rx_rings[global_ring_index];
6211 		rx_ring->ring_handle = rh;
6212 
6213 		infop->mri_driver = (mac_ring_driver_t)rx_ring;
6214 		infop->mri_start = ixgbe_ring_start;
6215 		infop->mri_stop = NULL;
6216 		infop->mri_poll = ixgbe_ring_rx_poll;
6217 		infop->mri_stat = ixgbe_rx_ring_stat;
6218 
6219 		mintr->mi_handle = (mac_intr_handle_t)rx_ring;
6220 		mintr->mi_enable = ixgbe_rx_ring_intr_enable;
6221 		mintr->mi_disable = ixgbe_rx_ring_intr_disable;
6222 		if (ixgbe->intr_type &
6223 		    (DDI_INTR_TYPE_MSIX | DDI_INTR_TYPE_MSI)) {
6224 			mintr->mi_ddi_handle =
6225 			    ixgbe->htable[rx_ring->intr_vector];
6226 		}
6227 
6228 		break;
6229 	}
6230 	case MAC_RING_TYPE_TX: {
6231 		ASSERT(group_index == -1);
6232 		ASSERT(ring_index < ixgbe->num_tx_rings);
6233 
6234 		ixgbe_tx_ring_t *tx_ring = &ixgbe->tx_rings[ring_index];
6235 		tx_ring->ring_handle = rh;
6236 
6237 		infop->mri_driver = (mac_ring_driver_t)tx_ring;
6238 		infop->mri_start = NULL;
6239 		infop->mri_stop = NULL;
6240 		infop->mri_tx = ixgbe_ring_tx;
6241 		infop->mri_stat = ixgbe_tx_ring_stat;
6242 		if (ixgbe->intr_type &
6243 		    (DDI_INTR_TYPE_MSIX | DDI_INTR_TYPE_MSI)) {
6244 			mintr->mi_ddi_handle =
6245 			    ixgbe->htable[tx_ring->intr_vector];
6246 		}
6247 		break;
6248 	}
6249 	default:
6250 		break;
6251 	}
6252 }
6253 
6254 /*
6255  * Callback funtion for MAC layer to register all groups.
6256  */
6257 void
6258 ixgbe_fill_group(void *arg, mac_ring_type_t rtype, const int index,
6259     mac_group_info_t *infop, mac_group_handle_t gh)
6260 {
6261 	ixgbe_t *ixgbe = (ixgbe_t *)arg;
6262 	struct ixgbe_hw *hw = &ixgbe->hw;
6263 
6264 	switch (rtype) {
6265 	case MAC_RING_TYPE_RX: {
6266 		ixgbe_rx_group_t *rx_group;
6267 
6268 		rx_group = &ixgbe->rx_groups[index];
6269 		rx_group->group_handle = gh;
6270 
6271 		infop->mgi_driver = (mac_group_driver_t)rx_group;
6272 		infop->mgi_start = NULL;
6273 		infop->mgi_stop = NULL;
6274 		infop->mgi_addmac = ixgbe_addmac;
6275 		infop->mgi_remmac = ixgbe_remmac;
6276 
6277 		if ((ixgbe->classify_mode == IXGBE_CLASSIFY_VMDQ ||
6278 		    ixgbe->classify_mode == IXGBE_CLASSIFY_VMDQ_RSS) &&
6279 		    (hw->mac.type == ixgbe_mac_82599EB ||
6280 		    hw->mac.type == ixgbe_mac_X540 ||
6281 		    hw->mac.type == ixgbe_mac_X550 ||
6282 		    hw->mac.type == ixgbe_mac_X550EM_x)) {
6283 			infop->mgi_addvlan = ixgbe_addvlan;
6284 			infop->mgi_remvlan = ixgbe_remvlan;
6285 		} else {
6286 			infop->mgi_addvlan = NULL;
6287 			infop->mgi_remvlan = NULL;
6288 		}
6289 
6290 		infop->mgi_count = (ixgbe->num_rx_rings / ixgbe->num_rx_groups);
6291 
6292 		break;
6293 	}
6294 	case MAC_RING_TYPE_TX:
6295 		break;
6296 	default:
6297 		break;
6298 	}
6299 }
6300 
6301 /*
6302  * Enable interrupt on the specificed rx ring.
6303  */
6304 int
6305 ixgbe_rx_ring_intr_enable(mac_intr_handle_t intrh)
6306 {
6307 	ixgbe_rx_ring_t *rx_ring = (ixgbe_rx_ring_t *)intrh;
6308 	ixgbe_t *ixgbe = rx_ring->ixgbe;
6309 	int r_idx = rx_ring->index;
6310 	int hw_r_idx = rx_ring->hw_index;
6311 	int v_idx = rx_ring->intr_vector;
6312 
6313 	mutex_enter(&ixgbe->gen_lock);
6314 	if (ixgbe->ixgbe_state & IXGBE_INTR_ADJUST) {
6315 		mutex_exit(&ixgbe->gen_lock);
6316 		/*
6317 		 * Simply return 0.
6318 		 * Interrupts are being adjusted. ixgbe_intr_adjust()
6319 		 * will eventually re-enable the interrupt when it's
6320 		 * done with the adjustment.
6321 		 */
6322 		return (0);
6323 	}
6324 
6325 	/*
6326 	 * To enable interrupt by setting the VAL bit of given interrupt
6327 	 * vector allocation register (IVAR).
6328 	 */
6329 	ixgbe_enable_ivar(ixgbe, hw_r_idx, 0);
6330 
6331 	BT_SET(ixgbe->vect_map[v_idx].rx_map, r_idx);
6332 
6333 	/*
6334 	 * Trigger a Rx interrupt on this ring
6335 	 */
6336 	IXGBE_WRITE_REG(&ixgbe->hw, IXGBE_EICS, (1 << v_idx));
6337 	IXGBE_WRITE_FLUSH(&ixgbe->hw);
6338 
6339 	mutex_exit(&ixgbe->gen_lock);
6340 
6341 	return (0);
6342 }
6343 
6344 /*
6345  * Disable interrupt on the specificed rx ring.
6346  */
6347 int
6348 ixgbe_rx_ring_intr_disable(mac_intr_handle_t intrh)
6349 {
6350 	ixgbe_rx_ring_t *rx_ring = (ixgbe_rx_ring_t *)intrh;
6351 	ixgbe_t *ixgbe = rx_ring->ixgbe;
6352 	int r_idx = rx_ring->index;
6353 	int hw_r_idx = rx_ring->hw_index;
6354 	int v_idx = rx_ring->intr_vector;
6355 
6356 	mutex_enter(&ixgbe->gen_lock);
6357 	if (ixgbe->ixgbe_state & IXGBE_INTR_ADJUST) {
6358 		mutex_exit(&ixgbe->gen_lock);
6359 		/*
6360 		 * Simply return 0.
6361 		 * In the rare case where an interrupt is being
6362 		 * disabled while interrupts are being adjusted,
6363 		 * we don't fail the operation. No interrupts will
6364 		 * be generated while they are adjusted, and
6365 		 * ixgbe_intr_adjust() will cause the interrupts
6366 		 * to be re-enabled once it completes. Note that
6367 		 * in this case, packets may be delivered to the
6368 		 * stack via interrupts before xgbe_rx_ring_intr_enable()
6369 		 * is called again. This is acceptable since interrupt
6370 		 * adjustment is infrequent, and the stack will be
6371 		 * able to handle these packets.
6372 		 */
6373 		return (0);
6374 	}
6375 
6376 	/*
6377 	 * To disable interrupt by clearing the VAL bit of given interrupt
6378 	 * vector allocation register (IVAR).
6379 	 */
6380 	ixgbe_disable_ivar(ixgbe, hw_r_idx, 0);
6381 
6382 	BT_CLEAR(ixgbe->vect_map[v_idx].rx_map, r_idx);
6383 
6384 	mutex_exit(&ixgbe->gen_lock);
6385 
6386 	return (0);
6387 }
6388 
6389 static ixgbe_vlan_t *
6390 ixgbe_find_vlan(ixgbe_rx_group_t *rx_group, uint16_t vid)
6391 {
6392 	for (ixgbe_vlan_t *vlp = list_head(&rx_group->vlans); vlp != NULL;
6393 	    vlp = list_next(&rx_group->vlans, vlp)) {
6394 		if (vlp->ixvl_vid == vid)
6395 			return (vlp);
6396 	}
6397 
6398 	return (NULL);
6399 }
6400 
6401 /*
6402  * Attempt to use a VLAN HW filter for this group. If the group is
6403  * interested in untagged packets then set AUPE only. If the group is
6404  * the default then only set the VFTA. Leave the VLVF slots open for
6405  * reserved groups to guarantee their use of HW filtering.
6406  */
6407 static int
6408 ixgbe_addvlan(mac_group_driver_t gdriver, uint16_t vid)
6409 {
6410 	ixgbe_rx_group_t	*rx_group = (ixgbe_rx_group_t *)gdriver;
6411 	ixgbe_t			*ixgbe = rx_group->ixgbe;
6412 	struct ixgbe_hw		*hw = &ixgbe->hw;
6413 	ixgbe_vlan_t		*vlp;
6414 	int			ret;
6415 	boolean_t		is_def_grp;
6416 
6417 	mutex_enter(&ixgbe->gen_lock);
6418 
6419 	if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) {
6420 		mutex_exit(&ixgbe->gen_lock);
6421 		return (ECANCELED);
6422 	}
6423 
6424 	/*
6425 	 * Let's be sure VLAN filtering is enabled.
6426 	 */
6427 	VERIFY3B(ixgbe->vlft_enabled, ==, B_TRUE);
6428 	is_def_grp = (rx_group->index == ixgbe->rx_def_group);
6429 
6430 	/*
6431 	 * VLAN filtering is enabled but we want to receive untagged
6432 	 * traffic on this group -- set the AUPE bit on the group and
6433 	 * leave the VLAN tables alone.
6434 	 */
6435 	if (vid == MAC_VLAN_UNTAGGED) {
6436 		/*
6437 		 * We never enable AUPE on the default group; it is
6438 		 * redundant. Untagged traffic which passes L2
6439 		 * filtering is delivered to the default group if no
6440 		 * other group is interested.
6441 		 */
6442 		if (!is_def_grp) {
6443 			uint32_t vml2flt;
6444 
6445 			vml2flt = IXGBE_READ_REG(hw,
6446 			    IXGBE_VMOLR(rx_group->index));
6447 			vml2flt |= IXGBE_VMOLR_AUPE;
6448 			IXGBE_WRITE_REG(hw, IXGBE_VMOLR(rx_group->index),
6449 			    vml2flt);
6450 			rx_group->aupe = B_TRUE;
6451 		}
6452 
6453 		mutex_exit(&ixgbe->gen_lock);
6454 		return (0);
6455 	}
6456 
6457 	vlp = ixgbe_find_vlan(rx_group, vid);
6458 	if (vlp != NULL) {
6459 		/* Only the default group supports multiple clients. */
6460 		VERIFY3B(is_def_grp, ==, B_TRUE);
6461 		vlp->ixvl_refs++;
6462 		mutex_exit(&ixgbe->gen_lock);
6463 		return (0);
6464 	}
6465 
6466 	/*
6467 	 * The default group doesn't require a VLVF entry, only a VFTA
6468 	 * entry. All traffic passing L2 filtering (MPSAR + VFTA) is
6469 	 * delivered to the default group if no other group is
6470 	 * interested. The fourth argument, vlvf_bypass, tells the
6471 	 * ixgbe common code to avoid using a VLVF slot if one isn't
6472 	 * already allocated to this VLAN.
6473 	 *
6474 	 * This logic is meant to reserve VLVF slots for use by
6475 	 * reserved groups: guaranteeing their use of HW filtering.
6476 	 */
6477 	ret = ixgbe_set_vfta(hw, vid, rx_group->index, B_TRUE, is_def_grp);
6478 
6479 	if (ret == IXGBE_SUCCESS) {
6480 		vlp = kmem_zalloc(sizeof (ixgbe_vlan_t), KM_SLEEP);
6481 		vlp->ixvl_vid = vid;
6482 		vlp->ixvl_refs = 1;
6483 		list_insert_tail(&rx_group->vlans, vlp);
6484 		mutex_exit(&ixgbe->gen_lock);
6485 		return (0);
6486 	}
6487 
6488 	/*
6489 	 * We should actually never return ENOSPC because we've set
6490 	 * things up so that every reserved group is guaranteed to
6491 	 * have a VLVF slot.
6492 	 */
6493 	if (ret == IXGBE_ERR_PARAM)
6494 		ret = EINVAL;
6495 	else if (ret == IXGBE_ERR_NO_SPACE)
6496 		ret = ENOSPC;
6497 	else
6498 		ret = EIO;
6499 
6500 	mutex_exit(&ixgbe->gen_lock);
6501 	return (ret);
6502 }
6503 
6504 /*
6505  * Attempt to remove the VLAN HW filter associated with this group. If
6506  * we are removing a HW filter for the default group then we know only
6507  * the VFTA was set (VLVF is reserved for non-default/reserved
6508  * groups). If the group wishes to stop receiving untagged traffic
6509  * then clear the AUPE but leave the VLAN filters alone.
6510  */
6511 static int
6512 ixgbe_remvlan(mac_group_driver_t gdriver, uint16_t vid)
6513 {
6514 	ixgbe_rx_group_t	*rx_group = (ixgbe_rx_group_t *)gdriver;
6515 	ixgbe_t			*ixgbe = rx_group->ixgbe;
6516 	struct ixgbe_hw		*hw = &ixgbe->hw;
6517 	int			ret;
6518 	ixgbe_vlan_t		*vlp;
6519 	boolean_t		is_def_grp;
6520 
6521 	mutex_enter(&ixgbe->gen_lock);
6522 
6523 	if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) {
6524 		mutex_exit(&ixgbe->gen_lock);
6525 		return (ECANCELED);
6526 	}
6527 
6528 	is_def_grp = (rx_group->index == ixgbe->rx_def_group);
6529 
6530 	/* See the AUPE comment in ixgbe_addvlan(). */
6531 	if (vid == MAC_VLAN_UNTAGGED) {
6532 		if (!is_def_grp) {
6533 			uint32_t vml2flt;
6534 
6535 			vml2flt = IXGBE_READ_REG(hw,
6536 			    IXGBE_VMOLR(rx_group->index));
6537 			vml2flt &= ~IXGBE_VMOLR_AUPE;
6538 			IXGBE_WRITE_REG(hw,
6539 			    IXGBE_VMOLR(rx_group->index), vml2flt);
6540 			rx_group->aupe = B_FALSE;
6541 		}
6542 		mutex_exit(&ixgbe->gen_lock);
6543 		return (0);
6544 	}
6545 
6546 	vlp = ixgbe_find_vlan(rx_group, vid);
6547 	if (vlp == NULL) {
6548 		mutex_exit(&ixgbe->gen_lock);
6549 		return (ENOENT);
6550 	}
6551 
6552 	/*
6553 	 * See the comment in ixgbe_addvlan() about is_def_grp and
6554 	 * vlvf_bypass.
6555 	 */
6556 	if (vlp->ixvl_refs == 1) {
6557 		ret = ixgbe_set_vfta(hw, vid, rx_group->index, B_FALSE,
6558 		    is_def_grp);
6559 	} else {
6560 		/*
6561 		 * Only the default group can have multiple clients.
6562 		 * If there is more than one client, leave the
6563 		 * VFTA[vid] bit alone.
6564 		 */
6565 		VERIFY3B(is_def_grp, ==, B_TRUE);
6566 		VERIFY3U(vlp->ixvl_refs, >, 1);
6567 		vlp->ixvl_refs--;
6568 		mutex_exit(&ixgbe->gen_lock);
6569 		return (0);
6570 	}
6571 
6572 	if (ret != IXGBE_SUCCESS) {
6573 		mutex_exit(&ixgbe->gen_lock);
6574 		/* IXGBE_ERR_PARAM should be the only possible error here. */
6575 		if (ret == IXGBE_ERR_PARAM)
6576 			return (EINVAL);
6577 		else
6578 			return (EIO);
6579 	}
6580 
6581 	VERIFY3U(vlp->ixvl_refs, ==, 1);
6582 	vlp->ixvl_refs = 0;
6583 	list_remove(&rx_group->vlans, vlp);
6584 	kmem_free(vlp, sizeof (ixgbe_vlan_t));
6585 
6586 	/*
6587 	 * Calling ixgbe_set_vfta() on a non-default group may have
6588 	 * cleared the VFTA[vid] bit even though the default group
6589 	 * still has clients using the vid. This happens because the
6590 	 * ixgbe common code doesn't ref count the use of VLANs. Check
6591 	 * for any use of vid on the default group and make sure the
6592 	 * VFTA[vid] bit is set. This operation is idempotent: setting
6593 	 * VFTA[vid] to true if already true won't hurt anything.
6594 	 */
6595 	if (!is_def_grp) {
6596 		ixgbe_rx_group_t *defgrp;
6597 
6598 		defgrp = &ixgbe->rx_groups[ixgbe->rx_def_group];
6599 		vlp = ixgbe_find_vlan(defgrp, vid);
6600 		if (vlp != NULL) {
6601 			/* This shouldn't fail, but if it does return EIO. */
6602 			ret = ixgbe_set_vfta(hw, vid, rx_group->index, B_TRUE,
6603 			    B_TRUE);
6604 			if (ret != IXGBE_SUCCESS) {
6605 				mutex_exit(&ixgbe->gen_lock);
6606 				return (EIO);
6607 			}
6608 		}
6609 	}
6610 
6611 	mutex_exit(&ixgbe->gen_lock);
6612 	return (0);
6613 }
6614 
6615 /*
6616  * Add a mac address.
6617  */
6618 static int
6619 ixgbe_addmac(void *arg, const uint8_t *mac_addr)
6620 {
6621 	ixgbe_rx_group_t *rx_group = (ixgbe_rx_group_t *)arg;
6622 	ixgbe_t *ixgbe = rx_group->ixgbe;
6623 	struct ixgbe_hw *hw = &ixgbe->hw;
6624 	int slot, i;
6625 
6626 	mutex_enter(&ixgbe->gen_lock);
6627 
6628 	if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) {
6629 		mutex_exit(&ixgbe->gen_lock);
6630 		return (ECANCELED);
6631 	}
6632 
6633 	if (ixgbe->unicst_avail == 0) {
6634 		/* no slots available */
6635 		mutex_exit(&ixgbe->gen_lock);
6636 		return (ENOSPC);
6637 	}
6638 
6639 	/*
6640 	 * The first ixgbe->num_rx_groups slots are reserved for each respective
6641 	 * group. The rest slots are shared by all groups. While adding a
6642 	 * MAC address, reserved slots are firstly checked then the shared
6643 	 * slots are searched.
6644 	 */
6645 	slot = -1;
6646 	if (ixgbe->unicst_addr[rx_group->index].mac.set == 1) {
6647 		for (i = ixgbe->num_rx_groups; i < ixgbe->unicst_total; i++) {
6648 			if (ixgbe->unicst_addr[i].mac.set == 0) {
6649 				slot = i;
6650 				break;
6651 			}
6652 		}
6653 	} else {
6654 		slot = rx_group->index;
6655 	}
6656 
6657 	if (slot == -1) {
6658 		/* no slots available */
6659 		mutex_exit(&ixgbe->gen_lock);
6660 		return (ENOSPC);
6661 	}
6662 
6663 	bcopy(mac_addr, ixgbe->unicst_addr[slot].mac.addr, ETHERADDRL);
6664 	(void) ixgbe_set_rar(hw, slot, ixgbe->unicst_addr[slot].mac.addr,
6665 	    rx_group->index, IXGBE_RAH_AV);
6666 	ixgbe->unicst_addr[slot].mac.set = 1;
6667 	ixgbe->unicst_addr[slot].mac.group_index = rx_group->index;
6668 	ixgbe->unicst_avail--;
6669 
6670 	mutex_exit(&ixgbe->gen_lock);
6671 
6672 	return (0);
6673 }
6674 
6675 /*
6676  * Remove a mac address.
6677  */
6678 static int
6679 ixgbe_remmac(void *arg, const uint8_t *mac_addr)
6680 {
6681 	ixgbe_rx_group_t *rx_group = (ixgbe_rx_group_t *)arg;
6682 	ixgbe_t *ixgbe = rx_group->ixgbe;
6683 	struct ixgbe_hw *hw = &ixgbe->hw;
6684 	int slot;
6685 
6686 	mutex_enter(&ixgbe->gen_lock);
6687 
6688 	if (ixgbe->ixgbe_state & IXGBE_SUSPENDED) {
6689 		mutex_exit(&ixgbe->gen_lock);
6690 		return (ECANCELED);
6691 	}
6692 
6693 	slot = ixgbe_unicst_find(ixgbe, mac_addr);
6694 	if (slot == -1) {
6695 		mutex_exit(&ixgbe->gen_lock);
6696 		return (EINVAL);
6697 	}
6698 
6699 	if (ixgbe->unicst_addr[slot].mac.set == 0) {
6700 		mutex_exit(&ixgbe->gen_lock);
6701 		return (EINVAL);
6702 	}
6703 
6704 	bzero(ixgbe->unicst_addr[slot].mac.addr, ETHERADDRL);
6705 	(void) ixgbe_clear_rar(hw, slot);
6706 	ixgbe->unicst_addr[slot].mac.set = 0;
6707 	ixgbe->unicst_avail++;
6708 
6709 	mutex_exit(&ixgbe->gen_lock);
6710 
6711 	return (0);
6712 }
6713