/* SPDX-License-Identifier: BSD-3-Clause */
/*  Copyright (c) 2024, Intel Corporation
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice,
 *      this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 *   3. Neither the name of the Intel Corporation nor the names of its
 *      contributors may be used to endorse or promote products derived from
 *      this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *  POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * @file if_ice_iflib.c
 * @brief iflib driver implementation
 *
 * Contains the main entry point for the iflib driver implementation. It
 * implements the various ifdi driver methods, and sets up the module and
 * driver values to load an iflib driver.
 */

#include "ice_iflib.h"
#include "ice_drv_info.h"
#include "ice_switch.h"
#include "ice_sched.h"

#include <sys/module.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

/*
 * Device method prototypes
 */

static void *ice_register(device_t);
static int  ice_if_attach_pre(if_ctx_t);
static int  ice_attach_pre_recovery_mode(struct ice_softc *sc);
static int  ice_if_attach_post(if_ctx_t);
static void ice_attach_post_recovery_mode(struct ice_softc *sc);
static int  ice_if_detach(if_ctx_t);
static int  ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets);
static int  ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets);
static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix);
static void ice_if_queues_free(if_ctx_t ctx);
static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu);
static void ice_if_intr_enable(if_ctx_t ctx);
static void ice_if_intr_disable(if_ctx_t ctx);
static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
static int ice_if_promisc_set(if_ctx_t ctx, int flags);
static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
static int ice_if_media_change(if_ctx_t ctx);
static void ice_if_init(if_ctx_t ctx);
static void ice_if_timer(if_ctx_t ctx, uint16_t qid);
static void ice_if_update_admin_status(if_ctx_t ctx);
static void ice_if_multi_set(if_ctx_t ctx);
static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag);
static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag);
static void ice_if_stop(if_ctx_t ctx);
static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter);
static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data);
static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req);
static int ice_if_suspend(if_ctx_t ctx);
static int ice_if_resume(if_ctx_t ctx);
static bool ice_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event);
static void ice_init_link(struct ice_softc *sc);
static int ice_setup_mirror_vsi(struct ice_mirr_if *mif);
static int ice_wire_mirror_intrs(struct ice_mirr_if *mif);
static void ice_free_irqvs_subif(struct ice_mirr_if *mif);
static void *ice_subif_register(device_t);
static void ice_subif_setup_scctx(struct ice_mirr_if *mif);
static int ice_subif_rebuild(struct ice_softc *sc);
static int ice_subif_rebuild_vsi_qmap(struct ice_softc *sc);

/* Iflib API */
static int ice_subif_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
    uint64_t *paddrs, int ntxqs, int ntxqsets);
static int ice_subif_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
    uint64_t *paddrs, int nrxqs, int nrxqsets);
static int ice_subif_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
static int ice_subif_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
static void ice_subif_if_intr_enable(if_ctx_t ctx);
static int ice_subif_if_msix_intr_assign(if_ctx_t ctx, int msix);
static void ice_subif_if_init(if_ctx_t ctx);
static void ice_subif_if_stop(if_ctx_t ctx);
static void ice_subif_if_queues_free(if_ctx_t ctx);
static int ice_subif_if_attach_pre(if_ctx_t);
static int ice_subif_if_attach_post(if_ctx_t);
static void ice_subif_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
static int ice_subif_if_promisc_set(if_ctx_t ctx, int flags);

static int ice_msix_que(void *arg);
static int ice_msix_admin(void *arg);

/*
 * Helper function prototypes
 */
static int ice_pci_mapping(struct ice_softc *sc);
static void ice_free_pci_mapping(struct ice_softc *sc);
static void ice_update_link_status(struct ice_softc *sc, bool update_media);
static void ice_init_device_features(struct ice_softc *sc);
static void ice_init_tx_tracking(struct ice_vsi *vsi);
static void ice_handle_reset_event(struct ice_softc *sc);
static void ice_handle_pf_reset_request(struct ice_softc *sc);
static void ice_prepare_for_reset(struct ice_softc *sc);
static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc);
static void ice_rebuild(struct ice_softc *sc);
static void ice_rebuild_recovery_mode(struct ice_softc *sc);
static void ice_free_irqvs(struct ice_softc *sc);
static void ice_update_rx_mbuf_sz(struct ice_softc *sc);
static void ice_poll_for_media_avail(struct ice_softc *sc);
static void ice_setup_scctx(struct ice_softc *sc);
static int ice_allocate_msix(struct ice_softc *sc);
static void ice_admin_timer(void *arg);
static void ice_transition_recovery_mode(struct ice_softc *sc);
static void ice_transition_safe_mode(struct ice_softc *sc);
static void ice_set_default_promisc_mask(ice_bitmap_t *promisc_mask);

/*
 * Device Interface Declaration
 */

/**
 * @var ice_methods
 * @brief ice driver method entry points
 *
 * List of device methods implementing the generic device interface used by
 * the device stack to interact with the ice driver. Since this is an iflib
 * driver, most of the methods point to the generic iflib implementation.
 */
static device_method_t ice_methods[] = {
	/* Device interface */
	DEVMETHOD(device_register, ice_register),
	DEVMETHOD(device_probe,    iflib_device_probe_vendor),
	DEVMETHOD(device_attach,   iflib_device_attach),
	DEVMETHOD(device_detach,   iflib_device_detach),
	DEVMETHOD(device_shutdown, iflib_device_shutdown),
	DEVMETHOD(device_suspend,  iflib_device_suspend),
	DEVMETHOD(device_resume,   iflib_device_resume),
	DEVMETHOD_END
};

/**
 * @var ice_iflib_methods
 * @brief iflib method entry points
 *
 * List of device methods used by the iflib stack to interact with this
 * driver. These are the real main entry points used to interact with this
 * driver.
 */
static device_method_t ice_iflib_methods[] = {
	DEVMETHOD(ifdi_attach_pre, ice_if_attach_pre),
	DEVMETHOD(ifdi_attach_post, ice_if_attach_post),
	DEVMETHOD(ifdi_detach, ice_if_detach),
	DEVMETHOD(ifdi_tx_queues_alloc, ice_if_tx_queues_alloc),
	DEVMETHOD(ifdi_rx_queues_alloc, ice_if_rx_queues_alloc),
	DEVMETHOD(ifdi_msix_intr_assign, ice_if_msix_intr_assign),
	DEVMETHOD(ifdi_queues_free, ice_if_queues_free),
	DEVMETHOD(ifdi_mtu_set, ice_if_mtu_set),
	DEVMETHOD(ifdi_intr_enable, ice_if_intr_enable),
	DEVMETHOD(ifdi_intr_disable, ice_if_intr_disable),
	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_if_rx_queue_intr_enable),
	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_if_tx_queue_intr_enable),
	DEVMETHOD(ifdi_promisc_set, ice_if_promisc_set),
	DEVMETHOD(ifdi_media_status, ice_if_media_status),
	DEVMETHOD(ifdi_media_change, ice_if_media_change),
	DEVMETHOD(ifdi_init, ice_if_init),
	DEVMETHOD(ifdi_stop, ice_if_stop),
	DEVMETHOD(ifdi_timer, ice_if_timer),
	DEVMETHOD(ifdi_update_admin_status, ice_if_update_admin_status),
	DEVMETHOD(ifdi_multi_set, ice_if_multi_set),
	DEVMETHOD(ifdi_vlan_register, ice_if_vlan_register),
	DEVMETHOD(ifdi_vlan_unregister, ice_if_vlan_unregister),
	DEVMETHOD(ifdi_get_counter, ice_if_get_counter),
	DEVMETHOD(ifdi_priv_ioctl, ice_if_priv_ioctl),
	DEVMETHOD(ifdi_i2c_req, ice_if_i2c_req),
	DEVMETHOD(ifdi_suspend, ice_if_suspend),
	DEVMETHOD(ifdi_resume, ice_if_resume),
	DEVMETHOD(ifdi_needs_restart, ice_if_needs_restart),
	DEVMETHOD_END
};

/**
 * @var ice_driver
 * @brief driver structure for the generic device stack
 *
 * driver_t definition used to setup the generic device methods.
 */
static driver_t ice_driver = {
	.name = "ice",
	.methods = ice_methods,
	.size = sizeof(struct ice_softc),
};

/**
 * @var ice_iflib_driver
 * @brief driver structure for the iflib stack
 *
 * driver_t definition used to setup the iflib device methods.
 */
static driver_t ice_iflib_driver = {
	.name = "ice",
	.methods = ice_iflib_methods,
	.size = sizeof(struct ice_softc),
};

extern struct if_txrx ice_txrx;
extern struct if_txrx ice_recovery_txrx;

/**
 * @var ice_sctx
 * @brief ice driver shared context
 *
 * Structure defining shared values (context) that are used by all instances of
 * the device. Primarily used to setup details about how the iflib stack
 * should treat this driver. Also defines the default, minimum, and maximum
 * number of descriptors in each ring.
 */
static struct if_shared_ctx ice_sctx = {
	.isc_magic = IFLIB_MAGIC,
	.isc_q_align = PAGE_SIZE,

	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
	/* We could technically set this as high as ICE_MAX_DMA_SEG_SIZE, but
	 * that doesn't make sense since that would be larger than the maximum
	 * size of a single packet.
	 */
	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,

	/* XXX: This is only used by iflib to ensure that
	 * scctx->isc_tx_tso_size_max + the VLAN header is a valid size.
	 */
	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
	/* XXX: This is used by iflib to set the number of segments in the TSO
	 * DMA tag. However, scctx->isc_tx_tso_segsize_max is used to set the
	 * related ifnet parameter.
	 */
	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,

	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,

	.isc_nfl = 1,
	.isc_ntxqs = 1,
	.isc_nrxqs = 1,

	.isc_admin_intrcnt = 1,
	.isc_vendor_info = ice_vendor_info_array,
	.isc_driver_version = __DECONST(char *, ice_driver_version),
	.isc_driver = &ice_iflib_driver,

	/*
	 * IFLIB_NEED_SCRATCH ensures that mbufs have scratch space available
	 * for hardware checksum offload
	 *
	 * IFLIB_TSO_INIT_IP ensures that the TSO packets have zeroed out the
	 * IP sum field, required by our hardware to calculate valid TSO
	 * checksums.
	 *
	 * IFLIB_ADMIN_ALWAYS_RUN ensures that the administrative task runs
	 * even when the interface is down.
	 *
	 * IFLIB_SKIP_MSIX allows the driver to handle allocating MSI-X
	 * vectors manually instead of relying on iflib code to do this.
	 */
	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX,

	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
};

DRIVER_MODULE(ice, pci, ice_driver, ice_module_event_handler, NULL);

MODULE_VERSION(ice, 1);
MODULE_DEPEND(ice, pci, 1, 1, 1);
MODULE_DEPEND(ice, ether, 1, 1, 1);
MODULE_DEPEND(ice, iflib, 1, 1, 1);

IFLIB_PNP_INFO(pci, ice, ice_vendor_info_array);

/* Static driver-wide sysctls */
#include "ice_iflib_sysctls.h"

/**
 * ice_pci_mapping - Map PCI BAR memory
 * @sc: device private softc
 *
 * Map PCI BAR 0 for device operation.
 */
static int
ice_pci_mapping(struct ice_softc *sc)
{
	int rc;

	/* Map BAR0 */
	rc = ice_map_bar(sc->dev, &sc->bar0, 0);
	if (rc)
		return rc;

	return 0;
}

/**
 * ice_free_pci_mapping - Release PCI BAR memory
 * @sc: device private softc
 *
 * Release PCI BARs which were previously mapped by ice_pci_mapping().
 */
static void
ice_free_pci_mapping(struct ice_softc *sc)
{
	/* Free BAR0 */
	ice_free_bar(sc->dev, &sc->bar0);
}

/*
 * Device methods
 */

/**
 * ice_register - register device method callback
 * @dev: the device being registered
 *
 * Returns a pointer to the shared context structure, which is used by iflib.
 */
static void *
ice_register(device_t dev __unused)
{
	return &ice_sctx;
} /* ice_register */

/**
 * ice_setup_scctx - Setup the iflib softc context structure
 * @sc: the device private structure
 *
 * Setup the parameters in the if_softc_ctx_t structure used by the iflib stack
 * when loading.
 */
static void
ice_setup_scctx(struct ice_softc *sc)
{
	if_softc_ctx_t scctx = sc->scctx;
	struct ice_hw *hw = &sc->hw;
	device_t dev = sc->dev;
	bool safe_mode, recovery_mode;

	safe_mode = ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE);
	recovery_mode = ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE);

	/*
	 * If the driver loads in Safe mode or Recovery mode, limit iflib to
	 * a single queue pair.
	 */
	if (safe_mode || recovery_mode) {
		scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1;
		scctx->isc_ntxqsets_max = 1;
		scctx->isc_nrxqsets_max = 1;
	} else {
		/*
		 * iflib initially sets the isc_ntxqsets and isc_nrxqsets to
		 * the values of the override sysctls. Cache these initial
		 * values so that the driver can be aware of what the iflib
		 * sysctl value is when setting up MSI-X vectors.
		 */
		sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets;
		sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets;

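		/* With no sysctl override, default the queue count to the
		 * size of the RSS table.
		 */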
		if (scctx->isc_ntxqsets == 0)
			scctx->isc_ntxqsets = hw->func_caps.common_cap.rss_table_size;
		if (scctx->isc_nrxqsets == 0)
			scctx->isc_nrxqsets = hw->func_caps.common_cap.rss_table_size;

		scctx->isc_ntxqsets_max = hw->func_caps.common_cap.num_txq;
		scctx->isc_nrxqsets_max = hw->func_caps.common_cap.num_rxq;

		/*
		 * Sanity check that the iflib sysctl values are within the
		 * maximum supported range.
		 */
		if (sc->ifc_sysctl_ntxqs > scctx->isc_ntxqsets_max)
			sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets_max;
		if (sc->ifc_sysctl_nrxqs > scctx->isc_nrxqsets_max)
			sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets_max;
	}

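	/* Round the descriptor ring sizes up to a DBA_ALIGN multiple so the
	 * DMA allocations satisfy the hardware's descriptor base address
	 * alignment requirement.
	 */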
	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);

	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;

	scctx->isc_msix_bar = pci_msix_table_bar(dev);
	scctx->isc_rss_table_size = hw->func_caps.common_cap.rss_table_size;

	/*
	 * If the driver loads in recovery mode, disable Tx/Rx functionality
	 */
	if (recovery_mode)
		scctx->isc_txrx = &ice_recovery_txrx;
	else
		scctx->isc_txrx = &ice_txrx;

	/*
	 * If the driver loads in Safe mode or Recovery mode, disable
	 * advanced features including hardware offloads.
	 */
	if (safe_mode || recovery_mode) {
		scctx->isc_capenable = ICE_SAFE_CAPS;
		scctx->isc_tx_csum_flags = 0;
	} else {
		scctx->isc_capenable = ICE_FULL_CAPS;
		scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
	}

	scctx->isc_capabilities = scctx->isc_capenable;
} /* ice_setup_scctx */

/**
 * ice_if_attach_pre - Early device attach logic
 * @ctx: the iflib context structure
 *
 * Called by iflib during the attach process. Earliest main driver entry
 * point which performs necessary hardware and driver initialization. Called
 * before the Tx and Rx queues are allocated.
 */
static int
ice_if_attach_pre(if_ctx_t ctx)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	enum ice_fw_modes fw_mode;
	int status;
	if_softc_ctx_t scctx;
	struct ice_hw *hw;
	device_t dev;
	int err;

	device_printf(iflib_get_dev(ctx), "Loading the iflib ice driver\n");

	ice_set_state(&sc->state, ICE_STATE_ATTACHING);

	sc->ctx = ctx;
	sc->media = iflib_get_media(ctx);
	sc->sctx = iflib_get_sctx(ctx);
	sc->iflib_ctx_lock = iflib_ctx_lock_get(ctx);
	sc->ifp = iflib_get_ifp(ctx);

	dev = sc->dev = iflib_get_dev(ctx);
	scctx = sc->scctx = iflib_get_softc_ctx(ctx);

	hw = &sc->hw;
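	/* Back-pointer so shared code can reach the softc from the hw struct */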
	hw->back = sc;

	snprintf(sc->admin_mtx_name, sizeof(sc->admin_mtx_name),
		 "%s:admin", device_get_nameunit(dev));
	mtx_init(&sc->admin_mtx, sc->admin_mtx_name, NULL, MTX_DEF);
	callout_init_mtx(&sc->admin_timer, &sc->admin_mtx, 0);

	ASSERT_CTX_LOCKED(sc);

	if (ice_pci_mapping(sc)) {
		err = (ENXIO);
		goto destroy_admin_timer;
	}

	/* Save off the PCI information */
	ice_save_pci_info(hw, dev);

	/* create tunables as early as possible */
	ice_add_device_tunables(sc);

	/* Setup ControlQ lengths */
	ice_set_ctrlq_len(hw);

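/* Execution may return to this label: if loading a new DDP package succeeds
 * below, the hardware is deinitialized and then reinitialized against the
 * freshly loaded package.
 */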
reinit_hw:

	fw_mode = ice_get_fw_mode(hw);
	if (fw_mode == ICE_FW_MODE_REC) {
		device_printf(dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");

		err = ice_attach_pre_recovery_mode(sc);
		if (err)
			goto free_pci_mapping;

		return (0);
	}

	/* Initialize the hw data structure */
	status = ice_init_hw(hw);
	if (status) {
		if (status == ICE_ERR_FW_API_VER) {
			/* Enter recovery mode, so that the driver remains
			 * loaded. This way, if the system administrator
			 * cannot update the driver, they may still attempt to
			 * downgrade the NVM.
			 */
			err = ice_attach_pre_recovery_mode(sc);
			if (err)
				goto free_pci_mapping;

			return (0);
		} else {
			err = EIO;
			device_printf(dev, "Unable to initialize hw, err %s aq_err %s\n",
				      ice_status_str(status),
				      ice_aq_str(hw->adminq.sq_last_status));
		}
		goto free_pci_mapping;
	}

	ice_init_device_features(sc);

	/* Keep flag set by default */
	ice_set_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN);

	/* Notify firmware of the device driver version */
	err = ice_send_version(sc);
	if (err)
		goto deinit_hw;

	/*
	 * Success indicates a change was made that requires a reinitialization
	 * of the hardware
	 */
	err = ice_load_pkg_file(sc);
	if (!err) {
		ice_deinit_hw(hw);
		goto reinit_hw;
	}

	err = ice_init_link_events(sc);
	if (err) {
		device_printf(dev, "ice_init_link_events failed: %s\n",
			      ice_err_str(err));
		goto deinit_hw;
	}

	/* Initialize VLAN mode in FW; if dual VLAN mode is supported by the package
	 * and firmware, this will force them to use single VLAN mode.
	 */
	status = ice_set_vlan_mode(hw);
	if (status) {
		err = EIO;
		device_printf(dev, "Unable to initialize VLAN mode, err %s aq_err %s\n",
			      ice_status_str(status),
			      ice_aq_str(hw->adminq.sq_last_status));
		goto deinit_hw;
	}

	ice_print_nvm_version(sc);

	/* Setup the MAC address */
	iflib_set_mac(ctx, hw->port_info->mac.lan_addr);

	/* Setup the iflib softc context structure */
	ice_setup_scctx(sc);

	/* Initialize the Tx queue manager */
	err = ice_resmgr_init(&sc->tx_qmgr, hw->func_caps.common_cap.num_txq);
	if (err) {
		device_printf(dev, "Unable to initialize Tx queue manager: %s\n",
			      ice_err_str(err));
		goto deinit_hw;
	}

	/* Initialize the Rx queue manager */
	err = ice_resmgr_init(&sc->rx_qmgr, hw->func_caps.common_cap.num_rxq);
	if (err) {
		device_printf(dev, "Unable to initialize Rx queue manager: %s\n",
			      ice_err_str(err));
		goto free_tx_qmgr;
	}

	/* Initialize the PF device interrupt resource manager */
	err = ice_alloc_intr_tracking(sc);
	if (err)
		/* Errors are already printed */
		goto free_rx_qmgr;

	/* Determine maximum number of VSIs we'll prepare for */
	sc->num_available_vsi = min(ICE_MAX_VSI_AVAILABLE,
				    hw->func_caps.guar_num_vsi);

	if (!sc->num_available_vsi) {
		err = EIO;
		device_printf(dev, "No VSIs allocated to host\n");
		goto free_intr_tracking;
	}

	/* Allocate storage for the VSI pointers */
	sc->all_vsi = (struct ice_vsi **)
		malloc(sizeof(struct ice_vsi *) * sc->num_available_vsi,
		       M_ICE, M_WAITOK | M_ZERO);
	if (!sc->all_vsi) {
		err = ENOMEM;
		device_printf(dev, "Unable to allocate VSI array\n");
		goto free_intr_tracking;
	}

	/*
	 * Prepare the statically allocated primary PF VSI in the softc
	 * structure. Other VSIs will be dynamically allocated as needed.
	 */
	ice_setup_pf_vsi(sc);

	ice_alloc_vsi_qmap(&sc->pf_vsi, scctx->isc_ntxqsets_max,
	    scctx->isc_nrxqsets_max);

	/* Allocate MSI-X vectors (due to isc_flags IFLIB_SKIP_MSIX) */
	err = ice_allocate_msix(sc);
	if (err)
		goto free_main_vsi;

	return 0;

free_main_vsi:
	/* ice_release_vsi will free the queue maps if they were allocated */
	ice_release_vsi(&sc->pf_vsi);
	free(sc->all_vsi, M_ICE);
	sc->all_vsi = NULL;
free_intr_tracking:
	ice_free_intr_tracking(sc);
free_rx_qmgr:
	ice_resmgr_destroy(&sc->rx_qmgr);
free_tx_qmgr:
	ice_resmgr_destroy(&sc->tx_qmgr);
deinit_hw:
	ice_deinit_hw(hw);
free_pci_mapping:
	ice_free_pci_mapping(sc);
destroy_admin_timer:
	mtx_lock(&sc->admin_mtx);
	callout_stop(&sc->admin_timer);
	mtx_unlock(&sc->admin_mtx);
	mtx_destroy(&sc->admin_mtx);
	return err;
} /* ice_if_attach_pre */

/**
 * ice_attach_pre_recovery_mode - Limited driver attach_pre for FW recovery
 * @sc: the device private softc
 *
 * Loads the device driver in limited Firmware Recovery mode, intended to
 * allow users to update the firmware to attempt to recover the device.
 *
 * @remark We may enter recovery mode in case either (a) the firmware is
 * detected to be in an invalid state and must be re-programmed, or (b) the
 * driver detects that the loaded firmware has a non-compatible API version
 * that the driver cannot operate with.
 */
static int
ice_attach_pre_recovery_mode(struct ice_softc *sc)
{
	ice_set_state(&sc->state, ICE_STATE_RECOVERY_MODE);

	/* Setup the iflib softc context */
	ice_setup_scctx(sc);

	/* Setup the PF VSI back pointer */
	sc->pf_vsi.sc = sc;

	/*
	 * We still need to allocate MSI-X vectors since we need one vector to
	 * run the administrative interrupt
	 */
	return ice_allocate_msix(sc);
}

/**
 * ice_update_link_status - notify OS of link state change
 * @sc: device private softc structure
 * @update_media: true if we should update media even if link didn't change
 *
 * Called to notify iflib core of link status changes. Should be called once
 * during attach_post, and whenever link status changes during runtime.
 *
 * This call only updates the currently supported media types if the link
 * status changed, or if update_media is set to true.
 */
static void
ice_update_link_status(struct ice_softc *sc, bool update_media)
{
	struct ice_hw *hw = &sc->hw;
	int status;

	/* Never report link up when in recovery mode */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		return;

	/* Report link status to iflib only once each time it changes */
	if (!ice_testandset_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED)) {
		if (sc->link_up) { /* link is up */
			uint64_t baudrate = ice_aq_speed_to_rate(sc->hw.port_info);

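			/* On a down-to-up link transition, reapply the
			 * default local LLDP MIB.
			 */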
			if (!(hw->port_info->phy.link_info_old.link_info & ICE_AQ_LINK_UP))
				ice_set_default_local_lldp_mib(sc);

			iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate);
			ice_rdma_link_change(sc, LINK_STATE_UP, baudrate);

			ice_link_up_msg(sc);
		} else { /* link is down */
			iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
			ice_rdma_link_change(sc, LINK_STATE_DOWN, 0);
		}
		update_media = true;
	}

	/* Update the supported media types */
	if (update_media && !ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
		status = ice_add_media_types(sc, sc->media);
		if (status)
			device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
				      ice_status_str(status),
				      ice_aq_str(hw->adminq.sq_last_status));
	}
}

/**
 * ice_if_attach_post - Late device attach logic
 * @ctx: the iflib context structure
 *
 * Called by iflib to finish up attaching the device. Performs any attach
 * logic which must wait until after the Tx and Rx queues have been
 * allocated.
 */
static int
ice_if_attach_post(if_ctx_t ctx)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	if_t ifp = iflib_get_ifp(ctx);
	int status;
	int err;

	ASSERT_CTX_LOCKED(sc);

	/* We don't yet support loading if MSI-X is not supported */
	if (sc->scctx->isc_intr != IFLIB_INTR_MSIX) {
		device_printf(sc->dev, "The ice driver does not support loading without MSI-X\n");
		return (ENOTSUP);
	}

	/* The ifnet structure hasn't yet been initialized when the attach_pre
	 * handler is called, so wait until attach_post to setup the
	 * isc_max_frame_size.
	 */
	sc->scctx->isc_max_frame_size = if_getmtu(ifp) +
		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;

	/*
	 * If we are in recovery mode, only perform a limited subset of
	 * initialization to support NVM recovery.
	 */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
		ice_attach_post_recovery_mode(sc);
		return (0);
	}

	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;

	err = ice_initialize_vsi(&sc->pf_vsi);
	if (err) {
		device_printf(sc->dev, "Unable to initialize Main VSI: %s\n",
			      ice_err_str(err));
		return err;
	}

	/* Enable FW health event reporting */
	ice_init_health_events(sc);

	/* Configure the main PF VSI for RSS */
	err = ice_config_rss(&sc->pf_vsi);
	if (err) {
		device_printf(sc->dev,
			      "Unable to configure RSS for the main VSI, err %s\n",
			      ice_err_str(err));
		return err;
	}

	/* Configure switch to drop transmitted LLDP and PAUSE frames */
	err = ice_cfg_pf_ethertype_filters(sc);
	if (err)
		return err;

	ice_get_and_print_bus_info(sc);

	ice_set_link_management_mode(sc);

	ice_init_saved_phy_cfg(sc);

	ice_cfg_pba_num(sc);

	/* Set a default value for PFC mode on attach since the FW state is unknown
	 * before sysctl tunables are executed and it can't be queried. This fixes an
	 * issue when loading the driver with the FW LLDP agent enabled but the FW
	 * was previously in DSCP PFC mode.
	 */
	status = ice_aq_set_pfc_mode(&sc->hw, ICE_AQC_PFC_VLAN_BASED_PFC, NULL);
	if (status)
		device_printf(sc->dev, "Setting pfc mode failed, status %s\n", ice_status_str(status));

	ice_add_device_sysctls(sc);

	/* Get DCBX/LLDP state and start DCBX agent */
	ice_init_dcb_setup(sc);

	/* Setup link, if PHY FW is ready */
	ice_init_link(sc);

	/* Configure interrupt causes for the administrative interrupt */
	ice_configure_misc_interrupts(sc);

	/* Enable ITR 0 right away, so that we can handle admin interrupts */
	ice_enable_intr(&sc->hw, sc->irqvs[0].me);

	err = ice_rdma_pf_attach(sc);
	if (err)
		return (err);

	/* Start the admin timer */
	mtx_lock(&sc->admin_mtx);
	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
	mtx_unlock(&sc->admin_mtx);

	if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
		 !ice_test_state(&sc->state, ICE_STATE_NO_MEDIA))
		ice_set_state(&sc->state, ICE_STATE_FIRST_INIT_LINK);

	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);

	return 0;
} /* ice_if_attach_post */

/**
 * ice_attach_post_recovery_mode - Limited driver attach_post for FW recovery
 * @sc: the device private softc
 *
 * Performs minimal work to prepare the driver to recover an NVM in case the
 * firmware is in recovery mode.
 */
static void
ice_attach_post_recovery_mode(struct ice_softc *sc)
{
	/* Configure interrupt causes for the administrative interrupt */
	ice_configure_misc_interrupts(sc);

	/* Enable ITR 0 right away, so that we can handle admin interrupts */
	ice_enable_intr(&sc->hw, sc->irqvs[0].me);

	/* Start the admin timer */
	mtx_lock(&sc->admin_mtx);
	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
	mtx_unlock(&sc->admin_mtx);

	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
}

/**
 * ice_free_irqvs - Free IRQ vector memory
 * @sc: the device private softc structure
 *
 * Free IRQ vector memory allocated during ice_if_msix_intr_assign.
 */
static void
ice_free_irqvs(struct ice_softc *sc)
{
	struct ice_vsi *vsi = &sc->pf_vsi;
	if_ctx_t ctx = sc->ctx;
	int i;

	/* If the irqvs array is NULL, then there are no vectors to free */
	if (sc->irqvs == NULL)
		return;

	/* Free the IRQ vectors */
	for (i = 0; i < sc->num_irq_vectors; i++)
		iflib_irq_free(ctx, &sc->irqvs[i].irq);

	/* Clear the irqv pointers */
	for (i = 0; i < vsi->num_rx_queues; i++)
		vsi->rx_queues[i].irqv = NULL;

	for (i = 0; i < vsi->num_tx_queues; i++)
		vsi->tx_queues[i].irqv = NULL;

	/* Release the vector array memory */
	free(sc->irqvs, M_ICE);
	sc->irqvs = NULL;
	sc->num_irq_vectors = 0;
}

/**
 * ice_if_detach - Device driver detach logic
 * @ctx: iflib context structure
 *
 * Perform device shutdown logic to detach the device driver.
 *
 * Note that there is no guarantee of the ordering of ice_if_queues_free() and
 * ice_if_detach(). It is possible for the functions to be called in either
 * order, and they must not assume a strict ordering.
 */
static int
ice_if_detach(if_ctx_t ctx)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	struct ice_vsi *vsi = &sc->pf_vsi;
	int status;
	int i;

	ASSERT_CTX_LOCKED(sc);

	/* Indicate that we're detaching */
	ice_set_state(&sc->state, ICE_STATE_DETACHING);

	/* Stop the admin timer */
	mtx_lock(&sc->admin_mtx);
	callout_stop(&sc->admin_timer);
	mtx_unlock(&sc->admin_mtx);
	mtx_destroy(&sc->admin_mtx);

	/* Remove additional interfaces if they exist */
	if (sc->mirr_if)
		ice_destroy_mirror_interface(sc);
	ice_rdma_pf_detach(sc);

	/* Free allocated media types */
	ifmedia_removeall(sc->media);

	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
	 * pointers. Note, the calls here and those in ice_if_queues_free()
	 * are *BOTH* necessary, as we cannot guarantee which path will be
	 * run first
	 */
	ice_vsi_del_txqs_ctx(vsi);
	ice_vsi_del_rxqs_ctx(vsi);

	/* Release MSI-X resources */
	ice_free_irqvs(sc);

	for (i = 0; i < sc->num_available_vsi; i++) {
		if (sc->all_vsi[i])
			ice_release_vsi(sc->all_vsi[i]);
	}

	if (sc->all_vsi) {
		free(sc->all_vsi, M_ICE);
		sc->all_vsi = NULL;
	}

	/* Release MSI-X memory */
	pci_release_msi(sc->dev);

	if (sc->msix_table != NULL) {
		bus_release_resource(sc->dev, SYS_RES_MEMORY,
				     rman_get_rid(sc->msix_table),
				     sc->msix_table);
		sc->msix_table = NULL;
	}

	ice_free_intr_tracking(sc);

	/* Destroy the queue managers */
	ice_resmgr_destroy(&sc->tx_qmgr);
	ice_resmgr_destroy(&sc->rx_qmgr);

	if (!ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		ice_deinit_hw(&sc->hw);

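	/* The PF reset below may take some time to complete; drop the iflib
	 * context lock so other work is not blocked while waiting.
	 */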
	IFLIB_CTX_UNLOCK(sc);
	status = ice_reset(&sc->hw, ICE_RESET_PFR);
	IFLIB_CTX_LOCK(sc);
	if (status) {
		device_printf(sc->dev, "device PF reset failed, err %s\n",
			      ice_status_str(status));
	}

	ice_free_pci_mapping(sc);

	return 0;
} /* ice_if_detach */

/**
 * ice_if_tx_queues_alloc - Allocate Tx queue memory
 * @ctx: iflib context structure
 * @vaddrs: virtual addresses for the queue memory
 * @paddrs: physical addresses for the queue memory
 * @ntxqs: the number of Tx queues per set (should always be 1)
 * @ntxqsets: the number of Tx queue sets to allocate
 *
 * Called by iflib to allocate Tx queues for the device. Allocates driver
 * memory to track each queue, the status arrays used for descriptor
 * status reporting, and Tx queue sysctls.
 */
static int
ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
		       int __invariant_only ntxqs, int ntxqsets)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	struct ice_vsi *vsi = &sc->pf_vsi;
	struct ice_tx_queue *txq;
	int err, i, j;

	MPASS(ntxqs == 1);
	MPASS(sc->scctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
	ASSERT_CTX_LOCKED(sc);

	/* Do not bother allocating queues if we're in recovery mode */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		return (0);

	/* Allocate queue structure memory */
	if (!(vsi->tx_queues =
	      (struct ice_tx_queue *) malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
		device_printf(sc->dev, "Unable to allocate Tx queue memory\n");
		return (ENOMEM);
	}

	/* Allocate report status arrays */
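	/* Each tx_rsq entry records the index of a descriptor whose
	 * completion status the hardware will report, so Tx cleanup can walk
	 * completions in order.
	 */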
	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
		if (!(txq->tx_rsq =
		      (uint16_t *) malloc(sizeof(uint16_t) * sc->scctx->isc_ntxd[0], M_ICE, M_NOWAIT))) {
			device_printf(sc->dev, "Unable to allocate tx_rsq memory\n");
			err = ENOMEM;
			goto free_tx_queues;
		}
		/* Initialize report status array */
		for (j = 0; j < sc->scctx->isc_ntxd[0]; j++)
			txq->tx_rsq[j] = QIDX_INVALID;
	}

	/* Assign queues from PF space to the main VSI */
	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, ntxqsets);
	if (err) {
		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
			      ice_err_str(err));
		goto free_tx_queues;
	}
	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;

	/* Add Tx queue sysctls context */
	ice_vsi_add_txqs_ctx(vsi);

	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
		/* q_handle == me when only one TC */
		txq->me = txq->q_handle = i;
		txq->vsi = vsi;

		/* store the queue size for easier access */
		txq->desc_count = sc->scctx->isc_ntxd[0];

		/* get the virtual and physical address of the hardware queues */
		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
		txq->tx_paddr = paddrs[i];

		ice_add_txq_sysctls(txq);
	}

	vsi->num_tx_queues = ntxqsets;

	return (0);

free_tx_queues:
	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
		if (txq->tx_rsq != NULL) {
			free(txq->tx_rsq, M_ICE);
			txq->tx_rsq = NULL;
		}
	}
	free(vsi->tx_queues, M_ICE);
	vsi->tx_queues = NULL;
	return err;
}

/**
 * ice_if_rx_queues_alloc - Allocate Rx queue memory
 * @ctx: iflib context structure
 * @vaddrs: virtual addresses for the queue memory
 * @paddrs: physical addresses for the queue memory
 * @nrxqs: number of Rx queues per set (should always be 1)
 * @nrxqsets: number of Rx queue sets to allocate
 *
 * Called by iflib to allocate Rx queues for the device. Allocates driver
 * memory to track each queue, as well as sets up the Rx queue sysctls.
 */
static int
ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
		       int __invariant_only nrxqs, int nrxqsets)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	struct ice_vsi *vsi = &sc->pf_vsi;
	struct ice_rx_queue *rxq;
	int err, i;

	MPASS(nrxqs == 1);
	MPASS(sc->scctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
	ASSERT_CTX_LOCKED(sc);

	/* Do not bother allocating queues if we're in recovery mode */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		return (0);

	/* Allocate queue structure memory */
	if (!(vsi->rx_queues =
	      (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
		device_printf(sc->dev, "Unable to allocate Rx queue memory\n");
		return (ENOMEM);
	}

	/* Assign queues from PF space to the main VSI */
	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, nrxqsets);
	if (err) {
		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
			      ice_err_str(err));
		goto free_rx_queues;
	}
	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;

	/* Add Rx queue sysctls context */
	ice_vsi_add_rxqs_ctx(vsi);

	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
		rxq->me = i;
		rxq->vsi = vsi;

		/* store the queue size for easier access */
		rxq->desc_count = sc->scctx->isc_nrxd[0];

		/* get the virtual and physical address of the hardware queues */
		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
		rxq->rx_paddr = paddrs[i];

		ice_add_rxq_sysctls(rxq);
	}

	vsi->num_rx_queues = nrxqsets;

	return (0);

free_rx_queues:
	free(vsi->rx_queues, M_ICE);
	vsi->rx_queues = NULL;
	return err;
}

/**
 * ice_if_queues_free - Free queue memory
 * @ctx: the iflib context structure
 *
 * Free queue memory allocated by ice_if_tx_queues_alloc() and
 * ice_if_rx_queues_alloc().
 *
 * There is no guarantee that ice_if_queues_free() and ice_if_detach() will be
 * called in the same order. It's possible for ice_if_queues_free() to be
 * called prior to ice_if_detach(), and vice versa.
 *
 * For this reason, the main VSI is a static member of the ice_softc, which is
 * not free'd until after iflib finishes calling both of these functions.
 *
 * Thus, care must be taken in how we manage the memory being freed by this
 * function, and in what tasks it can and must perform.
 */
static void
ice_if_queues_free(if_ctx_t ctx)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	struct ice_vsi *vsi = &sc->pf_vsi;
	struct ice_tx_queue *txq;
	int i;

	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
	 * pointers. Note, the calls here and those in ice_if_detach()
	 * are *BOTH* necessary, as we cannot guarantee which path will be
	 * run first
	 */
	ice_vsi_del_txqs_ctx(vsi);
	ice_vsi_del_rxqs_ctx(vsi);

	/* Release MSI-X IRQ vectors, if not yet released in ice_if_detach */
	ice_free_irqvs(sc);

	if (vsi->tx_queues != NULL) {
		/* free the tx_rsq arrays */
		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
			if (txq->tx_rsq != NULL) {
				free(txq->tx_rsq, M_ICE);
				txq->tx_rsq = NULL;
			}
		}
		free(vsi->tx_queues, M_ICE);
		vsi->tx_queues = NULL;
		vsi->num_tx_queues = 0;
	}
	if (vsi->rx_queues != NULL) {
		free(vsi->rx_queues, M_ICE);
		vsi->rx_queues = NULL;
		vsi->num_rx_queues = 0;
	}
}

/**
 * ice_msix_que - Fast interrupt handler for MSI-X receive queues
 * @arg: The Rx queue memory
 *
 * Interrupt filter function for iflib MSI-X interrupts. Called by iflib when
 * an MSI-X interrupt for a given queue is triggered. Currently this just asks
 * iflib to schedule the main Rx thread.
 */
static int
ice_msix_que(void *arg)
{
	struct ice_rx_queue __unused *rxq = (struct ice_rx_queue *)arg;

	/* TODO: dynamic ITR algorithm?? */

	return (FILTER_SCHEDULE_THREAD);
}

/**
 * ice_msix_admin - Fast interrupt handler for MSI-X admin interrupt
 * @arg: pointer to device softc memory
 *
 * Called by iflib when an administrative interrupt occurs. Should perform any
 * fast logic for handling the interrupt cause, and then indicate whether the
 * admin task needs to be queued.
 */
static int
ice_msix_admin(void *arg)
{
	struct ice_softc *sc = (struct ice_softc *)arg;
	struct ice_hw *hw = &sc->hw;
	device_t dev = sc->dev;
	u32 oicr;

	/* There is no safe way to modify the enabled miscellaneous causes of
	 * the OICR vector at runtime, as doing so would be prone to race
	 * conditions. Reading PFINT_OICR will unmask the associated interrupt
	 * causes and allow future interrupts to occur. The admin interrupt
	 * vector will not be re-enabled until after we exit this function,
	 * but any delayed tasks must be resilient against possible "late
	 * arrival" interrupts that occur while we're already handling the
	 * task. This is done by using state bits and serializing these
	 * delayed tasks via the admin status task function.
	 */
	oicr = rd32(hw, PFINT_OICR);

	/* Processing multiple controlq interrupts on a single vector does not
	 * provide an indication of which controlq triggered the interrupt.
	 * We might try reading the INTEVENT bit of the respective PFINT_*_CTL
	 * registers. However, the INTEVENT bit is not guaranteed to be set as
	 * it gets automatically cleared when the hardware acknowledges the
	 * interrupt.
	 *
	 * This means we don't really have a good indication of whether or
	 * which controlq triggered this interrupt. We'll just notify the
	 * admin task that it should check all the controlqs.
	 */
	ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);

	if (oicr & PFINT_OICR_VFLR_M) {
		ice_set_state(&sc->state, ICE_STATE_VFLR_PENDING);
	}

	if (oicr & PFINT_OICR_MAL_DETECT_M) {
		ice_set_state(&sc->state, ICE_STATE_MDD_PENDING);
	}

	if (oicr & PFINT_OICR_GRST_M) {
		u32 reset;

		reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
			GLGEN_RSTAT_RESET_TYPE_S;

		if (reset == ICE_RESET_CORER)
			sc->soft_stats.corer_count++;
		else if (reset == ICE_RESET_GLOBR)
			sc->soft_stats.globr_count++;
		else
			sc->soft_stats.empr_count++;

		/* There are a couple of bits at play for handling resets.
		 * First, the ICE_STATE_RESET_OICR_RECV bit is used to
		 * indicate that the driver has received an OICR with a reset
		 * bit active, indicating that a CORER/GLOBR/EMPR is about to
		 * happen. Second, we set hw->reset_ongoing to indicate that
		 * the hardware is in reset. We will set this back to false as
		 * soon as the driver has determined that the hardware is out
		 * of reset.
		 *
		 * If the driver wishes to trigger a request, it can set one of
		 * the ICE_STATE_RESET_*_REQ bits, which will trigger the
		 * correct type of reset.
		 */
		if (!ice_testandset_state(&sc->state, ICE_STATE_RESET_OICR_RECV)) {
			hw->reset_ongoing = true;
			/*
			 * During the NVM update process, there is a driver reset and link
			 * goes down and then up. The below if-statement prevents a second
			 * link flap from occurring in ice_if_init().
			 */
			if (if_getflags(sc->ifp) & IFF_UP)
				ice_set_state(&sc->state, ICE_STATE_FIRST_INIT_LINK);
		}
	}

	if (oicr & PFINT_OICR_ECC_ERR_M) {
		device_printf(dev, "ECC Error detected!\n");
		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
	}

	if (oicr & (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M)) {
		if (oicr & PFINT_OICR_HMC_ERR_M)
			/* Log the HMC errors */
			ice_log_hmc_error(hw, dev);
		ice_rdma_notify_pe_intr(sc, oicr);
	}

	if (oicr & PFINT_OICR_PCI_EXCEPTION_M) {
		device_printf(dev, "PCI Exception detected!\n");
		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
	}

	return (FILTER_SCHEDULE_THREAD);
}

/**
 * ice_allocate_msix - Allocate MSI-X vectors for the interface
 * @sc: the device private softc
 *
 * Map the MSI-X bar, and then request MSI-X vectors in a two-stage process.
 *
 * First, determine a suitable total number of vectors based on the number
 * of CPUs, RSS buckets, the administrative vector, and other demands such as
 * RDMA.
 *
 * Request the desired amount of vectors, and see how many we obtain. If we
 * don't obtain as many as desired, reduce the demands by lowering the number
 * of requested queues or reducing the demand from other features such as
 * RDMA.
 *
 * @remark This function is required because the driver sets the
 * IFLIB_SKIP_MSIX flag indicating that the driver will manage MSI-X vectors
 * manually.
 *
 * @remark This driver will only use MSI-X vectors. If this is not possible,
 * neither MSI nor legacy interrupts will be tried.
 *
 * @remark If applicable, os_imgr is initialized here to keep track of the
 * assignments of extra MSI-X vectors.
 *
 * @post on success this function must set the following scctx parameters:
 * isc_vectors, isc_nrxqsets, isc_ntxqsets, and isc_intr.
 *
 * @returns zero on success or an error code on failure.
 */
static int
ice_allocate_msix(struct ice_softc *sc)
{
	bool iflib_override_queue_count = false;
	if_softc_ctx_t scctx = sc->scctx;
	device_t dev = sc->dev;
	cpuset_t cpus;
	int bar, queues, vectors, requested;
	int err = 0;
	int rdma;

	/* Allocate the MSI-X bar */
	bar = scctx->isc_msix_bar;
	sc->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE);
	if (!sc->msix_table) {
		device_printf(dev, "Unable to map MSI-X table\n");
		return (ENOMEM);
	}

	/* Check if the iflib queue count sysctls have been set */
	if (sc->ifc_sysctl_ntxqs || sc->ifc_sysctl_nrxqs)
		iflib_override_queue_count = true;

	err = bus_get_cpus(dev, INTR_CPUS, sizeof(cpus), &cpus);
	if (err) {
		device_printf(dev, "%s: Unable to fetch the CPU list: %s\n",
			      __func__, ice_err_str(err));
		CPU_COPY(&all_cpus, &cpus);
	}

	/* Attempt to mimic behavior of iflib_msix_init */
	if (iflib_override_queue_count) {
		/*
		 * If the override sysctls have been set, limit the queues to
		 * the number of logical CPUs.
		 */
		queues = mp_ncpus;
	} else {
		/*
		 * Otherwise, limit the queue count to the CPUs associated
		 * with the NUMA node the device is associated with.
		 */
		queues = CPU_COUNT(&cpus);
	}

	/* Clamp to the number of RSS buckets */
	queues = imin(queues, rss_getnumbuckets());

	/*
	 * Clamp the number of queue pairs to the minimum of the requested Tx
	 * and Rx queues.
	 */
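	/* Note: the GNU "a ?: b" extension evaluates to a when a is nonzero,
	 * so a nonzero sysctl override takes precedence over the cached
	 * scctx value.
	 */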
	queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets);
	queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets);

	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA)) {
		/*
		 * Choose a number of RDMA vectors based on the number of CPUs
		 * up to a maximum
		 */
		rdma = min(CPU_COUNT(&cpus), ICE_RDMA_MAX_MSIX);

		/* Further limit by the user configurable tunable */
		rdma = min(rdma, ice_rdma_max_msix);
	} else {
		rdma = 0;
	}

	/*
	 * Determine the number of vectors to request. Note that we also need
	 * to allocate one vector for administrative tasks.
	 */
	requested = rdma + queues + 1;
	/* Add extra vectors requested by the user for later subinterface
	 * creation.
	 */
	if_ctx_t ctx = sc->ctx;
	u32 extra_vectors = iflib_get_extra_msix_vectors_sysctl(ctx);
	requested += extra_vectors;

	vectors = requested;
	err = pci_alloc_msix(dev, &vectors);
	if (err) {
		device_printf(dev, "Failed to allocate %d MSI-X vectors, err %s\n",
			      vectors, ice_err_str(err));
		goto err_free_msix_table;
	}

	/* If we don't receive enough vectors, reduce demands */
	if (vectors < requested) {
		int diff = requested - vectors;

		device_printf(dev, "Requested %d MSI-X vectors, but got only %d\n",
			      requested, vectors);

		diff += extra_vectors;
		extra_vectors = 0;
		/*
		 * The OS didn't grant us the requested number of vectors.
		 * Check to see if we can reduce demands by limiting the
		 * number of vectors allocated to certain features.
		 */

		if (rdma >= diff) {
			/* Reduce the number of RDMA vectors we reserve */
			rdma -= diff;
			diff = 0;
		} else {
			/* Disable RDMA and reduce the difference */
			ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
			diff -= rdma;
			rdma = 0;
		}

		/*
		 * If we still have a difference, we need to reduce the number
		 * of queue pairs.
		 *
		 * However, we still need at least one vector for the admin
		 * interrupt and one queue pair.
		 */
		if (queues <= diff) {
			device_printf(dev, "Unable to allocate sufficient MSI-X vectors\n");
			err = (ERANGE);
			goto err_pci_release_msi;
		}

		queues -= diff;
	}

	device_printf(dev, "Using %d Tx and Rx queues\n", queues);
	if (rdma)
		device_printf(dev, "Reserving %d MSI-X interrupts for iRDMA\n",
			      rdma);
	device_printf(dev, "Using MSI-X interrupts with %d vectors\n",
		      vectors);

	/* Split resulting vectors back into requested splits */
	scctx->isc_vectors = vectors;
	scctx->isc_nrxqsets = queues;
	scctx->isc_ntxqsets = queues;
	scctx->isc_intr = IFLIB_INTR_MSIX;

	sc->irdma_vectors = rdma;

	/* Interrupt allocation tracking isn't required in recovery mode,
	 * since neither RDMA nor VFs are enabled.
	 */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		return (0);

	/* Keep track of which interrupt indices are being used for what */
	sc->lan_vectors = vectors - rdma;
	sc->lan_vectors -= extra_vectors;
	err = ice_resmgr_assign_contiguous(&sc->dev_imgr, sc->pf_imap, sc->lan_vectors);
	if (err) {
		device_printf(dev, "Unable to assign PF interrupt mapping: %s\n",
			      ice_err_str(err));
		goto err_pci_release_msi;
	}
	err = ice_resmgr_assign_contiguous(&sc->dev_imgr, sc->rdma_imap, rdma);
	if (err) {
		device_printf(dev, "Unable to assign PF RDMA interrupt mapping: %s\n",
			      ice_err_str(err));
		goto err_release_pf_imap;
	}
	sc->extra_vectors = extra_vectors;
	/* Setup another resource manager to track the assignments of extra OS
	 * vectors. These OS interrupt allocations don't need to be contiguous,
	 * unlike the ones that come from the device.
	 */
	err = ice_resmgr_init(&sc->os_imgr, sc->extra_vectors);
	if (err) {
		device_printf(dev, "Unable to initialize OS extra interrupt manager: %s\n",
			      ice_err_str(err));
		ice_resmgr_release_map(&sc->dev_imgr, sc->rdma_imap,
					    rdma);
		goto err_release_pf_imap;
	}
	return (0);

err_release_pf_imap:
	ice_resmgr_release_map(&sc->dev_imgr, sc->pf_imap,
				    sc->lan_vectors);
err_pci_release_msi:
	pci_release_msi(dev);
err_free_msix_table:
	if (sc->msix_table != NULL) {
		bus_release_resource(sc->dev, SYS_RES_MEMORY,
				rman_get_rid(sc->msix_table),
				sc->msix_table);
		sc->msix_table = NULL;
	}

	return (err);
}

/**
 * ice_if_msix_intr_assign - Assign MSI-X interrupt vectors to queues
 * @ctx: the iflib context structure
 * @msix: the number of vectors we were assigned
 *
 * Called by iflib to assign MSI-X vectors to queues. Currently requires that
 * we get at least the same number of vectors as we have queues, and that we
 * always have the same number of Tx and Rx queues.
 *
 * Tx queues use a softirq instead of using their own hardware interrupt.
 */
static int
ice_if_msix_intr_assign(if_ctx_t ctx, int msix)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	struct ice_vsi *vsi = &sc->pf_vsi;
	int err, i, vector;

	ASSERT_CTX_LOCKED(sc);

	if (vsi->num_rx_queues != vsi->num_tx_queues) {
		device_printf(sc->dev,
			      "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
			      vsi->num_tx_queues, vsi->num_rx_queues);
		return (EOPNOTSUPP);
	}

	if (msix < (vsi->num_rx_queues + 1)) {
		device_printf(sc->dev,
			      "Not enough MSI-X vectors to assign one vector to each queue pair\n");
		return (EOPNOTSUPP);
	}

	/* Save the number of vectors for future use */
	sc->num_irq_vectors = vsi->num_rx_queues + 1;

	/* Allocate space to store the IRQ vector data */
	if (!(sc->irqvs =
	      (struct ice_irq_vector *) malloc(sizeof(struct ice_irq_vector) * (sc->num_irq_vectors),
					       M_ICE, M_NOWAIT))) {
		device_printf(sc->dev,
			      "Unable to allocate irqv memory\n");
		return (ENOMEM);
	}

	/* Administrative interrupt events will use vector 0 */
	err = iflib_irq_alloc_generic(ctx, &sc->irqvs[0].irq, 1, IFLIB_INTR_ADMIN,
				      ice_msix_admin, sc, 0, "admin");
	if (err) {
		device_printf(sc->dev,
			      "Failed to register Admin queue handler: %s\n",
			      ice_err_str(err));
		goto free_irqvs;
	}
	sc->irqvs[0].me = 0;

	/* Do not allocate queue interrupts when in recovery mode */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		return (0);

	int rid;
	for (i = 0, vector = 1; i < vsi->num_rx_queues; i++, vector++) {
		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
		struct ice_tx_queue *txq = &vsi->tx_queues[i];
		char irq_name[16];

1650 		rid = vector + 1;
1651 
1652 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
1653 		err = iflib_irq_alloc_generic(ctx, &sc->irqvs[vector].irq, rid,
1654 					      IFLIB_INTR_RXTX, ice_msix_que,
1655 					      rxq, rxq->me, irq_name);
1656 		if (err) {
1657 			device_printf(sc->dev,
1658 				      "Failed to allocate q int %d err: %s\n",
1659 				      i, ice_err_str(err));
1660 			vector--;
1661 			i--;
1662 			goto fail;
1663 		}
1664 		sc->irqvs[vector].me = vector;
1665 		rxq->irqv = &sc->irqvs[vector];
1666 
1667 		bzero(irq_name, sizeof(irq_name));
1668 
1669 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
1670 		iflib_softirq_alloc_generic(ctx, &sc->irqvs[vector].irq,
1671 					    IFLIB_INTR_TX, txq,
1672 					    txq->me, irq_name);
1673 		txq->irqv = &sc->irqvs[vector];
1674 	}
1675 
1676 	/* For future interrupt assignments */
1677 	sc->last_rid = rid + sc->irdma_vectors;
1678 
1679 	return (0);
1680 fail:
1681 	for (; i >= 0; i--, vector--)
1682 		iflib_irq_free(ctx, &sc->irqvs[vector].irq);
1683 	iflib_irq_free(ctx, &sc->irqvs[0].irq);
1684 free_irqvs:
1685 	free(sc->irqvs, M_ICE);
1686 	sc->irqvs = NULL;
1687 	return (err);
1688 }
1689 
1690 /**
1691  * ice_if_mtu_set - Set the device MTU
1692  * @ctx: iflib context structure
1693  * @mtu: the MTU requested
1694  *
1695  * Called by iflib to configure the device's Maximum Transmission Unit (MTU).
1696  *
1697  * @pre assumes the caller holds the iflib CTX lock
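 *
 * @remark as a worked example, the standard 1500-byte MTU yields an
 * isc_max_frame_size of 1500 + 14 (Ethernet header) + 4 (CRC) +
 * 4 (VLAN tag) = 1522 bytes.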
1698  */
1699 static int
1700 ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu)
1701 {
1702 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1703 
1704 	ASSERT_CTX_LOCKED(sc);
1705 
1706 	/* Do not support configuration when in recovery mode */
1707 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1708 		return (ENOSYS);
1709 
1710 	if (mtu < ICE_MIN_MTU || mtu > ICE_MAX_MTU)
1711 		return (EINVAL);
1712 
1713 	sc->scctx->isc_max_frame_size = mtu +
1714 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
1715 
1716 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
1717 
1718 	return (0);
1719 }
1720 
1721 /**
1722  * ice_if_intr_enable - Enable device interrupts
1723  * @ctx: iflib context structure
1724  *
1725  * Called by iflib to request enabling device interrupts.
1726  */
1727 static void
1728 ice_if_intr_enable(if_ctx_t ctx)
1729 {
1730 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1731 	struct ice_vsi *vsi = &sc->pf_vsi;
1732 	struct ice_hw *hw = &sc->hw;
1733 
1734 	ASSERT_CTX_LOCKED(sc);
1735 
1736 	/* Enable ITR 0 */
1737 	ice_enable_intr(hw, sc->irqvs[0].me);
1738 
1739 	/* Do not enable queue interrupts in recovery mode */
1740 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1741 		return;
1742 
1743 	/* Enable all queue interrupts */
1744 	for (int i = 0; i < vsi->num_rx_queues; i++)
1745 		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
1746 }
1747 
1748 /**
1749  * ice_if_intr_disable - Disable device interrupts
1750  * @ctx: iflib context structure
1751  *
1752  * Called by iflib to request disabling device interrupts.
1753  */
1754 static void
1755 ice_if_intr_disable(if_ctx_t ctx)
1756 {
1757 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1758 	struct ice_hw *hw = &sc->hw;
1759 	unsigned int i;
1760 
1761 	ASSERT_CTX_LOCKED(sc);
1762 
1763 	/* IFDI_INTR_DISABLE may be called prior to interrupts actually being
1764 	 * assigned to queues. Instead of assuming that the interrupt
1765 	 * assignment in the rx_queues structure is valid, just disable all
1766 	 * possible interrupts.
1767 	 *
1768 	 * Note that we choose not to disable ITR 0 because this handles the
1769 	 * AdminQ interrupts, and we want to keep processing these even when
1770 	 * the interface is offline.
1771 	 */
1772 	for (i = 1; i < hw->func_caps.common_cap.num_msix_vectors; i++)
1773 		ice_disable_intr(hw, i);
1774 }
1775 
1776 /**
1777  * ice_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
1778  * @ctx: iflib context structure
1779  * @rxqid: the Rx queue to enable
1780  *
1781  * Enable a specific Rx queue interrupt.
1782  *
1783  * This function is not protected by the iflib CTX lock.
1784  */
1785 static int
1786 ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
1787 {
1788 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1789 	struct ice_vsi *vsi = &sc->pf_vsi;
1790 	struct ice_hw *hw = &sc->hw;
1791 
1792 	/* Do not enable queue interrupts in recovery mode */
1793 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1794 		return (ENOSYS);
1795 
1796 	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
1797 	return (0);
1798 }
1799 
1800 /**
1801  * ice_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
1802  * @ctx: iflib context structure
1803  * @txqid: the Tx queue to enable
1804  *
1805  * Enable a specific Tx queue interrupt.
1806  *
1807  * This function is not protected by the iflib CTX lock.
1808  */
1809 static int
1810 ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
1811 {
1812 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1813 	struct ice_vsi *vsi = &sc->pf_vsi;
1814 	struct ice_hw *hw = &sc->hw;
1815 
1816 	/* Do not enable queue interrupts in recovery mode */
1817 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1818 		return (ENOSYS);
1819 
1820 	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
1821 	return (0);
1822 }
1823 
1824 /**
1825  * ice_set_default_promisc_mask - Set default config for promisc settings
1826  * @promisc_mask: bitmask to setup
1827  *
1828  * The ice_(set|clear)_vsi_promisc() functions expect a mask of promiscuous
1829  * modes to operate on. The mask used here is the default one for the
1830  * driver, where promiscuous is enabled/disabled for all types of
1831  * non-VLAN-tagged/VLAN 0 traffic.
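 *
 * @remark the resulting mask enables the unicast and multicast promiscuous
 * bits in both directions; the remaining bits (e.g. the broadcast and
 * VLAN-tagged variants) stay clear.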
1832  */
1833 static void
1834 ice_set_default_promisc_mask(ice_bitmap_t *promisc_mask)
1835 {
1836 	ice_zero_bitmap(promisc_mask, ICE_PROMISC_MAX);
1837 	ice_set_bit(ICE_PROMISC_UCAST_TX, promisc_mask);
1838 	ice_set_bit(ICE_PROMISC_UCAST_RX, promisc_mask);
1839 	ice_set_bit(ICE_PROMISC_MCAST_TX, promisc_mask);
1840 	ice_set_bit(ICE_PROMISC_MCAST_RX, promisc_mask);
1841 }
1842 
1843 /**
1844  * ice_if_promisc_set - Set device promiscuous mode
1845  * @ctx: iflib context structure
1846  * @flags: promiscuous flags to configure
1847  *
1848  * Called by iflib to configure device promiscuous mode.
1849  *
1850  * @remark Calls to this function will always overwrite the previous setting
1851  */
1852 static int
1853 ice_if_promisc_set(if_ctx_t ctx, int flags)
1854 {
1855 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1856 	struct ice_hw *hw = &sc->hw;
1857 	device_t dev = sc->dev;
1858 	int status;
1859 	bool promisc_enable = flags & IFF_PROMISC;
1860 	bool multi_enable = flags & IFF_ALLMULTI;
1861 	ice_declare_bitmap(promisc_mask, ICE_PROMISC_MAX);
1862 
1863 	/* Do not support configuration when in recovery mode */
1864 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1865 		return (ENOSYS);
1866 
1867 	ice_set_default_promisc_mask(promisc_mask);
1868 
1869 	if (multi_enable)
1870 		return (EOPNOTSUPP);
1871 
1872 	if (promisc_enable) {
1873 		status = ice_set_vsi_promisc(hw, sc->pf_vsi.idx,
1874 					     promisc_mask, 0);
1875 		if (status && status != ICE_ERR_ALREADY_EXISTS) {
1876 			device_printf(dev,
1877 				      "Failed to enable promiscuous mode for PF VSI, err %s aq_err %s\n",
1878 				      ice_status_str(status),
1879 				      ice_aq_str(hw->adminq.sq_last_status));
1880 			return (EIO);
1881 		}
1882 	} else {
1883 		status = ice_clear_vsi_promisc(hw, sc->pf_vsi.idx,
1884 					       promisc_mask, 0);
1885 		if (status) {
1886 			device_printf(dev,
1887 				      "Failed to disable promiscuous mode for PF VSI, err %s aq_err %s\n",
1888 				      ice_status_str(status),
1889 				      ice_aq_str(hw->adminq.sq_last_status));
1890 			return (EIO);
1891 		}
1892 	}
1893 
1894 	return (0);
1895 }
1896 
1897 /**
1898  * ice_if_media_change - Change device media
1899  * @ctx: device ctx structure
1900  *
1901  * Called by iflib when a media change is requested. This operation is not
1902  * supported by the hardware, so we just return an error code.
1903  */
1904 static int
1905 ice_if_media_change(if_ctx_t ctx)
1906 {
1907 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1908 
1909 	device_printf(sc->dev, "Media change is not supported.\n");
1910 	return (ENODEV);
1911 }
1912 
1913 /**
1914  * ice_if_media_status - Report current device media
1915  * @ctx: iflib context structure
1916  * @ifmr: ifmedia request structure to update
1917  *
1918  * Updates the provided ifmr with current device media status, including link
1919  * status and media type.
1920  */
1921 static void
1922 ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
1923 {
1924 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1925 	struct ice_link_status *li = &sc->hw.port_info->phy.link_info;
1926 
1927 	ifmr->ifm_status = IFM_AVALID;
1928 	ifmr->ifm_active = IFM_ETHER;
1929 
1930 	/* Never report link up or media types when in recovery mode */
1931 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1932 		return;
1933 
1934 	if (!sc->link_up)
1935 		return;
1936 
1937 	ifmr->ifm_status |= IFM_ACTIVE;
1938 	ifmr->ifm_active |= IFM_FDX;
1939 
1940 	if (li->phy_type_low)
1941 		ifmr->ifm_active |= ice_get_phy_type_low(li->phy_type_low);
1942 	else if (li->phy_type_high)
1943 		ifmr->ifm_active |= ice_get_phy_type_high(li->phy_type_high);
1944 	else
1945 		ifmr->ifm_active |= IFM_UNKNOWN;
1946 
1947 	/* Report flow control status as well */
1948 	if (li->an_info & ICE_AQ_LINK_PAUSE_TX)
1949 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1950 	if (li->an_info & ICE_AQ_LINK_PAUSE_RX)
1951 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1952 }
1953 
1954 /**
1955  * ice_init_tx_tracking - Initialize Tx queue software tracking values
1956  * @vsi: the VSI to initialize
1957  *
1958  * Initialize Tx queue software tracking values, including the Report Status
1959  * queue indices and the last processed descriptor counter.
1960  */
1961 static void
1962 ice_init_tx_tracking(struct ice_vsi *vsi)
1963 {
1964 	struct ice_tx_queue *txq;
1965 	size_t j;
1966 	int i;
1967 
1968 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1969 
1970 		txq->tx_rs_cidx = txq->tx_rs_pidx = 0;
1971 
1972 		/* Initialize the last processed descriptor to be the end of
1973 		 * the ring, rather than the start, so that we avoid an
1974 		 * off-by-one error in ice_ift_txd_credits_update for the
1975 		 * first packet.
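		 *
		 * As an illustration (hypothetical ring size), with
		 * desc_count == 1024 tracking starts at descriptor 1023,
		 * i.e. one slot behind index 0 in ring order.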
1976 		 */
1977 		txq->tx_cidx_processed = txq->desc_count - 1;
1978 
1979 		for (j = 0; j < txq->desc_count; j++)
1980 			txq->tx_rsq[j] = QIDX_INVALID;
1981 	}
1982 }
1983 
1984 /**
1985  * ice_update_rx_mbuf_sz - Update the Rx buffer size for all queues
1986  * @sc: the device softc
1987  *
1988  * Called to update the Rx queue mbuf_sz parameter for configuring the receive
1989  * buffer sizes when programming hardware.
1990  */
1991 static void
1992 ice_update_rx_mbuf_sz(struct ice_softc *sc)
1993 {
1994 	uint32_t mbuf_sz = iflib_get_rx_mbuf_sz(sc->ctx);
1995 	struct ice_vsi *vsi = &sc->pf_vsi;
1996 
1997 	MPASS(mbuf_sz <= UINT16_MAX);
1998 	vsi->mbuf_sz = mbuf_sz;
1999 }
2000 
2001 /**
2002  * ice_if_init - Initialize the device
2003  * @ctx: iflib ctx structure
2004  *
2005  * Called by iflib to bring the device up, i.e. ifconfig ice0 up. Initializes
2006  * device filters and prepares the Tx and Rx engines.
2007  *
2008  * @pre assumes the caller holds the iflib CTX lock
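 *
 * @remark broadly, the sequence below is: refresh the Rx mbuf size and MAC
 * address, initialize Tx tracking, configure the VSI for Tx and Rx, enable
 * the Rx rings, program MAC filters and Rx interrupts, then restore
 * promiscuous and link settings.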
2009  */
2010 static void
2011 ice_if_init(if_ctx_t ctx)
2012 {
2013 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
2014 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2015 	device_t dev = sc->dev;
2016 	int err;
2017 
2018 	ASSERT_CTX_LOCKED(sc);
2019 
2020 	/*
2021 	 * We've seen an issue with 11.3/12.1 where sideband routines are
2022 	 * called after detach is called.  This would call routines after
2023 	 * if_stop, causing issues with the teardown process.  This has
2024 	 * apparently been fixed in STABLE snapshots, but it seems like a
2025 	 * good idea to keep this guard here regardless.
2026 	 */
2027 	if (ice_driver_is_detaching(sc))
2028 		return;
2029 
2030 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2031 		return;
2032 
2033 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
2034 		device_printf(sc->dev, "request to start interface cannot be completed as the device failed to reset\n");
2035 		return;
2036 	}
2037 
2038 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
2039 		device_printf(sc->dev, "request to start interface while device is prepared for impending reset\n");
2040 		return;
2041 	}
2042 
2043 	ice_update_rx_mbuf_sz(sc);
2044 
2045 	/* Update the MAC address... User might use a LAA */
2046 	err = ice_update_laa_mac(sc);
2047 	if (err) {
2048 		device_printf(dev,
2049 			      "LAA address change failed, err %s\n",
2050 			      ice_err_str(err));
2051 		return;
2052 	}
2053 
2054 	/* Initialize software Tx tracking values */
2055 	ice_init_tx_tracking(&sc->pf_vsi);
2056 
2057 	err = ice_cfg_vsi_for_tx(&sc->pf_vsi);
2058 	if (err) {
2059 		device_printf(dev,
2060 			      "Unable to configure the main VSI for Tx: %s\n",
2061 			      ice_err_str(err));
2062 		return;
2063 	}
2064 
2065 	err = ice_cfg_vsi_for_rx(&sc->pf_vsi);
2066 	if (err) {
2067 		device_printf(dev,
2068 			      "Unable to configure the main VSI for Rx: %s\n",
2069 			      ice_err_str(err));
2070 		goto err_cleanup_tx;
2071 	}
2072 
2073 	err = ice_control_all_rx_queues(&sc->pf_vsi, true);
2074 	if (err) {
2075 		device_printf(dev,
2076 			      "Unable to enable Rx rings for receive: %s\n",
2077 			      ice_err_str(err));
2078 		goto err_cleanup_tx;
2079 	}
2080 
2081 	err = ice_cfg_pf_default_mac_filters(sc);
2082 	if (err) {
2083 		device_printf(dev,
2084 			      "Unable to configure default MAC filters: %s\n",
2085 			      ice_err_str(err));
2086 		goto err_stop_rx;
2087 	}
2088 
2089 	/* We use software interrupts for Tx, so we only program the hardware
2090 	 * interrupts for Rx.
2091 	 */
2092 	ice_configure_all_rxq_interrupts(&sc->pf_vsi);
2093 	ice_configure_rx_itr(&sc->pf_vsi);
2094 
2095 	/* Configure promiscuous mode */
2096 	ice_if_promisc_set(ctx, if_getflags(sc->ifp));
2097 
2098 	if (!ice_testandclear_state(&sc->state, ICE_STATE_FIRST_INIT_LINK))
2099 		if (!sc->link_up && ((if_getflags(sc->ifp) & IFF_UP) ||
2100 			 ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN)))
2101 			ice_set_link(sc, true);
2102 
2103 	ice_rdma_pf_init(sc);
2104 
2105 	ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED);
2106 
2107 	if (sc->mirr_if && ice_testandclear_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT)) {
2108 		ice_clear_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED);
2109 		iflib_request_reset(sc->mirr_if->subctx);
2110 		iflib_admin_intr_deferred(sc->mirr_if->subctx);
2111 	}
2112 
2113 	return;
2114 
2115 err_stop_rx:
2116 	ice_control_all_rx_queues(&sc->pf_vsi, false);
2117 err_cleanup_tx:
2118 	ice_vsi_disable_tx(&sc->pf_vsi);
2119 }
2120 
2121 /**
2122  * ice_poll_for_media_avail - Re-enable link if media is detected
2123  * @sc: device private structure
2124  *
2125  * Intended to be called from the driver's timer function, this function
2126  * sends the Get Link Status AQ command and re-enables HW link if the
2127  * command says that media is available.
2128  *
2129  * If the driver doesn't have the "NO_MEDIA" state set, this does nothing,
2130  * since media removal events are expected to arrive via a link status
2131  * event rather than by polling.
2132  */
2133 static void
2134 ice_poll_for_media_avail(struct ice_softc *sc)
2135 {
2136 	struct ice_hw *hw = &sc->hw;
2137 	struct ice_port_info *pi = hw->port_info;
2138 
2139 	/* E830 only: There's no interrupt for when the PHY FW has finished loading,
2140 	 * so poll for the status in the media task here if it's previously
2141 	 * been detected that it's still loading.
2142 	 */
2143 	if (ice_is_e830(hw) &&
2144 	    ice_test_state(&sc->state, ICE_STATE_PHY_FW_INIT_PENDING)) {
2145 		if (rd32(hw, GL_MNG_FWSM) & GL_MNG_FWSM_FW_LOADING_M)
2146 			ice_clear_state(&sc->state, ICE_STATE_PHY_FW_INIT_PENDING);
2147 		else
2148 			return;
2149 	}
2150 
2151 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) {
2152 		pi->phy.get_link_info = true;
2153 		ice_get_link_status(pi, &sc->link_up);
2154 
2155 		if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
2156 			int status;
2157 
2158 			/* Re-enable link and re-apply user link settings */
2159 			if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) ||
2160 			    (if_getflags(sc->ifp) & IFF_UP)) {
2161 				ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
2162 
2163 				/* Update the OS about changes in media capability */
2164 				status = ice_add_media_types(sc, sc->media);
2165 				if (status)
2166 					device_printf(sc->dev,
2167 					    "Error adding device media types: %s aq_err %s\n",
2168 					    ice_status_str(status),
2169 					    ice_aq_str(hw->adminq.sq_last_status));
2170 			}
2171 
2172 			ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
2173 		}
2174 	}
2175 }
2176 
2177 /**
2178  * ice_if_timer - called by iflib periodically
2179  * @ctx: iflib ctx structure
2180  * @qid: the queue this timer was called for
2181  *
2182  * This callback is triggered by iflib periodically. We use it to update the
2183  * hw statistics.
2184  *
2185  * @remark this function is not protected by the iflib CTX lock.
2186  */
2187 static void
2188 ice_if_timer(if_ctx_t ctx, uint16_t qid)
2189 {
2190 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2191 	uint64_t prev_link_xoff_rx = sc->stats.cur.link_xoff_rx;
2192 
2193 	if (qid != 0)
2194 		return;
2195 
2196 	/* Do not attempt to update stats when in recovery mode */
2197 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2198 		return;
2199 
2200 	/* Update device statistics */
2201 	ice_update_pf_stats(sc);
2202 
2203 	/*
2204 	 * For proper watchdog management, the iflib stack needs to know if
2205 	 * we've been paused during the last interval. Check if the
2206 	 * link_xoff_rx stat changed, and set the isc_pause_frames, if so.
2207 	 */
2208 	if (sc->stats.cur.link_xoff_rx != prev_link_xoff_rx)
2209 		sc->scctx->isc_pause_frames = 1;
2210 
2211 	/* Update the primary VSI stats */
2212 	ice_update_vsi_hw_stats(&sc->pf_vsi);
2213 
2214 	/* Update mirror VSI stats */
2215 	if (sc->mirr_if && sc->mirr_if->if_attached)
2216 		ice_update_vsi_hw_stats(sc->mirr_if->vsi);
2217 }
2218 
2219 /**
2220  * ice_admin_timer - called periodically to trigger the admin task
2221  * @arg: callout(9) argument pointing to the device private softc structure
2222  *
2223  * Timer function used as part of a callout(9) timer that will periodically
2224  * trigger the admin task, even when the interface is down.
2225  *
2226  * @remark this function is not called by iflib and is not protected by the
2227  * iflib CTX lock.
2228  *
2229  * @remark because this is a callout function, it cannot sleep and should not
2230  * attempt to take the iflib CTX lock.
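 *
 * @remark the callout is armed elsewhere in the driver; a minimal sketch of
 * the expected setup, assuming callout_init(9)-style initialization
 * (illustrative only, not the driver's exact code):
 *
 * @code
 *	callout_init(&sc->admin_timer, 1);
 *	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
 * @endcode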
2231  */
2232 static void
2233 ice_admin_timer(void *arg)
2234 {
2235 	struct ice_softc *sc = (struct ice_softc *)arg;
2236 
2237 	/*
2238 	 * There is a point where callout routines are no longer
2239 	 * cancelable.  So there exists a window of time where the
2240 	 * driver enters detach() and tries to cancel the callout, but the
2241 	 * callout routine has passed the cancellation point.  The detach()
2242 	 * routine is unaware of this and tries to free resources that the
2243 	 * callout routine needs.  So we check for the detach state flag to
2244 	 * at least shrink the window of opportunity.
2245 	 */
2246 	if (ice_driver_is_detaching(sc))
2247 		return;
2248 
2249 	/* Fire off the admin task */
2250 	iflib_admin_intr_deferred(sc->ctx);
2251 
2252 	/* Reschedule the admin timer */
2253 	callout_schedule(&sc->admin_timer, hz/2);
2254 }
2255 
2256 /**
2257  * ice_transition_recovery_mode - Transition to recovery mode
2258  * @sc: the device private softc
2259  *
2260  * Called when the driver detects that the firmware has entered recovery mode
2261  * at run time.
2262  */
2263 static void
2264 ice_transition_recovery_mode(struct ice_softc *sc)
2265 {
2266 	struct ice_vsi *vsi = &sc->pf_vsi;
2267 	int i;
2268 
2269 	device_printf(sc->dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
2270 
2271 	/* Tell the stack that the link has gone down */
2272 	iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
2273 
2274 	/* Request that the device be re-initialized */
2275 	ice_request_stack_reinit(sc);
2276 
2277 	ice_rdma_pf_detach(sc);
2278 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2279 
2280 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2281 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2282 
2283 	ice_vsi_del_txqs_ctx(vsi);
2284 	ice_vsi_del_rxqs_ctx(vsi);
2285 
2286 	for (i = 0; i < sc->num_available_vsi; i++) {
2287 		if (sc->all_vsi[i])
2288 			ice_release_vsi(sc->all_vsi[i]);
2289 	}
2290 	sc->num_available_vsi = 0;
2291 
2292 	if (sc->all_vsi) {
2293 		free(sc->all_vsi, M_ICE);
2294 		sc->all_vsi = NULL;
2295 	}
2296 
2297 	/* Destroy the interrupt manager */
2298 	ice_resmgr_destroy(&sc->dev_imgr);
2299 	/* Destroy the queue managers */
2300 	ice_resmgr_destroy(&sc->tx_qmgr);
2301 	ice_resmgr_destroy(&sc->rx_qmgr);
2302 
2303 	ice_deinit_hw(&sc->hw);
2304 }
2305 
2306 /**
2307  * ice_transition_safe_mode - Transition to safe mode
2308  * @sc: the device private softc
2309  *
2310  * Called when the driver attempts to reload the DDP package during a device
2311  * reset, and the new download fails. If so, we must transition to safe mode
2312  * at run time.
2313  *
2314  * @remark although safe mode normally allocates only a single queue, we can't
2315  * change the number of queues dynamically when using iflib. Due to this, we
2316  * do not attempt to reduce the number of queues.
2317  */
2318 static void
2319 ice_transition_safe_mode(struct ice_softc *sc)
2320 {
2321 	/* Indicate that we are in Safe mode */
2322 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
2323 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
2324 
2325 	ice_rdma_pf_detach(sc);
2326 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2327 
2328 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2329 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2330 
2331 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2332 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_en);
2333 }
2334 
2335 /**
2336  * ice_if_update_admin_status - update admin status
2337  * @ctx: iflib ctx structure
2338  *
2339  * Called by iflib to update the admin status. For our purposes, this means
2340  * check the adminq, and update the link status. It's ultimately triggered by
2341  * our admin interrupt, or by the ice_if_timer periodically.
2342  *
2343  * @pre assumes the caller holds the iflib CTX lock
2344  */
2345 static void
2346 ice_if_update_admin_status(if_ctx_t ctx)
2347 {
2348 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2349 	enum ice_fw_modes fw_mode;
2350 	bool reschedule = false;
2351 	u16 pending = 0;
2352 
2353 	ASSERT_CTX_LOCKED(sc);
2354 
2355 	/* Check if the firmware entered recovery mode at run time */
2356 	fw_mode = ice_get_fw_mode(&sc->hw);
2357 	if (fw_mode == ICE_FW_MODE_REC) {
2358 		if (!ice_testandset_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2359 			/* If we just entered recovery mode, log a warning to
2360 			 * the system administrator and deinit driver state
2361 			 * that is no longer functional.
2362 			 */
2363 			ice_transition_recovery_mode(sc);
2364 		}
2365 	} else if (fw_mode == ICE_FW_MODE_ROLLBACK) {
2366 		if (!ice_testandset_state(&sc->state, ICE_STATE_ROLLBACK_MODE)) {
2367 			/* Rollback mode isn't fatal, but we don't want to
2368 			 * repeatedly post a message about it.
2369 			 */
2370 			ice_print_rollback_msg(&sc->hw);
2371 		}
2372 	}
2373 
2374 	/* Handle global reset events */
2375 	ice_handle_reset_event(sc);
2376 
2377 	/* Handle PF reset requests */
2378 	ice_handle_pf_reset_request(sc);
2379 
2380 	/* Handle MDD events */
2381 	ice_handle_mdd_event(sc);
2382 
2383 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED) ||
2384 	    ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET) ||
2385 	    ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2386 		/*
2387 		 * If we know the control queues are disabled, skip processing
2388 		 * the control queues entirely.
2389 		 */
2390 		;
2391 	} else if (ice_testandclear_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING)) {
2392 		ice_process_ctrlq(sc, ICE_CTL_Q_ADMIN, &pending);
2393 		if (pending > 0)
2394 			reschedule = true;
2395 
2396 		if (ice_is_generic_mac(&sc->hw)) {
2397 			ice_process_ctrlq(sc, ICE_CTL_Q_SB, &pending);
2398 			if (pending > 0)
2399 				reschedule = true;
2400 		}
2401 
2402 		ice_process_ctrlq(sc, ICE_CTL_Q_MAILBOX, &pending);
2403 		if (pending > 0)
2404 			reschedule = true;
2405 	}
2406 
2407 	/* Poll for link up */
2408 	ice_poll_for_media_avail(sc);
2409 
2410 	/* Check and update link status */
2411 	ice_update_link_status(sc, false);
2412 
2413 	/*
2414 	 * If there are still messages to process, we need to reschedule
2415 	 * ourselves. Otherwise, we can just re-enable the interrupt. We'll be
2416 	 * woken up at the next interrupt or timer event.
2417 	 */
2418 	if (reschedule) {
2419 		ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
2420 		iflib_admin_intr_deferred(ctx);
2421 	} else {
2422 		ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2423 	}
2424 }
2425 
2426 /**
2427  * ice_prepare_for_reset - Prepare device for an impending reset
2428  * @sc: The device private softc
2429  *
2430  * Prepare the driver for an impending reset, shutting down VSIs, clearing the
2431  * scheduler setup, and shutting down controlqs. Uses the
2432  * ICE_STATE_PREPARED_FOR_RESET to indicate whether we've already prepared the
2433  * driver for reset or not.
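 *
 * @remark the overall flow is: ice_prepare_for_reset() tears down driver
 * state, the hardware completes its reset (see ice_handle_reset_event() and
 * ice_handle_pf_reset_request()), and ice_rebuild() then restores state.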
2434  */
2435 static void
2436 ice_prepare_for_reset(struct ice_softc *sc)
2437 {
2438 	struct ice_hw *hw = &sc->hw;
2439 
2440 	/* If we're already prepared, there's nothing to do */
2441 	if (ice_testandset_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET))
2442 		return;
2443 
2444 	log(LOG_INFO, "%s: preparing to reset device logic\n", if_name(sc->ifp));
2445 
2446 	/* In recovery mode, hardware is not initialized */
2447 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2448 		return;
2449 
2450 	/* inform the RDMA client */
2451 	ice_rdma_notify_reset(sc);
2452 	/* stop the RDMA client */
2453 	ice_rdma_pf_stop(sc);
2454 
2455 	/* Release the main PF VSI queue mappings */
2456 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2457 				    sc->pf_vsi.num_tx_queues);
2458 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2459 				    sc->pf_vsi.num_rx_queues);
2460 	if (sc->mirr_if) {
2461 		ice_resmgr_release_map(&sc->tx_qmgr, sc->mirr_if->vsi->tx_qmap,
2462 		    sc->mirr_if->num_irq_vectors);
2463 		ice_resmgr_release_map(&sc->rx_qmgr, sc->mirr_if->vsi->rx_qmap,
2464 		    sc->mirr_if->num_irq_vectors);
2465 	}
2466 
2467 	ice_clear_hw_tbls(hw);
2468 
2469 	if (hw->port_info)
2470 		ice_sched_cleanup_all(hw);
2471 
2472 	ice_shutdown_all_ctrlq(hw, false);
2473 }
2474 
2475 /**
2476  * ice_rebuild_pf_vsi_qmap - Rebuild the main PF VSI queue mapping
2477  * @sc: the device softc pointer
2478  *
2479  * Loops over the Tx and Rx queues for the main PF VSI and reassigns the queue
2480  * mapping after a reset occurred.
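 *
 * @remark once the contiguous assignments succeed, tx_qmap[i] and rx_qmap[i]
 * hold the absolute PF queue indices backing VSI queue i; those indices are
 * what the tail register macros below are keyed on.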
2481  */
2482 static int
2483 ice_rebuild_pf_vsi_qmap(struct ice_softc *sc)
2484 {
2485 	struct ice_vsi *vsi = &sc->pf_vsi;
2486 	struct ice_tx_queue *txq;
2487 	struct ice_rx_queue *rxq;
2488 	int err, i;
2489 
2490 	/* Re-assign Tx queues from PF space to the main VSI */
2491 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap,
2492 					    vsi->num_tx_queues);
2493 	if (err) {
2494 		device_printf(sc->dev, "Unable to re-assign PF Tx queues: %s\n",
2495 			      ice_err_str(err));
2496 		return (err);
2497 	}
2498 
2499 	/* Re-assign Rx queues from PF space to this VSI */
2500 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap,
2501 					    vsi->num_rx_queues);
2502 	if (err) {
2503 		device_printf(sc->dev, "Unable to re-assign PF Rx queues: %s\n",
2504 			      ice_err_str(err));
2505 		goto err_release_tx_queues;
2506 	}
2507 
2508 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
2509 
2510 	/* Re-assign Tx queue tail pointers */
2511 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
2512 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
2513 
2514 	/* Re-assign Rx queue tail pointers */
2515 	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
2516 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
2517 
2518 	return (0);
2519 
2520 err_release_tx_queues:
2521 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2522 				   sc->pf_vsi.num_tx_queues);
2523 
2524 	return (err);
2525 }
2526 
2527 /* determine if the iflib context is active */
2528 #define CTX_ACTIVE(ctx) ((if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING))
2529 
2530 /**
2531  * ice_rebuild_recovery_mode - Rebuild driver state while in recovery mode
2532  * @sc: The device private softc
2533  *
2534  * Handle a driver rebuild while in recovery mode. This will only rebuild the
2535  * limited functionality supported while in recovery mode.
2536  */
2537 static void
2538 ice_rebuild_recovery_mode(struct ice_softc *sc)
2539 {
2540 	device_t dev = sc->dev;
2541 
2542 	/* enable PCIe bus master */
2543 	pci_enable_busmaster(dev);
2544 
2545 	/* Configure interrupt causes for the administrative interrupt */
2546 	ice_configure_misc_interrupts(sc);
2547 
2548 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2549 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2550 
2551 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2552 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2553 
2554 	log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp));
2555 
2556 	/* In order to completely restore device functionality, the iflib core
2557 	 * needs to be reset. We need to request an iflib reset. Additionally,
2558 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2559 	 * the iflib core, we also want to re-run the admin task so that iflib
2560 	 * resets immediately instead of waiting for the next interrupt.
2561 	 */
2562 	ice_request_stack_reinit(sc);
2563 
2564 	return;
2565 }
2566 
2567 /**
2568  * ice_rebuild - Rebuild driver state post reset
2569  * @sc: The device private softc
2570  *
2571  * Restore driver state after a reset occurred. Restart the controlqs, setup
2572  * the hardware port, and re-enable the VSIs.
2573  */
2574 static void
2575 ice_rebuild(struct ice_softc *sc)
2576 {
2577 	struct ice_hw *hw = &sc->hw;
2578 	device_t dev = sc->dev;
2579 	enum ice_ddp_state pkg_state;
2580 	int status;
2581 	int err;
2582 
2583 	sc->rebuild_ticks = ticks;
2584 
2585 	/* If we're rebuilding, then a reset has succeeded. */
2586 	ice_clear_state(&sc->state, ICE_STATE_RESET_FAILED);
2587 
2588 	/*
2589 	 * If the firmware is in recovery mode, only restore the limited
2590 	 * functionality supported by recovery mode.
2591 	 */
2592 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2593 		ice_rebuild_recovery_mode(sc);
2594 		return;
2595 	}
2596 
2597 	/* enable PCIe bus master */
2598 	pci_enable_busmaster(dev);
2599 
2600 	status = ice_init_all_ctrlq(hw);
2601 	if (status) {
2602 		device_printf(dev, "failed to re-init controlqs, err %s\n",
2603 			      ice_status_str(status));
2604 		goto err_shutdown_ctrlq;
2605 	}
2606 
2607 	/* Query the allocated resources for Tx scheduler */
2608 	status = ice_sched_query_res_alloc(hw);
2609 	if (status) {
2610 		device_printf(dev,
2611 			      "Failed to query scheduler resources, err %s aq_err %s\n",
2612 			      ice_status_str(status),
2613 			      ice_aq_str(hw->adminq.sq_last_status));
2614 		goto err_shutdown_ctrlq;
2615 	}
2616 
2617 	/* Re-enable FW logging. Keep going even if this fails */
2618 	status = ICE_SUCCESS;
2619 	if (hw->pf_id == 0)
2620 		status = ice_fwlog_set(hw, &hw->fwlog_cfg);
2621 	if (!status) {
2622 		/*
2623 		 * We should have the most updated cached copy of the
2624 		 * configuration, regardless of whether we're rebuilding
2625 		 * or not.  So we'll simply check to see if logging was
2626 		 * enabled pre-rebuild.
2627 		 */
2628 		if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2629 			status = ice_fwlog_register(hw);
2630 			if (status)
2631 				device_printf(dev, "failed to re-register fw logging, err %s aq_err %s\n",
2632 				   ice_status_str(status),
2633 				   ice_aq_str(hw->adminq.sq_last_status));
2634 		}
2635 	} else
2636 		device_printf(dev, "failed to rebuild fw logging configuration, err %s aq_err %s\n",
2637 		   ice_status_str(status),
2638 		   ice_aq_str(hw->adminq.sq_last_status));
2639 
2640 	err = ice_send_version(sc);
2641 	if (err)
2642 		goto err_shutdown_ctrlq;
2643 
2644 	err = ice_init_link_events(sc);
2645 	if (err) {
2646 		device_printf(dev, "ice_init_link_events failed: %s\n",
2647 			      ice_err_str(err));
2648 		goto err_shutdown_ctrlq;
2649 	}
2650 
2651 	status = ice_clear_pf_cfg(hw);
2652 	if (status) {
2653 		device_printf(dev, "failed to clear PF configuration, err %s\n",
2654 			      ice_status_str(status));
2655 		goto err_shutdown_ctrlq;
2656 	}
2657 
2658 	ice_clean_all_vsi_rss_cfg(sc);
2659 
2660 	ice_clear_pxe_mode(hw);
2661 
2662 	status = ice_get_caps(hw);
2663 	if (status) {
2664 		device_printf(dev, "failed to get capabilities, err %s\n",
2665 			      ice_status_str(status));
2666 		goto err_shutdown_ctrlq;
2667 	}
2668 
2669 	status = ice_sched_init_port(hw->port_info);
2670 	if (status) {
2671 		device_printf(dev, "failed to initialize port, err %s\n",
2672 			      ice_status_str(status));
2673 		goto err_sched_cleanup;
2674 	}
2675 
2676 	/* If we previously loaded the package, it needs to be reloaded now */
2677 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) {
2678 		pkg_state = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
2679 		if (!ice_is_init_pkg_successful(pkg_state)) {
2680 			ice_log_pkg_init(sc, pkg_state);
2681 			ice_transition_safe_mode(sc);
2682 		}
2683 	}
2684 
2685 	ice_reset_pf_stats(sc);
2686 
2687 	err = ice_rebuild_pf_vsi_qmap(sc);
2688 	if (err) {
2689 		device_printf(sc->dev, "Unable to re-assign main VSI queues, err %s\n",
2690 			      ice_err_str(err));
2691 		goto err_sched_cleanup;
2692 	}
2693 	err = ice_initialize_vsi(&sc->pf_vsi);
2694 	if (err) {
2695 		device_printf(sc->dev, "Unable to re-initialize Main VSI, err %s\n",
2696 			      ice_err_str(err));
2697 		goto err_release_queue_allocations;
2698 	}
2699 
2700 	/* Replay all VSI configuration */
2701 	err = ice_replay_all_vsi_cfg(sc);
2702 	if (err)
2703 		goto err_deinit_pf_vsi;
2704 
2705 	/* Re-enable FW health event reporting */
2706 	ice_init_health_events(sc);
2707 
2708 	/* Reconfigure the main PF VSI for RSS */
2709 	err = ice_config_rss(&sc->pf_vsi);
2710 	if (err) {
2711 		device_printf(sc->dev,
2712 			      "Unable to reconfigure RSS for the main VSI, err %s\n",
2713 			      ice_err_str(err));
2714 		goto err_deinit_pf_vsi;
2715 	}
2716 
2717 	if (hw->port_info->qos_cfg.is_sw_lldp)
2718 		ice_add_rx_lldp_filter(sc);
2719 
2720 	/* Apply previous link settings and refresh link status, if PHY
2721 	 * FW is ready.
2722 	 */
2723 	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
2724 	ice_init_link(sc);
2725 
2726 	/* RDMA interface will be restarted by the stack re-init */
2727 
2728 	/* Configure interrupt causes for the administrative interrupt */
2729 	ice_configure_misc_interrupts(sc);
2730 
2731 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2732 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2733 
2734 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2735 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2736 
2737 	/* Reconfigure the subinterface */
2738 	if (sc->mirr_if) {
2739 		err = ice_subif_rebuild(sc);
2740 		if (err)
2741 			goto err_deinit_pf_vsi;
2742 	}
2743 
2744 	log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp));
2745 
2746 	/* In order to completely restore device functionality, the iflib core
2747 	 * needs to be reset. We need to request an iflib reset. Additionally,
2748 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2749 	 * the iflib core, we also want to re-run the admin task so that iflib
2750 	 * resets immediately instead of waiting for the next interrupt.
2751 	 * If the FW LLDP agent is enabled, we instead reconfigure DCB to properly
2752 	 * reinit all TC queues, not only TC 0; ice_do_dcb_reconfig() requests the stack reinit itself.
2753 	 */
2754 	if (hw->port_info->qos_cfg.is_sw_lldp)
2755 		ice_request_stack_reinit(sc);
2756 	else
2757 		ice_do_dcb_reconfig(sc, false);
2758 
2759 	return;
2760 
2761 err_deinit_pf_vsi:
2762 	ice_deinit_vsi(&sc->pf_vsi);
2763 err_release_queue_allocations:
2764 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2765 				    sc->pf_vsi.num_tx_queues);
2766 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2767 				    sc->pf_vsi.num_rx_queues);
2768 err_sched_cleanup:
2769 	ice_sched_cleanup_all(hw);
2770 err_shutdown_ctrlq:
2771 	ice_shutdown_all_ctrlq(hw, false);
2772 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2773 	ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2774 	device_printf(dev, "Driver rebuild failed, please reload the device driver\n");
2775 }
2776 
2777 /**
2778  * ice_handle_reset_event - Handle reset events triggered by OICR
2779  * @sc: The device private softc
2780  *
2781  * Handle reset events triggered by an OICR notification. This includes CORER,
2782  * GLOBR, and EMPR resets triggered by software on this or any other PF or by
2783  * firmware.
2784  *
2785  * @pre assumes the iflib context lock is held, and will unlock it while
2786  * waiting for the hardware to finish reset.
2787  */
2788 static void
2789 ice_handle_reset_event(struct ice_softc *sc)
2790 {
2791 	struct ice_hw *hw = &sc->hw;
2792 	int status;
2793 	device_t dev = sc->dev;
2794 
2795 	/* When a CORER, GLOBR, or EMPR is about to happen, the hardware will
2796 	 * trigger an OICR interrupt. Our OICR handler will determine when
2797 	 * this occurs and set the ICE_STATE_RESET_OICR_RECV bit as
2798 	 * appropriate.
2799 	 */
2800 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
2801 		return;
2802 
2803 	ice_prepare_for_reset(sc);
2804 
2805 	/*
2806 	 * Release the iflib context lock and wait for the device to finish
2807 	 * resetting.
2808 	 */
2809 	IFLIB_CTX_UNLOCK(sc);
2810 
2811 #define ICE_EMPR_ADDL_WAIT_MSEC_SLOW		20000
2812 	if ((ice_is_e830(hw) || ice_is_e825c(hw)) &&
2813 	    (((rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
2814 	         GLGEN_RSTAT_RESET_TYPE_S) == ICE_RESET_EMPR))
2815 			ice_msec_pause(ICE_EMPR_ADDL_WAIT_MSEC_SLOW);
2816 
2817 	status = ice_check_reset(hw);
2818 	IFLIB_CTX_LOCK(sc);
2819 	if (status) {
2820 		device_printf(dev, "Device never came out of reset, err %s\n",
2821 			      ice_status_str(status));
2822 
2823 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2824 		ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
2825 		ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2826 		device_printf(dev, "Reset failed; please reload the device driver\n");
2827 		return;
2828 	}
2829 
2830 	/* We're done with the reset, so we can rebuild driver state */
2831 	sc->hw.reset_ongoing = false;
2832 	ice_rebuild(sc);
2833 
2834 	/* In the unlikely event that a PF reset request occurs at the same
2835 	 * time as a global reset, clear the request now. This avoids
2836 	 * resetting a second time right after we reset due to a global event.
2837 	 */
2838 	if (ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2839 		device_printf(dev, "Ignoring PFR request that occurred while a reset was ongoing\n");
2840 }
2841 
2842 /**
2843  * ice_handle_pf_reset_request - Initiate PF reset requested by software
2844  * @sc: The device private softc
2845  *
2846  * Initiate a PF reset requested by software. We handle this in the admin task
2847  * so that only one thread actually handles driver preparation and cleanup,
2848  * rather than having multiple threads possibly attempt to run this code
2849  * simultaneously.
2850  *
2851  * @pre assumes the iflib context lock is held and will unlock it while
2852  * waiting for the PF reset to complete.
2853  */
2854 static void
2855 ice_handle_pf_reset_request(struct ice_softc *sc)
2856 {
2857 	struct ice_hw *hw = &sc->hw;
2858 	int status;
2859 
2860 	/* Check for PF reset requests */
2861 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2862 		return;
2863 
2864 	/* Make sure we're prepared for reset */
2865 	ice_prepare_for_reset(sc);
2866 
2867 	/*
2868 	 * Release the iflib context lock and wait for the device to finish
2869 	 * resetting.
2870 	 */
2871 	IFLIB_CTX_UNLOCK(sc);
2872 	status = ice_reset(hw, ICE_RESET_PFR);
2873 	IFLIB_CTX_LOCK(sc);
2874 	if (status) {
2875 		device_printf(sc->dev, "device PF reset failed, err %s\n",
2876 			      ice_status_str(status));
2877 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2878 		return;
2879 	}
2880 
2881 	sc->soft_stats.pfr_count++;
2882 	ice_rebuild(sc);
2883 }
2884 
2885 /**
2886  * ice_init_device_features - Init device driver features
2887  * @sc: driver softc structure
2888  *
2889  * @pre assumes that the function capabilities bits have been set up by
2890  * ice_init_hw().
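 *
 * @remark feat_cap tracks which features the device and driver could support,
 * while feat_en tracks which are actually enabled; a feature is only enabled
 * here if its capability bit survives the checks below.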
2891  */
2892 static void
2893 ice_init_device_features(struct ice_softc *sc)
2894 {
2895 	struct ice_hw *hw = &sc->hw;
2896 
2897 	/* Set capabilities that all devices support */
2898 	ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2899 	ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap);
2900 	ice_set_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2901 	ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap);
2902 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_cap);
2903 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_cap);
2904 	ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2905 	ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2906 	ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_cap);
2907 	ice_set_bit(ICE_FEATURE_DCB, sc->feat_cap);
2908 	ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
2909 	ice_set_bit(ICE_FEATURE_PHY_STATISTICS, sc->feat_cap);
2910 
2911 	if (ice_is_e810(hw))
2912 		ice_set_bit(ICE_FEATURE_PHY_STATISTICS, sc->feat_en);
2913 
2914 	if (ice_is_e825c(hw))
2915 		ice_set_bit(ICE_FEATURE_DUAL_NAC, sc->feat_cap);
2916 	/* Disable features due to hardware limitations... */
2917 	if (!hw->func_caps.common_cap.rss_table_size)
2918 		ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2919 	if (!hw->func_caps.common_cap.iwarp || !ice_enable_irdma ||
2920 	    ice_is_e830(hw))
2921 		ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2922 	if (!hw->func_caps.common_cap.dcb)
2923 		ice_clear_bit(ICE_FEATURE_DCB, sc->feat_cap);
2924 	/* Disable features due to firmware limitations... */
2925 	if (!ice_is_fw_health_report_supported(hw))
2926 		ice_clear_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2927 	if (!ice_fwlog_supported(hw))
2928 		ice_clear_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2929 	if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2930 		if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_FW_LOGGING))
2931 			ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_en);
2932 		else
2933 			ice_fwlog_unregister(hw);
2934 	}
2935 
2936 	/* Disable capabilities not supported by the OS */
2937 	ice_disable_unsupported_features(sc->feat_cap);
2938 
2939 	/* RSS is always enabled for iflib */
2940 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RSS))
2941 		ice_set_bit(ICE_FEATURE_RSS, sc->feat_en);
2942 
2943 	/* Disable features based on sysctl settings */
2944 	if (!ice_tx_balance_en)
2945 		ice_clear_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
2946 
2947 	if (hw->dev_caps.supported_sensors & ICE_SENSOR_SUPPORT_E810_INT_TEMP) {
2948 		ice_set_bit(ICE_FEATURE_TEMP_SENSOR, sc->feat_cap);
2949 		ice_set_bit(ICE_FEATURE_TEMP_SENSOR, sc->feat_en);
2950 	}
2951 
2952 	if (hw->func_caps.common_cap.next_cluster_id_support ||
2953 	    hw->dev_caps.common_cap.next_cluster_id_support) {
2954 		ice_set_bit(ICE_FEATURE_NEXT_CLUSTER_ID, sc->feat_cap);
2955 		ice_set_bit(ICE_FEATURE_NEXT_CLUSTER_ID, sc->feat_en);
2956 	}
2957 }
2958 
2959 /**
2960  * ice_if_multi_set - Callback to update Multicast filters in HW
2961  * @ctx: iflib ctx structure
2962  *
2963  * Called by iflib in response to SIOCDELMULTI and SIOCADDMULTI. Must search
2964  * the if_multiaddrs list and determine which filters have been added or
2965  * removed from the list, and update HW programming to reflect the new list.
2966  *
2967  * @pre assumes the caller holds the iflib CTX lock
2968  */
2969 static void
2970 ice_if_multi_set(if_ctx_t ctx)
2971 {
2972 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2973 	int err;
2974 
2975 	ASSERT_CTX_LOCKED(sc);
2976 
2977 	/* Do not handle multicast configuration in recovery mode */
2978 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2979 		return;
2980 
2981 	err = ice_sync_multicast_filters(sc);
2982 	if (err) {
2983 		device_printf(sc->dev,
2984 			      "Failed to synchronize multicast filter list: %s\n",
2985 			      ice_err_str(err));
2986 		return;
2987 	}
2988 }
2989 
2990 /**
2991  * ice_if_vlan_register - Register a VLAN with the hardware
2992  * @ctx: iflib ctx pointer
2993  * @vtag: VLAN to add
2994  *
2995  * Programs the main PF VSI with a hardware filter for the given VLAN.
2996  *
2997  * @pre assumes the caller holds the iflib CTX lock
2998  */
2999 static void
3000 ice_if_vlan_register(if_ctx_t ctx, u16 vtag)
3001 {
3002 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3003 	int status;
3004 
3005 	ASSERT_CTX_LOCKED(sc);
3006 
3007 	/* Do not handle VLAN configuration in recovery mode */
3008 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
3009 		return;
3010 
3011 	status = ice_add_vlan_hw_filter(&sc->pf_vsi, vtag);
3012 	if (status) {
3013 		device_printf(sc->dev,
3014 			      "Failure adding VLAN %d to main VSI, err %s aq_err %s\n",
3015 			      vtag, ice_status_str(status),
3016 			      ice_aq_str(sc->hw.adminq.sq_last_status));
3017 	}
3018 }
3019 
3020 /**
3021  * ice_if_vlan_unregister - Remove a VLAN filter from the hardware
3022  * @ctx: iflib ctx pointer
3023  * @vtag: VLAN to remove
3024  *
3025  * Removes the previously programmed VLAN filter from the main PF VSI.
3026  *
3027  * @pre assumes the caller holds the iflib CTX lock
3028  */
3029 static void
3030 ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag)
3031 {
3032 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3033 	int status;
3034 
3035 	ASSERT_CTX_LOCKED(sc);
3036 
3037 	/* Do not handle VLAN configuration in recovery mode */
3038 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
3039 		return;
3040 
3041 	status = ice_remove_vlan_hw_filter(&sc->pf_vsi, vtag);
3042 	if (status) {
3043 		device_printf(sc->dev,
3044 			      "Failure removing VLAN %d from main VSI, err %s aq_err %s\n",
3045 			      vtag, ice_status_str(status),
3046 			      ice_aq_str(sc->hw.adminq.sq_last_status));
3047 	}
3048 }
3049 
3050 /**
3051  * ice_if_stop - Stop the device
3052  * @ctx: iflib context structure
3053  *
3054  * Called by iflib to stop the device and bring it down. (i.e. ifconfig ice0
3055  * down)
3056  *
3057  * @pre assumes the caller holds the iflib CTX lock
3058  */
3059 static void
3060 ice_if_stop(if_ctx_t ctx)
3061 {
3062 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3063 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3064 
3065 	ASSERT_CTX_LOCKED(sc);
3066 
3067 	/*
3068 	 * The iflib core may call IFDI_STOP prior to the first call to
3069 	 * IFDI_INIT. This will cause us to attempt to remove MAC filters we
3070 	 * don't have, and disable Tx queues which aren't yet configured.
3071 	 * Although it is likely these extra operations are harmless, they do
3072 	 * cause spurious warning messages to be displayed, which may confuse
3073 	 * users.
3074 	 *
3075 	 * To avoid these messages, we use a state bit indicating if we've
3076 	 * been initialized. It will be set when ice_if_init is called, and
3077 	 * cleared here in ice_if_stop.
3078 	 */
3079 	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
3080 		return;
3081 
3082 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
3083 		device_printf(sc->dev, "request to stop interface cannot be completed as the device failed to reset\n");
3084 		return;
3085 	}
3086 
3087 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
3088 		device_printf(sc->dev, "request to stop interface while device is prepared for impending reset\n");
3089 		return;
3090 	}
3091 
3092 	ice_rdma_pf_stop(sc);
3093 
3094 	/* Remove the MAC filters, stop Tx, and stop Rx. We don't check the
3095 	 * return of these functions because there's nothing we can really do
3096 	 * if they fail, and the functions already print error messages.
3097 	 * Just try to shut down as much as we can.
3098 	 */
3099 	ice_rm_pf_default_mac_filters(sc);
3100 
3101 	/* Dissociate the Tx and Rx queues from the interrupts */
3102 	ice_flush_txq_interrupts(&sc->pf_vsi);
3103 	ice_flush_rxq_interrupts(&sc->pf_vsi);
3104 
3105 	/* Disable the Tx and Rx queues */
3106 	ice_vsi_disable_tx(&sc->pf_vsi);
3107 	ice_control_all_rx_queues(&sc->pf_vsi, false);
3108 
3109 	if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
3110 		 !(if_getflags(sc->ifp) & IFF_UP) && sc->link_up)
3111 		ice_set_link(sc, false);
3112 
3113 	if (sc->mirr_if && ice_test_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT)) {
3114 		ice_subif_if_stop(sc->mirr_if->subctx);
3115 		device_printf(sc->dev, "The subinterface is also brought down and back up after a reset\n");
3116 	}
3117 }
3118 
3119 /**
3120  * ice_if_get_counter - Get current value of an ifnet statistic
3121  * @ctx: iflib context pointer
3122  * @counter: ifnet counter to read
3123  *
3124  * Reads the current value of an ifnet counter for the device.
3125  *
3126  * This function is not protected by the iflib CTX lock.
3127  */
3128 static uint64_t
3129 ice_if_get_counter(if_ctx_t ctx, ift_counter counter)
3130 {
3131 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3132 
3133 	/* Return the counter for the main PF VSI */
3134 	return ice_get_ifnet_counter(&sc->pf_vsi, counter);
3135 }
3136 
3137 /**
3138  * ice_request_stack_reinit - Request that iflib re-initialize
3139  * @sc: the device private softc
3140  *
3141  * Request that the device be brought down and up, to re-initialize. For
3142  * example, this may be called when a device reset occurs, or when Tx and Rx
3143  * queues need to be re-initialized.
3144  *
3145  * This is required because the iflib state is outside the driver, and must be
3146  * re-initialized if we need to restart Tx and Rx queues.
3147  */
3148 void
3149 ice_request_stack_reinit(struct ice_softc *sc)
3150 {
3151 	if (CTX_ACTIVE(sc->ctx)) {
3152 		iflib_request_reset(sc->ctx);
3153 		iflib_admin_intr_deferred(sc->ctx);
3154 	}
3155 }
3156 
3157 /**
3158  * ice_driver_is_detaching - Check if the driver is detaching/unloading
3159  * @sc: device private softc
3160  *
3161  * Returns true if the driver is detaching, false otherwise.
3162  *
3163  * @remark on newer kernels, take advantage of iflib_in_detach in order to
3164  * report detachment correctly as early as possible.
3165  *
3166  * @remark this function is used by various code paths that want to avoid
3167  * running if the driver is about to be removed. This includes sysctls and
3168  * other driver access points. Note that it does not fully resolve
3169  * detach-based race conditions as it is possible for a thread to race with
3170  * iflib_in_detach.
3171  */
3172 bool
3173 ice_driver_is_detaching(struct ice_softc *sc)
3174 {
3175 	return (ice_test_state(&sc->state, ICE_STATE_DETACHING) ||
3176 		iflib_in_detach(sc->ctx));
3177 }
3178 
3179 /**
3180  * ice_if_priv_ioctl - Device private ioctl handler
3181  * @ctx: iflib context pointer
3182  * @command: The ioctl command issued
3183  * @data: ioctl specific data
3184  *
3185  * iflib callback for handling custom driver specific ioctls.
3186  *
3187  * @pre Assumes that the iflib context lock is held.
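 *
 * @remark userland reaches this handler via the SIOCSDRVSPEC or SIOCGDRVSPEC
 * ioctls on the interface. A hedged sketch of such a call, where "sock" and
 * "payload" are hypothetical caller-side variables:
 *
 * @code
 *	struct ifdrv ifd = { 0 };
 *
 *	strlcpy(ifd.ifd_name, "ice0", sizeof(ifd.ifd_name));
 *	ifd.ifd_cmd = ICE_NVM_ACCESS;
 *	ifd.ifd_len = sizeof(payload);
 *	ifd.ifd_data = &payload;
 *	ioctl(sock, SIOCGDRVSPEC, &ifd);
 * @endcode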
3188  */
3189 static int
3190 ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data)
3191 {
3192 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3193 	struct ifdrv *ifd;
3194 	device_t dev = sc->dev;
3195 
3196 	if (data == NULL)
3197 		return (EINVAL);
3198 
3199 	ASSERT_CTX_LOCKED(sc);
3200 
3201 	/* Make sure the command type is valid */
3202 	switch (command) {
3203 	case SIOCSDRVSPEC:
3204 	case SIOCGDRVSPEC:
3205 		/* Accepted commands */
3206 		break;
3207 	case SIOCGPRIVATE_0:
3208 		/*
3209 		 * Although we do not support this ioctl command, it's
3210 		 * expected that iflib will forward it to the IFDI_PRIV_IOCTL
3211 		 * handler. Do not print a message in this case.
3212 		 */
3213 		return (ENOTSUP);
3214 	default:
3215 		/*
3216 		 * If we get a different command for this function, it's
3217 		 * definitely unexpected, so log a message indicating what
3218 		 * command we got for debugging purposes.
3219 		 */
3220 		device_printf(dev, "%s: unexpected ioctl command %08lx\n",
3221 			      __func__, command);
3222 		return (EINVAL);
3223 	}
3224 
3225 	ifd = (struct ifdrv *)data;
3226 
3227 	switch (ifd->ifd_cmd) {
3228 	case ICE_NVM_ACCESS:
3229 		return (ice_handle_nvm_access_ioctl(sc, ifd));
3230 	case ICE_DEBUG_DUMP:
3231 		return (ice_handle_debug_dump_ioctl(sc, ifd));
3232 	default:
3233 		return (EINVAL);
3234 	}
3235 }
3236 
3237 /**
3238  * ice_if_i2c_req - I2C request handler for iflib
3239  * @ctx: iflib context pointer
3240  * @req: The I2C parameters to use
3241  *
3242  * Read from the port's I2C eeprom using the parameters from the ioctl.
3243  *
3244  * @remark The iflib-only part is pretty simple.
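 *
 * @remark a typical request, such as ifconfig(8) reading SFP module ID bytes,
 * might be populated as follows (values are illustrative only):
 *
 * @code
 *	struct ifi2creq req = { 0 };
 *
 *	req.dev_addr = 0xA0;
 *	req.offset = 0;
 *	req.len = 8;
 * @endcode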
3245  */
3246 static int
3247 ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req)
3248 {
3249 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3250 
3251 	return ice_handle_i2c_req(sc, req);
3252 }
3253 
3254 /**
3255  * ice_if_suspend - PCI device suspend handler for iflib
3256  * @ctx: iflib context pointer
3257  *
3258  * Deinitializes the driver and clears HW resources in preparation for
3259  * suspend or an FLR.
3260  *
3261  * @returns 0; this return value is ignored
3262  */
3263 static int
3264 ice_if_suspend(if_ctx_t ctx)
3265 {
3266 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3267 
3268 	/* At least a PFR is always going to happen after this;
3269 	 * either via FLR or during the D3->D0 transition.
3270 	 */
3271 	ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
3272 
3273 	ice_prepare_for_reset(sc);
3274 
3275 	return (0);
3276 }
3277 
3278 /**
3279  * ice_if_resume - PCI device resume handler for iflib
3280  * @ctx: iflib context pointer
3281  *
3282  * Reinitializes the driver and the HW after PCI resume or after
3283  * an FLR. An init is performed by iflib after this function is finished.
3284  *
3285  * @returns 0; this return value is ignored
3286  */
3287 static int
3288 ice_if_resume(if_ctx_t ctx)
3289 {
3290 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3291 
3292 	ice_rebuild(sc);
3293 
3294 	return (0);
3295 }
3296 
3297 /**
3298  * ice_if_needs_restart - Tell iflib when the driver needs to be reinitialized
3299  * @ctx: iflib context pointer
3300  * @event: event code to check
3301  *
3302  * Defaults to returning true for unknown events.
3303  *
3304  * @returns true if iflib needs to reinit the interface
3305  */
3306 static bool
3307 ice_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event)
3308 {
3309 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3310 
3311 	switch (event) {
3312 	case IFLIB_RESTART_VLAN_CONFIG:
		if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
		    !(if_getflags(sc->ifp) & IFF_UP))
			return (false);
		/* FALLTHROUGH */
	default:
		return (true);
3318 	}
3319 }
3320 
3321 /**
3322  * ice_init_link - Do link configuration and link status reporting
3323  * @sc: driver private structure
3324  *
 * Contains an extra check that defers link config when an E830 device
 * has the "FW_LOADING"/"PHYBUSY" bit set in GL_MNG_FWSM.
3327  */
3328 static void
3329 ice_init_link(struct ice_softc *sc)
3330 {
3331 	struct ice_hw *hw = &sc->hw;
3332 	device_t dev = sc->dev;
3333 
3334 	/* Check if FW is ready before setting up link; defer setup to the
3335 	 * admin task if it isn't.
3336 	 */
3337 	if (ice_is_e830(hw) &&
3338 	    (rd32(hw, GL_MNG_FWSM) & GL_MNG_FWSM_FW_LOADING_M)) {
3339 		ice_set_state(&sc->state, ICE_STATE_PHY_FW_INIT_PENDING);
3340 		device_printf(dev,
3341 		    "Link initialization is blocked by PHY FW initialization.\n");
3342 		device_printf(dev,
3343 		    "Link initialization will continue after PHY FW initialization completes.\n");
3344 		/* Do not access PHY config while PHY FW is busy initializing */
3345 	} else {
3346 		ice_clear_state(&sc->state, ICE_STATE_PHY_FW_INIT_PENDING);
3347 		ice_init_link_configuration(sc);
3348 		ice_update_link_status(sc, true);
3349 	}
}
3352 
3353 extern struct if_txrx ice_subif_txrx;
3354 
3355 /**
3356  * @var ice_subif_methods
 * @brief ice subinterface driver method entry points
3358  */
3359 static device_method_t ice_subif_methods[] = {
3360 	/* Device interface */
3361 	DEVMETHOD(device_register, ice_subif_register),
3362 	DEVMETHOD_END
3363 };
3364 
3365 /**
3366  * @var ice_subif_driver
3367  * @brief driver structure for the device API
3368  */
3369 static driver_t ice_subif_driver = {
3370 	.name = "ice_subif",
3371 	.methods = ice_subif_methods,
3372 	.size = sizeof(struct ice_mirr_if),
3373 };
3374 
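/**
 * @var ice_iflib_subif_methods
 * @brief iflib method entry points for subinterfaces
 */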
3375 static device_method_t ice_iflib_subif_methods[] = {
3376 	DEVMETHOD(ifdi_attach_pre, ice_subif_if_attach_pre),
3377 	DEVMETHOD(ifdi_attach_post, ice_subif_if_attach_post),
3378 	DEVMETHOD(ifdi_tx_queues_alloc, ice_subif_if_tx_queues_alloc),
3379 	DEVMETHOD(ifdi_rx_queues_alloc, ice_subif_if_rx_queues_alloc),
3380 	DEVMETHOD(ifdi_msix_intr_assign, ice_subif_if_msix_intr_assign),
3381 	DEVMETHOD(ifdi_intr_enable, ice_subif_if_intr_enable),
3382 	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_subif_if_rx_queue_intr_enable),
3383 	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_subif_if_tx_queue_intr_enable),
3384 	DEVMETHOD(ifdi_init, ice_subif_if_init),
3385 	DEVMETHOD(ifdi_stop, ice_subif_if_stop),
3386 	DEVMETHOD(ifdi_queues_free, ice_subif_if_queues_free),
3387 	DEVMETHOD(ifdi_media_status, ice_subif_if_media_status),
	DEVMETHOD(ifdi_promisc_set, ice_subif_if_promisc_set),
	DEVMETHOD_END
};
3390 
3391 /**
3392  * @var ice_iflib_subif_driver
3393  * @brief driver structure for the iflib stack
3394  *
3395  * driver_t definition used to setup the iflib device methods.
3396  */
3397 static driver_t ice_iflib_subif_driver = {
3398 	.name = "ice_subif",
3399 	.methods = ice_iflib_subif_methods,
3400 	.size = sizeof(struct ice_mirr_if),
3401 };
3402 
3403 /**
3404  * @var ice_subif_sctx
3405  * @brief ice driver shared context
3406  *
 * Similar to the existing ice_sctx, but with these differences:
3408  * - isc_admin_intrcnt is set to 0
3409  * - Uses subif iflib driver methods
3410  * - Flagged as a VF for iflib
3411  */
3412 static struct if_shared_ctx ice_subif_sctx = {
3413 	.isc_magic = IFLIB_MAGIC,
3414 	.isc_q_align = PAGE_SIZE,
3415 
3416 	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
3417 	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,
3418 	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
3419 	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,
3420 
3421 	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
3422 	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
3423 	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,
3424 
3425 	.isc_nfl = 1,
3426 	.isc_ntxqs = 1,
3427 	.isc_nrxqs = 1,
3428 
3429 	.isc_admin_intrcnt = 0,
3430 	.isc_vendor_info = ice_vendor_info_array,
3431 	.isc_driver_version = __DECONST(char *, ice_driver_version),
3432 	.isc_driver = &ice_iflib_subif_driver,
3433 
3434 	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
3435 		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX |
3436 		IFLIB_IS_VF,
3437 
3438 	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
3439 	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
3440 	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
3441 	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
3442 	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
3443 	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
3444 };
3445 
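/**
 * ice_subif_register - Register the subinterface device
 * @dev: the subinterface device (unused)
 *
 * Returns a pointer to the shared context structure used by all
 * subinterfaces, in the same way ice_register() does for the main
 * interface.
 */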
3446 static void *
3447 ice_subif_register(device_t dev __unused)
3448 {
3449 	return (&ice_subif_sctx);
3450 }
3451 
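/**
 * ice_subif_setup_scctx - Setup the iflib softc context structure
 * @mif: driver private data for the mirror interface
 *
 * Fills in the iflib softc context with the Tx/Rx entry points, queue and
 * vector counts, descriptor ring sizes, and offload capabilities used by
 * the subinterface.
 */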
3452 static void
3453 ice_subif_setup_scctx(struct ice_mirr_if *mif)
3454 {
3455 	if_softc_ctx_t scctx = mif->subscctx;
3456 
3457 	scctx->isc_txrx = &ice_subif_txrx;
3458 
3459 	scctx->isc_capenable = ICE_FULL_CAPS;
3460 	scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
3461 
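	/* Default to four Tx/Rx queue pairs, with one vector per Rx queue */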
3462 	scctx->isc_ntxqsets = 4;
3463 	scctx->isc_nrxqsets = 4;
3464 	scctx->isc_vectors = scctx->isc_nrxqsets;
3465 
3466 	scctx->isc_ntxqsets_max = 256;
3467 	scctx->isc_nrxqsets_max = 256;
3468 
3469 	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
3470 	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
3471 	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
3472 	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);
3473 
3474 	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
3475 	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
3476 	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
3477 	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;
3478 }
3479 
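/**
 * ice_subif_if_attach_pre - Early subinterface attach handler
 * @ctx: the iflib context structure
 *
 * Caches the iflib context, device, and softc context pointers in the
 * mirror interface structure, then initializes the softc context before
 * iflib allocates queues and interrupts.
 */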
3480 static int
3481 ice_subif_if_attach_pre(if_ctx_t ctx)
3482 {
3483 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3484 	device_t dev = iflib_get_dev(ctx);
3485 
3486 	mif->subctx = ctx;
3487 	mif->subdev = dev;
3488 	mif->subscctx = iflib_get_softc_ctx(ctx);
3489 
3490 	/* Setup the iflib softc context structure */
3491 	ice_subif_setup_scctx(mif);
3492 
3493 	return (0);
3494 }
3495 
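/**
 * ice_subif_if_attach_post - Late subinterface attach handler
 * @ctx: the iflib context structure (unused)
 *
 * No additional work is required here; the remaining setup is done by the
 * parent interface in ice_create_mirror_interface().
 */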
3496 static int
3497 ice_subif_if_attach_post(if_ctx_t ctx __unused)
3498 {
3499 	return (0);
3500 }
3501 
3502 /**
3503  * ice_destroy_mirror_interface - destroy mirror interface
3504  * @sc: driver private data
3505  *
3506  * Destroys all resources associated with the mirroring interface.
3507  * Will not exit early on failure.
3508  *
 * @pre Mirror interface already exists and is initialized.
3510  */
3511 void
3512 ice_destroy_mirror_interface(struct ice_softc *sc)
3513 {
3514 	struct ice_mirr_if *mif = sc->mirr_if;
3515 	struct ice_vsi *vsi = mif->vsi;
3516 	bool is_locked = false;
3517 	int ret;
3518 
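	/* Temporarily drop the parent's iflib ctx lock, if held, around the
	 * subinterface teardown below.
	 */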
3519 	is_locked = sx_xlocked(sc->iflib_ctx_lock);
3520 	if (is_locked)
3521 		IFLIB_CTX_UNLOCK(sc);
3522 
3523 	if (mif->ifp) {
3524 		ret = iflib_device_deregister(mif->subctx);
3525 		if (ret) {
3526 			device_printf(sc->dev,
3527 			    "iflib_device_deregister for mirror interface failed: %d\n",
3528 			    ret);
3529 		}
3530 	}
3531 
3532 	bus_topo_lock();
3533 	ret = device_delete_child(sc->dev, mif->subdev);
3534 	bus_topo_unlock();
3535 	if (ret) {
3536 		device_printf(sc->dev,
3537 		    "device_delete_child for mirror interface failed: %d\n",
3538 		    ret);
3539 	}
3540 
3541 	if (is_locked)
3542 		IFLIB_CTX_LOCK(sc);
3543 
3544 	if (mif->if_imap) {
3545 		free(mif->if_imap, M_ICE);
3546 		mif->if_imap = NULL;
3547 	}
3548 	if (mif->os_imap) {
3549 		free(mif->os_imap, M_ICE);
3550 		mif->os_imap = NULL;
3551 	}
3552 
	/* These vsi fields are freed via ice_subif_if_queues_free():
	 * - rx_irqvs
	 * - tx_queues
	 * - rx_queues
	 */
3559 	ice_release_vsi(vsi);
3560 
3561 	free(mif, M_ICE);
3562 	sc->mirr_if = NULL;
}
3565 
3566 /**
3567  * ice_setup_mirror_vsi - Initialize mirror VSI
3568  * @mif: driver private data for mirror interface
3569  *
3570  * Allocates a VSI for a mirror interface, and sets that VSI up for use as a
3571  * mirror for the main PF VSI.
3572  *
3573  * Returns 0 on success, or a standard error code on failure.
3574  */
3575 static int
3576 ice_setup_mirror_vsi(struct ice_mirr_if *mif)
3577 {
3578 	struct ice_softc *sc = mif->back;
3579 	device_t dev = sc->dev;
3580 	struct ice_vsi *vsi;
3581 	int ret = 0;
3582 
3583 	/* vsi is for the new mirror vsi, not the PF's main VSI */
3584 	vsi = ice_alloc_vsi(sc, ICE_VSI_VMDQ2);
3585 	if (!vsi) {
3586 		/* Already prints an error message */
3587 		return (ENOMEM);
3588 	}
3589 	mif->vsi = vsi;
3590 
3591 	/* Reserve VSI queue allocation from PF queues */
3592 	ice_alloc_vsi_qmap(vsi, ICE_DEFAULT_VF_QUEUES, ICE_DEFAULT_VF_QUEUES);
3593 	vsi->num_tx_queues = vsi->num_rx_queues = ICE_DEFAULT_VF_QUEUES;
3594 
3595 	/* Assign Tx queues from PF space */
3596 	ret = ice_resmgr_assign_scattered(&sc->tx_qmgr, vsi->tx_qmap,
3597 	    vsi->num_tx_queues);
3598 	if (ret) {
3599 		device_printf(dev, "Unable to assign mirror VSI Tx queues: %s\n",
3600 		    ice_err_str(ret));
3601 		goto release_vsi;
3602 	}
3603 	/* Assign Rx queues from PF space */
3604 	ret = ice_resmgr_assign_scattered(&sc->rx_qmgr, vsi->rx_qmap,
3605 	    vsi->num_rx_queues);
3606 	if (ret) {
3607 		device_printf(dev, "Unable to assign mirror VSI Rx queues: %s\n",
3608 		    ice_err_str(ret));
3609 		goto release_vsi;
3610 	}
3611 	vsi->qmap_type = ICE_RESMGR_ALLOC_SCATTERED;
3612 	vsi->max_frame_size = ICE_MAX_FRAME_SIZE;
3613 
3614 	ret = ice_initialize_vsi(vsi);
3615 	if (ret) {
3616 		device_printf(dev, "%s: Error in ice_initialize_vsi for mirror VSI: %s\n",
3617 		    __func__, ice_err_str(ret));
3618 		goto release_vsi;
3619 	}
3620 
3621 	/* Setup this VSI for receiving traffic */
3622 	ret = ice_config_rss(vsi);
3623 	if (ret) {
3624 		device_printf(dev,
3625 		    "Unable to configure RSS for mirror VSI: %s\n",
3626 		    ice_err_str(ret));
3627 		goto release_vsi;
3628 	}
3629 
3630 	/* Set HW rules for mirroring traffic */
3631 	vsi->mirror_src_vsi = sc->pf_vsi.idx;
3632 
3633 	ice_debug(&sc->hw, ICE_DBG_INIT,
3634 	    "Configuring mirroring from VSI %d to %d\n",
3635 	    vsi->mirror_src_vsi, vsi->idx);
3636 	ice_debug(&sc->hw, ICE_DBG_INIT, "(HW num: VSI %d to %d)\n",
3637 	    ice_get_hw_vsi_num(&sc->hw, vsi->mirror_src_vsi),
3638 	    ice_get_hw_vsi_num(&sc->hw, vsi->idx));
3639 
3640 	ret = ice_setup_vsi_mirroring(vsi);
3641 	if (ret) {
3642 		device_printf(dev,
3643 		    "Unable to configure mirroring for VSI: %s\n",
3644 		    ice_err_str(ret));
3645 		goto release_vsi;
3646 	}
3647 
3648 	return (0);
3649 
3650 release_vsi:
3651 	ice_release_vsi(vsi);
3652 	mif->vsi = NULL;
3653 	return (ret);
3654 }
3655 
3656 /**
3657  * ice_create_mirror_interface - Initialize mirror interface
3658  * @sc: driver private data
3659  *
3660  * Creates and sets up a mirror interface that will mirror traffic from
3661  * the main PF interface. Includes a call to iflib_device_register() in order
3662  * to setup necessary iflib structures for this new interface as well.
3663  *
3664  * If it returns successfully, a new interface will be created and will show
3665  * up in the ifconfig interface list.
3666  *
3667  * Returns 0 on success, or a standard error code on failure.
3668  */
3669 int
3670 ice_create_mirror_interface(struct ice_softc *sc)
3671 {
3672 	device_t dev = sc->dev;
3673 	struct ice_mirr_if *mif;
3674 	struct ifmedia *media;
3675 	struct sbuf *sb;
3676 	int ret = 0;
3677 
3678 	mif = (struct ice_mirr_if *)malloc(sizeof(*mif), M_ICE, M_ZERO | M_NOWAIT);
3679 	if (!mif) {
3680 		device_printf(dev, "malloc() error allocating mirror interface\n");
3681 		return (ENOMEM);
3682 	}
3683 
3684 	/* Set pointers */
3685 	sc->mirr_if = mif;
3686 	mif->back = sc;
3687 
3688 	/* Do early setup because these will be called during iflib_device_register():
3689 	 * - ice_subif_if_tx_queues_alloc
3690 	 * - ice_subif_if_rx_queues_alloc
3691 	 */
3692 	ret = ice_setup_mirror_vsi(mif);
3693 	if (ret)
3694 		goto out;
3695 
3696 	/* Determine name for new interface:
3697 	 * (base interface name)(modifier name)(modifier unit number)
3698 	 * e.g. for ice0 with a new mirror interface (modifier m)
3699 	 * of index 0, this equals "ice0m0"
3700 	 */
3701 	sb = sbuf_new_auto();
3702 	MPASS(sb != NULL);
3703 	sbuf_printf(sb, "%sm", device_get_nameunit(dev));
3704 	sbuf_finish(sb);
3705 
3706 	bus_topo_lock();
3707 	mif->subdev = device_add_child(dev, sbuf_data(sb), 0);
3708 	bus_topo_unlock();
3709 
3710 	if (!mif->subdev) {
3711 		device_printf(dev, "device_add_child failed for %s0\n", sbuf_data(sb));
3712 		sbuf_delete(sb);
3713 		free(mif, M_ICE);
3714 		sc->mirr_if = NULL;
3715 		return (ENOMEM);
3716 	}
3717 	sbuf_delete(sb);
3718 
3719 	device_set_driver(mif->subdev, &ice_subif_driver);
3720 
3721 	/* Use iflib_device_register() directly because the driver already
3722 	 * has an initialized softc to pass to iflib
3723 	 */
3724 	ret = iflib_device_register(mif->subdev, mif, &ice_subif_sctx, &mif->subctx);
3725 	if (ret)
3726 		goto out;
3727 
	/* Indicate that the created interface is just for monitoring */
3729 	mif->ifp = iflib_get_ifp(mif->subctx);
3730 	if_setflagbits(mif->ifp, IFF_MONITOR, 0);
3731 
3732 	/* Use autoselect media by default */
3733 	media = iflib_get_media(mif->subctx);
3734 	ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
3735 	ifmedia_set(media, IFM_ETHER | IFM_AUTO);
3736 
3737 	device_printf(dev, "Created dev %s and ifnet %s for mirroring\n",
3738 	    device_get_nameunit(mif->subdev), if_name(mif->ifp));
3739 
3740 	ice_add_vsi_sysctls(mif->vsi);
3741 
3742 	ret = ice_wire_mirror_intrs(mif);
3743 	if (ret)
3744 		goto out;
3745 
3746 	mif->if_attached = true;
3747 	return (0);
3748 
3749 out:
3750 	ice_destroy_mirror_interface(sc);
3751 	return (ret);
3752 }
3753 
3754 /**
 * ice_wire_mirror_intrs - Setup interrupts for the mirror interface
3756  * @mif: driver private subinterface structure
3757  *
3758  * Helper function that sets up driver interrupt data and calls
3759  * into iflib in order to setup interrupts in its data structures as well.
3760  *
3761  * Like ice_if_msix_intr_assign, currently requires that we get at least the same
3762  * number of vectors as we have queues, and that we always have the same number
3763  * of Tx and Rx queues. Unlike that function, this calls a special
 * iflib_irq_alloc_generic_subctx() function for RX interrupts because the
3765  * driver needs to get MSI-X resources from the parent device.
3766  *
 * Tx queues use a softirq instead of their own hardware interrupt, so that
 * path remains unchanged.
3769  *
3770  * Returns 0 on success or an error code from iflib_irq_alloc_generic_subctx()
3771  * on failure.
3772  */
3773 static int
3774 ice_wire_mirror_intrs(struct ice_mirr_if *mif)
3775 {
3776 	struct ice_softc *sc = mif->back;
3777 	struct ice_hw *hw = &sc->hw;
3778 	struct ice_vsi *vsi = mif->vsi;
3779 	device_t dev = mif->subdev;
3780 	int err, i, rid;
3781 
3782 	if_ctx_t ctx = mif->subctx;
3783 
3784 	ice_debug(hw, ICE_DBG_INIT, "%s: Last rid: %d\n", __func__, sc->last_rid);
3785 
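	/* Continue MSI-X resource IDs from where the parent interface (and
	 * any earlier allocations) left off.
	 */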
3786 	rid = sc->last_rid + 1;
3787 	for (i = 0; i < vsi->num_rx_queues; i++, rid++) {
3788 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
3789 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
3790 		char irq_name[16];
3791 
		/* TODO: Change to use dynamic interface number */
3793 		snprintf(irq_name, sizeof(irq_name), "m0rxq%d", i);
3794 		/* First arg is parent device (physical port's) iflib ctx */
3795 		err = iflib_irq_alloc_generic_subctx(sc->ctx, ctx,
3796 		    &mif->rx_irqvs[i].irq, rid, IFLIB_INTR_RXTX, ice_msix_que,
3797 		    rxq, rxq->me, irq_name);
3798 		if (err) {
3799 			device_printf(dev,
3800 			    "Failed to allocate q int %d err: %s\n",
3801 			    i, ice_err_str(err));
3802 			i--;
3803 			goto fail;
3804 		}
3805 		MPASS(rid - 1 > 0);
3806 		/* Set vector number used in interrupt enable/disable functions */
3807 		mif->rx_irqvs[i].me = rid - 1;
3808 		rxq->irqv = &mif->rx_irqvs[i];
3809 
3810 		bzero(irq_name, sizeof(irq_name));
3811 		snprintf(irq_name, sizeof(irq_name), "m0txq%d", i);
3812 		iflib_softirq_alloc_generic(ctx, &mif->rx_irqvs[i].irq,
3813 		    IFLIB_INTR_TX, txq, txq->me, irq_name);
3814 		txq->irqv = &mif->rx_irqvs[i];
3815 	}
3816 
3817 	sc->last_rid = rid - 1;
3818 
3819 	ice_debug(hw, ICE_DBG_INIT, "%s: New last rid: %d\n", __func__,
3820 	    sc->last_rid);
3821 
3822 	return (0);
3823 
3824 fail:
3825 	for (; i >= 0; i--)
3826 		iflib_irq_free(ctx, &mif->rx_irqvs[i].irq);
3827 	return (err);
3828 }
3829 
3830 /**
3831  * ice_subif_rebuild - Rebuild subinterface post reset
3832  * @sc: The device private softc
3833  *
 * Restore subinterface state after a reset. Restart the VSI and re-enable
 * mirroring.
3836  */
3837 static int
3838 ice_subif_rebuild(struct ice_softc *sc)
3839 {
3840 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(sc->ctx);
3841 	struct ice_vsi *vsi = sc->mirr_if->vsi;
3842 	int err;
3843 
3844 	err = ice_subif_rebuild_vsi_qmap(sc);
3845 	if (err) {
3846 		device_printf(sc->dev, "Unable to re-assign mirror VSI queues, err %s\n",
3847 		      ice_err_str(err));
3848 		return (err);
3849 	}
3850 
3851 	err = ice_initialize_vsi(vsi);
3852 	if (err) {
3853 		device_printf(sc->dev, "Unable to re-initialize mirror VSI, err %s\n",
3854 		      ice_err_str(err));
3855 		goto err_release_queue_allocations_subif;
3856 	}
3857 
3858 	err = ice_config_rss(vsi);
3859 	if (err) {
3860 		device_printf(sc->dev,
3861 		      "Unable to reconfigure RSS for the mirror VSI, err %s\n",
3862 		      ice_err_str(err));
3863 		goto err_deinit_subif_vsi;
3864 	}
3865 
3866 	vsi->mirror_src_vsi = sc->pf_vsi.idx;
3867 
3868 	err = ice_setup_vsi_mirroring(vsi);
3869 	if (err) {
3870 		device_printf(sc->dev,
3871 		      "Unable to configure mirroring for VSI: %s\n",
3872 		      ice_err_str(err));
3873 		goto err_deinit_subif_vsi;
3874 	}
3875 
3876 	ice_set_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT);
3877 
3878 	return (0);
3879 
3880 err_deinit_subif_vsi:
3881 	ice_deinit_vsi(vsi);
3882 err_release_queue_allocations_subif:
3883 	ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap,
3884 	    sc->mirr_if->num_irq_vectors);
3885 	ice_resmgr_release_map(&sc->rx_qmgr, vsi->rx_qmap,
3886 	    sc->mirr_if->num_irq_vectors);
3887 
3888 	return (err);
3889 }
3890 
3891 /**
3892  * ice_subif_rebuild_vsi_qmap - Rebuild the mirror VSI queue mapping
3893  * @sc: the device softc pointer
3894  *
3895  * Loops over the Tx and Rx queues for the mirror VSI and reassigns the queue
3896  * mapping after a reset occurred.
3897  */
3898 static int
3899 ice_subif_rebuild_vsi_qmap(struct ice_softc *sc)
3900 {
3901 	struct ice_vsi *vsi = sc->mirr_if->vsi;
3902 	struct ice_tx_queue *txq;
3903 	struct ice_rx_queue *rxq;
3904 	int err, i;
3905 
3906 	err = ice_resmgr_assign_scattered(&sc->tx_qmgr, vsi->tx_qmap, sc->mirr_if->num_irq_vectors);
3907 	if (err) {
3908 		device_printf(sc->dev, "Unable to assign mirror VSI Tx queues: %s\n",
3909 		      ice_err_str(err));
3910 		return (err);
3911 	}
3912 
3913 	err = ice_resmgr_assign_scattered(&sc->rx_qmgr, vsi->rx_qmap, sc->mirr_if->num_irq_vectors);
3914 	if (err) {
3915 		device_printf(sc->dev, "Unable to assign mirror VSI Rx queues: %s\n",
3916 		      ice_err_str(err));
3917 		goto err_release_tx_queues;
3918 	}
3919 
3920 	vsi->qmap_type = ICE_RESMGR_ALLOC_SCATTERED;
3921 
3922 	/* Re-assign Tx queue tail pointers */
3923 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
3924 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
3925 
3926 	/* Re-assign Rx queue tail pointers */
3927 	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
3928 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
3929 
3930 	return (0);
3931 
3932 err_release_tx_queues:
3933 	ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap, vsi->num_tx_queues);
3934 
3935 	return (err);
3936 }
3937 
3938 /**
3939  * ice_subif_if_tx_queues_alloc - Allocate Tx queue memory for subinterfaces
3940  * @ctx: iflib context structure
3941  * @vaddrs: virtual addresses for the queue memory
3942  * @paddrs: physical addresses for the queue memory
3943  * @ntxqs: the number of Tx queues per set (should always be 1)
3944  * @ntxqsets: the number of Tx queue sets to allocate
3945  *
3946  * See ice_if_tx_queues_alloc() description. Similar to that function, but
3947  * for subinterfaces instead.
3948  */
3949 static int
3950 ice_subif_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
3951 			     int __invariant_only ntxqs, int ntxqsets)
3952 {
3953 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3954 	struct ice_tx_queue *txq;
3955 	device_t dev = mif->subdev;
3956 	struct ice_vsi *vsi;
3957 	int err, i, j;
3958 
3959 	MPASS(mif != NULL);
3960 	MPASS(ntxqs == 1);
3961 	MPASS(mif->subscctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
3962 
3963 	vsi = mif->vsi;
3964 
3965 	MPASS(vsi->num_tx_queues == ntxqsets);
3966 
3967 	/* Allocate queue structure memory */
3968 	if (!(vsi->tx_queues =
3969 	      (struct ice_tx_queue *)malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
3970 		device_printf(dev, "%s: Unable to allocate Tx queue memory for subfunction\n",
3971 		    __func__);
3972 		return (ENOMEM);
3973 	}
3974 
3975 	/* Allocate report status arrays */
3976 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
3977 		if (!(txq->tx_rsq =
3978 		      (uint16_t *)malloc(sizeof(uint16_t) * mif->subscctx->isc_ntxd[0], M_ICE, M_NOWAIT))) {
3979 			device_printf(dev,
3980 			    "%s: Unable to allocate tx_rsq memory for subfunction\n", __func__);
3981 			err = ENOMEM;
3982 			goto free_tx_queues;
3983 		}
3984 		/* Initialize report status array */
3985 		for (j = 0; j < mif->subscctx->isc_ntxd[0]; j++)
3986 			txq->tx_rsq[j] = QIDX_INVALID;
3987 	}
3988 
3989 	/* Add Tx queue sysctls context */
3990 	ice_vsi_add_txqs_ctx(vsi);
3991 
3992 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
3993 		/* q_handle == me when only one TC */
3994 		txq->me = txq->q_handle = i;
3995 		txq->vsi = vsi;
3996 
3997 		/* store the queue size for easier access */
3998 		txq->desc_count = mif->subscctx->isc_ntxd[0];
3999 
4000 		/* get the virtual and physical address of the hardware queues */
4001 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
4002 		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
4003 		txq->tx_paddr = paddrs[i];
4004 
4005 		ice_add_txq_sysctls(txq);
4006 	}
4007 
4008 	return (0);
4009 
4010 free_tx_queues:
4011 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
4012 		if (txq->tx_rsq != NULL) {
4013 			free(txq->tx_rsq, M_ICE);
4014 			txq->tx_rsq = NULL;
4015 		}
4016 	}
4017 	free(vsi->tx_queues, M_ICE);
4018 	vsi->tx_queues = NULL;
4019 	return (err);
4020 }
4021 
4022 /**
4023  * ice_subif_if_rx_queues_alloc - Allocate Rx queue memory for subinterfaces
4024  * @ctx: iflib context structure
4025  * @vaddrs: virtual addresses for the queue memory
4026  * @paddrs: physical addresses for the queue memory
4027  * @nrxqs: number of Rx queues per set (should always be 1)
4028  * @nrxqsets: number of Rx queue sets to allocate
4029  *
4030  * See ice_if_rx_queues_alloc() for general summary; this is similar to that
4031  * but implemented for subinterfaces.
4032  */
4033 static int
4034 ice_subif_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
4035     int __invariant_only nrxqs, int nrxqsets)
4036 {
4037 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4038 	struct ice_rx_queue *rxq;
4039 	device_t dev = mif->subdev;
4040 	struct ice_vsi *vsi;
4041 	int i;
4042 
4043 	MPASS(mif != NULL);
4044 	MPASS(nrxqs == 1);
4045 	MPASS(mif->subscctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
4046 
4047 	vsi = mif->vsi;
4048 
4049 	MPASS(vsi->num_rx_queues == nrxqsets);
4050 
4051 	/* Allocate queue structure memory */
4052 	if (!(vsi->rx_queues =
	      (struct ice_rx_queue *)malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
4054 		device_printf(dev, "%s: Unable to allocate Rx queue memory for subfunction\n",
4055 		    __func__);
4056 		return (ENOMEM);
4057 	}
4058 
4059 	/* Add Rx queue sysctls context */
4060 	ice_vsi_add_rxqs_ctx(vsi);
4061 
4062 	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
4063 		rxq->me = i;
4064 		rxq->vsi = vsi;
4065 
4066 		/* store the queue size for easier access */
4067 		rxq->desc_count = mif->subscctx->isc_nrxd[0];
4068 
4069 		/* get the virtual and physical address of the hardware queues */
4070 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
4071 		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
4072 		rxq->rx_paddr = paddrs[i];
4073 
4074 		ice_add_rxq_sysctls(rxq);
4075 	}
4076 
4077 	return (0);
4078 }
4079 
4080 /**
4081  * ice_subif_if_msix_intr_assign - Assign MSI-X interrupts to new sub interface
4082  * @ctx: the iflib context structure
4083  * @msix: the number of vectors we were assigned
4084  *
4085  * Allocates and assigns driver private resources for MSI-X interrupt tracking.
4086  *
 * @pre OS MSI-X resources have been pre-allocated by the parent interface.
4088  */
4089 static int
4090 ice_subif_if_msix_intr_assign(if_ctx_t ctx, int msix)
4091 {
4092 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4093 	struct ice_softc *sc = mif->back;
4094 	struct ice_vsi *vsi = mif->vsi;
4095 
4096 	device_t dev = mif->subdev;
4097 	int ret;
4098 
4099 	if (vsi->num_rx_queues != vsi->num_tx_queues) {
		device_printf(dev,
			      "iflib requested %d Tx queues and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
			      vsi->num_tx_queues, vsi->num_rx_queues);
4103 		return (EOPNOTSUPP);
4104 	}
4105 
4106 	if (msix > sc->extra_vectors) {
4107 		device_printf(dev,
4108 		     "%s: Not enough spare (%d) msix vectors for new sub-interface requested (%d)\n",
4109 		     __func__, sc->extra_vectors, msix);
4110 		return (ENOSPC);
4111 	}
4112 	device_printf(dev, "%s: Using %d vectors for sub-interface\n", __func__,
4113 	    msix);
4114 
4115 	/* Allocate space to store the IRQ vector data */
4116 	mif->num_irq_vectors = vsi->num_rx_queues;
4117 	mif->rx_irqvs = (struct ice_irq_vector *)
4118 	    malloc(sizeof(struct ice_irq_vector) * (mif->num_irq_vectors),
4119 		   M_ICE, M_NOWAIT);
4120 	if (!mif->rx_irqvs) {
4121 		device_printf(dev,
4122 			      "Unable to allocate RX irqv memory for mirror's %d vectors\n",
4123 			      mif->num_irq_vectors);
4124 		return (ENOMEM);
4125 	}
4126 
4127 	/* Assign mirror interface interrupts from PF device space */
4128 	if (!(mif->if_imap =
4129 	      (u16 *)malloc(sizeof(u16) * mif->num_irq_vectors,
4130 	      M_ICE, M_NOWAIT))) {
4131 		device_printf(dev, "Unable to allocate mirror intfc if_imap memory\n");
4132 		ret = ENOMEM;
4133 		goto free_irqvs;
4134 	}
4135 	ret = ice_resmgr_assign_contiguous(&sc->dev_imgr, mif->if_imap, mif->num_irq_vectors);
4136 	if (ret) {
4137 		device_printf(dev, "Unable to assign mirror intfc PF device interrupt mapping: %s\n",
4138 			      ice_err_str(ret));
4139 		goto free_if_imap;
4140 	}
4141 	/* Assign mirror interface interrupts from OS interrupt allocation space */
4142 	if (!(mif->os_imap =
4143 	      (u16 *)malloc(sizeof(u16) * mif->num_irq_vectors,
4144 	      M_ICE, M_NOWAIT))) {
4145 		device_printf(dev, "Unable to allocate mirror intfc os_imap memory\n");
4146 		ret = ENOMEM;
4147 		goto free_if_imap;
4148 	}
4149 	ret = ice_resmgr_assign_contiguous(&sc->os_imgr, mif->os_imap, mif->num_irq_vectors);
4150 	if (ret) {
4151 		device_printf(dev, "Unable to assign mirror intfc OS interrupt mapping: %s\n",
4152 			      ice_err_str(ret));
4153 		goto free_if_imap;
4154 	}
4155 
4156 	return (0);
4157 
4158 free_if_imap:
4159 	free(mif->if_imap, M_ICE);
4160 	mif->if_imap = NULL;
4161 free_irqvs:
4162 	free(mif->rx_irqvs, M_ICE);
4163 	mif->rx_irqvs = NULL;
4164 	return (ret);
4165 }
4166 
4167 /**
4168  * ice_subif_if_intr_enable - Enable device interrupts for a subinterface
4169  * @ctx: iflib context structure
4170  *
4171  * Called by iflib to request enabling all interrupts that belong to a
4172  * subinterface.
4173  */
4174 static void
4175 ice_subif_if_intr_enable(if_ctx_t ctx)
4176 {
4177 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4178 	struct ice_softc *sc = mif->back;
4179 	struct ice_vsi *vsi = mif->vsi;
4180 	struct ice_hw *hw = &sc->hw;
4181 
4182 	/* Do not enable queue interrupts in recovery mode */
4183 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4184 		return;
4185 
4186 	/* Enable all queue interrupts */
4187 	for (int i = 0; i < vsi->num_rx_queues; i++)
4188 		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
4189 }
4190 
4191 /**
4192  * ice_subif_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
4193  * @ctx: iflib context structure
4194  * @rxqid: the Rx queue to enable
4195  *
4196  * Enable a specific Rx queue interrupt.
4197  *
4198  * This function is not protected by the iflib CTX lock.
4199  */
4200 static int
4201 ice_subif_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
4202 {
4203 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4204 	struct ice_softc *sc = mif->back;
4205 	struct ice_vsi *vsi = mif->vsi;
4206 	struct ice_hw *hw = &sc->hw;
4207 
4208 	/* Do not enable queue interrupts in recovery mode */
4209 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4210 		return (ENOSYS);
4211 
4212 	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
4213 	return (0);
4214 }
4215 
4216 /**
4217  * ice_subif_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
4218  * @ctx: iflib context structure
4219  * @txqid: the Tx queue to enable
4220  *
4221  * Enable a specific Tx queue interrupt.
4222  *
4223  * This function is not protected by the iflib CTX lock.
4224  */
4225 static int
4226 ice_subif_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
4227 {
4228 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4229 	struct ice_softc *sc = mif->back;
4230 	struct ice_vsi *vsi = mif->vsi;
4231 	struct ice_hw *hw = &sc->hw;
4232 
4233 	/* Do not enable queue interrupts in recovery mode */
4234 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4235 		return (ENOSYS);
4236 
4237 	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
4238 	return (0);
4239 }
4240 
4241 /**
4242  * ice_subif_if_init - Initialize the subinterface
4243  * @ctx: iflib ctx structure
4244  *
4245  * Called by iflib to bring the device up, i.e. ifconfig ice0m0 up.
4246  * Prepares the Tx and Rx engines and enables interrupts.
4247  *
4248  * @pre assumes the caller holds the iflib CTX lock
4249  */
4250 static void
4251 ice_subif_if_init(if_ctx_t ctx)
4252 {
4253 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4254 	struct ice_softc *sc = mif->back;
4255 	struct ice_vsi *vsi = mif->vsi;
4256 	device_t dev = mif->subdev;
4257 	int err;
4258 
4259 	if (ice_driver_is_detaching(sc))
4260 		return;
4261 
4262 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4263 		return;
4264 
4265 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
4266 		device_printf(dev,
4267 		    "request to start interface cannot be completed as the parent device %s failed to reset\n",
4268 		    device_get_nameunit(sc->dev));
4269 		return;
4270 	}
4271 
4272 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
4273 		device_printf(dev,
4274 		    "request to start interface cannot be completed while parent device %s is prepared for impending reset\n",
4275 		    device_get_nameunit(sc->dev));
4276 		return;
4277 	}
4278 
4279 	/* XXX: Equiv to ice_update_rx_mbuf_sz */
4280 	vsi->mbuf_sz = iflib_get_rx_mbuf_sz(ctx);
4281 
4282 	/* Initialize software Tx tracking values */
4283 	ice_init_tx_tracking(vsi);
4284 
4285 	err = ice_cfg_vsi_for_tx(vsi);
4286 	if (err) {
4287 		device_printf(dev,
4288 			      "Unable to configure subif VSI for Tx: %s\n",
4289 			      ice_err_str(err));
4290 		return;
4291 	}
4292 
4293 	err = ice_cfg_vsi_for_rx(vsi);
4294 	if (err) {
4295 		device_printf(dev,
4296 			      "Unable to configure subif VSI for Rx: %s\n",
4297 			      ice_err_str(err));
4298 		goto err_cleanup_tx;
4299 	}
4300 
4301 	err = ice_control_all_rx_queues(vsi, true);
4302 	if (err) {
4303 		device_printf(dev,
4304 			      "Unable to enable subif Rx rings for receive: %s\n",
4305 			      ice_err_str(err));
4306 		goto err_cleanup_tx;
4307 	}
4308 
4309 	ice_configure_all_rxq_interrupts(vsi);
4310 	ice_configure_rx_itr(vsi);
4311 
4312 	ice_set_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED);
4313 	return;
4314 
4315 err_cleanup_tx:
4316 	ice_vsi_disable_tx(vsi);
4317 }
4318 
 * ice_subif_if_stop - Stop the subinterface
 * @ctx: iflib context structure
4322  * @ifs: subinterface context structure
4323  *
4324  * Called by iflib to stop the subinterface and bring it down.
4325  * (e.g. ifconfig ice0m0 down)
4326  *
4327  * @pre assumes the caller holds the iflib CTX lock
4328  */
4329 static void
4330 ice_subif_if_stop(if_ctx_t ctx)
4331 {
4332 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4333 	struct ice_softc *sc = mif->back;
4334 	struct ice_vsi *vsi = mif->vsi;
4335 	device_t dev = mif->subdev;
4336 
4337 	if (!ice_testandclear_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED))
4338 		return;
4339 
4340 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
4341 		device_printf(dev,
4342 		    "request to stop interface cannot be completed as the parent device %s failed to reset\n",
4343 		    device_get_nameunit(sc->dev));
4344 		return;
4345 	}
4346 
4347 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
4348 		device_printf(dev,
4349 		    "request to stop interface cannot be completed while parent device %s is prepared for impending reset\n",
4350 		    device_get_nameunit(sc->dev));
4351 		return;
4352 	}
4353 
4354 	/* Dissociate the Tx and Rx queues from the interrupts */
4355 	ice_flush_txq_interrupts(vsi);
4356 	ice_flush_rxq_interrupts(vsi);
4357 
4358 	/* Disable the Tx and Rx queues */
4359 	ice_vsi_disable_tx(vsi);
4360 	ice_control_all_rx_queues(vsi, false);
4361 }
4362 
4363 /**
4364  * ice_free_irqvs_subif - Free IRQ vector memory for subinterfaces
4365  * @mif: Mirror interface private structure
4366  *
4367  * Free IRQ vector memory allocated during ice_subif_if_msix_intr_assign.
4368  */
4369 static void
4370 ice_free_irqvs_subif(struct ice_mirr_if *mif)
4371 {
4372 	struct ice_softc *sc = mif->back;
4373 	struct ice_vsi *vsi = mif->vsi;
4374 	if_ctx_t ctx = sc->ctx;
4375 	int i;
4376 
4377 	/* If the irqvs array is NULL, then there are no vectors to free */
4378 	if (mif->rx_irqvs == NULL)
4379 		return;
4380 
4381 	/* Free the IRQ vectors -- currently subinterfaces have number
4382 	 * of vectors equal to number of RX queues
4383 	 *
4384 	 * XXX: ctx is parent device's ctx, not the subinterface ctx
4385 	 */
4386 	for (i = 0; i < vsi->num_rx_queues; i++)
4387 		iflib_irq_free(ctx, &mif->rx_irqvs[i].irq);
4388 
4389 	ice_resmgr_release_map(&sc->os_imgr, mif->os_imap,
4390 	    mif->num_irq_vectors);
4391 	ice_resmgr_release_map(&sc->dev_imgr, mif->if_imap,
4392 	    mif->num_irq_vectors);
4393 
4394 	sc->last_rid -= vsi->num_rx_queues;
4395 
4396 	/* Clear the irqv pointers */
4397 	for (i = 0; i < vsi->num_rx_queues; i++)
4398 		vsi->rx_queues[i].irqv = NULL;
4399 
4400 	for (i = 0; i < vsi->num_tx_queues; i++)
4401 		vsi->tx_queues[i].irqv = NULL;
4402 
4403 	/* Release the vector array memory */
4404 	free(mif->rx_irqvs, M_ICE);
4405 	mif->rx_irqvs = NULL;
4406 }
4407 
4408 /**
4409  * ice_subif_if_queues_free - Free queue memory for subinterfaces
4410  * @ctx: the iflib context structure
4411  *
 * Free queue memory allocated by ice_subif_if_tx_queues_alloc() and
4413  * ice_subif_if_rx_queues_alloc().
4414  */
4415 static void
4416 ice_subif_if_queues_free(if_ctx_t ctx)
4417 {
4418 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4419 	struct ice_vsi *vsi = mif->vsi;
4420 	struct ice_tx_queue *txq;
4421 	int i;
4422 
4423 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
4424 	 * pointers.
4425 	 */
4426 	ice_vsi_del_txqs_ctx(vsi);
4427 	ice_vsi_del_rxqs_ctx(vsi);
4428 
4429 	/* Release MSI-X IRQ vectors */
4430 	ice_free_irqvs_subif(mif);
4431 
4432 	if (vsi->tx_queues != NULL) {
4433 		/* free the tx_rsq arrays */
4434 		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
4435 			if (txq->tx_rsq != NULL) {
4436 				free(txq->tx_rsq, M_ICE);
4437 				txq->tx_rsq = NULL;
4438 			}
4439 		}
4440 		free(vsi->tx_queues, M_ICE);
4441 		vsi->tx_queues = NULL;
4442 	}
4443 	if (vsi->rx_queues != NULL) {
4444 		free(vsi->rx_queues, M_ICE);
4445 		vsi->rx_queues = NULL;
4446 	}
4447 }
4448 
4449 /**
4450  * ice_subif_if_media_status - Report subinterface media
4451  * @ctx: iflib context structure
4452  * @ifmr: ifmedia request structure to update
4453  *
 * Updates the provided ifmr with a fixed active/autoselect media status, in
 * order to prevent a "no media types?" message from ifconfig.
4456  *
4457  * Mirror interfaces are always up.
4458  */
4459 static void
4460 ice_subif_if_media_status(if_ctx_t ctx __unused, struct ifmediareq *ifmr)
4461 {
4462 	ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE;
4463 	ifmr->ifm_active = IFM_ETHER | IFM_AUTO;
4464 }
4465 
4466 /**
4467  * ice_subif_if_promisc_set - Set subinterface promiscuous mode
4468  * @ctx: iflib context structure
4469  * @flags: promiscuous flags to configure
4470  *
4471  * Called by iflib to configure device promiscuous mode.
4472  *
4473  * @remark This does not need to be implemented for now.
4474  */
4475 static int
4476 ice_subif_if_promisc_set(if_ctx_t ctx __unused, int flags __unused)
4477 {
4478 	return (0);
4479 }
4480 
4481