xref: /freebsd/sys/dev/ice/if_ice_iflib.c (revision bd66c1b43e33540205dbc1187c2f2a15c58b57ba)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2024, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /**
33  * @file if_ice_iflib.c
34  * @brief iflib driver implementation
35  *
36  * Contains the main entry point for the iflib driver implementation. It
37  * implements the various ifdi driver methods, and sets up the module and
38  * driver values to load an iflib driver.
39  */
40 
41 #include "ice_iflib.h"
42 #include "ice_drv_info.h"
43 #include "ice_switch.h"
44 #include "ice_sched.h"
45 
46 #include <sys/module.h>
47 #include <sys/sockio.h>
48 #include <sys/smp.h>
49 #include <dev/pci/pcivar.h>
50 #include <dev/pci/pcireg.h>
51 
52 /*
53  * Device method prototypes
54  */
55 
56 static void *ice_register(device_t);
57 static int  ice_if_attach_pre(if_ctx_t);
58 static int  ice_attach_pre_recovery_mode(struct ice_softc *sc);
59 static int  ice_if_attach_post(if_ctx_t);
60 static void ice_attach_post_recovery_mode(struct ice_softc *sc);
61 static int  ice_if_detach(if_ctx_t);
62 static int  ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets);
63 static int  ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets);
64 static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix);
65 static void ice_if_queues_free(if_ctx_t ctx);
66 static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu);
67 static void ice_if_intr_enable(if_ctx_t ctx);
68 static void ice_if_intr_disable(if_ctx_t ctx);
69 static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
70 static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
71 static int ice_if_promisc_set(if_ctx_t ctx, int flags);
72 static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
73 static int ice_if_media_change(if_ctx_t ctx);
74 static void ice_if_init(if_ctx_t ctx);
75 static void ice_if_timer(if_ctx_t ctx, uint16_t qid);
76 static void ice_if_update_admin_status(if_ctx_t ctx);
77 static void ice_if_multi_set(if_ctx_t ctx);
78 static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag);
79 static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag);
80 static void ice_if_stop(if_ctx_t ctx);
81 static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter);
82 static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data);
83 static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req);
84 static int ice_if_suspend(if_ctx_t ctx);
85 static int ice_if_resume(if_ctx_t ctx);
86 static bool ice_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event);
87 static int ice_setup_mirror_vsi(struct ice_mirr_if *mif);
88 static int ice_wire_mirror_intrs(struct ice_mirr_if *mif);
89 static void ice_free_irqvs_subif(struct ice_mirr_if *mif);
90 static void *ice_subif_register(device_t);
91 static void ice_subif_setup_scctx(struct ice_mirr_if *mif);
92 static int ice_subif_rebuild(struct ice_softc *sc);
93 static int ice_subif_rebuild_vsi_qmap(struct ice_softc *sc);
94 
/* iflib method prototypes for the subinterface (ice_subif_*) driver */
96 static int ice_subif_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
97     uint64_t *paddrs, int ntxqs, int ntxqsets);
98 static int ice_subif_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
99     uint64_t *paddrs, int nrxqs, int nrxqsets);
100 static int ice_subif_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
101 static int ice_subif_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
102 static void ice_subif_if_intr_enable(if_ctx_t ctx);
103 static int ice_subif_if_msix_intr_assign(if_ctx_t ctx, int msix);
104 static void ice_subif_if_init(if_ctx_t ctx);
105 static void ice_subif_if_stop(if_ctx_t ctx);
106 static void ice_subif_if_queues_free(if_ctx_t ctx);
107 static int ice_subif_if_attach_pre(if_ctx_t);
108 static int ice_subif_if_attach_post(if_ctx_t);
109 static void ice_subif_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
110 static int ice_subif_if_promisc_set(if_ctx_t ctx, int flags);
111 
112 static int ice_msix_que(void *arg);
113 static int ice_msix_admin(void *arg);
114 
115 /*
116  * Helper function prototypes
117  */
118 static int ice_pci_mapping(struct ice_softc *sc);
119 static void ice_free_pci_mapping(struct ice_softc *sc);
120 static void ice_update_link_status(struct ice_softc *sc, bool update_media);
121 static void ice_init_device_features(struct ice_softc *sc);
122 static void ice_init_tx_tracking(struct ice_vsi *vsi);
123 static void ice_handle_reset_event(struct ice_softc *sc);
124 static void ice_handle_pf_reset_request(struct ice_softc *sc);
125 static void ice_prepare_for_reset(struct ice_softc *sc);
126 static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc);
127 static void ice_rebuild(struct ice_softc *sc);
128 static void ice_rebuild_recovery_mode(struct ice_softc *sc);
129 static void ice_free_irqvs(struct ice_softc *sc);
130 static void ice_update_rx_mbuf_sz(struct ice_softc *sc);
131 static void ice_poll_for_media_avail(struct ice_softc *sc);
132 static void ice_setup_scctx(struct ice_softc *sc);
133 static int ice_allocate_msix(struct ice_softc *sc);
134 static void ice_admin_timer(void *arg);
135 static void ice_transition_recovery_mode(struct ice_softc *sc);
136 static void ice_transition_safe_mode(struct ice_softc *sc);
137 static void ice_set_default_promisc_mask(ice_bitmap_t *promisc_mask);
138 
139 /*
140  * Device Interface Declaration
141  */
142 
/**
 * @var ice_methods
 * @brief ice driver method entry points
 *
 * List of device methods implementing the generic device interface used by
 * the device stack to interact with the ice driver. Since this is an iflib
 * driver, most of the methods point to the generic iflib implementation.
 *
 * The only driver-specific entry is device_register, which is handled by
 * ice_register() and returns the driver's shared context (ice_sctx).
 */
static device_method_t ice_methods[] = {
	/* Device interface */
	DEVMETHOD(device_register, ice_register),
	DEVMETHOD(device_probe,    iflib_device_probe_vendor),
	DEVMETHOD(device_attach,   iflib_device_attach),
	DEVMETHOD(device_detach,   iflib_device_detach),
	DEVMETHOD(device_shutdown, iflib_device_shutdown),
	DEVMETHOD(device_suspend,  iflib_device_suspend),
	DEVMETHOD(device_resume,   iflib_device_resume),
	DEVMETHOD_END
};
162 
/**
 * @var ice_iflib_methods
 * @brief iflib method entry points
 *
 * List of device methods used by the iflib stack to interact with this
 * driver. These are the real main entry points used to interact with this
 * driver. Every ifdi_* method listed here is implemented by the matching
 * ice_if_* function in this file.
 */
static device_method_t ice_iflib_methods[] = {
	/* Attach and detach */
	DEVMETHOD(ifdi_attach_pre, ice_if_attach_pre),
	DEVMETHOD(ifdi_attach_post, ice_if_attach_post),
	DEVMETHOD(ifdi_detach, ice_if_detach),
	/* Queue and interrupt setup */
	DEVMETHOD(ifdi_tx_queues_alloc, ice_if_tx_queues_alloc),
	DEVMETHOD(ifdi_rx_queues_alloc, ice_if_rx_queues_alloc),
	DEVMETHOD(ifdi_msix_intr_assign, ice_if_msix_intr_assign),
	DEVMETHOD(ifdi_queues_free, ice_if_queues_free),
	DEVMETHOD(ifdi_mtu_set, ice_if_mtu_set),
	DEVMETHOD(ifdi_intr_enable, ice_if_intr_enable),
	DEVMETHOD(ifdi_intr_disable, ice_if_intr_disable),
	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_if_rx_queue_intr_enable),
	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_if_tx_queue_intr_enable),
	/* Interface configuration and runtime operation */
	DEVMETHOD(ifdi_promisc_set, ice_if_promisc_set),
	DEVMETHOD(ifdi_media_status, ice_if_media_status),
	DEVMETHOD(ifdi_media_change, ice_if_media_change),
	DEVMETHOD(ifdi_init, ice_if_init),
	DEVMETHOD(ifdi_stop, ice_if_stop),
	DEVMETHOD(ifdi_timer, ice_if_timer),
	DEVMETHOD(ifdi_update_admin_status, ice_if_update_admin_status),
	DEVMETHOD(ifdi_multi_set, ice_if_multi_set),
	DEVMETHOD(ifdi_vlan_register, ice_if_vlan_register),
	DEVMETHOD(ifdi_vlan_unregister, ice_if_vlan_unregister),
	DEVMETHOD(ifdi_get_counter, ice_if_get_counter),
	DEVMETHOD(ifdi_priv_ioctl, ice_if_priv_ioctl),
	DEVMETHOD(ifdi_i2c_req, ice_if_i2c_req),
	/* Power management and restart policy */
	DEVMETHOD(ifdi_suspend, ice_if_suspend),
	DEVMETHOD(ifdi_resume, ice_if_resume),
	DEVMETHOD(ifdi_needs_restart, ice_if_needs_restart),
	DEVMETHOD_END
};
202 
/**
 * @var ice_driver
 * @brief driver structure for the generic device stack
 *
 * driver_t definition used to setup the generic device methods. It binds the
 * "ice" driver name to the ice_methods table and declares the per-device
 * softc size as sizeof(struct ice_softc).
 */
static driver_t ice_driver = {
	.name = "ice",
	.methods = ice_methods,
	.size = sizeof(struct ice_softc),
};
214 
/**
 * @var ice_iflib_driver
 * @brief driver structure for the iflib stack
 *
 * driver_t definition used to setup the iflib device methods. It binds the
 * "ice" driver name to the ice_iflib_methods table, with a softc size of
 * sizeof(struct ice_softc). This structure is handed to iflib through the
 * isc_driver member of ice_sctx.
 */
static driver_t ice_iflib_driver = {
	.name = "ice",
	.methods = ice_iflib_methods,
	.size = sizeof(struct ice_softc),
};
226 
227 extern struct if_txrx ice_txrx;
228 extern struct if_txrx ice_recovery_txrx;
229 
/**
 * @var ice_sctx
 * @brief ice driver shared context
 *
 * Structure defining shared values (context) that is used by all instances of
 * the device. Primarily used to setup details about how the iflib stack
 * should treat this driver. Also defines the default, minimum, and maximum
 * number of descriptors in each ring.
 *
 * A pointer to this structure is returned to iflib by ice_register().
 */
static struct if_shared_ctx ice_sctx = {
	.isc_magic = IFLIB_MAGIC,
	.isc_q_align = PAGE_SIZE,

	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
	/* We could technically set this as high as ICE_MAX_DMA_SEG_SIZE, but
	 * that doesn't make sense since that would be larger than the maximum
	 * size of a single packet.
	 */
	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,

	/* XXX: This is only used by iflib to ensure that
	 * scctx->isc_tx_tso_size_max + the VLAN header is a valid size.
	 */
	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
	/* XXX: This is used by iflib to set the maximum segment size in the
	 * TSO DMA tag. However, scctx->isc_tx_tso_segsize_max is used to set
	 * the related ifnet parameter.
	 */
	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,

	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,

	.isc_nfl = 1,
	.isc_ntxqs = 1,
	.isc_nrxqs = 1,

	.isc_admin_intrcnt = 1,
	.isc_vendor_info = ice_vendor_info_array,
	/* ice_driver_version is const; isc_driver_version wants a char * */
	.isc_driver_version = __DECONST(char *, ice_driver_version),
	.isc_driver = &ice_iflib_driver,

	/*
	 * IFLIB_NEED_SCRATCH ensures that mbufs have scratch space available
	 * for hardware checksum offload
	 *
	 * IFLIB_TSO_INIT_IP ensures that the TSO packets have zeroed out the
	 * IP sum field, required by our hardware to calculate valid TSO
	 * checksums.
	 *
	 * IFLIB_ADMIN_ALWAYS_RUN ensures that the administrative task runs
	 * even when the interface is down.
	 *
	 * IFLIB_SKIP_MSIX allows the driver to handle allocating MSI-X
	 * vectors manually instead of relying on iflib code to do this.
	 */
	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX,

	/* Descriptor count limits and defaults for the Rx and Tx rings */
	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
};
297 
/*
 * Register the "ice" driver (ice_driver) on the pci bus, with
 * ice_module_event_handler as the module event handler.
 */
DRIVER_MODULE(ice, pci, ice_driver, ice_module_event_handler, NULL);

/* Module version and the modules that ice depends on */
MODULE_VERSION(ice, 1);
MODULE_DEPEND(ice, pci, 1, 1, 1);
MODULE_DEPEND(ice, ether, 1, 1, 1);
MODULE_DEPEND(ice, iflib, 1, 1, 1);

/* Export PNP information built from the same table used for isc_vendor_info */
IFLIB_PNP_INFO(pci, ice, ice_vendor_info_array);
306 
307 /* Static driver-wide sysctls */
308 #include "ice_iflib_sysctls.h"
309 
310 /**
311  * ice_pci_mapping - Map PCI BAR memory
312  * @sc: device private softc
313  *
314  * Map PCI BAR 0 for device operation.
315  */
316 static int
317 ice_pci_mapping(struct ice_softc *sc)
318 {
319 	int rc;
320 
321 	/* Map BAR0 */
322 	rc = ice_map_bar(sc->dev, &sc->bar0, 0);
323 	if (rc)
324 		return rc;
325 
326 	return 0;
327 }
328 
329 /**
330  * ice_free_pci_mapping - Release PCI BAR memory
331  * @sc: device private softc
332  *
333  * Release PCI BARs which were previously mapped by ice_pci_mapping().
334  */
335 static void
336 ice_free_pci_mapping(struct ice_softc *sc)
337 {
338 	/* Free BAR0 */
339 	ice_free_bar(sc->dev, &sc->bar0);
340 }
341 
342 /*
343  * Device methods
344  */
345 
346 /**
347  * ice_register - register device method callback
348  * @dev: the device being registered
349  *
350  * Returns a pointer to the shared context structure, which is used by iflib.
351  */
352 static void *
353 ice_register(device_t dev __unused)
354 {
355 	return &ice_sctx;
356 } /* ice_register */
357 
358 /**
359  * ice_setup_scctx - Setup the iflib softc context structure
360  * @sc: the device private structure
361  *
362  * Setup the parameters in if_softc_ctx_t structure used by the iflib stack
363  * when loading.
364  */
365 static void
366 ice_setup_scctx(struct ice_softc *sc)
367 {
368 	if_softc_ctx_t scctx = sc->scctx;
369 	struct ice_hw *hw = &sc->hw;
370 	device_t dev = sc->dev;
371 	bool safe_mode, recovery_mode;
372 
373 	safe_mode = ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE);
374 	recovery_mode = ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE);
375 
376 	/*
377 	 * If the driver loads in Safe mode or Recovery mode, limit iflib to
378 	 * a single queue pair.
379 	 */
380 	if (safe_mode || recovery_mode) {
381 		scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1;
382 		scctx->isc_ntxqsets_max = 1;
383 		scctx->isc_nrxqsets_max = 1;
384 	} else {
385 		/*
386 		 * iflib initially sets the isc_ntxqsets and isc_nrxqsets to
387 		 * the values of the override sysctls. Cache these initial
388 		 * values so that the driver can be aware of what the iflib
389 		 * sysctl value is when setting up MSI-X vectors.
390 		 */
391 		sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets;
392 		sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets;
393 
394 		if (scctx->isc_ntxqsets == 0)
395 			scctx->isc_ntxqsets = hw->func_caps.common_cap.rss_table_size;
396 		if (scctx->isc_nrxqsets == 0)
397 			scctx->isc_nrxqsets = hw->func_caps.common_cap.rss_table_size;
398 
399 		scctx->isc_ntxqsets_max = hw->func_caps.common_cap.num_txq;
400 		scctx->isc_nrxqsets_max = hw->func_caps.common_cap.num_rxq;
401 
402 		/*
403 		 * Sanity check that the iflib sysctl values are within the
404 		 * maximum supported range.
405 		 */
406 		if (sc->ifc_sysctl_ntxqs > scctx->isc_ntxqsets_max)
407 			sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets_max;
408 		if (sc->ifc_sysctl_nrxqs > scctx->isc_nrxqsets_max)
409 			sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets_max;
410 	}
411 
412 	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
413 	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
414 	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
415 	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);
416 
417 	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
418 	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
419 	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
420 	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;
421 
422 	scctx->isc_msix_bar = pci_msix_table_bar(dev);
423 	scctx->isc_rss_table_size = hw->func_caps.common_cap.rss_table_size;
424 
425 	/*
426 	 * If the driver loads in recovery mode, disable Tx/Rx functionality
427 	 */
428 	if (recovery_mode)
429 		scctx->isc_txrx = &ice_recovery_txrx;
430 	else
431 		scctx->isc_txrx = &ice_txrx;
432 
433 	/*
434 	 * If the driver loads in Safe mode or Recovery mode, disable
435 	 * advanced features including hardware offloads.
436 	 */
437 	if (safe_mode || recovery_mode) {
438 		scctx->isc_capenable = ICE_SAFE_CAPS;
439 		scctx->isc_tx_csum_flags = 0;
440 	} else {
441 		scctx->isc_capenable = ICE_FULL_CAPS;
442 		scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
443 	}
444 
445 	scctx->isc_capabilities = scctx->isc_capenable;
446 } /* ice_setup_scctx */
447 
448 /**
449  * ice_if_attach_pre - Early device attach logic
450  * @ctx: the iflib context structure
451  *
452  * Called by iflib during the attach process. Earliest main driver entry
453  * point which performs necessary hardware and driver initialization. Called
454  * before the Tx and Rx queues are allocated.
455  */
456 static int
457 ice_if_attach_pre(if_ctx_t ctx)
458 {
459 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
460 	enum ice_fw_modes fw_mode;
461 	enum ice_status status;
462 	if_softc_ctx_t scctx;
463 	struct ice_hw *hw;
464 	device_t dev;
465 	int err;
466 
467 	device_printf(iflib_get_dev(ctx), "Loading the iflib ice driver\n");
468 
469 	ice_set_state(&sc->state, ICE_STATE_ATTACHING);
470 
471 	sc->ctx = ctx;
472 	sc->media = iflib_get_media(ctx);
473 	sc->sctx = iflib_get_sctx(ctx);
474 	sc->iflib_ctx_lock = iflib_ctx_lock_get(ctx);
475 
476 	dev = sc->dev = iflib_get_dev(ctx);
477 	scctx = sc->scctx = iflib_get_softc_ctx(ctx);
478 
479 	hw = &sc->hw;
480 	hw->back = sc;
481 
482 	snprintf(sc->admin_mtx_name, sizeof(sc->admin_mtx_name),
483 		 "%s:admin", device_get_nameunit(dev));
484 	mtx_init(&sc->admin_mtx, sc->admin_mtx_name, NULL, MTX_DEF);
485 	callout_init_mtx(&sc->admin_timer, &sc->admin_mtx, 0);
486 
487 	ASSERT_CTX_LOCKED(sc);
488 
489 	if (ice_pci_mapping(sc)) {
490 		err = (ENXIO);
491 		goto destroy_admin_timer;
492 	}
493 
494 	/* Save off the PCI information */
495 	ice_save_pci_info(hw, dev);
496 
497 	/* create tunables as early as possible */
498 	ice_add_device_tunables(sc);
499 
500 	/* Setup ControlQ lengths */
501 	ice_set_ctrlq_len(hw);
502 
503 reinit_hw:
504 
505 	fw_mode = ice_get_fw_mode(hw);
506 	if (fw_mode == ICE_FW_MODE_REC) {
507 		device_printf(dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
508 
509 		err = ice_attach_pre_recovery_mode(sc);
510 		if (err)
511 			goto free_pci_mapping;
512 
513 		return (0);
514 	}
515 
516 	/* Initialize the hw data structure */
517 	status = ice_init_hw(hw);
518 	if (status) {
519 		if (status == ICE_ERR_FW_API_VER) {
520 			/* Enter recovery mode, so that the driver remains
521 			 * loaded. This way, if the system administrator
522 			 * cannot update the driver, they may still attempt to
523 			 * downgrade the NVM.
524 			 */
525 			err = ice_attach_pre_recovery_mode(sc);
526 			if (err)
527 				goto free_pci_mapping;
528 
529 			return (0);
530 		} else {
531 			err = EIO;
532 			device_printf(dev, "Unable to initialize hw, err %s aq_err %s\n",
533 				      ice_status_str(status),
534 				      ice_aq_str(hw->adminq.sq_last_status));
535 		}
536 		goto free_pci_mapping;
537 	}
538 
539 	ice_init_device_features(sc);
540 
541 	/* Keep flag set by default */
542 	ice_set_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN);
543 
544 	/* Notify firmware of the device driver version */
545 	err = ice_send_version(sc);
546 	if (err)
547 		goto deinit_hw;
548 
549 	/*
550 	 * Success indicates a change was made that requires a reinitialization
551 	 * of the hardware
552 	 */
553 	err = ice_load_pkg_file(sc);
554 	if (err == ICE_SUCCESS) {
555 		ice_deinit_hw(hw);
556 		goto reinit_hw;
557 	}
558 
559 	err = ice_init_link_events(sc);
560 	if (err) {
561 		device_printf(dev, "ice_init_link_events failed: %s\n",
562 			      ice_err_str(err));
563 		goto deinit_hw;
564 	}
565 
566 	/* Initialize VLAN mode in FW; if dual VLAN mode is supported by the package
567 	 * and firmware, this will force them to use single VLAN mode.
568 	 */
569 	status = ice_set_vlan_mode(hw);
570 	if (status) {
571 		err = EIO;
572 		device_printf(dev, "Unable to initialize VLAN mode, err %s aq_err %s\n",
573 			      ice_status_str(status),
574 			      ice_aq_str(hw->adminq.sq_last_status));
575 		goto deinit_hw;
576 	}
577 
578 	ice_print_nvm_version(sc);
579 
580 	/* Setup the MAC address */
581 	iflib_set_mac(ctx, hw->port_info->mac.lan_addr);
582 
583 	/* Setup the iflib softc context structure */
584 	ice_setup_scctx(sc);
585 
586 	/* Initialize the Tx queue manager */
587 	err = ice_resmgr_init(&sc->tx_qmgr, hw->func_caps.common_cap.num_txq);
588 	if (err) {
589 		device_printf(dev, "Unable to initialize Tx queue manager: %s\n",
590 			      ice_err_str(err));
591 		goto deinit_hw;
592 	}
593 
594 	/* Initialize the Rx queue manager */
595 	err = ice_resmgr_init(&sc->rx_qmgr, hw->func_caps.common_cap.num_rxq);
596 	if (err) {
597 		device_printf(dev, "Unable to initialize Rx queue manager: %s\n",
598 			      ice_err_str(err));
599 		goto free_tx_qmgr;
600 	}
601 
602 	/* Initialize the PF device interrupt resource manager */
603 	err = ice_alloc_intr_tracking(sc);
604 	if (err)
605 		/* Errors are already printed */
606 		goto free_rx_qmgr;
607 
608 	/* Determine maximum number of VSIs we'll prepare for */
609 	sc->num_available_vsi = min(ICE_MAX_VSI_AVAILABLE,
610 				    hw->func_caps.guar_num_vsi);
611 
612 	if (!sc->num_available_vsi) {
613 		err = EIO;
614 		device_printf(dev, "No VSIs allocated to host\n");
615 		goto free_intr_tracking;
616 	}
617 
618 	/* Allocate storage for the VSI pointers */
619 	sc->all_vsi = (struct ice_vsi **)
620 		malloc(sizeof(struct ice_vsi *) * sc->num_available_vsi,
621 		       M_ICE, M_WAITOK | M_ZERO);
622 	if (!sc->all_vsi) {
623 		err = ENOMEM;
624 		device_printf(dev, "Unable to allocate VSI array\n");
625 		goto free_intr_tracking;
626 	}
627 
628 	/*
629 	 * Prepare the statically allocated primary PF VSI in the softc
630 	 * structure. Other VSIs will be dynamically allocated as needed.
631 	 */
632 	ice_setup_pf_vsi(sc);
633 
634 	ice_alloc_vsi_qmap(&sc->pf_vsi, scctx->isc_ntxqsets_max,
635 	    scctx->isc_nrxqsets_max);
636 
637 	/* Allocate MSI-X vectors (due to isc_flags IFLIB_SKIP_MSIX) */
638 	err = ice_allocate_msix(sc);
639 	if (err)
640 		goto free_main_vsi;
641 
642 	return 0;
643 
644 free_main_vsi:
645 	/* ice_release_vsi will free the queue maps if they were allocated */
646 	ice_release_vsi(&sc->pf_vsi);
647 	free(sc->all_vsi, M_ICE);
648 	sc->all_vsi = NULL;
649 free_intr_tracking:
650 	ice_free_intr_tracking(sc);
651 free_rx_qmgr:
652 	ice_resmgr_destroy(&sc->rx_qmgr);
653 free_tx_qmgr:
654 	ice_resmgr_destroy(&sc->tx_qmgr);
655 deinit_hw:
656 	ice_deinit_hw(hw);
657 free_pci_mapping:
658 	ice_free_pci_mapping(sc);
659 destroy_admin_timer:
660 	mtx_lock(&sc->admin_mtx);
661 	callout_stop(&sc->admin_timer);
662 	mtx_unlock(&sc->admin_mtx);
663 	mtx_destroy(&sc->admin_mtx);
664 	return err;
665 } /* ice_if_attach_pre */
666 
667 /**
668  * ice_attach_pre_recovery_mode - Limited driver attach_pre for FW recovery
669  * @sc: the device private softc
670  *
671  * Loads the device driver in limited Firmware Recovery mode, intended to
672  * allow users to update the firmware to attempt to recover the device.
673  *
674  * @remark We may enter recovery mode in case either (a) the firmware is
675  * detected to be in an invalid state and must be re-programmed, or (b) the
676  * driver detects that the loaded firmware has a non-compatible API version
677  * that the driver cannot operate with.
678  */
679 static int
680 ice_attach_pre_recovery_mode(struct ice_softc *sc)
681 {
682 	ice_set_state(&sc->state, ICE_STATE_RECOVERY_MODE);
683 
684 	/* Setup the iflib softc context */
685 	ice_setup_scctx(sc);
686 
687 	/* Setup the PF VSI back pointer */
688 	sc->pf_vsi.sc = sc;
689 
690 	/*
691 	 * We still need to allocate MSI-X vectors since we need one vector to
692 	 * run the administrative admin interrupt
693 	 */
694 	return ice_allocate_msix(sc);
695 }
696 
697 /**
698  * ice_update_link_status - notify OS of link state change
699  * @sc: device private softc structure
700  * @update_media: true if we should update media even if link didn't change
701  *
702  * Called to notify iflib core of link status changes. Should be called once
703  * during attach_post, and whenever link status changes during runtime.
704  *
705  * This call only updates the currently supported media types if the link
706  * status changed, or if update_media is set to true.
707  */
708 static void
709 ice_update_link_status(struct ice_softc *sc, bool update_media)
710 {
711 	struct ice_hw *hw = &sc->hw;
712 	enum ice_status status;
713 
714 	/* Never report link up when in recovery mode */
715 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
716 		return;
717 
718 	/* Report link status to iflib only once each time it changes */
719 	if (!ice_testandset_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED)) {
720 		if (sc->link_up) { /* link is up */
721 			uint64_t baudrate = ice_aq_speed_to_rate(sc->hw.port_info);
722 
723 			if (!(hw->port_info->phy.link_info_old.link_info & ICE_AQ_LINK_UP))
724 				ice_set_default_local_lldp_mib(sc);
725 
726 			iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate);
727 			ice_rdma_link_change(sc, LINK_STATE_UP, baudrate);
728 
729 			ice_link_up_msg(sc);
730 		} else { /* link is down */
731 			iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
732 			ice_rdma_link_change(sc, LINK_STATE_DOWN, 0);
733 		}
734 		update_media = true;
735 	}
736 
737 	/* Update the supported media types */
738 	if (update_media && !ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
739 		status = ice_add_media_types(sc, sc->media);
740 		if (status)
741 			device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
742 				      ice_status_str(status),
743 				      ice_aq_str(hw->adminq.sq_last_status));
744 	}
745 }
746 
747 /**
748  * ice_if_attach_post - Late device attach logic
749  * @ctx: the iflib context structure
750  *
751  * Called by iflib to finish up attaching the device. Performs any attach
752  * logic which must wait until after the Tx and Rx queues have been
753  * allocated.
754  */
755 static int
756 ice_if_attach_post(if_ctx_t ctx)
757 {
758 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
759 	if_t ifp = iflib_get_ifp(ctx);
760 	enum ice_status status;
761 	int err;
762 
763 	ASSERT_CTX_LOCKED(sc);
764 
765 	/* We don't yet support loading if MSI-X is not supported */
766 	if (sc->scctx->isc_intr != IFLIB_INTR_MSIX) {
767 		device_printf(sc->dev, "The ice driver does not support loading without MSI-X\n");
768 		return (ENOTSUP);
769 	}
770 
771 	/* The ifnet structure hasn't yet been initialized when the attach_pre
772 	 * handler is called, so wait until attach_post to setup the
773 	 * isc_max_frame_size.
774 	 */
775 
776 	sc->ifp = ifp;
777 	sc->scctx->isc_max_frame_size = if_getmtu(ifp) +
778 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
779 
780 	/*
781 	 * If we are in recovery mode, only perform a limited subset of
782 	 * initialization to support NVM recovery.
783 	 */
784 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
785 		ice_attach_post_recovery_mode(sc);
786 		return (0);
787 	}
788 
789 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
790 
791 	err = ice_initialize_vsi(&sc->pf_vsi);
792 	if (err) {
793 		device_printf(sc->dev, "Unable to initialize Main VSI: %s\n",
794 			      ice_err_str(err));
795 		return err;
796 	}
797 
798 	/* Enable FW health event reporting */
799 	ice_init_health_events(sc);
800 
801 	/* Configure the main PF VSI for RSS */
802 	err = ice_config_rss(&sc->pf_vsi);
803 	if (err) {
804 		device_printf(sc->dev,
805 			      "Unable to configure RSS for the main VSI, err %s\n",
806 			      ice_err_str(err));
807 		return err;
808 	}
809 
810 	/* Configure switch to drop transmitted LLDP and PAUSE frames */
811 	err = ice_cfg_pf_ethertype_filters(sc);
812 	if (err)
813 		return err;
814 
815 	ice_get_and_print_bus_info(sc);
816 
817 	ice_set_link_management_mode(sc);
818 
819 	ice_init_saved_phy_cfg(sc);
820 
821 	ice_cfg_pba_num(sc);
822 
823 	/* Set a default value for PFC mode on attach since the FW state is unknown
824 	 * before sysctl tunables are executed and it can't be queried. This fixes an
825 	 * issue when loading the driver with the FW LLDP agent enabled but the FW
826 	 * was previously in DSCP PFC mode.
827 	 */
828 	status = ice_aq_set_pfc_mode(&sc->hw, ICE_AQC_PFC_VLAN_BASED_PFC, NULL);
829 	if (status != ICE_SUCCESS)
830 		device_printf(sc->dev, "Setting pfc mode failed, status %s\n", ice_status_str(status));
831 
832 	ice_add_device_sysctls(sc);
833 
834 	/* Get DCBX/LLDP state and start DCBX agent */
835 	ice_init_dcb_setup(sc);
836 
837 	/* Setup link configuration parameters */
838 	ice_init_link_configuration(sc);
839 	ice_update_link_status(sc, true);
840 
841 	/* Configure interrupt causes for the administrative interrupt */
842 	ice_configure_misc_interrupts(sc);
843 
844 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
845 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
846 
847 	err = ice_rdma_pf_attach(sc);
848 	if (err)
849 		return (err);
850 
851 	/* Start the admin timer */
852 	mtx_lock(&sc->admin_mtx);
853 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
854 	mtx_unlock(&sc->admin_mtx);
855 
856 	if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
857 		 !ice_test_state(&sc->state, ICE_STATE_NO_MEDIA))
858 		ice_set_state(&sc->state, ICE_STATE_FIRST_INIT_LINK);
859 
860 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
861 
862 	return 0;
863 } /* ice_if_attach_post */
864 
865 /**
866  * ice_attach_post_recovery_mode - Limited driver attach_post for FW recovery
867  * @sc: the device private softc
868  *
869  * Performs minimal work to prepare the driver to recover an NVM in case the
870  * firmware is in recovery mode.
871  */
872 static void
873 ice_attach_post_recovery_mode(struct ice_softc *sc)
874 {
875 	/* Configure interrupt causes for the administrative interrupt */
876 	ice_configure_misc_interrupts(sc);
877 
878 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
879 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
880 
881 	/* Start the admin timer */
882 	mtx_lock(&sc->admin_mtx);
883 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
884 	mtx_unlock(&sc->admin_mtx);
885 
886 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
887 }
888 
889 /**
890  * ice_free_irqvs - Free IRQ vector memory
891  * @sc: the device private softc structure
892  *
893  * Free IRQ vector memory allocated during ice_if_msix_intr_assign.
894  */
895 static void
896 ice_free_irqvs(struct ice_softc *sc)
897 {
898 	struct ice_vsi *vsi = &sc->pf_vsi;
899 	if_ctx_t ctx = sc->ctx;
900 	int i;
901 
902 	/* If the irqvs array is NULL, then there are no vectors to free */
903 	if (sc->irqvs == NULL)
904 		return;
905 
906 	/* Free the IRQ vectors */
907 	for (i = 0; i < sc->num_irq_vectors; i++)
908 		iflib_irq_free(ctx, &sc->irqvs[i].irq);
909 
910 	/* Clear the irqv pointers */
911 	for (i = 0; i < vsi->num_rx_queues; i++)
912 		vsi->rx_queues[i].irqv = NULL;
913 
914 	for (i = 0; i < vsi->num_tx_queues; i++)
915 		vsi->tx_queues[i].irqv = NULL;
916 
917 	/* Release the vector array memory */
918 	free(sc->irqvs, M_ICE);
919 	sc->irqvs = NULL;
920 	sc->num_irq_vectors = 0;
921 }
922 
923 /**
924  * ice_if_detach - Device driver detach logic
925  * @ctx: iflib context structure
926  *
927  * Perform device shutdown logic to detach the device driver.
928  *
929  * Note that there is no guarantee of the ordering of ice_if_queues_free() and
930  * ice_if_detach(). It is possible for the functions to be called in either
931  * order, and they must not assume to have a strict ordering.
932  */
933 static int
934 ice_if_detach(if_ctx_t ctx)
935 {
936 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
937 	struct ice_vsi *vsi = &sc->pf_vsi;
938 	enum ice_status status;
939 	int i;
940 
941 	ASSERT_CTX_LOCKED(sc);
942 
943 	/* Indicate that we're detaching */
944 	ice_set_state(&sc->state, ICE_STATE_DETACHING);
945 
946 	/* Stop the admin timer */
947 	mtx_lock(&sc->admin_mtx);
948 	callout_stop(&sc->admin_timer);
949 	mtx_unlock(&sc->admin_mtx);
950 	mtx_destroy(&sc->admin_mtx);
951 
952 	/* Remove additional interfaces if they exist */
953 	if (sc->mirr_if)
954 		ice_destroy_mirror_interface(sc);
955 	ice_rdma_pf_detach(sc);
956 
957 	/* Free allocated media types */
958 	ifmedia_removeall(sc->media);
959 
960 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
961 	 * pointers. Note, the calls here and those in ice_if_queues_free()
962 	 * are *BOTH* necessary, as we cannot guarantee which path will be
963 	 * run first
964 	 */
965 	ice_vsi_del_txqs_ctx(vsi);
966 	ice_vsi_del_rxqs_ctx(vsi);
967 
968 	/* Release MSI-X resources */
969 	ice_free_irqvs(sc);
970 
971 	for (i = 0; i < sc->num_available_vsi; i++) {
972 		if (sc->all_vsi[i])
973 			ice_release_vsi(sc->all_vsi[i]);
974 	}
975 
976 	if (sc->all_vsi) {
977 		free(sc->all_vsi, M_ICE);
978 		sc->all_vsi = NULL;
979 	}
980 
981 	/* Release MSI-X memory */
982 	pci_release_msi(sc->dev);
983 
984 	if (sc->msix_table != NULL) {
985 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
986 				     rman_get_rid(sc->msix_table),
987 				     sc->msix_table);
988 		sc->msix_table = NULL;
989 	}
990 
991 	ice_free_intr_tracking(sc);
992 
993 	/* Destroy the queue managers */
994 	ice_resmgr_destroy(&sc->tx_qmgr);
995 	ice_resmgr_destroy(&sc->rx_qmgr);
996 
997 	if (!ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
998 		ice_deinit_hw(&sc->hw);
999 
1000 	IFLIB_CTX_UNLOCK(sc);
1001 	status = ice_reset(&sc->hw, ICE_RESET_PFR);
1002 	IFLIB_CTX_LOCK(sc);
1003 	if (status) {
1004 		device_printf(sc->dev, "device PF reset failed, err %s\n",
1005 			      ice_status_str(status));
1006 	}
1007 
1008 	ice_free_pci_mapping(sc);
1009 
1010 	return 0;
1011 } /* ice_if_detach */
1012 
1013 /**
1014  * ice_if_tx_queues_alloc - Allocate Tx queue memory
1015  * @ctx: iflib context structure
1016  * @vaddrs: virtual addresses for the queue memory
1017  * @paddrs: physical addresses for the queue memory
1018  * @ntxqs: the number of Tx queues per set (should always be 1)
1019  * @ntxqsets: the number of Tx queue sets to allocate
1020  *
1021  * Called by iflib to allocate Tx queues for the device. Allocates driver
1022  * memory to track each queue, the status arrays used for descriptor
1023  * status reporting, and Tx queue sysctls.
1024  */
1025 static int
1026 ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
1027 		       int __invariant_only ntxqs, int ntxqsets)
1028 {
1029 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1030 	struct ice_vsi *vsi = &sc->pf_vsi;
1031 	struct ice_tx_queue *txq;
1032 	int err, i, j;
1033 
1034 	MPASS(ntxqs == 1);
1035 	MPASS(sc->scctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
1036 	ASSERT_CTX_LOCKED(sc);
1037 
1038 	/* Do not bother allocating queues if we're in recovery mode */
1039 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1040 		return (0);
1041 
1042 	/* Allocate queue structure memory */
1043 	if (!(vsi->tx_queues =
1044 	      (struct ice_tx_queue *) malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
1045 		device_printf(sc->dev, "Unable to allocate Tx queue memory\n");
1046 		return (ENOMEM);
1047 	}
1048 
1049 	/* Allocate report status arrays */
1050 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1051 		if (!(txq->tx_rsq =
1052 		      (uint16_t *) malloc(sizeof(uint16_t) * sc->scctx->isc_ntxd[0], M_ICE, M_NOWAIT))) {
1053 			device_printf(sc->dev, "Unable to allocate tx_rsq memory\n");
1054 			err = ENOMEM;
1055 			goto free_tx_queues;
1056 		}
1057 		/* Initialize report status array */
1058 		for (j = 0; j < sc->scctx->isc_ntxd[0]; j++)
1059 			txq->tx_rsq[j] = QIDX_INVALID;
1060 	}
1061 
1062 	/* Assign queues from PF space to the main VSI */
1063 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, ntxqsets);
1064 	if (err) {
1065 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1066 			      ice_err_str(err));
1067 		goto free_tx_queues;
1068 	}
1069 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1070 
1071 	/* Add Tx queue sysctls context */
1072 	ice_vsi_add_txqs_ctx(vsi);
1073 
1074 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1075 		/* q_handle == me when only one TC */
1076 		txq->me = txq->q_handle = i;
1077 		txq->vsi = vsi;
1078 
1079 		/* store the queue size for easier access */
1080 		txq->desc_count = sc->scctx->isc_ntxd[0];
1081 
1082 		/* get the virtual and physical address of the hardware queues */
1083 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
1084 		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
1085 		txq->tx_paddr = paddrs[i];
1086 
1087 		ice_add_txq_sysctls(txq);
1088 	}
1089 
1090 	vsi->num_tx_queues = ntxqsets;
1091 
1092 	return (0);
1093 
1094 free_tx_queues:
1095 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1096 		if (txq->tx_rsq != NULL) {
1097 			free(txq->tx_rsq, M_ICE);
1098 			txq->tx_rsq = NULL;
1099 		}
1100 	}
1101 	free(vsi->tx_queues, M_ICE);
1102 	vsi->tx_queues = NULL;
1103 	return err;
1104 }
1105 
1106 /**
1107  * ice_if_rx_queues_alloc - Allocate Rx queue memory
1108  * @ctx: iflib context structure
1109  * @vaddrs: virtual addresses for the queue memory
1110  * @paddrs: physical addresses for the queue memory
1111  * @nrxqs: number of Rx queues per set (should always be 1)
1112  * @nrxqsets: number of Rx queue sets to allocate
1113  *
1114  * Called by iflib to allocate Rx queues for the device. Allocates driver
1115  * memory to track each queue, as well as sets up the Rx queue sysctls.
1116  */
1117 static int
1118 ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
1119 		       int __invariant_only nrxqs, int nrxqsets)
1120 {
1121 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1122 	struct ice_vsi *vsi = &sc->pf_vsi;
1123 	struct ice_rx_queue *rxq;
1124 	int err, i;
1125 
1126 	MPASS(nrxqs == 1);
1127 	MPASS(sc->scctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
1128 	ASSERT_CTX_LOCKED(sc);
1129 
1130 	/* Do not bother allocating queues if we're in recovery mode */
1131 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1132 		return (0);
1133 
1134 	/* Allocate queue structure memory */
1135 	if (!(vsi->rx_queues =
1136 	      (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
1137 		device_printf(sc->dev, "Unable to allocate Rx queue memory\n");
1138 		return (ENOMEM);
1139 	}
1140 
1141 	/* Assign queues from PF space to the main VSI */
1142 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, nrxqsets);
1143 	if (err) {
1144 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1145 			      ice_err_str(err));
1146 		goto free_rx_queues;
1147 	}
1148 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1149 
1150 	/* Add Rx queue sysctls context */
1151 	ice_vsi_add_rxqs_ctx(vsi);
1152 
1153 	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
1154 		rxq->me = i;
1155 		rxq->vsi = vsi;
1156 
1157 		/* store the queue size for easier access */
1158 		rxq->desc_count = sc->scctx->isc_nrxd[0];
1159 
1160 		/* get the virtual and physical address of the hardware queues */
1161 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
1162 		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
1163 		rxq->rx_paddr = paddrs[i];
1164 
1165 		ice_add_rxq_sysctls(rxq);
1166 	}
1167 
1168 	vsi->num_rx_queues = nrxqsets;
1169 
1170 	return (0);
1171 
1172 free_rx_queues:
1173 	free(vsi->rx_queues, M_ICE);
1174 	vsi->rx_queues = NULL;
1175 	return err;
1176 }
1177 
1178 /**
1179  * ice_if_queues_free - Free queue memory
1180  * @ctx: the iflib context structure
1181  *
1182  * Free queue memory allocated by ice_if_tx_queues_alloc() and
1183  * ice_if_rx_queues_alloc().
1184  *
1185  * There is no guarantee that ice_if_queues_free() and ice_if_detach() will be
1186  * called in the same order. It's possible for ice_if_queues_free() to be
1187  * called prior to ice_if_detach(), and vice versa.
1188  *
1189  * For this reason, the main VSI is a static member of the ice_softc, which is
1190  * not free'd until after iflib finishes calling both of these functions.
1191  *
1192  * Thus, care must be taken in how we manage the memory being freed by this
1193  * function, and in what tasks it can and must perform.
1194  */
1195 static void
1196 ice_if_queues_free(if_ctx_t ctx)
1197 {
1198 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1199 	struct ice_vsi *vsi = &sc->pf_vsi;
1200 	struct ice_tx_queue *txq;
1201 	int i;
1202 
1203 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
1204 	 * pointers. Note, the calls here and those in ice_if_detach()
1205 	 * are *BOTH* necessary, as we cannot guarantee which path will be
1206 	 * run first
1207 	 */
1208 	ice_vsi_del_txqs_ctx(vsi);
1209 	ice_vsi_del_rxqs_ctx(vsi);
1210 
1211 	/* Release MSI-X IRQ vectors, if not yet released in ice_if_detach */
1212 	ice_free_irqvs(sc);
1213 
1214 	if (vsi->tx_queues != NULL) {
1215 		/* free the tx_rsq arrays */
1216 		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1217 			if (txq->tx_rsq != NULL) {
1218 				free(txq->tx_rsq, M_ICE);
1219 				txq->tx_rsq = NULL;
1220 			}
1221 		}
1222 		free(vsi->tx_queues, M_ICE);
1223 		vsi->tx_queues = NULL;
1224 		vsi->num_tx_queues = 0;
1225 	}
1226 	if (vsi->rx_queues != NULL) {
1227 		free(vsi->rx_queues, M_ICE);
1228 		vsi->rx_queues = NULL;
1229 		vsi->num_rx_queues = 0;
1230 	}
1231 }
1232 
1233 /**
1234  * ice_msix_que - Fast interrupt handler for MSI-X receive queues
1235  * @arg: The Rx queue memory
1236  *
1237  * Interrupt filter function for iflib MSI-X interrupts. Called by iflib when
1238  * an MSI-X interrupt for a given queue is triggered. Currently this just asks
1239  * iflib to schedule the main Rx thread.
1240  */
1241 static int
1242 ice_msix_que(void *arg)
1243 {
1244 	struct ice_rx_queue __unused *rxq = (struct ice_rx_queue *)arg;
1245 
1246 	/* TODO: dynamic ITR algorithm?? */
1247 
1248 	return (FILTER_SCHEDULE_THREAD);
1249 }
1250 
1251 /**
1252  * ice_msix_admin - Fast interrupt handler for MSI-X admin interrupt
1253  * @arg: pointer to device softc memory
1254  *
1255  * Called by iflib when an administrative interrupt occurs. Should perform any
1256  * fast logic for handling the interrupt cause, and then indicate whether the
1257  * admin task needs to be queued.
1258  */
1259 static int
1260 ice_msix_admin(void *arg)
1261 {
1262 	struct ice_softc *sc = (struct ice_softc *)arg;
1263 	struct ice_hw *hw = &sc->hw;
1264 	device_t dev = sc->dev;
1265 	u32 oicr;
1266 
1267 	/* There is no safe way to modify the enabled miscellaneous causes of
1268 	 * the OICR vector at runtime, as doing so would be prone to race
1269 	 * conditions. Reading PFINT_OICR will unmask the associated interrupt
1270 	 * causes and allow future interrupts to occur. The admin interrupt
1271 	 * vector will not be re-enabled until after we exit this function,
1272 	 * but any delayed tasks must be resilient against possible "late
1273 	 * arrival" interrupts that occur while we're already handling the
1274 	 * task. This is done by using state bits and serializing these
1275 	 * delayed tasks via the admin status task function.
1276 	 */
1277 	oicr = rd32(hw, PFINT_OICR);
1278 
1279 	/* Processing multiple controlq interrupts on a single vector does not
1280 	 * provide an indication of which controlq triggered the interrupt.
1281 	 * We might try reading the INTEVENT bit of the respective PFINT_*_CTL
1282 	 * registers. However, the INTEVENT bit is not guaranteed to be set as
1283 	 * it gets automatically cleared when the hardware acknowledges the
1284 	 * interrupt.
1285 	 *
1286 	 * This means we don't really have a good indication of whether or
1287 	 * which controlq triggered this interrupt. We'll just notify the
1288 	 * admin task that it should check all the controlqs.
1289 	 */
1290 	ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
1291 
1292 	if (oicr & PFINT_OICR_VFLR_M) {
1293 		ice_set_state(&sc->state, ICE_STATE_VFLR_PENDING);
1294 	}
1295 
1296 	if (oicr & PFINT_OICR_MAL_DETECT_M) {
1297 		ice_set_state(&sc->state, ICE_STATE_MDD_PENDING);
1298 	}
1299 
1300 	if (oicr & PFINT_OICR_GRST_M) {
1301 		u32 reset;
1302 
1303 		reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
1304 			GLGEN_RSTAT_RESET_TYPE_S;
1305 
1306 		if (reset == ICE_RESET_CORER)
1307 			sc->soft_stats.corer_count++;
1308 		else if (reset == ICE_RESET_GLOBR)
1309 			sc->soft_stats.globr_count++;
1310 		else
1311 			sc->soft_stats.empr_count++;
1312 
1313 		/* There are a couple of bits at play for handling resets.
1314 		 * First, the ICE_STATE_RESET_OICR_RECV bit is used to
1315 		 * indicate that the driver has received an OICR with a reset
1316 		 * bit active, indicating that a CORER/GLOBR/EMPR is about to
1317 		 * happen. Second, we set hw->reset_ongoing to indicate that
1318 		 * the hardware is in reset. We will set this back to false as
1319 		 * soon as the driver has determined that the hardware is out
1320 		 * of reset.
1321 		 *
1322 		 * If the driver wishes to trigger a request, it can set one of
1323 		 * the ICE_STATE_RESET_*_REQ bits, which will trigger the
1324 		 * correct type of reset.
1325 		 */
1326 		if (!ice_testandset_state(&sc->state, ICE_STATE_RESET_OICR_RECV)) {
1327 			hw->reset_ongoing = true;
1328 			/*
1329 			 * During the NVM update process, there is a driver reset and link
1330 			 * goes down and then up. The below if-statement prevents a second
1331 			 * link flap from occurring in ice_if_init().
1332 			 */
1333 			if (if_getflags(sc->ifp) & IFF_UP)
1334 				ice_set_state(&sc->state, ICE_STATE_FIRST_INIT_LINK);
1335 		}
1336 	}
1337 
1338 	if (oicr & PFINT_OICR_ECC_ERR_M) {
1339 		device_printf(dev, "ECC Error detected!\n");
1340 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1341 	}
1342 
1343 	if (oicr & (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M)) {
1344 		if (oicr & PFINT_OICR_HMC_ERR_M)
1345 			/* Log the HMC errors */
1346 			ice_log_hmc_error(hw, dev);
1347 		ice_rdma_notify_pe_intr(sc, oicr);
1348 	}
1349 
1350 	if (oicr & PFINT_OICR_PCI_EXCEPTION_M) {
1351 		device_printf(dev, "PCI Exception detected!\n");
1352 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1353 	}
1354 
1355 	return (FILTER_SCHEDULE_THREAD);
1356 }
1357 
1358 /**
1359  * ice_allocate_msix - Allocate MSI-X vectors for the interface
1360  * @sc: the device private softc
1361  *
1362  * Map the MSI-X bar, and then request MSI-X vectors in a two-stage process.
1363  *
1364  * First, determine a suitable total number of vectors based on the number
1365  * of CPUs, RSS buckets, the administrative vector, and other demands such as
1366  * RDMA.
1367  *
1368  * Request the desired amount of vectors, and see how many we obtain. If we
1369  * don't obtain as many as desired, reduce the demands by lowering the number
1370  * of requested queues or reducing the demand from other features such as
1371  * RDMA.
1372  *
1373  * @remark This function is required because the driver sets the
1374  * IFLIB_SKIP_MSIX flag indicating that the driver will manage MSI-X vectors
1375  * manually.
1376  *
1377  * @remark This driver will only use MSI-X vectors. If this is not possible,
1378  * neither MSI or legacy interrupts will be tried.
1379  *
1380  * @remark if it exists, os_imgr is initialized here for keeping track of
1381  * the assignments of extra MSIX vectors.
1382  *
1383  * @post on success this function must set the following scctx parameters:
1384  * isc_vectors, isc_nrxqsets, isc_ntxqsets, and isc_intr.
1385  *
1386  * @returns zero on success or an error code on failure.
1387  */
1388 static int
1389 ice_allocate_msix(struct ice_softc *sc)
1390 {
1391 	bool iflib_override_queue_count = false;
1392 	if_softc_ctx_t scctx = sc->scctx;
1393 	device_t dev = sc->dev;
1394 	cpuset_t cpus;
1395 	int bar, queues, vectors, requested;
1396 	int err = 0;
1397 	int rdma;
1398 
1399 	/* Allocate the MSI-X bar */
1400 	bar = scctx->isc_msix_bar;
1401 	sc->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE);
1402 	if (!sc->msix_table) {
1403 		device_printf(dev, "Unable to map MSI-X table\n");
1404 		return (ENOMEM);
1405 	}
1406 
1407 	/* Check if the iflib queue count sysctls have been set */
1408 	if (sc->ifc_sysctl_ntxqs || sc->ifc_sysctl_nrxqs)
1409 		iflib_override_queue_count = true;
1410 
1411 	err = bus_get_cpus(dev, INTR_CPUS, sizeof(cpus), &cpus);
1412 	if (err) {
1413 		device_printf(dev, "%s: Unable to fetch the CPU list: %s\n",
1414 			      __func__, ice_err_str(err));
1415 		CPU_COPY(&all_cpus, &cpus);
1416 	}
1417 
1418 	/* Attempt to mimic behavior of iflib_msix_init */
1419 	if (iflib_override_queue_count) {
1420 		/*
1421 		 * If the override sysctls have been set, limit the queues to
1422 		 * the number of logical CPUs.
1423 		 */
1424 		queues = mp_ncpus;
1425 	} else {
1426 		/*
1427 		 * Otherwise, limit the queue count to the CPUs associated
1428 		 * with the NUMA node the device is associated with.
1429 		 */
1430 		queues = CPU_COUNT(&cpus);
1431 	}
1432 
1433 	/* Clamp to the number of RSS buckets */
1434 	queues = imin(queues, rss_getnumbuckets());
1435 
1436 	/*
1437 	 * Clamp the number of queue pairs to the minimum of the requested Tx
1438 	 * and Rx queues.
1439 	 */
1440 	queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets);
1441 	queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets);
1442 
1443 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA)) {
1444 		/*
1445 		 * Choose a number of RDMA vectors based on the number of CPUs
1446 		 * up to a maximum
1447 		 */
1448 		rdma = min(CPU_COUNT(&cpus), ICE_RDMA_MAX_MSIX);
1449 
1450 		/* Further limit by the user configurable tunable */
1451 		rdma = min(rdma, ice_rdma_max_msix);
1452 	} else {
1453 		rdma = 0;
1454 	}
1455 
1456 	/*
1457 	 * Determine the number of vectors to request. Note that we also need
1458 	 * to allocate one vector for administrative tasks.
1459 	 */
1460 	requested = rdma + queues + 1;
1461 	/* Add extra vectors requested by the user for later subinterface
1462 	 * creation.
1463 	 */
1464 	if_ctx_t ctx = sc->ctx;
1465 	u32 extra_vectors = iflib_get_extra_msix_vectors_sysctl(ctx);
1466 	requested += extra_vectors;
1467 
1468 	vectors = requested;
1469 	err = pci_alloc_msix(dev, &vectors);
1470 	if (err) {
1471 		device_printf(dev, "Failed to allocate %d MSI-X vectors, err %s\n",
1472 			      vectors, ice_err_str(err));
1473 		goto err_free_msix_table;
1474 	}
1475 
1476 	/* If we don't receive enough vectors, reduce demands */
1477 	if (vectors < requested) {
1478 		int diff = requested - vectors;
1479 
1480 		device_printf(dev, "Requested %d MSI-X vectors, but got only %d\n",
1481 			      requested, vectors);
1482 
1483 		diff += extra_vectors;
1484 		extra_vectors = 0;
1485 		/*
1486 		 * The OS didn't grant us the requested number of vectors.
1487 		 * Check to see if we can reduce demands by limiting the
1488 		 * number of vectors allocated to certain features.
1489 		 */
1490 
1491 		if (rdma >= diff) {
1492 			/* Reduce the number of RDMA vectors we reserve */
1493 			rdma -= diff;
1494 			diff = 0;
1495 		} else {
1496 			/* Disable RDMA and reduce the difference */
1497 			ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
1498 			diff -= rdma;
1499 			rdma = 0;
1500 		}
1501 
1502 		/*
1503 		 * If we still have a difference, we need to reduce the number
1504 		 * of queue pairs.
1505 		 *
1506 		 * However, we still need at least one vector for the admin
1507 		 * interrupt and one queue pair.
1508 		 */
1509 		if (queues <= diff) {
1510 			device_printf(dev, "Unable to allocate sufficient MSI-X vectors\n");
1511 			err = (ERANGE);
1512 			goto err_pci_release_msi;
1513 		}
1514 
1515 		queues -= diff;
1516 	}
1517 
1518 	device_printf(dev, "Using %d Tx and Rx queues\n", queues);
1519 	if (rdma)
1520 		device_printf(dev, "Reserving %d MSI-X interrupts for iRDMA\n",
1521 			      rdma);
1522 	device_printf(dev, "Using MSI-X interrupts with %d vectors\n",
1523 		      vectors);
1524 
1525 	/* Split resulting vectors back into requested splits */
1526 	scctx->isc_vectors = vectors;
1527 	scctx->isc_nrxqsets = queues;
1528 	scctx->isc_ntxqsets = queues;
1529 	scctx->isc_intr = IFLIB_INTR_MSIX;
1530 
1531 	sc->irdma_vectors = rdma;
1532 
1533 	/* Interrupt allocation tracking isn't required in recovery mode,
1534 	 * since neither RDMA nor VFs are enabled.
1535 	 */
1536 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1537 		return (0);
1538 
1539 	/* Keep track of which interrupt indices are being used for what */
1540 	sc->lan_vectors = vectors - rdma;
1541 	sc->lan_vectors -= extra_vectors;
1542 	err = ice_resmgr_assign_contiguous(&sc->dev_imgr, sc->pf_imap, sc->lan_vectors);
1543 	if (err) {
1544 		device_printf(dev, "Unable to assign PF interrupt mapping: %s\n",
1545 			      ice_err_str(err));
1546 		goto err_pci_release_msi;
1547 	}
1548 	err = ice_resmgr_assign_contiguous(&sc->dev_imgr, sc->rdma_imap, rdma);
1549 	if (err) {
1550 		device_printf(dev, "Unable to assign PF RDMA interrupt mapping: %s\n",
1551 			      ice_err_str(err));
1552 		goto err_release_pf_imap;
1553 	}
1554 	sc->extra_vectors = extra_vectors;
1555 	/* Setup another resource manager to track the assignments of extra OS
1556 	 * vectors. These OS interrupt allocations don't need to be contiguous,
1557 	 * unlike the ones that come from the device.
1558 	 */
1559 	err = ice_resmgr_init(&sc->os_imgr, sc->extra_vectors);
1560 	if (err) {
1561 		device_printf(dev, "Unable to initialize OS extra interrupt manager: %s\n",
1562 			      ice_err_str(err));
1563 		ice_resmgr_release_map(&sc->dev_imgr, sc->rdma_imap,
1564 					    rdma);
1565 		goto err_release_pf_imap;
1566 	}
1567 	return (0);
1568 
1569 err_release_pf_imap:
1570 	ice_resmgr_release_map(&sc->dev_imgr, sc->pf_imap,
1571 				    sc->lan_vectors);
1572 err_pci_release_msi:
1573 	pci_release_msi(dev);
1574 err_free_msix_table:
1575 	if (sc->msix_table != NULL) {
1576 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
1577 				rman_get_rid(sc->msix_table),
1578 				sc->msix_table);
1579 		sc->msix_table = NULL;
1580 	}
1581 
1582 	return (err);
1583 }
1584 
1585 /**
1586  * ice_if_msix_intr_assign - Assign MSI-X interrupt vectors to queues
1587  * @ctx: the iflib context structure
1588  * @msix: the number of vectors we were assigned
1589  *
1590  * Called by iflib to assign MSI-X vectors to queues. Currently requires that
1591  * we get at least the same number of vectors as we have queues, and that we
1592  * always have the same number of Tx and Rx queues.
1593  *
1594  * Tx queues use a softirq instead of using their own hardware interrupt.
1595  */
1596 static int
1597 ice_if_msix_intr_assign(if_ctx_t ctx, int msix)
1598 {
1599 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1600 	struct ice_vsi *vsi = &sc->pf_vsi;
1601 	int err, i, vector;
1602 
1603 	ASSERT_CTX_LOCKED(sc);
1604 
1605 	if (vsi->num_rx_queues != vsi->num_tx_queues) {
1606 		device_printf(sc->dev,
1607 			      "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
1608 			      vsi->num_tx_queues, vsi->num_rx_queues);
1609 		return (EOPNOTSUPP);
1610 	}
1611 
1612 	if (msix < (vsi->num_rx_queues + 1)) {
1613 		device_printf(sc->dev,
1614 			      "Not enough MSI-X vectors to assign one vector to each queue pair\n");
1615 		return (EOPNOTSUPP);
1616 	}
1617 
1618 	/* Save the number of vectors for future use */
1619 	sc->num_irq_vectors = vsi->num_rx_queues + 1;
1620 
1621 	/* Allocate space to store the IRQ vector data */
1622 	if (!(sc->irqvs =
1623 	      (struct ice_irq_vector *) malloc(sizeof(struct ice_irq_vector) * (sc->num_irq_vectors),
1624 					       M_ICE, M_NOWAIT))) {
1625 		device_printf(sc->dev,
1626 			      "Unable to allocate irqv memory\n");
1627 		return (ENOMEM);
1628 	}
1629 
1630 	/* Administrative interrupt events will use vector 0 */
1631 	err = iflib_irq_alloc_generic(ctx, &sc->irqvs[0].irq, 1, IFLIB_INTR_ADMIN,
1632 				      ice_msix_admin, sc, 0, "admin");
1633 	if (err) {
1634 		device_printf(sc->dev,
1635 			      "Failed to register Admin queue handler: %s\n",
1636 			      ice_err_str(err));
1637 		goto free_irqvs;
1638 	}
1639 	sc->irqvs[0].me = 0;
1640 
1641 	/* Do not allocate queue interrupts when in recovery mode */
1642 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1643 		return (0);
1644 
1645 	int rid;
1646 	for (i = 0, vector = 1; i < vsi->num_rx_queues; i++, vector++) {
1647 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1648 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1649 		char irq_name[16];
1650 
1651 		rid = vector + 1;
1652 
1653 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
1654 		err = iflib_irq_alloc_generic(ctx, &sc->irqvs[vector].irq, rid,
1655 					      IFLIB_INTR_RXTX, ice_msix_que,
1656 					      rxq, rxq->me, irq_name);
1657 		if (err) {
1658 			device_printf(sc->dev,
1659 				      "Failed to allocate q int %d err: %s\n",
1660 				      i, ice_err_str(err));
1661 			vector--;
1662 			i--;
1663 			goto fail;
1664 		}
1665 		sc->irqvs[vector].me = vector;
1666 		rxq->irqv = &sc->irqvs[vector];
1667 
1668 		bzero(irq_name, sizeof(irq_name));
1669 
1670 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
1671 		iflib_softirq_alloc_generic(ctx, &sc->irqvs[vector].irq,
1672 					    IFLIB_INTR_TX, txq,
1673 					    txq->me, irq_name);
1674 		txq->irqv = &sc->irqvs[vector];
1675 	}
1676 
1677 	/* For future interrupt assignments */
1678 	sc->last_rid = rid + sc->irdma_vectors;
1679 
1680 	return (0);
1681 fail:
1682 	for (; i >= 0; i--, vector--)
1683 		iflib_irq_free(ctx, &sc->irqvs[vector].irq);
1684 	iflib_irq_free(ctx, &sc->irqvs[0].irq);
1685 free_irqvs:
1686 	free(sc->irqvs, M_ICE);
1687 	sc->irqvs = NULL;
1688 	return err;
1689 }
1690 
1691 /**
1692  * ice_if_mtu_set - Set the device MTU
1693  * @ctx: iflib context structure
1694  * @mtu: the MTU requested
1695  *
1696  * Called by iflib to configure the device's Maximum Transmission Unit (MTU).
1697  *
1698  * @pre assumes the caller holds the iflib CTX lock
1699  */
1700 static int
1701 ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu)
1702 {
1703 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1704 
1705 	ASSERT_CTX_LOCKED(sc);
1706 
1707 	/* Do not support configuration when in recovery mode */
1708 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1709 		return (ENOSYS);
1710 
1711 	if (mtu < ICE_MIN_MTU || mtu > ICE_MAX_MTU)
1712 		return (EINVAL);
1713 
1714 	sc->scctx->isc_max_frame_size = mtu +
1715 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
1716 
1717 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
1718 
1719 	return (0);
1720 }
1721 
1722 /**
1723  * ice_if_intr_enable - Enable device interrupts
1724  * @ctx: iflib context structure
1725  *
1726  * Called by iflib to request enabling device interrupts.
1727  */
1728 static void
1729 ice_if_intr_enable(if_ctx_t ctx)
1730 {
1731 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1732 	struct ice_vsi *vsi = &sc->pf_vsi;
1733 	struct ice_hw *hw = &sc->hw;
1734 
1735 	ASSERT_CTX_LOCKED(sc);
1736 
1737 	/* Enable ITR 0 */
1738 	ice_enable_intr(hw, sc->irqvs[0].me);
1739 
1740 	/* Do not enable queue interrupts in recovery mode */
1741 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1742 		return;
1743 
1744 	/* Enable all queue interrupts */
1745 	for (int i = 0; i < vsi->num_rx_queues; i++)
1746 		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
1747 }
1748 
1749 /**
1750  * ice_if_intr_disable - Disable device interrupts
1751  * @ctx: iflib context structure
1752  *
1753  * Called by iflib to request disabling device interrupts.
1754  */
1755 static void
1756 ice_if_intr_disable(if_ctx_t ctx)
1757 {
1758 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1759 	struct ice_hw *hw = &sc->hw;
1760 	unsigned int i;
1761 
1762 	ASSERT_CTX_LOCKED(sc);
1763 
1764 	/* IFDI_INTR_DISABLE may be called prior to interrupts actually being
1765 	 * assigned to queues. Instead of assuming that the interrupt
1766 	 * assignment in the rx_queues structure is valid, just disable all
1767 	 * possible interrupts
1768 	 *
1769 	 * Note that we choose not to disable ITR 0 because this handles the
1770 	 * AdminQ interrupts, and we want to keep processing these even when
1771 	 * the interface is offline.
1772 	 */
1773 	for (i = 1; i < hw->func_caps.common_cap.num_msix_vectors; i++)
1774 		ice_disable_intr(hw, i);
1775 }
1776 
1777 /**
1778  * ice_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
1779  * @ctx: iflib context structure
1780  * @rxqid: the Rx queue to enable
1781  *
1782  * Enable a specific Rx queue interrupt.
1783  *
1784  * This function is not protected by the iflib CTX lock.
1785  */
1786 static int
1787 ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
1788 {
1789 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1790 	struct ice_vsi *vsi = &sc->pf_vsi;
1791 	struct ice_hw *hw = &sc->hw;
1792 
1793 	/* Do not enable queue interrupts in recovery mode */
1794 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1795 		return (ENOSYS);
1796 
1797 	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
1798 	return (0);
1799 }
1800 
1801 /**
1802  * ice_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
1803  * @ctx: iflib context structure
1804  * @txqid: the Tx queue to enable
1805  *
1806  * Enable a specific Tx queue interrupt.
1807  *
1808  * This function is not protected by the iflib CTX lock.
1809  */
1810 static int
1811 ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
1812 {
1813 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1814 	struct ice_vsi *vsi = &sc->pf_vsi;
1815 	struct ice_hw *hw = &sc->hw;
1816 
1817 	/* Do not enable queue interrupts in recovery mode */
1818 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1819 		return (ENOSYS);
1820 
1821 	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
1822 	return (0);
1823 }
1824 
1825 /**
1826  * ice_set_default_promisc_mask - Set default config for promisc settings
1827  * @promisc_mask: bitmask to setup
1828  *
1829  * The ice_(set|clear)_vsi_promisc() function expects a mask of promiscuous
1830  * modes to operate on. The mask used in here is the default one for the
1831  * driver, where promiscuous is enabled/disabled for all types of
1832  * non-VLAN-tagged/VLAN 0 traffic.
1833  */
1834 static void
1835 ice_set_default_promisc_mask(ice_bitmap_t *promisc_mask)
1836 {
1837 	ice_zero_bitmap(promisc_mask, ICE_PROMISC_MAX);
1838 	ice_set_bit(ICE_PROMISC_UCAST_TX, promisc_mask);
1839 	ice_set_bit(ICE_PROMISC_UCAST_RX, promisc_mask);
1840 	ice_set_bit(ICE_PROMISC_MCAST_TX, promisc_mask);
1841 	ice_set_bit(ICE_PROMISC_MCAST_RX, promisc_mask);
1842 }
1843 
1844 /**
1845  * ice_if_promisc_set - Set device promiscuous mode
1846  * @ctx: iflib context structure
1847  * @flags: promiscuous flags to configure
1848  *
1849  * Called by iflib to configure device promiscuous mode.
1850  *
1851  * @remark Calls to this function will always overwrite the previous setting
1852  */
1853 static int
1854 ice_if_promisc_set(if_ctx_t ctx, int flags)
1855 {
1856 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1857 	struct ice_hw *hw = &sc->hw;
1858 	device_t dev = sc->dev;
1859 	enum ice_status status;
1860 	bool promisc_enable = flags & IFF_PROMISC;
1861 	bool multi_enable = flags & IFF_ALLMULTI;
1862 	ice_declare_bitmap(promisc_mask, ICE_PROMISC_MAX);
1863 
1864 	/* Do not support configuration when in recovery mode */
1865 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1866 		return (ENOSYS);
1867 
1868 	ice_set_default_promisc_mask(promisc_mask);
1869 
1870 	if (multi_enable)
1871 		return (EOPNOTSUPP);
1872 
1873 	if (promisc_enable) {
1874 		status = ice_set_vsi_promisc(hw, sc->pf_vsi.idx,
1875 					     promisc_mask, 0);
1876 		if (status && status != ICE_ERR_ALREADY_EXISTS) {
1877 			device_printf(dev,
1878 				      "Failed to enable promiscuous mode for PF VSI, err %s aq_err %s\n",
1879 				      ice_status_str(status),
1880 				      ice_aq_str(hw->adminq.sq_last_status));
1881 			return (EIO);
1882 		}
1883 	} else {
1884 		status = ice_clear_vsi_promisc(hw, sc->pf_vsi.idx,
1885 					       promisc_mask, 0);
1886 		if (status) {
1887 			device_printf(dev,
1888 				      "Failed to disable promiscuous mode for PF VSI, err %s aq_err %s\n",
1889 				      ice_status_str(status),
1890 				      ice_aq_str(hw->adminq.sq_last_status));
1891 			return (EIO);
1892 		}
1893 	}
1894 
1895 	return (0);
1896 }
1897 
1898 /**
1899  * ice_if_media_change - Change device media
1900  * @ctx: device ctx structure
1901  *
1902  * Called by iflib when a media change is requested. This operation is not
1903  * supported by the hardware, so we just return an error code.
1904  */
1905 static int
1906 ice_if_media_change(if_ctx_t ctx)
1907 {
1908 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1909 
1910 	device_printf(sc->dev, "Media change is not supported.\n");
1911 	return (ENODEV);
1912 }
1913 
1914 /**
1915  * ice_if_media_status - Report current device media
1916  * @ctx: iflib context structure
1917  * @ifmr: ifmedia request structure to update
1918  *
1919  * Updates the provided ifmr with current device media status, including link
1920  * status and media type.
1921  */
1922 static void
1923 ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
1924 {
1925 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1926 	struct ice_link_status *li = &sc->hw.port_info->phy.link_info;
1927 
1928 	ifmr->ifm_status = IFM_AVALID;
1929 	ifmr->ifm_active = IFM_ETHER;
1930 
1931 	/* Never report link up or media types when in recovery mode */
1932 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1933 		return;
1934 
1935 	if (!sc->link_up)
1936 		return;
1937 
1938 	ifmr->ifm_status |= IFM_ACTIVE;
1939 	ifmr->ifm_active |= IFM_FDX;
1940 
1941 	if (li->phy_type_low)
1942 		ifmr->ifm_active |= ice_get_phy_type_low(li->phy_type_low);
1943 	else if (li->phy_type_high)
1944 		ifmr->ifm_active |= ice_get_phy_type_high(li->phy_type_high);
1945 	else
1946 		ifmr->ifm_active |= IFM_UNKNOWN;
1947 
1948 	/* Report flow control status as well */
1949 	if (li->an_info & ICE_AQ_LINK_PAUSE_TX)
1950 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1951 	if (li->an_info & ICE_AQ_LINK_PAUSE_RX)
1952 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1953 }
1954 
1955 /**
1956  * ice_init_tx_tracking - Initialize Tx queue software tracking values
1957  * @vsi: the VSI to initialize
1958  *
1959  * Initialize Tx queue software tracking values, including the Report Status
1960  * queue, and related software tracking values.
1961  */
1962 static void
1963 ice_init_tx_tracking(struct ice_vsi *vsi)
1964 {
1965 	struct ice_tx_queue *txq;
1966 	size_t j;
1967 	int i;
1968 
1969 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1970 
1971 		txq->tx_rs_cidx = txq->tx_rs_pidx = 0;
1972 
1973 		/* Initialize the last processed descriptor to be the end of
1974 		 * the ring, rather than the start, so that we avoid an
1975 		 * off-by-one error in ice_ift_txd_credits_update for the
1976 		 * first packet.
1977 		 */
1978 		txq->tx_cidx_processed = txq->desc_count - 1;
1979 
1980 		for (j = 0; j < txq->desc_count; j++)
1981 			txq->tx_rsq[j] = QIDX_INVALID;
1982 	}
1983 }
1984 
1985 /**
1986  * ice_update_rx_mbuf_sz - Update the Rx buffer size for all queues
1987  * @sc: the device softc
1988  *
1989  * Called to update the Rx queue mbuf_sz parameter for configuring the receive
1990  * buffer sizes when programming hardware.
1991  */
1992 static void
1993 ice_update_rx_mbuf_sz(struct ice_softc *sc)
1994 {
1995 	uint32_t mbuf_sz = iflib_get_rx_mbuf_sz(sc->ctx);
1996 	struct ice_vsi *vsi = &sc->pf_vsi;
1997 
1998 	MPASS(mbuf_sz <= UINT16_MAX);
1999 	vsi->mbuf_sz = mbuf_sz;
2000 }
2001 
2002 /**
2003  * ice_if_init - Initialize the device
2004  * @ctx: iflib ctx structure
2005  *
2006  * Called by iflib to bring the device up, i.e. ifconfig ice0 up. Initializes
2007  * device filters and prepares the Tx and Rx engines.
2008  *
2009  * @pre assumes the caller holds the iflib CTX lock
2010  */
2011 static void
2012 ice_if_init(if_ctx_t ctx)
2013 {
2014 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
2015 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2016 	device_t dev = sc->dev;
2017 	int err;
2018 
2019 	ASSERT_CTX_LOCKED(sc);
2020 
2021 	/*
2022 	 * We've seen an issue with 11.3/12.1 where sideband routines are
2023 	 * called after detach is called.  This would call routines after
2024 	 * if_stop, causing issues with the teardown process.  This has
2025 	 * seemingly been fixed in STABLE snapshots, but it seems like a
2026 	 * good idea to have this guard here regardless.
2027 	 */
2028 	if (ice_driver_is_detaching(sc))
2029 		return;
2030 
2031 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2032 		return;
2033 
2034 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
2035 		device_printf(sc->dev, "request to start interface cannot be completed as the device failed to reset\n");
2036 		return;
2037 	}
2038 
2039 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
2040 		device_printf(sc->dev, "request to start interface while device is prepared for impending reset\n");
2041 		return;
2042 	}
2043 
2044 	ice_update_rx_mbuf_sz(sc);
2045 
2046 	/* Update the MAC address... User might use a LAA */
2047 	err = ice_update_laa_mac(sc);
2048 	if (err) {
2049 		device_printf(dev,
2050 			      "LAA address change failed, err %s\n",
2051 			      ice_err_str(err));
2052 		return;
2053 	}
2054 
2055 	/* Initialize software Tx tracking values */
2056 	ice_init_tx_tracking(&sc->pf_vsi);
2057 
2058 	err = ice_cfg_vsi_for_tx(&sc->pf_vsi);
2059 	if (err) {
2060 		device_printf(dev,
2061 			      "Unable to configure the main VSI for Tx: %s\n",
2062 			      ice_err_str(err));
2063 		return;
2064 	}
2065 
2066 	err = ice_cfg_vsi_for_rx(&sc->pf_vsi);
2067 	if (err) {
2068 		device_printf(dev,
2069 			      "Unable to configure the main VSI for Rx: %s\n",
2070 			      ice_err_str(err));
2071 		goto err_cleanup_tx;
2072 	}
2073 
2074 	err = ice_control_all_rx_queues(&sc->pf_vsi, true);
2075 	if (err) {
2076 		device_printf(dev,
2077 			      "Unable to enable Rx rings for transmit: %s\n",
2078 			      ice_err_str(err));
2079 		goto err_cleanup_tx;
2080 	}
2081 
2082 	err = ice_cfg_pf_default_mac_filters(sc);
2083 	if (err) {
2084 		device_printf(dev,
2085 			      "Unable to configure default MAC filters: %s\n",
2086 			      ice_err_str(err));
2087 		goto err_stop_rx;
2088 	}
2089 
2090 	/* We use software interrupts for Tx, so we only program the hardware
2091 	 * interrupts for Rx.
2092 	 */
2093 	ice_configure_all_rxq_interrupts(&sc->pf_vsi);
2094 	ice_configure_rx_itr(&sc->pf_vsi);
2095 
2096 	/* Configure promiscuous mode */
2097 	ice_if_promisc_set(ctx, if_getflags(sc->ifp));
2098 
2099 	if (!ice_testandclear_state(&sc->state, ICE_STATE_FIRST_INIT_LINK))
2100 		if (!sc->link_up && ((if_getflags(sc->ifp) & IFF_UP) ||
2101 			 ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN)))
2102 			ice_set_link(sc, true);
2103 
2104 	ice_rdma_pf_init(sc);
2105 
2106 	ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED);
2107 
2108 	if (sc->mirr_if && ice_testandclear_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT)) {
2109 		ice_clear_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED);
2110 		iflib_request_reset(sc->mirr_if->subctx);
2111 		iflib_admin_intr_deferred(sc->mirr_if->subctx);
2112 	}
2113 
2114 	return;
2115 
2116 err_stop_rx:
2117 	ice_control_all_rx_queues(&sc->pf_vsi, false);
2118 err_cleanup_tx:
2119 	ice_vsi_disable_tx(&sc->pf_vsi);
2120 }
2121 
2122 /**
2123  * ice_poll_for_media_avail - Re-enable link if media is detected
2124  * @sc: device private structure
2125  *
2126  * Intended to be called from the driver's timer function, this function
2127  * sends the Get Link Status AQ command and re-enables HW link if the
2128  * command says that media is available.
2129  *
2130  * If the driver doesn't have the "NO_MEDIA" state set, then this does nothing,
2131  * since media removal events are supposed to be sent to the driver through
2132  * a link status event.
2133  */
2134 static void
2135 ice_poll_for_media_avail(struct ice_softc *sc)
2136 {
2137 	struct ice_hw *hw = &sc->hw;
2138 	struct ice_port_info *pi = hw->port_info;
2139 
2140 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) {
2141 		pi->phy.get_link_info = true;
2142 		ice_get_link_status(pi, &sc->link_up);
2143 
2144 		if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
2145 			enum ice_status status;
2146 
2147 			/* Re-enable link and re-apply user link settings */
2148 			if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) ||
2149 			    (if_getflags(sc->ifp) & IFF_UP)) {
2150 				ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
2151 
2152 				/* Update the OS about changes in media capability */
2153 				status = ice_add_media_types(sc, sc->media);
2154 				if (status)
2155 					device_printf(sc->dev,
2156 					    "Error adding device media types: %s aq_err %s\n",
2157 					    ice_status_str(status),
2158 					    ice_aq_str(hw->adminq.sq_last_status));
2159 			}
2160 
2161 			ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
2162 		}
2163 	}
2164 }
2165 
2166 /**
2167  * ice_if_timer - called by iflib periodically
2168  * @ctx: iflib ctx structure
2169  * @qid: the queue this timer was called for
2170  *
2171  * This callback is triggered by iflib periodically. We use it to update the
2172  * hw statistics.
2173  *
2174  * @remark this function is not protected by the iflib CTX lock.
2175  */
2176 static void
2177 ice_if_timer(if_ctx_t ctx, uint16_t qid)
2178 {
2179 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2180 	uint64_t prev_link_xoff_rx = sc->stats.cur.link_xoff_rx;
2181 
2182 	if (qid != 0)
2183 		return;
2184 
2185 	/* Do not attempt to update stats when in recovery mode */
2186 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2187 		return;
2188 
2189 	/* Update device statistics */
2190 	ice_update_pf_stats(sc);
2191 
2192 	/*
2193 	 * For proper watchdog management, the iflib stack needs to know if
2194 	 * we've been paused during the last interval. Check if the
2195 	 * link_xoff_rx stat changed, and set the isc_pause_frames, if so.
2196 	 */
2197 	if (sc->stats.cur.link_xoff_rx != prev_link_xoff_rx)
2198 		sc->scctx->isc_pause_frames = 1;
2199 
2200 	/* Update the primary VSI stats */
2201 	ice_update_vsi_hw_stats(&sc->pf_vsi);
2202 
2203 	/* Update mirror VSI stats */
2204 	if (sc->mirr_if && sc->mirr_if->if_attached)
2205 		ice_update_vsi_hw_stats(sc->mirr_if->vsi);
2206 }
2207 
2208 /**
2209  * ice_admin_timer - called periodically to trigger the admin task
2210  * @arg: callout(9) argument pointing to the device private softc structure
2211  *
2212  * Timer function used as part of a callout(9) timer that will periodically
2213  * trigger the admin task, even when the interface is down.
2214  *
2215  * @remark this function is not called by iflib and is not protected by the
2216  * iflib CTX lock.
2217  *
2218  * @remark because this is a callout function, it cannot sleep and should not
2219  * attempt taking the iflib CTX lock.
2220  */
2221 static void
2222 ice_admin_timer(void *arg)
2223 {
2224 	struct ice_softc *sc = (struct ice_softc *)arg;
2225 
2226 	/*
2227 	 * There is a point where callout routines are no longer
2228 	 * cancelable.  So there exists a window of time where the
2229 	 * driver enters detach() and tries to cancel the callout, but the
2230 	 * callout routine has passed the cancellation point.  The detach()
2231 	 * routine is unaware of this and tries to free resources that the
2232 	 * callout routine needs.  So we check for the detach state flag to
2233 	 * at least shrink the window of opportunity.
2234 	 */
2235 	if (ice_driver_is_detaching(sc))
2236 		return;
2237 
2238 	/* Fire off the admin task */
2239 	iflib_admin_intr_deferred(sc->ctx);
2240 
2241 	/* Reschedule the admin timer */
2242 	callout_schedule(&sc->admin_timer, hz/2);
2243 }
2244 
2245 /**
2246  * ice_transition_recovery_mode - Transition to recovery mode
2247  * @sc: the device private softc
2248  *
2249  * Called when the driver detects that the firmware has entered recovery mode
2250  * at run time.
2251  */
2252 static void
2253 ice_transition_recovery_mode(struct ice_softc *sc)
2254 {
2255 	struct ice_vsi *vsi = &sc->pf_vsi;
2256 	int i;
2257 
2258 	device_printf(sc->dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
2259 
2260 	/* Tell the stack that the link has gone down */
2261 	iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
2262 
2263 	/* Request that the device be re-initialized */
2264 	ice_request_stack_reinit(sc);
2265 
2266 	ice_rdma_pf_detach(sc);
2267 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2268 
2269 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2270 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2271 
2272 	ice_vsi_del_txqs_ctx(vsi);
2273 	ice_vsi_del_rxqs_ctx(vsi);
2274 
2275 	for (i = 0; i < sc->num_available_vsi; i++) {
2276 		if (sc->all_vsi[i])
2277 			ice_release_vsi(sc->all_vsi[i]);
2278 	}
2279 	sc->num_available_vsi = 0;
2280 
2281 	if (sc->all_vsi) {
2282 		free(sc->all_vsi, M_ICE);
2283 		sc->all_vsi = NULL;
2284 	}
2285 
2286 	/* Destroy the interrupt manager */
2287 	ice_resmgr_destroy(&sc->dev_imgr);
2288 	/* Destroy the queue managers */
2289 	ice_resmgr_destroy(&sc->tx_qmgr);
2290 	ice_resmgr_destroy(&sc->rx_qmgr);
2291 
2292 	ice_deinit_hw(&sc->hw);
2293 }
2294 
2295 /**
2296  * ice_transition_safe_mode - Transition to safe mode
2297  * @sc: the device private softc
2298  *
2299  * Called when the driver attempts to reload the DDP package during a device
2300  * reset, and the new download fails. If so, we must transition to safe mode
2301  * at run time.
2302  *
2303  * @remark although safe mode normally allocates only a single queue, we can't
2304  * change the number of queues dynamically when using iflib. Due to this, we
2305  * do not attempt to reduce the number of queues.
2306  */
2307 static void
2308 ice_transition_safe_mode(struct ice_softc *sc)
2309 {
2310 	/* Indicate that we are in Safe mode */
2311 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
2312 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
2313 
2314 	ice_rdma_pf_detach(sc);
2315 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2316 
2317 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2318 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2319 
2320 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2321 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_en);
2322 }
2323 
2324 /**
2325  * ice_if_update_admin_status - update admin status
2326  * @ctx: iflib ctx structure
2327  *
2328  * Called by iflib to update the admin status. For our purposes, this means
2329  * check the adminq, and update the link status. It's ultimately triggered by
2330  * our admin interrupt, or by the ice_if_timer periodically.
2331  *
2332  * @pre assumes the caller holds the iflib CTX lock
2333  */
2334 static void
2335 ice_if_update_admin_status(if_ctx_t ctx)
2336 {
2337 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2338 	enum ice_fw_modes fw_mode;
2339 	bool reschedule = false;
2340 	u16 pending = 0;
2341 
2342 	ASSERT_CTX_LOCKED(sc);
2343 
2344 	/* Check if the firmware entered recovery mode at run time */
2345 	fw_mode = ice_get_fw_mode(&sc->hw);
2346 	if (fw_mode == ICE_FW_MODE_REC) {
2347 		if (!ice_testandset_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2348 			/* If we just entered recovery mode, log a warning to
2349 			 * the system administrator and deinit driver state
2350 			 * that is no longer functional.
2351 			 */
2352 			ice_transition_recovery_mode(sc);
2353 		}
2354 	} else if (fw_mode == ICE_FW_MODE_ROLLBACK) {
2355 		if (!ice_testandset_state(&sc->state, ICE_STATE_ROLLBACK_MODE)) {
2356 			/* Rollback mode isn't fatal, but we don't want to
2357 			 * repeatedly post a message about it.
2358 			 */
2359 			ice_print_rollback_msg(&sc->hw);
2360 		}
2361 	}
2362 
2363 	/* Handle global reset events */
2364 	ice_handle_reset_event(sc);
2365 
2366 	/* Handle PF reset requests */
2367 	ice_handle_pf_reset_request(sc);
2368 
2369 	/* Handle MDD events */
2370 	ice_handle_mdd_event(sc);
2371 
2372 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED) ||
2373 	    ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET) ||
2374 	    ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2375 		/*
2376 		 * If we know the control queues are disabled, skip processing
2377 		 * the control queues entirely.
2378 		 */
2379 		;
2380 	} else if (ice_testandclear_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING)) {
2381 		ice_process_ctrlq(sc, ICE_CTL_Q_ADMIN, &pending);
2382 		if (pending > 0)
2383 			reschedule = true;
2384 
2385 		ice_process_ctrlq(sc, ICE_CTL_Q_MAILBOX, &pending);
2386 		if (pending > 0)
2387 			reschedule = true;
2388 	}
2389 
2390 	/* Poll for link up */
2391 	ice_poll_for_media_avail(sc);
2392 
2393 	/* Check and update link status */
2394 	ice_update_link_status(sc, false);
2395 
2396 	/*
2397 	 * If there are still messages to process, we need to reschedule
2398 	 * ourselves. Otherwise, we can just re-enable the interrupt. We'll be
2399 	 * woken up at the next interrupt or timer event.
2400 	 */
2401 	if (reschedule) {
2402 		ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
2403 		iflib_admin_intr_deferred(ctx);
2404 	} else {
2405 		ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2406 	}
2407 }
2408 
2409 /**
2410  * ice_prepare_for_reset - Prepare device for an impending reset
2411  * @sc: The device private softc
2412  *
2413  * Prepare the driver for an impending reset, shutting down VSIs, clearing the
2414  * scheduler setup, and shutting down controlqs. Uses the
2415  * ICE_STATE_PREPARED_FOR_RESET to indicate whether we've already prepared the
2416  * driver for reset or not.
2417  */
2418 static void
2419 ice_prepare_for_reset(struct ice_softc *sc)
2420 {
2421 	struct ice_hw *hw = &sc->hw;
2422 
2423 	/* If we're already prepared, there's nothing to do */
2424 	if (ice_testandset_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET))
2425 		return;
2426 
2427 	log(LOG_INFO, "%s: preparing to reset device logic\n", if_name(sc->ifp));
2428 
2429 	/* In recovery mode, hardware is not initialized */
2430 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2431 		return;
2432 
2433 	/* inform the RDMA client */
2434 	ice_rdma_notify_reset(sc);
2435 	/* stop the RDMA client */
2436 	ice_rdma_pf_stop(sc);
2437 
2438 	/* Release the main PF VSI queue mappings */
2439 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2440 				    sc->pf_vsi.num_tx_queues);
2441 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2442 				    sc->pf_vsi.num_rx_queues);
2443 	if (sc->mirr_if) {
2444 		ice_resmgr_release_map(&sc->tx_qmgr, sc->mirr_if->vsi->tx_qmap,
2445 		    sc->mirr_if->num_irq_vectors);
2446 		ice_resmgr_release_map(&sc->rx_qmgr, sc->mirr_if->vsi->rx_qmap,
2447 		    sc->mirr_if->num_irq_vectors);
2448 	}
2449 
2450 	ice_clear_hw_tbls(hw);
2451 
2452 	if (hw->port_info)
2453 		ice_sched_cleanup_all(hw);
2454 
2455 	ice_shutdown_all_ctrlq(hw, false);
2456 }
2457 
2458 /**
2459  * ice_rebuild_pf_vsi_qmap - Rebuild the main PF VSI queue mapping
2460  * @sc: the device softc pointer
2461  *
2462  * Loops over the Tx and Rx queues for the main PF VSI and reassigns the queue
2463  * mapping after a reset occurred.
2464  */
2465 static int
2466 ice_rebuild_pf_vsi_qmap(struct ice_softc *sc)
2467 {
2468 	struct ice_vsi *vsi = &sc->pf_vsi;
2469 	struct ice_tx_queue *txq;
2470 	struct ice_rx_queue *rxq;
2471 	int err, i;
2472 
2473 	/* Re-assign Tx queues from PF space to the main VSI */
2474 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap,
2475 					    vsi->num_tx_queues);
2476 	if (err) {
2477 		device_printf(sc->dev, "Unable to re-assign PF Tx queues: %s\n",
2478 			      ice_err_str(err));
2479 		return (err);
2480 	}
2481 
2482 	/* Re-assign Rx queues from PF space to this VSI */
2483 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap,
2484 					    vsi->num_rx_queues);
2485 	if (err) {
2486 		device_printf(sc->dev, "Unable to re-assign PF Rx queues: %s\n",
2487 			      ice_err_str(err));
2488 		goto err_release_tx_queues;
2489 	}
2490 
2491 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
2492 
2493 	/* Re-assign Tx queue tail pointers */
2494 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
2495 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
2496 
2497 	/* Re-assign Rx queue tail pointers */
2498 	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
2499 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
2500 
2501 	return (0);
2502 
2503 err_release_tx_queues:
2504 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2505 				   sc->pf_vsi.num_tx_queues);
2506 
2507 	return (err);
2508 }
2509 
/*
 * Determine if the iflib context is active: evaluates to non-zero when the
 * ifnet behind the context has IFF_DRV_RUNNING set in its driver flags.
 */
#define CTX_ACTIVE(ctx) (if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)
2512 
2513 /**
2514  * ice_rebuild_recovery_mode - Rebuild driver state while in recovery mode
2515  * @sc: The device private softc
2516  *
2517  * Handle a driver rebuild while in recovery mode. This will only rebuild the
2518  * limited functionality supported while in recovery mode.
2519  */
2520 static void
2521 ice_rebuild_recovery_mode(struct ice_softc *sc)
2522 {
2523 	device_t dev = sc->dev;
2524 
2525 	/* enable PCIe bus master */
2526 	pci_enable_busmaster(dev);
2527 
2528 	/* Configure interrupt causes for the administrative interrupt */
2529 	ice_configure_misc_interrupts(sc);
2530 
2531 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2532 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2533 
2534 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2535 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2536 
2537 	log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp));
2538 
2539 	/* In order to completely restore device functionality, the iflib core
2540 	 * needs to be reset. We need to request an iflib reset. Additionally,
2541 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2542 	 * the iflib core, we also want re-run the admin task so that iflib
2543 	 * resets immediately instead of waiting for the next interrupt.
2544 	 */
2545 	ice_request_stack_reinit(sc);
2546 
2547 	return;
2548 }
2549 
2550 /**
2551  * ice_rebuild - Rebuild driver state post reset
2552  * @sc: The device private softc
2553  *
2554  * Restore driver state after a reset occurred. Restart the controlqs, setup
2555  * the hardware port, and re-enable the VSIs.
2556  */
2557 static void
2558 ice_rebuild(struct ice_softc *sc)
2559 {
2560 	struct ice_hw *hw = &sc->hw;
2561 	device_t dev = sc->dev;
2562 	enum ice_ddp_state pkg_state;
2563 	enum ice_status status;
2564 	int err;
2565 
2566 	sc->rebuild_ticks = ticks;
2567 
2568 	/* If we're rebuilding, then a reset has succeeded. */
2569 	ice_clear_state(&sc->state, ICE_STATE_RESET_FAILED);
2570 
2571 	/*
2572 	 * If the firmware is in recovery mode, only restore the limited
2573 	 * functionality supported by recovery mode.
2574 	 */
2575 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2576 		ice_rebuild_recovery_mode(sc);
2577 		return;
2578 	}
2579 
2580 	/* enable PCIe bus master */
2581 	pci_enable_busmaster(dev);
2582 
2583 	status = ice_init_all_ctrlq(hw);
2584 	if (status) {
2585 		device_printf(dev, "failed to re-init controlqs, err %s\n",
2586 			      ice_status_str(status));
2587 		goto err_shutdown_ctrlq;
2588 	}
2589 
2590 	/* Query the allocated resources for Tx scheduler */
2591 	status = ice_sched_query_res_alloc(hw);
2592 	if (status) {
2593 		device_printf(dev,
2594 			      "Failed to query scheduler resources, err %s aq_err %s\n",
2595 			      ice_status_str(status),
2596 			      ice_aq_str(hw->adminq.sq_last_status));
2597 		goto err_shutdown_ctrlq;
2598 	}
2599 
2600 	/* Re-enable FW logging. Keep going even if this fails */
2601 	status = ice_fwlog_set(hw, &hw->fwlog_cfg);
2602 	if (!status) {
2603 		/*
2604 		 * We should have the most updated cached copy of the
2605 		 * configuration, regardless of whether we're rebuilding
2606 		 * or not.  So we'll simply check to see if logging was
2607 		 * enabled pre-rebuild.
2608 		 */
2609 		if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2610 			status = ice_fwlog_register(hw);
2611 			if (status)
2612 				device_printf(dev, "failed to re-register fw logging, err %s aq_err %s\n",
2613 				   ice_status_str(status),
2614 				   ice_aq_str(hw->adminq.sq_last_status));
2615 		}
2616 	} else
2617 		device_printf(dev, "failed to rebuild fw logging configuration, err %s aq_err %s\n",
2618 		   ice_status_str(status),
2619 		   ice_aq_str(hw->adminq.sq_last_status));
2620 
2621 	err = ice_send_version(sc);
2622 	if (err)
2623 		goto err_shutdown_ctrlq;
2624 
2625 	err = ice_init_link_events(sc);
2626 	if (err) {
2627 		device_printf(dev, "ice_init_link_events failed: %s\n",
2628 			      ice_err_str(err));
2629 		goto err_shutdown_ctrlq;
2630 	}
2631 
2632 	status = ice_clear_pf_cfg(hw);
2633 	if (status) {
2634 		device_printf(dev, "failed to clear PF configuration, err %s\n",
2635 			      ice_status_str(status));
2636 		goto err_shutdown_ctrlq;
2637 	}
2638 
2639 	ice_clean_all_vsi_rss_cfg(sc);
2640 
2641 	ice_clear_pxe_mode(hw);
2642 
2643 	status = ice_get_caps(hw);
2644 	if (status) {
2645 		device_printf(dev, "failed to get capabilities, err %s\n",
2646 			      ice_status_str(status));
2647 		goto err_shutdown_ctrlq;
2648 	}
2649 
2650 	status = ice_sched_init_port(hw->port_info);
2651 	if (status) {
2652 		device_printf(dev, "failed to initialize port, err %s\n",
2653 			      ice_status_str(status));
2654 		goto err_sched_cleanup;
2655 	}
2656 
2657 	/* If we previously loaded the package, it needs to be reloaded now */
2658 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) {
2659 		pkg_state = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
2660 		if (!ice_is_init_pkg_successful(pkg_state)) {
2661 			ice_log_pkg_init(sc, pkg_state);
2662 			ice_transition_safe_mode(sc);
2663 		}
2664 	}
2665 
2666 	ice_reset_pf_stats(sc);
2667 
2668 	err = ice_rebuild_pf_vsi_qmap(sc);
2669 	if (err) {
2670 		device_printf(sc->dev, "Unable to re-assign main VSI queues, err %s\n",
2671 			      ice_err_str(err));
2672 		goto err_sched_cleanup;
2673 	}
2674 	err = ice_initialize_vsi(&sc->pf_vsi);
2675 	if (err) {
2676 		device_printf(sc->dev, "Unable to re-initialize Main VSI, err %s\n",
2677 			      ice_err_str(err));
2678 		goto err_release_queue_allocations;
2679 	}
2680 
2681 	/* Replay all VSI configuration */
2682 	err = ice_replay_all_vsi_cfg(sc);
2683 	if (err)
2684 		goto err_deinit_pf_vsi;
2685 
2686 	/* Re-enable FW health event reporting */
2687 	ice_init_health_events(sc);
2688 
2689 	/* Reconfigure the main PF VSI for RSS */
2690 	err = ice_config_rss(&sc->pf_vsi);
2691 	if (err) {
2692 		device_printf(sc->dev,
2693 			      "Unable to reconfigure RSS for the main VSI, err %s\n",
2694 			      ice_err_str(err));
2695 		goto err_deinit_pf_vsi;
2696 	}
2697 
2698 	if (hw->port_info->qos_cfg.is_sw_lldp)
2699 		ice_add_rx_lldp_filter(sc);
2700 
2701 	/* Refresh link status */
2702 	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
2703 	sc->hw.port_info->phy.get_link_info = true;
2704 	ice_get_link_status(sc->hw.port_info, &sc->link_up);
2705 	ice_update_link_status(sc, true);
2706 
2707 	/* RDMA interface will be restarted by the stack re-init */
2708 
2709 	/* Configure interrupt causes for the administrative interrupt */
2710 	ice_configure_misc_interrupts(sc);
2711 
2712 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2713 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2714 
2715 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2716 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2717 
2718 	/* Reconfigure the subinterface */
2719 	if (sc->mirr_if) {
2720 		err = ice_subif_rebuild(sc);
2721 		if (err)
2722 			goto err_deinit_pf_vsi;
2723 	}
2724 
2725 	log(LOG_INFO, "%s: device rebuild successful\n", sc->ifp->if_xname);
2726 
2727 	/* In order to completely restore device functionality, the iflib core
2728 	 * needs to be reset. We need to request an iflib reset. Additionally,
2729 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2730 	 * the iflib core, we also want re-run the admin task so that iflib
2731 	 * resets immediately instead of waiting for the next interrupt.
2732 	 * If LLDP is enabled we need to reconfig DCB to properly reinit all TC
2733 	 * queues, not only 0. It contains ice_request_stack_reinit as well.
2734 	 */
2735 	if (hw->port_info->qos_cfg.is_sw_lldp)
2736 		ice_request_stack_reinit(sc);
2737 	else
2738 		ice_do_dcb_reconfig(sc, false);
2739 
2740 	return;
2741 
2742 err_deinit_pf_vsi:
2743 	ice_deinit_vsi(&sc->pf_vsi);
2744 err_release_queue_allocations:
2745 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2746 				    sc->pf_vsi.num_tx_queues);
2747 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2748 				    sc->pf_vsi.num_rx_queues);
2749 err_sched_cleanup:
2750 	ice_sched_cleanup_all(hw);
2751 err_shutdown_ctrlq:
2752 	ice_shutdown_all_ctrlq(hw, false);
2753 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2754 	ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2755 	device_printf(dev, "Driver rebuild failed, please reload the device driver\n");
2756 }
2757 
2758 /**
2759  * ice_handle_reset_event - Handle reset events triggered by OICR
2760  * @sc: The device private softc
2761  *
2762  * Handle reset events triggered by an OICR notification. This includes CORER,
2763  * GLOBR, and EMPR resets triggered by software on this or any other PF or by
2764  * firmware.
2765  *
2766  * @pre assumes the iflib context lock is held, and will unlock it while
2767  * waiting for the hardware to finish reset.
2768  */
2769 static void
2770 ice_handle_reset_event(struct ice_softc *sc)
2771 {
2772 	struct ice_hw *hw = &sc->hw;
2773 	enum ice_status status;
2774 	device_t dev = sc->dev;
2775 
2776 	/* When a CORER, GLOBR, or EMPR is about to happen, the hardware will
2777 	 * trigger an OICR interrupt. Our OICR handler will determine when
2778 	 * this occurs and set the ICE_STATE_RESET_OICR_RECV bit as
2779 	 * appropriate.
2780 	 */
2781 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
2782 		return;
2783 
2784 	ice_prepare_for_reset(sc);
2785 
2786 	/*
2787 	 * Release the iflib context lock and wait for the device to finish
2788 	 * resetting.
2789 	 */
2790 	IFLIB_CTX_UNLOCK(sc);
2791 	status = ice_check_reset(hw);
2792 	IFLIB_CTX_LOCK(sc);
2793 	if (status) {
2794 		device_printf(dev, "Device never came out of reset, err %s\n",
2795 			      ice_status_str(status));
2796 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2797 		return;
2798 	}
2799 
2800 	/* We're done with the reset, so we can rebuild driver state */
2801 	sc->hw.reset_ongoing = false;
2802 	ice_rebuild(sc);
2803 
2804 	/* In the unlikely event that a PF reset request occurs at the same
2805 	 * time as a global reset, clear the request now. This avoids
2806 	 * resetting a second time right after we reset due to a global event.
2807 	 */
2808 	if (ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2809 		device_printf(dev, "Ignoring PFR request that occurred while a reset was ongoing\n");
2810 }
2811 
2812 /**
2813  * ice_handle_pf_reset_request - Initiate PF reset requested by software
2814  * @sc: The device private softc
2815  *
2816  * Initiate a PF reset requested by software. We handle this in the admin task
2817  * so that only one thread actually handles driver preparation and cleanup,
2818  * rather than having multiple threads possibly attempt to run this code
2819  * simultaneously.
2820  *
2821  * @pre assumes the iflib context lock is held and will unlock it while
2822  * waiting for the PF reset to complete.
2823  */
2824 static void
2825 ice_handle_pf_reset_request(struct ice_softc *sc)
2826 {
2827 	struct ice_hw *hw = &sc->hw;
2828 	enum ice_status status;
2829 
2830 	/* Check for PF reset requests */
2831 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2832 		return;
2833 
2834 	/* Make sure we're prepared for reset */
2835 	ice_prepare_for_reset(sc);
2836 
2837 	/*
2838 	 * Release the iflib context lock and wait for the device to finish
2839 	 * resetting.
2840 	 */
2841 	IFLIB_CTX_UNLOCK(sc);
2842 	status = ice_reset(hw, ICE_RESET_PFR);
2843 	IFLIB_CTX_LOCK(sc);
2844 	if (status) {
2845 		device_printf(sc->dev, "device PF reset failed, err %s\n",
2846 			      ice_status_str(status));
2847 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2848 		return;
2849 	}
2850 
2851 	sc->soft_stats.pfr_count++;
2852 	ice_rebuild(sc);
2853 }
2854 
2855 /**
2856  * ice_init_device_features - Init device driver features
2857  * @sc: driver softc structure
2858  *
2859  * @pre assumes that the function capabilities bits have been set up by
2860  * ice_init_hw().
2861  */
2862 static void
2863 ice_init_device_features(struct ice_softc *sc)
2864 {
2865 	struct ice_hw *hw = &sc->hw;
2866 
2867 	/* Set capabilities that all devices support */
2868 	ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2869 	ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap);
2870 	ice_set_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2871 	ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap);
2872 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_cap);
2873 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_cap);
2874 	ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2875 	ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2876 	ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_cap);
2877 	ice_set_bit(ICE_FEATURE_DCB, sc->feat_cap);
2878 	ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
2879 
2880 	/* Disable features due to hardware limitations... */
2881 	if (!hw->func_caps.common_cap.rss_table_size)
2882 		ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2883 	if (!hw->func_caps.common_cap.iwarp || !ice_enable_irdma)
2884 		ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2885 	if (!hw->func_caps.common_cap.dcb)
2886 		ice_clear_bit(ICE_FEATURE_DCB, sc->feat_cap);
2887 	/* Disable features due to firmware limitations... */
2888 	if (!ice_is_fw_health_report_supported(hw))
2889 		ice_clear_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2890 	if (!ice_fwlog_supported(hw))
2891 		ice_clear_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2892 	if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2893 		if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_FW_LOGGING))
2894 			ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_en);
2895 		else
2896 			ice_fwlog_unregister(hw);
2897 	}
2898 
2899 	/* Disable capabilities not supported by the OS */
2900 	ice_disable_unsupported_features(sc->feat_cap);
2901 
2902 	/* RSS is always enabled for iflib */
2903 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RSS))
2904 		ice_set_bit(ICE_FEATURE_RSS, sc->feat_en);
2905 
2906 	/* Disable features based on sysctl settings */
2907 	if (!ice_tx_balance_en)
2908 		ice_clear_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
2909 
2910 	if (hw->dev_caps.supported_sensors & ICE_SENSOR_SUPPORT_E810_INT_TEMP) {
2911 		ice_set_bit(ICE_FEATURE_TEMP_SENSOR, sc->feat_cap);
2912 		ice_set_bit(ICE_FEATURE_TEMP_SENSOR, sc->feat_en);
2913 	}
2914 }
2915 
2916 /**
2917  * ice_if_multi_set - Callback to update Multicast filters in HW
2918  * @ctx: iflib ctx structure
2919  *
2920  * Called by iflib in response to SIOCDELMULTI and SIOCADDMULTI. Must search
2921  * the if_multiaddrs list and determine which filters have been added or
2922  * removed from the list, and update HW programming to reflect the new list.
2923  *
2924  * @pre assumes the caller holds the iflib CTX lock
2925  */
2926 static void
2927 ice_if_multi_set(if_ctx_t ctx)
2928 {
2929 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2930 	int err;
2931 
2932 	ASSERT_CTX_LOCKED(sc);
2933 
2934 	/* Do not handle multicast configuration in recovery mode */
2935 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2936 		return;
2937 
2938 	err = ice_sync_multicast_filters(sc);
2939 	if (err) {
2940 		device_printf(sc->dev,
2941 			      "Failed to synchronize multicast filter list: %s\n",
2942 			      ice_err_str(err));
2943 		return;
2944 	}
2945 }
2946 
2947 /**
2948  * ice_if_vlan_register - Register a VLAN with the hardware
2949  * @ctx: iflib ctx pointer
2950  * @vtag: VLAN to add
2951  *
2952  * Programs the main PF VSI with a hardware filter for the given VLAN.
2953  *
2954  * @pre assumes the caller holds the iflib CTX lock
2955  */
2956 static void
2957 ice_if_vlan_register(if_ctx_t ctx, u16 vtag)
2958 {
2959 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2960 	enum ice_status status;
2961 
2962 	ASSERT_CTX_LOCKED(sc);
2963 
2964 	/* Do not handle VLAN configuration in recovery mode */
2965 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2966 		return;
2967 
2968 	status = ice_add_vlan_hw_filter(&sc->pf_vsi, vtag);
2969 	if (status) {
2970 		device_printf(sc->dev,
2971 			      "Failure adding VLAN %d to main VSI, err %s aq_err %s\n",
2972 			      vtag, ice_status_str(status),
2973 			      ice_aq_str(sc->hw.adminq.sq_last_status));
2974 	}
2975 }
2976 
2977 /**
2978  * ice_if_vlan_unregister - Remove a VLAN filter from the hardware
2979  * @ctx: iflib ctx pointer
2980  * @vtag: VLAN to add
2981  *
2982  * Removes the previously programmed VLAN filter from the main PF VSI.
2983  *
2984  * @pre assumes the caller holds the iflib CTX lock
2985  */
2986 static void
2987 ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag)
2988 {
2989 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2990 	enum ice_status status;
2991 
2992 	ASSERT_CTX_LOCKED(sc);
2993 
2994 	/* Do not handle VLAN configuration in recovery mode */
2995 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2996 		return;
2997 
2998 	status = ice_remove_vlan_hw_filter(&sc->pf_vsi, vtag);
2999 	if (status) {
3000 		device_printf(sc->dev,
3001 			      "Failure removing VLAN %d from main VSI, err %s aq_err %s\n",
3002 			      vtag, ice_status_str(status),
3003 			      ice_aq_str(sc->hw.adminq.sq_last_status));
3004 	}
3005 }
3006 
3007 /**
3008  * ice_if_stop - Stop the device
3009  * @ctx: iflib context structure
3010  *
3011  * Called by iflib to stop the device and bring it down. (i.e. ifconfig ice0
3012  * down)
3013  *
3014  * @pre assumes the caller holds the iflib CTX lock
3015  */
3016 static void
3017 ice_if_stop(if_ctx_t ctx)
3018 {
3019 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3020 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3021 
3022 	ASSERT_CTX_LOCKED(sc);
3023 
3024 	/*
3025 	 * The iflib core may call IFDI_STOP prior to the first call to
3026 	 * IFDI_INIT. This will cause us to attempt to remove MAC filters we
3027 	 * don't have, and disable Tx queues which aren't yet configured.
3028 	 * Although it is likely these extra operations are harmless, they do
3029 	 * cause spurious warning messages to be displayed, which may confuse
3030 	 * users.
3031 	 *
3032 	 * To avoid these messages, we use a state bit indicating if we've
3033 	 * been initialized. It will be set when ice_if_init is called, and
3034 	 * cleared here in ice_if_stop.
3035 	 */
3036 	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
3037 		return;
3038 
3039 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
3040 		device_printf(sc->dev, "request to stop interface cannot be completed as the device failed to reset\n");
3041 		return;
3042 	}
3043 
3044 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
3045 		device_printf(sc->dev, "request to stop interface while device is prepared for impending reset\n");
3046 		return;
3047 	}
3048 
3049 	ice_rdma_pf_stop(sc);
3050 
3051 	/* Remove the MAC filters, stop Tx, and stop Rx. We don't check the
3052 	 * return of these functions because there's nothing we can really do
3053 	 * if they fail, and the functions already print error messages.
3054 	 * Just try to shut down as much as we can.
3055 	 */
3056 	ice_rm_pf_default_mac_filters(sc);
3057 
3058 	/* Dissociate the Tx and Rx queues from the interrupts */
3059 	ice_flush_txq_interrupts(&sc->pf_vsi);
3060 	ice_flush_rxq_interrupts(&sc->pf_vsi);
3061 
3062 	/* Disable the Tx and Rx queues */
3063 	ice_vsi_disable_tx(&sc->pf_vsi);
3064 	ice_control_all_rx_queues(&sc->pf_vsi, false);
3065 
3066 	if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
3067 		 !(if_getflags(sc->ifp) & IFF_UP) && sc->link_up)
3068 		ice_set_link(sc, false);
3069 
3070 	if (sc->mirr_if && ice_test_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT)) {
3071 		ice_subif_if_stop(sc->mirr_if->subctx);
3072 		device_printf(sc->dev, "The subinterface also comes down and up after reset\n");
3073 	}
3074 }
3075 
3076 /**
3077  * ice_if_get_counter - Get current value of an ifnet statistic
3078  * @ctx: iflib context pointer
3079  * @counter: ifnet counter to read
3080  *
3081  * Reads the current value of an ifnet counter for the device.
3082  *
3083  * This function is not protected by the iflib CTX lock.
3084  */
3085 static uint64_t
3086 ice_if_get_counter(if_ctx_t ctx, ift_counter counter)
3087 {
3088 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3089 
3090 	/* Return the counter for the main PF VSI */
3091 	return ice_get_ifnet_counter(&sc->pf_vsi, counter);
3092 }
3093 
3094 /**
3095  * ice_request_stack_reinit - Request that iflib re-initialize
3096  * @sc: the device private softc
3097  *
3098  * Request that the device be brought down and up, to re-initialize. For
3099  * example, this may be called when a device reset occurs, or when Tx and Rx
3100  * queues need to be re-initialized.
3101  *
3102  * This is required because the iflib state is outside the driver, and must be
3103  * re-initialized if we need to resart Tx and Rx queues.
3104  */
3105 void
3106 ice_request_stack_reinit(struct ice_softc *sc)
3107 {
3108 	if (CTX_ACTIVE(sc->ctx)) {
3109 		iflib_request_reset(sc->ctx);
3110 		iflib_admin_intr_deferred(sc->ctx);
3111 	}
3112 }
3113 
3114 /**
3115  * ice_driver_is_detaching - Check if the driver is detaching/unloading
3116  * @sc: device private softc
3117  *
3118  * Returns true if the driver is detaching, false otherwise.
3119  *
3120  * @remark on newer kernels, take advantage of iflib_in_detach in order to
3121  * report detachment correctly as early as possible.
3122  *
3123  * @remark this function is used by various code paths that want to avoid
3124  * running if the driver is about to be removed. This includes sysctls and
3125  * other driver access points. Note that it does not fully resolve
3126  * detach-based race conditions as it is possible for a thread to race with
3127  * iflib_in_detach.
3128  */
3129 bool
3130 ice_driver_is_detaching(struct ice_softc *sc)
3131 {
3132 	return (ice_test_state(&sc->state, ICE_STATE_DETACHING) ||
3133 		iflib_in_detach(sc->ctx));
3134 }
3135 
3136 /**
3137  * ice_if_priv_ioctl - Device private ioctl handler
3138  * @ctx: iflib context pointer
3139  * @command: The ioctl command issued
3140  * @data: ioctl specific data
3141  *
3142  * iflib callback for handling custom driver specific ioctls.
3143  *
3144  * @pre Assumes that the iflib context lock is held.
3145  */
3146 static int
3147 ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data)
3148 {
3149 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3150 	struct ifdrv *ifd;
3151 	device_t dev = sc->dev;
3152 
3153 	if (data == NULL)
3154 		return (EINVAL);
3155 
3156 	ASSERT_CTX_LOCKED(sc);
3157 
3158 	/* Make sure the command type is valid */
3159 	switch (command) {
3160 	case SIOCSDRVSPEC:
3161 	case SIOCGDRVSPEC:
3162 		/* Accepted commands */
3163 		break;
3164 	case SIOCGPRIVATE_0:
3165 		/*
3166 		 * Although we do not support this ioctl command, it's
3167 		 * expected that iflib will forward it to the IFDI_PRIV_IOCTL
3168 		 * handler. Do not print a message in this case
3169 		 */
3170 		return (ENOTSUP);
3171 	default:
3172 		/*
3173 		 * If we get a different command for this function, it's
3174 		 * definitely unexpected, so log a message indicating what
3175 		 * command we got for debugging purposes.
3176 		 */
3177 		device_printf(dev, "%s: unexpected ioctl command %08lx\n",
3178 			      __func__, command);
3179 		return (EINVAL);
3180 	}
3181 
3182 	ifd = (struct ifdrv *)data;
3183 
3184 	switch (ifd->ifd_cmd) {
3185 	case ICE_NVM_ACCESS:
3186 		return ice_handle_nvm_access_ioctl(sc, ifd);
3187 	case ICE_DEBUG_DUMP:
3188 		return ice_handle_debug_dump_ioctl(sc, ifd);
3189 	default:
3190 		return EINVAL;
3191 	}
3192 }
3193 
3194 /**
3195  * ice_if_i2c_req - I2C request handler for iflib
3196  * @ctx: iflib context pointer
3197  * @req: The I2C parameters to use
3198  *
3199  * Read from the port's I2C eeprom using the parameters from the ioctl.
3200  *
3201  * @remark The iflib-only part is pretty simple.
3202  */
3203 static int
3204 ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req)
3205 {
3206 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3207 
3208 	return ice_handle_i2c_req(sc, req);
3209 }
3210 
3211 /**
3212  * ice_if_suspend - PCI device suspend handler for iflib
3213  * @ctx: iflib context pointer
3214  *
3215  * Deinitializes the driver and clears HW resources in preparation for
3216  * suspend or an FLR.
3217  *
3218  * @returns 0; this return value is ignored
3219  */
3220 static int
3221 ice_if_suspend(if_ctx_t ctx)
3222 {
3223 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3224 
3225 	/* At least a PFR is always going to happen after this;
3226 	 * either via FLR or during the D3->D0 transition.
3227 	 */
3228 	ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
3229 
3230 	ice_prepare_for_reset(sc);
3231 
3232 	return (0);
3233 }
3234 
3235 /**
3236  * ice_if_resume - PCI device resume handler for iflib
3237  * @ctx: iflib context pointer
3238  *
3239  * Reinitializes the driver and the HW after PCI resume or after
3240  * an FLR. An init is performed by iflib after this function is finished.
3241  *
3242  * @returns 0; this return value is ignored
3243  */
3244 static int
3245 ice_if_resume(if_ctx_t ctx)
3246 {
3247 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3248 
3249 	ice_rebuild(sc);
3250 
3251 	return (0);
3252 }
3253 
3254 /**
3255  * ice_if_needs_restart - Tell iflib when the driver needs to be reinitialized
3256  * @ctx: iflib context pointer
3257  * @event: event code to check
3258  *
3259  * Defaults to returning true for unknown events.
3260  *
3261  * @returns true if iflib needs to reinit the interface
3262  */
3263 static bool
3264 ice_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event)
3265 {
3266 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3267 
3268 	switch (event) {
3269 	case IFLIB_RESTART_VLAN_CONFIG:
3270 		if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
3271 			 !(if_getflags(sc->ifp) & IFF_UP))
3272 			return false;
3273 	default:
3274 		return true;
3275 	}
3276 }
3277 
/* Tx/Rx method table for the mirror subinterface; defined in another file
 * and installed as isc_txrx by ice_subif_setup_scctx().
 */
extern struct if_txrx ice_subif_txrx;
3279 
/**
 * @var ice_subif_methods
 * @brief device method entry points for the mirror subinterface
 *
 * Maps device_register to ice_subif_register(), which returns the
 * subinterface's shared context. The table is terminated by DEVMETHOD_END.
 */
static device_method_t ice_subif_methods[] = {
	/* Device interface */
	DEVMETHOD(device_register, ice_subif_register),
	DEVMETHOD_END
};
3289 
3290 /**
3291  * @var ice_subif_driver
3292  * @brief driver structure for the device API
3293  */
3294 static driver_t ice_subif_driver = {
3295 	.name = "ice_subif",
3296 	.methods = ice_subif_methods,
3297 	.size = sizeof(struct ice_mirr_if),
3298 };
3299 
3300 static device_method_t ice_iflib_subif_methods[] = {
3301 	DEVMETHOD(ifdi_attach_pre, ice_subif_if_attach_pre),
3302 	DEVMETHOD(ifdi_attach_post, ice_subif_if_attach_post),
3303 	DEVMETHOD(ifdi_tx_queues_alloc, ice_subif_if_tx_queues_alloc),
3304 	DEVMETHOD(ifdi_rx_queues_alloc, ice_subif_if_rx_queues_alloc),
3305 	DEVMETHOD(ifdi_msix_intr_assign, ice_subif_if_msix_intr_assign),
3306 	DEVMETHOD(ifdi_intr_enable, ice_subif_if_intr_enable),
3307 	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_subif_if_rx_queue_intr_enable),
3308 	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_subif_if_tx_queue_intr_enable),
3309 	DEVMETHOD(ifdi_init, ice_subif_if_init),
3310 	DEVMETHOD(ifdi_stop, ice_subif_if_stop),
3311 	DEVMETHOD(ifdi_queues_free, ice_subif_if_queues_free),
3312 	DEVMETHOD(ifdi_media_status, ice_subif_if_media_status),
3313 	DEVMETHOD(ifdi_promisc_set, ice_subif_if_promisc_set),
3314 };
3315 
3316 /**
3317  * @var ice_iflib_subif_driver
3318  * @brief driver structure for the iflib stack
3319  *
3320  * driver_t definition used to setup the iflib device methods.
3321  */
3322 static driver_t ice_iflib_subif_driver = {
3323 	.name = "ice_subif",
3324 	.methods = ice_iflib_subif_methods,
3325 	.size = sizeof(struct ice_mirr_if),
3326 };
3327 
3328 /**
3329  * @var ice_subif_sctx
3330  * @brief ice driver shared context
3331  *
3332  * Similar to the existing ice_sctx, this structure has these differences:
3333  * - isc_admin_intrcnt is set to 0
3334  * - Uses subif iflib driver methods
3335  * - Flagged as a VF for iflib
3336  */
3337 static struct if_shared_ctx ice_subif_sctx = {
3338 	.isc_magic = IFLIB_MAGIC,
3339 	.isc_q_align = PAGE_SIZE,
3340 
3341 	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
3342 	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,
3343 	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
3344 	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,
3345 
3346 	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
3347 	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
3348 	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,
3349 
3350 	.isc_nfl = 1,
3351 	.isc_ntxqs = 1,
3352 	.isc_nrxqs = 1,
3353 
3354 	.isc_admin_intrcnt = 0,
3355 	.isc_vendor_info = ice_vendor_info_array,
3356 	.isc_driver_version = __DECONST(char *, ice_driver_version),
3357 	.isc_driver = &ice_iflib_subif_driver,
3358 
3359 	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
3360 		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX |
3361 		IFLIB_IS_VF,
3362 
3363 	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
3364 	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
3365 	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
3366 	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
3367 	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
3368 	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
3369 };
3370 
3371 static void *
3372 ice_subif_register(device_t dev __unused)
3373 {
3374 	return (&ice_subif_sctx);
3375 }
3376 
3377 static void
3378 ice_subif_setup_scctx(struct ice_mirr_if *mif)
3379 {
3380 	if_softc_ctx_t scctx = mif->subscctx;
3381 
3382 	scctx->isc_txrx = &ice_subif_txrx;
3383 
3384 	scctx->isc_capenable = ICE_FULL_CAPS;
3385 	scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
3386 
3387 	scctx->isc_ntxqsets = 4;
3388 	scctx->isc_nrxqsets = 4;
3389 	scctx->isc_vectors = scctx->isc_nrxqsets;
3390 
3391 	scctx->isc_ntxqsets_max = 256;
3392 	scctx->isc_nrxqsets_max = 256;
3393 
3394 	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
3395 	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
3396 	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
3397 	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);
3398 
3399 	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
3400 	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
3401 	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
3402 	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;
3403 }
3404 
3405 static int
3406 ice_subif_if_attach_pre(if_ctx_t ctx)
3407 {
3408 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3409 	device_t dev = iflib_get_dev(ctx);
3410 
3411 	mif->subctx = ctx;
3412 	mif->subdev = dev;
3413 	mif->subscctx = iflib_get_softc_ctx(ctx);
3414 
3415 	/* Setup the iflib softc context structure */
3416 	ice_subif_setup_scctx(mif);
3417 
3418 	return (0);
3419 }
3420 
/**
 * ice_subif_if_attach_post - ifdi_attach_post method for the mirror subinterface
 * @ctx: iflib context of the subinterface (unused)
 *
 * No post-attach work is done here.
 *
 * Returns 0.
 */
static int
ice_subif_if_attach_post(if_ctx_t ctx __unused)
{
	return (0);
}
3426 
3427 /**
3428  * ice_destroy_mirror_interface - destroy mirror interface
3429  * @sc: driver private data
3430  *
3431  * Destroys all resources associated with the mirroring interface.
3432  * Will not exit early on failure.
3433  *
3434  * @pre: Mirror interface already exists and is initialized.
3435  */
3436 void
3437 ice_destroy_mirror_interface(struct ice_softc *sc)
3438 {
3439 	struct ice_mirr_if *mif = sc->mirr_if;
3440 	struct ice_vsi *vsi = mif->vsi;
3441 	bool is_locked = false;
3442 	int ret;
3443 
3444 	is_locked = sx_xlocked(sc->iflib_ctx_lock);
3445 	if (is_locked)
3446 		IFLIB_CTX_UNLOCK(sc);
3447 
3448 	if (mif->ifp) {
3449 		ret = iflib_device_deregister(mif->subctx);
3450 		if (ret) {
3451 			device_printf(sc->dev,
3452 			    "iflib_device_deregister for mirror interface failed: %d\n",
3453 			    ret);
3454 		}
3455 	}
3456 
3457 	bus_topo_lock();
3458 	ret = device_delete_child(sc->dev, mif->subdev);
3459 	bus_topo_unlock();
3460 	if (ret) {
3461 		device_printf(sc->dev,
3462 		    "device_delete_child for mirror interface failed: %d\n",
3463 		    ret);
3464 	}
3465 
3466 	if (is_locked)
3467 		IFLIB_CTX_LOCK(sc);
3468 
3469 	if (mif->if_imap) {
3470 		free(mif->if_imap, M_ICE);
3471 		mif->if_imap = NULL;
3472 	}
3473 	if (mif->os_imap) {
3474 		free(mif->os_imap, M_ICE);
3475 		mif->os_imap = NULL;
3476 	}
3477 
3478 	/* These are freed via ice_subif_queues_free_subif
3479 	 * vsi:
3480 	 * - rx_irqvs
3481 	 * - tx_queues
3482 	 * - rx_queues
3483 	 */
3484 	ice_release_vsi(vsi);
3485 
3486 	free(mif, M_ICE);
3487 	sc->mirr_if = NULL;
3488 
3489 }
3490 
3491 /**
3492  * ice_setup_mirror_vsi - Initialize mirror VSI
3493  * @mif: driver private data for mirror interface
3494  *
3495  * Allocates a VSI for a mirror interface, and sets that VSI up for use as a
3496  * mirror for the main PF VSI.
3497  *
3498  * Returns 0 on success, or a standard error code on failure.
3499  */
3500 static int
3501 ice_setup_mirror_vsi(struct ice_mirr_if *mif)
3502 {
3503 	struct ice_softc *sc = mif->back;
3504 	device_t dev = sc->dev;
3505 	struct ice_vsi *vsi;
3506 	int ret = 0;
3507 
3508 	/* vsi is for the new mirror vsi, not the PF's main VSI */
3509 	vsi = ice_alloc_vsi(sc, ICE_VSI_VMDQ2);
3510 	if (!vsi) {
3511 		/* Already prints an error message */
3512 		return (ENOMEM);
3513 	}
3514 	mif->vsi = vsi;
3515 
3516 	/* Reserve VSI queue allocation from PF queues */
3517 	ice_alloc_vsi_qmap(vsi, ICE_DEFAULT_VF_QUEUES, ICE_DEFAULT_VF_QUEUES);
3518 	vsi->num_tx_queues = vsi->num_rx_queues = ICE_DEFAULT_VF_QUEUES;
3519 
3520 	/* Assign Tx queues from PF space */
3521 	ret = ice_resmgr_assign_scattered(&sc->tx_qmgr, vsi->tx_qmap,
3522 	    vsi->num_tx_queues);
3523 	if (ret) {
3524 		device_printf(dev, "Unable to assign mirror VSI Tx queues: %s\n",
3525 		    ice_err_str(ret));
3526 		goto release_vsi;
3527 	}
3528 	/* Assign Rx queues from PF space */
3529 	ret = ice_resmgr_assign_scattered(&sc->rx_qmgr, vsi->rx_qmap,
3530 	    vsi->num_rx_queues);
3531 	if (ret) {
3532 		device_printf(dev, "Unable to assign mirror VSI Rx queues: %s\n",
3533 		    ice_err_str(ret));
3534 		goto release_vsi;
3535 	}
3536 	vsi->qmap_type = ICE_RESMGR_ALLOC_SCATTERED;
3537 	vsi->max_frame_size = ICE_MAX_FRAME_SIZE;
3538 
3539 	ret = ice_initialize_vsi(vsi);
3540 	if (ret) {
3541 		device_printf(dev, "%s: Error in ice_initialize_vsi for mirror VSI: %s\n",
3542 		    __func__, ice_err_str(ret));
3543 		goto release_vsi;
3544 	}
3545 
3546 	/* Setup this VSI for receiving traffic */
3547 	ret = ice_config_rss(vsi);
3548 	if (ret) {
3549 		device_printf(dev,
3550 		    "Unable to configure RSS for mirror VSI: %s\n",
3551 		    ice_err_str(ret));
3552 		goto release_vsi;
3553 	}
3554 
3555 	/* Set HW rules for mirroring traffic */
3556 	vsi->mirror_src_vsi = sc->pf_vsi.idx;
3557 
3558 	ice_debug(&sc->hw, ICE_DBG_INIT,
3559 	    "Configuring mirroring from VSI %d to %d\n",
3560 	    vsi->mirror_src_vsi, vsi->idx);
3561 	ice_debug(&sc->hw, ICE_DBG_INIT, "(HW num: VSI %d to %d)\n",
3562 	    ice_get_hw_vsi_num(&sc->hw, vsi->mirror_src_vsi),
3563 	    ice_get_hw_vsi_num(&sc->hw, vsi->idx));
3564 
3565 	ret = ice_setup_vsi_mirroring(vsi);
3566 	if (ret) {
3567 		device_printf(dev,
3568 		    "Unable to configure mirroring for VSI: %s\n",
3569 		    ice_err_str(ret));
3570 		goto release_vsi;
3571 	}
3572 
3573 	return (0);
3574 
3575 release_vsi:
3576 	ice_release_vsi(vsi);
3577 	mif->vsi = NULL;
3578 	return (ret);
3579 }
3580 
3581 /**
3582  * ice_create_mirror_interface - Initialize mirror interface
3583  * @sc: driver private data
3584  *
3585  * Creates and sets up a mirror interface that will mirror traffic from
3586  * the main PF interface. Includes a call to iflib_device_register() in order
3587  * to setup necessary iflib structures for this new interface as well.
3588  *
3589  * If it returns successfully, a new interface will be created and will show
3590  * up in the ifconfig interface list.
3591  *
3592  * Returns 0 on success, or a standard error code on failure.
3593  */
3594 int
3595 ice_create_mirror_interface(struct ice_softc *sc)
3596 {
3597 	device_t dev = sc->dev;
3598 	struct ice_mirr_if *mif;
3599 	struct ifmedia *media;
3600 	struct sbuf *sb;
3601 	int ret = 0;
3602 
3603 	mif = (struct ice_mirr_if *)malloc(sizeof(*mif), M_ICE, M_ZERO | M_NOWAIT);
3604 	if (!mif) {
3605 		device_printf(dev, "malloc() error allocating mirror interface\n");
3606 		return (ENOMEM);
3607 	}
3608 
3609 	/* Set pointers */
3610 	sc->mirr_if = mif;
3611 	mif->back = sc;
3612 
3613 	/* Do early setup because these will be called during iflib_device_register():
3614 	 * - ice_subif_if_tx_queues_alloc
3615 	 * - ice_subif_if_rx_queues_alloc
3616 	 */
3617 	ret = ice_setup_mirror_vsi(mif);
3618 	if (ret)
3619 		goto out;
3620 
3621 	/* Determine name for new interface:
3622 	 * (base interface name)(modifier name)(modifier unit number)
3623 	 * e.g. for ice0 with a new mirror interface (modifier m)
3624 	 * of index 0, this equals "ice0m0"
3625 	 */
3626 	sb = sbuf_new_auto();
3627 	MPASS(sb != NULL);
3628 	sbuf_printf(sb, "%sm", device_get_nameunit(dev));
3629 	sbuf_finish(sb);
3630 
3631 	bus_topo_lock();
3632 	mif->subdev = device_add_child(dev, sbuf_data(sb), 0);
3633 	bus_topo_unlock();
3634 
3635 	if (!mif->subdev) {
3636 		device_printf(dev, "device_add_child failed for %s0\n", sbuf_data(sb));
3637 		sbuf_delete(sb);
3638 		free(mif, M_ICE);
3639 		sc->mirr_if = NULL;
3640 		return (ENOMEM);
3641 	}
3642 	sbuf_delete(sb);
3643 
3644 	device_set_driver(mif->subdev, &ice_subif_driver);
3645 
3646 	/* Use iflib_device_register() directly because the driver already
3647 	 * has an initialized softc to pass to iflib
3648 	 */
3649 	ret = iflib_device_register(mif->subdev, mif, &ice_subif_sctx, &mif->subctx);
3650 	if (ret)
3651 		goto out;
3652 
3653 	/* Indicate that created interface will be just for monitoring */
3654 	mif->ifp = iflib_get_ifp(mif->subctx);
3655 	if_setflagbits(mif->ifp, IFF_MONITOR, 0);
3656 
3657 	/* Use autoselect media by default */
3658 	media = iflib_get_media(mif->subctx);
3659 	ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
3660 	ifmedia_set(media, IFM_ETHER | IFM_AUTO);
3661 
3662 	device_printf(dev, "Created dev %s and ifnet %s for mirroring\n",
3663 	    device_get_nameunit(mif->subdev), if_name(mif->ifp));
3664 
3665 	ice_add_vsi_sysctls(mif->vsi);
3666 
3667 	ret = ice_wire_mirror_intrs(mif);
3668 	if (ret)
3669 		goto out;
3670 
3671 	mif->if_attached = true;
3672 	return (0);
3673 
3674 out:
3675 	ice_destroy_mirror_interface(sc);
3676 	return (ret);
3677 }
3678 
3679 /**
3680  * ice_wire_mirror_intrs
3681  * @mif: driver private subinterface structure
3682  *
3683  * Helper function that sets up driver interrupt data and calls
3684  * into iflib in order to setup interrupts in its data structures as well.
3685  *
3686  * Like ice_if_msix_intr_assign, currently requires that we get at least the same
3687  * number of vectors as we have queues, and that we always have the same number
3688  * of Tx and Rx queues. Unlike that function, this calls a special
3689  * iflib_irq_alloc_generic_subif() function for RX interrupts because the
3690  * driver needs to get MSI-X resources from the parent device.
3691  *
3692  * Tx queues use a softirq instead of using their own hardware interrupt so that
3693  * remains unchanged.
3694  *
3695  * Returns 0 on success or an error code from iflib_irq_alloc_generic_subctx()
3696  * on failure.
3697  */
3698 static int
3699 ice_wire_mirror_intrs(struct ice_mirr_if *mif)
3700 {
3701 	struct ice_softc *sc = mif->back;
3702 	struct ice_hw *hw = &sc->hw;
3703 	struct ice_vsi *vsi = mif->vsi;
3704 	device_t dev = mif->subdev;
3705 	int err, i, rid;
3706 
3707 	if_ctx_t ctx = mif->subctx;
3708 
3709 	ice_debug(hw, ICE_DBG_INIT, "%s: Last rid: %d\n", __func__, sc->last_rid);
3710 
3711 	rid = sc->last_rid + 1;
3712 	for (i = 0; i < vsi->num_rx_queues; i++, rid++) {
3713 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
3714 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
3715 		char irq_name[16];
3716 
3717 		// TODO: Change to use dynamic interface number
3718 		snprintf(irq_name, sizeof(irq_name), "m0rxq%d", i);
3719 		/* First arg is parent device (physical port's) iflib ctx */
3720 		err = iflib_irq_alloc_generic_subctx(sc->ctx, ctx,
3721 		    &mif->rx_irqvs[i].irq, rid, IFLIB_INTR_RXTX, ice_msix_que,
3722 		    rxq, rxq->me, irq_name);
3723 		if (err) {
3724 			device_printf(dev,
3725 			    "Failed to allocate q int %d err: %s\n",
3726 			    i, ice_err_str(err));
3727 			i--;
3728 			goto fail;
3729 		}
3730 		MPASS(rid - 1 > 0);
3731 		/* Set vector number used in interrupt enable/disable functions */
3732 		mif->rx_irqvs[i].me = rid - 1;
3733 		rxq->irqv = &mif->rx_irqvs[i];
3734 
3735 		bzero(irq_name, sizeof(irq_name));
3736 		snprintf(irq_name, sizeof(irq_name), "m0txq%d", i);
3737 		iflib_softirq_alloc_generic(ctx, &mif->rx_irqvs[i].irq,
3738 		    IFLIB_INTR_TX, txq, txq->me, irq_name);
3739 		txq->irqv = &mif->rx_irqvs[i];
3740 	}
3741 
3742 	sc->last_rid = rid - 1;
3743 
3744 	ice_debug(hw, ICE_DBG_INIT, "%s: New last rid: %d\n", __func__,
3745 	    sc->last_rid);
3746 
3747 	return (0);
3748 
3749 fail:
3750 	for (; i >= 0; i--)
3751 		iflib_irq_free(ctx, &mif->rx_irqvs[i].irq);
3752 	return (err);
3753 }
3754 
3755 /**
3756  * ice_subif_rebuild - Rebuild subinterface post reset
3757  * @sc: The device private softc
3758  *
3759  * Restore subinterface state after a reset occurred.
3760  * Restart the VSI and enable the mirroring.
3761  */
3762 static int
3763 ice_subif_rebuild(struct ice_softc *sc)
3764 {
3765 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(sc->ctx);
3766 	struct ice_vsi *vsi = sc->mirr_if->vsi;
3767 	int err;
3768 
3769 	err = ice_subif_rebuild_vsi_qmap(sc);
3770 	if (err) {
3771 		device_printf(sc->dev, "Unable to re-assign mirror VSI queues, err %s\n",
3772 		      ice_err_str(err));
3773 		return (err);
3774 	}
3775 
3776 	err = ice_initialize_vsi(vsi);
3777 	if (err) {
3778 		device_printf(sc->dev, "Unable to re-initialize mirror VSI, err %s\n",
3779 		      ice_err_str(err));
3780 		goto err_release_queue_allocations_subif;
3781 	}
3782 
3783 	err = ice_config_rss(vsi);
3784 	if (err) {
3785 		device_printf(sc->dev,
3786 		      "Unable to reconfigure RSS for the mirror VSI, err %s\n",
3787 		      ice_err_str(err));
3788 		goto err_deinit_subif_vsi;
3789 	}
3790 
3791 	vsi->mirror_src_vsi = sc->pf_vsi.idx;
3792 
3793 	err = ice_setup_vsi_mirroring(vsi);
3794 	if (err) {
3795 		device_printf(sc->dev,
3796 		      "Unable to configure mirroring for VSI: %s\n",
3797 		      ice_err_str(err));
3798 		goto err_deinit_subif_vsi;
3799 	}
3800 
3801 	ice_set_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT);
3802 
3803 	return (0);
3804 
3805 err_deinit_subif_vsi:
3806 	ice_deinit_vsi(vsi);
3807 err_release_queue_allocations_subif:
3808 	ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap,
3809 	    sc->mirr_if->num_irq_vectors);
3810 	ice_resmgr_release_map(&sc->rx_qmgr, vsi->rx_qmap,
3811 	    sc->mirr_if->num_irq_vectors);
3812 
3813 	return (err);
3814 }
3815 
3816 /**
3817  * ice_subif_rebuild_vsi_qmap - Rebuild the mirror VSI queue mapping
3818  * @sc: the device softc pointer
3819  *
3820  * Loops over the Tx and Rx queues for the mirror VSI and reassigns the queue
3821  * mapping after a reset occurred.
3822  */
3823 static int
3824 ice_subif_rebuild_vsi_qmap(struct ice_softc *sc)
3825 {
3826 	struct ice_vsi *vsi = sc->mirr_if->vsi;
3827 	struct ice_tx_queue *txq;
3828 	struct ice_rx_queue *rxq;
3829 	int err, i;
3830 
3831 	err = ice_resmgr_assign_scattered(&sc->tx_qmgr, vsi->tx_qmap, sc->mirr_if->num_irq_vectors);
3832 	if (err) {
3833 		device_printf(sc->dev, "Unable to assign mirror VSI Tx queues: %s\n",
3834 		      ice_err_str(err));
3835 		return (err);
3836 	}
3837 
3838 	err = ice_resmgr_assign_scattered(&sc->rx_qmgr, vsi->rx_qmap, sc->mirr_if->num_irq_vectors);
3839 	if (err) {
3840 		device_printf(sc->dev, "Unable to assign mirror VSI Rx queues: %s\n",
3841 		      ice_err_str(err));
3842 		goto err_release_tx_queues;
3843 	}
3844 
3845 	vsi->qmap_type = ICE_RESMGR_ALLOC_SCATTERED;
3846 
3847 	/* Re-assign Tx queue tail pointers */
3848 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
3849 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
3850 
3851 	/* Re-assign Rx queue tail pointers */
3852 	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
3853 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
3854 
3855 	return (0);
3856 
3857 err_release_tx_queues:
3858 	ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap, vsi->num_tx_queues);
3859 
3860 	return (err);
3861 }
3862 
3863 /**
3864  * ice_subif_if_tx_queues_alloc - Allocate Tx queue memory for subinterfaces
3865  * @ctx: iflib context structure
3866  * @vaddrs: virtual addresses for the queue memory
3867  * @paddrs: physical addresses for the queue memory
3868  * @ntxqs: the number of Tx queues per set (should always be 1)
3869  * @ntxqsets: the number of Tx queue sets to allocate
3870  *
3871  * See ice_if_tx_queues_alloc() description. Similar to that function, but
3872  * for subinterfaces instead.
3873  */
3874 static int
3875 ice_subif_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
3876 			     int __invariant_only ntxqs, int ntxqsets)
3877 {
3878 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3879 	struct ice_tx_queue *txq;
3880 	device_t dev = mif->subdev;
3881 	struct ice_vsi *vsi;
3882 	int err, i, j;
3883 
3884 	MPASS(mif != NULL);
3885 	MPASS(ntxqs == 1);
3886 	MPASS(mif->subscctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
3887 
3888 	vsi = mif->vsi;
3889 
3890 	MPASS(vsi->num_tx_queues == ntxqsets);
3891 
3892 	/* Allocate queue structure memory */
3893 	if (!(vsi->tx_queues =
3894 	      (struct ice_tx_queue *)malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
3895 		device_printf(dev, "%s: Unable to allocate Tx queue memory for subfunction\n",
3896 		    __func__);
3897 		return (ENOMEM);
3898 	}
3899 
3900 	/* Allocate report status arrays */
3901 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
3902 		if (!(txq->tx_rsq =
3903 		      (uint16_t *)malloc(sizeof(uint16_t) * mif->subscctx->isc_ntxd[0], M_ICE, M_NOWAIT))) {
3904 			device_printf(dev,
3905 			    "%s: Unable to allocate tx_rsq memory for subfunction\n", __func__);
3906 			err = ENOMEM;
3907 			goto free_tx_queues;
3908 		}
3909 		/* Initialize report status array */
3910 		for (j = 0; j < mif->subscctx->isc_ntxd[0]; j++)
3911 			txq->tx_rsq[j] = QIDX_INVALID;
3912 	}
3913 
3914 	/* Add Tx queue sysctls context */
3915 	ice_vsi_add_txqs_ctx(vsi);
3916 
3917 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
3918 		/* q_handle == me when only one TC */
3919 		txq->me = txq->q_handle = i;
3920 		txq->vsi = vsi;
3921 
3922 		/* store the queue size for easier access */
3923 		txq->desc_count = mif->subscctx->isc_ntxd[0];
3924 
3925 		/* get the virtual and physical address of the hardware queues */
3926 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
3927 		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
3928 		txq->tx_paddr = paddrs[i];
3929 
3930 		ice_add_txq_sysctls(txq);
3931 	}
3932 
3933 	return (0);
3934 
3935 free_tx_queues:
3936 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
3937 		if (txq->tx_rsq != NULL) {
3938 			free(txq->tx_rsq, M_ICE);
3939 			txq->tx_rsq = NULL;
3940 		}
3941 	}
3942 	free(vsi->tx_queues, M_ICE);
3943 	vsi->tx_queues = NULL;
3944 	return (err);
3945 }
3946 
3947 /**
3948  * ice_subif_if_rx_queues_alloc - Allocate Rx queue memory for subinterfaces
3949  * @ctx: iflib context structure
3950  * @vaddrs: virtual addresses for the queue memory
3951  * @paddrs: physical addresses for the queue memory
3952  * @nrxqs: number of Rx queues per set (should always be 1)
3953  * @nrxqsets: number of Rx queue sets to allocate
3954  *
3955  * See ice_if_rx_queues_alloc() for general summary; this is similar to that
3956  * but implemented for subinterfaces.
3957  */
3958 static int
3959 ice_subif_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
3960     int __invariant_only nrxqs, int nrxqsets)
3961 {
3962 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3963 	struct ice_rx_queue *rxq;
3964 	device_t dev = mif->subdev;
3965 	struct ice_vsi *vsi;
3966 	int i;
3967 
3968 	MPASS(mif != NULL);
3969 	MPASS(nrxqs == 1);
3970 	MPASS(mif->subscctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
3971 
3972 	vsi = mif->vsi;
3973 
3974 	MPASS(vsi->num_rx_queues == nrxqsets);
3975 
3976 	/* Allocate queue structure memory */
3977 	if (!(vsi->rx_queues =
3978 	      (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
3979 		device_printf(dev, "%s: Unable to allocate Rx queue memory for subfunction\n",
3980 		    __func__);
3981 		return (ENOMEM);
3982 	}
3983 
3984 	/* Add Rx queue sysctls context */
3985 	ice_vsi_add_rxqs_ctx(vsi);
3986 
3987 	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
3988 		rxq->me = i;
3989 		rxq->vsi = vsi;
3990 
3991 		/* store the queue size for easier access */
3992 		rxq->desc_count = mif->subscctx->isc_nrxd[0];
3993 
3994 		/* get the virtual and physical address of the hardware queues */
3995 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
3996 		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
3997 		rxq->rx_paddr = paddrs[i];
3998 
3999 		ice_add_rxq_sysctls(rxq);
4000 	}
4001 
4002 	return (0);
4003 }
4004 
4005 /**
4006  * ice_subif_if_msix_intr_assign - Assign MSI-X interrupts to new sub interface
4007  * @ctx: the iflib context structure
4008  * @msix: the number of vectors we were assigned
4009  *
4010  * Allocates and assigns driver private resources for MSI-X interrupt tracking.
4011  *
4012  * @pre OS MSI-X resources have been pre-allocated by parent interface.
4013  */
4014 static int
4015 ice_subif_if_msix_intr_assign(if_ctx_t ctx, int msix)
4016 {
4017 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4018 	struct ice_softc *sc = mif->back;
4019 	struct ice_vsi *vsi = mif->vsi;
4020 
4021 	device_t dev = mif->subdev;
4022 	int ret;
4023 
4024 	if (vsi->num_rx_queues != vsi->num_tx_queues) {
4025 		device_printf(dev,
4026 			      "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
4027 			      vsi->num_tx_queues, vsi->num_rx_queues);
4028 		return (EOPNOTSUPP);
4029 	}
4030 
4031 	if (msix > sc->extra_vectors) {
4032 		device_printf(dev,
4033 		     "%s: Not enough spare (%d) msix vectors for new sub-interface requested (%d)\n",
4034 		     __func__, sc->extra_vectors, msix);
4035 		return (ENOSPC);
4036 	}
4037 	device_printf(dev, "%s: Using %d vectors for sub-interface\n", __func__,
4038 	    msix);
4039 
4040 	/* Allocate space to store the IRQ vector data */
4041 	mif->num_irq_vectors = vsi->num_rx_queues;
4042 	mif->rx_irqvs = (struct ice_irq_vector *)
4043 	    malloc(sizeof(struct ice_irq_vector) * (mif->num_irq_vectors),
4044 		   M_ICE, M_NOWAIT);
4045 	if (!mif->rx_irqvs) {
4046 		device_printf(dev,
4047 			      "Unable to allocate RX irqv memory for mirror's %d vectors\n",
4048 			      mif->num_irq_vectors);
4049 		return (ENOMEM);
4050 	}
4051 
4052 	/* Assign mirror interface interrupts from PF device space */
4053 	if (!(mif->if_imap =
4054 	      (u16 *)malloc(sizeof(u16) * mif->num_irq_vectors,
4055 	      M_ICE, M_NOWAIT))) {
4056 		device_printf(dev, "Unable to allocate mirror intfc if_imap memory\n");
4057 		ret = ENOMEM;
4058 		goto free_irqvs;
4059 	}
4060 	ret = ice_resmgr_assign_contiguous(&sc->dev_imgr, mif->if_imap, mif->num_irq_vectors);
4061 	if (ret) {
4062 		device_printf(dev, "Unable to assign mirror intfc PF device interrupt mapping: %s\n",
4063 			      ice_err_str(ret));
4064 		goto free_if_imap;
4065 	}
4066 	/* Assign mirror interface interrupts from OS interrupt allocation space */
4067 	if (!(mif->os_imap =
4068 	      (u16 *)malloc(sizeof(u16) * mif->num_irq_vectors,
4069 	      M_ICE, M_NOWAIT))) {
4070 		device_printf(dev, "Unable to allocate mirror intfc os_imap memory\n");
4071 		ret = ENOMEM;
4072 		goto free_if_imap;
4073 	}
4074 	ret = ice_resmgr_assign_contiguous(&sc->os_imgr, mif->os_imap, mif->num_irq_vectors);
4075 	if (ret) {
4076 		device_printf(dev, "Unable to assign mirror intfc OS interrupt mapping: %s\n",
4077 			      ice_err_str(ret));
4078 		goto free_if_imap;
4079 	}
4080 
4081 	return (0);
4082 
4083 free_if_imap:
4084 	free(mif->if_imap, M_ICE);
4085 	mif->if_imap = NULL;
4086 free_irqvs:
4087 	free(mif->rx_irqvs, M_ICE);
4088 	mif->rx_irqvs = NULL;
4089 	return (ret);
4090 }
4091 
4092 /**
4093  * ice_subif_if_intr_enable - Enable device interrupts for a subinterface
4094  * @ctx: iflib context structure
4095  *
4096  * Called by iflib to request enabling all interrupts that belong to a
4097  * subinterface.
4098  */
4099 static void
4100 ice_subif_if_intr_enable(if_ctx_t ctx)
4101 {
4102 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4103 	struct ice_softc *sc = mif->back;
4104 	struct ice_vsi *vsi = mif->vsi;
4105 	struct ice_hw *hw = &sc->hw;
4106 
4107 	/* Do not enable queue interrupts in recovery mode */
4108 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4109 		return;
4110 
4111 	/* Enable all queue interrupts */
4112 	for (int i = 0; i < vsi->num_rx_queues; i++)
4113 		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
4114 }
4115 
4116 /**
4117  * ice_subif_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
4118  * @ctx: iflib context structure
4119  * @rxqid: the Rx queue to enable
4120  *
4121  * Enable a specific Rx queue interrupt.
4122  *
4123  * This function is not protected by the iflib CTX lock.
4124  */
4125 static int
4126 ice_subif_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
4127 {
4128 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4129 	struct ice_softc *sc = mif->back;
4130 	struct ice_vsi *vsi = mif->vsi;
4131 	struct ice_hw *hw = &sc->hw;
4132 
4133 	/* Do not enable queue interrupts in recovery mode */
4134 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4135 		return (ENOSYS);
4136 
4137 	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
4138 	return (0);
4139 }
4140 
4141 /**
4142  * ice_subif_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
4143  * @ctx: iflib context structure
4144  * @txqid: the Tx queue to enable
4145  *
4146  * Enable a specific Tx queue interrupt.
4147  *
4148  * This function is not protected by the iflib CTX lock.
4149  */
4150 static int
4151 ice_subif_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
4152 {
4153 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4154 	struct ice_softc *sc = mif->back;
4155 	struct ice_vsi *vsi = mif->vsi;
4156 	struct ice_hw *hw = &sc->hw;
4157 
4158 	/* Do not enable queue interrupts in recovery mode */
4159 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4160 		return (ENOSYS);
4161 
4162 	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
4163 	return (0);
4164 }
4165 
4166 /**
4167  * ice_subif_if_init - Initialize the subinterface
4168  * @ctx: iflib ctx structure
4169  *
4170  * Called by iflib to bring the device up, i.e. ifconfig ice0m0 up.
4171  * Prepares the Tx and Rx engines and enables interrupts.
4172  *
4173  * @pre assumes the caller holds the iflib CTX lock
4174  */
4175 static void
4176 ice_subif_if_init(if_ctx_t ctx)
4177 {
4178 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4179 	struct ice_softc *sc = mif->back;
4180 	struct ice_vsi *vsi = mif->vsi;
4181 	device_t dev = mif->subdev;
4182 	int err;
4183 
4184 	if (ice_driver_is_detaching(sc))
4185 		return;
4186 
4187 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4188 		return;
4189 
4190 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
4191 		device_printf(dev,
4192 		    "request to start interface cannot be completed as the parent device %s failed to reset\n",
4193 		    device_get_nameunit(sc->dev));
4194 		return;
4195 	}
4196 
4197 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
4198 		device_printf(dev,
4199 		    "request to start interface cannot be completed while parent device %s is prepared for impending reset\n",
4200 		    device_get_nameunit(sc->dev));
4201 		return;
4202 	}
4203 
4204 	/* XXX: Equiv to ice_update_rx_mbuf_sz */
4205 	vsi->mbuf_sz = iflib_get_rx_mbuf_sz(ctx);
4206 
4207 	/* Initialize software Tx tracking values */
4208 	ice_init_tx_tracking(vsi);
4209 
4210 	err = ice_cfg_vsi_for_tx(vsi);
4211 	if (err) {
4212 		device_printf(dev,
4213 			      "Unable to configure subif VSI for Tx: %s\n",
4214 			      ice_err_str(err));
4215 		return;
4216 	}
4217 
4218 	err = ice_cfg_vsi_for_rx(vsi);
4219 	if (err) {
4220 		device_printf(dev,
4221 			      "Unable to configure subif VSI for Rx: %s\n",
4222 			      ice_err_str(err));
4223 		goto err_cleanup_tx;
4224 	}
4225 
4226 	err = ice_control_all_rx_queues(vsi, true);
4227 	if (err) {
4228 		device_printf(dev,
4229 			      "Unable to enable subif Rx rings for receive: %s\n",
4230 			      ice_err_str(err));
4231 		goto err_cleanup_tx;
4232 	}
4233 
4234 	ice_configure_all_rxq_interrupts(vsi);
4235 	ice_configure_rx_itr(vsi);
4236 
4237 	ice_set_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED);
4238 	return;
4239 
4240 err_cleanup_tx:
4241 	ice_vsi_disable_tx(vsi);
4242 }
4243 
4244 /**
4245  * ice_if_stop_subif - Stop the subinterface
4246  * @ctx: iflib context structure
4247  * @ifs: subinterface context structure
4248  *
4249  * Called by iflib to stop the subinterface and bring it down.
4250  * (e.g. ifconfig ice0m0 down)
4251  *
4252  * @pre assumes the caller holds the iflib CTX lock
4253  */
4254 static void
4255 ice_subif_if_stop(if_ctx_t ctx)
4256 {
4257 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4258 	struct ice_softc *sc = mif->back;
4259 	struct ice_vsi *vsi = mif->vsi;
4260 	device_t dev = mif->subdev;
4261 
4262 	if (!ice_testandclear_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED))
4263 		return;
4264 
4265 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
4266 		device_printf(dev,
4267 		    "request to stop interface cannot be completed as the parent device %s failed to reset\n",
4268 		    device_get_nameunit(sc->dev));
4269 		return;
4270 	}
4271 
4272 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
4273 		device_printf(dev,
4274 		    "request to stop interface cannot be completed while parent device %s is prepared for impending reset\n",
4275 		    device_get_nameunit(sc->dev));
4276 		return;
4277 	}
4278 
4279 	/* Dissociate the Tx and Rx queues from the interrupts */
4280 	ice_flush_txq_interrupts(vsi);
4281 	ice_flush_rxq_interrupts(vsi);
4282 
4283 	/* Disable the Tx and Rx queues */
4284 	ice_vsi_disable_tx(vsi);
4285 	ice_control_all_rx_queues(vsi, false);
4286 }
4287 
4288 /**
4289  * ice_free_irqvs_subif - Free IRQ vector memory for subinterfaces
4290  * @mif: Mirror interface private structure
4291  *
4292  * Free IRQ vector memory allocated during ice_subif_if_msix_intr_assign.
4293  */
4294 static void
4295 ice_free_irqvs_subif(struct ice_mirr_if *mif)
4296 {
4297 	struct ice_softc *sc = mif->back;
4298 	struct ice_vsi *vsi = mif->vsi;
4299 	if_ctx_t ctx = sc->ctx;
4300 	int i;
4301 
4302 	/* If the irqvs array is NULL, then there are no vectors to free */
4303 	if (mif->rx_irqvs == NULL)
4304 		return;
4305 
4306 	/* Free the IRQ vectors -- currently subinterfaces have number
4307 	 * of vectors equal to number of RX queues
4308 	 *
4309 	 * XXX: ctx is parent device's ctx, not the subinterface ctx
4310 	 */
4311 	for (i = 0; i < vsi->num_rx_queues; i++)
4312 		iflib_irq_free(ctx, &mif->rx_irqvs[i].irq);
4313 
4314 	ice_resmgr_release_map(&sc->os_imgr, mif->os_imap,
4315 	    mif->num_irq_vectors);
4316 	ice_resmgr_release_map(&sc->dev_imgr, mif->if_imap,
4317 	    mif->num_irq_vectors);
4318 
4319 	sc->last_rid -= vsi->num_rx_queues;
4320 
4321 	/* Clear the irqv pointers */
4322 	for (i = 0; i < vsi->num_rx_queues; i++)
4323 		vsi->rx_queues[i].irqv = NULL;
4324 
4325 	for (i = 0; i < vsi->num_tx_queues; i++)
4326 		vsi->tx_queues[i].irqv = NULL;
4327 
4328 	/* Release the vector array memory */
4329 	free(mif->rx_irqvs, M_ICE);
4330 	mif->rx_irqvs = NULL;
4331 }
4332 
4333 /**
4334  * ice_subif_if_queues_free - Free queue memory for subinterfaces
4335  * @ctx: the iflib context structure
4336  *
4337  * Free queue memory allocated by ice_subif_tx_queues_alloc() and
4338  * ice_subif_if_rx_queues_alloc().
4339  */
4340 static void
4341 ice_subif_if_queues_free(if_ctx_t ctx)
4342 {
4343 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4344 	struct ice_vsi *vsi = mif->vsi;
4345 	struct ice_tx_queue *txq;
4346 	int i;
4347 
4348 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
4349 	 * pointers.
4350 	 */
4351 	ice_vsi_del_txqs_ctx(vsi);
4352 	ice_vsi_del_rxqs_ctx(vsi);
4353 
4354 	/* Release MSI-X IRQ vectors */
4355 	ice_free_irqvs_subif(mif);
4356 
4357 	if (vsi->tx_queues != NULL) {
4358 		/* free the tx_rsq arrays */
4359 		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
4360 			if (txq->tx_rsq != NULL) {
4361 				free(txq->tx_rsq, M_ICE);
4362 				txq->tx_rsq = NULL;
4363 			}
4364 		}
4365 		free(vsi->tx_queues, M_ICE);
4366 		vsi->tx_queues = NULL;
4367 	}
4368 	if (vsi->rx_queues != NULL) {
4369 		free(vsi->rx_queues, M_ICE);
4370 		vsi->rx_queues = NULL;
4371 	}
4372 }
4373 
4374 /**
4375  * ice_subif_if_media_status - Report subinterface media
4376  * @ctx: iflib context structure
4377  * @ifmr: ifmedia request structure to update
4378  *
4379  * Updates the provided ifmr with something, in order to prevent a
4380  * "no media types?" message from ifconfig.
4381  *
4382  * Mirror interfaces are always up.
4383  */
4384 static void
4385 ice_subif_if_media_status(if_ctx_t ctx __unused, struct ifmediareq *ifmr)
4386 {
4387 	ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE;
4388 	ifmr->ifm_active = IFM_ETHER | IFM_AUTO;
4389 }
4390 
4391 /**
4392  * ice_subif_if_promisc_set - Set subinterface promiscuous mode
4393  * @ctx: iflib context structure
4394  * @flags: promiscuous flags to configure
4395  *
4396  * Called by iflib to configure device promiscuous mode.
4397  *
4398  * @remark This does not need to be implemented for now.
4399  */
4400 static int
4401 ice_subif_if_promisc_set(if_ctx_t ctx __unused, int flags __unused)
4402 {
4403 	return (0);
4404 }
4405 
4406