xref: /freebsd/sys/dev/ice/if_ice_iflib.c (revision 98bdf63f6e94be42a1787de73608de15bcb3419a)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2024, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /**
33  * @file if_ice_iflib.c
34  * @brief iflib driver implementation
35  *
36  * Contains the main entry point for the iflib driver implementation. It
37  * implements the various ifdi driver methods, and sets up the module and
38  * driver values to load an iflib driver.
39  */
40 
41 #include "ice_iflib.h"
42 #include "ice_drv_info.h"
43 #include "ice_switch.h"
44 #include "ice_sched.h"
45 #ifdef PCI_IOV
46 #include "ice_iov.h"
47 #endif
48 
49 #include <sys/module.h>
50 #include <sys/sockio.h>
51 #include <sys/smp.h>
52 #include <dev/pci/pcivar.h>
53 #include <dev/pci/pcireg.h>
54 
55 /*
56  * Device method prototypes
57  */
58 
59 static void *ice_register(device_t);
60 static int  ice_if_attach_pre(if_ctx_t);
61 static int  ice_attach_pre_recovery_mode(struct ice_softc *sc);
62 static int  ice_if_attach_post(if_ctx_t);
63 static void ice_attach_post_recovery_mode(struct ice_softc *sc);
64 static int  ice_if_detach(if_ctx_t);
65 static int  ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets);
66 static int  ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets);
67 static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix);
68 static void ice_if_queues_free(if_ctx_t ctx);
69 static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu);
70 static void ice_if_intr_enable(if_ctx_t ctx);
71 static void ice_if_intr_disable(if_ctx_t ctx);
72 static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
73 static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
74 static int ice_if_promisc_set(if_ctx_t ctx, int flags);
75 static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
76 static int ice_if_media_change(if_ctx_t ctx);
77 static void ice_if_init(if_ctx_t ctx);
78 static void ice_if_timer(if_ctx_t ctx, uint16_t qid);
79 static void ice_if_update_admin_status(if_ctx_t ctx);
80 static void ice_if_multi_set(if_ctx_t ctx);
81 static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag);
82 static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag);
83 static void ice_if_stop(if_ctx_t ctx);
84 static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter);
85 static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data);
86 static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req);
87 static int ice_if_suspend(if_ctx_t ctx);
88 static int ice_if_resume(if_ctx_t ctx);
89 static bool ice_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event);
90 static void ice_init_link(struct ice_softc *sc);
91 #ifdef PCI_IOV
92 static int ice_if_iov_init(if_ctx_t ctx, uint16_t num_vfs, const nvlist_t *params);
93 static void ice_if_iov_uninit(if_ctx_t ctx);
94 static int ice_if_iov_vf_add(if_ctx_t ctx, uint16_t vfnum, const nvlist_t *params);
95 static void ice_if_vflr_handle(if_ctx_t ctx);
96 #endif
97 static int ice_setup_mirror_vsi(struct ice_mirr_if *mif);
98 static int ice_wire_mirror_intrs(struct ice_mirr_if *mif);
99 static void ice_free_irqvs_subif(struct ice_mirr_if *mif);
100 static void *ice_subif_register(device_t);
101 static void ice_subif_setup_scctx(struct ice_mirr_if *mif);
102 static int ice_subif_rebuild(struct ice_softc *sc);
103 static int ice_subif_rebuild_vsi_qmap(struct ice_softc *sc);
104 
105 /* Iflib API */
106 static int ice_subif_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
107     uint64_t *paddrs, int ntxqs, int ntxqsets);
108 static int ice_subif_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
109     uint64_t *paddrs, int nrxqs, int nrxqsets);
110 static int ice_subif_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
111 static int ice_subif_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
112 static void ice_subif_if_intr_enable(if_ctx_t ctx);
113 static int ice_subif_if_msix_intr_assign(if_ctx_t ctx, int msix);
114 static void ice_subif_if_init(if_ctx_t ctx);
115 static void ice_subif_if_stop(if_ctx_t ctx);
116 static void ice_subif_if_queues_free(if_ctx_t ctx);
117 static int ice_subif_if_attach_pre(if_ctx_t);
118 static int ice_subif_if_attach_post(if_ctx_t);
119 static void ice_subif_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
120 static int ice_subif_if_promisc_set(if_ctx_t ctx, int flags);
121 
122 static int ice_msix_que(void *arg);
123 static int ice_msix_admin(void *arg);
124 
125 /*
126  * Helper function prototypes
127  */
128 static int ice_pci_mapping(struct ice_softc *sc);
129 static void ice_free_pci_mapping(struct ice_softc *sc);
130 static void ice_update_link_status(struct ice_softc *sc, bool update_media);
131 static void ice_init_device_features(struct ice_softc *sc);
132 static void ice_init_tx_tracking(struct ice_vsi *vsi);
133 static void ice_handle_reset_event(struct ice_softc *sc);
134 static void ice_handle_pf_reset_request(struct ice_softc *sc);
135 static void ice_prepare_for_reset(struct ice_softc *sc);
136 static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc);
137 static void ice_rebuild(struct ice_softc *sc);
138 static void ice_rebuild_recovery_mode(struct ice_softc *sc);
139 static void ice_free_irqvs(struct ice_softc *sc);
140 static void ice_update_rx_mbuf_sz(struct ice_softc *sc);
141 static void ice_poll_for_media_avail(struct ice_softc *sc);
142 static void ice_setup_scctx(struct ice_softc *sc);
143 static int ice_allocate_msix(struct ice_softc *sc);
144 static void ice_admin_timer(void *arg);
145 static void ice_transition_recovery_mode(struct ice_softc *sc);
146 static void ice_transition_safe_mode(struct ice_softc *sc);
147 static void ice_set_default_promisc_mask(ice_bitmap_t *promisc_mask);
148 
149 /*
150  * Device Interface Declaration
151  */
152 
153 /**
154  * @var ice_methods
155  * @brief ice driver method entry points
156  *
157  * List of device methods implementing the generic device interface used by
158  * the device stack to interact with the ice driver. Since this is an iflib
159  * driver, most of the methods point to the generic iflib implementation.
160  */
161 static device_method_t ice_methods[] = {
162 	/* Device interface */
163 	DEVMETHOD(device_register, ice_register),
164 	DEVMETHOD(device_probe,    iflib_device_probe_vendor),
165 	DEVMETHOD(device_attach,   iflib_device_attach),
166 	DEVMETHOD(device_detach,   iflib_device_detach),
167 	DEVMETHOD(device_shutdown, iflib_device_shutdown),
168 	DEVMETHOD(device_suspend,  iflib_device_suspend),
169 	DEVMETHOD(device_resume,   iflib_device_resume),
170 #ifdef PCI_IOV
171 	DEVMETHOD(pci_iov_init, iflib_device_iov_init),
172 	DEVMETHOD(pci_iov_uninit, iflib_device_iov_uninit),
173 	DEVMETHOD(pci_iov_add_vf, iflib_device_iov_add_vf),
174 #endif
175 	DEVMETHOD_END
176 };
177 
178 /**
179  * @var ice_iflib_methods
180  * @brief iflib method entry points
181  *
182  * List of device methods used by the iflib stack to interact with this
183  * driver. These are the real main entry points used to interact with this
184  * driver.
185  */
186 static device_method_t ice_iflib_methods[] = {
187 	DEVMETHOD(ifdi_attach_pre, ice_if_attach_pre),
188 	DEVMETHOD(ifdi_attach_post, ice_if_attach_post),
189 	DEVMETHOD(ifdi_detach, ice_if_detach),
190 	DEVMETHOD(ifdi_tx_queues_alloc, ice_if_tx_queues_alloc),
191 	DEVMETHOD(ifdi_rx_queues_alloc, ice_if_rx_queues_alloc),
192 	DEVMETHOD(ifdi_msix_intr_assign, ice_if_msix_intr_assign),
193 	DEVMETHOD(ifdi_queues_free, ice_if_queues_free),
194 	DEVMETHOD(ifdi_mtu_set, ice_if_mtu_set),
195 	DEVMETHOD(ifdi_intr_enable, ice_if_intr_enable),
196 	DEVMETHOD(ifdi_intr_disable, ice_if_intr_disable),
197 	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_if_rx_queue_intr_enable),
198 	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_if_tx_queue_intr_enable),
199 	DEVMETHOD(ifdi_promisc_set, ice_if_promisc_set),
200 	DEVMETHOD(ifdi_media_status, ice_if_media_status),
201 	DEVMETHOD(ifdi_media_change, ice_if_media_change),
202 	DEVMETHOD(ifdi_init, ice_if_init),
203 	DEVMETHOD(ifdi_stop, ice_if_stop),
204 	DEVMETHOD(ifdi_timer, ice_if_timer),
205 	DEVMETHOD(ifdi_update_admin_status, ice_if_update_admin_status),
206 	DEVMETHOD(ifdi_multi_set, ice_if_multi_set),
207 	DEVMETHOD(ifdi_vlan_register, ice_if_vlan_register),
208 	DEVMETHOD(ifdi_vlan_unregister, ice_if_vlan_unregister),
209 	DEVMETHOD(ifdi_get_counter, ice_if_get_counter),
210 	DEVMETHOD(ifdi_priv_ioctl, ice_if_priv_ioctl),
211 	DEVMETHOD(ifdi_i2c_req, ice_if_i2c_req),
212 	DEVMETHOD(ifdi_suspend, ice_if_suspend),
213 	DEVMETHOD(ifdi_resume, ice_if_resume),
214 	DEVMETHOD(ifdi_needs_restart, ice_if_needs_restart),
215 #ifdef PCI_IOV
216 	DEVMETHOD(ifdi_iov_vf_add, ice_if_iov_vf_add),
217 	DEVMETHOD(ifdi_iov_init, ice_if_iov_init),
218 	DEVMETHOD(ifdi_iov_uninit, ice_if_iov_uninit),
219 	DEVMETHOD(ifdi_vflr_handle, ice_if_vflr_handle),
220 #endif
221 	DEVMETHOD_END
222 };
223 
224 /**
225  * @var ice_driver
226  * @brief driver structure for the generic device stack
227  *
228  * driver_t definition used to setup the generic device methods.
229  */
230 static driver_t ice_driver = {
231 	.name = "ice",
232 	.methods = ice_methods,
233 	.size = sizeof(struct ice_softc),
234 };
235 
236 /**
237  * @var ice_iflib_driver
238  * @brief driver structure for the iflib stack
239  *
240  * driver_t definition used to setup the iflib device methods.
241  */
242 static driver_t ice_iflib_driver = {
243 	.name = "ice",
244 	.methods = ice_iflib_methods,
245 	.size = sizeof(struct ice_softc),
246 };
247 
248 extern struct if_txrx ice_txrx;
249 extern struct if_txrx ice_recovery_txrx;
250 
251 /**
252  * @var ice_sctx
253  * @brief ice driver shared context
254  *
255  * Structure defining shared values (context) that is used by all instances of
256  * the device. Primarily used to setup details about how the iflib stack
257  * should treat this driver. Also defines the default, minimum, and maximum
258  * number of descriptors in each ring.
259  */
260 static struct if_shared_ctx ice_sctx = {
261 	.isc_magic = IFLIB_MAGIC,
262 	.isc_q_align = PAGE_SIZE,
263 
264 	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
265 	/* We could technically set this as high as ICE_MAX_DMA_SEG_SIZE, but
266 	 * that doesn't make sense since that would be larger than the maximum
267 	 * size of a single packet.
268 	 */
269 	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,
270 
271 	/* XXX: This is only used by iflib to ensure that
272 	 * scctx->isc_tx_tso_size_max + the VLAN header is a valid size.
273 	 */
274 	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
275 	/* XXX: This is used by iflib to set the number of segments in the TSO
276 	 * DMA tag. However, scctx->isc_tx_tso_segsize_max is used to set the
277 	 * related ifnet parameter.
278 	 */
279 	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,
280 
281 	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
282 	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
283 	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,
284 
285 	.isc_nfl = 1,
286 	.isc_ntxqs = 1,
287 	.isc_nrxqs = 1,
288 
289 	.isc_admin_intrcnt = 1,
290 	.isc_vendor_info = ice_vendor_info_array,
291 	.isc_driver_version = __DECONST(char *, ice_driver_version),
292 	.isc_driver = &ice_iflib_driver,
293 
294 	/*
295 	 * IFLIB_NEED_SCRATCH ensures that mbufs have scratch space available
296 	 * for hardware checksum offload
297 	 *
298 	 * IFLIB_TSO_INIT_IP ensures that the TSO packets have zeroed out the
299 	 * IP sum field, required by our hardware to calculate valid TSO
300 	 * checksums.
301 	 *
302 	 * IFLIB_ADMIN_ALWAYS_RUN ensures that the administrative task runs
303 	 * even when the interface is down.
304 	 *
305 	 * IFLIB_SKIP_MSIX allows the driver to handle allocating MSI-X
306 	 * vectors manually instead of relying on iflib code to do this.
307 	 */
308 	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
309 		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX,
310 
311 	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
312 	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
313 	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
314 	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
315 	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
316 	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
317 };
318 
319 DRIVER_MODULE(ice, pci, ice_driver, ice_module_event_handler, NULL);
320 
321 MODULE_VERSION(ice, 1);
322 MODULE_DEPEND(ice, pci, 1, 1, 1);
323 MODULE_DEPEND(ice, ether, 1, 1, 1);
324 MODULE_DEPEND(ice, iflib, 1, 1, 1);
325 
326 IFLIB_PNP_INFO(pci, ice, ice_vendor_info_array);
327 
328 /* Static driver-wide sysctls */
329 #include "ice_iflib_sysctls.h"
330 
331 /**
332  * ice_pci_mapping - Map PCI BAR memory
333  * @sc: device private softc
334  *
335  * Map PCI BAR 0 for device operation.
336  */
337 static int
ice_pci_mapping(struct ice_softc * sc)338 ice_pci_mapping(struct ice_softc *sc)
339 {
340 	int rc;
341 
342 	/* Map BAR0 */
343 	rc = ice_map_bar(sc->dev, &sc->bar0, 0);
344 	if (rc)
345 		return rc;
346 
347 	return 0;
348 }
349 
350 /**
351  * ice_free_pci_mapping - Release PCI BAR memory
352  * @sc: device private softc
353  *
354  * Release PCI BARs which were previously mapped by ice_pci_mapping().
355  */
356 static void
ice_free_pci_mapping(struct ice_softc * sc)357 ice_free_pci_mapping(struct ice_softc *sc)
358 {
359 	/* Free BAR0 */
360 	ice_free_bar(sc->dev, &sc->bar0);
361 }
362 
363 /*
364  * Device methods
365  */
366 
367 /**
368  * ice_register - register device method callback
369  * @dev: the device being registered
370  *
371  * Returns a pointer to the shared context structure, which is used by iflib.
372  */
373 static void *
ice_register(device_t dev __unused)374 ice_register(device_t dev __unused)
375 {
376 	return &ice_sctx;
377 } /* ice_register */
378 
379 /**
380  * ice_setup_scctx - Setup the iflib softc context structure
381  * @sc: the device private structure
382  *
383  * Setup the parameters in if_softc_ctx_t structure used by the iflib stack
384  * when loading.
385  */
386 static void
ice_setup_scctx(struct ice_softc * sc)387 ice_setup_scctx(struct ice_softc *sc)
388 {
389 	if_softc_ctx_t scctx = sc->scctx;
390 	struct ice_hw *hw = &sc->hw;
391 	device_t dev = sc->dev;
392 	bool safe_mode, recovery_mode;
393 
394 	safe_mode = ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE);
395 	recovery_mode = ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE);
396 
397 	/*
398 	 * If the driver loads in Safe mode or Recovery mode, limit iflib to
399 	 * a single queue pair.
400 	 */
401 	if (safe_mode || recovery_mode) {
402 		scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1;
403 		scctx->isc_ntxqsets_max = 1;
404 		scctx->isc_nrxqsets_max = 1;
405 	} else {
406 		/*
407 		 * iflib initially sets the isc_ntxqsets and isc_nrxqsets to
408 		 * the values of the override sysctls. Cache these initial
409 		 * values so that the driver can be aware of what the iflib
410 		 * sysctl value is when setting up MSI-X vectors.
411 		 */
412 		sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets;
413 		sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets;
414 
415 		if (scctx->isc_ntxqsets == 0)
416 			scctx->isc_ntxqsets = hw->func_caps.common_cap.rss_table_size;
417 		if (scctx->isc_nrxqsets == 0)
418 			scctx->isc_nrxqsets = hw->func_caps.common_cap.rss_table_size;
419 
420 		scctx->isc_ntxqsets_max = hw->func_caps.common_cap.num_txq;
421 		scctx->isc_nrxqsets_max = hw->func_caps.common_cap.num_rxq;
422 
423 		/*
424 		 * Sanity check that the iflib sysctl values are within the
425 		 * maximum supported range.
426 		 */
427 		if (sc->ifc_sysctl_ntxqs > scctx->isc_ntxqsets_max)
428 			sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets_max;
429 		if (sc->ifc_sysctl_nrxqs > scctx->isc_nrxqsets_max)
430 			sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets_max;
431 	}
432 
433 	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
434 	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
435 	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
436 	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);
437 
438 	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
439 	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
440 	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
441 	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;
442 
443 	scctx->isc_msix_bar = pci_msix_table_bar(dev);
444 	scctx->isc_rss_table_size = hw->func_caps.common_cap.rss_table_size;
445 
446 	/*
447 	 * If the driver loads in recovery mode, disable Tx/Rx functionality
448 	 */
449 	if (recovery_mode)
450 		scctx->isc_txrx = &ice_recovery_txrx;
451 	else
452 		scctx->isc_txrx = &ice_txrx;
453 
454 	/*
455 	 * If the driver loads in Safe mode or Recovery mode, disable
456 	 * advanced features including hardware offloads.
457 	 */
458 	if (safe_mode || recovery_mode) {
459 		scctx->isc_capenable = ICE_SAFE_CAPS;
460 		scctx->isc_tx_csum_flags = 0;
461 	} else {
462 		scctx->isc_capenable = ICE_FULL_CAPS;
463 		scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
464 	}
465 
466 	scctx->isc_capabilities = scctx->isc_capenable;
467 } /* ice_setup_scctx */
468 
469 /**
470  * ice_if_attach_pre - Early device attach logic
471  * @ctx: the iflib context structure
472  *
473  * Called by iflib during the attach process. Earliest main driver entry
474  * point which performs necessary hardware and driver initialization. Called
475  * before the Tx and Rx queues are allocated.
476  */
477 static int
ice_if_attach_pre(if_ctx_t ctx)478 ice_if_attach_pre(if_ctx_t ctx)
479 {
480 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
481 	enum ice_fw_modes fw_mode;
482 	int status;
483 	if_softc_ctx_t scctx;
484 	struct ice_hw *hw;
485 	device_t dev;
486 	int err;
487 
488 	device_printf(iflib_get_dev(ctx), "Loading the iflib ice driver\n");
489 
490 	ice_set_state(&sc->state, ICE_STATE_ATTACHING);
491 
492 	sc->ctx = ctx;
493 	sc->media = iflib_get_media(ctx);
494 	sc->sctx = iflib_get_sctx(ctx);
495 	sc->iflib_ctx_lock = iflib_ctx_lock_get(ctx);
496 	sc->ifp = iflib_get_ifp(ctx);
497 
498 	dev = sc->dev = iflib_get_dev(ctx);
499 	scctx = sc->scctx = iflib_get_softc_ctx(ctx);
500 
501 	hw = &sc->hw;
502 	hw->back = sc;
503 
504 	snprintf(sc->admin_mtx_name, sizeof(sc->admin_mtx_name),
505 		 "%s:admin", device_get_nameunit(dev));
506 	mtx_init(&sc->admin_mtx, sc->admin_mtx_name, NULL, MTX_DEF);
507 	callout_init_mtx(&sc->admin_timer, &sc->admin_mtx, 0);
508 
509 	ASSERT_CTX_LOCKED(sc);
510 
511 	if (ice_pci_mapping(sc)) {
512 		err = (ENXIO);
513 		goto destroy_admin_timer;
514 	}
515 
516 	/* Save off the PCI information */
517 	ice_save_pci_info(hw, dev);
518 
519 	/* create tunables as early as possible */
520 	ice_add_device_tunables(sc);
521 
522 	/* Setup ControlQ lengths */
523 	ice_set_ctrlq_len(hw);
524 
525 reinit_hw:
526 
527 	fw_mode = ice_get_fw_mode(hw);
528 	if (fw_mode == ICE_FW_MODE_REC) {
529 		device_printf(dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
530 
531 		err = ice_attach_pre_recovery_mode(sc);
532 		if (err)
533 			goto free_pci_mapping;
534 
535 		return (0);
536 	}
537 
538 	/* Initialize the hw data structure */
539 	status = ice_init_hw(hw);
540 	if (status) {
541 		if (status == ICE_ERR_FW_API_VER) {
542 			/* Enter recovery mode, so that the driver remains
543 			 * loaded. This way, if the system administrator
544 			 * cannot update the driver, they may still attempt to
545 			 * downgrade the NVM.
546 			 */
547 			err = ice_attach_pre_recovery_mode(sc);
548 			if (err)
549 				goto free_pci_mapping;
550 
551 			return (0);
552 		} else {
553 			err = EIO;
554 			device_printf(dev, "Unable to initialize hw, err %s aq_err %s\n",
555 				      ice_status_str(status),
556 				      ice_aq_str(hw->adminq.sq_last_status));
557 		}
558 		goto free_pci_mapping;
559 	}
560 
561 	ice_init_device_features(sc);
562 
563 	/* Keep flag set by default */
564 	ice_set_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN);
565 
566 	/* Notify firmware of the device driver version */
567 	err = ice_send_version(sc);
568 	if (err)
569 		goto deinit_hw;
570 
571 	/*
572 	 * Success indicates a change was made that requires a reinitialization
573 	 * of the hardware
574 	 */
575 	err = ice_load_pkg_file(sc);
576 	if (!err) {
577 		ice_deinit_hw(hw);
578 		goto reinit_hw;
579 	}
580 
581 	err = ice_init_link_events(sc);
582 	if (err) {
583 		device_printf(dev, "ice_init_link_events failed: %s\n",
584 			      ice_err_str(err));
585 		goto deinit_hw;
586 	}
587 
588 	/* Initialize VLAN mode in FW; if dual VLAN mode is supported by the package
589 	 * and firmware, this will force them to use single VLAN mode.
590 	 */
591 	status = ice_set_vlan_mode(hw);
592 	if (status) {
593 		err = EIO;
594 		device_printf(dev, "Unable to initialize VLAN mode, err %s aq_err %s\n",
595 			      ice_status_str(status),
596 			      ice_aq_str(hw->adminq.sq_last_status));
597 		goto deinit_hw;
598 	}
599 
600 	ice_print_nvm_version(sc);
601 
602 	/* Setup the MAC address */
603 	iflib_set_mac(ctx, hw->port_info->mac.lan_addr);
604 
605 	/* Setup the iflib softc context structure */
606 	ice_setup_scctx(sc);
607 
608 	/* Initialize the Tx queue manager */
609 	err = ice_resmgr_init(&sc->tx_qmgr, hw->func_caps.common_cap.num_txq);
610 	if (err) {
611 		device_printf(dev, "Unable to initialize Tx queue manager: %s\n",
612 			      ice_err_str(err));
613 		goto deinit_hw;
614 	}
615 
616 	/* Initialize the Rx queue manager */
617 	err = ice_resmgr_init(&sc->rx_qmgr, hw->func_caps.common_cap.num_rxq);
618 	if (err) {
619 		device_printf(dev, "Unable to initialize Rx queue manager: %s\n",
620 			      ice_err_str(err));
621 		goto free_tx_qmgr;
622 	}
623 
624 	/* Initialize the PF device interrupt resource manager */
625 	err = ice_alloc_intr_tracking(sc);
626 	if (err)
627 		/* Errors are already printed */
628 		goto free_rx_qmgr;
629 
630 	/* Determine maximum number of VSIs we'll prepare for */
631 	sc->num_available_vsi = min(ICE_MAX_VSI_AVAILABLE,
632 				    hw->func_caps.guar_num_vsi);
633 
634 	if (!sc->num_available_vsi) {
635 		err = EIO;
636 		device_printf(dev, "No VSIs allocated to host\n");
637 		goto free_intr_tracking;
638 	}
639 
640 	/* Allocate storage for the VSI pointers */
641 	sc->all_vsi = (struct ice_vsi **)
642 		malloc(sizeof(struct ice_vsi *) * sc->num_available_vsi,
643 		       M_ICE, M_WAITOK | M_ZERO);
644 	if (!sc->all_vsi) {
645 		err = ENOMEM;
646 		device_printf(dev, "Unable to allocate VSI array\n");
647 		goto free_intr_tracking;
648 	}
649 
650 	/*
651 	 * Prepare the statically allocated primary PF VSI in the softc
652 	 * structure. Other VSIs will be dynamically allocated as needed.
653 	 */
654 	ice_setup_pf_vsi(sc);
655 
656 	ice_alloc_vsi_qmap(&sc->pf_vsi, scctx->isc_ntxqsets_max,
657 	    scctx->isc_nrxqsets_max);
658 
659 	/* Allocate MSI-X vectors (due to isc_flags IFLIB_SKIP_MSIX) */
660 	err = ice_allocate_msix(sc);
661 	if (err)
662 		goto free_main_vsi;
663 
664 	return 0;
665 
666 free_main_vsi:
667 	/* ice_release_vsi will free the queue maps if they were allocated */
668 	ice_release_vsi(&sc->pf_vsi);
669 	free(sc->all_vsi, M_ICE);
670 	sc->all_vsi = NULL;
671 free_intr_tracking:
672 	ice_free_intr_tracking(sc);
673 free_rx_qmgr:
674 	ice_resmgr_destroy(&sc->rx_qmgr);
675 free_tx_qmgr:
676 	ice_resmgr_destroy(&sc->tx_qmgr);
677 deinit_hw:
678 	ice_deinit_hw(hw);
679 free_pci_mapping:
680 	ice_free_pci_mapping(sc);
681 destroy_admin_timer:
682 	mtx_lock(&sc->admin_mtx);
683 	callout_stop(&sc->admin_timer);
684 	mtx_unlock(&sc->admin_mtx);
685 	mtx_destroy(&sc->admin_mtx);
686 	return err;
687 } /* ice_if_attach_pre */
688 
689 /**
690  * ice_attach_pre_recovery_mode - Limited driver attach_pre for FW recovery
691  * @sc: the device private softc
692  *
693  * Loads the device driver in limited Firmware Recovery mode, intended to
694  * allow users to update the firmware to attempt to recover the device.
695  *
696  * @remark We may enter recovery mode in case either (a) the firmware is
697  * detected to be in an invalid state and must be re-programmed, or (b) the
698  * driver detects that the loaded firmware has a non-compatible API version
699  * that the driver cannot operate with.
700  */
701 static int
ice_attach_pre_recovery_mode(struct ice_softc * sc)702 ice_attach_pre_recovery_mode(struct ice_softc *sc)
703 {
704 	ice_set_state(&sc->state, ICE_STATE_RECOVERY_MODE);
705 
706 	/* Setup the iflib softc context */
707 	ice_setup_scctx(sc);
708 
709 	/* Setup the PF VSI back pointer */
710 	sc->pf_vsi.sc = sc;
711 
712 	/*
713 	 * We still need to allocate MSI-X vectors since we need one vector to
714 	 * run the administrative admin interrupt
715 	 */
716 	return ice_allocate_msix(sc);
717 }
718 
719 /**
720  * ice_update_link_status - notify OS of link state change
721  * @sc: device private softc structure
722  * @update_media: true if we should update media even if link didn't change
723  *
724  * Called to notify iflib core of link status changes. Should be called once
725  * during attach_post, and whenever link status changes during runtime.
726  *
727  * This call only updates the currently supported media types if the link
728  * status changed, or if update_media is set to true.
729  */
730 static void
ice_update_link_status(struct ice_softc * sc,bool update_media)731 ice_update_link_status(struct ice_softc *sc, bool update_media)
732 {
733 	struct ice_hw *hw = &sc->hw;
734 	int status;
735 
736 	/* Never report link up when in recovery mode */
737 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
738 		return;
739 
740 	/* Report link status to iflib only once each time it changes */
741 	if (!ice_testandset_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED)) {
742 		if (sc->link_up) { /* link is up */
743 			uint64_t baudrate = ice_aq_speed_to_rate(sc->hw.port_info);
744 
745 			if (!(hw->port_info->phy.link_info_old.link_info & ICE_AQ_LINK_UP))
746 				ice_set_default_local_lldp_mib(sc);
747 
748 			iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate);
749 			ice_rdma_link_change(sc, LINK_STATE_UP, baudrate);
750 
751 			ice_link_up_msg(sc);
752 		} else { /* link is down */
753 			iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
754 			ice_rdma_link_change(sc, LINK_STATE_DOWN, 0);
755 		}
756 #ifdef PCI_IOV
757 		ice_vc_notify_all_vfs_link_state(sc);
758 #endif
759 		update_media = true;
760 	}
761 
762 	/* Update the supported media types */
763 	if (update_media && !ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
764 		status = ice_add_media_types(sc, sc->media);
765 		if (status)
766 			device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
767 				      ice_status_str(status),
768 				      ice_aq_str(hw->adminq.sq_last_status));
769 	}
770 }
771 
772 /**
773  * ice_if_attach_post - Late device attach logic
774  * @ctx: the iflib context structure
775  *
776  * Called by iflib to finish up attaching the device. Performs any attach
777  * logic which must wait until after the Tx and Rx queues have been
778  * allocated.
779  */
780 static int
ice_if_attach_post(if_ctx_t ctx)781 ice_if_attach_post(if_ctx_t ctx)
782 {
783 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
784 	if_t ifp = iflib_get_ifp(ctx);
785 	int status;
786 	int err;
787 
788 	ASSERT_CTX_LOCKED(sc);
789 
790 	/* We don't yet support loading if MSI-X is not supported */
791 	if (sc->scctx->isc_intr != IFLIB_INTR_MSIX) {
792 		device_printf(sc->dev, "The ice driver does not support loading without MSI-X\n");
793 		return (ENOTSUP);
794 	}
795 
796 	/* The ifnet structure hasn't yet been initialized when the attach_pre
797 	 * handler is called, so wait until attach_post to setup the
798 	 * isc_max_frame_size.
799 	 */
800 	sc->scctx->isc_max_frame_size = if_getmtu(ifp) +
801 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
802 
803 	/*
804 	 * If we are in recovery mode, only perform a limited subset of
805 	 * initialization to support NVM recovery.
806 	 */
807 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
808 		ice_attach_post_recovery_mode(sc);
809 		return (0);
810 	}
811 
812 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
813 
814 	err = ice_initialize_vsi(&sc->pf_vsi);
815 	if (err) {
816 		device_printf(sc->dev, "Unable to initialize Main VSI: %s\n",
817 			      ice_err_str(err));
818 		return err;
819 	}
820 
821 	/* Enable FW health event reporting */
822 	ice_init_health_events(sc);
823 
824 	/* Configure the main PF VSI for RSS */
825 	err = ice_config_rss(&sc->pf_vsi);
826 	if (err) {
827 		device_printf(sc->dev,
828 			      "Unable to configure RSS for the main VSI, err %s\n",
829 			      ice_err_str(err));
830 		return err;
831 	}
832 
833 	/* Configure switch to drop transmitted LLDP and PAUSE frames */
834 	err = ice_cfg_pf_ethertype_filters(sc);
835 	if (err)
836 		return err;
837 
838 	ice_get_and_print_bus_info(sc);
839 
840 	ice_set_link_management_mode(sc);
841 
842 	ice_init_saved_phy_cfg(sc);
843 
844 	ice_cfg_pba_num(sc);
845 
846 	/* Set a default value for PFC mode on attach since the FW state is unknown
847 	 * before sysctl tunables are executed and it can't be queried. This fixes an
848 	 * issue when loading the driver with the FW LLDP agent enabled but the FW
849 	 * was previously in DSCP PFC mode.
850 	 */
851 	status = ice_aq_set_pfc_mode(&sc->hw, ICE_AQC_PFC_VLAN_BASED_PFC, NULL);
852 	if (status)
853 		device_printf(sc->dev, "Setting pfc mode failed, status %s\n", ice_status_str(status));
854 
855 	ice_add_device_sysctls(sc);
856 
857 #ifdef PCI_IOV
858 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_SRIOV)) {
859 		err = ice_iov_attach(sc);
860 		if (err == ENOMEM)
861 			return (err);
862 	}
863 #endif /* PCI_IOV */
864 
865 	/* Get DCBX/LLDP state and start DCBX agent */
866 	ice_init_dcb_setup(sc);
867 
868 	/* Setup link, if PHY FW is ready */
869 	ice_init_link(sc);
870 
871 	/* Configure interrupt causes for the administrative interrupt */
872 	ice_configure_misc_interrupts(sc);
873 
874 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
875 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
876 
877 	err = ice_rdma_pf_attach(sc);
878 	if (err)
879 		return (err);
880 
881 	/* Start the admin timer */
882 	mtx_lock(&sc->admin_mtx);
883 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
884 	mtx_unlock(&sc->admin_mtx);
885 
886 	if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
887 		 !ice_test_state(&sc->state, ICE_STATE_NO_MEDIA))
888 		ice_set_state(&sc->state, ICE_STATE_FIRST_INIT_LINK);
889 
890 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
891 
892 	return 0;
893 } /* ice_if_attach_post */
894 
895 /**
896  * ice_attach_post_recovery_mode - Limited driver attach_post for FW recovery
897  * @sc: the device private softc
898  *
899  * Performs minimal work to prepare the driver to recover an NVM in case the
900  * firmware is in recovery mode.
901  */
902 static void
ice_attach_post_recovery_mode(struct ice_softc * sc)903 ice_attach_post_recovery_mode(struct ice_softc *sc)
904 {
905 	/* Configure interrupt causes for the administrative interrupt */
906 	ice_configure_misc_interrupts(sc);
907 
908 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
909 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
910 
911 	/* Start the admin timer */
912 	mtx_lock(&sc->admin_mtx);
913 	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
914 	mtx_unlock(&sc->admin_mtx);
915 
916 	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
917 }
918 
919 /**
920  * ice_free_irqvs - Free IRQ vector memory
921  * @sc: the device private softc structure
922  *
923  * Free IRQ vector memory allocated during ice_if_msix_intr_assign.
924  */
925 static void
ice_free_irqvs(struct ice_softc * sc)926 ice_free_irqvs(struct ice_softc *sc)
927 {
928 	struct ice_vsi *vsi = &sc->pf_vsi;
929 	if_ctx_t ctx = sc->ctx;
930 	int i;
931 
932 	/* If the irqvs array is NULL, then there are no vectors to free */
933 	if (sc->irqvs == NULL)
934 		return;
935 
936 	/* Free the IRQ vectors */
937 	for (i = 0; i < sc->num_irq_vectors; i++)
938 		iflib_irq_free(ctx, &sc->irqvs[i].irq);
939 
940 	/* Clear the irqv pointers */
941 	for (i = 0; i < vsi->num_rx_queues; i++)
942 		vsi->rx_queues[i].irqv = NULL;
943 
944 	for (i = 0; i < vsi->num_tx_queues; i++)
945 		vsi->tx_queues[i].irqv = NULL;
946 
947 	/* Release the vector array memory */
948 	free(sc->irqvs, M_ICE);
949 	sc->irqvs = NULL;
950 	sc->num_irq_vectors = 0;
951 }
952 
953 /**
954  * ice_if_detach - Device driver detach logic
955  * @ctx: iflib context structure
956  *
957  * Perform device shutdown logic to detach the device driver.
958  *
959  * Note that there is no guarantee of the ordering of ice_if_queues_free() and
960  * ice_if_detach(). It is possible for the functions to be called in either
961  * order, and they must not assume to have a strict ordering.
962  */
963 static int
ice_if_detach(if_ctx_t ctx)964 ice_if_detach(if_ctx_t ctx)
965 {
966 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
967 	struct ice_vsi *vsi = &sc->pf_vsi;
968 	int status;
969 	int i;
970 
971 	ASSERT_CTX_LOCKED(sc);
972 
973 	/* Indicate that we're detaching */
974 	ice_set_state(&sc->state, ICE_STATE_DETACHING);
975 
976 	/* Stop the admin timer */
977 	mtx_lock(&sc->admin_mtx);
978 	callout_stop(&sc->admin_timer);
979 	mtx_unlock(&sc->admin_mtx);
980 	mtx_destroy(&sc->admin_mtx);
981 
982 	/* Remove additional interfaces if they exist */
983 	if (sc->mirr_if)
984 		ice_destroy_mirror_interface(sc);
985 	ice_rdma_pf_detach(sc);
986 
987 #ifdef PCI_IOV
988 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_SRIOV))
989 		ice_iov_detach(sc);
990 #endif /* PCI_IOV */
991 
992 	/* Free allocated media types */
993 	ifmedia_removeall(sc->media);
994 
995 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
996 	 * pointers. Note, the calls here and those in ice_if_queues_free()
997 	 * are *BOTH* necessary, as we cannot guarantee which path will be
998 	 * run first
999 	 */
1000 	ice_vsi_del_txqs_ctx(vsi);
1001 	ice_vsi_del_rxqs_ctx(vsi);
1002 
1003 	/* Release MSI-X resources */
1004 	ice_free_irqvs(sc);
1005 
1006 	for (i = 0; i < sc->num_available_vsi; i++) {
1007 		if (sc->all_vsi[i])
1008 			ice_release_vsi(sc->all_vsi[i]);
1009 	}
1010 
1011 	if (sc->all_vsi) {
1012 		free(sc->all_vsi, M_ICE);
1013 		sc->all_vsi = NULL;
1014 	}
1015 
1016 	/* Release MSI-X memory */
1017 	pci_release_msi(sc->dev);
1018 
1019 	if (sc->msix_table != NULL) {
1020 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
1021 				     rman_get_rid(sc->msix_table),
1022 				     sc->msix_table);
1023 		sc->msix_table = NULL;
1024 	}
1025 
1026 	ice_free_intr_tracking(sc);
1027 
1028 	/* Destroy the queue managers */
1029 	ice_resmgr_destroy(&sc->tx_qmgr);
1030 	ice_resmgr_destroy(&sc->rx_qmgr);
1031 
1032 	if (!ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1033 		ice_deinit_hw(&sc->hw);
1034 
1035 	IFLIB_CTX_UNLOCK(sc);
1036 	status = ice_reset(&sc->hw, ICE_RESET_PFR);
1037 	IFLIB_CTX_LOCK(sc);
1038 	if (status) {
1039 		device_printf(sc->dev, "device PF reset failed, err %s\n",
1040 			      ice_status_str(status));
1041 	}
1042 
1043 	ice_free_pci_mapping(sc);
1044 
1045 	return 0;
1046 } /* ice_if_detach */
1047 
1048 /**
1049  * ice_if_tx_queues_alloc - Allocate Tx queue memory
1050  * @ctx: iflib context structure
1051  * @vaddrs: virtual addresses for the queue memory
1052  * @paddrs: physical addresses for the queue memory
1053  * @ntxqs: the number of Tx queues per set (should always be 1)
1054  * @ntxqsets: the number of Tx queue sets to allocate
1055  *
1056  * Called by iflib to allocate Tx queues for the device. Allocates driver
1057  * memory to track each queue, the status arrays used for descriptor
1058  * status reporting, and Tx queue sysctls.
1059  */
1060 static int
ice_if_tx_queues_alloc(if_ctx_t ctx,caddr_t * vaddrs,uint64_t * paddrs,int __invariant_only ntxqs,int ntxqsets)1061 ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
1062 		       int __invariant_only ntxqs, int ntxqsets)
1063 {
1064 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1065 	struct ice_vsi *vsi = &sc->pf_vsi;
1066 	struct ice_tx_queue *txq;
1067 	int err, i, j;
1068 
1069 	MPASS(ntxqs == 1);
1070 	MPASS(sc->scctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
1071 	ASSERT_CTX_LOCKED(sc);
1072 
1073 	/* Do not bother allocating queues if we're in recovery mode */
1074 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1075 		return (0);
1076 
1077 	/* Allocate queue structure memory */
1078 	if (!(vsi->tx_queues =
1079 	      (struct ice_tx_queue *) malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
1080 		device_printf(sc->dev, "Unable to allocate Tx queue memory\n");
1081 		return (ENOMEM);
1082 	}
1083 
1084 	/* Allocate report status arrays */
1085 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1086 		if (!(txq->tx_rsq =
1087 		      (uint16_t *) malloc(sizeof(uint16_t) * sc->scctx->isc_ntxd[0], M_ICE, M_NOWAIT))) {
1088 			device_printf(sc->dev, "Unable to allocate tx_rsq memory\n");
1089 			err = ENOMEM;
1090 			goto free_tx_queues;
1091 		}
1092 		/* Initialize report status array */
1093 		for (j = 0; j < sc->scctx->isc_ntxd[0]; j++)
1094 			txq->tx_rsq[j] = QIDX_INVALID;
1095 	}
1096 
1097 	/* Assign queues from PF space to the main VSI */
1098 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, ntxqsets);
1099 	if (err) {
1100 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1101 			      ice_err_str(err));
1102 		goto free_tx_queues;
1103 	}
1104 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1105 
1106 	/* Add Tx queue sysctls context */
1107 	ice_vsi_add_txqs_ctx(vsi);
1108 
1109 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1110 		/* q_handle == me when only one TC */
1111 		txq->me = txq->q_handle = i;
1112 		txq->vsi = vsi;
1113 
1114 		/* store the queue size for easier access */
1115 		txq->desc_count = sc->scctx->isc_ntxd[0];
1116 
1117 		/* get the virtual and physical address of the hardware queues */
1118 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
1119 		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
1120 		txq->tx_paddr = paddrs[i];
1121 
1122 		ice_add_txq_sysctls(txq);
1123 	}
1124 
1125 	vsi->num_tx_queues = ntxqsets;
1126 
1127 	return (0);
1128 
1129 free_tx_queues:
1130 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1131 		if (txq->tx_rsq != NULL) {
1132 			free(txq->tx_rsq, M_ICE);
1133 			txq->tx_rsq = NULL;
1134 		}
1135 	}
1136 	free(vsi->tx_queues, M_ICE);
1137 	vsi->tx_queues = NULL;
1138 	return err;
1139 }
1140 
1141 /**
1142  * ice_if_rx_queues_alloc - Allocate Rx queue memory
1143  * @ctx: iflib context structure
1144  * @vaddrs: virtual addresses for the queue memory
1145  * @paddrs: physical addresses for the queue memory
1146  * @nrxqs: number of Rx queues per set (should always be 1)
1147  * @nrxqsets: number of Rx queue sets to allocate
1148  *
1149  * Called by iflib to allocate Rx queues for the device. Allocates driver
1150  * memory to track each queue, as well as sets up the Rx queue sysctls.
1151  */
1152 static int
ice_if_rx_queues_alloc(if_ctx_t ctx,caddr_t * vaddrs,uint64_t * paddrs,int __invariant_only nrxqs,int nrxqsets)1153 ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
1154 		       int __invariant_only nrxqs, int nrxqsets)
1155 {
1156 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1157 	struct ice_vsi *vsi = &sc->pf_vsi;
1158 	struct ice_rx_queue *rxq;
1159 	int err, i;
1160 
1161 	MPASS(nrxqs == 1);
1162 	MPASS(sc->scctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
1163 	ASSERT_CTX_LOCKED(sc);
1164 
1165 	/* Do not bother allocating queues if we're in recovery mode */
1166 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1167 		return (0);
1168 
1169 	/* Allocate queue structure memory */
1170 	if (!(vsi->rx_queues =
1171 	      (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
1172 		device_printf(sc->dev, "Unable to allocate Rx queue memory\n");
1173 		return (ENOMEM);
1174 	}
1175 
1176 	/* Assign queues from PF space to the main VSI */
1177 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, nrxqsets);
1178 	if (err) {
1179 		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1180 			      ice_err_str(err));
1181 		goto free_rx_queues;
1182 	}
1183 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1184 
1185 	/* Add Rx queue sysctls context */
1186 	ice_vsi_add_rxqs_ctx(vsi);
1187 
1188 	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
1189 		rxq->me = i;
1190 		rxq->vsi = vsi;
1191 
1192 		/* store the queue size for easier access */
1193 		rxq->desc_count = sc->scctx->isc_nrxd[0];
1194 
1195 		/* get the virtual and physical address of the hardware queues */
1196 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
1197 		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
1198 		rxq->rx_paddr = paddrs[i];
1199 
1200 		ice_add_rxq_sysctls(rxq);
1201 	}
1202 
1203 	vsi->num_rx_queues = nrxqsets;
1204 
1205 	return (0);
1206 
1207 free_rx_queues:
1208 	free(vsi->rx_queues, M_ICE);
1209 	vsi->rx_queues = NULL;
1210 	return err;
1211 }
1212 
1213 /**
1214  * ice_if_queues_free - Free queue memory
1215  * @ctx: the iflib context structure
1216  *
1217  * Free queue memory allocated by ice_if_tx_queues_alloc() and
1218  * ice_if_rx_queues_alloc().
1219  *
1220  * There is no guarantee that ice_if_queues_free() and ice_if_detach() will be
1221  * called in the same order. It's possible for ice_if_queues_free() to be
1222  * called prior to ice_if_detach(), and vice versa.
1223  *
1224  * For this reason, the main VSI is a static member of the ice_softc, which is
1225  * not free'd until after iflib finishes calling both of these functions.
1226  *
1227  * Thus, care must be taken in how we manage the memory being freed by this
1228  * function, and in what tasks it can and must perform.
1229  */
1230 static void
ice_if_queues_free(if_ctx_t ctx)1231 ice_if_queues_free(if_ctx_t ctx)
1232 {
1233 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1234 	struct ice_vsi *vsi = &sc->pf_vsi;
1235 	struct ice_tx_queue *txq;
1236 	int i;
1237 
1238 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
1239 	 * pointers. Note, the calls here and those in ice_if_detach()
1240 	 * are *BOTH* necessary, as we cannot guarantee which path will be
1241 	 * run first
1242 	 */
1243 	ice_vsi_del_txqs_ctx(vsi);
1244 	ice_vsi_del_rxqs_ctx(vsi);
1245 
1246 	/* Release MSI-X IRQ vectors, if not yet released in ice_if_detach */
1247 	ice_free_irqvs(sc);
1248 
1249 	if (vsi->tx_queues != NULL) {
1250 		/* free the tx_rsq arrays */
1251 		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1252 			if (txq->tx_rsq != NULL) {
1253 				free(txq->tx_rsq, M_ICE);
1254 				txq->tx_rsq = NULL;
1255 			}
1256 		}
1257 		free(vsi->tx_queues, M_ICE);
1258 		vsi->tx_queues = NULL;
1259 		vsi->num_tx_queues = 0;
1260 	}
1261 	if (vsi->rx_queues != NULL) {
1262 		free(vsi->rx_queues, M_ICE);
1263 		vsi->rx_queues = NULL;
1264 		vsi->num_rx_queues = 0;
1265 	}
1266 }
1267 
1268 /**
1269  * ice_msix_que - Fast interrupt handler for MSI-X receive queues
1270  * @arg: The Rx queue memory
1271  *
1272  * Interrupt filter function for iflib MSI-X interrupts. Called by iflib when
1273  * an MSI-X interrupt for a given queue is triggered. Currently this just asks
1274  * iflib to schedule the main Rx thread.
1275  */
1276 static int
ice_msix_que(void * arg)1277 ice_msix_que(void *arg)
1278 {
1279 	struct ice_rx_queue __unused *rxq = (struct ice_rx_queue *)arg;
1280 
1281 	/* TODO: dynamic ITR algorithm?? */
1282 
1283 	return (FILTER_SCHEDULE_THREAD);
1284 }
1285 
1286 /**
1287  * ice_msix_admin - Fast interrupt handler for MSI-X admin interrupt
1288  * @arg: pointer to device softc memory
1289  *
1290  * Called by iflib when an administrative interrupt occurs. Should perform any
1291  * fast logic for handling the interrupt cause, and then indicate whether the
1292  * admin task needs to be queued.
1293  */
1294 static int
ice_msix_admin(void * arg)1295 ice_msix_admin(void *arg)
1296 {
1297 	struct ice_softc *sc = (struct ice_softc *)arg;
1298 	struct ice_hw *hw = &sc->hw;
1299 	device_t dev = sc->dev;
1300 	u32 oicr;
1301 
1302 	/* There is no safe way to modify the enabled miscellaneous causes of
1303 	 * the OICR vector at runtime, as doing so would be prone to race
1304 	 * conditions. Reading PFINT_OICR will unmask the associated interrupt
1305 	 * causes and allow future interrupts to occur. The admin interrupt
1306 	 * vector will not be re-enabled until after we exit this function,
1307 	 * but any delayed tasks must be resilient against possible "late
1308 	 * arrival" interrupts that occur while we're already handling the
1309 	 * task. This is done by using state bits and serializing these
1310 	 * delayed tasks via the admin status task function.
1311 	 */
1312 	oicr = rd32(hw, PFINT_OICR);
1313 
1314 	/* Processing multiple controlq interrupts on a single vector does not
1315 	 * provide an indication of which controlq triggered the interrupt.
1316 	 * We might try reading the INTEVENT bit of the respective PFINT_*_CTL
1317 	 * registers. However, the INTEVENT bit is not guaranteed to be set as
1318 	 * it gets automatically cleared when the hardware acknowledges the
1319 	 * interrupt.
1320 	 *
1321 	 * This means we don't really have a good indication of whether or
1322 	 * which controlq triggered this interrupt. We'll just notify the
1323 	 * admin task that it should check all the controlqs.
1324 	 */
1325 	ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
1326 
1327 	if (oicr & PFINT_OICR_VFLR_M) {
1328 		ice_set_state(&sc->state, ICE_STATE_VFLR_PENDING);
1329 	}
1330 
1331 	if (oicr & PFINT_OICR_MAL_DETECT_M) {
1332 		ice_set_state(&sc->state, ICE_STATE_MDD_PENDING);
1333 	}
1334 
1335 	if (oicr & PFINT_OICR_GRST_M) {
1336 		u32 reset;
1337 
1338 		reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
1339 			GLGEN_RSTAT_RESET_TYPE_S;
1340 
1341 		if (reset == ICE_RESET_CORER)
1342 			sc->soft_stats.corer_count++;
1343 		else if (reset == ICE_RESET_GLOBR)
1344 			sc->soft_stats.globr_count++;
1345 		else
1346 			sc->soft_stats.empr_count++;
1347 
1348 		/* There are a couple of bits at play for handling resets.
1349 		 * First, the ICE_STATE_RESET_OICR_RECV bit is used to
1350 		 * indicate that the driver has received an OICR with a reset
1351 		 * bit active, indicating that a CORER/GLOBR/EMPR is about to
1352 		 * happen. Second, we set hw->reset_ongoing to indicate that
1353 		 * the hardware is in reset. We will set this back to false as
1354 		 * soon as the driver has determined that the hardware is out
1355 		 * of reset.
1356 		 *
1357 		 * If the driver wishes to trigger a request, it can set one of
1358 		 * the ICE_STATE_RESET_*_REQ bits, which will trigger the
1359 		 * correct type of reset.
1360 		 */
1361 		if (!ice_testandset_state(&sc->state, ICE_STATE_RESET_OICR_RECV)) {
1362 			hw->reset_ongoing = true;
1363 			/*
1364 			 * During the NVM update process, there is a driver reset and link
1365 			 * goes down and then up. The below if-statement prevents a second
1366 			 * link flap from occurring in ice_if_init().
1367 			 */
1368 			if (if_getflags(sc->ifp) & IFF_UP)
1369 				ice_set_state(&sc->state, ICE_STATE_FIRST_INIT_LINK);
1370 		}
1371 	}
1372 
1373 	if (oicr & PFINT_OICR_ECC_ERR_M) {
1374 		device_printf(dev, "ECC Error detected!\n");
1375 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1376 	}
1377 
1378 	if (oicr & (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M)) {
1379 		if (oicr & PFINT_OICR_HMC_ERR_M)
1380 			/* Log the HMC errors */
1381 			ice_log_hmc_error(hw, dev);
1382 		ice_rdma_notify_pe_intr(sc, oicr);
1383 	}
1384 
1385 	if (oicr & PFINT_OICR_PCI_EXCEPTION_M) {
1386 		device_printf(dev, "PCI Exception detected!\n");
1387 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1388 	}
1389 
1390 	return (FILTER_SCHEDULE_THREAD);
1391 }
1392 
1393 /**
1394  * ice_allocate_msix - Allocate MSI-X vectors for the interface
1395  * @sc: the device private softc
1396  *
1397  * Map the MSI-X bar, and then request MSI-X vectors in a two-stage process.
1398  *
1399  * First, determine a suitable total number of vectors based on the number
1400  * of CPUs, RSS buckets, the administrative vector, and other demands such as
1401  * RDMA.
1402  *
1403  * Request the desired amount of vectors, and see how many we obtain. If we
1404  * don't obtain as many as desired, reduce the demands by lowering the number
1405  * of requested queues or reducing the demand from other features such as
1406  * RDMA.
1407  *
1408  * @remark This function is required because the driver sets the
1409  * IFLIB_SKIP_MSIX flag indicating that the driver will manage MSI-X vectors
1410  * manually.
1411  *
1412  * @remark This driver will only use MSI-X vectors. If this is not possible,
1413  * neither MSI or legacy interrupts will be tried.
1414  *
1415  * @remark if it exists, os_imgr is initialized here for keeping track of
1416  * the assignments of extra MSIX vectors.
1417  *
1418  * @post on success this function must set the following scctx parameters:
1419  * isc_vectors, isc_nrxqsets, isc_ntxqsets, and isc_intr.
1420  *
1421  * @returns zero on success or an error code on failure.
1422  */
1423 static int
ice_allocate_msix(struct ice_softc * sc)1424 ice_allocate_msix(struct ice_softc *sc)
1425 {
1426 	bool iflib_override_queue_count = false;
1427 	if_softc_ctx_t scctx = sc->scctx;
1428 	device_t dev = sc->dev;
1429 	cpuset_t cpus;
1430 	int bar, queues, vectors, requested;
1431 	int err = 0;
1432 	int rdma;
1433 
1434 	/* Allocate the MSI-X bar */
1435 	bar = scctx->isc_msix_bar;
1436 	sc->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE);
1437 	if (!sc->msix_table) {
1438 		device_printf(dev, "Unable to map MSI-X table\n");
1439 		return (ENOMEM);
1440 	}
1441 
1442 	/* Check if the iflib queue count sysctls have been set */
1443 	if (sc->ifc_sysctl_ntxqs || sc->ifc_sysctl_nrxqs)
1444 		iflib_override_queue_count = true;
1445 
1446 	err = bus_get_cpus(dev, INTR_CPUS, sizeof(cpus), &cpus);
1447 	if (err) {
1448 		device_printf(dev, "%s: Unable to fetch the CPU list: %s\n",
1449 			      __func__, ice_err_str(err));
1450 		CPU_COPY(&all_cpus, &cpus);
1451 	}
1452 
1453 	/* Attempt to mimic behavior of iflib_msix_init */
1454 	if (iflib_override_queue_count) {
1455 		/*
1456 		 * If the override sysctls have been set, limit the queues to
1457 		 * the number of logical CPUs.
1458 		 */
1459 		queues = mp_ncpus;
1460 	} else {
1461 		/*
1462 		 * Otherwise, limit the queue count to the CPUs associated
1463 		 * with the NUMA node the device is associated with.
1464 		 */
1465 		queues = CPU_COUNT(&cpus);
1466 	}
1467 
1468 	/* Clamp to the number of RSS buckets */
1469 	queues = imin(queues, rss_getnumbuckets());
1470 
1471 	/*
1472 	 * Clamp the number of queue pairs to the minimum of the requested Tx
1473 	 * and Rx queues.
1474 	 */
1475 	queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets);
1476 	queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets);
1477 
1478 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA)) {
1479 		/*
1480 		 * Choose a number of RDMA vectors based on the number of CPUs
1481 		 * up to a maximum
1482 		 */
1483 		rdma = min(CPU_COUNT(&cpus), ICE_RDMA_MAX_MSIX);
1484 
1485 		/* Further limit by the user configurable tunable */
1486 		rdma = min(rdma, ice_rdma_max_msix);
1487 	} else {
1488 		rdma = 0;
1489 	}
1490 
1491 	/*
1492 	 * Determine the number of vectors to request. Note that we also need
1493 	 * to allocate one vector for administrative tasks.
1494 	 */
1495 	requested = rdma + queues + 1;
1496 	/* Add extra vectors requested by the user for later subinterface
1497 	 * creation.
1498 	 */
1499 	if_ctx_t ctx = sc->ctx;
1500 	u32 extra_vectors = iflib_get_extra_msix_vectors_sysctl(ctx);
1501 	requested += extra_vectors;
1502 
1503 	vectors = requested;
1504 	err = pci_alloc_msix(dev, &vectors);
1505 	if (err) {
1506 		device_printf(dev, "Failed to allocate %d MSI-X vectors, err %s\n",
1507 			      vectors, ice_err_str(err));
1508 		goto err_free_msix_table;
1509 	}
1510 
1511 	/* If we don't receive enough vectors, reduce demands */
1512 	if (vectors < requested) {
1513 		int diff = requested - vectors;
1514 
1515 		device_printf(dev, "Requested %d MSI-X vectors, but got only %d\n",
1516 			      requested, vectors);
1517 
1518 		diff += extra_vectors;
1519 		extra_vectors = 0;
1520 		/*
1521 		 * The OS didn't grant us the requested number of vectors.
1522 		 * Check to see if we can reduce demands by limiting the
1523 		 * number of vectors allocated to certain features.
1524 		 */
1525 
1526 		if (rdma >= diff) {
1527 			/* Reduce the number of RDMA vectors we reserve */
1528 			rdma -= diff;
1529 			diff = 0;
1530 		} else {
1531 			/* Disable RDMA and reduce the difference */
1532 			ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
1533 			diff -= rdma;
1534 			rdma = 0;
1535 		}
1536 
1537 		/*
1538 		 * If we still have a difference, we need to reduce the number
1539 		 * of queue pairs.
1540 		 *
1541 		 * However, we still need at least one vector for the admin
1542 		 * interrupt and one queue pair.
1543 		 */
1544 		if (queues <= diff) {
1545 			device_printf(dev, "Unable to allocate sufficient MSI-X vectors\n");
1546 			err = (ERANGE);
1547 			goto err_pci_release_msi;
1548 		}
1549 
1550 		queues -= diff;
1551 	}
1552 
1553 	device_printf(dev, "Using %d Tx and Rx queues\n", queues);
1554 	if (rdma)
1555 		device_printf(dev, "Reserving %d MSI-X interrupts for iRDMA\n",
1556 			      rdma);
1557 	device_printf(dev, "Using MSI-X interrupts with %d vectors\n",
1558 		      vectors);
1559 
1560 	/* Split resulting vectors back into requested splits */
1561 	scctx->isc_vectors = vectors;
1562 	scctx->isc_nrxqsets = queues;
1563 	scctx->isc_ntxqsets = queues;
1564 	scctx->isc_intr = IFLIB_INTR_MSIX;
1565 
1566 	sc->irdma_vectors = rdma;
1567 
1568 	/* Interrupt allocation tracking isn't required in recovery mode,
1569 	 * since neither RDMA nor VFs are enabled.
1570 	 */
1571 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1572 		return (0);
1573 
1574 	/* Keep track of which interrupt indices are being used for what */
1575 	sc->lan_vectors = vectors - rdma;
1576 	sc->lan_vectors -= extra_vectors;
1577 	err = ice_resmgr_assign_contiguous(&sc->dev_imgr, sc->pf_imap, sc->lan_vectors);
1578 	if (err) {
1579 		device_printf(dev, "Unable to assign PF interrupt mapping: %s\n",
1580 			      ice_err_str(err));
1581 		goto err_pci_release_msi;
1582 	}
1583 	err = ice_resmgr_assign_contiguous(&sc->dev_imgr, sc->rdma_imap, rdma);
1584 	if (err) {
1585 		device_printf(dev, "Unable to assign PF RDMA interrupt mapping: %s\n",
1586 			      ice_err_str(err));
1587 		goto err_release_pf_imap;
1588 	}
1589 	sc->extra_vectors = extra_vectors;
1590 	/* Setup another resource manager to track the assignments of extra OS
1591 	 * vectors. These OS interrupt allocations don't need to be contiguous,
1592 	 * unlike the ones that come from the device.
1593 	 */
1594 	err = ice_resmgr_init(&sc->os_imgr, sc->extra_vectors);
1595 	if (err) {
1596 		device_printf(dev, "Unable to initialize OS extra interrupt manager: %s\n",
1597 			      ice_err_str(err));
1598 		ice_resmgr_release_map(&sc->dev_imgr, sc->rdma_imap,
1599 					    rdma);
1600 		goto err_release_pf_imap;
1601 	}
1602 	return (0);
1603 
1604 err_release_pf_imap:
1605 	ice_resmgr_release_map(&sc->dev_imgr, sc->pf_imap,
1606 				    sc->lan_vectors);
1607 err_pci_release_msi:
1608 	pci_release_msi(dev);
1609 err_free_msix_table:
1610 	if (sc->msix_table != NULL) {
1611 		bus_release_resource(sc->dev, SYS_RES_MEMORY,
1612 				rman_get_rid(sc->msix_table),
1613 				sc->msix_table);
1614 		sc->msix_table = NULL;
1615 	}
1616 
1617 	return (err);
1618 }
1619 
1620 /**
1621  * ice_if_msix_intr_assign - Assign MSI-X interrupt vectors to queues
1622  * @ctx: the iflib context structure
1623  * @msix: the number of vectors we were assigned
1624  *
1625  * Called by iflib to assign MSI-X vectors to queues. Currently requires that
1626  * we get at least the same number of vectors as we have queues, and that we
1627  * always have the same number of Tx and Rx queues.
1628  *
1629  * Tx queues use a softirq instead of using their own hardware interrupt.
1630  */
1631 static int
ice_if_msix_intr_assign(if_ctx_t ctx,int msix)1632 ice_if_msix_intr_assign(if_ctx_t ctx, int msix)
1633 {
1634 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1635 	struct ice_vsi *vsi = &sc->pf_vsi;
1636 	int err, i, vector;
1637 
1638 	ASSERT_CTX_LOCKED(sc);
1639 
1640 	if (vsi->num_rx_queues != vsi->num_tx_queues) {
1641 		device_printf(sc->dev,
1642 			      "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
1643 			      vsi->num_tx_queues, vsi->num_rx_queues);
1644 		return (EOPNOTSUPP);
1645 	}
1646 
1647 	if (msix < (vsi->num_rx_queues + 1)) {
1648 		device_printf(sc->dev,
1649 			      "Not enough MSI-X vectors to assign one vector to each queue pair\n");
1650 		return (EOPNOTSUPP);
1651 	}
1652 
1653 	/* Save the number of vectors for future use */
1654 	sc->num_irq_vectors = vsi->num_rx_queues + 1;
1655 
1656 	/* Allocate space to store the IRQ vector data */
1657 	if (!(sc->irqvs =
1658 	      (struct ice_irq_vector *) malloc(sizeof(struct ice_irq_vector) * (sc->num_irq_vectors),
1659 					       M_ICE, M_NOWAIT))) {
1660 		device_printf(sc->dev,
1661 			      "Unable to allocate irqv memory\n");
1662 		return (ENOMEM);
1663 	}
1664 
1665 	/* Administrative interrupt events will use vector 0 */
1666 	err = iflib_irq_alloc_generic(ctx, &sc->irqvs[0].irq, 1, IFLIB_INTR_ADMIN,
1667 				      ice_msix_admin, sc, 0, "admin");
1668 	if (err) {
1669 		device_printf(sc->dev,
1670 			      "Failed to register Admin queue handler: %s\n",
1671 			      ice_err_str(err));
1672 		goto free_irqvs;
1673 	}
1674 	sc->irqvs[0].me = 0;
1675 
1676 	/* Do not allocate queue interrupts when in recovery mode */
1677 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1678 		return (0);
1679 
1680 	int rid;
1681 	for (i = 0, vector = 1; i < vsi->num_rx_queues; i++, vector++) {
1682 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1683 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1684 		char irq_name[16];
1685 
1686 		rid = vector + 1;
1687 
1688 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
1689 		err = iflib_irq_alloc_generic(ctx, &sc->irqvs[vector].irq, rid,
1690 					      IFLIB_INTR_RXTX, ice_msix_que,
1691 					      rxq, rxq->me, irq_name);
1692 		if (err) {
1693 			device_printf(sc->dev,
1694 				      "Failed to allocate q int %d err: %s\n",
1695 				      i, ice_err_str(err));
1696 			vector--;
1697 			i--;
1698 			goto fail;
1699 		}
1700 		sc->irqvs[vector].me = vector;
1701 		rxq->irqv = &sc->irqvs[vector];
1702 
1703 		bzero(irq_name, sizeof(irq_name));
1704 
1705 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
1706 		iflib_softirq_alloc_generic(ctx, &sc->irqvs[vector].irq,
1707 					    IFLIB_INTR_TX, txq,
1708 					    txq->me, irq_name);
1709 		txq->irqv = &sc->irqvs[vector];
1710 	}
1711 
1712 	/* For future interrupt assignments */
1713 	sc->last_rid = rid + sc->irdma_vectors;
1714 
1715 #ifdef PCI_IOV
1716 	/* Create soft IRQ for handling VF resets */
1717 	iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_IOV, sc, 0, "iov");
1718 #endif
1719 
1720 	return (0);
1721 fail:
1722 	for (; i >= 0; i--, vector--)
1723 		iflib_irq_free(ctx, &sc->irqvs[vector].irq);
1724 	iflib_irq_free(ctx, &sc->irqvs[0].irq);
1725 free_irqvs:
1726 	free(sc->irqvs, M_ICE);
1727 	sc->irqvs = NULL;
1728 	return err;
1729 }
1730 
1731 /**
1732  * ice_if_mtu_set - Set the device MTU
1733  * @ctx: iflib context structure
1734  * @mtu: the MTU requested
1735  *
1736  * Called by iflib to configure the device's Maximum Transmission Unit (MTU).
1737  *
1738  * @pre assumes the caller holds the iflib CTX lock
1739  */
1740 static int
ice_if_mtu_set(if_ctx_t ctx,uint32_t mtu)1741 ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu)
1742 {
1743 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1744 
1745 	ASSERT_CTX_LOCKED(sc);
1746 
1747 	/* Do not support configuration when in recovery mode */
1748 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1749 		return (ENOSYS);
1750 
1751 	if (mtu < ICE_MIN_MTU || mtu > ICE_MAX_MTU)
1752 		return (EINVAL);
1753 
1754 	sc->scctx->isc_max_frame_size = mtu +
1755 		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
1756 
1757 	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
1758 
1759 	return (0);
1760 }
1761 
1762 /**
1763  * ice_if_intr_enable - Enable device interrupts
1764  * @ctx: iflib context structure
1765  *
1766  * Called by iflib to request enabling device interrupts.
1767  */
1768 static void
ice_if_intr_enable(if_ctx_t ctx)1769 ice_if_intr_enable(if_ctx_t ctx)
1770 {
1771 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1772 	struct ice_vsi *vsi = &sc->pf_vsi;
1773 	struct ice_hw *hw = &sc->hw;
1774 
1775 	ASSERT_CTX_LOCKED(sc);
1776 
1777 	/* Enable ITR 0 */
1778 	ice_enable_intr(hw, sc->irqvs[0].me);
1779 
1780 	/* Do not enable queue interrupts in recovery mode */
1781 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1782 		return;
1783 
1784 	/* Enable all queue interrupts */
1785 	for (int i = 0; i < vsi->num_rx_queues; i++)
1786 		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
1787 }
1788 
1789 /**
1790  * ice_if_intr_disable - Disable device interrupts
1791  * @ctx: iflib context structure
1792  *
1793  * Called by iflib to request disabling device interrupts.
1794  */
1795 static void
ice_if_intr_disable(if_ctx_t ctx)1796 ice_if_intr_disable(if_ctx_t ctx)
1797 {
1798 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1799 	struct ice_hw *hw = &sc->hw;
1800 	unsigned int i;
1801 
1802 	ASSERT_CTX_LOCKED(sc);
1803 
1804 	/* IFDI_INTR_DISABLE may be called prior to interrupts actually being
1805 	 * assigned to queues. Instead of assuming that the interrupt
1806 	 * assignment in the rx_queues structure is valid, just disable all
1807 	 * possible interrupts
1808 	 *
1809 	 * Note that we choose not to disable ITR 0 because this handles the
1810 	 * AdminQ interrupts, and we want to keep processing these even when
1811 	 * the interface is offline.
1812 	 */
1813 	for (i = 1; i < hw->func_caps.common_cap.num_msix_vectors; i++)
1814 		ice_disable_intr(hw, i);
1815 }
1816 
1817 /**
1818  * ice_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
1819  * @ctx: iflib context structure
1820  * @rxqid: the Rx queue to enable
1821  *
1822  * Enable a specific Rx queue interrupt.
1823  *
1824  * This function is not protected by the iflib CTX lock.
1825  */
1826 static int
ice_if_rx_queue_intr_enable(if_ctx_t ctx,uint16_t rxqid)1827 ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
1828 {
1829 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1830 	struct ice_vsi *vsi = &sc->pf_vsi;
1831 	struct ice_hw *hw = &sc->hw;
1832 
1833 	/* Do not enable queue interrupts in recovery mode */
1834 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1835 		return (ENOSYS);
1836 
1837 	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
1838 	return (0);
1839 }
1840 
1841 /**
1842  * ice_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
1843  * @ctx: iflib context structure
1844  * @txqid: the Tx queue to enable
1845  *
1846  * Enable a specific Tx queue interrupt.
1847  *
1848  * This function is not protected by the iflib CTX lock.
1849  */
1850 static int
ice_if_tx_queue_intr_enable(if_ctx_t ctx,uint16_t txqid)1851 ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
1852 {
1853 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1854 	struct ice_vsi *vsi = &sc->pf_vsi;
1855 	struct ice_hw *hw = &sc->hw;
1856 
1857 	/* Do not enable queue interrupts in recovery mode */
1858 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1859 		return (ENOSYS);
1860 
1861 	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
1862 	return (0);
1863 }
1864 
1865 /**
1866  * ice_set_default_promisc_mask - Set default config for promisc settings
1867  * @promisc_mask: bitmask to setup
1868  *
1869  * The ice_(set|clear)_vsi_promisc() function expects a mask of promiscuous
1870  * modes to operate on. The mask used in here is the default one for the
1871  * driver, where promiscuous is enabled/disabled for all types of
1872  * non-VLAN-tagged/VLAN 0 traffic.
1873  */
1874 static void
ice_set_default_promisc_mask(ice_bitmap_t * promisc_mask)1875 ice_set_default_promisc_mask(ice_bitmap_t *promisc_mask)
1876 {
1877 	ice_zero_bitmap(promisc_mask, ICE_PROMISC_MAX);
1878 	ice_set_bit(ICE_PROMISC_UCAST_TX, promisc_mask);
1879 	ice_set_bit(ICE_PROMISC_UCAST_RX, promisc_mask);
1880 	ice_set_bit(ICE_PROMISC_MCAST_TX, promisc_mask);
1881 	ice_set_bit(ICE_PROMISC_MCAST_RX, promisc_mask);
1882 }
1883 
1884 /**
1885  * ice_if_promisc_set - Set device promiscuous mode
1886  * @ctx: iflib context structure
1887  * @flags: promiscuous flags to configure
1888  *
1889  * Called by iflib to configure device promiscuous mode.
1890  *
1891  * @remark Calls to this function will always overwrite the previous setting
1892  */
1893 static int
ice_if_promisc_set(if_ctx_t ctx,int flags)1894 ice_if_promisc_set(if_ctx_t ctx, int flags)
1895 {
1896 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1897 	struct ice_hw *hw = &sc->hw;
1898 	device_t dev = sc->dev;
1899 	int status;
1900 	bool promisc_enable = flags & IFF_PROMISC;
1901 	bool multi_enable = flags & IFF_ALLMULTI;
1902 	ice_declare_bitmap(promisc_mask, ICE_PROMISC_MAX);
1903 
1904 	/* Do not support configuration when in recovery mode */
1905 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1906 		return (ENOSYS);
1907 
1908 	ice_set_default_promisc_mask(promisc_mask);
1909 
1910 	if (promisc_enable) {
1911 		status = ice_set_vsi_promisc(hw, sc->pf_vsi.idx,
1912 					     promisc_mask, 0);
1913 		if (status && status != ICE_ERR_ALREADY_EXISTS) {
1914 			device_printf(dev,
1915 				      "Failed to enable promiscuous mode for "
1916 				      "PF VSI, err %s aq_err %s\n",
1917 				      ice_status_str(status),
1918 				      ice_aq_str(hw->adminq.sq_last_status));
1919 			return (EIO);
1920 		}
1921 	} else {
1922 		status = ice_clear_vsi_promisc(hw, sc->pf_vsi.idx,
1923 					       promisc_mask, 0);
1924 		if (status) {
1925 			device_printf(dev,
1926 				      "Failed to disable promiscuous mode for"
1927 				      " PF VSI, err %s aq_err %s\n",
1928 				      ice_status_str(status),
1929 				      ice_aq_str(hw->adminq.sq_last_status));
1930 			return (EIO);
1931 		}
1932 
1933 		if (multi_enable) {
1934 			ice_clear_bit(ICE_PROMISC_UCAST_TX, promisc_mask);
1935 			ice_clear_bit(ICE_PROMISC_UCAST_RX, promisc_mask);
1936 			status = ice_set_vsi_promisc(hw, sc->pf_vsi.idx,
1937 						     promisc_mask, 0);
1938 			if (status && status != ICE_ERR_ALREADY_EXISTS) {
1939 				device_printf(dev,
1940 					      "Failed to enable allmulti mode "
1941 					      "for PF VSI, err %s aq_err %s\n",
1942 					      ice_status_str(status),
1943 					      ice_aq_str(
1944 					      hw->adminq.sq_last_status));
1945 				return (EIO);
1946 			}
1947 		}
1948 	}
1949 
1950 	return (0);
1951 }
1952 
1953 /**
1954  * ice_if_media_change - Change device media
1955  * @ctx: device ctx structure
1956  *
1957  * Called by iflib when a media change is requested. This operation is not
1958  * supported by the hardware, so we just return an error code.
1959  */
1960 static int
ice_if_media_change(if_ctx_t ctx)1961 ice_if_media_change(if_ctx_t ctx)
1962 {
1963 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1964 
1965 	device_printf(sc->dev, "Media change is not supported.\n");
1966 	return (ENODEV);
1967 }
1968 
1969 /**
1970  * ice_if_media_status - Report current device media
1971  * @ctx: iflib context structure
1972  * @ifmr: ifmedia request structure to update
1973  *
1974  * Updates the provided ifmr with current device media status, including link
1975  * status and media type.
1976  */
1977 static void
ice_if_media_status(if_ctx_t ctx,struct ifmediareq * ifmr)1978 ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
1979 {
1980 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1981 	struct ice_link_status *li = &sc->hw.port_info->phy.link_info;
1982 
1983 	ifmr->ifm_status = IFM_AVALID;
1984 	ifmr->ifm_active = IFM_ETHER;
1985 
1986 	/* Never report link up or media types when in recovery mode */
1987 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1988 		return;
1989 
1990 	if (!sc->link_up)
1991 		return;
1992 
1993 	ifmr->ifm_status |= IFM_ACTIVE;
1994 	ifmr->ifm_active |= IFM_FDX;
1995 
1996 	if (li->phy_type_low)
1997 		ifmr->ifm_active |= ice_get_phy_type_low(li->phy_type_low);
1998 	else if (li->phy_type_high)
1999 		ifmr->ifm_active |= ice_get_phy_type_high(li->phy_type_high);
2000 	else
2001 		ifmr->ifm_active |= IFM_UNKNOWN;
2002 
2003 	/* Report flow control status as well */
2004 	if (li->an_info & ICE_AQ_LINK_PAUSE_TX)
2005 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
2006 	if (li->an_info & ICE_AQ_LINK_PAUSE_RX)
2007 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
2008 }
2009 
2010 /**
2011  * ice_init_tx_tracking - Initialize Tx queue software tracking values
2012  * @vsi: the VSI to initialize
2013  *
2014  * Initialize Tx queue software tracking values, including the Report Status
2015  * queue, and related software tracking values.
2016  */
2017 static void
ice_init_tx_tracking(struct ice_vsi * vsi)2018 ice_init_tx_tracking(struct ice_vsi *vsi)
2019 {
2020 	struct ice_tx_queue *txq;
2021 	size_t j;
2022 	int i;
2023 
2024 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
2025 
2026 		txq->tx_rs_cidx = txq->tx_rs_pidx = 0;
2027 
2028 		/* Initialize the last processed descriptor to be the end of
2029 		 * the ring, rather than the start, so that we avoid an
2030 		 * off-by-one error in ice_ift_txd_credits_update for the
2031 		 * first packet.
2032 		 */
2033 		txq->tx_cidx_processed = txq->desc_count - 1;
2034 
2035 		for (j = 0; j < txq->desc_count; j++)
2036 			txq->tx_rsq[j] = QIDX_INVALID;
2037 	}
2038 }
2039 
2040 /**
2041  * ice_update_rx_mbuf_sz - Update the Rx buffer size for all queues
2042  * @sc: the device softc
2043  *
2044  * Called to update the Rx queue mbuf_sz parameter for configuring the receive
2045  * buffer sizes when programming hardware.
2046  */
2047 static void
ice_update_rx_mbuf_sz(struct ice_softc * sc)2048 ice_update_rx_mbuf_sz(struct ice_softc *sc)
2049 {
2050 	uint32_t mbuf_sz = iflib_get_rx_mbuf_sz(sc->ctx);
2051 	struct ice_vsi *vsi = &sc->pf_vsi;
2052 
2053 	MPASS(mbuf_sz <= UINT16_MAX);
2054 	vsi->mbuf_sz = mbuf_sz;
2055 }
2056 
2057 /**
2058  * ice_if_init - Initialize the device
2059  * @ctx: iflib ctx structure
2060  *
2061  * Called by iflib to bring the device up, i.e. ifconfig ice0 up. Initializes
2062  * device filters and prepares the Tx and Rx engines.
2063  *
2064  * @pre assumes the caller holds the iflib CTX lock
2065  */
2066 static void
ice_if_init(if_ctx_t ctx)2067 ice_if_init(if_ctx_t ctx)
2068 {
2069 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
2070 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2071 	device_t dev = sc->dev;
2072 	int err;
2073 
2074 	ASSERT_CTX_LOCKED(sc);
2075 
2076 	/*
2077 	 * We've seen an issue with 11.3/12.1 where sideband routines are
2078 	 * called after detach is called.  This would call routines after
2079 	 * if_stop, causing issues with the teardown process.  This has
2080 	 * seemingly been fixed in STABLE snapshots, but it seems like a
2081 	 * good idea to have this guard here regardless.
2082 	 */
2083 	if (ice_driver_is_detaching(sc))
2084 		return;
2085 
2086 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2087 		return;
2088 
2089 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
2090 		device_printf(sc->dev, "request to start interface cannot be completed as the device failed to reset\n");
2091 		return;
2092 	}
2093 
2094 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
2095 		device_printf(sc->dev, "request to start interface while device is prepared for impending reset\n");
2096 		return;
2097 	}
2098 
2099 	ice_update_rx_mbuf_sz(sc);
2100 
2101 	/* Update the MAC address... User might use a LAA */
2102 	err = ice_update_laa_mac(sc);
2103 	if (err) {
2104 		device_printf(dev,
2105 			      "LAA address change failed, err %s\n",
2106 			      ice_err_str(err));
2107 		return;
2108 	}
2109 
2110 	/* Initialize software Tx tracking values */
2111 	ice_init_tx_tracking(&sc->pf_vsi);
2112 
2113 	err = ice_cfg_vsi_for_tx(&sc->pf_vsi);
2114 	if (err) {
2115 		device_printf(dev,
2116 			      "Unable to configure the main VSI for Tx: %s\n",
2117 			      ice_err_str(err));
2118 		return;
2119 	}
2120 
2121 	err = ice_cfg_vsi_for_rx(&sc->pf_vsi);
2122 	if (err) {
2123 		device_printf(dev,
2124 			      "Unable to configure the main VSI for Rx: %s\n",
2125 			      ice_err_str(err));
2126 		goto err_cleanup_tx;
2127 	}
2128 
2129 	err = ice_control_all_rx_queues(&sc->pf_vsi, true);
2130 	if (err) {
2131 		device_printf(dev,
2132 			      "Unable to enable Rx rings for transmit: %s\n",
2133 			      ice_err_str(err));
2134 		goto err_cleanup_tx;
2135 	}
2136 
2137 	err = ice_cfg_pf_default_mac_filters(sc);
2138 	if (err) {
2139 		device_printf(dev,
2140 			      "Unable to configure default MAC filters: %s\n",
2141 			      ice_err_str(err));
2142 		goto err_stop_rx;
2143 	}
2144 
2145 	/* We use software interrupts for Tx, so we only program the hardware
2146 	 * interrupts for Rx.
2147 	 */
2148 	ice_configure_all_rxq_interrupts(&sc->pf_vsi);
2149 	ice_configure_rx_itr(&sc->pf_vsi);
2150 
2151 	/* Configure promiscuous mode */
2152 	ice_if_promisc_set(ctx, if_getflags(sc->ifp));
2153 
2154 	if (!ice_testandclear_state(&sc->state, ICE_STATE_FIRST_INIT_LINK))
2155 		if (!sc->link_up && ((if_getflags(sc->ifp) & IFF_UP) ||
2156 			 ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN)))
2157 			ice_set_link(sc, true);
2158 
2159 	ice_rdma_pf_init(sc);
2160 
2161 	ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED);
2162 
2163 	if (sc->mirr_if && ice_testandclear_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT)) {
2164 		ice_clear_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED);
2165 		iflib_request_reset(sc->mirr_if->subctx);
2166 		iflib_admin_intr_deferred(sc->mirr_if->subctx);
2167 	}
2168 
2169 	return;
2170 
2171 err_stop_rx:
2172 	ice_control_all_rx_queues(&sc->pf_vsi, false);
2173 err_cleanup_tx:
2174 	ice_vsi_disable_tx(&sc->pf_vsi);
2175 }
2176 
2177 /**
2178  * ice_poll_for_media_avail - Re-enable link if media is detected
2179  * @sc: device private structure
2180  *
2181  * Intended to be called from the driver's timer function, this function
2182  * sends the Get Link Status AQ command and re-enables HW link if the
2183  * command says that media is available.
2184  *
2185  * If the driver doesn't have the "NO_MEDIA" state set, then this does nothing,
2186  * since media removal events are supposed to be sent to the driver through
2187  * a link status event.
2188  */
2189 static void
ice_poll_for_media_avail(struct ice_softc * sc)2190 ice_poll_for_media_avail(struct ice_softc *sc)
2191 {
2192 	struct ice_hw *hw = &sc->hw;
2193 	struct ice_port_info *pi = hw->port_info;
2194 
2195 	/* E830 only: There's no interrupt for when the PHY FW has finished loading,
2196 	 * so poll for the status in the media task here if it's previously
2197 	 * been detected that it's still loading.
2198 	 */
2199 	if (ice_is_e830(hw) &&
2200 	    ice_test_state(&sc->state, ICE_STATE_PHY_FW_INIT_PENDING)) {
2201 		if (rd32(hw, GL_MNG_FWSM) & GL_MNG_FWSM_FW_LOADING_M)
2202 			ice_clear_state(&sc->state, ICE_STATE_PHY_FW_INIT_PENDING);
2203 		else
2204 			return;
2205 	}
2206 
2207 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) {
2208 		pi->phy.get_link_info = true;
2209 		ice_get_link_status(pi, &sc->link_up);
2210 
2211 		if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
2212 			int status;
2213 
2214 			/* Re-enable link and re-apply user link settings */
2215 			if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) ||
2216 			    (if_getflags(sc->ifp) & IFF_UP)) {
2217 				ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
2218 
2219 				/* Update the OS about changes in media capability */
2220 				status = ice_add_media_types(sc, sc->media);
2221 				if (status)
2222 					device_printf(sc->dev,
2223 					    "Error adding device media types: %s aq_err %s\n",
2224 					    ice_status_str(status),
2225 					    ice_aq_str(hw->adminq.sq_last_status));
2226 			}
2227 
2228 			ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
2229 		}
2230 	}
2231 }
2232 
2233 /**
2234  * ice_if_timer - called by iflib periodically
2235  * @ctx: iflib ctx structure
2236  * @qid: the queue this timer was called for
2237  *
2238  * This callback is triggered by iflib periodically. We use it to update the
2239  * hw statistics.
2240  *
2241  * @remark this function is not protected by the iflib CTX lock.
2242  */
2243 static void
ice_if_timer(if_ctx_t ctx,uint16_t qid)2244 ice_if_timer(if_ctx_t ctx, uint16_t qid)
2245 {
2246 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2247 	uint64_t prev_link_xoff_rx = sc->stats.cur.link_xoff_rx;
2248 
2249 	if (qid != 0)
2250 		return;
2251 
2252 	/* Do not attempt to update stats when in recovery mode */
2253 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2254 		return;
2255 
2256 	/* Update device statistics */
2257 	ice_update_pf_stats(sc);
2258 
2259 	/*
2260 	 * For proper watchdog management, the iflib stack needs to know if
2261 	 * we've been paused during the last interval. Check if the
2262 	 * link_xoff_rx stat changed, and set the isc_pause_frames, if so.
2263 	 */
2264 	if (sc->stats.cur.link_xoff_rx != prev_link_xoff_rx)
2265 		sc->scctx->isc_pause_frames = 1;
2266 
2267 	/* Update the primary VSI stats */
2268 	ice_update_vsi_hw_stats(&sc->pf_vsi);
2269 
2270 	/* Update mirror VSI stats */
2271 	if (sc->mirr_if && sc->mirr_if->if_attached)
2272 		ice_update_vsi_hw_stats(sc->mirr_if->vsi);
2273 }
2274 
2275 /**
2276  * ice_admin_timer - called periodically to trigger the admin task
2277  * @arg: callout(9) argument pointing to the device private softc structure
2278  *
2279  * Timer function used as part of a callout(9) timer that will periodically
2280  * trigger the admin task, even when the interface is down.
2281  *
2282  * @remark this function is not called by iflib and is not protected by the
2283  * iflib CTX lock.
2284  *
2285  * @remark because this is a callout function, it cannot sleep and should not
2286  * attempt taking the iflib CTX lock.
2287  */
2288 static void
ice_admin_timer(void * arg)2289 ice_admin_timer(void *arg)
2290 {
2291 	struct ice_softc *sc = (struct ice_softc *)arg;
2292 
2293 	/*
2294 	 * There is a point where callout routines are no longer
2295 	 * cancelable.  So there exists a window of time where the
2296 	 * driver enters detach() and tries to cancel the callout, but the
2297 	 * callout routine has passed the cancellation point.  The detach()
2298 	 * routine is unaware of this and tries to free resources that the
2299 	 * callout routine needs.  So we check for the detach state flag to
2300 	 * at least shrink the window of opportunity.
2301 	 */
2302 	if (ice_driver_is_detaching(sc))
2303 		return;
2304 
2305 	/* Fire off the admin task */
2306 	iflib_admin_intr_deferred(sc->ctx);
2307 
2308 	/* Reschedule the admin timer */
2309 	callout_schedule(&sc->admin_timer, hz/2);
2310 }
2311 
2312 /**
2313  * ice_transition_recovery_mode - Transition to recovery mode
2314  * @sc: the device private softc
2315  *
2316  * Called when the driver detects that the firmware has entered recovery mode
2317  * at run time.
2318  */
2319 static void
ice_transition_recovery_mode(struct ice_softc * sc)2320 ice_transition_recovery_mode(struct ice_softc *sc)
2321 {
2322 	struct ice_vsi *vsi = &sc->pf_vsi;
2323 	int i;
2324 
2325 	device_printf(sc->dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
2326 
2327 	/* Tell the stack that the link has gone down */
2328 	iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
2329 
2330 	/* Request that the device be re-initialized */
2331 	ice_request_stack_reinit(sc);
2332 
2333 	ice_rdma_pf_detach(sc);
2334 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2335 
2336 #ifdef PCI_IOV
2337 	if (ice_test_and_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en))
2338 		 ice_iov_detach(sc);
2339 #else
2340 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2341 #endif /* PCI_IOV */
2342 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2343 
2344 	ice_vsi_del_txqs_ctx(vsi);
2345 	ice_vsi_del_rxqs_ctx(vsi);
2346 
2347 	for (i = 0; i < sc->num_available_vsi; i++) {
2348 		if (sc->all_vsi[i])
2349 			ice_release_vsi(sc->all_vsi[i]);
2350 	}
2351 	sc->num_available_vsi = 0;
2352 
2353 	if (sc->all_vsi) {
2354 		free(sc->all_vsi, M_ICE);
2355 		sc->all_vsi = NULL;
2356 	}
2357 
2358 	/* Destroy the interrupt manager */
2359 	ice_resmgr_destroy(&sc->dev_imgr);
2360 	/* Destroy the queue managers */
2361 	ice_resmgr_destroy(&sc->tx_qmgr);
2362 	ice_resmgr_destroy(&sc->rx_qmgr);
2363 
2364 	ice_deinit_hw(&sc->hw);
2365 }
2366 
2367 /**
2368  * ice_transition_safe_mode - Transition to safe mode
2369  * @sc: the device private softc
2370  *
2371  * Called when the driver attempts to reload the DDP package during a device
2372  * reset, and the new download fails. If so, we must transition to safe mode
2373  * at run time.
2374  *
2375  * @remark although safe mode normally allocates only a single queue, we can't
2376  * change the number of queues dynamically when using iflib. Due to this, we
2377  * do not attempt to reduce the number of queues.
2378  */
2379 static void
ice_transition_safe_mode(struct ice_softc * sc)2380 ice_transition_safe_mode(struct ice_softc *sc)
2381 {
2382 	/* Indicate that we are in Safe mode */
2383 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
2384 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
2385 
2386 	ice_rdma_pf_detach(sc);
2387 	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2388 
2389 #ifdef PCI_IOV
2390 	if (ice_test_and_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en))
2391 		 ice_iov_detach(sc);
2392 #else
2393 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2394 #endif /* PCI_IOV */
2395 	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2396 
2397 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2398 	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_en);
2399 }
2400 
2401 /**
2402  * ice_if_update_admin_status - update admin status
2403  * @ctx: iflib ctx structure
2404  *
2405  * Called by iflib to update the admin status. For our purposes, this means
2406  * check the adminq, and update the link status. It's ultimately triggered by
2407  * our admin interrupt, or by the ice_if_timer periodically.
2408  *
2409  * @pre assumes the caller holds the iflib CTX lock
2410  */
2411 static void
ice_if_update_admin_status(if_ctx_t ctx)2412 ice_if_update_admin_status(if_ctx_t ctx)
2413 {
2414 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2415 	enum ice_fw_modes fw_mode;
2416 	bool reschedule = false;
2417 	u16 pending = 0;
2418 
2419 	ASSERT_CTX_LOCKED(sc);
2420 
2421 	/* Check if the firmware entered recovery mode at run time */
2422 	fw_mode = ice_get_fw_mode(&sc->hw);
2423 	if (fw_mode == ICE_FW_MODE_REC) {
2424 		if (!ice_testandset_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2425 			/* If we just entered recovery mode, log a warning to
2426 			 * the system administrator and deinit driver state
2427 			 * that is no longer functional.
2428 			 */
2429 			ice_transition_recovery_mode(sc);
2430 		}
2431 	} else if (fw_mode == ICE_FW_MODE_ROLLBACK) {
2432 		if (!ice_testandset_state(&sc->state, ICE_STATE_ROLLBACK_MODE)) {
2433 			/* Rollback mode isn't fatal, but we don't want to
2434 			 * repeatedly post a message about it.
2435 			 */
2436 			ice_print_rollback_msg(&sc->hw);
2437 		}
2438 	}
2439 
2440 	/* Handle global reset events */
2441 	ice_handle_reset_event(sc);
2442 
2443 	/* Handle PF reset requests */
2444 	ice_handle_pf_reset_request(sc);
2445 
2446 	/* Handle MDD events */
2447 	ice_handle_mdd_event(sc);
2448 
2449 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED) ||
2450 	    ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET) ||
2451 	    ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2452 		/*
2453 		 * If we know the control queues are disabled, skip processing
2454 		 * the control queues entirely.
2455 		 */
2456 		;
2457 	} else if (ice_testandclear_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING)) {
2458 		ice_process_ctrlq(sc, ICE_CTL_Q_ADMIN, &pending);
2459 		if (pending > 0)
2460 			reschedule = true;
2461 
2462 		if (ice_is_generic_mac(&sc->hw)) {
2463 			ice_process_ctrlq(sc, ICE_CTL_Q_SB, &pending);
2464 			if (pending > 0)
2465 				reschedule = true;
2466 		}
2467 
2468 		ice_process_ctrlq(sc, ICE_CTL_Q_MAILBOX, &pending);
2469 		if (pending > 0)
2470 			reschedule = true;
2471 	}
2472 
2473 	/* Poll for link up */
2474 	ice_poll_for_media_avail(sc);
2475 
2476 	/* Check and update link status */
2477 	ice_update_link_status(sc, false);
2478 
2479 #ifdef PCI_IOV
2480 	/*
2481 	 * Schedule VFs' reset handler after global resets
2482 	 * and other events were processed.
2483 	 */
2484 	if (ice_testandclear_state(&sc->state, ICE_STATE_VFLR_PENDING))
2485 		iflib_iov_intr_deferred(ctx);
2486 #endif
2487 
2488 	/*
2489 	 * If there are still messages to process, we need to reschedule
2490 	 * ourselves. Otherwise, we can just re-enable the interrupt. We'll be
2491 	 * woken up at the next interrupt or timer event.
2492 	 */
2493 	if (reschedule) {
2494 		ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
2495 		iflib_admin_intr_deferred(ctx);
2496 	} else {
2497 		ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2498 	}
2499 }
2500 
2501 /**
2502  * ice_prepare_for_reset - Prepare device for an impending reset
2503  * @sc: The device private softc
2504  *
2505  * Prepare the driver for an impending reset, shutting down VSIs, clearing the
2506  * scheduler setup, and shutting down controlqs. Uses the
2507  * ICE_STATE_PREPARED_FOR_RESET to indicate whether we've already prepared the
2508  * driver for reset or not.
2509  */
2510 static void
ice_prepare_for_reset(struct ice_softc * sc)2511 ice_prepare_for_reset(struct ice_softc *sc)
2512 {
2513 	struct ice_hw *hw = &sc->hw;
2514 
2515 	/* If we're already prepared, there's nothing to do */
2516 	if (ice_testandset_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET))
2517 		return;
2518 
2519 	log(LOG_INFO, "%s: preparing to reset device logic\n", if_name(sc->ifp));
2520 
2521 	/* In recovery mode, hardware is not initialized */
2522 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2523 		return;
2524 
2525 	/* inform the RDMA client */
2526 	ice_rdma_notify_reset(sc);
2527 	/* stop the RDMA client */
2528 	ice_rdma_pf_stop(sc);
2529 
2530 	/* Release the main PF VSI queue mappings */
2531 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2532 				    sc->pf_vsi.num_tx_queues);
2533 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2534 				    sc->pf_vsi.num_rx_queues);
2535 	if (sc->mirr_if) {
2536 		ice_resmgr_release_map(&sc->tx_qmgr, sc->mirr_if->vsi->tx_qmap,
2537 		    sc->mirr_if->num_irq_vectors);
2538 		ice_resmgr_release_map(&sc->rx_qmgr, sc->mirr_if->vsi->rx_qmap,
2539 		    sc->mirr_if->num_irq_vectors);
2540 	}
2541 
2542 	ice_clear_hw_tbls(hw);
2543 
2544 	if (hw->port_info)
2545 		ice_sched_cleanup_all(hw);
2546 
2547 	ice_shutdown_all_ctrlq(hw, false);
2548 }
2549 
2550 /**
2551  * ice_rebuild_pf_vsi_qmap - Rebuild the main PF VSI queue mapping
2552  * @sc: the device softc pointer
2553  *
2554  * Loops over the Tx and Rx queues for the main PF VSI and reassigns the queue
2555  * mapping after a reset occurred.
2556  */
2557 static int
ice_rebuild_pf_vsi_qmap(struct ice_softc * sc)2558 ice_rebuild_pf_vsi_qmap(struct ice_softc *sc)
2559 {
2560 	struct ice_vsi *vsi = &sc->pf_vsi;
2561 	struct ice_tx_queue *txq;
2562 	struct ice_rx_queue *rxq;
2563 	int err, i;
2564 
2565 	/* Re-assign Tx queues from PF space to the main VSI */
2566 	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap,
2567 					    vsi->num_tx_queues);
2568 	if (err) {
2569 		device_printf(sc->dev, "Unable to re-assign PF Tx queues: %s\n",
2570 			      ice_err_str(err));
2571 		return (err);
2572 	}
2573 
2574 	/* Re-assign Rx queues from PF space to this VSI */
2575 	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap,
2576 					    vsi->num_rx_queues);
2577 	if (err) {
2578 		device_printf(sc->dev, "Unable to re-assign PF Rx queues: %s\n",
2579 			      ice_err_str(err));
2580 		goto err_release_tx_queues;
2581 	}
2582 
2583 	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
2584 
2585 	/* Re-assign Tx queue tail pointers */
2586 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
2587 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
2588 
2589 	/* Re-assign Rx queue tail pointers */
2590 	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
2591 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
2592 
2593 	return (0);
2594 
2595 err_release_tx_queues:
2596 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2597 				   sc->pf_vsi.num_tx_queues);
2598 
2599 	return (err);
2600 }
2601 
/*
 * CTX_ACTIVE - determine if the iflib context is active
 *
 * Evaluates to a non-zero value when the interface behind the given iflib
 * context has IFF_DRV_RUNNING set in its driver flags.
 */
#define CTX_ACTIVE(ctx) \
	(if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)
2604 
2605 /**
2606  * ice_rebuild_recovery_mode - Rebuild driver state while in recovery mode
2607  * @sc: The device private softc
2608  *
2609  * Handle a driver rebuild while in recovery mode. This will only rebuild the
2610  * limited functionality supported while in recovery mode.
2611  */
2612 static void
ice_rebuild_recovery_mode(struct ice_softc * sc)2613 ice_rebuild_recovery_mode(struct ice_softc *sc)
2614 {
2615 	device_t dev = sc->dev;
2616 
2617 	/* enable PCIe bus master */
2618 	pci_enable_busmaster(dev);
2619 
2620 	/* Configure interrupt causes for the administrative interrupt */
2621 	ice_configure_misc_interrupts(sc);
2622 
2623 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2624 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2625 
2626 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2627 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2628 
2629 	log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp));
2630 
2631 	/* In order to completely restore device functionality, the iflib core
2632 	 * needs to be reset. We need to request an iflib reset. Additionally,
2633 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2634 	 * the iflib core, we also want re-run the admin task so that iflib
2635 	 * resets immediately instead of waiting for the next interrupt.
2636 	 */
2637 	ice_request_stack_reinit(sc);
2638 
2639 	return;
2640 }
2641 
2642 /**
2643  * ice_rebuild - Rebuild driver state post reset
2644  * @sc: The device private softc
2645  *
2646  * Restore driver state after a reset occurred. Restart the controlqs, setup
2647  * the hardware port, and re-enable the VSIs.
2648  */
2649 static void
ice_rebuild(struct ice_softc * sc)2650 ice_rebuild(struct ice_softc *sc)
2651 {
2652 	struct ice_hw *hw = &sc->hw;
2653 	device_t dev = sc->dev;
2654 	enum ice_ddp_state pkg_state;
2655 	int status;
2656 	int err;
2657 
2658 	sc->rebuild_ticks = ticks;
2659 
2660 	/* If we're rebuilding, then a reset has succeeded. */
2661 	ice_clear_state(&sc->state, ICE_STATE_RESET_FAILED);
2662 
2663 	/*
2664 	 * If the firmware is in recovery mode, only restore the limited
2665 	 * functionality supported by recovery mode.
2666 	 */
2667 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2668 		ice_rebuild_recovery_mode(sc);
2669 		return;
2670 	}
2671 
2672 	/* enable PCIe bus master */
2673 	pci_enable_busmaster(dev);
2674 
2675 	status = ice_init_all_ctrlq(hw);
2676 	if (status) {
2677 		device_printf(dev, "failed to re-init controlqs, err %s\n",
2678 			      ice_status_str(status));
2679 		goto err_shutdown_ctrlq;
2680 	}
2681 
2682 	/* Query the allocated resources for Tx scheduler */
2683 	status = ice_sched_query_res_alloc(hw);
2684 	if (status) {
2685 		device_printf(dev,
2686 			      "Failed to query scheduler resources, err %s aq_err %s\n",
2687 			      ice_status_str(status),
2688 			      ice_aq_str(hw->adminq.sq_last_status));
2689 		goto err_shutdown_ctrlq;
2690 	}
2691 
2692 	/* Re-enable FW logging. Keep going even if this fails */
2693 	status = ICE_SUCCESS;
2694 	if (hw->pf_id == 0)
2695 		status = ice_fwlog_set(hw, &hw->fwlog_cfg);
2696 	if (!status) {
2697 		/*
2698 		 * We should have the most updated cached copy of the
2699 		 * configuration, regardless of whether we're rebuilding
2700 		 * or not.  So we'll simply check to see if logging was
2701 		 * enabled pre-rebuild.
2702 		 */
2703 		if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2704 			status = ice_fwlog_register(hw);
2705 			if (status)
2706 				device_printf(dev, "failed to re-register fw logging, err %s aq_err %s\n",
2707 				   ice_status_str(status),
2708 				   ice_aq_str(hw->adminq.sq_last_status));
2709 		}
2710 	} else
2711 		device_printf(dev, "failed to rebuild fw logging configuration, err %s aq_err %s\n",
2712 		   ice_status_str(status),
2713 		   ice_aq_str(hw->adminq.sq_last_status));
2714 
2715 	err = ice_send_version(sc);
2716 	if (err)
2717 		goto err_shutdown_ctrlq;
2718 
2719 	err = ice_init_link_events(sc);
2720 	if (err) {
2721 		device_printf(dev, "ice_init_link_events failed: %s\n",
2722 			      ice_err_str(err));
2723 		goto err_shutdown_ctrlq;
2724 	}
2725 
2726 	status = ice_clear_pf_cfg(hw);
2727 	if (status) {
2728 		device_printf(dev, "failed to clear PF configuration, err %s\n",
2729 			      ice_status_str(status));
2730 		goto err_shutdown_ctrlq;
2731 	}
2732 
2733 	ice_clean_all_vsi_rss_cfg(sc);
2734 
2735 	ice_clear_pxe_mode(hw);
2736 
2737 	status = ice_get_caps(hw);
2738 	if (status) {
2739 		device_printf(dev, "failed to get capabilities, err %s\n",
2740 			      ice_status_str(status));
2741 		goto err_shutdown_ctrlq;
2742 	}
2743 
2744 	status = ice_sched_init_port(hw->port_info);
2745 	if (status) {
2746 		device_printf(dev, "failed to initialize port, err %s\n",
2747 			      ice_status_str(status));
2748 		goto err_sched_cleanup;
2749 	}
2750 
2751 	/* If we previously loaded the package, it needs to be reloaded now */
2752 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) {
2753 		pkg_state = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
2754 		if (!ice_is_init_pkg_successful(pkg_state)) {
2755 			ice_log_pkg_init(sc, pkg_state);
2756 			ice_transition_safe_mode(sc);
2757 		}
2758 	}
2759 
2760 	ice_reset_pf_stats(sc);
2761 
2762 	err = ice_rebuild_pf_vsi_qmap(sc);
2763 	if (err) {
2764 		device_printf(sc->dev, "Unable to re-assign main VSI queues, err %s\n",
2765 			      ice_err_str(err));
2766 		goto err_sched_cleanup;
2767 	}
2768 	err = ice_initialize_vsi(&sc->pf_vsi);
2769 	if (err) {
2770 		device_printf(sc->dev, "Unable to re-initialize Main VSI, err %s\n",
2771 			      ice_err_str(err));
2772 		goto err_release_queue_allocations;
2773 	}
2774 
2775 	/* Replay all VSI configuration */
2776 	err = ice_replay_all_vsi_cfg(sc);
2777 	if (err)
2778 		goto err_deinit_pf_vsi;
2779 
2780 	/* Re-enable FW health event reporting */
2781 	ice_init_health_events(sc);
2782 
2783 	/* Reconfigure the main PF VSI for RSS */
2784 	err = ice_config_rss(&sc->pf_vsi);
2785 	if (err) {
2786 		device_printf(sc->dev,
2787 			      "Unable to reconfigure RSS for the main VSI, err %s\n",
2788 			      ice_err_str(err));
2789 		goto err_deinit_pf_vsi;
2790 	}
2791 
2792 	if (hw->port_info->qos_cfg.is_sw_lldp)
2793 		ice_add_rx_lldp_filter(sc);
2794 
2795 	/* Apply previous link settings and refresh link status, if PHY
2796 	 * FW is ready.
2797 	 */
2798 	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
2799 	ice_init_link(sc);
2800 
2801 	/* RDMA interface will be restarted by the stack re-init */
2802 
2803 	/* Configure interrupt causes for the administrative interrupt */
2804 	ice_configure_misc_interrupts(sc);
2805 
2806 	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2807 	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2808 
2809 	/* Now that the rebuild is finished, we're no longer prepared to reset */
2810 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2811 
2812 	/* Reconfigure the subinterface */
2813 	if (sc->mirr_if) {
2814 		err = ice_subif_rebuild(sc);
2815 		if (err)
2816 			goto err_deinit_pf_vsi;
2817 	}
2818 
2819 	log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp));
2820 
2821 	/* In order to completely restore device functionality, the iflib core
2822 	 * needs to be reset. We need to request an iflib reset. Additionally,
2823 	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2824 	 * the iflib core, we also want to re-run the admin task so that iflib
2825 	 * resets immediately instead of waiting for the next interrupt.
2826 	 * If LLDP is enabled we need to reconfig DCB to properly reinit all TC
2827 	 * queues, not only 0. It contains ice_request_stack_reinit as well.
2828 	 */
2829 	if (hw->port_info->qos_cfg.is_sw_lldp)
2830 		ice_request_stack_reinit(sc);
2831 	else
2832 		ice_do_dcb_reconfig(sc, false);
2833 
2834 	return;
2835 
2836 err_deinit_pf_vsi:
2837 	ice_deinit_vsi(&sc->pf_vsi);
2838 err_release_queue_allocations:
2839 	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2840 				    sc->pf_vsi.num_tx_queues);
2841 	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2842 				    sc->pf_vsi.num_rx_queues);
2843 err_sched_cleanup:
2844 	ice_sched_cleanup_all(hw);
2845 err_shutdown_ctrlq:
2846 	ice_shutdown_all_ctrlq(hw, false);
2847 	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2848 	ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2849 	device_printf(dev, "Driver rebuild failed, please reload the device driver\n");
2850 }
2851 
2852 /**
2853  * ice_handle_reset_event - Handle reset events triggered by OICR
2854  * @sc: The device private softc
2855  *
2856  * Handle reset events triggered by an OICR notification. This includes CORER,
2857  * GLOBR, and EMPR resets triggered by software on this or any other PF or by
2858  * firmware.
2859  *
2860  * @pre assumes the iflib context lock is held, and will unlock it while
2861  * waiting for the hardware to finish reset.
2862  */
2863 static void
ice_handle_reset_event(struct ice_softc * sc)2864 ice_handle_reset_event(struct ice_softc *sc)
2865 {
2866 	struct ice_hw *hw = &sc->hw;
2867 	int status;
2868 	device_t dev = sc->dev;
2869 
2870 	/* When a CORER, GLOBR, or EMPR is about to happen, the hardware will
2871 	 * trigger an OICR interrupt. Our OICR handler will determine when
2872 	 * this occurs and set the ICE_STATE_RESET_OICR_RECV bit as
2873 	 * appropriate.
2874 	 */
2875 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
2876 		return;
2877 
2878 	ice_prepare_for_reset(sc);
2879 
2880 	/*
2881 	 * Release the iflib context lock and wait for the device to finish
2882 	 * resetting.
2883 	 */
2884 	IFLIB_CTX_UNLOCK(sc);
2885 
2886 #define ICE_EMPR_ADDL_WAIT_MSEC_SLOW		20000
2887 	if ((ice_is_e830(hw) || ice_is_e825c(hw)) &&
2888 	    (((rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
2889 	         GLGEN_RSTAT_RESET_TYPE_S) == ICE_RESET_EMPR))
2890 			ice_msec_pause(ICE_EMPR_ADDL_WAIT_MSEC_SLOW);
2891 
2892 	status = ice_check_reset(hw);
2893 	IFLIB_CTX_LOCK(sc);
2894 	if (status) {
2895 		device_printf(dev, "Device never came out of reset, err %s\n",
2896 			      ice_status_str(status));
2897 
2898 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2899 		ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
2900 		ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2901 		device_printf(dev, "Reset failed; please reload the device driver\n");
2902 		return;
2903 	}
2904 
2905 	/* We're done with the reset, so we can rebuild driver state */
2906 	sc->hw.reset_ongoing = false;
2907 	ice_rebuild(sc);
2908 
2909 	/* In the unlikely event that a PF reset request occurs at the same
2910 	 * time as a global reset, clear the request now. This avoids
2911 	 * resetting a second time right after we reset due to a global event.
2912 	 */
2913 	if (ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2914 		device_printf(dev, "Ignoring PFR request that occurred while a reset was ongoing\n");
2915 }
2916 
2917 /**
2918  * ice_handle_pf_reset_request - Initiate PF reset requested by software
2919  * @sc: The device private softc
2920  *
2921  * Initiate a PF reset requested by software. We handle this in the admin task
2922  * so that only one thread actually handles driver preparation and cleanup,
2923  * rather than having multiple threads possibly attempt to run this code
2924  * simultaneously.
2925  *
2926  * @pre assumes the iflib context lock is held and will unlock it while
2927  * waiting for the PF reset to complete.
2928  */
2929 static void
ice_handle_pf_reset_request(struct ice_softc * sc)2930 ice_handle_pf_reset_request(struct ice_softc *sc)
2931 {
2932 	struct ice_hw *hw = &sc->hw;
2933 	int status;
2934 
2935 	/* Check for PF reset requests */
2936 	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2937 		return;
2938 
2939 	/* Make sure we're prepared for reset */
2940 	ice_prepare_for_reset(sc);
2941 
2942 	/*
2943 	 * Release the iflib context lock and wait for the device to finish
2944 	 * resetting.
2945 	 */
2946 	IFLIB_CTX_UNLOCK(sc);
2947 	status = ice_reset(hw, ICE_RESET_PFR);
2948 	IFLIB_CTX_LOCK(sc);
2949 	if (status) {
2950 		device_printf(sc->dev, "device PF reset failed, err %s\n",
2951 			      ice_status_str(status));
2952 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2953 		return;
2954 	}
2955 
2956 	sc->soft_stats.pfr_count++;
2957 	ice_rebuild(sc);
2958 }
2959 
2960 /**
2961  * ice_init_device_features - Init device driver features
2962  * @sc: driver softc structure
2963  *
2964  * @pre assumes that the function capabilities bits have been set up by
2965  * ice_init_hw().
2966  */
2967 static void
ice_init_device_features(struct ice_softc * sc)2968 ice_init_device_features(struct ice_softc *sc)
2969 {
2970 	struct ice_hw *hw = &sc->hw;
2971 
2972 	/* Set capabilities that all devices support */
2973 	ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2974 	ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap);
2975 	ice_set_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2976 	ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap);
2977 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_cap);
2978 	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_cap);
2979 	ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2980 	ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2981 	ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_cap);
2982 	ice_set_bit(ICE_FEATURE_DCB, sc->feat_cap);
2983 	ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
2984 	ice_set_bit(ICE_FEATURE_PHY_STATISTICS, sc->feat_cap);
2985 
2986 	if (ice_is_e810(hw))
2987 		ice_set_bit(ICE_FEATURE_PHY_STATISTICS, sc->feat_en);
2988 
2989 	if (ice_is_e825c(hw))
2990 		ice_set_bit(ICE_FEATURE_DUAL_NAC, sc->feat_cap);
2991 	/* Disable features due to hardware limitations... */
2992 	if (!hw->func_caps.common_cap.rss_table_size)
2993 		ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2994 	if (!hw->func_caps.common_cap.iwarp || !ice_enable_irdma)
2995 		ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2996 	if (!hw->func_caps.common_cap.dcb)
2997 		ice_clear_bit(ICE_FEATURE_DCB, sc->feat_cap);
2998 	/* Disable features due to firmware limitations... */
2999 	if (!ice_is_fw_health_report_supported(hw))
3000 		ice_clear_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
3001 	if (!ice_fwlog_supported(hw))
3002 		ice_clear_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
3003 	if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
3004 		if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_FW_LOGGING))
3005 			ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_en);
3006 		else
3007 			ice_fwlog_unregister(hw);
3008 	}
3009 
3010 	/* Disable capabilities not supported by the OS */
3011 	ice_disable_unsupported_features(sc->feat_cap);
3012 
3013 	/* RSS is always enabled for iflib */
3014 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RSS))
3015 		ice_set_bit(ICE_FEATURE_RSS, sc->feat_en);
3016 
3017 	/* Disable features based on sysctl settings */
3018 	if (!ice_tx_balance_en)
3019 		ice_clear_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
3020 
3021 	if (hw->dev_caps.supported_sensors & ICE_SENSOR_SUPPORT_E810_INT_TEMP) {
3022 		ice_set_bit(ICE_FEATURE_TEMP_SENSOR, sc->feat_cap);
3023 		ice_set_bit(ICE_FEATURE_TEMP_SENSOR, sc->feat_en);
3024 	}
3025 
3026 	if (hw->func_caps.common_cap.next_cluster_id_support ||
3027 	    hw->dev_caps.common_cap.next_cluster_id_support) {
3028 		ice_set_bit(ICE_FEATURE_NEXT_CLUSTER_ID, sc->feat_cap);
3029 		ice_set_bit(ICE_FEATURE_NEXT_CLUSTER_ID, sc->feat_en);
3030 	}
3031 }
3032 
3033 /**
3034  * ice_if_multi_set - Callback to update Multicast filters in HW
3035  * @ctx: iflib ctx structure
3036  *
3037  * Called by iflib in response to SIOCDELMULTI and SIOCADDMULTI. Must search
3038  * the if_multiaddrs list and determine which filters have been added or
3039  * removed from the list, and update HW programming to reflect the new list.
3040  *
3041  * @pre assumes the caller holds the iflib CTX lock
3042  */
3043 static void
ice_if_multi_set(if_ctx_t ctx)3044 ice_if_multi_set(if_ctx_t ctx)
3045 {
3046 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3047 	int err;
3048 
3049 	ASSERT_CTX_LOCKED(sc);
3050 
3051 	/* Do not handle multicast configuration in recovery mode */
3052 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
3053 		return;
3054 
3055 	err = ice_sync_multicast_filters(sc);
3056 	if (err) {
3057 		device_printf(sc->dev,
3058 			      "Failed to synchronize multicast filter list: %s\n",
3059 			      ice_err_str(err));
3060 		return;
3061 	}
3062 }
3063 
3064 /**
3065  * ice_if_vlan_register - Register a VLAN with the hardware
3066  * @ctx: iflib ctx pointer
3067  * @vtag: VLAN to add
3068  *
3069  * Programs the main PF VSI with a hardware filter for the given VLAN.
3070  *
3071  * @pre assumes the caller holds the iflib CTX lock
3072  */
3073 static void
ice_if_vlan_register(if_ctx_t ctx,u16 vtag)3074 ice_if_vlan_register(if_ctx_t ctx, u16 vtag)
3075 {
3076 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3077 	int status;
3078 
3079 	ASSERT_CTX_LOCKED(sc);
3080 
3081 	/* Do not handle VLAN configuration in recovery mode */
3082 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
3083 		return;
3084 
3085 	status = ice_add_vlan_hw_filter(&sc->pf_vsi, vtag);
3086 	if (status) {
3087 		device_printf(sc->dev,
3088 			      "Failure adding VLAN %d to main VSI, err %s aq_err %s\n",
3089 			      vtag, ice_status_str(status),
3090 			      ice_aq_str(sc->hw.adminq.sq_last_status));
3091 	}
3092 }
3093 
3094 /**
3095  * ice_if_vlan_unregister - Remove a VLAN filter from the hardware
3096  * @ctx: iflib ctx pointer
3097  * @vtag: VLAN to add
3098  *
3099  * Removes the previously programmed VLAN filter from the main PF VSI.
3100  *
3101  * @pre assumes the caller holds the iflib CTX lock
3102  */
3103 static void
ice_if_vlan_unregister(if_ctx_t ctx,u16 vtag)3104 ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag)
3105 {
3106 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3107 	int status;
3108 
3109 	ASSERT_CTX_LOCKED(sc);
3110 
3111 	/* Do not handle VLAN configuration in recovery mode */
3112 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
3113 		return;
3114 
3115 	status = ice_remove_vlan_hw_filter(&sc->pf_vsi, vtag);
3116 	if (status) {
3117 		device_printf(sc->dev,
3118 			      "Failure removing VLAN %d from main VSI, err %s aq_err %s\n",
3119 			      vtag, ice_status_str(status),
3120 			      ice_aq_str(sc->hw.adminq.sq_last_status));
3121 	}
3122 }
3123 
3124 /**
3125  * ice_if_stop - Stop the device
3126  * @ctx: iflib context structure
3127  *
3128  * Called by iflib to stop the device and bring it down. (i.e. ifconfig ice0
3129  * down)
3130  *
3131  * @pre assumes the caller holds the iflib CTX lock
3132  */
3133 static void
ice_if_stop(if_ctx_t ctx)3134 ice_if_stop(if_ctx_t ctx)
3135 {
3136 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3137 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3138 
3139 	ASSERT_CTX_LOCKED(sc);
3140 
3141 	/*
3142 	 * The iflib core may call IFDI_STOP prior to the first call to
3143 	 * IFDI_INIT. This will cause us to attempt to remove MAC filters we
3144 	 * don't have, and disable Tx queues which aren't yet configured.
3145 	 * Although it is likely these extra operations are harmless, they do
3146 	 * cause spurious warning messages to be displayed, which may confuse
3147 	 * users.
3148 	 *
3149 	 * To avoid these messages, we use a state bit indicating if we've
3150 	 * been initialized. It will be set when ice_if_init is called, and
3151 	 * cleared here in ice_if_stop.
3152 	 */
3153 	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
3154 		return;
3155 
3156 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
3157 		device_printf(sc->dev, "request to stop interface cannot be completed as the device failed to reset\n");
3158 		return;
3159 	}
3160 
3161 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
3162 		device_printf(sc->dev, "request to stop interface while device is prepared for impending reset\n");
3163 		return;
3164 	}
3165 
3166 	ice_rdma_pf_stop(sc);
3167 
3168 	/* Remove the MAC filters, stop Tx, and stop Rx. We don't check the
3169 	 * return of these functions because there's nothing we can really do
3170 	 * if they fail, and the functions already print error messages.
3171 	 * Just try to shut down as much as we can.
3172 	 */
3173 	ice_rm_pf_default_mac_filters(sc);
3174 
3175 	/* Dissociate the Tx and Rx queues from the interrupts */
3176 	ice_flush_txq_interrupts(&sc->pf_vsi);
3177 	ice_flush_rxq_interrupts(&sc->pf_vsi);
3178 
3179 	/* Disable the Tx and Rx queues */
3180 	ice_vsi_disable_tx(&sc->pf_vsi);
3181 	ice_control_all_rx_queues(&sc->pf_vsi, false);
3182 
3183 	if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
3184 		 !(if_getflags(sc->ifp) & IFF_UP) && sc->link_up)
3185 		ice_set_link(sc, false);
3186 
3187 	if (sc->mirr_if && ice_test_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT)) {
3188 		ice_subif_if_stop(sc->mirr_if->subctx);
3189 		device_printf(sc->dev, "The subinterface also comes down and up after reset\n");
3190 	}
3191 }
3192 
3193 /**
3194  * ice_if_get_counter - Get current value of an ifnet statistic
3195  * @ctx: iflib context pointer
3196  * @counter: ifnet counter to read
3197  *
3198  * Reads the current value of an ifnet counter for the device.
3199  *
3200  * This function is not protected by the iflib CTX lock.
3201  */
3202 static uint64_t
ice_if_get_counter(if_ctx_t ctx,ift_counter counter)3203 ice_if_get_counter(if_ctx_t ctx, ift_counter counter)
3204 {
3205 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3206 
3207 	/* Return the counter for the main PF VSI */
3208 	return ice_get_ifnet_counter(&sc->pf_vsi, counter);
3209 }
3210 
3211 /**
3212  * ice_request_stack_reinit - Request that iflib re-initialize
3213  * @sc: the device private softc
3214  *
3215  * Request that the device be brought down and up, to re-initialize. For
3216  * example, this may be called when a device reset occurs, or when Tx and Rx
3217  * queues need to be re-initialized.
3218  *
3219  * This is required because the iflib state is outside the driver, and must be
3220  * re-initialized if we need to resart Tx and Rx queues.
3221  */
3222 void
ice_request_stack_reinit(struct ice_softc * sc)3223 ice_request_stack_reinit(struct ice_softc *sc)
3224 {
3225 	if (CTX_ACTIVE(sc->ctx)) {
3226 		iflib_request_reset(sc->ctx);
3227 		iflib_admin_intr_deferred(sc->ctx);
3228 	}
3229 }
3230 
3231 /**
3232  * ice_driver_is_detaching - Check if the driver is detaching/unloading
3233  * @sc: device private softc
3234  *
3235  * Returns true if the driver is detaching, false otherwise.
3236  *
3237  * @remark on newer kernels, take advantage of iflib_in_detach in order to
3238  * report detachment correctly as early as possible.
3239  *
3240  * @remark this function is used by various code paths that want to avoid
3241  * running if the driver is about to be removed. This includes sysctls and
3242  * other driver access points. Note that it does not fully resolve
3243  * detach-based race conditions as it is possible for a thread to race with
3244  * iflib_in_detach.
3245  */
3246 bool
ice_driver_is_detaching(struct ice_softc * sc)3247 ice_driver_is_detaching(struct ice_softc *sc)
3248 {
3249 	return (ice_test_state(&sc->state, ICE_STATE_DETACHING) ||
3250 		iflib_in_detach(sc->ctx));
3251 }
3252 
3253 /**
3254  * ice_if_priv_ioctl - Device private ioctl handler
3255  * @ctx: iflib context pointer
3256  * @command: The ioctl command issued
3257  * @data: ioctl specific data
3258  *
3259  * iflib callback for handling custom driver specific ioctls.
3260  *
3261  * @pre Assumes that the iflib context lock is held.
3262  */
3263 static int
ice_if_priv_ioctl(if_ctx_t ctx,u_long command,caddr_t data)3264 ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data)
3265 {
3266 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3267 	struct ifdrv *ifd;
3268 	device_t dev = sc->dev;
3269 
3270 	if (data == NULL)
3271 		return (EINVAL);
3272 
3273 	ASSERT_CTX_LOCKED(sc);
3274 
3275 	/* Make sure the command type is valid */
3276 	switch (command) {
3277 	case SIOCSDRVSPEC:
3278 	case SIOCGDRVSPEC:
3279 		/* Accepted commands */
3280 		break;
3281 	case SIOCGPRIVATE_0:
3282 		/*
3283 		 * Although we do not support this ioctl command, it's
3284 		 * expected that iflib will forward it to the IFDI_PRIV_IOCTL
3285 		 * handler. Do not print a message in this case
3286 		 */
3287 		return (ENOTSUP);
3288 	default:
3289 		/*
3290 		 * If we get a different command for this function, it's
3291 		 * definitely unexpected, so log a message indicating what
3292 		 * command we got for debugging purposes.
3293 		 */
3294 		device_printf(dev, "%s: unexpected ioctl command %08lx\n",
3295 			      __func__, command);
3296 		return (EINVAL);
3297 	}
3298 
3299 	ifd = (struct ifdrv *)data;
3300 
3301 	switch (ifd->ifd_cmd) {
3302 	case ICE_NVM_ACCESS:
3303 		return ice_handle_nvm_access_ioctl(sc, ifd);
3304 	case ICE_DEBUG_DUMP:
3305 		return ice_handle_debug_dump_ioctl(sc, ifd);
3306 	default:
3307 		return EINVAL;
3308 	}
3309 }
3310 
3311 /**
3312  * ice_if_i2c_req - I2C request handler for iflib
3313  * @ctx: iflib context pointer
3314  * @req: The I2C parameters to use
3315  *
3316  * Read from the port's I2C eeprom using the parameters from the ioctl.
3317  *
3318  * @remark The iflib-only part is pretty simple.
3319  */
3320 static int
ice_if_i2c_req(if_ctx_t ctx,struct ifi2creq * req)3321 ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req)
3322 {
3323 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3324 
3325 	return ice_handle_i2c_req(sc, req);
3326 }
3327 
3328 /**
3329  * ice_if_suspend - PCI device suspend handler for iflib
3330  * @ctx: iflib context pointer
3331  *
3332  * Deinitializes the driver and clears HW resources in preparation for
3333  * suspend or an FLR.
3334  *
3335  * @returns 0; this return value is ignored
3336  */
3337 static int
ice_if_suspend(if_ctx_t ctx)3338 ice_if_suspend(if_ctx_t ctx)
3339 {
3340 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3341 
3342 	/* At least a PFR is always going to happen after this;
3343 	 * either via FLR or during the D3->D0 transition.
3344 	 */
3345 	ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
3346 
3347 	ice_prepare_for_reset(sc);
3348 
3349 	return (0);
3350 }
3351 
3352 /**
3353  * ice_if_resume - PCI device resume handler for iflib
3354  * @ctx: iflib context pointer
3355  *
3356  * Reinitializes the driver and the HW after PCI resume or after
3357  * an FLR. An init is performed by iflib after this function is finished.
3358  *
3359  * @returns 0; this return value is ignored
3360  */
3361 static int
ice_if_resume(if_ctx_t ctx)3362 ice_if_resume(if_ctx_t ctx)
3363 {
3364 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3365 
3366 	ice_rebuild(sc);
3367 
3368 	return (0);
3369 }
3370 
3371 /**
3372  * ice_if_needs_restart - Tell iflib when the driver needs to be reinitialized
3373  * @ctx: iflib context pointer
3374  * @event: event code to check
3375  *
3376  * Defaults to returning true for unknown events.
3377  *
3378  * @returns true if iflib needs to reinit the interface
3379  */
3380 static bool
ice_if_needs_restart(if_ctx_t ctx,enum iflib_restart_event event)3381 ice_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event)
3382 {
3383 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3384 
3385 	switch (event) {
3386 	case IFLIB_RESTART_VLAN_CONFIG:
3387 		if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
3388 			 !(if_getflags(sc->ifp) & IFF_UP))
3389 			return false;
3390 	default:
3391 		return true;
3392 	}
3393 }
3394 
3395 /**
3396  * ice_init_link - Do link configuration and link status reporting
3397  * @sc: driver private structure
3398  *
3399  * Contains an extra check that skips link config when an E830 device
3400  * does not have the "FW_LOADING"/"PHYBUSY" bit set in GL_MNG_FWSM set.
3401  */
3402 static void
ice_init_link(struct ice_softc * sc)3403 ice_init_link(struct ice_softc *sc)
3404 {
3405 	struct ice_hw *hw = &sc->hw;
3406 	device_t dev = sc->dev;
3407 
3408 	/* Check if FW is ready before setting up link; defer setup to the
3409 	 * admin task if it isn't.
3410 	 */
3411 	if (ice_is_e830(hw) &&
3412 	    (rd32(hw, GL_MNG_FWSM) & GL_MNG_FWSM_FW_LOADING_M)) {
3413 		ice_set_state(&sc->state, ICE_STATE_PHY_FW_INIT_PENDING);
3414 		device_printf(dev,
3415 		    "Link initialization is blocked by PHY FW initialization.\n");
3416 		device_printf(dev,
3417 		    "Link initialization will continue after PHY FW initialization completes.\n");
3418 		/* Do not access PHY config while PHY FW is busy initializing */
3419 	} else {
3420 		ice_clear_state(&sc->state, ICE_STATE_PHY_FW_INIT_PENDING);
3421 		ice_init_link_configuration(sc);
3422 		ice_update_link_status(sc, true);
3423 	}
3424 
3425 }
3426 
3427 #ifdef PCI_IOV
3428 /**
3429  * ice_if_iov_init - iov init handler for iflib
3430  * @ctx: iflib context pointer
3431  * @num_vfs: number of VFs to create
3432  * @params: configuration parameters for the PF
3433  *
3434  * Configure the driver for SR-IOV mode. Used to setup things like memory
3435  * before any VFs are created.
3436  *
3437  * @remark This is a wrapper for ice_iov_init
3438  */
3439 static int
ice_if_iov_init(if_ctx_t ctx,uint16_t num_vfs,const nvlist_t * params)3440 ice_if_iov_init(if_ctx_t ctx, uint16_t num_vfs, const nvlist_t *params)
3441 {
3442 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3443 
3444 	return ice_iov_init(sc, num_vfs, params);
3445 }
3446 
3447 /**
3448  * ice_if_iov_uninit - iov uninit handler for iflib
3449  * @ctx: iflib context pointer
3450  *
3451  * Destroys VFs and frees their memory and resources.
3452  *
3453  * @remark This is a wrapper for ice_iov_uninit
3454  */
3455 static void
ice_if_iov_uninit(if_ctx_t ctx)3456 ice_if_iov_uninit(if_ctx_t ctx)
3457 {
3458 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3459 
3460 	ice_iov_uninit(sc);
3461 }
3462 
3463 /**
3464  * ice_if_iov_vf_add - iov add vf handler for iflib
3465  * @ctx: iflib context pointer
3466  * @vfnum: index of VF to configure
3467  * @params: configuration parameters for the VF
3468  *
3469  * Sets up the VF given by the vfnum index. This is called by the OS
3470  * for each VF created by the PF driver after it is spawned.
3471  *
3472  * @remark This is a wrapper for ice_iov_vf_add
3473  */
3474 static int
ice_if_iov_vf_add(if_ctx_t ctx,uint16_t vfnum,const nvlist_t * params)3475 ice_if_iov_vf_add(if_ctx_t ctx, uint16_t vfnum, const nvlist_t *params)
3476 {
3477 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3478 
3479 	return ice_iov_add_vf(sc, vfnum, params);
3480 }
3481 
3482 /**
3483  * ice_if_vflr_handle - iov VFLR handler
3484  * @ctx: iflib context pointer
3485  *
3486  * Performs the necessar teardown or setup required for a VF after
3487  * a VFLR is initiated.
3488  *
3489  * @remark This is a wrapper for ice_iov_handle_vflr
3490  */
3491 static void
ice_if_vflr_handle(if_ctx_t ctx)3492 ice_if_vflr_handle(if_ctx_t ctx)
3493 {
3494 	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3495 	ice_iov_handle_vflr(sc);
3496 }
3497 #endif /* PCI_IOV */
3498 
3499 extern struct if_txrx ice_subif_txrx;
3500 
3501 /**
3502  * @var ice_subif_methods
3503  * @brief ice driver method entry points
3504  */
3505 static device_method_t ice_subif_methods[] = {
3506 	/* Device interface */
3507 	DEVMETHOD(device_register, ice_subif_register),
3508 	DEVMETHOD_END
3509 };
3510 
3511 /**
3512  * @var ice_subif_driver
3513  * @brief driver structure for the device API
3514  */
3515 static driver_t ice_subif_driver = {
3516 	.name = "ice_subif",
3517 	.methods = ice_subif_methods,
3518 	.size = sizeof(struct ice_mirr_if),
3519 };
3520 
3521 static device_method_t ice_iflib_subif_methods[] = {
3522 	DEVMETHOD(ifdi_attach_pre, ice_subif_if_attach_pre),
3523 	DEVMETHOD(ifdi_attach_post, ice_subif_if_attach_post),
3524 	DEVMETHOD(ifdi_tx_queues_alloc, ice_subif_if_tx_queues_alloc),
3525 	DEVMETHOD(ifdi_rx_queues_alloc, ice_subif_if_rx_queues_alloc),
3526 	DEVMETHOD(ifdi_msix_intr_assign, ice_subif_if_msix_intr_assign),
3527 	DEVMETHOD(ifdi_intr_enable, ice_subif_if_intr_enable),
3528 	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_subif_if_rx_queue_intr_enable),
3529 	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_subif_if_tx_queue_intr_enable),
3530 	DEVMETHOD(ifdi_init, ice_subif_if_init),
3531 	DEVMETHOD(ifdi_stop, ice_subif_if_stop),
3532 	DEVMETHOD(ifdi_queues_free, ice_subif_if_queues_free),
3533 	DEVMETHOD(ifdi_media_status, ice_subif_if_media_status),
3534 	DEVMETHOD(ifdi_promisc_set, ice_subif_if_promisc_set),
3535 };
3536 
3537 /**
3538  * @var ice_iflib_subif_driver
3539  * @brief driver structure for the iflib stack
3540  *
3541  * driver_t definition used to setup the iflib device methods.
3542  */
3543 static driver_t ice_iflib_subif_driver = {
3544 	.name = "ice_subif",
3545 	.methods = ice_iflib_subif_methods,
3546 	.size = sizeof(struct ice_mirr_if),
3547 };
3548 
3549 /**
3550  * @var ice_subif_sctx
3551  * @brief ice driver shared context
3552  *
3553  * Similar to the existing ice_sctx, this structure has these differences:
3554  * - isc_admin_intrcnt is set to 0
3555  * - Uses subif iflib driver methods
3556  * - Flagged as a VF for iflib
3557  */
3558 static struct if_shared_ctx ice_subif_sctx = {
3559 	.isc_magic = IFLIB_MAGIC,
3560 	.isc_q_align = PAGE_SIZE,
3561 
3562 	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
3563 	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,
3564 	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
3565 	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,
3566 
3567 	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
3568 	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
3569 	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,
3570 
3571 	.isc_nfl = 1,
3572 	.isc_ntxqs = 1,
3573 	.isc_nrxqs = 1,
3574 
3575 	.isc_admin_intrcnt = 0,
3576 	.isc_vendor_info = ice_vendor_info_array,
3577 	.isc_driver_version = __DECONST(char *, ice_driver_version),
3578 	.isc_driver = &ice_iflib_subif_driver,
3579 
3580 	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
3581 		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX |
3582 		IFLIB_IS_VF,
3583 
3584 	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
3585 	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
3586 	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
3587 	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
3588 	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
3589 	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
3590 };
3591 
3592 static void *
ice_subif_register(device_t dev __unused)3593 ice_subif_register(device_t dev __unused)
3594 {
3595 	return (&ice_subif_sctx);
3596 }
3597 
3598 static void
ice_subif_setup_scctx(struct ice_mirr_if * mif)3599 ice_subif_setup_scctx(struct ice_mirr_if *mif)
3600 {
3601 	if_softc_ctx_t scctx = mif->subscctx;
3602 
3603 	scctx->isc_txrx = &ice_subif_txrx;
3604 
3605 	scctx->isc_capenable = ICE_FULL_CAPS;
3606 	scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
3607 
3608 	scctx->isc_ntxqsets = 4;
3609 	scctx->isc_nrxqsets = 4;
3610 	scctx->isc_vectors = scctx->isc_nrxqsets;
3611 
3612 	scctx->isc_ntxqsets_max = 256;
3613 	scctx->isc_nrxqsets_max = 256;
3614 
3615 	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
3616 	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
3617 	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
3618 	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);
3619 
3620 	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
3621 	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
3622 	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
3623 	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;
3624 }
3625 
3626 static int
ice_subif_if_attach_pre(if_ctx_t ctx)3627 ice_subif_if_attach_pre(if_ctx_t ctx)
3628 {
3629 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3630 	device_t dev = iflib_get_dev(ctx);
3631 
3632 	mif->subctx = ctx;
3633 	mif->subdev = dev;
3634 	mif->subscctx = iflib_get_softc_ctx(ctx);
3635 
3636 	/* Setup the iflib softc context structure */
3637 	ice_subif_setup_scctx(mif);
3638 
3639 	return (0);
3640 }
3641 
3642 static int
ice_subif_if_attach_post(if_ctx_t ctx __unused)3643 ice_subif_if_attach_post(if_ctx_t ctx __unused)
3644 {
3645 	return (0);
3646 }
3647 
3648 /**
3649  * ice_destroy_mirror_interface - destroy mirror interface
3650  * @sc: driver private data
3651  *
3652  * Destroys all resources associated with the mirroring interface.
3653  * Will not exit early on failure.
3654  *
3655  * @pre: Mirror interface already exists and is initialized.
3656  */
3657 void
ice_destroy_mirror_interface(struct ice_softc * sc)3658 ice_destroy_mirror_interface(struct ice_softc *sc)
3659 {
3660 	struct ice_mirr_if *mif = sc->mirr_if;
3661 	struct ice_vsi *vsi = mif->vsi;
3662 	bool is_locked = false;
3663 	int ret;
3664 
3665 	is_locked = sx_xlocked(sc->iflib_ctx_lock);
3666 	if (is_locked)
3667 		IFLIB_CTX_UNLOCK(sc);
3668 
3669 	if (mif->ifp) {
3670 		ret = iflib_device_deregister(mif->subctx);
3671 		if (ret) {
3672 			device_printf(sc->dev,
3673 			    "iflib_device_deregister for mirror interface failed: %d\n",
3674 			    ret);
3675 		}
3676 	}
3677 
3678 	bus_topo_lock();
3679 	ret = device_delete_child(sc->dev, mif->subdev);
3680 	bus_topo_unlock();
3681 	if (ret) {
3682 		device_printf(sc->dev,
3683 		    "device_delete_child for mirror interface failed: %d\n",
3684 		    ret);
3685 	}
3686 
3687 	if (is_locked)
3688 		IFLIB_CTX_LOCK(sc);
3689 
3690 	if (mif->if_imap) {
3691 		free(mif->if_imap, M_ICE);
3692 		mif->if_imap = NULL;
3693 	}
3694 	if (mif->os_imap) {
3695 		free(mif->os_imap, M_ICE);
3696 		mif->os_imap = NULL;
3697 	}
3698 
3699 	/* These are freed via ice_subif_queues_free_subif
3700 	 * vsi:
3701 	 * - rx_irqvs
3702 	 * - tx_queues
3703 	 * - rx_queues
3704 	 */
3705 	ice_release_vsi(vsi);
3706 
3707 	free(mif, M_ICE);
3708 	sc->mirr_if = NULL;
3709 
3710 }
3711 
3712 /**
3713  * ice_setup_mirror_vsi - Initialize mirror VSI
3714  * @mif: driver private data for mirror interface
3715  *
3716  * Allocates a VSI for a mirror interface, and sets that VSI up for use as a
3717  * mirror for the main PF VSI.
3718  *
3719  * Returns 0 on success, or a standard error code on failure.
3720  */
3721 static int
ice_setup_mirror_vsi(struct ice_mirr_if * mif)3722 ice_setup_mirror_vsi(struct ice_mirr_if *mif)
3723 {
3724 	struct ice_softc *sc = mif->back;
3725 	device_t dev = sc->dev;
3726 	struct ice_vsi *vsi;
3727 	int ret = 0;
3728 
3729 	/* vsi is for the new mirror vsi, not the PF's main VSI */
3730 	vsi = ice_alloc_vsi(sc, ICE_VSI_VMDQ2);
3731 	if (!vsi) {
3732 		/* Already prints an error message */
3733 		return (ENOMEM);
3734 	}
3735 	mif->vsi = vsi;
3736 
3737 	/* Reserve VSI queue allocation from PF queues */
3738 	ice_alloc_vsi_qmap(vsi, ICE_DEFAULT_VF_QUEUES, ICE_DEFAULT_VF_QUEUES);
3739 	vsi->num_tx_queues = vsi->num_rx_queues = ICE_DEFAULT_VF_QUEUES;
3740 
3741 	/* Assign Tx queues from PF space */
3742 	ret = ice_resmgr_assign_scattered(&sc->tx_qmgr, vsi->tx_qmap,
3743 	    vsi->num_tx_queues);
3744 	if (ret) {
3745 		device_printf(dev, "Unable to assign mirror VSI Tx queues: %s\n",
3746 		    ice_err_str(ret));
3747 		goto release_vsi;
3748 	}
3749 	/* Assign Rx queues from PF space */
3750 	ret = ice_resmgr_assign_scattered(&sc->rx_qmgr, vsi->rx_qmap,
3751 	    vsi->num_rx_queues);
3752 	if (ret) {
3753 		device_printf(dev, "Unable to assign mirror VSI Rx queues: %s\n",
3754 		    ice_err_str(ret));
3755 		goto release_vsi;
3756 	}
3757 	vsi->qmap_type = ICE_RESMGR_ALLOC_SCATTERED;
3758 	vsi->max_frame_size = ICE_MAX_FRAME_SIZE;
3759 
3760 	ret = ice_initialize_vsi(vsi);
3761 	if (ret) {
3762 		device_printf(dev, "%s: Error in ice_initialize_vsi for mirror VSI: %s\n",
3763 		    __func__, ice_err_str(ret));
3764 		goto release_vsi;
3765 	}
3766 
3767 	/* Setup this VSI for receiving traffic */
3768 	ret = ice_config_rss(vsi);
3769 	if (ret) {
3770 		device_printf(dev,
3771 		    "Unable to configure RSS for mirror VSI: %s\n",
3772 		    ice_err_str(ret));
3773 		goto release_vsi;
3774 	}
3775 
3776 	/* Set HW rules for mirroring traffic */
3777 	vsi->mirror_src_vsi = sc->pf_vsi.idx;
3778 
3779 	ice_debug(&sc->hw, ICE_DBG_INIT,
3780 	    "Configuring mirroring from VSI %d to %d\n",
3781 	    vsi->mirror_src_vsi, vsi->idx);
3782 	ice_debug(&sc->hw, ICE_DBG_INIT, "(HW num: VSI %d to %d)\n",
3783 	    ice_get_hw_vsi_num(&sc->hw, vsi->mirror_src_vsi),
3784 	    ice_get_hw_vsi_num(&sc->hw, vsi->idx));
3785 
3786 	ret = ice_setup_vsi_mirroring(vsi);
3787 	if (ret) {
3788 		device_printf(dev,
3789 		    "Unable to configure mirroring for VSI: %s\n",
3790 		    ice_err_str(ret));
3791 		goto release_vsi;
3792 	}
3793 
3794 	return (0);
3795 
3796 release_vsi:
3797 	ice_release_vsi(vsi);
3798 	mif->vsi = NULL;
3799 	return (ret);
3800 }
3801 
3802 /**
3803  * ice_create_mirror_interface - Initialize mirror interface
3804  * @sc: driver private data
3805  *
3806  * Creates and sets up a mirror interface that will mirror traffic from
3807  * the main PF interface. Includes a call to iflib_device_register() in order
3808  * to setup necessary iflib structures for this new interface as well.
3809  *
3810  * If it returns successfully, a new interface will be created and will show
3811  * up in the ifconfig interface list.
3812  *
3813  * Returns 0 on success, or a standard error code on failure.
3814  */
3815 int
ice_create_mirror_interface(struct ice_softc * sc)3816 ice_create_mirror_interface(struct ice_softc *sc)
3817 {
3818 	device_t dev = sc->dev;
3819 	struct ice_mirr_if *mif;
3820 	struct ifmedia *media;
3821 	struct sbuf *sb;
3822 	int ret = 0;
3823 
3824 	mif = (struct ice_mirr_if *)malloc(sizeof(*mif), M_ICE, M_ZERO | M_NOWAIT);
3825 	if (!mif) {
3826 		device_printf(dev, "malloc() error allocating mirror interface\n");
3827 		return (ENOMEM);
3828 	}
3829 
3830 	/* Set pointers */
3831 	sc->mirr_if = mif;
3832 	mif->back = sc;
3833 
3834 	/* Do early setup because these will be called during iflib_device_register():
3835 	 * - ice_subif_if_tx_queues_alloc
3836 	 * - ice_subif_if_rx_queues_alloc
3837 	 */
3838 	ret = ice_setup_mirror_vsi(mif);
3839 	if (ret)
3840 		goto out;
3841 
3842 	/* Determine name for new interface:
3843 	 * (base interface name)(modifier name)(modifier unit number)
3844 	 * e.g. for ice0 with a new mirror interface (modifier m)
3845 	 * of index 0, this equals "ice0m0"
3846 	 */
3847 	sb = sbuf_new_auto();
3848 	MPASS(sb != NULL);
3849 	sbuf_printf(sb, "%sm", device_get_nameunit(dev));
3850 	sbuf_finish(sb);
3851 
3852 	bus_topo_lock();
3853 	mif->subdev = device_add_child(dev, sbuf_data(sb), 0);
3854 	bus_topo_unlock();
3855 
3856 	if (!mif->subdev) {
3857 		device_printf(dev, "device_add_child failed for %s0\n", sbuf_data(sb));
3858 		sbuf_delete(sb);
3859 		free(mif, M_ICE);
3860 		sc->mirr_if = NULL;
3861 		return (ENOMEM);
3862 	}
3863 	sbuf_delete(sb);
3864 
3865 	device_set_driver(mif->subdev, &ice_subif_driver);
3866 
3867 	/* Use iflib_device_register() directly because the driver already
3868 	 * has an initialized softc to pass to iflib
3869 	 */
3870 	ret = iflib_device_register(mif->subdev, mif, &ice_subif_sctx, &mif->subctx);
3871 	if (ret)
3872 		goto out;
3873 
3874 	/* Indicate that created interface will be just for monitoring */
3875 	mif->ifp = iflib_get_ifp(mif->subctx);
3876 	if_setflagbits(mif->ifp, IFF_MONITOR, 0);
3877 
3878 	/* Use autoselect media by default */
3879 	media = iflib_get_media(mif->subctx);
3880 	ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
3881 	ifmedia_set(media, IFM_ETHER | IFM_AUTO);
3882 
3883 	device_printf(dev, "Created dev %s and ifnet %s for mirroring\n",
3884 	    device_get_nameunit(mif->subdev), if_name(mif->ifp));
3885 
3886 	ice_add_vsi_sysctls(mif->vsi);
3887 
3888 	ret = ice_wire_mirror_intrs(mif);
3889 	if (ret)
3890 		goto out;
3891 
3892 	mif->if_attached = true;
3893 	return (0);
3894 
3895 out:
3896 	ice_destroy_mirror_interface(sc);
3897 	return (ret);
3898 }
3899 
3900 /**
3901  * ice_wire_mirror_intrs
3902  * @mif: driver private subinterface structure
3903  *
3904  * Helper function that sets up driver interrupt data and calls
3905  * into iflib in order to setup interrupts in its data structures as well.
3906  *
3907  * Like ice_if_msix_intr_assign, currently requires that we get at least the same
3908  * number of vectors as we have queues, and that we always have the same number
3909  * of Tx and Rx queues. Unlike that function, this calls a special
3910  * iflib_irq_alloc_generic_subif() function for RX interrupts because the
3911  * driver needs to get MSI-X resources from the parent device.
3912  *
3913  * Tx queues use a softirq instead of using their own hardware interrupt so that
3914  * remains unchanged.
3915  *
3916  * Returns 0 on success or an error code from iflib_irq_alloc_generic_subctx()
3917  * on failure.
3918  */
3919 static int
ice_wire_mirror_intrs(struct ice_mirr_if * mif)3920 ice_wire_mirror_intrs(struct ice_mirr_if *mif)
3921 {
3922 	struct ice_softc *sc = mif->back;
3923 	struct ice_hw *hw = &sc->hw;
3924 	struct ice_vsi *vsi = mif->vsi;
3925 	device_t dev = mif->subdev;
3926 	int err, i, rid;
3927 
3928 	if_ctx_t ctx = mif->subctx;
3929 
3930 	ice_debug(hw, ICE_DBG_INIT, "%s: Last rid: %d\n", __func__, sc->last_rid);
3931 
3932 	rid = sc->last_rid + 1;
3933 	for (i = 0; i < vsi->num_rx_queues; i++, rid++) {
3934 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
3935 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
3936 		char irq_name[16];
3937 
3938 		// TODO: Change to use dynamic interface number
3939 		snprintf(irq_name, sizeof(irq_name), "m0rxq%d", i);
3940 		/* First arg is parent device (physical port's) iflib ctx */
3941 		err = iflib_irq_alloc_generic_subctx(sc->ctx, ctx,
3942 		    &mif->rx_irqvs[i].irq, rid, IFLIB_INTR_RXTX, ice_msix_que,
3943 		    rxq, rxq->me, irq_name);
3944 		if (err) {
3945 			device_printf(dev,
3946 			    "Failed to allocate q int %d err: %s\n",
3947 			    i, ice_err_str(err));
3948 			i--;
3949 			goto fail;
3950 		}
3951 		MPASS(rid - 1 > 0);
3952 		/* Set vector number used in interrupt enable/disable functions */
3953 		mif->rx_irqvs[i].me = rid - 1;
3954 		rxq->irqv = &mif->rx_irqvs[i];
3955 
3956 		bzero(irq_name, sizeof(irq_name));
3957 		snprintf(irq_name, sizeof(irq_name), "m0txq%d", i);
3958 		iflib_softirq_alloc_generic(ctx, &mif->rx_irqvs[i].irq,
3959 		    IFLIB_INTR_TX, txq, txq->me, irq_name);
3960 		txq->irqv = &mif->rx_irqvs[i];
3961 	}
3962 
3963 	sc->last_rid = rid - 1;
3964 
3965 	ice_debug(hw, ICE_DBG_INIT, "%s: New last rid: %d\n", __func__,
3966 	    sc->last_rid);
3967 
3968 	return (0);
3969 
3970 fail:
3971 	for (; i >= 0; i--)
3972 		iflib_irq_free(ctx, &mif->rx_irqvs[i].irq);
3973 	return (err);
3974 }
3975 
3976 /**
3977  * ice_subif_rebuild - Rebuild subinterface post reset
3978  * @sc: The device private softc
3979  *
3980  * Restore subinterface state after a reset occurred.
3981  * Restart the VSI and enable the mirroring.
3982  */
3983 static int
ice_subif_rebuild(struct ice_softc * sc)3984 ice_subif_rebuild(struct ice_softc *sc)
3985 {
3986 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(sc->ctx);
3987 	struct ice_vsi *vsi = sc->mirr_if->vsi;
3988 	int err;
3989 
3990 	err = ice_subif_rebuild_vsi_qmap(sc);
3991 	if (err) {
3992 		device_printf(sc->dev, "Unable to re-assign mirror VSI queues, err %s\n",
3993 		      ice_err_str(err));
3994 		return (err);
3995 	}
3996 
3997 	err = ice_initialize_vsi(vsi);
3998 	if (err) {
3999 		device_printf(sc->dev, "Unable to re-initialize mirror VSI, err %s\n",
4000 		      ice_err_str(err));
4001 		goto err_release_queue_allocations_subif;
4002 	}
4003 
4004 	err = ice_config_rss(vsi);
4005 	if (err) {
4006 		device_printf(sc->dev,
4007 		      "Unable to reconfigure RSS for the mirror VSI, err %s\n",
4008 		      ice_err_str(err));
4009 		goto err_deinit_subif_vsi;
4010 	}
4011 
4012 	vsi->mirror_src_vsi = sc->pf_vsi.idx;
4013 
4014 	err = ice_setup_vsi_mirroring(vsi);
4015 	if (err) {
4016 		device_printf(sc->dev,
4017 		      "Unable to configure mirroring for VSI: %s\n",
4018 		      ice_err_str(err));
4019 		goto err_deinit_subif_vsi;
4020 	}
4021 
4022 	ice_set_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT);
4023 
4024 	return (0);
4025 
4026 err_deinit_subif_vsi:
4027 	ice_deinit_vsi(vsi);
4028 err_release_queue_allocations_subif:
4029 	ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap,
4030 	    sc->mirr_if->num_irq_vectors);
4031 	ice_resmgr_release_map(&sc->rx_qmgr, vsi->rx_qmap,
4032 	    sc->mirr_if->num_irq_vectors);
4033 
4034 	return (err);
4035 }
4036 
4037 /**
4038  * ice_subif_rebuild_vsi_qmap - Rebuild the mirror VSI queue mapping
4039  * @sc: the device softc pointer
4040  *
4041  * Loops over the Tx and Rx queues for the mirror VSI and reassigns the queue
4042  * mapping after a reset occurred.
4043  */
4044 static int
ice_subif_rebuild_vsi_qmap(struct ice_softc * sc)4045 ice_subif_rebuild_vsi_qmap(struct ice_softc *sc)
4046 {
4047 	struct ice_vsi *vsi = sc->mirr_if->vsi;
4048 	struct ice_tx_queue *txq;
4049 	struct ice_rx_queue *rxq;
4050 	int err, i;
4051 
4052 	err = ice_resmgr_assign_scattered(&sc->tx_qmgr, vsi->tx_qmap, sc->mirr_if->num_irq_vectors);
4053 	if (err) {
4054 		device_printf(sc->dev, "Unable to assign mirror VSI Tx queues: %s\n",
4055 		      ice_err_str(err));
4056 		return (err);
4057 	}
4058 
4059 	err = ice_resmgr_assign_scattered(&sc->rx_qmgr, vsi->rx_qmap, sc->mirr_if->num_irq_vectors);
4060 	if (err) {
4061 		device_printf(sc->dev, "Unable to assign mirror VSI Rx queues: %s\n",
4062 		      ice_err_str(err));
4063 		goto err_release_tx_queues;
4064 	}
4065 
4066 	vsi->qmap_type = ICE_RESMGR_ALLOC_SCATTERED;
4067 
4068 	/* Re-assign Tx queue tail pointers */
4069 	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
4070 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
4071 
4072 	/* Re-assign Rx queue tail pointers */
4073 	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
4074 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
4075 
4076 	return (0);
4077 
4078 err_release_tx_queues:
4079 	ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap, vsi->num_tx_queues);
4080 
4081 	return (err);
4082 }
4083 
4084 /**
4085  * ice_subif_if_tx_queues_alloc - Allocate Tx queue memory for subinterfaces
4086  * @ctx: iflib context structure
4087  * @vaddrs: virtual addresses for the queue memory
4088  * @paddrs: physical addresses for the queue memory
4089  * @ntxqs: the number of Tx queues per set (should always be 1)
4090  * @ntxqsets: the number of Tx queue sets to allocate
4091  *
4092  * See ice_if_tx_queues_alloc() description. Similar to that function, but
4093  * for subinterfaces instead.
4094  */
4095 static int
ice_subif_if_tx_queues_alloc(if_ctx_t ctx,caddr_t * vaddrs,uint64_t * paddrs,int __invariant_only ntxqs,int ntxqsets)4096 ice_subif_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
4097 			     int __invariant_only ntxqs, int ntxqsets)
4098 {
4099 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4100 	struct ice_tx_queue *txq;
4101 	device_t dev = mif->subdev;
4102 	struct ice_vsi *vsi;
4103 	int err, i, j;
4104 
4105 	MPASS(mif != NULL);
4106 	MPASS(ntxqs == 1);
4107 	MPASS(mif->subscctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
4108 
4109 	vsi = mif->vsi;
4110 
4111 	MPASS(vsi->num_tx_queues == ntxqsets);
4112 
4113 	/* Allocate queue structure memory */
4114 	if (!(vsi->tx_queues =
4115 	      (struct ice_tx_queue *)malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
4116 		device_printf(dev, "%s: Unable to allocate Tx queue memory for subfunction\n",
4117 		    __func__);
4118 		return (ENOMEM);
4119 	}
4120 
4121 	/* Allocate report status arrays */
4122 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
4123 		if (!(txq->tx_rsq =
4124 		      (uint16_t *)malloc(sizeof(uint16_t) * mif->subscctx->isc_ntxd[0], M_ICE, M_NOWAIT))) {
4125 			device_printf(dev,
4126 			    "%s: Unable to allocate tx_rsq memory for subfunction\n", __func__);
4127 			err = ENOMEM;
4128 			goto free_tx_queues;
4129 		}
4130 		/* Initialize report status array */
4131 		for (j = 0; j < mif->subscctx->isc_ntxd[0]; j++)
4132 			txq->tx_rsq[j] = QIDX_INVALID;
4133 	}
4134 
4135 	/* Add Tx queue sysctls context */
4136 	ice_vsi_add_txqs_ctx(vsi);
4137 
4138 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
4139 		/* q_handle == me when only one TC */
4140 		txq->me = txq->q_handle = i;
4141 		txq->vsi = vsi;
4142 
4143 		/* store the queue size for easier access */
4144 		txq->desc_count = mif->subscctx->isc_ntxd[0];
4145 
4146 		/* get the virtual and physical address of the hardware queues */
4147 		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
4148 		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
4149 		txq->tx_paddr = paddrs[i];
4150 
4151 		ice_add_txq_sysctls(txq);
4152 	}
4153 
4154 	return (0);
4155 
4156 free_tx_queues:
4157 	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
4158 		if (txq->tx_rsq != NULL) {
4159 			free(txq->tx_rsq, M_ICE);
4160 			txq->tx_rsq = NULL;
4161 		}
4162 	}
4163 	free(vsi->tx_queues, M_ICE);
4164 	vsi->tx_queues = NULL;
4165 	return (err);
4166 }
4167 
4168 /**
4169  * ice_subif_if_rx_queues_alloc - Allocate Rx queue memory for subinterfaces
4170  * @ctx: iflib context structure
4171  * @vaddrs: virtual addresses for the queue memory
4172  * @paddrs: physical addresses for the queue memory
4173  * @nrxqs: number of Rx queues per set (should always be 1)
4174  * @nrxqsets: number of Rx queue sets to allocate
4175  *
4176  * See ice_if_rx_queues_alloc() for general summary; this is similar to that
4177  * but implemented for subinterfaces.
4178  */
4179 static int
ice_subif_if_rx_queues_alloc(if_ctx_t ctx,caddr_t * vaddrs,uint64_t * paddrs,int __invariant_only nrxqs,int nrxqsets)4180 ice_subif_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
4181     int __invariant_only nrxqs, int nrxqsets)
4182 {
4183 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4184 	struct ice_rx_queue *rxq;
4185 	device_t dev = mif->subdev;
4186 	struct ice_vsi *vsi;
4187 	int i;
4188 
4189 	MPASS(mif != NULL);
4190 	MPASS(nrxqs == 1);
4191 	MPASS(mif->subscctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
4192 
4193 	vsi = mif->vsi;
4194 
4195 	MPASS(vsi->num_rx_queues == nrxqsets);
4196 
4197 	/* Allocate queue structure memory */
4198 	if (!(vsi->rx_queues =
4199 	      (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
4200 		device_printf(dev, "%s: Unable to allocate Rx queue memory for subfunction\n",
4201 		    __func__);
4202 		return (ENOMEM);
4203 	}
4204 
4205 	/* Add Rx queue sysctls context */
4206 	ice_vsi_add_rxqs_ctx(vsi);
4207 
4208 	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
4209 		rxq->me = i;
4210 		rxq->vsi = vsi;
4211 
4212 		/* store the queue size for easier access */
4213 		rxq->desc_count = mif->subscctx->isc_nrxd[0];
4214 
4215 		/* get the virtual and physical address of the hardware queues */
4216 		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
4217 		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
4218 		rxq->rx_paddr = paddrs[i];
4219 
4220 		ice_add_rxq_sysctls(rxq);
4221 	}
4222 
4223 	return (0);
4224 }
4225 
4226 /**
4227  * ice_subif_if_msix_intr_assign - Assign MSI-X interrupts to new sub interface
4228  * @ctx: the iflib context structure
4229  * @msix: the number of vectors we were assigned
4230  *
4231  * Allocates and assigns driver private resources for MSI-X interrupt tracking.
4232  *
4233  * @pre OS MSI-X resources have been pre-allocated by parent interface.
4234  */
4235 static int
ice_subif_if_msix_intr_assign(if_ctx_t ctx,int msix)4236 ice_subif_if_msix_intr_assign(if_ctx_t ctx, int msix)
4237 {
4238 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4239 	struct ice_softc *sc = mif->back;
4240 	struct ice_vsi *vsi = mif->vsi;
4241 
4242 	device_t dev = mif->subdev;
4243 	int ret;
4244 
4245 	if (vsi->num_rx_queues != vsi->num_tx_queues) {
4246 		device_printf(dev,
4247 			      "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
4248 			      vsi->num_tx_queues, vsi->num_rx_queues);
4249 		return (EOPNOTSUPP);
4250 	}
4251 
4252 	if (msix > sc->extra_vectors) {
4253 		device_printf(dev,
4254 		     "%s: Not enough spare (%d) msix vectors for new sub-interface requested (%d)\n",
4255 		     __func__, sc->extra_vectors, msix);
4256 		return (ENOSPC);
4257 	}
4258 	device_printf(dev, "%s: Using %d vectors for sub-interface\n", __func__,
4259 	    msix);
4260 
4261 	/* Allocate space to store the IRQ vector data */
4262 	mif->num_irq_vectors = vsi->num_rx_queues;
4263 	mif->rx_irqvs = (struct ice_irq_vector *)
4264 	    malloc(sizeof(struct ice_irq_vector) * (mif->num_irq_vectors),
4265 		   M_ICE, M_NOWAIT);
4266 	if (!mif->rx_irqvs) {
4267 		device_printf(dev,
4268 			      "Unable to allocate RX irqv memory for mirror's %d vectors\n",
4269 			      mif->num_irq_vectors);
4270 		return (ENOMEM);
4271 	}
4272 
4273 	/* Assign mirror interface interrupts from PF device space */
4274 	if (!(mif->if_imap =
4275 	      (u16 *)malloc(sizeof(u16) * mif->num_irq_vectors,
4276 	      M_ICE, M_NOWAIT))) {
4277 		device_printf(dev, "Unable to allocate mirror intfc if_imap memory\n");
4278 		ret = ENOMEM;
4279 		goto free_irqvs;
4280 	}
4281 	ret = ice_resmgr_assign_contiguous(&sc->dev_imgr, mif->if_imap, mif->num_irq_vectors);
4282 	if (ret) {
4283 		device_printf(dev, "Unable to assign mirror intfc PF device interrupt mapping: %s\n",
4284 			      ice_err_str(ret));
4285 		goto free_if_imap;
4286 	}
4287 	/* Assign mirror interface interrupts from OS interrupt allocation space */
4288 	if (!(mif->os_imap =
4289 	      (u16 *)malloc(sizeof(u16) * mif->num_irq_vectors,
4290 	      M_ICE, M_NOWAIT))) {
4291 		device_printf(dev, "Unable to allocate mirror intfc os_imap memory\n");
4292 		ret = ENOMEM;
4293 		goto free_if_imap;
4294 	}
4295 	ret = ice_resmgr_assign_contiguous(&sc->os_imgr, mif->os_imap, mif->num_irq_vectors);
4296 	if (ret) {
4297 		device_printf(dev, "Unable to assign mirror intfc OS interrupt mapping: %s\n",
4298 			      ice_err_str(ret));
4299 		goto free_if_imap;
4300 	}
4301 
4302 	return (0);
4303 
4304 free_if_imap:
4305 	free(mif->if_imap, M_ICE);
4306 	mif->if_imap = NULL;
4307 free_irqvs:
4308 	free(mif->rx_irqvs, M_ICE);
4309 	mif->rx_irqvs = NULL;
4310 	return (ret);
4311 }
4312 
4313 /**
4314  * ice_subif_if_intr_enable - Enable device interrupts for a subinterface
4315  * @ctx: iflib context structure
4316  *
4317  * Called by iflib to request enabling all interrupts that belong to a
4318  * subinterface.
4319  */
4320 static void
ice_subif_if_intr_enable(if_ctx_t ctx)4321 ice_subif_if_intr_enable(if_ctx_t ctx)
4322 {
4323 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4324 	struct ice_softc *sc = mif->back;
4325 	struct ice_vsi *vsi = mif->vsi;
4326 	struct ice_hw *hw = &sc->hw;
4327 
4328 	/* Do not enable queue interrupts in recovery mode */
4329 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4330 		return;
4331 
4332 	/* Enable all queue interrupts */
4333 	for (int i = 0; i < vsi->num_rx_queues; i++)
4334 		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
4335 }
4336 
4337 /**
4338  * ice_subif_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
4339  * @ctx: iflib context structure
4340  * @rxqid: the Rx queue to enable
4341  *
4342  * Enable a specific Rx queue interrupt.
4343  *
4344  * This function is not protected by the iflib CTX lock.
4345  */
4346 static int
ice_subif_if_rx_queue_intr_enable(if_ctx_t ctx,uint16_t rxqid)4347 ice_subif_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
4348 {
4349 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4350 	struct ice_softc *sc = mif->back;
4351 	struct ice_vsi *vsi = mif->vsi;
4352 	struct ice_hw *hw = &sc->hw;
4353 
4354 	/* Do not enable queue interrupts in recovery mode */
4355 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4356 		return (ENOSYS);
4357 
4358 	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
4359 	return (0);
4360 }
4361 
4362 /**
4363  * ice_subif_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
4364  * @ctx: iflib context structure
4365  * @txqid: the Tx queue to enable
4366  *
4367  * Enable a specific Tx queue interrupt.
4368  *
4369  * This function is not protected by the iflib CTX lock.
4370  */
4371 static int
ice_subif_if_tx_queue_intr_enable(if_ctx_t ctx,uint16_t txqid)4372 ice_subif_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
4373 {
4374 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4375 	struct ice_softc *sc = mif->back;
4376 	struct ice_vsi *vsi = mif->vsi;
4377 	struct ice_hw *hw = &sc->hw;
4378 
4379 	/* Do not enable queue interrupts in recovery mode */
4380 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4381 		return (ENOSYS);
4382 
4383 	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
4384 	return (0);
4385 }
4386 
4387 /**
4388  * ice_subif_if_init - Initialize the subinterface
4389  * @ctx: iflib ctx structure
4390  *
4391  * Called by iflib to bring the device up, i.e. ifconfig ice0m0 up.
4392  * Prepares the Tx and Rx engines and enables interrupts.
4393  *
4394  * @pre assumes the caller holds the iflib CTX lock
4395  */
4396 static void
ice_subif_if_init(if_ctx_t ctx)4397 ice_subif_if_init(if_ctx_t ctx)
4398 {
4399 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4400 	struct ice_softc *sc = mif->back;
4401 	struct ice_vsi *vsi = mif->vsi;
4402 	device_t dev = mif->subdev;
4403 	int err;
4404 
4405 	if (ice_driver_is_detaching(sc))
4406 		return;
4407 
4408 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4409 		return;
4410 
4411 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
4412 		device_printf(dev,
4413 		    "request to start interface cannot be completed as the parent device %s failed to reset\n",
4414 		    device_get_nameunit(sc->dev));
4415 		return;
4416 	}
4417 
4418 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
4419 		device_printf(dev,
4420 		    "request to start interface cannot be completed while parent device %s is prepared for impending reset\n",
4421 		    device_get_nameunit(sc->dev));
4422 		return;
4423 	}
4424 
4425 	/* XXX: Equiv to ice_update_rx_mbuf_sz */
4426 	vsi->mbuf_sz = iflib_get_rx_mbuf_sz(ctx);
4427 
4428 	/* Initialize software Tx tracking values */
4429 	ice_init_tx_tracking(vsi);
4430 
4431 	err = ice_cfg_vsi_for_tx(vsi);
4432 	if (err) {
4433 		device_printf(dev,
4434 			      "Unable to configure subif VSI for Tx: %s\n",
4435 			      ice_err_str(err));
4436 		return;
4437 	}
4438 
4439 	err = ice_cfg_vsi_for_rx(vsi);
4440 	if (err) {
4441 		device_printf(dev,
4442 			      "Unable to configure subif VSI for Rx: %s\n",
4443 			      ice_err_str(err));
4444 		goto err_cleanup_tx;
4445 	}
4446 
4447 	err = ice_control_all_rx_queues(vsi, true);
4448 	if (err) {
4449 		device_printf(dev,
4450 			      "Unable to enable subif Rx rings for receive: %s\n",
4451 			      ice_err_str(err));
4452 		goto err_cleanup_tx;
4453 	}
4454 
4455 	ice_configure_all_rxq_interrupts(vsi);
4456 	ice_configure_rx_itr(vsi);
4457 
4458 	ice_set_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED);
4459 	return;
4460 
4461 err_cleanup_tx:
4462 	ice_vsi_disable_tx(vsi);
4463 }
4464 
4465 /**
4466  * ice_if_stop_subif - Stop the subinterface
4467  * @ctx: iflib context structure
4468  * @ifs: subinterface context structure
4469  *
4470  * Called by iflib to stop the subinterface and bring it down.
4471  * (e.g. ifconfig ice0m0 down)
4472  *
4473  * @pre assumes the caller holds the iflib CTX lock
4474  */
4475 static void
ice_subif_if_stop(if_ctx_t ctx)4476 ice_subif_if_stop(if_ctx_t ctx)
4477 {
4478 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4479 	struct ice_softc *sc = mif->back;
4480 	struct ice_vsi *vsi = mif->vsi;
4481 	device_t dev = mif->subdev;
4482 
4483 	if (!ice_testandclear_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED))
4484 		return;
4485 
4486 	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
4487 		device_printf(dev,
4488 		    "request to stop interface cannot be completed as the parent device %s failed to reset\n",
4489 		    device_get_nameunit(sc->dev));
4490 		return;
4491 	}
4492 
4493 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
4494 		device_printf(dev,
4495 		    "request to stop interface cannot be completed while parent device %s is prepared for impending reset\n",
4496 		    device_get_nameunit(sc->dev));
4497 		return;
4498 	}
4499 
4500 	/* Dissociate the Tx and Rx queues from the interrupts */
4501 	ice_flush_txq_interrupts(vsi);
4502 	ice_flush_rxq_interrupts(vsi);
4503 
4504 	/* Disable the Tx and Rx queues */
4505 	ice_vsi_disable_tx(vsi);
4506 	ice_control_all_rx_queues(vsi, false);
4507 }
4508 
4509 /**
4510  * ice_free_irqvs_subif - Free IRQ vector memory for subinterfaces
4511  * @mif: Mirror interface private structure
4512  *
4513  * Free IRQ vector memory allocated during ice_subif_if_msix_intr_assign.
4514  */
4515 static void
ice_free_irqvs_subif(struct ice_mirr_if * mif)4516 ice_free_irqvs_subif(struct ice_mirr_if *mif)
4517 {
4518 	struct ice_softc *sc = mif->back;
4519 	struct ice_vsi *vsi = mif->vsi;
4520 	if_ctx_t ctx = sc->ctx;
4521 	int i;
4522 
4523 	/* If the irqvs array is NULL, then there are no vectors to free */
4524 	if (mif->rx_irqvs == NULL)
4525 		return;
4526 
4527 	/* Free the IRQ vectors -- currently subinterfaces have number
4528 	 * of vectors equal to number of RX queues
4529 	 *
4530 	 * XXX: ctx is parent device's ctx, not the subinterface ctx
4531 	 */
4532 	for (i = 0; i < vsi->num_rx_queues; i++)
4533 		iflib_irq_free(ctx, &mif->rx_irqvs[i].irq);
4534 
4535 	ice_resmgr_release_map(&sc->os_imgr, mif->os_imap,
4536 	    mif->num_irq_vectors);
4537 	ice_resmgr_release_map(&sc->dev_imgr, mif->if_imap,
4538 	    mif->num_irq_vectors);
4539 
4540 	sc->last_rid -= vsi->num_rx_queues;
4541 
4542 	/* Clear the irqv pointers */
4543 	for (i = 0; i < vsi->num_rx_queues; i++)
4544 		vsi->rx_queues[i].irqv = NULL;
4545 
4546 	for (i = 0; i < vsi->num_tx_queues; i++)
4547 		vsi->tx_queues[i].irqv = NULL;
4548 
4549 	/* Release the vector array memory */
4550 	free(mif->rx_irqvs, M_ICE);
4551 	mif->rx_irqvs = NULL;
4552 }
4553 
4554 /**
4555  * ice_subif_if_queues_free - Free queue memory for subinterfaces
4556  * @ctx: the iflib context structure
4557  *
4558  * Free queue memory allocated by ice_subif_tx_queues_alloc() and
4559  * ice_subif_if_rx_queues_alloc().
4560  */
4561 static void
ice_subif_if_queues_free(if_ctx_t ctx)4562 ice_subif_if_queues_free(if_ctx_t ctx)
4563 {
4564 	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4565 	struct ice_vsi *vsi = mif->vsi;
4566 	struct ice_tx_queue *txq;
4567 	int i;
4568 
4569 	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
4570 	 * pointers.
4571 	 */
4572 	ice_vsi_del_txqs_ctx(vsi);
4573 	ice_vsi_del_rxqs_ctx(vsi);
4574 
4575 	/* Release MSI-X IRQ vectors */
4576 	ice_free_irqvs_subif(mif);
4577 
4578 	if (vsi->tx_queues != NULL) {
4579 		/* free the tx_rsq arrays */
4580 		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
4581 			if (txq->tx_rsq != NULL) {
4582 				free(txq->tx_rsq, M_ICE);
4583 				txq->tx_rsq = NULL;
4584 			}
4585 		}
4586 		free(vsi->tx_queues, M_ICE);
4587 		vsi->tx_queues = NULL;
4588 	}
4589 	if (vsi->rx_queues != NULL) {
4590 		free(vsi->rx_queues, M_ICE);
4591 		vsi->rx_queues = NULL;
4592 	}
4593 }
4594 
4595 /**
4596  * ice_subif_if_media_status - Report subinterface media
4597  * @ctx: iflib context structure
4598  * @ifmr: ifmedia request structure to update
4599  *
4600  * Updates the provided ifmr with something, in order to prevent a
4601  * "no media types?" message from ifconfig.
4602  *
4603  * Mirror interfaces are always up.
4604  */
4605 static void
ice_subif_if_media_status(if_ctx_t ctx __unused,struct ifmediareq * ifmr)4606 ice_subif_if_media_status(if_ctx_t ctx __unused, struct ifmediareq *ifmr)
4607 {
4608 	ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE;
4609 	ifmr->ifm_active = IFM_ETHER | IFM_AUTO;
4610 }
4611 
4612 /**
4613  * ice_subif_if_promisc_set - Set subinterface promiscuous mode
4614  * @ctx: iflib context structure
4615  * @flags: promiscuous flags to configure
4616  *
4617  * Called by iflib to configure device promiscuous mode.
4618  *
4619  * @remark This does not need to be implemented for now.
4620  */
4621 static int
ice_subif_if_promisc_set(if_ctx_t ctx __unused,int flags __unused)4622 ice_subif_if_promisc_set(if_ctx_t ctx __unused, int flags __unused)
4623 {
4624 	return (0);
4625 }
4626 
4627