xref: /freebsd/sys/dev/ice/ice_lib.c (revision 54b955f4df5e76b5679ba7f3eb6bb2d5fc62923d)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2023, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 /*$FreeBSD$*/
32 
33 /**
34  * @file ice_lib.c
35  * @brief Generic device setup and sysctl functions
36  *
37  * Library of generic device functions not specific to the networking stack.
38  *
39  * This includes hardware initialization functions, as well as handlers for
40  * many of the device sysctls used to probe driver status or tune specific
41  * behaviors.
42  */
43 
44 #include "ice_lib.h"
45 #include "ice_iflib.h"
46 #include <dev/pci/pcivar.h>
47 #include <dev/pci/pcireg.h>
48 #include <machine/resource.h>
49 #include <net/if_dl.h>
50 #include <sys/firmware.h>
51 #include <sys/priv.h>
52 #include <sys/limits.h>
53 
54 /**
55  * @var M_ICE
56  * @brief main ice driver allocation type
57  *
58  * malloc(9) allocation type used by the majority of memory allocations in the
59  * ice driver.
60  */
61 MALLOC_DEFINE(M_ICE, "ice", "Intel(R) 100Gb Network Driver lib allocations");
62 
63 /*
64  * Helper function prototypes
65  */
66 static int ice_get_next_vsi(struct ice_vsi **all_vsi, int size);
67 static void ice_set_default_vsi_ctx(struct ice_vsi_ctx *ctx);
68 static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctx, enum ice_vsi_type type);
69 static int ice_setup_vsi_qmap(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx);
70 static int ice_setup_tx_ctx(struct ice_tx_queue *txq,
71 			    struct ice_tlan_ctx *tlan_ctx, u16 pf_q);
72 static int ice_setup_rx_ctx(struct ice_rx_queue *rxq);
73 static int ice_is_rxq_ready(struct ice_hw *hw, int pf_q, u32 *reg);
74 static void ice_free_fltr_list(struct ice_list_head *list);
75 static int ice_add_mac_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
76 			       const u8 *addr, enum ice_sw_fwd_act_type action);
77 static void ice_check_ctrlq_errors(struct ice_softc *sc, const char *qname,
78 				   struct ice_ctl_q_info *cq);
79 static void ice_process_link_event(struct ice_softc *sc, struct ice_rq_event_info *e);
80 static void ice_process_ctrlq_event(struct ice_softc *sc, const char *qname,
81 				    struct ice_rq_event_info *event);
82 static void ice_nvm_version_str(struct ice_hw *hw, struct sbuf *buf);
83 static void ice_active_pkg_version_str(struct ice_hw *hw, struct sbuf *buf);
84 static void ice_os_pkg_version_str(struct ice_hw *hw, struct sbuf *buf);
85 static bool ice_filter_is_mcast(struct ice_vsi *vsi, struct ice_fltr_info *info);
86 static u_int ice_sync_one_mcast_filter(void *p, struct sockaddr_dl *sdl, u_int errors);
87 static void ice_add_debug_tunables(struct ice_softc *sc);
88 static void ice_add_debug_sysctls(struct ice_softc *sc);
89 static void ice_vsi_set_rss_params(struct ice_vsi *vsi);
90 static void ice_get_default_rss_key(u8 *seed);
91 static int  ice_set_rss_key(struct ice_vsi *vsi);
92 static int  ice_set_rss_lut(struct ice_vsi *vsi);
93 static void ice_set_rss_flow_flds(struct ice_vsi *vsi);
94 static void ice_clean_vsi_rss_cfg(struct ice_vsi *vsi);
95 static const char *ice_aq_speed_to_str(struct ice_port_info *pi);
96 static const char *ice_requested_fec_mode(struct ice_port_info *pi);
97 static const char *ice_negotiated_fec_mode(struct ice_port_info *pi);
98 static const char *ice_autoneg_mode(struct ice_port_info *pi);
99 static const char *ice_flowcontrol_mode(struct ice_port_info *pi);
100 static void ice_print_bus_link_data(device_t dev, struct ice_hw *hw);
101 static void ice_set_pci_link_status_data(struct ice_hw *hw, u16 link_status);
102 static uint8_t ice_pcie_bandwidth_check(struct ice_softc *sc);
103 static uint64_t ice_pcie_bus_speed_to_rate(enum ice_pcie_bus_speed speed);
104 static int ice_pcie_lnk_width_to_int(enum ice_pcie_link_width width);
105 static uint64_t ice_phy_types_to_max_rate(struct ice_port_info *pi);
106 static void ice_add_sysctls_sw_stats(struct ice_vsi *vsi,
107 				     struct sysctl_ctx_list *ctx,
108 				     struct sysctl_oid *parent);
109 static void
110 ice_add_sysctls_mac_pfc_one_stat(struct sysctl_ctx_list *ctx,
111 				 struct sysctl_oid_list *parent_list,
112 				 u64* pfc_stat_location,
113 				 const char *node_name,
114 				 const char *descr);
115 static void ice_add_sysctls_mac_pfc_stats(struct sysctl_ctx_list *ctx,
116 					  struct sysctl_oid *parent,
117 					  struct ice_hw_port_stats *stats);
118 static void ice_setup_vsi_common(struct ice_softc *sc, struct ice_vsi *vsi,
119 				 enum ice_vsi_type type, int idx,
120 				 bool dynamic);
121 static void ice_handle_mib_change_event(struct ice_softc *sc,
122 				 struct ice_rq_event_info *event);
123 static void
124 ice_handle_lan_overflow_event(struct ice_softc *sc,
125 			      struct ice_rq_event_info *event);
126 static int ice_add_ethertype_to_list(struct ice_vsi *vsi,
127 				     struct ice_list_head *list,
128 				     u16 ethertype, u16 direction,
129 				     enum ice_sw_fwd_act_type action);
130 static void ice_add_rx_lldp_filter(struct ice_softc *sc);
131 static void ice_del_rx_lldp_filter(struct ice_softc *sc);
132 static u16 ice_aq_phy_types_to_link_speeds(u64 phy_type_low,
133 					   u64 phy_type_high);
134 struct ice_phy_data;
135 static int
136 ice_intersect_phy_types_and_speeds(struct ice_softc *sc,
137 				   struct ice_phy_data *phy_data);
138 static int
139 ice_apply_saved_phy_req_to_cfg(struct ice_softc *sc,
140 			       struct ice_aqc_set_phy_cfg_data *cfg);
141 static int
142 ice_apply_saved_fec_req_to_cfg(struct ice_softc *sc,
143 			       struct ice_aqc_set_phy_cfg_data *cfg);
144 static void
145 ice_apply_saved_fc_req_to_cfg(struct ice_port_info *pi,
146 			      struct ice_aqc_set_phy_cfg_data *cfg);
147 static void
148 ice_print_ldo_tlv(struct ice_softc *sc,
149 		  struct ice_link_default_override_tlv *tlv);
150 static void
151 ice_sysctl_speeds_to_aq_phy_types(u16 sysctl_speeds, u64 *phy_type_low,
152 				  u64 *phy_type_high);
153 static u16 ice_apply_supported_speed_filter(u16 report_speeds, u8 mod_type);
154 static void
155 ice_handle_health_status_event(struct ice_softc *sc,
156 			       struct ice_rq_event_info *event);
157 static void
158 ice_print_health_status_string(device_t dev,
159 			       struct ice_aqc_health_status_elem *elem);
160 static void
161 ice_debug_print_mib_change_event(struct ice_softc *sc,
162 				 struct ice_rq_event_info *event);
163 static bool ice_check_ets_bw(u8 *table);
164 static u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg);
165 static bool
166 ice_dcb_needs_reconfig(struct ice_softc *sc, struct ice_dcbx_cfg *old_cfg,
167 		       struct ice_dcbx_cfg *new_cfg);
168 static void ice_dcb_recfg(struct ice_softc *sc);
169 static u8 ice_dcb_tc_contig(u8 tc_map);
170 static int ice_ets_str_to_tbl(const char *str, u8 *table, u8 limit);
171 static int ice_pf_vsi_cfg_tc(struct ice_softc *sc, u8 tc_map);
172 static void ice_sbuf_print_ets_cfg(struct sbuf *sbuf, const char *name,
173 				   struct ice_dcb_ets_cfg *ets);
174 static void ice_stop_pf_vsi(struct ice_softc *sc);
175 static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt);
176 static void ice_do_dcb_reconfig(struct ice_softc *sc, bool pending_mib);
177 static int ice_config_pfc(struct ice_softc *sc, u8 new_mode);
178 void
179 ice_add_dscp2tc_map_sysctls(struct ice_softc *sc,
180 			    struct sysctl_ctx_list *ctx,
181 			    struct sysctl_oid_list *ctx_list);
182 static void ice_set_default_local_mib_settings(struct ice_softc *sc);
183 static bool ice_dscp_is_mapped(struct ice_dcbx_cfg *dcbcfg);
184 static void ice_start_dcbx_agent(struct ice_softc *sc);
185 static void ice_fw_debug_dump_print_cluster(struct ice_softc *sc,
186 					    struct sbuf *sbuf, u16 cluster_id);
187 
188 static int ice_module_init(void);
189 static int ice_module_exit(void);
190 
191 /*
192  * package version comparison functions
193  */
194 static bool pkg_ver_empty(struct ice_pkg_ver *pkg_ver, u8 *pkg_name);
195 static int pkg_ver_compatible(struct ice_pkg_ver *pkg_ver);
196 
197 /*
198  * dynamic sysctl handlers
199  */
200 static int ice_sysctl_show_fw(SYSCTL_HANDLER_ARGS);
201 static int ice_sysctl_pkg_version(SYSCTL_HANDLER_ARGS);
202 static int ice_sysctl_os_pkg_version(SYSCTL_HANDLER_ARGS);
203 static int ice_sysctl_dump_mac_filters(SYSCTL_HANDLER_ARGS);
204 static int ice_sysctl_dump_vlan_filters(SYSCTL_HANDLER_ARGS);
205 static int ice_sysctl_dump_ethertype_filters(SYSCTL_HANDLER_ARGS);
206 static int ice_sysctl_dump_ethertype_mac_filters(SYSCTL_HANDLER_ARGS);
207 static int ice_sysctl_current_speed(SYSCTL_HANDLER_ARGS);
208 static int ice_sysctl_request_reset(SYSCTL_HANDLER_ARGS);
209 static int ice_sysctl_dump_state_flags(SYSCTL_HANDLER_ARGS);
210 static int ice_sysctl_fec_config(SYSCTL_HANDLER_ARGS);
211 static int ice_sysctl_fc_config(SYSCTL_HANDLER_ARGS);
212 static int ice_sysctl_negotiated_fc(SYSCTL_HANDLER_ARGS);
213 static int ice_sysctl_negotiated_fec(SYSCTL_HANDLER_ARGS);
214 static int ice_sysctl_phy_type_low(SYSCTL_HANDLER_ARGS);
215 static int ice_sysctl_phy_type_high(SYSCTL_HANDLER_ARGS);
216 static int __ice_sysctl_phy_type_handler(SYSCTL_HANDLER_ARGS,
217 					 bool is_phy_type_high);
218 static int ice_sysctl_advertise_speed(SYSCTL_HANDLER_ARGS);
219 static int ice_sysctl_rx_itr(SYSCTL_HANDLER_ARGS);
220 static int ice_sysctl_tx_itr(SYSCTL_HANDLER_ARGS);
221 static int ice_sysctl_fw_lldp_agent(SYSCTL_HANDLER_ARGS);
222 static int ice_sysctl_fw_cur_lldp_persist_status(SYSCTL_HANDLER_ARGS);
223 static int ice_sysctl_fw_dflt_lldp_persist_status(SYSCTL_HANDLER_ARGS);
224 static int ice_sysctl_phy_caps(SYSCTL_HANDLER_ARGS, u8 report_mode);
225 static int ice_sysctl_phy_sw_caps(SYSCTL_HANDLER_ARGS);
226 static int ice_sysctl_phy_nvm_caps(SYSCTL_HANDLER_ARGS);
227 static int ice_sysctl_phy_topo_caps(SYSCTL_HANDLER_ARGS);
228 static int ice_sysctl_phy_link_status(SYSCTL_HANDLER_ARGS);
229 static int ice_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS);
230 static int ice_sysctl_tx_cso_stat(SYSCTL_HANDLER_ARGS);
231 static int ice_sysctl_rx_cso_stat(SYSCTL_HANDLER_ARGS);
232 static int ice_sysctl_pba_number(SYSCTL_HANDLER_ARGS);
233 static int ice_sysctl_rx_errors_stat(SYSCTL_HANDLER_ARGS);
234 static int ice_sysctl_dump_dcbx_cfg(SYSCTL_HANDLER_ARGS);
235 static int ice_sysctl_dump_vsi_cfg(SYSCTL_HANDLER_ARGS);
236 static int ice_sysctl_ets_min_rate(SYSCTL_HANDLER_ARGS);
237 static int ice_sysctl_up2tc_map(SYSCTL_HANDLER_ARGS);
238 static int ice_sysctl_pfc_config(SYSCTL_HANDLER_ARGS);
239 static int ice_sysctl_query_port_ets(SYSCTL_HANDLER_ARGS);
240 static int ice_sysctl_dscp2tc_map(SYSCTL_HANDLER_ARGS);
241 static int ice_sysctl_pfc_mode(SYSCTL_HANDLER_ARGS);
242 static int ice_sysctl_fw_debug_dump_cluster_setting(SYSCTL_HANDLER_ARGS);
243 static int ice_sysctl_fw_debug_dump_do_dump(SYSCTL_HANDLER_ARGS);
244 static int ice_sysctl_allow_no_fec_mod_in_auto(SYSCTL_HANDLER_ARGS);
245 
246 /**
247  * ice_map_bar - Map PCIe BAR memory
248  * @dev: the PCIe device
249  * @bar: the BAR info structure
250  * @bar_num: PCIe BAR number
251  *
252  * Maps the specified PCIe BAR. Stores the mapping data in struct
253  * ice_bar_info.
254  */
255 int
256 ice_map_bar(device_t dev, struct ice_bar_info *bar, int bar_num)
257 {
258 	if (bar->res != NULL) {
259 		device_printf(dev, "PCI BAR%d already mapped\n", bar_num);
260 		return (EDOOFUS);
261 	}
262 
263 	bar->rid = PCIR_BAR(bar_num);
264 	bar->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar->rid,
265 					  RF_ACTIVE);
266 	if (!bar->res) {
267 		device_printf(dev, "PCI BAR%d mapping failed\n", bar_num);
268 		return (ENXIO);
269 	}
270 
271 	bar->tag = rman_get_bustag(bar->res);
272 	bar->handle = rman_get_bushandle(bar->res);
273 	bar->size = rman_get_size(bar->res);
274 
275 	return (0);
276 }
277 
278 /**
279  * ice_free_bar - Free PCIe BAR memory
280  * @dev: the PCIe device
281  * @bar: the BAR info structure
282  *
283  * Frees the specified PCIe BAR, releasing its resources.
284  */
285 void
286 ice_free_bar(device_t dev, struct ice_bar_info *bar)
287 {
288 	if (bar->res != NULL)
289 		bus_release_resource(dev, SYS_RES_MEMORY, bar->rid, bar->res);
290 	bar->res = NULL;
291 }
292 
293 /**
294  * ice_set_ctrlq_len - Configure ctrlq lengths for a device
295  * @hw: the device hardware structure
296  *
297  * Configures the control queues for the given device, setting up the
298  * specified lengths, prior to initializing hardware.
299  */
300 void
301 ice_set_ctrlq_len(struct ice_hw *hw)
302 {
303 	hw->adminq.num_rq_entries = ICE_AQ_LEN;
304 	hw->adminq.num_sq_entries = ICE_AQ_LEN;
305 	hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;
306 	hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
307 
308 	hw->mailboxq.num_rq_entries = ICE_MBXQ_LEN;
309 	hw->mailboxq.num_sq_entries = ICE_MBXQ_LEN;
310 	hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
311 	hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
312 
313 }
314 
/**
 * ice_get_next_vsi - Find the first unused VSI slot
 * @all_vsi: array of VSI pointers to search
 * @size: number of entries in @all_vsi
 *
 * Scans @all_vsi from index zero and returns the index of the first NULL
 * entry. If every entry is in use, or @size is not positive, @size itself is
 * returned (one past the last valid index for a full table).
 */
static int
ice_get_next_vsi(struct ice_vsi **all_vsi, int size)
{
	int slot = 0;

	/* Skip over occupied slots */
	while (slot < size && all_vsi[slot] != NULL)
		slot++;

	return (slot < size) ? slot : size;
}
335 
336 /**
337  * ice_setup_vsi_common - Common VSI setup for both dynamic and static VSIs
338  * @sc: the device private softc structure
339  * @vsi: the VSI to setup
340  * @type: the VSI type of the new VSI
341  * @idx: the index in the all_vsi array to use
342  * @dynamic: whether this VSI memory was dynamically allocated
343  *
344  * Perform setup for a VSI that is common to both dynamically allocated VSIs
345  * and the static PF VSI which is embedded in the softc structure.
346  */
347 static void
348 ice_setup_vsi_common(struct ice_softc *sc, struct ice_vsi *vsi,
349 		     enum ice_vsi_type type, int idx, bool dynamic)
350 {
351 	/* Store important values in VSI struct */
352 	vsi->type = type;
353 	vsi->sc = sc;
354 	vsi->idx = idx;
355 	sc->all_vsi[idx] = vsi;
356 	vsi->dynamic = dynamic;
357 
358 	/* Setup the VSI tunables now */
359 	ice_add_vsi_tunables(vsi, sc->vsi_sysctls);
360 }
361 
362 /**
363  * ice_alloc_vsi - Allocate a dynamic VSI
364  * @sc: device softc structure
365  * @type: VSI type
366  *
367  * Allocates a new dynamic VSI structure and inserts it into the VSI list.
368  */
369 struct ice_vsi *
370 ice_alloc_vsi(struct ice_softc *sc, enum ice_vsi_type type)
371 {
372 	struct ice_vsi *vsi;
373 	int idx;
374 
375 	/* Find an open index for a new VSI to be allocated. If the returned
376 	 * index is >= the num_available_vsi then it means no slot is
377 	 * available.
378 	 */
379 	idx = ice_get_next_vsi(sc->all_vsi, sc->num_available_vsi);
380 	if (idx >= sc->num_available_vsi) {
381 		device_printf(sc->dev, "No available VSI slots\n");
382 		return NULL;
383 	}
384 
385 	vsi = (struct ice_vsi *)malloc(sizeof(*vsi), M_ICE, M_WAITOK|M_ZERO);
386 	if (!vsi) {
387 		device_printf(sc->dev, "Unable to allocate VSI memory\n");
388 		return NULL;
389 	}
390 
391 	ice_setup_vsi_common(sc, vsi, type, idx, true);
392 
393 	return vsi;
394 }
395 
396 /**
397  * ice_setup_pf_vsi - Setup the PF VSI
398  * @sc: the device private softc
399  *
400  * Setup the PF VSI structure which is embedded as sc->pf_vsi in the device
401  * private softc. Unlike other VSIs, the PF VSI memory is allocated as part of
402  * the softc memory, instead of being dynamically allocated at creation.
403  */
404 void
405 ice_setup_pf_vsi(struct ice_softc *sc)
406 {
407 	ice_setup_vsi_common(sc, &sc->pf_vsi, ICE_VSI_PF, 0, false);
408 }
409 
410 /**
411  * ice_alloc_vsi_qmap
412  * @vsi: VSI structure
413  * @max_tx_queues: Number of transmit queues to identify
414  * @max_rx_queues: Number of receive queues to identify
415  *
416  * Allocates a max_[t|r]x_queues array of words for the VSI where each
417  * word contains the index of the queue it represents.  In here, all
418  * words are initialized to an index of ICE_INVALID_RES_IDX, indicating
419  * all queues for this VSI are not yet assigned an index and thus,
420  * not ready for use.
421  *
422  * Returns an error code on failure.
423  */
424 int
425 ice_alloc_vsi_qmap(struct ice_vsi *vsi, const int max_tx_queues,
426 		   const int max_rx_queues)
427 {
428 	struct ice_softc *sc = vsi->sc;
429 	int i;
430 
431 	MPASS(max_tx_queues > 0);
432 	MPASS(max_rx_queues > 0);
433 
434 	/* Allocate Tx queue mapping memory */
435 	if (!(vsi->tx_qmap =
436 	      (u16 *) malloc(sizeof(u16) * max_tx_queues, M_ICE, M_WAITOK))) {
437 		device_printf(sc->dev, "Unable to allocate Tx qmap memory\n");
438 		return (ENOMEM);
439 	}
440 
441 	/* Allocate Rx queue mapping memory */
442 	if (!(vsi->rx_qmap =
443 	      (u16 *) malloc(sizeof(u16) * max_rx_queues, M_ICE, M_WAITOK))) {
444 		device_printf(sc->dev, "Unable to allocate Rx qmap memory\n");
445 		goto free_tx_qmap;
446 	}
447 
448 	/* Mark every queue map as invalid to start with */
449 	for (i = 0; i < max_tx_queues; i++) {
450 		vsi->tx_qmap[i] = ICE_INVALID_RES_IDX;
451 	}
452 	for (i = 0; i < max_rx_queues; i++) {
453 		vsi->rx_qmap[i] = ICE_INVALID_RES_IDX;
454 	}
455 
456 	return 0;
457 
458 free_tx_qmap:
459 	free(vsi->tx_qmap, M_ICE);
460 	vsi->tx_qmap = NULL;
461 
462 	return (ENOMEM);
463 }
464 
465 /**
466  * ice_free_vsi_qmaps - Free the PF qmaps associated with a VSI
467  * @vsi: the VSI private structure
468  *
469  * Frees the PF qmaps associated with the given VSI. Generally this will be
470  * called by ice_release_vsi, but may need to be called during attach cleanup,
471  * depending on when the qmaps were allocated.
472  */
473 void
474 ice_free_vsi_qmaps(struct ice_vsi *vsi)
475 {
476 	struct ice_softc *sc = vsi->sc;
477 
478 	if (vsi->tx_qmap) {
479 		ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap,
480 					   vsi->num_tx_queues);
481 		free(vsi->tx_qmap, M_ICE);
482 		vsi->tx_qmap = NULL;
483 	}
484 
485 	if (vsi->rx_qmap) {
486 		ice_resmgr_release_map(&sc->rx_qmgr, vsi->rx_qmap,
487 					   vsi->num_rx_queues);
488 		free(vsi->rx_qmap, M_ICE);
489 		vsi->rx_qmap = NULL;
490 	}
491 }
492 
493 /**
494  * ice_set_default_vsi_ctx - Setup default VSI context parameters
495  * @ctx: the VSI context to initialize
496  *
497  * Initialize and prepare a default VSI context for configuring a new VSI.
498  */
499 static void
500 ice_set_default_vsi_ctx(struct ice_vsi_ctx *ctx)
501 {
502 	u32 table = 0;
503 
504 	memset(&ctx->info, 0, sizeof(ctx->info));
505 	/* VSI will be allocated from shared pool */
506 	ctx->alloc_from_pool = true;
507 	/* Enable source pruning by default */
508 	ctx->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE;
509 	/* Traffic from VSI can be sent to LAN */
510 	ctx->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA;
511 	/* Allow all packets untagged/tagged */
512 	ctx->info.inner_vlan_flags = ((ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL &
513 				       ICE_AQ_VSI_INNER_VLAN_TX_MODE_M) >>
514 				       ICE_AQ_VSI_INNER_VLAN_TX_MODE_S);
515 	/* Show VLAN/UP from packets in Rx descriptors */
516 	ctx->info.inner_vlan_flags |= ((ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH &
517 					ICE_AQ_VSI_INNER_VLAN_EMODE_M) >>
518 					ICE_AQ_VSI_INNER_VLAN_EMODE_S);
519 	/* Have 1:1 UP mapping for both ingress/egress tables */
520 	table |= ICE_UP_TABLE_TRANSLATE(0, 0);
521 	table |= ICE_UP_TABLE_TRANSLATE(1, 1);
522 	table |= ICE_UP_TABLE_TRANSLATE(2, 2);
523 	table |= ICE_UP_TABLE_TRANSLATE(3, 3);
524 	table |= ICE_UP_TABLE_TRANSLATE(4, 4);
525 	table |= ICE_UP_TABLE_TRANSLATE(5, 5);
526 	table |= ICE_UP_TABLE_TRANSLATE(6, 6);
527 	table |= ICE_UP_TABLE_TRANSLATE(7, 7);
528 	ctx->info.ingress_table = CPU_TO_LE32(table);
529 	ctx->info.egress_table = CPU_TO_LE32(table);
530 	/* Have 1:1 UP mapping for outer to inner UP table */
531 	ctx->info.outer_up_table = CPU_TO_LE32(table);
532 	/* No Outer tag support, so outer_vlan_flags remains zero */
533 }
534 
535 /**
536  * ice_set_rss_vsi_ctx - Setup VSI context parameters for RSS
537  * @ctx: the VSI context to configure
538  * @type: the VSI type
539  *
540  * Configures the VSI context for RSS, based on the VSI type.
541  */
542 static void
543 ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctx, enum ice_vsi_type type)
544 {
545 	u8 lut_type, hash_type;
546 
547 	switch (type) {
548 	case ICE_VSI_PF:
549 		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF;
550 		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
551 		break;
552 	case ICE_VSI_VF:
553 		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI;
554 		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
555 		break;
556 	default:
557 		/* Other VSI types do not support RSS */
558 		return;
559 	}
560 
561 	ctx->info.q_opt_rss = (((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) &
562 				 ICE_AQ_VSI_Q_OPT_RSS_LUT_M) |
563 				((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) &
564 				 ICE_AQ_VSI_Q_OPT_RSS_HASH_M));
565 }
566 
567 /**
568  * ice_setup_vsi_qmap - Setup the queue mapping for a VSI
569  * @vsi: the VSI to configure
570  * @ctx: the VSI context to configure
571  *
572  * Configures the context for the given VSI, setting up how the firmware
573  * should map the queues for this VSI.
574  */
575 static int
576 ice_setup_vsi_qmap(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx)
577 {
578 	int pow = 0;
579 	u16 qmap;
580 
581 	MPASS(vsi->rx_qmap != NULL);
582 
583 	switch (vsi->qmap_type) {
584 	case ICE_RESMGR_ALLOC_CONTIGUOUS:
585 		ctx->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_CONTIG);
586 
587 		ctx->info.q_mapping[0] = CPU_TO_LE16(vsi->rx_qmap[0]);
588 		ctx->info.q_mapping[1] = CPU_TO_LE16(vsi->num_rx_queues);
589 
590 		break;
591 	case ICE_RESMGR_ALLOC_SCATTERED:
592 		ctx->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_NONCONTIG);
593 
594 		for (int i = 0; i < vsi->num_rx_queues; i++)
595 			ctx->info.q_mapping[i] = CPU_TO_LE16(vsi->rx_qmap[i]);
596 		break;
597 	default:
598 		return (EOPNOTSUPP);
599 	}
600 
601 	/* Calculate the next power-of-2 of number of queues */
602 	if (vsi->num_rx_queues)
603 		pow = flsl(vsi->num_rx_queues - 1);
604 
605 	/* Assign all the queues to traffic class zero */
606 	qmap = (pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M;
607 	ctx->info.tc_mapping[0] = CPU_TO_LE16(qmap);
608 
609 	/* Fill out default driver TC queue info for VSI */
610 	vsi->tc_info[0].qoffset = 0;
611 	vsi->tc_info[0].qcount_rx = vsi->num_rx_queues;
612 	vsi->tc_info[0].qcount_tx = vsi->num_tx_queues;
613 	for (int i = 1; i < ICE_MAX_TRAFFIC_CLASS; i++) {
614 		vsi->tc_info[i].qoffset = 0;
615 		vsi->tc_info[i].qcount_rx = 1;
616 		vsi->tc_info[i].qcount_tx = 1;
617 	}
618 	vsi->tc_map = 0x1;
619 
620 	return 0;
621 }
622 
623 /**
624  * ice_initialize_vsi - Initialize a VSI for use
625  * @vsi: the vsi to initialize
626  *
627  * Initialize a VSI over the adminq and prepare it for operation.
628  */
629 int
630 ice_initialize_vsi(struct ice_vsi *vsi)
631 {
632 	struct ice_vsi_ctx ctx = { 0 };
633 	struct ice_hw *hw = &vsi->sc->hw;
634 	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
635 	enum ice_status status;
636 	int err;
637 
638 	/* For now, we only have code supporting PF VSIs */
639 	switch (vsi->type) {
640 	case ICE_VSI_PF:
641 		ctx.flags = ICE_AQ_VSI_TYPE_PF;
642 		break;
643 	default:
644 		return (ENODEV);
645 	}
646 
647 	ice_set_default_vsi_ctx(&ctx);
648 	ice_set_rss_vsi_ctx(&ctx, vsi->type);
649 
650 	/* XXX: VSIs of other types may need different port info? */
651 	ctx.info.sw_id = hw->port_info->sw_id;
652 
653 	/* Set some RSS parameters based on the VSI type */
654 	ice_vsi_set_rss_params(vsi);
655 
656 	/* Initialize the Rx queue mapping for this VSI */
657 	err = ice_setup_vsi_qmap(vsi, &ctx);
658 	if (err) {
659 		return err;
660 	}
661 
662 	/* (Re-)add VSI to HW VSI handle list */
663 	status = ice_add_vsi(hw, vsi->idx, &ctx, NULL);
664 	if (status != 0) {
665 		device_printf(vsi->sc->dev,
666 		    "Add VSI AQ call failed, err %s aq_err %s\n",
667 		    ice_status_str(status),
668 		    ice_aq_str(hw->adminq.sq_last_status));
669 		return (EIO);
670 	}
671 	vsi->info = ctx.info;
672 
673 	/* Initialize VSI with just 1 TC to start */
674 	max_txqs[0] = vsi->num_tx_queues;
675 
676 	status = ice_cfg_vsi_lan(hw->port_info, vsi->idx,
677 			      ICE_DFLT_TRAFFIC_CLASS, max_txqs);
678 	if (status) {
679 		device_printf(vsi->sc->dev,
680 		    "Failed VSI lan queue config, err %s aq_err %s\n",
681 		    ice_status_str(status),
682 		    ice_aq_str(hw->adminq.sq_last_status));
683 		ice_deinit_vsi(vsi);
684 		return (ENODEV);
685 	}
686 
687 	/* Reset VSI stats */
688 	ice_reset_vsi_stats(vsi);
689 
690 	return 0;
691 }
692 
693 /**
694  * ice_deinit_vsi - Tell firmware to release resources for a VSI
695  * @vsi: the VSI to release
696  *
697  * Helper function which requests the firmware to release the hardware
698  * resources associated with a given VSI.
699  */
700 void
701 ice_deinit_vsi(struct ice_vsi *vsi)
702 {
703 	struct ice_vsi_ctx ctx = { 0 };
704 	struct ice_softc *sc = vsi->sc;
705 	struct ice_hw *hw = &sc->hw;
706 	enum ice_status status;
707 
708 	/* Assert that the VSI pointer matches in the list */
709 	MPASS(vsi == sc->all_vsi[vsi->idx]);
710 
711 	ctx.info = vsi->info;
712 
713 	status = ice_rm_vsi_lan_cfg(hw->port_info, vsi->idx);
714 	if (status) {
715 		/*
716 		 * This should only fail if the VSI handle is invalid, or if
717 		 * any of the nodes have leaf nodes which are still in use.
718 		 */
719 		device_printf(sc->dev,
720 			      "Unable to remove scheduler nodes for VSI %d, err %s\n",
721 			      vsi->idx, ice_status_str(status));
722 	}
723 
724 	/* Tell firmware to release the VSI resources */
725 	status = ice_free_vsi(hw, vsi->idx, &ctx, false, NULL);
726 	if (status != 0) {
727 		device_printf(sc->dev,
728 		    "Free VSI %u AQ call failed, err %s aq_err %s\n",
729 		    vsi->idx, ice_status_str(status),
730 		    ice_aq_str(hw->adminq.sq_last_status));
731 	}
732 }
733 
734 /**
735  * ice_release_vsi - Release resources associated with a VSI
736  * @vsi: the VSI to release
737  *
738  * Release software and firmware resources associated with a VSI. Release the
739  * queue managers associated with this VSI. Also free the VSI structure memory
740  * if the VSI was allocated dynamically using ice_alloc_vsi().
741  */
742 void
743 ice_release_vsi(struct ice_vsi *vsi)
744 {
745 	struct ice_softc *sc = vsi->sc;
746 	int idx = vsi->idx;
747 
748 	/* Assert that the VSI pointer matches in the list */
749 	MPASS(vsi == sc->all_vsi[idx]);
750 
751 	/* Cleanup RSS configuration */
752 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_RSS))
753 		ice_clean_vsi_rss_cfg(vsi);
754 
755 	ice_del_vsi_sysctl_ctx(vsi);
756 
757 	/*
758 	 * If we unload the driver after a reset fails, we do not need to do
759 	 * this step.
760 	 */
761 	if (!ice_test_state(&sc->state, ICE_STATE_RESET_FAILED))
762 		ice_deinit_vsi(vsi);
763 
764 	ice_free_vsi_qmaps(vsi);
765 
766 	if (vsi->dynamic) {
767 		free(sc->all_vsi[idx], M_ICE);
768 	}
769 
770 	sc->all_vsi[idx] = NULL;
771 }
772 
773 /**
774  * ice_aq_speed_to_rate - Convert AdminQ speed enum to baudrate
775  * @pi: port info data
776  *
777  * Returns the baudrate value for the current link speed of a given port.
778  */
779 uint64_t
780 ice_aq_speed_to_rate(struct ice_port_info *pi)
781 {
782 	switch (pi->phy.link_info.link_speed) {
783 	case ICE_AQ_LINK_SPEED_100GB:
784 		return IF_Gbps(100);
785 	case ICE_AQ_LINK_SPEED_50GB:
786 		return IF_Gbps(50);
787 	case ICE_AQ_LINK_SPEED_40GB:
788 		return IF_Gbps(40);
789 	case ICE_AQ_LINK_SPEED_25GB:
790 		return IF_Gbps(25);
791 	case ICE_AQ_LINK_SPEED_10GB:
792 		return IF_Gbps(10);
793 	case ICE_AQ_LINK_SPEED_5GB:
794 		return IF_Gbps(5);
795 	case ICE_AQ_LINK_SPEED_2500MB:
796 		return IF_Mbps(2500);
797 	case ICE_AQ_LINK_SPEED_1000MB:
798 		return IF_Mbps(1000);
799 	case ICE_AQ_LINK_SPEED_100MB:
800 		return IF_Mbps(100);
801 	case ICE_AQ_LINK_SPEED_10MB:
802 		return IF_Mbps(10);
803 	case ICE_AQ_LINK_SPEED_UNKNOWN:
804 	default:
805 		/* return 0 if we don't know the link speed */
806 		return 0;
807 	}
808 }
809 
810 /**
811  * ice_aq_speed_to_str - Convert AdminQ speed enum to string representation
812  * @pi: port info data
813  *
814  * Returns the string representation of the current link speed for a given
815  * port.
816  */
817 static const char *
818 ice_aq_speed_to_str(struct ice_port_info *pi)
819 {
820 	switch (pi->phy.link_info.link_speed) {
821 	case ICE_AQ_LINK_SPEED_100GB:
822 		return "100 Gbps";
823 	case ICE_AQ_LINK_SPEED_50GB:
824 		return "50 Gbps";
825 	case ICE_AQ_LINK_SPEED_40GB:
826 		return "40 Gbps";
827 	case ICE_AQ_LINK_SPEED_25GB:
828 		return "25 Gbps";
829 	case ICE_AQ_LINK_SPEED_20GB:
830 		return "20 Gbps";
831 	case ICE_AQ_LINK_SPEED_10GB:
832 		return "10 Gbps";
833 	case ICE_AQ_LINK_SPEED_5GB:
834 		return "5 Gbps";
835 	case ICE_AQ_LINK_SPEED_2500MB:
836 		return "2.5 Gbps";
837 	case ICE_AQ_LINK_SPEED_1000MB:
838 		return "1 Gbps";
839 	case ICE_AQ_LINK_SPEED_100MB:
840 		return "100 Mbps";
841 	case ICE_AQ_LINK_SPEED_10MB:
842 		return "10 Mbps";
843 	case ICE_AQ_LINK_SPEED_UNKNOWN:
844 	default:
845 		return "Unknown speed";
846 	}
847 }
848 
849 /**
850  * ice_get_phy_type_low - Get media associated with phy_type_low
851  * @phy_type_low: the low 64bits of phy_type from the AdminQ
852  *
853  * Given the lower 64bits of the phy_type from the hardware, return the
854  * ifm_active bit associated. Return IFM_UNKNOWN when phy_type_low is unknown.
855  * Note that only one of ice_get_phy_type_low or ice_get_phy_type_high should
 * be called. If phy_type_low is zero, call ice_get_phy_type_high.
857  */
858 int
859 ice_get_phy_type_low(uint64_t phy_type_low)
860 {
861 	switch (phy_type_low) {
862 	case ICE_PHY_TYPE_LOW_100BASE_TX:
863 		return IFM_100_TX;
864 	case ICE_PHY_TYPE_LOW_100M_SGMII:
865 		return IFM_100_SGMII;
866 	case ICE_PHY_TYPE_LOW_1000BASE_T:
867 		return IFM_1000_T;
868 	case ICE_PHY_TYPE_LOW_1000BASE_SX:
869 		return IFM_1000_SX;
870 	case ICE_PHY_TYPE_LOW_1000BASE_LX:
871 		return IFM_1000_LX;
872 	case ICE_PHY_TYPE_LOW_1000BASE_KX:
873 		return IFM_1000_KX;
874 	case ICE_PHY_TYPE_LOW_1G_SGMII:
875 		return IFM_1000_SGMII;
876 	case ICE_PHY_TYPE_LOW_2500BASE_T:
877 		return IFM_2500_T;
878 	case ICE_PHY_TYPE_LOW_2500BASE_X:
879 		return IFM_2500_X;
880 	case ICE_PHY_TYPE_LOW_2500BASE_KX:
881 		return IFM_2500_KX;
882 	case ICE_PHY_TYPE_LOW_5GBASE_T:
883 		return IFM_5000_T;
884 	case ICE_PHY_TYPE_LOW_5GBASE_KR:
885 		return IFM_5000_KR;
886 	case ICE_PHY_TYPE_LOW_10GBASE_T:
887 		return IFM_10G_T;
888 	case ICE_PHY_TYPE_LOW_10G_SFI_DA:
889 		return IFM_10G_TWINAX;
890 	case ICE_PHY_TYPE_LOW_10GBASE_SR:
891 		return IFM_10G_SR;
892 	case ICE_PHY_TYPE_LOW_10GBASE_LR:
893 		return IFM_10G_LR;
894 	case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1:
895 		return IFM_10G_KR;
896 	case ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC:
897 		return IFM_10G_AOC;
898 	case ICE_PHY_TYPE_LOW_10G_SFI_C2C:
899 		return IFM_10G_SFI;
900 	case ICE_PHY_TYPE_LOW_25GBASE_T:
901 		return IFM_25G_T;
902 	case ICE_PHY_TYPE_LOW_25GBASE_CR:
903 		return IFM_25G_CR;
904 	case ICE_PHY_TYPE_LOW_25GBASE_CR_S:
905 		return IFM_25G_CR_S;
906 	case ICE_PHY_TYPE_LOW_25GBASE_CR1:
907 		return IFM_25G_CR1;
908 	case ICE_PHY_TYPE_LOW_25GBASE_SR:
909 		return IFM_25G_SR;
910 	case ICE_PHY_TYPE_LOW_25GBASE_LR:
911 		return IFM_25G_LR;
912 	case ICE_PHY_TYPE_LOW_25GBASE_KR:
913 		return IFM_25G_KR;
914 	case ICE_PHY_TYPE_LOW_25GBASE_KR_S:
915 		return IFM_25G_KR_S;
916 	case ICE_PHY_TYPE_LOW_25GBASE_KR1:
917 		return IFM_25G_KR1;
918 	case ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC:
919 		return IFM_25G_AOC;
920 	case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
921 		return IFM_25G_AUI;
922 	case ICE_PHY_TYPE_LOW_40GBASE_CR4:
923 		return IFM_40G_CR4;
924 	case ICE_PHY_TYPE_LOW_40GBASE_SR4:
925 		return IFM_40G_SR4;
926 	case ICE_PHY_TYPE_LOW_40GBASE_LR4:
927 		return IFM_40G_LR4;
928 	case ICE_PHY_TYPE_LOW_40GBASE_KR4:
929 		return IFM_40G_KR4;
930 	case ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC:
931 		return IFM_40G_XLAUI_AC;
932 	case ICE_PHY_TYPE_LOW_40G_XLAUI:
933 		return IFM_40G_XLAUI;
934 	case ICE_PHY_TYPE_LOW_50GBASE_CR2:
935 		return IFM_50G_CR2;
936 	case ICE_PHY_TYPE_LOW_50GBASE_SR2:
937 		return IFM_50G_SR2;
938 	case ICE_PHY_TYPE_LOW_50GBASE_LR2:
939 		return IFM_50G_LR2;
940 	case ICE_PHY_TYPE_LOW_50GBASE_KR2:
941 		return IFM_50G_KR2;
942 	case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC:
943 		return IFM_50G_LAUI2_AC;
944 	case ICE_PHY_TYPE_LOW_50G_LAUI2:
945 		return IFM_50G_LAUI2;
946 	case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC:
947 		return IFM_50G_AUI2_AC;
948 	case ICE_PHY_TYPE_LOW_50G_AUI2:
949 		return IFM_50G_AUI2;
950 	case ICE_PHY_TYPE_LOW_50GBASE_CP:
951 		return IFM_50G_CP;
952 	case ICE_PHY_TYPE_LOW_50GBASE_SR:
953 		return IFM_50G_SR;
954 	case ICE_PHY_TYPE_LOW_50GBASE_FR:
955 		return IFM_50G_FR;
956 	case ICE_PHY_TYPE_LOW_50GBASE_LR:
957 		return IFM_50G_LR;
958 	case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4:
959 		return IFM_50G_KR_PAM4;
960 	case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC:
961 		return IFM_50G_AUI1_AC;
962 	case ICE_PHY_TYPE_LOW_50G_AUI1:
963 		return IFM_50G_AUI1;
964 	case ICE_PHY_TYPE_LOW_100GBASE_CR4:
965 		return IFM_100G_CR4;
966 	case ICE_PHY_TYPE_LOW_100GBASE_SR4:
967 		return IFM_100G_SR4;
968 	case ICE_PHY_TYPE_LOW_100GBASE_LR4:
969 		return IFM_100G_LR4;
970 	case ICE_PHY_TYPE_LOW_100GBASE_KR4:
971 		return IFM_100G_KR4;
972 	case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC:
973 		return IFM_100G_CAUI4_AC;
974 	case ICE_PHY_TYPE_LOW_100G_CAUI4:
975 		return IFM_100G_CAUI4;
976 	case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC:
977 		return IFM_100G_AUI4_AC;
978 	case ICE_PHY_TYPE_LOW_100G_AUI4:
979 		return IFM_100G_AUI4;
980 	case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4:
981 		return IFM_100G_CR_PAM4;
982 	case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4:
983 		return IFM_100G_KR_PAM4;
984 	case ICE_PHY_TYPE_LOW_100GBASE_CP2:
985 		return IFM_100G_CP2;
986 	case ICE_PHY_TYPE_LOW_100GBASE_SR2:
987 		return IFM_100G_SR2;
988 	case ICE_PHY_TYPE_LOW_100GBASE_DR:
989 		return IFM_100G_DR;
990 	default:
991 		return IFM_UNKNOWN;
992 	}
993 }
994 
995 /**
996  * ice_get_phy_type_high - Get media associated with phy_type_high
997  * @phy_type_high: the upper 64bits of phy_type from the AdminQ
998  *
999  * Given the upper 64bits of the phy_type from the hardware, return the
1000  * ifm_active bit associated. Return IFM_UNKNOWN on an unknown value. Note
1001  * that only one of ice_get_phy_type_low or ice_get_phy_type_high should be
1002  * called. If phy_type_high is zero, call ice_get_phy_type_low.
1003  */
1004 int
1005 ice_get_phy_type_high(uint64_t phy_type_high)
1006 {
1007 	switch (phy_type_high) {
1008 	case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4:
1009 		return IFM_100G_KR2_PAM4;
1010 	case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC:
1011 		return IFM_100G_CAUI2_AC;
1012 	case ICE_PHY_TYPE_HIGH_100G_CAUI2:
1013 		return IFM_100G_CAUI2;
1014 	case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC:
1015 		return IFM_100G_AUI2_AC;
1016 	case ICE_PHY_TYPE_HIGH_100G_AUI2:
1017 		return IFM_100G_AUI2;
1018 	default:
1019 		return IFM_UNKNOWN;
1020 	}
1021 }
1022 
1023 /**
1024  * ice_phy_types_to_max_rate - Returns port's max supported baudrate
1025  * @pi: port info struct
1026  *
1027  * ice_aq_get_phy_caps() w/ ICE_AQC_REPORT_TOPO_CAP_MEDIA parameter needs
1028  * to have been called before this function for it to work.
1029  */
1030 static uint64_t
1031 ice_phy_types_to_max_rate(struct ice_port_info *pi)
1032 {
1033 	uint64_t phy_low = pi->phy.phy_type_low;
1034 	uint64_t phy_high = pi->phy.phy_type_high;
1035 	uint64_t max_rate = 0;
1036 	int bit;
1037 
1038 	/*
1039 	 * These are based on the indices used in the BIT() macros for
1040 	 * ICE_PHY_TYPE_LOW_*
1041 	 */
1042 	static const uint64_t phy_rates[] = {
1043 	    IF_Mbps(100),
1044 	    IF_Mbps(100),
1045 	    IF_Gbps(1ULL),
1046 	    IF_Gbps(1ULL),
1047 	    IF_Gbps(1ULL),
1048 	    IF_Gbps(1ULL),
1049 	    IF_Gbps(1ULL),
1050 	    IF_Mbps(2500ULL),
1051 	    IF_Mbps(2500ULL),
1052 	    IF_Mbps(2500ULL),
1053 	    IF_Gbps(5ULL),
1054 	    IF_Gbps(5ULL),
1055 	    IF_Gbps(10ULL),
1056 	    IF_Gbps(10ULL),
1057 	    IF_Gbps(10ULL),
1058 	    IF_Gbps(10ULL),
1059 	    IF_Gbps(10ULL),
1060 	    IF_Gbps(10ULL),
1061 	    IF_Gbps(10ULL),
1062 	    IF_Gbps(25ULL),
1063 	    IF_Gbps(25ULL),
1064 	    IF_Gbps(25ULL),
1065 	    IF_Gbps(25ULL),
1066 	    IF_Gbps(25ULL),
1067 	    IF_Gbps(25ULL),
1068 	    IF_Gbps(25ULL),
1069 	    IF_Gbps(25ULL),
1070 	    IF_Gbps(25ULL),
1071 	    IF_Gbps(25ULL),
1072 	    IF_Gbps(25ULL),
1073 	    IF_Gbps(40ULL),
1074 	    IF_Gbps(40ULL),
1075 	    IF_Gbps(40ULL),
1076 	    IF_Gbps(40ULL),
1077 	    IF_Gbps(40ULL),
1078 	    IF_Gbps(40ULL),
1079 	    IF_Gbps(50ULL),
1080 	    IF_Gbps(50ULL),
1081 	    IF_Gbps(50ULL),
1082 	    IF_Gbps(50ULL),
1083 	    IF_Gbps(50ULL),
1084 	    IF_Gbps(50ULL),
1085 	    IF_Gbps(50ULL),
1086 	    IF_Gbps(50ULL),
1087 	    IF_Gbps(50ULL),
1088 	    IF_Gbps(50ULL),
1089 	    IF_Gbps(50ULL),
1090 	    IF_Gbps(50ULL),
1091 	    IF_Gbps(50ULL),
1092 	    IF_Gbps(50ULL),
1093 	    IF_Gbps(50ULL),
1094 	    IF_Gbps(100ULL),
1095 	    IF_Gbps(100ULL),
1096 	    IF_Gbps(100ULL),
1097 	    IF_Gbps(100ULL),
1098 	    IF_Gbps(100ULL),
1099 	    IF_Gbps(100ULL),
1100 	    IF_Gbps(100ULL),
1101 	    IF_Gbps(100ULL),
1102 	    IF_Gbps(100ULL),
1103 	    IF_Gbps(100ULL),
1104 	    IF_Gbps(100ULL),
1105 	    IF_Gbps(100ULL),
1106 	    IF_Gbps(100ULL),
1107 	    /* These rates are for ICE_PHY_TYPE_HIGH_* */
1108 	    IF_Gbps(100ULL),
1109 	    IF_Gbps(100ULL),
1110 	    IF_Gbps(100ULL),
1111 	    IF_Gbps(100ULL),
1112 	    IF_Gbps(100ULL)
1113 	};
1114 
1115 	/* coverity[address_of] */
1116 	for_each_set_bit(bit, &phy_high, 64)
1117 		if ((bit + 64) < (int)ARRAY_SIZE(phy_rates))
1118 			max_rate = uqmax(max_rate, phy_rates[(bit + 64)]);
1119 
1120 	/* coverity[address_of] */
1121 	for_each_set_bit(bit, &phy_low, 64)
1122 		max_rate = uqmax(max_rate, phy_rates[bit]);
1123 
1124 	return (max_rate);
1125 }
1126 
1127 /* The if_media type is split over the original 5 bit media variant field,
1128  * along with extended types using up extra bits in the options section.
1129  * We want to convert this split number into a bitmap index, so we reverse the
1130  * calculation of IFM_X here.
1131  */
1132 #define IFM_IDX(x) (((x) & IFM_TMASK) | \
1133 		    (((x) & IFM_ETH_XTYPE) >> IFM_ETH_XSHIFT))
1134 
1135 /**
1136  * ice_add_media_types - Add supported media types to the media structure
1137  * @sc: ice private softc structure
1138  * @media: ifmedia structure to setup
1139  *
1140  * Looks up the supported phy types, and initializes the various media types
1141  * available.
1142  *
1143  * @pre this function must be protected from being called while another thread
1144  * is accessing the ifmedia types.
1145  */
1146 enum ice_status
1147 ice_add_media_types(struct ice_softc *sc, struct ifmedia *media)
1148 {
1149 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
1150 	struct ice_port_info *pi = sc->hw.port_info;
1151 	enum ice_status status;
1152 	uint64_t phy_low, phy_high;
1153 	int bit;
1154 
1155 	ASSERT_CFG_LOCKED(sc);
1156 
1157 	/* the maximum possible media type index is 511. We probably don't
1158 	 * need most of this space, but this ensures future compatibility when
1159 	 * additional media types are used.
1160 	 */
1161 	ice_declare_bitmap(already_added, 511);
1162 
1163 	/* Remove all previous media types */
1164 	ifmedia_removeall(media);
1165 
1166 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
1167 				     &pcaps, NULL);
1168 	if (status != ICE_SUCCESS) {
1169 		device_printf(sc->dev,
1170 		    "%s: ice_aq_get_phy_caps (ACTIVE) failed; status %s, aq_err %s\n",
1171 		    __func__, ice_status_str(status),
1172 		    ice_aq_str(sc->hw.adminq.sq_last_status));
1173 		return (status);
1174 	}
1175 	phy_low = le64toh(pcaps.phy_type_low);
1176 	phy_high = le64toh(pcaps.phy_type_high);
1177 
1178 	/* make sure the added bitmap is zero'd */
1179 	memset(already_added, 0, sizeof(already_added));
1180 
1181 	/* coverity[address_of] */
1182 	for_each_set_bit(bit, &phy_low, 64) {
1183 		uint64_t type = BIT_ULL(bit);
1184 		int ostype;
1185 
1186 		/* get the OS media type */
1187 		ostype = ice_get_phy_type_low(type);
1188 
1189 		/* don't bother adding the unknown type */
1190 		if (ostype == IFM_UNKNOWN)
1191 			continue;
1192 
1193 		/* only add each media type to the list once */
1194 		if (ice_is_bit_set(already_added, IFM_IDX(ostype)))
1195 			continue;
1196 
1197 		ifmedia_add(media, IFM_ETHER | ostype, 0, NULL);
1198 		ice_set_bit(IFM_IDX(ostype), already_added);
1199 	}
1200 
1201 	/* coverity[address_of] */
1202 	for_each_set_bit(bit, &phy_high, 64) {
1203 		uint64_t type = BIT_ULL(bit);
1204 		int ostype;
1205 
1206 		/* get the OS media type */
1207 		ostype = ice_get_phy_type_high(type);
1208 
1209 		/* don't bother adding the unknown type */
1210 		if (ostype == IFM_UNKNOWN)
1211 			continue;
1212 
1213 		/* only add each media type to the list once */
1214 		if (ice_is_bit_set(already_added, IFM_IDX(ostype)))
1215 			continue;
1216 
1217 		ifmedia_add(media, IFM_ETHER | ostype, 0, NULL);
1218 		ice_set_bit(IFM_IDX(ostype), already_added);
1219 	}
1220 
1221 	/* Use autoselect media by default */
1222 	ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
1223 	ifmedia_set(media, IFM_ETHER | IFM_AUTO);
1224 
1225 	return (ICE_SUCCESS);
1226 }
1227 
1228 /**
1229  * ice_configure_rxq_interrupt - Configure HW Rx queue for an MSI-X interrupt
1230  * @hw: ice hw structure
1231  * @rxqid: Rx queue index in PF space
1232  * @vector: MSI-X vector index in PF/VF space
1233  * @itr_idx: ITR index to use for interrupt
1234  *
1235  * @remark ice_flush() may need to be called after this
1236  */
1237 void
1238 ice_configure_rxq_interrupt(struct ice_hw *hw, u16 rxqid, u16 vector, u8 itr_idx)
1239 {
1240 	u32 val;
1241 
1242 	MPASS(itr_idx <= ICE_ITR_NONE);
1243 
1244 	val = (QINT_RQCTL_CAUSE_ENA_M |
1245 	       (itr_idx << QINT_RQCTL_ITR_INDX_S) |
1246 	       (vector << QINT_RQCTL_MSIX_INDX_S));
1247 	wr32(hw, QINT_RQCTL(rxqid), val);
1248 }
1249 
1250 /**
1251  * ice_configure_all_rxq_interrupts - Configure HW Rx queues for MSI-X interrupts
1252  * @vsi: the VSI to configure
1253  *
1254  * Called when setting up MSI-X interrupts to configure the Rx hardware queues.
1255  */
1256 void
1257 ice_configure_all_rxq_interrupts(struct ice_vsi *vsi)
1258 {
1259 	struct ice_hw *hw = &vsi->sc->hw;
1260 	int i;
1261 
1262 	for (i = 0; i < vsi->num_rx_queues; i++) {
1263 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1264 
1265 		ice_configure_rxq_interrupt(hw, vsi->rx_qmap[rxq->me],
1266 					    rxq->irqv->me, ICE_RX_ITR);
1267 	}
1268 
1269 	ice_flush(hw);
1270 }
1271 
1272 /**
1273  * ice_configure_txq_interrupt - Configure HW Tx queue for an MSI-X interrupt
1274  * @hw: ice hw structure
1275  * @txqid: Tx queue index in PF space
1276  * @vector: MSI-X vector index in PF/VF space
1277  * @itr_idx: ITR index to use for interrupt
1278  *
1279  * @remark ice_flush() may need to be called after this
1280  */
1281 void
1282 ice_configure_txq_interrupt(struct ice_hw *hw, u16 txqid, u16 vector, u8 itr_idx)
1283 {
1284 	u32 val;
1285 
1286 	MPASS(itr_idx <= ICE_ITR_NONE);
1287 
1288 	val = (QINT_TQCTL_CAUSE_ENA_M |
1289 	       (itr_idx << QINT_TQCTL_ITR_INDX_S) |
1290 	       (vector << QINT_TQCTL_MSIX_INDX_S));
1291 	wr32(hw, QINT_TQCTL(txqid), val);
1292 }
1293 
1294 /**
1295  * ice_configure_all_txq_interrupts - Configure HW Tx queues for MSI-X interrupts
1296  * @vsi: the VSI to configure
1297  *
1298  * Called when setting up MSI-X interrupts to configure the Tx hardware queues.
1299  */
1300 void
1301 ice_configure_all_txq_interrupts(struct ice_vsi *vsi)
1302 {
1303 	struct ice_hw *hw = &vsi->sc->hw;
1304 	int i;
1305 
1306 	for (i = 0; i < vsi->num_tx_queues; i++) {
1307 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1308 
1309 		ice_configure_txq_interrupt(hw, vsi->tx_qmap[txq->me],
1310 					    txq->irqv->me, ICE_TX_ITR);
1311 	}
1312 
1313 	ice_flush(hw);
1314 }
1315 
1316 /**
1317  * ice_flush_rxq_interrupts - Unconfigure Hw Rx queues MSI-X interrupt cause
1318  * @vsi: the VSI to configure
1319  *
1320  * Unset the CAUSE_ENA flag of the TQCTL register for each queue, then trigger
1321  * a software interrupt on that cause. This is required as part of the Rx
1322  * queue disable logic to dissociate the Rx queue from the interrupt.
1323  *
1324  * Note: this function must be called prior to disabling Rx queues with
1325  * ice_control_all_rx_queues, otherwise the Rx queue may not be disabled properly.
1326  */
1327 void
1328 ice_flush_rxq_interrupts(struct ice_vsi *vsi)
1329 {
1330 	struct ice_hw *hw = &vsi->sc->hw;
1331 	int i;
1332 
1333 	for (i = 0; i < vsi->num_rx_queues; i++) {
1334 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1335 		u32 reg, val;
1336 
1337 		/* Clear the CAUSE_ENA flag */
1338 		reg = vsi->rx_qmap[rxq->me];
1339 		val = rd32(hw, QINT_RQCTL(reg));
1340 		val &= ~QINT_RQCTL_CAUSE_ENA_M;
1341 		wr32(hw, QINT_RQCTL(reg), val);
1342 
1343 		ice_flush(hw);
1344 
1345 		/* Trigger a software interrupt to complete interrupt
1346 		 * dissociation.
1347 		 */
1348 		wr32(hw, GLINT_DYN_CTL(rxq->irqv->me),
1349 		     GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M);
1350 	}
1351 }
1352 
1353 /**
1354  * ice_flush_txq_interrupts - Unconfigure Hw Tx queues MSI-X interrupt cause
1355  * @vsi: the VSI to configure
1356  *
1357  * Unset the CAUSE_ENA flag of the TQCTL register for each queue, then trigger
1358  * a software interrupt on that cause. This is required as part of the Tx
1359  * queue disable logic to dissociate the Tx queue from the interrupt.
1360  *
1361  * Note: this function must be called prior to ice_vsi_disable_tx, otherwise
1362  * the Tx queue disable may not complete properly.
1363  */
1364 void
1365 ice_flush_txq_interrupts(struct ice_vsi *vsi)
1366 {
1367 	struct ice_hw *hw = &vsi->sc->hw;
1368 	int i;
1369 
1370 	for (i = 0; i < vsi->num_tx_queues; i++) {
1371 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1372 		u32 reg, val;
1373 
1374 		/* Clear the CAUSE_ENA flag */
1375 		reg = vsi->tx_qmap[txq->me];
1376 		val = rd32(hw, QINT_TQCTL(reg));
1377 		val &= ~QINT_TQCTL_CAUSE_ENA_M;
1378 		wr32(hw, QINT_TQCTL(reg), val);
1379 
1380 		ice_flush(hw);
1381 
1382 		/* Trigger a software interrupt to complete interrupt
1383 		 * dissociation.
1384 		 */
1385 		wr32(hw, GLINT_DYN_CTL(txq->irqv->me),
1386 		     GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M);
1387 	}
1388 }
1389 
1390 /**
1391  * ice_configure_rx_itr - Configure the Rx ITR settings for this VSI
1392  * @vsi: the VSI to configure
1393  *
1394  * Program the hardware ITR registers with the settings for this VSI.
1395  */
1396 void
1397 ice_configure_rx_itr(struct ice_vsi *vsi)
1398 {
1399 	struct ice_hw *hw = &vsi->sc->hw;
1400 	int i;
1401 
1402 	/* TODO: Handle per-queue/per-vector ITR? */
1403 
1404 	for (i = 0; i < vsi->num_rx_queues; i++) {
1405 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1406 
1407 		wr32(hw, GLINT_ITR(ICE_RX_ITR, rxq->irqv->me),
1408 		     ice_itr_to_reg(hw, vsi->rx_itr));
1409 	}
1410 
1411 	ice_flush(hw);
1412 }
1413 
1414 /**
1415  * ice_configure_tx_itr - Configure the Tx ITR settings for this VSI
1416  * @vsi: the VSI to configure
1417  *
1418  * Program the hardware ITR registers with the settings for this VSI.
1419  */
1420 void
1421 ice_configure_tx_itr(struct ice_vsi *vsi)
1422 {
1423 	struct ice_hw *hw = &vsi->sc->hw;
1424 	int i;
1425 
1426 	/* TODO: Handle per-queue/per-vector ITR? */
1427 
1428 	for (i = 0; i < vsi->num_tx_queues; i++) {
1429 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1430 
1431 		wr32(hw, GLINT_ITR(ICE_TX_ITR, txq->irqv->me),
1432 		     ice_itr_to_reg(hw, vsi->tx_itr));
1433 	}
1434 
1435 	ice_flush(hw);
1436 }
1437 
1438 /**
1439  * ice_setup_tx_ctx - Setup an ice_tlan_ctx structure for a queue
1440  * @txq: the Tx queue to configure
1441  * @tlan_ctx: the Tx LAN queue context structure to initialize
1442  * @pf_q: real queue number
1443  */
1444 static int
1445 ice_setup_tx_ctx(struct ice_tx_queue *txq, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
1446 {
1447 	struct ice_vsi *vsi = txq->vsi;
1448 	struct ice_softc *sc = vsi->sc;
1449 	struct ice_hw *hw = &sc->hw;
1450 
1451 	tlan_ctx->port_num = hw->port_info->lport;
1452 
1453 	/* number of descriptors in the queue */
1454 	tlan_ctx->qlen = txq->desc_count;
1455 
1456 	/* set the transmit queue base address, defined in 128 byte units */
1457 	tlan_ctx->base = txq->tx_paddr >> 7;
1458 
1459 	tlan_ctx->pf_num = hw->pf_id;
1460 
1461 	switch (vsi->type) {
1462 	case ICE_VSI_PF:
1463 		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
1464 		break;
1465 	default:
1466 		return (ENODEV);
1467 	}
1468 
1469 	tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
1470 
1471 	/* Enable TSO */
1472 	tlan_ctx->tso_ena = 1;
1473 	tlan_ctx->internal_usage_flag = 1;
1474 
1475 	tlan_ctx->tso_qnum = pf_q;
1476 
1477 	/*
1478 	 * Stick with the older legacy Tx queue interface, instead of the new
1479 	 * advanced queue interface.
1480 	 */
1481 	tlan_ctx->legacy_int = 1;
1482 
1483 	/* Descriptor WB mode */
1484 	tlan_ctx->wb_mode = 0;
1485 
1486 	return (0);
1487 }
1488 
1489 /**
1490  * ice_cfg_vsi_for_tx - Configure the hardware for Tx
1491  * @vsi: the VSI to configure
1492  *
1493  * Configure the device Tx queues through firmware AdminQ commands. After
1494  * this, Tx queues will be ready for transmit.
1495  */
1496 int
1497 ice_cfg_vsi_for_tx(struct ice_vsi *vsi)
1498 {
1499 	struct ice_aqc_add_tx_qgrp *qg;
1500 	struct ice_hw *hw = &vsi->sc->hw;
1501 	device_t dev = vsi->sc->dev;
1502 	enum ice_status status;
1503 	int i;
1504 	int err = 0;
1505 	u16 qg_size, pf_q;
1506 
1507 	qg_size = ice_struct_size(qg, txqs, 1);
1508 	qg = (struct ice_aqc_add_tx_qgrp *)malloc(qg_size, M_ICE, M_NOWAIT|M_ZERO);
1509 	if (!qg)
1510 		return (ENOMEM);
1511 
1512 	qg->num_txqs = 1;
1513 
1514 	for (i = 0; i < vsi->num_tx_queues; i++) {
1515 		struct ice_tlan_ctx tlan_ctx = { 0 };
1516 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1517 
1518 		pf_q = vsi->tx_qmap[txq->me];
1519 		qg->txqs[0].txq_id = htole16(pf_q);
1520 
1521 		err = ice_setup_tx_ctx(txq, &tlan_ctx, pf_q);
1522 		if (err)
1523 			goto free_txqg;
1524 
1525 		ice_set_ctx(hw, (u8 *)&tlan_ctx, qg->txqs[0].txq_ctx,
1526 			    ice_tlan_ctx_info);
1527 
1528 		status = ice_ena_vsi_txq(hw->port_info, vsi->idx, txq->tc,
1529 					 txq->q_handle, 1, qg, qg_size, NULL);
1530 		if (status) {
1531 			device_printf(dev,
1532 				      "Failed to set LAN Tx queue %d (TC %d, handle %d) context, err %s aq_err %s\n",
1533 				      i, txq->tc, txq->q_handle,
1534 				      ice_status_str(status),
1535 				      ice_aq_str(hw->adminq.sq_last_status));
1536 			err = ENODEV;
1537 			goto free_txqg;
1538 		}
1539 
1540 		/* Keep track of the Tx queue TEID */
1541 		if (pf_q == le16toh(qg->txqs[0].txq_id))
1542 			txq->q_teid = le32toh(qg->txqs[0].q_teid);
1543 	}
1544 
1545 free_txqg:
1546 	free(qg, M_ICE);
1547 
1548 	return (err);
1549 }
1550 
1551 /**
1552  * ice_setup_rx_ctx - Setup an Rx context structure for a receive queue
1553  * @rxq: the receive queue to program
1554  *
1555  * Setup an Rx queue context structure and program it into the hardware
1556  * registers. This is a necessary step for enabling the Rx queue.
1557  *
1558  * @pre the VSI associated with this queue must have initialized mbuf_sz
1559  */
1560 static int
1561 ice_setup_rx_ctx(struct ice_rx_queue *rxq)
1562 {
1563 	struct ice_rlan_ctx rlan_ctx = {0};
1564 	struct ice_vsi *vsi = rxq->vsi;
1565 	struct ice_softc *sc = vsi->sc;
1566 	struct ice_hw *hw = &sc->hw;
1567 	enum ice_status status;
1568 	u32 rxdid = ICE_RXDID_FLEX_NIC;
1569 	u32 regval;
1570 	u16 pf_q;
1571 
1572 	pf_q = vsi->rx_qmap[rxq->me];
1573 
1574 	/* set the receive queue base address, defined in 128 byte units */
1575 	rlan_ctx.base = rxq->rx_paddr >> 7;
1576 
1577 	rlan_ctx.qlen = rxq->desc_count;
1578 
1579 	rlan_ctx.dbuf = vsi->mbuf_sz >> ICE_RLAN_CTX_DBUF_S;
1580 
1581 	/* use 32 byte descriptors */
1582 	rlan_ctx.dsize = 1;
1583 
1584 	/* Strip the Ethernet CRC bytes before the packet is posted to the
1585 	 * host memory.
1586 	 */
1587 	rlan_ctx.crcstrip = 1;
1588 
1589 	rlan_ctx.l2tsel = 1;
1590 
1591 	/* don't do header splitting */
1592 	rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
1593 	rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
1594 	rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
1595 
1596 	/* strip VLAN from inner headers */
1597 	rlan_ctx.showiv = 1;
1598 
1599 	rlan_ctx.rxmax = min(vsi->max_frame_size,
1600 			     ICE_MAX_RX_SEGS * vsi->mbuf_sz);
1601 
1602 	rlan_ctx.lrxqthresh = 1;
1603 
1604 	if (vsi->type != ICE_VSI_VF) {
1605 		regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
1606 		regval &= ~QRXFLXP_CNTXT_RXDID_IDX_M;
1607 		regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
1608 			QRXFLXP_CNTXT_RXDID_IDX_M;
1609 
1610 		regval &= ~QRXFLXP_CNTXT_RXDID_PRIO_M;
1611 		regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
1612 			QRXFLXP_CNTXT_RXDID_PRIO_M;
1613 
1614 		wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
1615 	}
1616 
1617 	status = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
1618 	if (status) {
1619 		device_printf(sc->dev,
1620 			      "Failed to set LAN Rx queue context, err %s aq_err %s\n",
1621 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
1622 		return (EIO);
1623 	}
1624 
1625 	wr32(hw, rxq->tail, 0);
1626 
1627 	return 0;
1628 }
1629 
1630 /**
1631  * ice_cfg_vsi_for_rx - Configure the hardware for Rx
1632  * @vsi: the VSI to configure
1633  *
1634  * Prepare an Rx context descriptor and configure the device to receive
1635  * traffic.
1636  *
1637  * @pre the VSI must have initialized mbuf_sz
1638  */
1639 int
1640 ice_cfg_vsi_for_rx(struct ice_vsi *vsi)
1641 {
1642 	int i, err;
1643 
1644 	for (i = 0; i < vsi->num_rx_queues; i++) {
1645 		MPASS(vsi->mbuf_sz > 0);
1646 		err = ice_setup_rx_ctx(&vsi->rx_queues[i]);
1647 		if (err)
1648 			return err;
1649 	}
1650 
1651 	return (0);
1652 }
1653 
1654 /**
1655  * ice_is_rxq_ready - Check if an Rx queue is ready
1656  * @hw: ice hw structure
1657  * @pf_q: absolute PF queue index to check
1658  * @reg: on successful return, contains qrx_ctrl contents
1659  *
1660  * Reads the QRX_CTRL register and verifies if the queue is in a consistent
1661  * state. That is, QENA_REQ matches QENA_STAT. Used to check before making
1662  * a request to change the queue, as well as to verify the request has
1663  * finished. The queue should change status within a few microseconds, so we
1664  * use a small delay while polling the register.
1665  *
1666  * Returns an error code if the queue does not update after a few retries.
1667  */
1668 static int
1669 ice_is_rxq_ready(struct ice_hw *hw, int pf_q, u32 *reg)
1670 {
1671 	u32 qrx_ctrl, qena_req, qena_stat;
1672 	int i;
1673 
1674 	for (i = 0; i < ICE_Q_WAIT_RETRY_LIMIT; i++) {
1675 		qrx_ctrl = rd32(hw, QRX_CTRL(pf_q));
1676 		qena_req = (qrx_ctrl >> QRX_CTRL_QENA_REQ_S) & 1;
1677 		qena_stat = (qrx_ctrl >> QRX_CTRL_QENA_STAT_S) & 1;
1678 
1679 		/* if the request and status bits equal, then the queue is
1680 		 * fully disabled or enabled.
1681 		 */
1682 		if (qena_req == qena_stat) {
1683 			*reg = qrx_ctrl;
1684 			return (0);
1685 		}
1686 
1687 		/* wait a few microseconds before we check again */
1688 		DELAY(10);
1689 	}
1690 
1691 	return (ETIMEDOUT);
1692 }
1693 
1694 /**
1695  * ice_control_rx_queue - Configure hardware to start or stop an Rx queue
1696  * @vsi: VSI containing queue to enable/disable
1697  * @qidx: Queue index in VSI space
1698  * @enable: true to enable queue, false to disable
1699  *
1700  * Control the Rx queue through the QRX_CTRL register, enabling or disabling
1701  * it. Wait for the appropriate time to ensure that the queue has actually
1702  * reached the expected state.
1703  */
1704 int
1705 ice_control_rx_queue(struct ice_vsi *vsi, u16 qidx, bool enable)
1706 {
1707 	struct ice_hw *hw = &vsi->sc->hw;
1708 	device_t dev = vsi->sc->dev;
1709 	u32 qrx_ctrl = 0;
1710 	int err;
1711 
1712 	struct ice_rx_queue *rxq = &vsi->rx_queues[qidx];
1713 	int pf_q = vsi->rx_qmap[rxq->me];
1714 
1715 	err = ice_is_rxq_ready(hw, pf_q, &qrx_ctrl);
1716 	if (err) {
1717 		device_printf(dev,
1718 			      "Rx queue %d is not ready\n",
1719 			      pf_q);
1720 		return err;
1721 	}
1722 
1723 	/* Skip if the queue is already in correct state */
1724 	if (enable == !!(qrx_ctrl & QRX_CTRL_QENA_STAT_M))
1725 		return (0);
1726 
1727 	if (enable)
1728 		qrx_ctrl |= QRX_CTRL_QENA_REQ_M;
1729 	else
1730 		qrx_ctrl &= ~QRX_CTRL_QENA_REQ_M;
1731 	wr32(hw, QRX_CTRL(pf_q), qrx_ctrl);
1732 
1733 	/* wait for the queue to finalize the request */
1734 	err = ice_is_rxq_ready(hw, pf_q, &qrx_ctrl);
1735 	if (err) {
1736 		device_printf(dev,
1737 			      "Rx queue %d %sable timeout\n",
1738 			      pf_q, (enable ? "en" : "dis"));
1739 		return err;
1740 	}
1741 
1742 	/* this should never happen */
1743 	if (enable != !!(qrx_ctrl & QRX_CTRL_QENA_STAT_M)) {
1744 		device_printf(dev,
1745 			      "Rx queue %d invalid state\n",
1746 			      pf_q);
1747 		return (EDOOFUS);
1748 	}
1749 
1750 	return (0);
1751 }
1752 
1753 /**
1754  * ice_control_all_rx_queues - Configure hardware to start or stop the Rx queues
1755  * @vsi: VSI to enable/disable queues
1756  * @enable: true to enable queues, false to disable
1757  *
1758  * Control the Rx queues through the QRX_CTRL register, enabling or disabling
1759  * them. Wait for the appropriate time to ensure that the queues have actually
1760  * reached the expected state.
1761  */
1762 int
1763 ice_control_all_rx_queues(struct ice_vsi *vsi, bool enable)
1764 {
1765 	int i, err;
1766 
1767 	/* TODO: amortize waits by changing all queues up front and then
1768 	 * checking their status afterwards. This will become more necessary
1769 	 * when we have a large number of queues.
1770 	 */
1771 	for (i = 0; i < vsi->num_rx_queues; i++) {
1772 		err = ice_control_rx_queue(vsi, i, enable);
1773 		if (err)
1774 			break;
1775 	}
1776 
1777 	return (0);
1778 }
1779 
1780 /**
1781  * ice_add_mac_to_list - Add MAC filter to a MAC filter list
1782  * @vsi: the VSI to forward to
1783  * @list: list which contains MAC filter entries
1784  * @addr: the MAC address to be added
1785  * @action: filter action to perform on match
1786  *
1787  * Adds a MAC address filter to the list which will be forwarded to firmware
1788  * to add a series of MAC address filters.
1789  *
1790  * Returns 0 on success, and an error code on failure.
1791  *
1792  */
1793 static int
1794 ice_add_mac_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
1795 		    const u8 *addr, enum ice_sw_fwd_act_type action)
1796 {
1797 	struct ice_fltr_list_entry *entry;
1798 
1799 	entry = (__typeof(entry))malloc(sizeof(*entry), M_ICE, M_NOWAIT|M_ZERO);
1800 	if (!entry)
1801 		return (ENOMEM);
1802 
1803 	entry->fltr_info.flag = ICE_FLTR_TX;
1804 	entry->fltr_info.src_id = ICE_SRC_ID_VSI;
1805 	entry->fltr_info.lkup_type = ICE_SW_LKUP_MAC;
1806 	entry->fltr_info.fltr_act = action;
1807 	entry->fltr_info.vsi_handle = vsi->idx;
1808 	bcopy(addr, entry->fltr_info.l_data.mac.mac_addr, ETHER_ADDR_LEN);
1809 
1810 	LIST_ADD(&entry->list_entry, list);
1811 
1812 	return 0;
1813 }
1814 
1815 /**
1816  * ice_free_fltr_list - Free memory associated with a MAC address list
1817  * @list: the list to free
1818  *
1819  * Free the memory of each entry associated with the list.
1820  */
1821 static void
1822 ice_free_fltr_list(struct ice_list_head *list)
1823 {
1824 	struct ice_fltr_list_entry *e, *tmp;
1825 
1826 	LIST_FOR_EACH_ENTRY_SAFE(e, tmp, list, ice_fltr_list_entry, list_entry) {
1827 		LIST_DEL(&e->list_entry);
1828 		free(e, M_ICE);
1829 	}
1830 }
1831 
1832 /**
1833  * ice_add_vsi_mac_filter - Add a MAC address filter for a VSI
1834  * @vsi: the VSI to add the filter for
1835  * @addr: MAC address to add a filter for
1836  *
1837  * Add a MAC address filter for a given VSI. This is a wrapper around
1838  * ice_add_mac to simplify the interface. First, it only accepts a single
1839  * address, so we don't have to mess around with the list setup in other
1840  * functions. Second, it ignores the ICE_ERR_ALREADY_EXISTS error, so that
1841  * callers don't need to worry about attempting to add the same filter twice.
1842  */
1843 int
1844 ice_add_vsi_mac_filter(struct ice_vsi *vsi, const u8 *addr)
1845 {
1846 	struct ice_list_head mac_addr_list;
1847 	struct ice_hw *hw = &vsi->sc->hw;
1848 	device_t dev = vsi->sc->dev;
1849 	enum ice_status status;
1850 	int err = 0;
1851 
1852 	INIT_LIST_HEAD(&mac_addr_list);
1853 
1854 	err = ice_add_mac_to_list(vsi, &mac_addr_list, addr, ICE_FWD_TO_VSI);
1855 	if (err)
1856 		goto free_mac_list;
1857 
1858 	status = ice_add_mac(hw, &mac_addr_list);
1859 	if (status == ICE_ERR_ALREADY_EXISTS) {
1860 		; /* Don't complain if we try to add a filter that already exists */
1861 	} else if (status) {
1862 		device_printf(dev,
1863 			      "Failed to add a filter for MAC %6D, err %s aq_err %s\n",
1864 			      addr, ":",
1865 			      ice_status_str(status),
1866 			      ice_aq_str(hw->adminq.sq_last_status));
1867 		err = (EIO);
1868 	}
1869 
1870 free_mac_list:
1871 	ice_free_fltr_list(&mac_addr_list);
1872 	return err;
1873 }
1874 
1875 /**
1876  * ice_cfg_pf_default_mac_filters - Setup default unicast and broadcast addrs
1877  * @sc: device softc structure
1878  *
1879  * Program the default unicast and broadcast filters for the PF VSI.
1880  */
1881 int
1882 ice_cfg_pf_default_mac_filters(struct ice_softc *sc)
1883 {
1884 	struct ice_vsi *vsi = &sc->pf_vsi;
1885 	struct ice_hw *hw = &sc->hw;
1886 	int err;
1887 
1888 	/* Add the LAN MAC address */
1889 	err = ice_add_vsi_mac_filter(vsi, hw->port_info->mac.lan_addr);
1890 	if (err)
1891 		return err;
1892 
1893 	/* Add the broadcast address */
1894 	err = ice_add_vsi_mac_filter(vsi, broadcastaddr);
1895 	if (err)
1896 		return err;
1897 
1898 	return (0);
1899 }
1900 
1901 /**
1902  * ice_remove_vsi_mac_filter - Remove a MAC address filter for a VSI
1903  * @vsi: the VSI to add the filter for
1904  * @addr: MAC address to remove a filter for
1905  *
1906  * Remove a MAC address filter from a given VSI. This is a wrapper around
1907  * ice_remove_mac to simplify the interface. First, it only accepts a single
1908  * address, so we don't have to mess around with the list setup in other
1909  * functions. Second, it ignores the ICE_ERR_DOES_NOT_EXIST error, so that
1910  * callers don't need to worry about attempting to remove filters which
1911  * haven't yet been added.
1912  */
1913 int
1914 ice_remove_vsi_mac_filter(struct ice_vsi *vsi, const u8 *addr)
1915 {
1916 	struct ice_list_head mac_addr_list;
1917 	struct ice_hw *hw = &vsi->sc->hw;
1918 	device_t dev = vsi->sc->dev;
1919 	enum ice_status status;
1920 	int err = 0;
1921 
1922 	INIT_LIST_HEAD(&mac_addr_list);
1923 
1924 	err = ice_add_mac_to_list(vsi, &mac_addr_list, addr, ICE_FWD_TO_VSI);
1925 	if (err)
1926 		goto free_mac_list;
1927 
1928 	status = ice_remove_mac(hw, &mac_addr_list);
1929 	if (status == ICE_ERR_DOES_NOT_EXIST) {
1930 		; /* Don't complain if we try to remove a filter that doesn't exist */
1931 	} else if (status) {
1932 		device_printf(dev,
1933 			      "Failed to remove a filter for MAC %6D, err %s aq_err %s\n",
1934 			      addr, ":",
1935 			      ice_status_str(status),
1936 			      ice_aq_str(hw->adminq.sq_last_status));
1937 		err = (EIO);
1938 	}
1939 
1940 free_mac_list:
1941 	ice_free_fltr_list(&mac_addr_list);
1942 	return err;
1943 }
1944 
1945 /**
1946  * ice_rm_pf_default_mac_filters - Remove default unicast and broadcast addrs
1947  * @sc: device softc structure
1948  *
1949  * Remove the default unicast and broadcast filters from the PF VSI.
1950  */
1951 int
1952 ice_rm_pf_default_mac_filters(struct ice_softc *sc)
1953 {
1954 	struct ice_vsi *vsi = &sc->pf_vsi;
1955 	struct ice_hw *hw = &sc->hw;
1956 	int err;
1957 
1958 	/* Remove the LAN MAC address */
1959 	err = ice_remove_vsi_mac_filter(vsi, hw->port_info->mac.lan_addr);
1960 	if (err)
1961 		return err;
1962 
1963 	/* Remove the broadcast address */
1964 	err = ice_remove_vsi_mac_filter(vsi, broadcastaddr);
1965 	if (err)
1966 		return (EIO);
1967 
1968 	return (0);
1969 }
1970 
1971 /**
1972  * ice_check_ctrlq_errors - Check for and report controlq errors
1973  * @sc: device private structure
1974  * @qname: name of the controlq
1975  * @cq: the controlq to check
1976  *
1977  * Check and report controlq errors. Currently all we do is report them to the
1978  * kernel message log, but we might want to improve this in the future, such
1979  * as to keep track of statistics.
1980  */
1981 static void
1982 ice_check_ctrlq_errors(struct ice_softc *sc, const char *qname,
1983 		       struct ice_ctl_q_info *cq)
1984 {
1985 	struct ice_hw *hw = &sc->hw;
1986 	u32 val;
1987 
1988 	/* Check for error indications. Note that all the controlqs use the
1989 	 * same register layout, so we use the PF_FW_AxQLEN defines only.
1990 	 */
1991 	val = rd32(hw, cq->rq.len);
1992 	if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
1993 		   PF_FW_ARQLEN_ARQCRIT_M)) {
1994 		if (val & PF_FW_ARQLEN_ARQVFE_M)
1995 			device_printf(sc->dev,
1996 				"%s Receive Queue VF Error detected\n", qname);
1997 		if (val & PF_FW_ARQLEN_ARQOVFL_M)
1998 			device_printf(sc->dev,
1999 				"%s Receive Queue Overflow Error detected\n",
2000 				qname);
2001 		if (val & PF_FW_ARQLEN_ARQCRIT_M)
2002 			device_printf(sc->dev,
2003 				"%s Receive Queue Critical Error detected\n",
2004 				qname);
2005 		val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
2006 			 PF_FW_ARQLEN_ARQCRIT_M);
2007 		wr32(hw, cq->rq.len, val);
2008 	}
2009 
2010 	val = rd32(hw, cq->sq.len);
2011 	if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
2012 		   PF_FW_ATQLEN_ATQCRIT_M)) {
2013 		if (val & PF_FW_ATQLEN_ATQVFE_M)
2014 			device_printf(sc->dev,
2015 				"%s Send Queue VF Error detected\n", qname);
2016 		if (val & PF_FW_ATQLEN_ATQOVFL_M)
2017 			device_printf(sc->dev,
2018 				"%s Send Queue Overflow Error detected\n",
2019 				qname);
2020 		if (val & PF_FW_ATQLEN_ATQCRIT_M)
2021 			device_printf(sc->dev,
2022 				"%s Send Queue Critical Error detected\n",
2023 				qname);
2024 		val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
2025 			 PF_FW_ATQLEN_ATQCRIT_M);
2026 		wr32(hw, cq->sq.len, val);
2027 	}
2028 }
2029 
2030 /**
2031  * ice_process_link_event - Process a link event indication from firmware
2032  * @sc: device softc structure
2033  * @e: the received event data
2034  *
2035  * Gets the current link status from hardware, and may print a message if an
2036  * unqualified is detected.
2037  */
2038 static void
2039 ice_process_link_event(struct ice_softc *sc,
2040 		       struct ice_rq_event_info __invariant_only *e)
2041 {
2042 	struct ice_port_info *pi = sc->hw.port_info;
2043 	struct ice_hw *hw = &sc->hw;
2044 	device_t dev = sc->dev;
2045 	enum ice_status status;
2046 
2047 	/* Sanity check that the data length isn't too small */
2048 	MPASS(le16toh(e->desc.datalen) >= ICE_GET_LINK_STATUS_DATALEN_V1);
2049 
2050 	/*
2051 	 * Even though the adapter gets link status information inside the
2052 	 * event, it needs to send a Get Link Status AQ command in order
2053 	 * to re-enable link events.
2054 	 */
2055 	pi->phy.get_link_info = true;
2056 	ice_get_link_status(pi, &sc->link_up);
2057 
2058 	if (pi->phy.link_info.topo_media_conflict &
2059 	   (ICE_AQ_LINK_TOPO_CONFLICT | ICE_AQ_LINK_MEDIA_CONFLICT |
2060 	    ICE_AQ_LINK_TOPO_CORRUPT))
2061 		device_printf(dev,
2062 		    "Possible mis-configuration of the Ethernet port detected; please use the Intel (R) Ethernet Port Configuration Tool utility to address the issue.\n");
2063 
2064 	if ((pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) &&
2065 	    !(pi->phy.link_info.link_info & ICE_AQ_LINK_UP)) {
2066 		if (!(pi->phy.link_info.an_info & ICE_AQ_QUALIFIED_MODULE))
2067 			device_printf(dev,
2068 			    "Link is disabled on this device because an unsupported module type was detected! Refer to the Intel (R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
2069 		if (pi->phy.link_info.link_cfg_err & ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED)
2070 			device_printf(dev,
2071 			    "The module's power requirements exceed the device's power supply. Cannot start link.\n");
2072 		if (pi->phy.link_info.link_cfg_err & ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT)
2073 			device_printf(dev,
2074 			    "The installed module is incompatible with the device's NVM image. Cannot start link.\n");
2075 	}
2076 
2077 	if (!(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
2078 		if (!ice_testandset_state(&sc->state, ICE_STATE_NO_MEDIA)) {
2079 			status = ice_aq_set_link_restart_an(pi, false, NULL);
2080 			if (status != ICE_SUCCESS)
2081 				device_printf(dev,
2082 				    "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n",
2083 				    __func__, ice_status_str(status),
2084 				    ice_aq_str(hw->adminq.sq_last_status));
2085 		}
2086 	}
2087 	/* ICE_STATE_NO_MEDIA is cleared when polling task detects media */
2088 
2089 	/* Indicate that link status must be reported again */
2090 	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
2091 
2092 	/* OS link info is updated elsewhere */
2093 }
2094 
2095 /**
2096  * ice_process_ctrlq_event - Respond to a controlq event
2097  * @sc: device private structure
2098  * @qname: the name for this controlq
2099  * @event: the event to process
2100  *
2101  * Perform actions in response to various controlq event notifications.
2102  */
2103 static void
2104 ice_process_ctrlq_event(struct ice_softc *sc, const char *qname,
2105 			struct ice_rq_event_info *event)
2106 {
2107 	u16 opcode;
2108 
2109 	opcode = le16toh(event->desc.opcode);
2110 
2111 	switch (opcode) {
2112 	case ice_aqc_opc_get_link_status:
2113 		ice_process_link_event(sc, event);
2114 		break;
2115 	case ice_mbx_opc_send_msg_to_pf:
2116 		/* TODO: handle IOV event */
2117 		break;
2118 	case ice_aqc_opc_fw_logs_event:
2119 		ice_handle_fw_log_event(sc, &event->desc, event->msg_buf);
2120 		break;
2121 	case ice_aqc_opc_lldp_set_mib_change:
2122 		ice_handle_mib_change_event(sc, event);
2123 		break;
2124 	case ice_aqc_opc_event_lan_overflow:
2125 		ice_handle_lan_overflow_event(sc, event);
2126 		break;
2127 	case ice_aqc_opc_get_health_status:
2128 		ice_handle_health_status_event(sc, event);
2129 		break;
2130 	default:
2131 		device_printf(sc->dev,
2132 			      "%s Receive Queue unhandled event 0x%04x ignored\n",
2133 			      qname, opcode);
2134 	}
2135 }
2136 
2137 /**
2138  * ice_process_ctrlq - helper function to process controlq rings
2139  * @sc: device private structure
2140  * @q_type: specific control queue type
2141  * @pending: return parameter to track remaining events
2142  *
2143  * Process controlq events for a given control queue type. Returns zero on
2144  * success, and an error code on failure. If successful, pending is the number
2145  * of remaining events left in the queue.
2146  */
2147 int
2148 ice_process_ctrlq(struct ice_softc *sc, enum ice_ctl_q q_type, u16 *pending)
2149 {
2150 	struct ice_rq_event_info event = { { 0 } };
2151 	struct ice_hw *hw = &sc->hw;
2152 	struct ice_ctl_q_info *cq;
2153 	enum ice_status status;
2154 	const char *qname;
2155 	int loop = 0;
2156 
2157 	switch (q_type) {
2158 	case ICE_CTL_Q_ADMIN:
2159 		cq = &hw->adminq;
2160 		qname = "Admin";
2161 		break;
2162 	case ICE_CTL_Q_MAILBOX:
2163 		cq = &hw->mailboxq;
2164 		qname = "Mailbox";
2165 		break;
2166 	default:
2167 		device_printf(sc->dev,
2168 			      "Unknown control queue type 0x%x\n",
2169 			      q_type);
2170 		return 0;
2171 	}
2172 
2173 	ice_check_ctrlq_errors(sc, qname, cq);
2174 
2175 	/*
2176 	 * Control queue processing happens during the admin task which may be
2177 	 * holding a non-sleepable lock, so we *must* use M_NOWAIT here.
2178 	 */
2179 	event.buf_len = cq->rq_buf_size;
2180 	event.msg_buf = (u8 *)malloc(event.buf_len, M_ICE, M_ZERO | M_NOWAIT);
2181 	if (!event.msg_buf) {
2182 		device_printf(sc->dev,
2183 			      "Unable to allocate memory for %s Receive Queue event\n",
2184 			      qname);
2185 		return (ENOMEM);
2186 	}
2187 
2188 	do {
2189 		status = ice_clean_rq_elem(hw, cq, &event, pending);
2190 		if (status == ICE_ERR_AQ_NO_WORK)
2191 			break;
2192 		if (status) {
2193 			if (q_type == ICE_CTL_Q_ADMIN)
2194 				device_printf(sc->dev,
2195 					      "%s Receive Queue event error %s\n",
2196 					      qname, ice_status_str(status));
2197 			else
2198 				device_printf(sc->dev,
2199 					      "%s Receive Queue event error %s\n",
2200 					      qname, ice_status_str(status));
2201 			free(event.msg_buf, M_ICE);
2202 			return (EIO);
2203 		}
2204 		/* XXX should we separate this handler by controlq type? */
2205 		ice_process_ctrlq_event(sc, qname, &event);
2206 	} while (*pending && (++loop < ICE_CTRLQ_WORK_LIMIT));
2207 
2208 	free(event.msg_buf, M_ICE);
2209 
2210 	return 0;
2211 }
2212 
2213 /**
2214  * pkg_ver_empty - Check if a package version is empty
2215  * @pkg_ver: the package version to check
2216  * @pkg_name: the package name to check
2217  *
2218  * Checks if the package version structure is empty. We consider a package
2219  * version as empty if none of the versions are non-zero and the name string
2220  * is null as well.
2221  *
2222  * This is used to check if the package version was initialized by the driver,
2223  * as we do not expect an actual DDP package file to have a zero'd version and
2224  * name.
2225  *
2226  * @returns true if the package version is valid, or false otherwise.
2227  */
2228 static bool
2229 pkg_ver_empty(struct ice_pkg_ver *pkg_ver, u8 *pkg_name)
2230 {
2231 	return (pkg_name[0] == '\0' &&
2232 		pkg_ver->major == 0 &&
2233 		pkg_ver->minor == 0 &&
2234 		pkg_ver->update == 0 &&
2235 		pkg_ver->draft == 0);
2236 }
2237 
2238 /**
2239  * pkg_ver_compatible - Check if the package version is compatible
2240  * @pkg_ver: the package version to check
2241  *
2242  * Compares the package version number to the driver's expected major/minor
2243  * version. Returns an integer indicating whether the version is older, newer,
2244  * or compatible with the driver.
2245  *
2246  * @returns 0 if the package version is compatible, -1 if the package version
2247  * is older, and 1 if the package version is newer than the driver version.
2248  */
2249 static int
2250 pkg_ver_compatible(struct ice_pkg_ver *pkg_ver)
2251 {
2252 	if (pkg_ver->major > ICE_PKG_SUPP_VER_MAJ)
2253 		return (1); /* newer */
2254 	else if ((pkg_ver->major == ICE_PKG_SUPP_VER_MAJ) &&
2255 		 (pkg_ver->minor > ICE_PKG_SUPP_VER_MNR))
2256 		return (1); /* newer */
2257 	else if ((pkg_ver->major == ICE_PKG_SUPP_VER_MAJ) &&
2258 		 (pkg_ver->minor == ICE_PKG_SUPP_VER_MNR))
2259 		return (0); /* compatible */
2260 	else
2261 		return (-1); /* older */
2262 }
2263 
2264 /**
2265  * ice_os_pkg_version_str - Format OS package version info into a sbuf
2266  * @hw: device hw structure
2267  * @buf: string buffer to store name/version string
2268  *
2269  * Formats the name and version of the OS DDP package as found in the ice_ddp
2270  * module into a string.
2271  *
2272  * @remark This will almost always be the same as the active package, but
2273  * could be different in some cases. Use ice_active_pkg_version_str to get the
2274  * version of the active DDP package.
2275  */
2276 static void
2277 ice_os_pkg_version_str(struct ice_hw *hw, struct sbuf *buf)
2278 {
2279 	char name_buf[ICE_PKG_NAME_SIZE];
2280 
2281 	/* If the OS DDP package info is empty, use "None" */
2282 	if (pkg_ver_empty(&hw->pkg_ver, hw->pkg_name)) {
2283 		sbuf_printf(buf, "None");
2284 		return;
2285 	}
2286 
2287 	/*
2288 	 * This should already be null-terminated, but since this is a raw
2289 	 * value from an external source, strlcpy() into a new buffer to
2290 	 * make sure.
2291 	 */
2292 	bzero(name_buf, sizeof(name_buf));
2293 	strlcpy(name_buf, (char *)hw->pkg_name, ICE_PKG_NAME_SIZE);
2294 
2295 	sbuf_printf(buf, "%s version %u.%u.%u.%u",
2296 	    name_buf,
2297 	    hw->pkg_ver.major,
2298 	    hw->pkg_ver.minor,
2299 	    hw->pkg_ver.update,
2300 	    hw->pkg_ver.draft);
2301 }
2302 
2303 /**
2304  * ice_active_pkg_version_str - Format active package version info into a sbuf
2305  * @hw: device hw structure
2306  * @buf: string buffer to store name/version string
2307  *
2308  * Formats the name and version of the active DDP package info into a string
2309  * buffer for use.
2310  */
2311 static void
2312 ice_active_pkg_version_str(struct ice_hw *hw, struct sbuf *buf)
2313 {
2314 	char name_buf[ICE_PKG_NAME_SIZE];
2315 
2316 	/* If the active DDP package info is empty, use "None" */
2317 	if (pkg_ver_empty(&hw->active_pkg_ver, hw->active_pkg_name)) {
2318 		sbuf_printf(buf, "None");
2319 		return;
2320 	}
2321 
2322 	/*
2323 	 * This should already be null-terminated, but since this is a raw
2324 	 * value from an external source, strlcpy() into a new buffer to
2325 	 * make sure.
2326 	 */
2327 	bzero(name_buf, sizeof(name_buf));
2328 	strlcpy(name_buf, (char *)hw->active_pkg_name, ICE_PKG_NAME_SIZE);
2329 
2330 	sbuf_printf(buf, "%s version %u.%u.%u.%u",
2331 	    name_buf,
2332 	    hw->active_pkg_ver.major,
2333 	    hw->active_pkg_ver.minor,
2334 	    hw->active_pkg_ver.update,
2335 	    hw->active_pkg_ver.draft);
2336 
2337 	if (hw->active_track_id != 0)
2338 		sbuf_printf(buf, ", track id 0x%08x", hw->active_track_id);
2339 }
2340 
2341 /**
2342  * ice_nvm_version_str - Format the NVM version information into a sbuf
2343  * @hw: device hw structure
2344  * @buf: string buffer to store version string
2345  *
2346  * Formats the NVM information including firmware version, API version, NVM
2347  * version, the EETRACK id, and OEM specific version information into a string
2348  * buffer.
2349  */
2350 static void
2351 ice_nvm_version_str(struct ice_hw *hw, struct sbuf *buf)
2352 {
2353 	struct ice_nvm_info *nvm = &hw->flash.nvm;
2354 	struct ice_orom_info *orom = &hw->flash.orom;
2355 	struct ice_netlist_info *netlist = &hw->flash.netlist;
2356 
2357 	/* Note that the netlist versions are stored in packed Binary Coded
2358 	 * Decimal format. The use of '%x' will correctly display these as
2359 	 * decimal numbers. This works because every 4 bits will be displayed
2360 	 * as a hexadecimal digit, and the BCD format will only use the values
2361 	 * 0-9.
2362 	 */
2363 	sbuf_printf(buf,
2364 		    "fw %u.%u.%u api %u.%u nvm %x.%02x etid %08x netlist %x.%x.%x-%x.%x.%x.%04x oem %u.%u.%u",
2365 		    hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch,
2366 		    hw->api_maj_ver, hw->api_min_ver,
2367 		    nvm->major, nvm->minor, nvm->eetrack,
2368 		    netlist->major, netlist->minor,
2369 		    netlist->type >> 16, netlist->type & 0xFFFF,
2370 		    netlist->rev, netlist->cust_ver, netlist->hash,
2371 		    orom->major, orom->build, orom->patch);
2372 }
2373 
2374 /**
2375  * ice_print_nvm_version - Print the NVM info to the kernel message log
2376  * @sc: the device softc structure
2377  *
2378  * Format and print an NVM version string using ice_nvm_version_str().
2379  */
2380 void
2381 ice_print_nvm_version(struct ice_softc *sc)
2382 {
2383 	struct ice_hw *hw = &sc->hw;
2384 	device_t dev = sc->dev;
2385 	struct sbuf *sbuf;
2386 
2387 	sbuf = sbuf_new_auto();
2388 	ice_nvm_version_str(hw, sbuf);
2389 	sbuf_finish(sbuf);
2390 	device_printf(dev, "%s\n", sbuf_data(sbuf));
2391 	sbuf_delete(sbuf);
2392 }
2393 
2394 /**
2395  * ice_update_vsi_hw_stats - Update VSI-specific ethernet statistics counters
2396  * @vsi: the VSI to be updated
2397  *
2398  * Reads hardware stats and updates the ice_vsi_hw_stats tracking structure with
2399  * the updated values.
2400  */
2401 void
2402 ice_update_vsi_hw_stats(struct ice_vsi *vsi)
2403 {
2404 	struct ice_eth_stats *prev_es, *cur_es;
2405 	struct ice_hw *hw = &vsi->sc->hw;
2406 	u16 vsi_num;
2407 
2408 	if (!ice_is_vsi_valid(hw, vsi->idx))
2409 		return;
2410 
2411 	vsi_num = ice_get_hw_vsi_num(hw, vsi->idx); /* HW absolute index of a VSI */
2412 	prev_es = &vsi->hw_stats.prev;
2413 	cur_es = &vsi->hw_stats.cur;
2414 
2415 #define ICE_VSI_STAT40(name, location) \
2416 	ice_stat_update40(hw, name ## L(vsi_num), \
2417 			  vsi->hw_stats.offsets_loaded, \
2418 			  &prev_es->location, &cur_es->location)
2419 
2420 #define ICE_VSI_STAT32(name, location) \
2421 	ice_stat_update32(hw, name(vsi_num), \
2422 			  vsi->hw_stats.offsets_loaded, \
2423 			  &prev_es->location, &cur_es->location)
2424 
2425 	ICE_VSI_STAT40(GLV_GORC, rx_bytes);
2426 	ICE_VSI_STAT40(GLV_UPRC, rx_unicast);
2427 	ICE_VSI_STAT40(GLV_MPRC, rx_multicast);
2428 	ICE_VSI_STAT40(GLV_BPRC, rx_broadcast);
2429 	ICE_VSI_STAT32(GLV_RDPC, rx_discards);
2430 	ICE_VSI_STAT40(GLV_GOTC, tx_bytes);
2431 	ICE_VSI_STAT40(GLV_UPTC, tx_unicast);
2432 	ICE_VSI_STAT40(GLV_MPTC, tx_multicast);
2433 	ICE_VSI_STAT40(GLV_BPTC, tx_broadcast);
2434 	ICE_VSI_STAT32(GLV_TEPC, tx_errors);
2435 
2436 	ice_stat_update_repc(hw, vsi->idx, vsi->hw_stats.offsets_loaded,
2437 			     cur_es);
2438 
2439 #undef ICE_VSI_STAT40
2440 #undef ICE_VSI_STAT32
2441 
2442 	vsi->hw_stats.offsets_loaded = true;
2443 }
2444 
2445 /**
2446  * ice_reset_vsi_stats - Reset VSI statistics counters
2447  * @vsi: VSI structure
2448  *
2449  * Resets the software tracking counters for the VSI statistics, and indicate
2450  * that the offsets haven't been loaded. This is intended to be called
2451  * post-reset so that VSI statistics count from zero again.
2452  */
2453 void
2454 ice_reset_vsi_stats(struct ice_vsi *vsi)
2455 {
2456 	/* Reset HW stats */
2457 	memset(&vsi->hw_stats.prev, 0, sizeof(vsi->hw_stats.prev));
2458 	memset(&vsi->hw_stats.cur, 0, sizeof(vsi->hw_stats.cur));
2459 	vsi->hw_stats.offsets_loaded = false;
2460 }
2461 
2462 /**
2463  * ice_update_pf_stats - Update port stats counters
2464  * @sc: device private softc structure
2465  *
2466  * Reads hardware statistics registers and updates the software tracking
2467  * structure with new values.
2468  */
2469 void
2470 ice_update_pf_stats(struct ice_softc *sc)
2471 {
2472 	struct ice_hw_port_stats *prev_ps, *cur_ps;
2473 	struct ice_hw *hw = &sc->hw;
2474 	u8 lport;
2475 
2476 	MPASS(hw->port_info);
2477 
2478 	prev_ps = &sc->stats.prev;
2479 	cur_ps = &sc->stats.cur;
2480 	lport = hw->port_info->lport;
2481 
2482 #define ICE_PF_STAT_PFC(name, location, index) \
2483 	ice_stat_update40(hw, name(lport, index), \
2484 			  sc->stats.offsets_loaded, \
2485 			  &prev_ps->location[index], &cur_ps->location[index])
2486 
2487 #define ICE_PF_STAT40(name, location) \
2488 	ice_stat_update40(hw, name ## L(lport), \
2489 			  sc->stats.offsets_loaded, \
2490 			  &prev_ps->location, &cur_ps->location)
2491 
2492 #define ICE_PF_STAT32(name, location) \
2493 	ice_stat_update32(hw, name(lport), \
2494 			  sc->stats.offsets_loaded, \
2495 			  &prev_ps->location, &cur_ps->location)
2496 
2497 	ICE_PF_STAT40(GLPRT_GORC, eth.rx_bytes);
2498 	ICE_PF_STAT40(GLPRT_UPRC, eth.rx_unicast);
2499 	ICE_PF_STAT40(GLPRT_MPRC, eth.rx_multicast);
2500 	ICE_PF_STAT40(GLPRT_BPRC, eth.rx_broadcast);
2501 	ICE_PF_STAT40(GLPRT_GOTC, eth.tx_bytes);
2502 	ICE_PF_STAT40(GLPRT_UPTC, eth.tx_unicast);
2503 	ICE_PF_STAT40(GLPRT_MPTC, eth.tx_multicast);
2504 	ICE_PF_STAT40(GLPRT_BPTC, eth.tx_broadcast);
2505 	/* This stat register doesn't have an lport */
2506 	ice_stat_update32(hw, PRTRPB_RDPC,
2507 			  sc->stats.offsets_loaded,
2508 			  &prev_ps->eth.rx_discards, &cur_ps->eth.rx_discards);
2509 
2510 	ICE_PF_STAT32(GLPRT_TDOLD, tx_dropped_link_down);
2511 	ICE_PF_STAT40(GLPRT_PRC64, rx_size_64);
2512 	ICE_PF_STAT40(GLPRT_PRC127, rx_size_127);
2513 	ICE_PF_STAT40(GLPRT_PRC255, rx_size_255);
2514 	ICE_PF_STAT40(GLPRT_PRC511, rx_size_511);
2515 	ICE_PF_STAT40(GLPRT_PRC1023, rx_size_1023);
2516 	ICE_PF_STAT40(GLPRT_PRC1522, rx_size_1522);
2517 	ICE_PF_STAT40(GLPRT_PRC9522, rx_size_big);
2518 	ICE_PF_STAT40(GLPRT_PTC64, tx_size_64);
2519 	ICE_PF_STAT40(GLPRT_PTC127, tx_size_127);
2520 	ICE_PF_STAT40(GLPRT_PTC255, tx_size_255);
2521 	ICE_PF_STAT40(GLPRT_PTC511, tx_size_511);
2522 	ICE_PF_STAT40(GLPRT_PTC1023, tx_size_1023);
2523 	ICE_PF_STAT40(GLPRT_PTC1522, tx_size_1522);
2524 	ICE_PF_STAT40(GLPRT_PTC9522, tx_size_big);
2525 
2526 	/* Update Priority Flow Control Stats */
2527 	for (int i = 0; i <= GLPRT_PXOFFRXC_MAX_INDEX; i++) {
2528 		ICE_PF_STAT_PFC(GLPRT_PXONRXC, priority_xon_rx, i);
2529 		ICE_PF_STAT_PFC(GLPRT_PXOFFRXC, priority_xoff_rx, i);
2530 		ICE_PF_STAT_PFC(GLPRT_PXONTXC, priority_xon_tx, i);
2531 		ICE_PF_STAT_PFC(GLPRT_PXOFFTXC, priority_xoff_tx, i);
2532 		ICE_PF_STAT_PFC(GLPRT_RXON2OFFCNT, priority_xon_2_xoff, i);
2533 	}
2534 
2535 	ICE_PF_STAT32(GLPRT_LXONRXC, link_xon_rx);
2536 	ICE_PF_STAT32(GLPRT_LXOFFRXC, link_xoff_rx);
2537 	ICE_PF_STAT32(GLPRT_LXONTXC, link_xon_tx);
2538 	ICE_PF_STAT32(GLPRT_LXOFFTXC, link_xoff_tx);
2539 	ICE_PF_STAT32(GLPRT_CRCERRS, crc_errors);
2540 	ICE_PF_STAT32(GLPRT_ILLERRC, illegal_bytes);
2541 	ICE_PF_STAT32(GLPRT_MLFC, mac_local_faults);
2542 	ICE_PF_STAT32(GLPRT_MRFC, mac_remote_faults);
2543 	ICE_PF_STAT32(GLPRT_RLEC, rx_len_errors);
2544 	ICE_PF_STAT32(GLPRT_RUC, rx_undersize);
2545 	ICE_PF_STAT32(GLPRT_RFC, rx_fragments);
2546 	ICE_PF_STAT32(GLPRT_ROC, rx_oversize);
2547 	ICE_PF_STAT32(GLPRT_RJC, rx_jabber);
2548 
2549 #undef ICE_PF_STAT40
2550 #undef ICE_PF_STAT32
2551 #undef ICE_PF_STAT_PFC
2552 
2553 	sc->stats.offsets_loaded = true;
2554 }
2555 
2556 /**
2557  * ice_reset_pf_stats - Reset port stats counters
2558  * @sc: Device private softc structure
2559  *
2560  * Reset software tracking values for statistics to zero, and indicate that
2561  * offsets haven't been loaded. Intended to be called after a device reset so
2562  * that statistics count from zero again.
2563  */
2564 void
2565 ice_reset_pf_stats(struct ice_softc *sc)
2566 {
2567 	memset(&sc->stats.prev, 0, sizeof(sc->stats.prev));
2568 	memset(&sc->stats.cur, 0, sizeof(sc->stats.cur));
2569 	sc->stats.offsets_loaded = false;
2570 }
2571 
2572 /**
2573  * ice_sysctl_show_fw - sysctl callback to show firmware information
2574  * @oidp: sysctl oid structure
2575  * @arg1: pointer to private data structure
2576  * @arg2: unused
2577  * @req: sysctl request pointer
2578  *
2579  * Callback for the fw_version sysctl, to display the current firmware
2580  * information found at hardware init time.
2581  */
2582 static int
2583 ice_sysctl_show_fw(SYSCTL_HANDLER_ARGS)
2584 {
2585 	struct ice_softc *sc = (struct ice_softc *)arg1;
2586 	struct ice_hw *hw = &sc->hw;
2587 	struct sbuf *sbuf;
2588 
2589 	UNREFERENCED_PARAMETER(oidp);
2590 	UNREFERENCED_PARAMETER(arg2);
2591 
2592 	if (ice_driver_is_detaching(sc))
2593 		return (ESHUTDOWN);
2594 
2595 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
2596 	ice_nvm_version_str(hw, sbuf);
2597 	sbuf_finish(sbuf);
2598 	sbuf_delete(sbuf);
2599 
2600 	return (0);
2601 }
2602 
2603 /**
2604  * ice_sysctl_pba_number - sysctl callback to show PBA number
2605  * @oidp: sysctl oid structure
2606  * @arg1: pointer to private data structure
2607  * @arg2: unused
2608  * @req: sysctl request pointer
2609  *
2610  * Callback for the pba_number sysctl, used to read the Product Board Assembly
2611  * number for this device.
2612  */
2613 static int
2614 ice_sysctl_pba_number(SYSCTL_HANDLER_ARGS)
2615 {
2616 	struct ice_softc *sc = (struct ice_softc *)arg1;
2617 	struct ice_hw *hw = &sc->hw;
2618 	device_t dev = sc->dev;
2619 	u8 pba_string[32] = "";
2620 	enum ice_status status;
2621 
2622 	UNREFERENCED_PARAMETER(arg2);
2623 
2624 	if (ice_driver_is_detaching(sc))
2625 		return (ESHUTDOWN);
2626 
2627 	status = ice_read_pba_string(hw, pba_string, sizeof(pba_string));
2628 	if (status) {
2629 		device_printf(dev,
2630 		    "%s: failed to read PBA string from NVM; status %s, aq_err %s\n",
2631 		    __func__, ice_status_str(status),
2632 		    ice_aq_str(hw->adminq.sq_last_status));
2633 		return (EIO);
2634 	}
2635 
2636 	return sysctl_handle_string(oidp, pba_string, sizeof(pba_string), req);
2637 }
2638 
2639 /**
2640  * ice_sysctl_pkg_version - sysctl to show the active package version info
2641  * @oidp: sysctl oid structure
2642  * @arg1: pointer to private data structure
2643  * @arg2: unused
2644  * @req: sysctl request pointer
2645  *
2646  * Callback for the pkg_version sysctl, to display the active DDP package name
2647  * and version information.
2648  */
2649 static int
2650 ice_sysctl_pkg_version(SYSCTL_HANDLER_ARGS)
2651 {
2652 	struct ice_softc *sc = (struct ice_softc *)arg1;
2653 	struct ice_hw *hw = &sc->hw;
2654 	struct sbuf *sbuf;
2655 
2656 	UNREFERENCED_PARAMETER(oidp);
2657 	UNREFERENCED_PARAMETER(arg2);
2658 
2659 	if (ice_driver_is_detaching(sc))
2660 		return (ESHUTDOWN);
2661 
2662 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
2663 	ice_active_pkg_version_str(hw, sbuf);
2664 	sbuf_finish(sbuf);
2665 	sbuf_delete(sbuf);
2666 
2667 	return (0);
2668 }
2669 
2670 /**
2671  * ice_sysctl_os_pkg_version - sysctl to show the OS package version info
2672  * @oidp: sysctl oid structure
2673  * @arg1: pointer to private data structure
2674  * @arg2: unused
2675  * @req: sysctl request pointer
2676  *
2677  * Callback for the pkg_version sysctl, to display the OS DDP package name and
2678  * version info found in the ice_ddp module.
2679  */
2680 static int
2681 ice_sysctl_os_pkg_version(SYSCTL_HANDLER_ARGS)
2682 {
2683 	struct ice_softc *sc = (struct ice_softc *)arg1;
2684 	struct ice_hw *hw = &sc->hw;
2685 	struct sbuf *sbuf;
2686 
2687 	UNREFERENCED_PARAMETER(oidp);
2688 	UNREFERENCED_PARAMETER(arg2);
2689 
2690 	if (ice_driver_is_detaching(sc))
2691 		return (ESHUTDOWN);
2692 
2693 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
2694 	ice_os_pkg_version_str(hw, sbuf);
2695 	sbuf_finish(sbuf);
2696 	sbuf_delete(sbuf);
2697 
2698 	return (0);
2699 }
2700 
2701 /**
2702  * ice_sysctl_current_speed - sysctl callback to show current link speed
2703  * @oidp: sysctl oid structure
2704  * @arg1: pointer to private data structure
2705  * @arg2: unused
2706  * @req: sysctl request pointer
2707  *
2708  * Callback for the current_speed sysctl, to display the string representing
2709  * the current link speed.
2710  */
2711 static int
2712 ice_sysctl_current_speed(SYSCTL_HANDLER_ARGS)
2713 {
2714 	struct ice_softc *sc = (struct ice_softc *)arg1;
2715 	struct ice_hw *hw = &sc->hw;
2716 	struct sbuf *sbuf;
2717 
2718 	UNREFERENCED_PARAMETER(oidp);
2719 	UNREFERENCED_PARAMETER(arg2);
2720 
2721 	if (ice_driver_is_detaching(sc))
2722 		return (ESHUTDOWN);
2723 
2724 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 10, req);
2725 	sbuf_printf(sbuf, "%s", ice_aq_speed_to_str(hw->port_info));
2726 	sbuf_finish(sbuf);
2727 	sbuf_delete(sbuf);
2728 
2729 	return (0);
2730 }
2731 
/**
 * @var phy_link_speeds
 * @brief PHY link speed conversion array
 *
 * Array of link speeds to convert ICE_PHY_TYPE_LOW and ICE_PHY_TYPE_HIGH into
 * link speeds used by the link speed sysctls. Each entry is one of the
 * ICE_AQ_LINK_SPEED_* values.
 *
 * The table holds 64 entries followed by 5 entries marked as belonging to
 * ICE_PHY_TYPE_HIGH_*. The first 64 presumably correspond to the 64 possible
 * ICE_PHY_TYPE_LOW_* bit positions (verify against the PHY type definitions).
 *
 * @remark these are based on the indices used in the BIT() macros for the
 * ICE_PHY_TYPE_LOW_* and ICE_PHY_TYPE_HIGH_* definitions, so the order of the
 * entries must not be changed independently of those definitions.
 */
static const uint16_t phy_link_speeds[] = {
    ICE_AQ_LINK_SPEED_100MB,
    ICE_AQ_LINK_SPEED_100MB,
    ICE_AQ_LINK_SPEED_1000MB,
    ICE_AQ_LINK_SPEED_1000MB,
    ICE_AQ_LINK_SPEED_1000MB,
    ICE_AQ_LINK_SPEED_1000MB,
    ICE_AQ_LINK_SPEED_1000MB,
    ICE_AQ_LINK_SPEED_2500MB,
    ICE_AQ_LINK_SPEED_2500MB,
    ICE_AQ_LINK_SPEED_2500MB,
    ICE_AQ_LINK_SPEED_5GB,
    ICE_AQ_LINK_SPEED_5GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_40GB,
    ICE_AQ_LINK_SPEED_40GB,
    ICE_AQ_LINK_SPEED_40GB,
    ICE_AQ_LINK_SPEED_40GB,
    ICE_AQ_LINK_SPEED_40GB,
    ICE_AQ_LINK_SPEED_40GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    /* These rates are for ICE_PHY_TYPE_HIGH_* */
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB
};
2814 
/*
 * Help text listing the flag values accepted for the advertised link speed
 * setting. Presumably used as the description string of the advertise speed
 * sysctl (the user of this macro is not shown here). The text is built from
 * adjacent string literals, one per output line.
 */
#define ICE_SYSCTL_HELP_ADVERTISE_SPEED		\
"\nControl advertised link speed."		\
"\nFlags:"					\
"\n\t   0x0 - Auto"				\
"\n\t   0x1 - 10 Mb"				\
"\n\t   0x2 - 100 Mb"				\
"\n\t   0x4 - 1G"				\
"\n\t   0x8 - 2.5G"				\
"\n\t  0x10 - 5G"				\
"\n\t  0x20 - 10G"				\
"\n\t  0x40 - 20G"				\
"\n\t  0x80 - 25G"				\
"\n\t 0x100 - 40G"				\
"\n\t 0x200 - 50G"				\
"\n\t 0x400 - 100G"				\
"\n\t0x8000 - Unknown"				\
"\n\t"						\
"\nUse \"sysctl -x\" to view flags properly."
2833 
/*
 * PHY type bitmasks grouped by link speed.
 *
 * ice_sysctl_speeds_to_aq_phy_types() ORs one of these masks into its output
 * for every ICE_AQ_LINK_SPEED_* flag that is set in the requested speeds.
 * All masks apply to the low PHY type quad except ICE_PHYS_100GB_HIGH, which
 * applies to the high quad.
 */

/* PHY types selected by ICE_AQ_LINK_SPEED_100MB */
#define ICE_PHYS_100MB \
	(ICE_PHY_TYPE_LOW_100BASE_TX \
	| ICE_PHY_TYPE_LOW_100M_SGMII)

/* PHY types selected by ICE_AQ_LINK_SPEED_1000MB */
#define ICE_PHYS_1000MB \
	(ICE_PHY_TYPE_LOW_1000BASE_T \
	| ICE_PHY_TYPE_LOW_1000BASE_SX \
	| ICE_PHY_TYPE_LOW_1000BASE_LX \
	| ICE_PHY_TYPE_LOW_1000BASE_KX \
	| ICE_PHY_TYPE_LOW_1G_SGMII)

/* PHY types selected by ICE_AQ_LINK_SPEED_2500MB */
#define ICE_PHYS_2500MB \
	(ICE_PHY_TYPE_LOW_2500BASE_T \
	| ICE_PHY_TYPE_LOW_2500BASE_X \
	| ICE_PHY_TYPE_LOW_2500BASE_KX)

/* PHY types selected by ICE_AQ_LINK_SPEED_5GB */
#define ICE_PHYS_5GB \
	(ICE_PHY_TYPE_LOW_5GBASE_T \
	| ICE_PHY_TYPE_LOW_5GBASE_KR)

/* PHY types selected by ICE_AQ_LINK_SPEED_10GB */
#define ICE_PHYS_10GB \
	(ICE_PHY_TYPE_LOW_10GBASE_T \
	| ICE_PHY_TYPE_LOW_10G_SFI_DA \
	| ICE_PHY_TYPE_LOW_10GBASE_SR \
	| ICE_PHY_TYPE_LOW_10GBASE_LR \
	| ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 \
	| ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC \
	| ICE_PHY_TYPE_LOW_10G_SFI_C2C)

/* PHY types selected by ICE_AQ_LINK_SPEED_25GB */
#define ICE_PHYS_25GB \
	(ICE_PHY_TYPE_LOW_25GBASE_T \
	| ICE_PHY_TYPE_LOW_25GBASE_CR \
	| ICE_PHY_TYPE_LOW_25GBASE_CR_S \
	| ICE_PHY_TYPE_LOW_25GBASE_CR1 \
	| ICE_PHY_TYPE_LOW_25GBASE_SR \
	| ICE_PHY_TYPE_LOW_25GBASE_LR \
	| ICE_PHY_TYPE_LOW_25GBASE_KR \
	| ICE_PHY_TYPE_LOW_25GBASE_KR_S \
	| ICE_PHY_TYPE_LOW_25GBASE_KR1 \
	| ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC \
	| ICE_PHY_TYPE_LOW_25G_AUI_C2C)

/* PHY types selected by ICE_AQ_LINK_SPEED_40GB */
#define ICE_PHYS_40GB \
	(ICE_PHY_TYPE_LOW_40GBASE_CR4 \
	| ICE_PHY_TYPE_LOW_40GBASE_SR4 \
	| ICE_PHY_TYPE_LOW_40GBASE_LR4 \
	| ICE_PHY_TYPE_LOW_40GBASE_KR4 \
	| ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC \
	| ICE_PHY_TYPE_LOW_40G_XLAUI)

/* PHY types selected by ICE_AQ_LINK_SPEED_50GB */
#define ICE_PHYS_50GB \
	(ICE_PHY_TYPE_LOW_50GBASE_CR2 \
	| ICE_PHY_TYPE_LOW_50GBASE_SR2 \
	| ICE_PHY_TYPE_LOW_50GBASE_LR2 \
	| ICE_PHY_TYPE_LOW_50GBASE_KR2 \
	| ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC \
	| ICE_PHY_TYPE_LOW_50G_LAUI2 \
	| ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC \
	| ICE_PHY_TYPE_LOW_50G_AUI2 \
	| ICE_PHY_TYPE_LOW_50GBASE_CP \
	| ICE_PHY_TYPE_LOW_50GBASE_SR \
	| ICE_PHY_TYPE_LOW_50GBASE_FR \
	| ICE_PHY_TYPE_LOW_50GBASE_LR \
	| ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4 \
	| ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC \
	| ICE_PHY_TYPE_LOW_50G_AUI1)

/* Low-quad PHY types selected by ICE_AQ_LINK_SPEED_100GB */
#define ICE_PHYS_100GB_LOW \
	(ICE_PHY_TYPE_LOW_100GBASE_CR4 \
	| ICE_PHY_TYPE_LOW_100GBASE_SR4 \
	| ICE_PHY_TYPE_LOW_100GBASE_LR4 \
	| ICE_PHY_TYPE_LOW_100GBASE_KR4 \
	| ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC \
	| ICE_PHY_TYPE_LOW_100G_CAUI4 \
	| ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC \
	| ICE_PHY_TYPE_LOW_100G_AUI4 \
	| ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 \
	| ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 \
	| ICE_PHY_TYPE_LOW_100GBASE_CP2 \
	| ICE_PHY_TYPE_LOW_100GBASE_SR2 \
	| ICE_PHY_TYPE_LOW_100GBASE_DR)

/* High-quad PHY types selected by ICE_AQ_LINK_SPEED_100GB */
#define ICE_PHYS_100GB_HIGH \
	(ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4 \
	| ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC \
	| ICE_PHY_TYPE_HIGH_100G_CAUI2 \
	| ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC \
	| ICE_PHY_TYPE_HIGH_100G_AUI2)
2913 
2914 /**
2915  * ice_aq_phy_types_to_link_speeds - Convert the PHY Types to speeds
2916  * @phy_type_low: lower 64-bit PHY Type bitmask
2917  * @phy_type_high: upper 64-bit PHY Type bitmask
2918  *
2919  * Convert the PHY Type fields from Get PHY Abilities and Set PHY Config into
2920  * link speed flags. If phy_type_high has an unknown PHY type, then the return
2921  * value will include the "ICE_AQ_LINK_SPEED_UNKNOWN" flag as well.
2922  */
2923 static u16
2924 ice_aq_phy_types_to_link_speeds(u64 phy_type_low, u64 phy_type_high)
2925 {
2926 	u16 sysctl_speeds = 0;
2927 	int bit;
2928 
2929 	/* coverity[address_of] */
2930 	for_each_set_bit(bit, &phy_type_low, 64)
2931 		sysctl_speeds |= phy_link_speeds[bit];
2932 
2933 	/* coverity[address_of] */
2934 	for_each_set_bit(bit, &phy_type_high, 64) {
2935 		if ((bit + 64) < (int)ARRAY_SIZE(phy_link_speeds))
2936 			sysctl_speeds |= phy_link_speeds[bit + 64];
2937 		else
2938 			sysctl_speeds |= ICE_AQ_LINK_SPEED_UNKNOWN;
2939 	}
2940 
2941 	return (sysctl_speeds);
2942 }
2943 
2944 /**
2945  * ice_sysctl_speeds_to_aq_phy_types - Convert sysctl speed flags to AQ PHY flags
2946  * @sysctl_speeds: 16-bit sysctl speeds or AQ_LINK_SPEED flags
2947  * @phy_type_low: output parameter for lower AQ PHY flags
2948  * @phy_type_high: output parameter for higher AQ PHY flags
2949  *
2950  * Converts the given link speed flags into AQ PHY type flag sets appropriate
2951  * for use in a Set PHY Config command.
2952  */
2953 static void
2954 ice_sysctl_speeds_to_aq_phy_types(u16 sysctl_speeds, u64 *phy_type_low,
2955 				  u64 *phy_type_high)
2956 {
2957 	*phy_type_low = 0, *phy_type_high = 0;
2958 
2959 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_100MB)
2960 		*phy_type_low |= ICE_PHYS_100MB;
2961 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_1000MB)
2962 		*phy_type_low |= ICE_PHYS_1000MB;
2963 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_2500MB)
2964 		*phy_type_low |= ICE_PHYS_2500MB;
2965 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_5GB)
2966 		*phy_type_low |= ICE_PHYS_5GB;
2967 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_10GB)
2968 		*phy_type_low |= ICE_PHYS_10GB;
2969 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_25GB)
2970 		*phy_type_low |= ICE_PHYS_25GB;
2971 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_40GB)
2972 		*phy_type_low |= ICE_PHYS_40GB;
2973 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_50GB)
2974 		*phy_type_low |= ICE_PHYS_50GB;
2975 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_100GB) {
2976 		*phy_type_low |= ICE_PHYS_100GB_LOW;
2977 		*phy_type_high |= ICE_PHYS_100GB_HIGH;
2978 	}
2979 }
2980 
2981 /**
2982  * @struct ice_phy_data
2983  * @brief PHY caps and link speeds
2984  *
2985  * Buffer providing report mode and user speeds;
2986  * returning intersection of PHY types and speeds.
2987  */
2988 struct ice_phy_data {
2989 	u64 phy_low_orig;     /* PHY low quad from report */
2990 	u64 phy_high_orig;    /* PHY high quad from report */
2991 	u64 phy_low_intr;     /* PHY low quad intersection with user speeds */
2992 	u64 phy_high_intr;    /* PHY high quad intersection with user speeds */
2993 	u16 user_speeds_orig; /* Input from caller - See ICE_AQ_LINK_SPEED_* */
2994 	u16 user_speeds_intr; /* Intersect with report speeds */
2995 	u8 report_mode;       /* See ICE_AQC_REPORT_* */
2996 };
2997 
2998 /**
2999  * ice_intersect_phy_types_and_speeds - Return intersection of link speeds
3000  * @sc: device private structure
3001  * @phy_data: device PHY data
3002  *
3003  * On read: Displays the currently supported speeds
3004  * On write: Sets the device's supported speeds
3005  * Valid input flags: see ICE_SYSCTL_HELP_ADVERTISE_SPEED
3006  */
3007 static int
3008 ice_intersect_phy_types_and_speeds(struct ice_softc *sc,
3009 				   struct ice_phy_data *phy_data)
3010 {
3011 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3012 	const char *report_types[5] = { "w/o MEDIA",
3013 					"w/MEDIA",
3014 					"ACTIVE",
3015 					"EDOOFUS", /* Not used */
3016 					"DFLT" };
3017 	struct ice_hw *hw = &sc->hw;
3018 	struct ice_port_info *pi = hw->port_info;
3019 	enum ice_status status;
3020 	u16 report_speeds, temp_speeds;
3021 	u8 report_type;
3022 	bool apply_speed_filter = false;
3023 
3024 	switch (phy_data->report_mode) {
3025 	case ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA:
3026 	case ICE_AQC_REPORT_TOPO_CAP_MEDIA:
3027 	case ICE_AQC_REPORT_ACTIVE_CFG:
3028 	case ICE_AQC_REPORT_DFLT_CFG:
3029 		report_type = phy_data->report_mode >> 1;
3030 		break;
3031 	default:
3032 		device_printf(sc->dev,
3033 		    "%s: phy_data.report_mode \"%u\" doesn't exist\n",
3034 		    __func__, phy_data->report_mode);
3035 		return (EINVAL);
3036 	}
3037 
3038 	/* 0 is treated as "Auto"; the driver will handle selecting the
3039 	 * correct speeds. Including, in some cases, applying an override
3040 	 * if provided.
3041 	 */
3042 	if (phy_data->user_speeds_orig == 0)
3043 		phy_data->user_speeds_orig = USHRT_MAX;
3044 	else if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE))
3045 		apply_speed_filter = true;
3046 
3047 	status = ice_aq_get_phy_caps(pi, false, phy_data->report_mode, &pcaps, NULL);
3048 	if (status != ICE_SUCCESS) {
3049 		device_printf(sc->dev,
3050 		    "%s: ice_aq_get_phy_caps (%s) failed; status %s, aq_err %s\n",
3051 		    __func__, report_types[report_type],
3052 		    ice_status_str(status),
3053 		    ice_aq_str(sc->hw.adminq.sq_last_status));
3054 		return (EIO);
3055 	}
3056 
3057 	phy_data->phy_low_orig = le64toh(pcaps.phy_type_low);
3058 	phy_data->phy_high_orig = le64toh(pcaps.phy_type_high);
3059 	report_speeds = ice_aq_phy_types_to_link_speeds(phy_data->phy_low_orig,
3060 	    phy_data->phy_high_orig);
3061 	if (apply_speed_filter) {
3062 		temp_speeds = ice_apply_supported_speed_filter(report_speeds,
3063 		    pcaps.module_type[0]);
3064 		if ((phy_data->user_speeds_orig & temp_speeds) == 0) {
3065 			device_printf(sc->dev,
3066 			    "User-specified speeds (\"0x%04X\") not supported\n",
3067 			    phy_data->user_speeds_orig);
3068 			return (EINVAL);
3069 		}
3070 		report_speeds = temp_speeds;
3071 	}
3072 	ice_sysctl_speeds_to_aq_phy_types(phy_data->user_speeds_orig,
3073 	    &phy_data->phy_low_intr, &phy_data->phy_high_intr);
3074 	phy_data->user_speeds_intr = phy_data->user_speeds_orig & report_speeds;
3075 	phy_data->phy_low_intr &= phy_data->phy_low_orig;
3076 	phy_data->phy_high_intr &= phy_data->phy_high_orig;
3077 
3078 	return (0);
3079  }
3080 
3081 /**
3082  * ice_sysctl_advertise_speed - Display/change link speeds supported by port
3083  * @oidp: sysctl oid structure
3084  * @arg1: pointer to private data structure
3085  * @arg2: unused
3086  * @req: sysctl request pointer
3087  *
3088  * On read: Displays the currently supported speeds
3089  * On write: Sets the device's supported speeds
3090  * Valid input flags: see ICE_SYSCTL_HELP_ADVERTISE_SPEED
3091  */
3092 static int
3093 ice_sysctl_advertise_speed(SYSCTL_HANDLER_ARGS)
3094 {
3095 	struct ice_softc *sc = (struct ice_softc *)arg1;
3096 	struct ice_port_info *pi = sc->hw.port_info;
3097 	struct ice_phy_data phy_data = { 0 };
3098 	device_t dev = sc->dev;
3099 	u16 sysctl_speeds;
3100 	int ret;
3101 
3102 	UNREFERENCED_PARAMETER(arg2);
3103 
3104 	if (ice_driver_is_detaching(sc))
3105 		return (ESHUTDOWN);
3106 
3107 	/* Get the current speeds from the adapter's "active" configuration. */
3108 	phy_data.report_mode = ICE_AQC_REPORT_ACTIVE_CFG;
3109 	ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
3110 	if (ret) {
3111 		/* Error message already printed within function */
3112 		return (ret);
3113 	}
3114 
3115 	sysctl_speeds = phy_data.user_speeds_intr;
3116 
3117 	ret = sysctl_handle_16(oidp, &sysctl_speeds, 0, req);
3118 	if ((ret) || (req->newptr == NULL))
3119 		return (ret);
3120 
3121 	if (sysctl_speeds > 0x7FF) {
3122 		device_printf(dev,
3123 			      "%s: \"%u\" is outside of the range of acceptable values.\n",
3124 			      __func__, sysctl_speeds);
3125 		return (EINVAL);
3126 	}
3127 
3128 	pi->phy.curr_user_speed_req = sysctl_speeds;
3129 
3130 	/* Apply settings requested by user */
3131 	return ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS);
3132 }
3133 
/*
 * Help text for the FEC mode setting handled by ice_sysctl_fec_config().
 * Each line pairs a short keyword with the matching ICE_FEC_STRING_* name.
 */
#define ICE_SYSCTL_HELP_FEC_CONFIG \
	"\nDisplay or set the port's requested FEC mode." \
	"\n\tauto - " ICE_FEC_STRING_AUTO \
	"\n\tfc - " ICE_FEC_STRING_BASER \
	"\n\trs - " ICE_FEC_STRING_RS \
	"\n\tnone - " ICE_FEC_STRING_NONE \
	"\nEither of the left or right strings above can be used to set the requested mode."
3141 
3142 /**
3143  * ice_sysctl_fec_config - Display/change the configured FEC mode
3144  * @oidp: sysctl oid structure
3145  * @arg1: pointer to private data structure
3146  * @arg2: unused
3147  * @req: sysctl request pointer
3148  *
3149  * On read: Displays the configured FEC mode
3150  * On write: Sets the device's FEC mode to the input string, if it's valid.
3151  * Valid input strings: see ICE_SYSCTL_HELP_FEC_CONFIG
3152  */
3153 static int
3154 ice_sysctl_fec_config(SYSCTL_HANDLER_ARGS)
3155 {
3156 	struct ice_softc *sc = (struct ice_softc *)arg1;
3157 	struct ice_port_info *pi = sc->hw.port_info;
3158 	enum ice_fec_mode new_mode;
3159 	device_t dev = sc->dev;
3160 	char req_fec[32];
3161 	int ret;
3162 
3163 	UNREFERENCED_PARAMETER(arg2);
3164 
3165 	if (ice_driver_is_detaching(sc))
3166 		return (ESHUTDOWN);
3167 
3168 	bzero(req_fec, sizeof(req_fec));
3169 	strlcpy(req_fec, ice_requested_fec_mode(pi), sizeof(req_fec));
3170 
3171 	ret = sysctl_handle_string(oidp, req_fec, sizeof(req_fec), req);
3172 	if ((ret) || (req->newptr == NULL))
3173 		return (ret);
3174 
3175 	if (strcmp(req_fec, "auto") == 0 ||
3176 	    strcmp(req_fec, ice_fec_str(ICE_FEC_AUTO)) == 0) {
3177 		if (sc->allow_no_fec_mod_in_auto)
3178 			new_mode = ICE_FEC_DIS_AUTO;
3179 		else
3180 			new_mode = ICE_FEC_AUTO;
3181 	} else if (strcmp(req_fec, "fc") == 0 ||
3182 	    strcmp(req_fec, ice_fec_str(ICE_FEC_BASER)) == 0) {
3183 		new_mode = ICE_FEC_BASER;
3184 	} else if (strcmp(req_fec, "rs") == 0 ||
3185 	    strcmp(req_fec, ice_fec_str(ICE_FEC_RS)) == 0) {
3186 		new_mode = ICE_FEC_RS;
3187 	} else if (strcmp(req_fec, "none") == 0 ||
3188 	    strcmp(req_fec, ice_fec_str(ICE_FEC_NONE)) == 0) {
3189 		new_mode = ICE_FEC_NONE;
3190 	} else {
3191 		device_printf(dev,
3192 		    "%s: \"%s\" is not a valid FEC mode\n",
3193 		    __func__, req_fec);
3194 		return (EINVAL);
3195 	}
3196 
3197 	/* Cache user FEC mode for later link ups */
3198 	pi->phy.curr_user_fec_req = new_mode;
3199 
3200 	/* Apply settings requested by user */
3201 	return ice_apply_saved_phy_cfg(sc, ICE_APPLY_FEC);
3202 }
3203 
3204 /**
3205  * ice_sysctl_negotiated_fec - Display the negotiated FEC mode on the link
3206  * @oidp: sysctl oid structure
3207  * @arg1: pointer to private data structure
3208  * @arg2: unused
3209  * @req: sysctl request pointer
3210  *
3211  * On read: Displays the negotiated FEC mode, in a string
3212  */
3213 static int
3214 ice_sysctl_negotiated_fec(SYSCTL_HANDLER_ARGS)
3215 {
3216 	struct ice_softc *sc = (struct ice_softc *)arg1;
3217 	struct ice_hw *hw = &sc->hw;
3218 	char neg_fec[32];
3219 	int ret;
3220 
3221 	UNREFERENCED_PARAMETER(arg2);
3222 
3223 	if (ice_driver_is_detaching(sc))
3224 		return (ESHUTDOWN);
3225 
3226 	/* Copy const string into a buffer to drop const qualifier */
3227 	bzero(neg_fec, sizeof(neg_fec));
3228 	strlcpy(neg_fec, ice_negotiated_fec_mode(hw->port_info), sizeof(neg_fec));
3229 
3230 	ret = sysctl_handle_string(oidp, neg_fec, 0, req);
3231 	if (req->newptr != NULL)
3232 		return (EPERM);
3233 
3234 	return (ret);
3235 }
3236 
/*
 * Help text for the flow control setting handled by ice_sysctl_fc_config().
 * Each line pairs a number with the matching ICE_FC_STRING_* name.
 */
#define ICE_SYSCTL_HELP_FC_CONFIG \
	"\nDisplay or set the port's advertised flow control mode.\n" \
	"\t0 - " ICE_FC_STRING_NONE \
	"\n\t1 - " ICE_FC_STRING_RX \
	"\n\t2 - " ICE_FC_STRING_TX \
	"\n\t3 - " ICE_FC_STRING_FULL \
	"\nEither the numbers or the strings above can be used to set the advertised mode."
3244 
3245 /**
3246  * ice_sysctl_fc_config - Display/change the advertised flow control mode
3247  * @oidp: sysctl oid structure
3248  * @arg1: pointer to private data structure
3249  * @arg2: unused
3250  * @req: sysctl request pointer
3251  *
3252  * On read: Displays the configured flow control mode
3253  * On write: Sets the device's flow control mode to the input, if it's valid.
3254  * Valid input strings: see ICE_SYSCTL_HELP_FC_CONFIG
3255  */
3256 static int
3257 ice_sysctl_fc_config(SYSCTL_HANDLER_ARGS)
3258 {
3259 	struct ice_softc *sc = (struct ice_softc *)arg1;
3260 	struct ice_port_info *pi = sc->hw.port_info;
3261 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3262 	enum ice_fc_mode old_mode, new_mode;
3263 	struct ice_hw *hw = &sc->hw;
3264 	device_t dev = sc->dev;
3265 	enum ice_status status;
3266 	int ret, fc_num;
3267 	bool mode_set = false;
3268 	struct sbuf buf;
3269 	char *fc_str_end;
3270 	char fc_str[32];
3271 
3272 	UNREFERENCED_PARAMETER(arg2);
3273 
3274 	if (ice_driver_is_detaching(sc))
3275 		return (ESHUTDOWN);
3276 
3277 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
3278 				     &pcaps, NULL);
3279 	if (status != ICE_SUCCESS) {
3280 		device_printf(dev,
3281 		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
3282 		    __func__, ice_status_str(status),
3283 		    ice_aq_str(hw->adminq.sq_last_status));
3284 		return (EIO);
3285 	}
3286 
3287 	/* Convert HW response format to SW enum value */
3288 	if ((pcaps.caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) &&
3289 	    (pcaps.caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE))
3290 		old_mode = ICE_FC_FULL;
3291 	else if (pcaps.caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE)
3292 		old_mode = ICE_FC_TX_PAUSE;
3293 	else if (pcaps.caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)
3294 		old_mode = ICE_FC_RX_PAUSE;
3295 	else
3296 		old_mode = ICE_FC_NONE;
3297 
3298 	/* Create "old" string for output */
3299 	bzero(fc_str, sizeof(fc_str));
3300 	sbuf_new_for_sysctl(&buf, fc_str, sizeof(fc_str), req);
3301 	sbuf_printf(&buf, "%d<%s>", old_mode, ice_fc_str(old_mode));
3302 	sbuf_finish(&buf);
3303 	sbuf_delete(&buf);
3304 
3305 	ret = sysctl_handle_string(oidp, fc_str, sizeof(fc_str), req);
3306 	if ((ret) || (req->newptr == NULL))
3307 		return (ret);
3308 
3309 	/* Try to parse input as a string, first */
3310 	if (strcasecmp(ice_fc_str(ICE_FC_FULL), fc_str) == 0) {
3311 		new_mode = ICE_FC_FULL;
3312 		mode_set = true;
3313 	}
3314 	else if (strcasecmp(ice_fc_str(ICE_FC_TX_PAUSE), fc_str) == 0) {
3315 		new_mode = ICE_FC_TX_PAUSE;
3316 		mode_set = true;
3317 	}
3318 	else if (strcasecmp(ice_fc_str(ICE_FC_RX_PAUSE), fc_str) == 0) {
3319 		new_mode = ICE_FC_RX_PAUSE;
3320 		mode_set = true;
3321 	}
3322 	else if (strcasecmp(ice_fc_str(ICE_FC_NONE), fc_str) == 0) {
3323 		new_mode = ICE_FC_NONE;
3324 		mode_set = true;
3325 	}
3326 
3327 	/*
3328 	 * Then check if it's an integer, for compatibility with the method
3329 	 * used in older drivers.
3330 	 */
3331 	if (!mode_set) {
3332 		fc_num = strtol(fc_str, &fc_str_end, 0);
3333 		if (fc_str_end == fc_str)
3334 			fc_num = -1;
3335 		switch (fc_num) {
3336 		case 3:
3337 			new_mode = ICE_FC_FULL;
3338 			break;
3339 		case 2:
3340 			new_mode = ICE_FC_TX_PAUSE;
3341 			break;
3342 		case 1:
3343 			new_mode = ICE_FC_RX_PAUSE;
3344 			break;
3345 		case 0:
3346 			new_mode = ICE_FC_NONE;
3347 			break;
3348 		default:
3349 			device_printf(dev,
3350 			    "%s: \"%s\" is not a valid flow control mode\n",
3351 			    __func__, fc_str);
3352 			return (EINVAL);
3353 		}
3354 	}
3355 
3356 	/* Save flow control mode from user */
3357 	pi->phy.curr_user_fc_req = new_mode;
3358 
3359 	/* Turn off Priority Flow Control when Link Flow Control is enabled */
3360 	if ((hw->port_info->qos_cfg.is_sw_lldp) &&
3361 	    (hw->port_info->qos_cfg.local_dcbx_cfg.pfc.pfcena != 0) &&
3362 	    (new_mode != ICE_FC_NONE)) {
3363 		ret = ice_config_pfc(sc, 0x0);
3364 		if (ret)
3365 			return (ret);
3366 	}
3367 
3368 	/* Apply settings requested by user */
3369 	return ice_apply_saved_phy_cfg(sc, ICE_APPLY_FC);
3370 }
3371 
3372 /**
3373  * ice_sysctl_negotiated_fc - Display currently negotiated FC mode
3374  * @oidp: sysctl oid structure
3375  * @arg1: pointer to private data structure
3376  * @arg2: unused
3377  * @req: sysctl request pointer
3378  *
3379  * On read: Displays the currently negotiated flow control settings.
3380  *
3381  * If link is not established, this will report ICE_FC_NONE, as no flow
3382  * control is negotiated while link is down.
3383  */
3384 static int
3385 ice_sysctl_negotiated_fc(SYSCTL_HANDLER_ARGS)
3386 {
3387 	struct ice_softc *sc = (struct ice_softc *)arg1;
3388 	struct ice_port_info *pi = sc->hw.port_info;
3389 	const char *negotiated_fc;
3390 
3391 	UNREFERENCED_PARAMETER(arg2);
3392 
3393 	if (ice_driver_is_detaching(sc))
3394 		return (ESHUTDOWN);
3395 
3396 	negotiated_fc = ice_flowcontrol_mode(pi);
3397 
3398 	return sysctl_handle_string(oidp, __DECONST(char *, negotiated_fc), 0, req);
3399 }
3400 
3401 /**
3402  * __ice_sysctl_phy_type_handler - Display/change supported PHY types/speeds
3403  * @oidp: sysctl oid structure
3404  * @arg1: pointer to private data structure
3405  * @arg2: unused
3406  * @req: sysctl request pointer
3407  * @is_phy_type_high: if true, handle the high PHY type instead of the low PHY type
3408  *
3409  * Private handler for phy_type_high and phy_type_low sysctls.
3410  */
3411 static int
3412 __ice_sysctl_phy_type_handler(SYSCTL_HANDLER_ARGS, bool is_phy_type_high)
3413 {
3414 	struct ice_softc *sc = (struct ice_softc *)arg1;
3415 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3416 	struct ice_aqc_set_phy_cfg_data cfg = { 0 };
3417 	struct ice_hw *hw = &sc->hw;
3418 	device_t dev = sc->dev;
3419 	enum ice_status status;
3420 	uint64_t types;
3421 	int ret;
3422 
3423 	UNREFERENCED_PARAMETER(arg2);
3424 
3425 	if (ice_driver_is_detaching(sc))
3426 		return (ESHUTDOWN);
3427 
3428 	status = ice_aq_get_phy_caps(hw->port_info, false, ICE_AQC_REPORT_ACTIVE_CFG,
3429 				     &pcaps, NULL);
3430 	if (status != ICE_SUCCESS) {
3431 		device_printf(dev,
3432 		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
3433 		    __func__, ice_status_str(status),
3434 		    ice_aq_str(hw->adminq.sq_last_status));
3435 		return (EIO);
3436 	}
3437 
3438 	if (is_phy_type_high)
3439 		types = pcaps.phy_type_high;
3440 	else
3441 		types = pcaps.phy_type_low;
3442 
3443 	ret = sysctl_handle_64(oidp, &types, sizeof(types), req);
3444 	if ((ret) || (req->newptr == NULL))
3445 		return (ret);
3446 
3447 	ice_copy_phy_caps_to_cfg(hw->port_info, &pcaps, &cfg);
3448 
3449 	if (is_phy_type_high)
3450 		cfg.phy_type_high = types & hw->port_info->phy.phy_type_high;
3451 	else
3452 		cfg.phy_type_low = types & hw->port_info->phy.phy_type_low;
3453 	cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
3454 
3455 	status = ice_aq_set_phy_cfg(hw, hw->port_info, &cfg, NULL);
3456 	if (status != ICE_SUCCESS) {
3457 		device_printf(dev,
3458 		    "%s: ice_aq_set_phy_cfg failed; status %s, aq_err %s\n",
3459 		    __func__, ice_status_str(status),
3460 		    ice_aq_str(hw->adminq.sq_last_status));
3461 		return (EIO);
3462 	}
3463 
3464 	return (0);
3465 
3466 }
3467 
3468 /**
3469  * ice_sysctl_phy_type_low - Display/change supported lower PHY types/speeds
3470  * @oidp: sysctl oid structure
3471  * @arg1: pointer to private data structure
3472  * @arg2: unused
3473  * @req: sysctl request pointer
3474  *
3475  * On read: Displays the currently supported lower PHY types
3476  * On write: Sets the device's supported low PHY types
3477  */
3478 static int
3479 ice_sysctl_phy_type_low(SYSCTL_HANDLER_ARGS)
3480 {
3481 	return __ice_sysctl_phy_type_handler(oidp, arg1, arg2, req, false);
3482 }
3483 
3484 /**
3485  * ice_sysctl_phy_type_high - Display/change supported higher PHY types/speeds
3486  * @oidp: sysctl oid structure
3487  * @arg1: pointer to private data structure
3488  * @arg2: unused
3489  * @req: sysctl request pointer
3490  *
3491  * On read: Displays the currently supported higher PHY types
3492  * On write: Sets the device's supported high PHY types
3493  */
3494 static int
3495 ice_sysctl_phy_type_high(SYSCTL_HANDLER_ARGS)
3496 {
3497 	return __ice_sysctl_phy_type_handler(oidp, arg1, arg2, req, true);
3498 }
3499 
3500 /**
3501  * ice_sysctl_phy_caps - Display response from Get PHY abililties
3502  * @oidp: sysctl oid structure
3503  * @arg1: pointer to private data structure
3504  * @arg2: unused
3505  * @req: sysctl request pointer
3506  * @report_mode: the mode to report
3507  *
3508  * On read: Display the response from Get PHY abillities with the given report
3509  * mode.
3510  */
3511 static int
3512 ice_sysctl_phy_caps(SYSCTL_HANDLER_ARGS, u8 report_mode)
3513 {
3514 	struct ice_softc *sc = (struct ice_softc *)arg1;
3515 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3516 	struct ice_hw *hw = &sc->hw;
3517 	struct ice_port_info *pi = hw->port_info;
3518 	device_t dev = sc->dev;
3519 	enum ice_status status;
3520 	int ret;
3521 
3522 	UNREFERENCED_PARAMETER(arg2);
3523 
3524 	ret = priv_check(curthread, PRIV_DRIVER);
3525 	if (ret)
3526 		return (ret);
3527 
3528 	if (ice_driver_is_detaching(sc))
3529 		return (ESHUTDOWN);
3530 
3531 	status = ice_aq_get_phy_caps(pi, true, report_mode, &pcaps, NULL);
3532 	if (status != ICE_SUCCESS) {
3533 		device_printf(dev,
3534 		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
3535 		    __func__, ice_status_str(status),
3536 		    ice_aq_str(hw->adminq.sq_last_status));
3537 		return (EIO);
3538 	}
3539 
3540 	ret = sysctl_handle_opaque(oidp, &pcaps, sizeof(pcaps), req);
3541 	if (req->newptr != NULL)
3542 		return (EPERM);
3543 
3544 	return (ret);
3545 }
3546 
3547 /**
3548  * ice_sysctl_phy_sw_caps - Display response from Get PHY abililties
3549  * @oidp: sysctl oid structure
3550  * @arg1: pointer to private data structure
3551  * @arg2: unused
3552  * @req: sysctl request pointer
3553  *
3554  * On read: Display the response from Get PHY abillities reporting the last
3555  * software configuration.
3556  */
3557 static int
3558 ice_sysctl_phy_sw_caps(SYSCTL_HANDLER_ARGS)
3559 {
3560 	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
3561 				   ICE_AQC_REPORT_ACTIVE_CFG);
3562 }
3563 
3564 /**
3565  * ice_sysctl_phy_nvm_caps - Display response from Get PHY abililties
3566  * @oidp: sysctl oid structure
3567  * @arg1: pointer to private data structure
3568  * @arg2: unused
3569  * @req: sysctl request pointer
3570  *
3571  * On read: Display the response from Get PHY abillities reporting the NVM
3572  * configuration.
3573  */
3574 static int
3575 ice_sysctl_phy_nvm_caps(SYSCTL_HANDLER_ARGS)
3576 {
3577 	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
3578 				   ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA);
3579 }
3580 
3581 /**
3582  * ice_sysctl_phy_topo_caps - Display response from Get PHY abililties
3583  * @oidp: sysctl oid structure
3584  * @arg1: pointer to private data structure
3585  * @arg2: unused
3586  * @req: sysctl request pointer
3587  *
3588  * On read: Display the response from Get PHY abillities reporting the
3589  * topology configuration.
3590  */
3591 static int
3592 ice_sysctl_phy_topo_caps(SYSCTL_HANDLER_ARGS)
3593 {
3594 	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
3595 				   ICE_AQC_REPORT_TOPO_CAP_MEDIA);
3596 }
3597 
3598 /**
3599  * ice_sysctl_phy_link_status - Display response from Get Link Status
3600  * @oidp: sysctl oid structure
3601  * @arg1: pointer to private data structure
3602  * @arg2: unused
3603  * @req: sysctl request pointer
3604  *
3605  * On read: Display the response from firmware for the Get Link Status
3606  * request.
3607  */
3608 static int
3609 ice_sysctl_phy_link_status(SYSCTL_HANDLER_ARGS)
3610 {
3611 	struct ice_aqc_get_link_status_data link_data = { 0 };
3612 	struct ice_softc *sc = (struct ice_softc *)arg1;
3613 	struct ice_hw *hw = &sc->hw;
3614 	struct ice_port_info *pi = hw->port_info;
3615 	struct ice_aqc_get_link_status *resp;
3616 	struct ice_aq_desc desc;
3617 	device_t dev = sc->dev;
3618 	enum ice_status status;
3619 	int ret;
3620 
3621 	UNREFERENCED_PARAMETER(arg2);
3622 
3623 	/*
3624 	 * Ensure that only contexts with driver privilege are allowed to
3625 	 * access this information
3626 	 */
3627 	ret = priv_check(curthread, PRIV_DRIVER);
3628 	if (ret)
3629 		return (ret);
3630 
3631 	if (ice_driver_is_detaching(sc))
3632 		return (ESHUTDOWN);
3633 
3634 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_status);
3635 	resp = &desc.params.get_link_status;
3636 	resp->lport_num = pi->lport;
3637 
3638 	status = ice_aq_send_cmd(hw, &desc, &link_data, sizeof(link_data), NULL);
3639 	if (status != ICE_SUCCESS) {
3640 		device_printf(dev,
3641 		    "%s: ice_aq_send_cmd failed; status %s, aq_err %s\n",
3642 		    __func__, ice_status_str(status),
3643 		    ice_aq_str(hw->adminq.sq_last_status));
3644 		return (EIO);
3645 	}
3646 
3647 	ret = sysctl_handle_opaque(oidp, &link_data, sizeof(link_data), req);
3648 	if (req->newptr != NULL)
3649 		return (EPERM);
3650 
3651 	return (ret);
3652 }
3653 
3654 /**
3655  * ice_sysctl_fw_cur_lldp_persist_status - Display current FW LLDP status
3656  * @oidp: sysctl oid structure
3657  * @arg1: pointer to private softc structure
3658  * @arg2: unused
3659  * @req: sysctl request pointer
3660  *
3661  * On read: Displays current persistent LLDP status.
3662  */
3663 static int
3664 ice_sysctl_fw_cur_lldp_persist_status(SYSCTL_HANDLER_ARGS)
3665 {
3666 	struct ice_softc *sc = (struct ice_softc *)arg1;
3667 	struct ice_hw *hw = &sc->hw;
3668 	device_t dev = sc->dev;
3669 	enum ice_status status;
3670 	struct sbuf *sbuf;
3671 	u32 lldp_state;
3672 
3673 	UNREFERENCED_PARAMETER(arg2);
3674 	UNREFERENCED_PARAMETER(oidp);
3675 
3676 	if (ice_driver_is_detaching(sc))
3677 		return (ESHUTDOWN);
3678 
3679 	status = ice_get_cur_lldp_persist_status(hw, &lldp_state);
3680 	if (status) {
3681 		device_printf(dev,
3682 		    "Could not acquire current LLDP persistence status, err %s aq_err %s\n",
3683 		    ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
3684 		return (EIO);
3685 	}
3686 
3687 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
3688 	sbuf_printf(sbuf, "%s", ice_fw_lldp_status(lldp_state));
3689 	sbuf_finish(sbuf);
3690 	sbuf_delete(sbuf);
3691 
3692 	return (0);
3693 }
3694 
3695 /**
3696  * ice_sysctl_fw_dflt_lldp_persist_status - Display default FW LLDP status
3697  * @oidp: sysctl oid structure
3698  * @arg1: pointer to private softc structure
3699  * @arg2: unused
3700  * @req: sysctl request pointer
3701  *
3702  * On read: Displays default persistent LLDP status.
3703  */
3704 static int
3705 ice_sysctl_fw_dflt_lldp_persist_status(SYSCTL_HANDLER_ARGS)
3706 {
3707 	struct ice_softc *sc = (struct ice_softc *)arg1;
3708 	struct ice_hw *hw = &sc->hw;
3709 	device_t dev = sc->dev;
3710 	enum ice_status status;
3711 	struct sbuf *sbuf;
3712 	u32 lldp_state;
3713 
3714 	UNREFERENCED_PARAMETER(arg2);
3715 	UNREFERENCED_PARAMETER(oidp);
3716 
3717 	if (ice_driver_is_detaching(sc))
3718 		return (ESHUTDOWN);
3719 
3720 	status = ice_get_dflt_lldp_persist_status(hw, &lldp_state);
3721 	if (status) {
3722 		device_printf(dev,
3723 		    "Could not acquire default LLDP persistence status, err %s aq_err %s\n",
3724 		    ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
3725 		return (EIO);
3726 	}
3727 
3728 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
3729 	sbuf_printf(sbuf, "%s", ice_fw_lldp_status(lldp_state));
3730 	sbuf_finish(sbuf);
3731 	sbuf_delete(sbuf);
3732 
3733 	return (0);
3734 }
3735 
3736 /**
3737  * ice_dscp_is_mapped - Check for non-zero DSCP to TC mappings
3738  * @dcbcfg: Configuration struct to check for mappings in
3739  *
3740  * @return true if there exists a non-zero DSCP to TC mapping
3741  * inside the input DCB configuration struct.
3742  */
3743 static bool
3744 ice_dscp_is_mapped(struct ice_dcbx_cfg *dcbcfg)
3745 {
3746 	for (int i = 0; i < ICE_DSCP_NUM_VAL; i++)
3747 		if (dcbcfg->dscp_map[i] != 0)
3748 			return (true);
3749 
3750 	return (false);
3751 }
3752 
/* Help text describing the values of the FW LLDP agent setting. */
#define ICE_SYSCTL_HELP_FW_LLDP_AGENT \
	"\nDisplay or change FW LLDP agent state:" \
	"\n\t0 - disabled" \
	"\n\t1 - enabled"
3757 
3758 /**
3759  * ice_sysctl_fw_lldp_agent - Display or change the FW LLDP agent status
3760  * @oidp: sysctl oid structure
3761  * @arg1: pointer to private softc structure
3762  * @arg2: unused
3763  * @req: sysctl request pointer
3764  *
3765  * On read: Displays whether the FW LLDP agent is running
3766  * On write: Persistently enables or disables the FW LLDP agent
3767  */
3768 static int
3769 ice_sysctl_fw_lldp_agent(SYSCTL_HANDLER_ARGS)
3770 {
3771 	struct ice_softc *sc = (struct ice_softc *)arg1;
3772 	struct ice_dcbx_cfg *local_dcbx_cfg;
3773 	struct ice_hw *hw = &sc->hw;
3774 	device_t dev = sc->dev;
3775 	enum ice_status status;
3776 	int ret;
3777 	u32 old_state;
3778 	u8 fw_lldp_enabled;
3779 	bool retried_start_lldp = false;
3780 
3781 	UNREFERENCED_PARAMETER(arg2);
3782 
3783 	if (ice_driver_is_detaching(sc))
3784 		return (ESHUTDOWN);
3785 
3786 	status = ice_get_cur_lldp_persist_status(hw, &old_state);
3787 	if (status) {
3788 		device_printf(dev,
3789 		    "Could not acquire current LLDP persistence status, err %s aq_err %s\n",
3790 		    ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
3791 		return (EIO);
3792 	}
3793 
3794 	if (old_state > ICE_LLDP_ADMINSTATUS_ENA_RXTX) {
3795 		status = ice_get_dflt_lldp_persist_status(hw, &old_state);
3796 		if (status) {
3797 			device_printf(dev,
3798 			    "Could not acquire default LLDP persistence status, err %s aq_err %s\n",
3799 			    ice_status_str(status),
3800 			    ice_aq_str(hw->adminq.sq_last_status));
3801 			return (EIO);
3802 		}
3803 	}
3804 	if (old_state == 0)
3805 		fw_lldp_enabled = false;
3806 	else
3807 		fw_lldp_enabled = true;
3808 
3809 	ret = sysctl_handle_bool(oidp, &fw_lldp_enabled, 0, req);
3810 	if ((ret) || (req->newptr == NULL))
3811 		return (ret);
3812 
3813 	if (old_state == 0 && fw_lldp_enabled == false)
3814 		return (0);
3815 
3816 	if (old_state != 0 && fw_lldp_enabled == true)
3817 		return (0);
3818 
3819 	/* Block transition to FW LLDP if DSCP mode is enabled */
3820 	local_dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
3821 	if ((local_dcbx_cfg->pfc_mode == ICE_QOS_MODE_DSCP) &&
3822 	    ice_dscp_is_mapped(local_dcbx_cfg)) {
3823 		device_printf(dev,
3824 			      "Cannot enable FW-LLDP agent while DSCP QoS is active.\n");
3825 		return (EOPNOTSUPP);
3826 	}
3827 
3828 	if (fw_lldp_enabled == false) {
3829 		status = ice_aq_stop_lldp(hw, true, true, NULL);
3830 		/* EPERM is returned if the LLDP agent is already shutdown */
3831 		if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EPERM) {
3832 			device_printf(dev,
3833 			    "%s: ice_aq_stop_lldp failed; status %s, aq_err %s\n",
3834 			    __func__, ice_status_str(status),
3835 			    ice_aq_str(hw->adminq.sq_last_status));
3836 			return (EIO);
3837 		}
3838 		ice_aq_set_dcb_parameters(hw, true, NULL);
3839 		hw->port_info->qos_cfg.is_sw_lldp = true;
3840 		ice_add_rx_lldp_filter(sc);
3841 	} else {
3842 		ice_del_rx_lldp_filter(sc);
3843 retry_start_lldp:
3844 		status = ice_aq_start_lldp(hw, true, NULL);
3845 		if (status) {
3846 			switch (hw->adminq.sq_last_status) {
3847 			/* EEXIST is returned if the LLDP agent is already started */
3848 			case ICE_AQ_RC_EEXIST:
3849 				break;
3850 			case ICE_AQ_RC_EAGAIN:
3851 				/* Retry command after a 2 second wait */
3852 				if (retried_start_lldp == false) {
3853 					retried_start_lldp = true;
3854 					pause("slldp", ICE_START_LLDP_RETRY_WAIT);
3855 					goto retry_start_lldp;
3856 				}
3857 				/* Fallthrough */
3858 			default:
3859 				device_printf(dev,
3860 				    "%s: ice_aq_start_lldp failed; status %s, aq_err %s\n",
3861 				    __func__, ice_status_str(status),
3862 				    ice_aq_str(hw->adminq.sq_last_status));
3863 				return (EIO);
3864 			}
3865 		}
3866 		ice_start_dcbx_agent(sc);
3867 		hw->port_info->qos_cfg.is_sw_lldp = false;
3868 	}
3869 
3870 	return (ret);
3871 }
3872 
3873 #define ICE_SYSCTL_HELP_ETS_MIN_RATE \
3874 "\nIn FW DCB mode (fw_lldp_agent=1), displays the current ETS bandwidth table." \
3875 "\nIn SW DCB mode, displays and allows setting the table." \
3876 "\nInput must be in the format e.g. 30,10,10,10,10,10,10,10" \
3877 "\nWhere the bandwidth total must add up to 100"
3878 
3879 /**
3880  * ice_sysctl_ets_min_rate - Report/configure ETS bandwidth
3881  * @oidp: sysctl oid structure
3882  * @arg1: pointer to private data structure
3883  * @arg2: unused
3884  * @req: sysctl request pointer
3885  *
3886  * Returns the current ETS TC bandwidth table
3887  * cached by the driver.
3888  *
3889  * In SW DCB mode this sysctl also accepts a value that will
3890  * be sent to the firmware for configuration.
3891  */
3892 static int
3893 ice_sysctl_ets_min_rate(SYSCTL_HANDLER_ARGS)
3894 {
3895 	struct ice_softc *sc = (struct ice_softc *)arg1;
3896 	struct ice_dcbx_cfg *local_dcbx_cfg;
3897 	struct ice_port_info *pi;
3898 	struct ice_hw *hw = &sc->hw;
3899 	device_t dev = sc->dev;
3900 	enum ice_status status;
3901 	struct sbuf *sbuf;
3902 	int ret;
3903 
3904 	/* Store input rates from user */
3905 	char ets_user_buf[128] = "";
3906 	u8 new_ets_table[ICE_MAX_TRAFFIC_CLASS] = {};
3907 
3908 	UNREFERENCED_PARAMETER(arg2);
3909 
3910 	if (ice_driver_is_detaching(sc))
3911 		return (ESHUTDOWN);
3912 
3913 	if (req->oldptr == NULL && req->newptr == NULL) {
3914 		ret = SYSCTL_OUT(req, 0, 128);
3915 		return (ret);
3916 	}
3917 
3918 	pi = hw->port_info;
3919 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
3920 
3921 	sbuf = sbuf_new(NULL, ets_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
3922 
3923 	/* Format ETS BW data for output */
3924 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
3925 		sbuf_printf(sbuf, "%d", local_dcbx_cfg->etscfg.tcbwtable[i]);
3926 		if (i != ICE_MAX_TRAFFIC_CLASS - 1)
3927 			sbuf_printf(sbuf, ",");
3928 	}
3929 
3930 	sbuf_finish(sbuf);
3931 	sbuf_delete(sbuf);
3932 
3933 	/* Read in the new ETS values */
3934 	ret = sysctl_handle_string(oidp, ets_user_buf, sizeof(ets_user_buf), req);
3935 	if ((ret) || (req->newptr == NULL))
3936 		return (ret);
3937 
3938 	/* Don't allow setting changes in FW DCB mode */
3939 	if (!hw->port_info->qos_cfg.is_sw_lldp)
3940 		return (EPERM);
3941 
3942 	ret = ice_ets_str_to_tbl(ets_user_buf, new_ets_table, 100);
3943 	if (ret) {
3944 		device_printf(dev, "%s: Could not parse input BW table: %s\n",
3945 		    __func__, ets_user_buf);
3946 		return (ret);
3947 	}
3948 
3949 	if (!ice_check_ets_bw(new_ets_table)) {
3950 		device_printf(dev, "%s: Bandwidth sum does not equal 100: %s\n",
3951 		    __func__, ets_user_buf);
3952 		return (EINVAL);
3953 	}
3954 
3955 	memcpy(local_dcbx_cfg->etscfg.tcbwtable, new_ets_table,
3956 	    sizeof(new_ets_table));
3957 
3958 	/* If BW > 0, then set TSA entry to 2 */
3959 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
3960 		if (new_ets_table[i] > 0)
3961 			local_dcbx_cfg->etscfg.tsatable[i] = 2;
3962 		else
3963 			local_dcbx_cfg->etscfg.tsatable[i] = 0;
3964 	}
3965 	local_dcbx_cfg->etscfg.willing = 0;
3966 	local_dcbx_cfg->etsrec = local_dcbx_cfg->etscfg;
3967 	local_dcbx_cfg->app_mode = ICE_DCBX_APPS_NON_WILLING;
3968 
3969 	status = ice_set_dcb_cfg(pi);
3970 	if (status) {
3971 		device_printf(dev,
3972 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
3973 		    __func__, ice_status_str(status),
3974 		    ice_aq_str(hw->adminq.sq_last_status));
3975 		return (EIO);
3976 	}
3977 
3978 	ice_do_dcb_reconfig(sc, false);
3979 
3980 	return (0);
3981 }
3982 
3983 #define ICE_SYSCTL_HELP_UP2TC_MAP \
3984 "\nIn FW DCB mode (fw_lldp_agent=1), displays the current ETS priority assignment table." \
3985 "\nIn SW DCB mode, displays and allows setting the table." \
3986 "\nInput must be in this format: 0,1,2,3,4,5,6,7" \
3987 "\nWhere the 1st number is the TC for UP0, 2nd number is the TC for UP1, etc"
3988 
3989 /**
3990  * ice_sysctl_up2tc_map - Report or configure UP2TC mapping
3991  * @oidp: sysctl oid structure
3992  * @arg1: pointer to private data structure
3993  * @arg2: unused
3994  * @req: sysctl request pointer
3995  *
3996  * In FW DCB mode, returns the current ETS prio table /
3997  * UP2TC mapping from the local MIB.
3998  *
3999  * In SW DCB mode this sysctl also accepts a value that will
4000  * be sent to the firmware for configuration.
4001  */
4002 static int
4003 ice_sysctl_up2tc_map(SYSCTL_HANDLER_ARGS)
4004 {
4005 	struct ice_softc *sc = (struct ice_softc *)arg1;
4006 	struct ice_dcbx_cfg *local_dcbx_cfg;
4007 	struct ice_port_info *pi;
4008 	struct ice_hw *hw = &sc->hw;
4009 	device_t dev = sc->dev;
4010 	enum ice_status status;
4011 	struct sbuf *sbuf;
4012 	int ret;
4013 
4014 	/* Store input rates from user */
4015 	char up2tc_user_buf[128] = "";
4016 	/* This array is indexed by UP, not TC */
4017 	u8 new_up2tc[ICE_MAX_TRAFFIC_CLASS] = {};
4018 
4019 	UNREFERENCED_PARAMETER(arg2);
4020 
4021 	if (ice_driver_is_detaching(sc))
4022 		return (ESHUTDOWN);
4023 
4024 	if (req->oldptr == NULL && req->newptr == NULL) {
4025 		ret = SYSCTL_OUT(req, 0, 128);
4026 		return (ret);
4027 	}
4028 
4029 	pi = hw->port_info;
4030 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4031 
4032 	sbuf = sbuf_new(NULL, up2tc_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
4033 
4034 	/* Format ETS Priority Mapping Table for output */
4035 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
4036 		sbuf_printf(sbuf, "%d", local_dcbx_cfg->etscfg.prio_table[i]);
4037 		if (i != ICE_MAX_TRAFFIC_CLASS - 1)
4038 			sbuf_printf(sbuf, ",");
4039 	}
4040 
4041 	sbuf_finish(sbuf);
4042 	sbuf_delete(sbuf);
4043 
4044 	/* Read in the new ETS priority mapping */
4045 	ret = sysctl_handle_string(oidp, up2tc_user_buf, sizeof(up2tc_user_buf), req);
4046 	if ((ret) || (req->newptr == NULL))
4047 		return (ret);
4048 
4049 	/* Don't allow setting changes in FW DCB mode */
4050 	if (!hw->port_info->qos_cfg.is_sw_lldp)
4051 		return (EPERM);
4052 
4053 	ret = ice_ets_str_to_tbl(up2tc_user_buf, new_up2tc, 7);
4054 	if (ret) {
4055 		device_printf(dev, "%s: Could not parse input priority assignment table: %s\n",
4056 		    __func__, up2tc_user_buf);
4057 		return (ret);
4058 	}
4059 
4060 	/* Prepare updated ETS CFG/REC TLVs */
4061 	memcpy(local_dcbx_cfg->etscfg.prio_table, new_up2tc,
4062 	    sizeof(new_up2tc));
4063 	memcpy(local_dcbx_cfg->etsrec.prio_table, new_up2tc,
4064 	    sizeof(new_up2tc));
4065 
4066 	status = ice_set_dcb_cfg(pi);
4067 	if (status) {
4068 		device_printf(dev,
4069 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
4070 		    __func__, ice_status_str(status),
4071 		    ice_aq_str(hw->adminq.sq_last_status));
4072 		return (EIO);
4073 	}
4074 
4075 	ice_do_dcb_reconfig(sc, false);
4076 
4077 	return (0);
4078 }
4079 
4080 /**
4081  * ice_config_pfc - helper function to set PFC config in FW
4082  * @sc: device private structure
4083  * @new_mode: bit flags indicating PFC status for TCs
4084  *
4085  * @pre must be in SW DCB mode
4086  *
4087  * Configures the driver's local PFC TLV and sends it to the
4088  * FW for configuration, then reconfigures the driver/VSI
4089  * for DCB if needed.
4090  */
4091 static int
4092 ice_config_pfc(struct ice_softc *sc, u8 new_mode)
4093 {
4094 	struct ice_dcbx_cfg *local_dcbx_cfg;
4095 	struct ice_hw *hw = &sc->hw;
4096 	struct ice_port_info *pi;
4097 	device_t dev = sc->dev;
4098 	enum ice_status status;
4099 
4100 	pi = hw->port_info;
4101 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4102 
4103 	/* Prepare updated PFC TLV */
4104 	local_dcbx_cfg->pfc.pfcena = new_mode;
4105 	local_dcbx_cfg->pfc.pfccap = ICE_MAX_TRAFFIC_CLASS;
4106 	local_dcbx_cfg->pfc.willing = 0;
4107 	local_dcbx_cfg->pfc.mbc = 0;
4108 
4109 	/* Warn if PFC is being disabled with RoCE v2 in use */
4110 	if (new_mode == 0 && sc->rdma_entry.attached)
4111 		device_printf(dev,
4112 		    "WARNING: Recommended that Priority Flow Control is enabled when RoCEv2 is in use\n");
4113 
4114 	status = ice_set_dcb_cfg(pi);
4115 	if (status) {
4116 		device_printf(dev,
4117 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
4118 		    __func__, ice_status_str(status),
4119 		    ice_aq_str(hw->adminq.sq_last_status));
4120 		return (EIO);
4121 	}
4122 
4123 	ice_do_dcb_reconfig(sc, false);
4124 
4125 	return (0);
4126 }
4127 
4128 #define ICE_SYSCTL_HELP_PFC_CONFIG \
4129 "\nIn FW DCB mode (fw_lldp_agent=1), displays the current Priority Flow Control configuration" \
4130 "\nIn SW DCB mode, displays and allows setting the configuration" \
4131 "\nInput/Output is in this format: 0xff" \
4132 "\nWhere bit position # enables/disables PFC for that Traffic Class #"
4133 
4134 /**
4135  * ice_sysctl_pfc_config - Report or configure enabled PFC TCs
4136  * @oidp: sysctl oid structure
4137  * @arg1: pointer to private data structure
4138  * @arg2: unused
4139  * @req: sysctl request pointer
4140  *
4141  * In FW DCB mode, returns a bitmap containing the current TCs
4142  * that have PFC enabled on them.
4143  *
4144  * In SW DCB mode this sysctl also accepts a value that will
4145  * be sent to the firmware for configuration.
4146  */
4147 static int
4148 ice_sysctl_pfc_config(SYSCTL_HANDLER_ARGS)
4149 {
4150 	struct ice_softc *sc = (struct ice_softc *)arg1;
4151 	struct ice_dcbx_cfg *local_dcbx_cfg;
4152 	struct ice_port_info *pi;
4153 	struct ice_hw *hw = &sc->hw;
4154 	int ret;
4155 
4156 	/* Store input flags from user */
4157 	u8 user_pfc;
4158 
4159 	UNREFERENCED_PARAMETER(arg2);
4160 
4161 	if (ice_driver_is_detaching(sc))
4162 		return (ESHUTDOWN);
4163 
4164 	if (req->oldptr == NULL && req->newptr == NULL) {
4165 		ret = SYSCTL_OUT(req, 0, sizeof(u8));
4166 		return (ret);
4167 	}
4168 
4169 	pi = hw->port_info;
4170 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4171 
4172 	/* Format current PFC enable setting for output */
4173 	user_pfc = local_dcbx_cfg->pfc.pfcena;
4174 
4175 	/* Read in the new PFC config */
4176 	ret = sysctl_handle_8(oidp, &user_pfc, 0, req);
4177 	if ((ret) || (req->newptr == NULL))
4178 		return (ret);
4179 
4180 	/* Don't allow setting changes in FW DCB mode */
4181 	if (!hw->port_info->qos_cfg.is_sw_lldp)
4182 		return (EPERM);
4183 
4184 	/* If LFC is active and PFC is going to be turned on, turn LFC off */
4185 	if (user_pfc != 0 && pi->phy.curr_user_fc_req != ICE_FC_NONE) {
4186 		pi->phy.curr_user_fc_req = ICE_FC_NONE;
4187 		ret = ice_apply_saved_phy_cfg(sc, ICE_APPLY_FC);
4188 		if (ret)
4189 			return (ret);
4190 	}
4191 
4192 	return ice_config_pfc(sc, user_pfc);
4193 }
4194 
4195 #define ICE_SYSCTL_HELP_PFC_MODE \
4196 "\nDisplay and set the current QoS mode for the firmware" \
4197 "\n\t0: VLAN UP mode" \
4198 "\n\t1: DSCP mode"
4199 
4200 /**
4201  * ice_sysctl_pfc_mode
4202  * @oidp: sysctl oid structure
4203  * @arg1: pointer to private data structure
4204  * @arg2: unused
4205  * @req: sysctl request pointer
4206  *
4207  * Gets and sets whether the port is in DSCP or VLAN PCP-based
4208  * PFC mode. This is also used to set whether DSCP or VLAN PCP
4209  * -based settings are configured for DCB.
4210  */
4211 static int
4212 ice_sysctl_pfc_mode(SYSCTL_HANDLER_ARGS)
4213 {
4214 	struct ice_softc *sc = (struct ice_softc *)arg1;
4215 	struct ice_dcbx_cfg *local_dcbx_cfg;
4216 	struct ice_port_info *pi;
4217 	struct ice_hw *hw = &sc->hw;
4218 	device_t dev = sc->dev;
4219 	enum ice_status status;
4220 	u8 user_pfc_mode, aq_pfc_mode;
4221 	int ret;
4222 
4223 	UNREFERENCED_PARAMETER(arg2);
4224 
4225 	if (ice_driver_is_detaching(sc))
4226 		return (ESHUTDOWN);
4227 
4228 	if (req->oldptr == NULL && req->newptr == NULL) {
4229 		ret = SYSCTL_OUT(req, 0, sizeof(u8));
4230 		return (ret);
4231 	}
4232 
4233 	pi = hw->port_info;
4234 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4235 
4236 	user_pfc_mode = local_dcbx_cfg->pfc_mode;
4237 
4238 	/* Read in the new mode */
4239 	ret = sysctl_handle_8(oidp, &user_pfc_mode, 0, req);
4240 	if ((ret) || (req->newptr == NULL))
4241 		return (ret);
4242 
4243 	/* Don't allow setting changes in FW DCB mode */
4244 	if (!hw->port_info->qos_cfg.is_sw_lldp)
4245 		return (EPERM);
4246 
4247 	/* Currently, there are only two modes */
4248 	switch (user_pfc_mode) {
4249 	case 0:
4250 		aq_pfc_mode = ICE_AQC_PFC_VLAN_BASED_PFC;
4251 		break;
4252 	case 1:
4253 		aq_pfc_mode = ICE_AQC_PFC_DSCP_BASED_PFC;
4254 		break;
4255 	default:
4256 		device_printf(dev,
4257 		    "%s: Valid input range is 0-1 (input %d)\n",
4258 		    __func__, user_pfc_mode);
4259 		return (EINVAL);
4260 	}
4261 
4262 	status = ice_aq_set_pfc_mode(hw, aq_pfc_mode, NULL);
4263 	if (status == ICE_ERR_NOT_SUPPORTED) {
4264 		device_printf(dev,
4265 		    "%s: Failed to set PFC mode; DCB not supported\n",
4266 		    __func__);
4267 		return (ENODEV);
4268 	}
4269 	if (status) {
4270 		device_printf(dev,
4271 		    "%s: Failed to set PFC mode; status %s, aq_err %s\n",
4272 		    __func__, ice_status_str(status),
4273 		    ice_aq_str(hw->adminq.sq_last_status));
4274 		return (EIO);
4275 	}
4276 
4277 	/* Reset settings to default when mode is changed */
4278 	ice_set_default_local_mib_settings(sc);
4279 	/* Cache current settings and reconfigure */
4280 	local_dcbx_cfg->pfc_mode = user_pfc_mode;
4281 	ice_do_dcb_reconfig(sc, false);
4282 
4283 	return (0);
4284 }
4285 
4286 /**
4287  * ice_add_device_sysctls - add device specific dynamic sysctls
4288  * @sc: device private structure
4289  *
4290  * Add per-device dynamic sysctls which show device configuration or enable
4291  * configuring device functionality. For tunable values which can be set prior
4292  * to load, see ice_add_device_tunables.
4293  *
4294  * This function depends on the sysctl layout setup by ice_add_device_tunables,
4295  * and likely should be called near the end of the attach process.
4296  */
4297 void
4298 ice_add_device_sysctls(struct ice_softc *sc)
4299 {
4300 	struct sysctl_oid *hw_node;
4301 	device_t dev = sc->dev;
4302 
4303 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
4304 	struct sysctl_oid_list *ctx_list =
4305 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
4306 
4307 	SYSCTL_ADD_PROC(ctx, ctx_list,
4308 	    OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD,
4309 	    sc, 0, ice_sysctl_show_fw, "A", "Firmware version");
4310 
4311 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_HAS_PBA)) {
4312 		SYSCTL_ADD_PROC(ctx, ctx_list,
4313 		    OID_AUTO, "pba_number", CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4314 		    ice_sysctl_pba_number, "A", "Product Board Assembly Number");
4315 	}
4316 
4317 	SYSCTL_ADD_PROC(ctx, ctx_list,
4318 	    OID_AUTO, "ddp_version", CTLTYPE_STRING | CTLFLAG_RD,
4319 	    sc, 0, ice_sysctl_pkg_version, "A", "Active DDP package name and version");
4320 
4321 	SYSCTL_ADD_PROC(ctx, ctx_list,
4322 	    OID_AUTO, "current_speed", CTLTYPE_STRING | CTLFLAG_RD,
4323 	    sc, 0, ice_sysctl_current_speed, "A", "Current Port Link Speed");
4324 
4325 	SYSCTL_ADD_PROC(ctx, ctx_list,
4326 	    OID_AUTO, "requested_fec", CTLTYPE_STRING | CTLFLAG_RW,
4327 	    sc, 0, ice_sysctl_fec_config, "A", ICE_SYSCTL_HELP_FEC_CONFIG);
4328 
4329 	SYSCTL_ADD_PROC(ctx, ctx_list,
4330 	    OID_AUTO, "negotiated_fec", CTLTYPE_STRING | CTLFLAG_RD,
4331 	    sc, 0, ice_sysctl_negotiated_fec, "A", "Current Negotiated FEC mode");
4332 
4333 	SYSCTL_ADD_PROC(ctx, ctx_list,
4334 	    OID_AUTO, "fc", CTLTYPE_STRING | CTLFLAG_RW,
4335 	    sc, 0, ice_sysctl_fc_config, "A", ICE_SYSCTL_HELP_FC_CONFIG);
4336 
4337 	SYSCTL_ADD_PROC(ctx, ctx_list,
4338 	    OID_AUTO, "advertise_speed", CTLTYPE_U16 | CTLFLAG_RW,
4339 	    sc, 0, ice_sysctl_advertise_speed, "SU", ICE_SYSCTL_HELP_ADVERTISE_SPEED);
4340 
4341 	SYSCTL_ADD_PROC(ctx, ctx_list,
4342 	    OID_AUTO, "fw_lldp_agent", CTLTYPE_U8 | CTLFLAG_RWTUN,
4343 	    sc, 0, ice_sysctl_fw_lldp_agent, "CU", ICE_SYSCTL_HELP_FW_LLDP_AGENT);
4344 
4345 	SYSCTL_ADD_PROC(ctx, ctx_list,
4346 	    OID_AUTO, "ets_min_rate", CTLTYPE_STRING | CTLFLAG_RW,
4347 	    sc, 0, ice_sysctl_ets_min_rate, "A", ICE_SYSCTL_HELP_ETS_MIN_RATE);
4348 
4349 	SYSCTL_ADD_PROC(ctx, ctx_list,
4350 	    OID_AUTO, "up2tc_map", CTLTYPE_STRING | CTLFLAG_RW,
4351 	    sc, 0, ice_sysctl_up2tc_map, "A", ICE_SYSCTL_HELP_UP2TC_MAP);
4352 
4353 	SYSCTL_ADD_PROC(ctx, ctx_list,
4354 	    OID_AUTO, "pfc", CTLTYPE_U8 | CTLFLAG_RW,
4355 	    sc, 0, ice_sysctl_pfc_config, "CU", ICE_SYSCTL_HELP_PFC_CONFIG);
4356 
4357 	SYSCTL_ADD_PROC(ctx, ctx_list,
4358 	    OID_AUTO, "pfc_mode", CTLTYPE_U8 | CTLFLAG_RWTUN,
4359 	    sc, 0, ice_sysctl_pfc_mode, "CU", ICE_SYSCTL_HELP_PFC_MODE);
4360 
4361 	SYSCTL_ADD_PROC(ctx, ctx_list,
4362 	    OID_AUTO, "allow_no_fec_modules_in_auto",
4363 	    CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
4364 	    sc, 0, ice_sysctl_allow_no_fec_mod_in_auto, "CU",
4365 	    "Allow \"No FEC\" mode in FEC auto-negotiation");
4366 
4367 	ice_add_dscp2tc_map_sysctls(sc, ctx, ctx_list);
4368 
4369 	/* Differentiate software and hardware statistics, by keeping hw stats
4370 	 * in their own node. This isn't in ice_add_device_tunables, because
4371 	 * we won't have any CTLFLAG_TUN sysctls under this node.
4372 	 */
4373 	hw_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "hw", CTLFLAG_RD,
4374 				  NULL, "Port Hardware Statistics");
4375 
4376 	ice_add_sysctls_mac_stats(ctx, hw_node, &sc->stats.cur);
4377 
4378 	/* Add the main PF VSI stats now. Other VSIs will add their own stats
4379 	 * during creation
4380 	 */
4381 	ice_add_vsi_sysctls(&sc->pf_vsi);
4382 
4383 	/* Add sysctls related to debugging the device driver. This includes
4384 	 * sysctls which display additional internal driver state for use in
4385 	 * understanding what is happening within the driver.
4386 	 */
4387 	ice_add_debug_sysctls(sc);
4388 }
4389 
/**
 * @enum hmc_error_type
 * @brief enumeration of HMC errors
 *
 * Error type codes decoded from the HMC error info register by
 * ice_log_hmc_error(). Values 3, 10 and 12 are reserved and have no
 * enumerator.
 */
enum hmc_error_type {
	HMC_ERR_PMF_INVALID			= 0,
	HMC_ERR_VF_IDX_INVALID			= 1,
	HMC_ERR_VF_PARENT_PF_INVALID		= 2,
	/* 3 is reserved */
	HMC_ERR_INDEX_TOO_BIG			= 4,
	HMC_ERR_ADDRESS_TOO_LARGE		= 5,
	HMC_ERR_SEGMENT_DESC_INVALID		= 6,
	HMC_ERR_SEGMENT_DESC_TOO_SMALL		= 7,
	HMC_ERR_PAGE_DESC_INVALID		= 8,
	HMC_ERR_UNSUPPORTED_REQUEST_COMPLETION	= 9,
	/* 10 is reserved */
	HMC_ERR_INVALID_OBJECT_TYPE		= 11,
	/* 12 is reserved */
};
4411 
4412 /**
4413  * ice_log_hmc_error - Log an HMC error message
4414  * @hw: device hw structure
4415  * @dev: the device to pass to device_printf()
4416  *
4417  * Log a message when an HMC error interrupt is triggered.
4418  */
4419 void
4420 ice_log_hmc_error(struct ice_hw *hw, device_t dev)
4421 {
4422 	u32 info, data;
4423 	u8 index, errtype, objtype;
4424 	bool isvf;
4425 
4426 	info = rd32(hw, PFHMC_ERRORINFO);
4427 	data = rd32(hw, PFHMC_ERRORDATA);
4428 
4429 	index = (u8)(info & PFHMC_ERRORINFO_PMF_INDEX_M);
4430 	errtype = (u8)((info & PFHMC_ERRORINFO_HMC_ERROR_TYPE_M) >>
4431 		       PFHMC_ERRORINFO_HMC_ERROR_TYPE_S);
4432 	objtype = (u8)((info & PFHMC_ERRORINFO_HMC_OBJECT_TYPE_M) >>
4433 		       PFHMC_ERRORINFO_HMC_OBJECT_TYPE_S);
4434 
4435 	isvf = info & PFHMC_ERRORINFO_PMF_ISVF_M;
4436 
4437 	device_printf(dev, "%s HMC Error detected on PMF index %d:\n",
4438 		      isvf ? "VF" : "PF", index);
4439 
4440 	device_printf(dev, "error type %d, object type %d, data 0x%08x\n",
4441 		      errtype, objtype, data);
4442 
4443 	switch (errtype) {
4444 	case HMC_ERR_PMF_INVALID:
4445 		device_printf(dev, "Private Memory Function is not valid\n");
4446 		break;
4447 	case HMC_ERR_VF_IDX_INVALID:
4448 		device_printf(dev, "Invalid Private Memory Function index for PE enabled VF\n");
4449 		break;
4450 	case HMC_ERR_VF_PARENT_PF_INVALID:
4451 		device_printf(dev, "Invalid parent PF for PE enabled VF\n");
4452 		break;
4453 	case HMC_ERR_INDEX_TOO_BIG:
4454 		device_printf(dev, "Object index too big\n");
4455 		break;
4456 	case HMC_ERR_ADDRESS_TOO_LARGE:
4457 		device_printf(dev, "Address extends beyond segment descriptor limit\n");
4458 		break;
4459 	case HMC_ERR_SEGMENT_DESC_INVALID:
4460 		device_printf(dev, "Segment descriptor is invalid\n");
4461 		break;
4462 	case HMC_ERR_SEGMENT_DESC_TOO_SMALL:
4463 		device_printf(dev, "Segment descriptor is too small\n");
4464 		break;
4465 	case HMC_ERR_PAGE_DESC_INVALID:
4466 		device_printf(dev, "Page descriptor is invalid\n");
4467 		break;
4468 	case HMC_ERR_UNSUPPORTED_REQUEST_COMPLETION:
4469 		device_printf(dev, "Unsupported Request completion received from PCIe\n");
4470 		break;
4471 	case HMC_ERR_INVALID_OBJECT_TYPE:
4472 		device_printf(dev, "Invalid object type\n");
4473 		break;
4474 	default:
4475 		device_printf(dev, "Unknown HMC error\n");
4476 	}
4477 
4478 	/* Clear the error indication */
4479 	wr32(hw, PFHMC_ERRORINFO, 0);
4480 }
4481 
4482 /**
4483  * @struct ice_sysctl_info
4484  * @brief sysctl information
4485  *
4486  * Structure used to simplify the process of defining the many similar
4487  * statistics sysctls.
4488  */
4489 struct ice_sysctl_info {
4490 	u64		*stat;
4491 	const char	*name;
4492 	const char	*description;
4493 };
4494 
4495 /**
4496  * ice_add_sysctls_eth_stats - Add sysctls for ethernet statistics
4497  * @ctx: sysctl ctx to use
4498  * @parent: the parent node to add sysctls under
4499  * @stats: the ethernet stats structure to source values from
4500  *
4501  * Adds statistics sysctls for the ethernet statistics of the MAC or a VSI.
4502  * Will add them under the parent node specified.
4503  *
4504  * Note that tx_errors is only meaningful for VSIs and not the global MAC/PF
4505  * statistics, so it is not included here. Similarly, rx_discards has different
4506  * descriptions for VSIs and MAC/PF stats, so it is also not included here.
4507  */
4508 void
4509 ice_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx,
4510 			  struct sysctl_oid *parent,
4511 			  struct ice_eth_stats *stats)
4512 {
4513 	const struct ice_sysctl_info ctls[] = {
4514 		/* Rx Stats */
4515 		{ &stats->rx_bytes, "good_octets_rcvd", "Good Octets Received" },
4516 		{ &stats->rx_unicast, "ucast_pkts_rcvd", "Unicast Packets Received" },
4517 		{ &stats->rx_multicast, "mcast_pkts_rcvd", "Multicast Packets Received" },
4518 		{ &stats->rx_broadcast, "bcast_pkts_rcvd", "Broadcast Packets Received" },
4519 		/* Tx Stats */
4520 		{ &stats->tx_bytes, "good_octets_txd", "Good Octets Transmitted" },
4521 		{ &stats->tx_unicast, "ucast_pkts_txd", "Unicast Packets Transmitted" },
4522 		{ &stats->tx_multicast, "mcast_pkts_txd", "Multicast Packets Transmitted" },
4523 		{ &stats->tx_broadcast, "bcast_pkts_txd", "Broadcast Packets Transmitted" },
4524 		/* End */
4525 		{ 0, 0, 0 }
4526 	};
4527 
4528 	struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);
4529 
4530 	const struct ice_sysctl_info *entry = ctls;
4531 	while (entry->stat != 0) {
4532 		SYSCTL_ADD_U64(ctx, parent_list, OID_AUTO, entry->name,
4533 			       CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
4534 			       entry->description);
4535 		entry++;
4536 	}
4537 }
4538 
4539 /**
4540  * ice_sysctl_tx_cso_stat - Display Tx checksum offload statistic
4541  * @oidp: sysctl oid structure
4542  * @arg1: pointer to private data structure
4543  * @arg2: Tx CSO stat to read
4544  * @req: sysctl request pointer
4545  *
4546  * On read: Sums the per-queue Tx CSO stat and displays it.
4547  */
4548 static int
4549 ice_sysctl_tx_cso_stat(SYSCTL_HANDLER_ARGS)
4550 {
4551 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
4552 	enum ice_tx_cso_stat type = (enum ice_tx_cso_stat)arg2;
4553 	u64 stat = 0;
4554 	int i;
4555 
4556 	if (ice_driver_is_detaching(vsi->sc))
4557 		return (ESHUTDOWN);
4558 
4559 	/* Check that the type is valid */
4560 	if (type >= ICE_CSO_STAT_TX_COUNT)
4561 		return (EDOOFUS);
4562 
4563 	/* Sum the stat for each of the Tx queues */
4564 	for (i = 0; i < vsi->num_tx_queues; i++)
4565 		stat += vsi->tx_queues[i].stats.cso[type];
4566 
4567 	return sysctl_handle_64(oidp, NULL, stat, req);
4568 }
4569 
4570 /**
4571  * ice_sysctl_rx_cso_stat - Display Rx checksum offload statistic
4572  * @oidp: sysctl oid structure
4573  * @arg1: pointer to private data structure
4574  * @arg2: Rx CSO stat to read
4575  * @req: sysctl request pointer
4576  *
4577  * On read: Sums the per-queue Rx CSO stat and displays it.
4578  */
4579 static int
4580 ice_sysctl_rx_cso_stat(SYSCTL_HANDLER_ARGS)
4581 {
4582 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
4583 	enum ice_rx_cso_stat type = (enum ice_rx_cso_stat)arg2;
4584 	u64 stat = 0;
4585 	int i;
4586 
4587 	if (ice_driver_is_detaching(vsi->sc))
4588 		return (ESHUTDOWN);
4589 
4590 	/* Check that the type is valid */
4591 	if (type >= ICE_CSO_STAT_RX_COUNT)
4592 		return (EDOOFUS);
4593 
4594 	/* Sum the stat for each of the Rx queues */
4595 	for (i = 0; i < vsi->num_rx_queues; i++)
4596 		stat += vsi->rx_queues[i].stats.cso[type];
4597 
4598 	return sysctl_handle_64(oidp, NULL, stat, req);
4599 }
4600 
4601 /**
4602  * ice_sysctl_rx_errors_stat - Display aggregate of Rx errors
4603  * @oidp: sysctl oid structure
4604  * @arg1: pointer to private data structure
4605  * @arg2: unused
4606  * @req: sysctl request pointer
4607  *
4608  * On read: Sums current values of Rx error statistics and
4609  * displays it.
4610  */
4611 static int
4612 ice_sysctl_rx_errors_stat(SYSCTL_HANDLER_ARGS)
4613 {
4614 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
4615 	struct ice_hw_port_stats *hs = &vsi->sc->stats.cur;
4616 	u64 stat = 0;
4617 	int i, type;
4618 
4619 	UNREFERENCED_PARAMETER(arg2);
4620 
4621 	if (ice_driver_is_detaching(vsi->sc))
4622 		return (ESHUTDOWN);
4623 
4624 	stat += hs->rx_undersize;
4625 	stat += hs->rx_fragments;
4626 	stat += hs->rx_oversize;
4627 	stat += hs->rx_jabber;
4628 	stat += hs->rx_len_errors;
4629 	stat += hs->crc_errors;
4630 	stat += hs->illegal_bytes;
4631 
4632 	/* Checksum error stats */
4633 	for (i = 0; i < vsi->num_rx_queues; i++)
4634 		for (type = ICE_CSO_STAT_RX_IP4_ERR;
4635 		     type < ICE_CSO_STAT_RX_COUNT;
4636 		     type++)
4637 			stat += vsi->rx_queues[i].stats.cso[type];
4638 
4639 	return sysctl_handle_64(oidp, NULL, stat, req);
4640 }
4641 
4642 /**
4643  * @struct ice_rx_cso_stat_info
4644  * @brief sysctl information for an Rx checksum offload statistic
4645  *
4646  * Structure used to simplify the process of defining the checksum offload
4647  * statistics.
4648  */
4649 struct ice_rx_cso_stat_info {
4650 	enum ice_rx_cso_stat	type;
4651 	const char		*name;
4652 	const char		*description;
4653 };
4654 
4655 /**
4656  * @struct ice_tx_cso_stat_info
4657  * @brief sysctl information for a Tx checksum offload statistic
4658  *
4659  * Structure used to simplify the process of defining the checksum offload
4660  * statistics.
4661  */
4662 struct ice_tx_cso_stat_info {
4663 	enum ice_tx_cso_stat	type;
4664 	const char		*name;
4665 	const char		*description;
4666 };
4667 
4668 /**
4669  * ice_add_sysctls_sw_stats - Add sysctls for software statistics
4670  * @vsi: pointer to the VSI to add sysctls for
4671  * @ctx: sysctl ctx to use
4672  * @parent: the parent node to add sysctls under
4673  *
4674  * Add statistics sysctls for software tracked statistics of a VSI.
4675  *
4676  * Currently this only adds checksum offload statistics, but more counters may
4677  * be added in the future.
4678  */
4679 static void
4680 ice_add_sysctls_sw_stats(struct ice_vsi *vsi,
4681 			 struct sysctl_ctx_list *ctx,
4682 			 struct sysctl_oid *parent)
4683 {
4684 	struct sysctl_oid *cso_node;
4685 	struct sysctl_oid_list *cso_list;
4686 
4687 	/* Tx CSO Stats */
4688 	const struct ice_tx_cso_stat_info tx_ctls[] = {
4689 		{ ICE_CSO_STAT_TX_TCP, "tx_tcp", "Transmit TCP Packets marked for HW checksum" },
4690 		{ ICE_CSO_STAT_TX_UDP, "tx_udp", "Transmit UDP Packets marked for HW checksum" },
4691 		{ ICE_CSO_STAT_TX_SCTP, "tx_sctp", "Transmit SCTP Packets marked for HW checksum" },
4692 		{ ICE_CSO_STAT_TX_IP4, "tx_ip4", "Transmit IPv4 Packets marked for HW checksum" },
4693 		{ ICE_CSO_STAT_TX_IP6, "tx_ip6", "Transmit IPv6 Packets marked for HW checksum" },
4694 		{ ICE_CSO_STAT_TX_L3_ERR, "tx_l3_err", "Transmit packets that driver failed to set L3 HW CSO bits for" },
4695 		{ ICE_CSO_STAT_TX_L4_ERR, "tx_l4_err", "Transmit packets that driver failed to set L4 HW CSO bits for" },
4696 		/* End */
4697 		{ ICE_CSO_STAT_TX_COUNT, 0, 0 }
4698 	};
4699 
4700 	/* Rx CSO Stats */
4701 	const struct ice_rx_cso_stat_info rx_ctls[] = {
4702 		{ ICE_CSO_STAT_RX_IP4_ERR, "rx_ip4_err", "Received packets with invalid IPv4 checksum indicated by HW" },
4703 		{ ICE_CSO_STAT_RX_IP6_ERR, "rx_ip6_err", "Received IPv6 packets with extension headers" },
4704 		{ ICE_CSO_STAT_RX_L3_ERR, "rx_l3_err", "Received packets with an unexpected invalid L3 checksum indicated by HW" },
4705 		{ ICE_CSO_STAT_RX_TCP_ERR, "rx_tcp_err", "Received packets with invalid TCP checksum indicated by HW" },
4706 		{ ICE_CSO_STAT_RX_UDP_ERR, "rx_udp_err", "Received packets with invalid UDP checksum indicated by HW" },
4707 		{ ICE_CSO_STAT_RX_SCTP_ERR, "rx_sctp_err", "Received packets with invalid SCTP checksum indicated by HW" },
4708 		{ ICE_CSO_STAT_RX_L4_ERR, "rx_l4_err", "Received packets with an unexpected invalid L4 checksum indicated by HW" },
4709 		/* End */
4710 		{ ICE_CSO_STAT_RX_COUNT, 0, 0 }
4711 	};
4712 
4713 	struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);
4714 
4715 	/* Add a node for statistics tracked by software. */
4716 	cso_node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, "cso", CTLFLAG_RD,
4717 				  NULL, "Checksum offload Statistics");
4718 	cso_list = SYSCTL_CHILDREN(cso_node);
4719 
4720 	const struct ice_tx_cso_stat_info *tx_entry = tx_ctls;
4721 	while (tx_entry->name && tx_entry->description) {
4722 		SYSCTL_ADD_PROC(ctx, cso_list, OID_AUTO, tx_entry->name,
4723 				CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4724 				vsi, tx_entry->type, ice_sysctl_tx_cso_stat, "QU",
4725 				tx_entry->description);
4726 		tx_entry++;
4727 	}
4728 
4729 	const struct ice_rx_cso_stat_info *rx_entry = rx_ctls;
4730 	while (rx_entry->name && rx_entry->description) {
4731 		SYSCTL_ADD_PROC(ctx, cso_list, OID_AUTO, rx_entry->name,
4732 				CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4733 				vsi, rx_entry->type, ice_sysctl_rx_cso_stat, "QU",
4734 				rx_entry->description);
4735 		rx_entry++;
4736 	}
4737 }
4738 
4739 /**
4740  * ice_add_vsi_sysctls - Add sysctls for a VSI
4741  * @vsi: pointer to VSI structure
4742  *
4743  * Add various sysctls for a given VSI.
4744  */
4745 void
4746 ice_add_vsi_sysctls(struct ice_vsi *vsi)
4747 {
4748 	struct sysctl_ctx_list *ctx = &vsi->ctx;
4749 	struct sysctl_oid *hw_node, *sw_node;
4750 	struct sysctl_oid_list *vsi_list, *hw_list;
4751 
4752 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
4753 
4754 	/* Keep hw stats in their own node. */
4755 	hw_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, "hw", CTLFLAG_RD,
4756 				  NULL, "VSI Hardware Statistics");
4757 	hw_list = SYSCTL_CHILDREN(hw_node);
4758 
4759 	/* Add the ethernet statistics for this VSI */
4760 	ice_add_sysctls_eth_stats(ctx, hw_node, &vsi->hw_stats.cur);
4761 
4762 	SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "rx_discards",
4763 			CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.rx_discards,
4764 			0, "Discarded Rx Packets (see rx_errors or rx_no_desc)");
4765 
4766 	SYSCTL_ADD_PROC(ctx, hw_list, OID_AUTO, "rx_errors",
4767 			CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4768 			vsi, 0, ice_sysctl_rx_errors_stat, "QU",
4769 			"Aggregate of all Rx errors");
4770 
4771 	SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "rx_no_desc",
4772 		       CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.rx_no_desc,
4773 		       0, "Rx Packets Discarded Due To Lack Of Descriptors");
4774 
4775 	SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "tx_errors",
4776 			CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.tx_errors,
4777 			0, "Tx Packets Discarded Due To Error");
4778 
4779 	/* Add a node for statistics tracked by software. */
4780 	sw_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, "sw", CTLFLAG_RD,
4781 				  NULL, "VSI Software Statistics");
4782 
4783 	ice_add_sysctls_sw_stats(vsi, ctx, sw_node);
4784 }
4785 
4786 /**
4787  * ice_add_sysctls_mac_pfc_one_stat - Add sysctl node for a PFC statistic
4788  * @ctx: sysctl ctx to use
4789  * @parent_list: parent sysctl list to add sysctls under
4790  * @pfc_stat_location: address of statistic for sysctl to display
4791  * @node_name: Name for statistic node
4792  * @descr: Description used for nodes added in this function
4793  *
4794  * A helper function for ice_add_sysctls_mac_pfc_stats that adds a node
4795  * for a stat and leaves for each traffic class for that stat.
4796  */
4797 static void
4798 ice_add_sysctls_mac_pfc_one_stat(struct sysctl_ctx_list *ctx,
4799 				 struct sysctl_oid_list *parent_list,
4800 				 u64* pfc_stat_location,
4801 				 const char *node_name,
4802 				 const char *descr)
4803 {
4804 	struct sysctl_oid_list *node_list;
4805 	struct sysctl_oid *node;
4806 	struct sbuf *namebuf, *descbuf;
4807 
4808 	node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, node_name, CTLFLAG_RD,
4809 				   NULL, descr);
4810 	node_list = SYSCTL_CHILDREN(node);
4811 
4812 	namebuf = sbuf_new_auto();
4813 	descbuf = sbuf_new_auto();
4814 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
4815 		sbuf_clear(namebuf);
4816 		sbuf_clear(descbuf);
4817 
4818 		sbuf_printf(namebuf, "%d", i);
4819 		sbuf_printf(descbuf, "%s for TC %d", descr, i);
4820 
4821 		sbuf_finish(namebuf);
4822 		sbuf_finish(descbuf);
4823 
4824 		SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, sbuf_data(namebuf),
4825 			CTLFLAG_RD | CTLFLAG_STATS, &pfc_stat_location[i], 0,
4826 			sbuf_data(descbuf));
4827 	}
4828 
4829 	sbuf_delete(namebuf);
4830 	sbuf_delete(descbuf);
4831 }
4832 
4833 /**
4834  * ice_add_sysctls_mac_pfc_stats - Add sysctls for MAC PFC statistics
4835  * @ctx: the sysctl ctx to use
4836  * @parent: parent node to add the sysctls under
4837  * @stats: the hw ports stat structure to pull values from
4838  *
4839  * Add global Priority Flow Control MAC statistics sysctls. These are
4840  * structured as a node with the PFC statistic, where there are eight
4841  * nodes for each traffic class.
4842  */
4843 static void
4844 ice_add_sysctls_mac_pfc_stats(struct sysctl_ctx_list *ctx,
4845 			      struct sysctl_oid *parent,
4846 			      struct ice_hw_port_stats *stats)
4847 {
4848 	struct sysctl_oid_list *parent_list;
4849 
4850 	parent_list = SYSCTL_CHILDREN(parent);
4851 
4852 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xon_rx,
4853 	    "p_xon_recvd", "PFC XON received");
4854 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xoff_rx,
4855 	    "p_xoff_recvd", "PFC XOFF received");
4856 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xon_tx,
4857 	    "p_xon_txd", "PFC XON transmitted");
4858 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xoff_tx,
4859 	    "p_xoff_txd", "PFC XOFF transmitted");
4860 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xon_2_xoff,
4861 	    "p_xon2xoff", "PFC XON to XOFF transitions");
4862 }
4863 
4864 /**
4865  * ice_add_sysctls_mac_stats - Add sysctls for global MAC statistics
4866  * @ctx: the sysctl ctx to use
4867  * @parent: parent node to add the sysctls under
4868  * @stats: the hw ports stat structure to pull values from
4869  *
4870  * Add global MAC statistics sysctls.
4871  */
4872 void
4873 ice_add_sysctls_mac_stats(struct sysctl_ctx_list *ctx,
4874 			  struct sysctl_oid *parent,
4875 			  struct ice_hw_port_stats *stats)
4876 {
4877 	struct sysctl_oid *mac_node;
4878 	struct sysctl_oid_list *parent_list, *mac_list;
4879 
4880 	parent_list = SYSCTL_CHILDREN(parent);
4881 
4882 	mac_node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, "mac", CTLFLAG_RD,
4883 				   NULL, "Mac Hardware Statistics");
4884 	mac_list = SYSCTL_CHILDREN(mac_node);
4885 
4886 	/* Add the ethernet statistics common to VSI and MAC */
4887 	ice_add_sysctls_eth_stats(ctx, mac_node, &stats->eth);
4888 
4889 	/* Add PFC stats that add per-TC counters */
4890 	ice_add_sysctls_mac_pfc_stats(ctx, mac_node, stats);
4891 
4892 	const struct ice_sysctl_info ctls[] = {
4893 		/* Packet Reception Stats */
4894 		{&stats->rx_size_64, "rx_frames_64", "64 byte frames received"},
4895 		{&stats->rx_size_127, "rx_frames_65_127", "65-127 byte frames received"},
4896 		{&stats->rx_size_255, "rx_frames_128_255", "128-255 byte frames received"},
4897 		{&stats->rx_size_511, "rx_frames_256_511", "256-511 byte frames received"},
4898 		{&stats->rx_size_1023, "rx_frames_512_1023", "512-1023 byte frames received"},
4899 		{&stats->rx_size_1522, "rx_frames_1024_1522", "1024-1522 byte frames received"},
4900 		{&stats->rx_size_big, "rx_frames_big", "1523-9522 byte frames received"},
4901 		{&stats->rx_undersize, "rx_undersize", "Undersized packets received"},
4902 		{&stats->rx_fragments, "rx_fragmented", "Fragmented packets received"},
4903 		{&stats->rx_oversize, "rx_oversized", "Oversized packets received"},
4904 		{&stats->rx_jabber, "rx_jabber", "Received Jabber"},
4905 		{&stats->rx_len_errors, "rx_length_errors", "Receive Length Errors"},
4906 		{&stats->eth.rx_discards, "rx_discards",
4907 		    "Discarded Rx Packets by Port (shortage of storage space)"},
4908 		/* Packet Transmission Stats */
4909 		{&stats->tx_size_64, "tx_frames_64", "64 byte frames transmitted"},
4910 		{&stats->tx_size_127, "tx_frames_65_127", "65-127 byte frames transmitted"},
4911 		{&stats->tx_size_255, "tx_frames_128_255", "128-255 byte frames transmitted"},
4912 		{&stats->tx_size_511, "tx_frames_256_511", "256-511 byte frames transmitted"},
4913 		{&stats->tx_size_1023, "tx_frames_512_1023", "512-1023 byte frames transmitted"},
4914 		{&stats->tx_size_1522, "tx_frames_1024_1522", "1024-1522 byte frames transmitted"},
4915 		{&stats->tx_size_big, "tx_frames_big", "1523-9522 byte frames transmitted"},
4916 		{&stats->tx_dropped_link_down, "tx_dropped", "Tx Dropped Due To Link Down"},
4917 		/* Flow control */
4918 		{&stats->link_xon_tx, "xon_txd", "Link XON transmitted"},
4919 		{&stats->link_xon_rx, "xon_recvd", "Link XON received"},
4920 		{&stats->link_xoff_tx, "xoff_txd", "Link XOFF transmitted"},
4921 		{&stats->link_xoff_rx, "xoff_recvd", "Link XOFF received"},
4922 		/* Other */
4923 		{&stats->crc_errors, "crc_errors", "CRC Errors"},
4924 		{&stats->illegal_bytes, "illegal_bytes", "Illegal Byte Errors"},
4925 		{&stats->mac_local_faults, "local_faults", "MAC Local Faults"},
4926 		{&stats->mac_remote_faults, "remote_faults", "MAC Remote Faults"},
4927 		/* End */
4928 		{ 0, 0, 0 }
4929 	};
4930 
4931 	const struct ice_sysctl_info *entry = ctls;
4932 	while (entry->stat != 0) {
4933 		SYSCTL_ADD_U64(ctx, mac_list, OID_AUTO, entry->name,
4934 			CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
4935 			entry->description);
4936 		entry++;
4937 	}
4938 }
4939 
4940 /**
4941  * ice_configure_misc_interrupts - enable 'other' interrupt causes
4942  * @sc: pointer to device private softc
4943  *
4944  * Enable various "other" interrupt causes, and associate them to interrupt 0,
4945  * which is our administrative interrupt.
4946  */
4947 void
4948 ice_configure_misc_interrupts(struct ice_softc *sc)
4949 {
4950 	struct ice_hw *hw = &sc->hw;
4951 	u32 val;
4952 
4953 	/* Read the OICR register to clear it */
4954 	rd32(hw, PFINT_OICR);
4955 
4956 	/* Enable useful "other" interrupt causes */
4957 	val = (PFINT_OICR_ECC_ERR_M |
4958 	       PFINT_OICR_MAL_DETECT_M |
4959 	       PFINT_OICR_GRST_M |
4960 	       PFINT_OICR_PCI_EXCEPTION_M |
4961 	       PFINT_OICR_VFLR_M |
4962 	       PFINT_OICR_HMC_ERR_M |
4963 	       PFINT_OICR_PE_CRITERR_M);
4964 
4965 	wr32(hw, PFINT_OICR_ENA, val);
4966 
4967 	/* Note that since we're using MSI-X index 0, and ITR index 0, we do
4968 	 * not explicitly program them when writing to the PFINT_*_CTL
4969 	 * registers. Nevertheless, these writes are associating the
4970 	 * interrupts with the ITR 0 vector
4971 	 */
4972 
4973 	/* Associate the OICR interrupt with ITR 0, and enable it */
4974 	wr32(hw, PFINT_OICR_CTL, PFINT_OICR_CTL_CAUSE_ENA_M);
4975 
4976 	/* Associate the Mailbox interrupt with ITR 0, and enable it */
4977 	wr32(hw, PFINT_MBX_CTL, PFINT_MBX_CTL_CAUSE_ENA_M);
4978 
4979 	/* Associate the AdminQ interrupt with ITR 0, and enable it */
4980 	wr32(hw, PFINT_FW_CTL, PFINT_FW_CTL_CAUSE_ENA_M);
4981 }
4982 
4983 /**
4984  * ice_filter_is_mcast - Check if info is a multicast filter
4985  * @vsi: vsi structure addresses are targeted towards
4986  * @info: filter info
4987  *
4988  * @returns true if the provided info is a multicast filter, and false
4989  * otherwise.
4990  */
4991 static bool
4992 ice_filter_is_mcast(struct ice_vsi *vsi, struct ice_fltr_info *info)
4993 {
4994 	const u8 *addr = info->l_data.mac.mac_addr;
4995 
4996 	/*
4997 	 * Check if this info matches a multicast filter added by
4998 	 * ice_add_mac_to_list
4999 	 */
5000 	if ((info->flag == ICE_FLTR_TX) &&
5001 	    (info->src_id == ICE_SRC_ID_VSI) &&
5002 	    (info->lkup_type == ICE_SW_LKUP_MAC) &&
5003 	    (info->vsi_handle == vsi->idx) &&
5004 	    ETHER_IS_MULTICAST(addr) && !ETHER_IS_BROADCAST(addr))
5005 		return true;
5006 
5007 	return false;
5008 }
5009 
5010 /**
5011  * @struct ice_mcast_sync_data
5012  * @brief data used by ice_sync_one_mcast_filter function
5013  *
5014  * Structure used to store data needed for processing by the
5015  * ice_sync_one_mcast_filter. This structure contains a linked list of filters
5016  * to be added, an error indication, and a pointer to the device softc.
5017  */
5018 struct ice_mcast_sync_data {
5019 	struct ice_list_head add_list;
5020 	struct ice_softc *sc;
5021 	int err;
5022 };
5023 
5024 /**
5025  * ice_sync_one_mcast_filter - Check if we need to program the filter
5026  * @p: void pointer to algorithm data
5027  * @sdl: link level socket address
5028  * @count: unused count value
5029  *
5030  * Called by if_foreach_llmaddr to operate on each filter in the ifp filter
5031  * list. For the given address, search our internal list to see if we have
5032  * found the filter. If not, add it to our list of filters that need to be
5033  * programmed.
5034  *
5035  * @returns (1) if we've actually setup the filter to be added
5036  */
5037 static u_int
5038 ice_sync_one_mcast_filter(void *p, struct sockaddr_dl *sdl,
5039 			  u_int __unused count)
5040 {
5041 	struct ice_mcast_sync_data *data = (struct ice_mcast_sync_data *)p;
5042 	struct ice_softc *sc = data->sc;
5043 	struct ice_hw *hw = &sc->hw;
5044 	struct ice_switch_info *sw = hw->switch_info;
5045 	const u8 *sdl_addr = (const u8 *)LLADDR(sdl);
5046 	struct ice_fltr_mgmt_list_entry *itr;
5047 	struct ice_list_head *rules;
5048 	int err;
5049 
5050 	rules = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
5051 
5052 	/*
5053 	 * If a previous filter already indicated an error, there is no need
5054 	 * for us to finish processing the rest of the filters.
5055 	 */
5056 	if (data->err)
5057 		return (0);
5058 
5059 	/* See if this filter has already been programmed */
5060 	LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry) {
5061 		struct ice_fltr_info *info = &itr->fltr_info;
5062 		const u8 *addr = info->l_data.mac.mac_addr;
5063 
5064 		/* Only check multicast filters */
5065 		if (!ice_filter_is_mcast(&sc->pf_vsi, info))
5066 			continue;
5067 
5068 		/*
5069 		 * If this filter matches, mark the internal filter as
5070 		 * "found", and exit.
5071 		 */
5072 		if (bcmp(addr, sdl_addr, ETHER_ADDR_LEN) == 0) {
5073 			itr->marker = ICE_FLTR_FOUND;
5074 			return (1);
5075 		}
5076 	}
5077 
5078 	/*
5079 	 * If we failed to locate the filter in our internal list, we need to
5080 	 * place it into our add list.
5081 	 */
5082 	err = ice_add_mac_to_list(&sc->pf_vsi, &data->add_list, sdl_addr,
5083 				  ICE_FWD_TO_VSI);
5084 	if (err) {
5085 		device_printf(sc->dev,
5086 			      "Failed to place MAC %6D onto add list, err %s\n",
5087 			      sdl_addr, ":", ice_err_str(err));
5088 		data->err = err;
5089 
5090 		return (0);
5091 	}
5092 
5093 	return (1);
5094 }
5095 
5096 /**
5097  * ice_sync_multicast_filters - Synchronize OS and internal filter list
5098  * @sc: device private structure
5099  *
5100  * Called in response to SIOCDELMULTI to synchronize the operating system
5101  * multicast address list with the internal list of filters programmed to
5102  * firmware.
5103  *
5104  * Works in one phase to find added and deleted filters using a marker bit on
5105  * the internal list.
5106  *
5107  * First, a loop over the internal list clears the marker bit. Second, for
5108  * each filter in the ifp list is checked. If we find it in the internal list,
5109  * the marker bit is set. Otherwise, the filter is added to the add list.
5110  * Third, a loop over the internal list determines if any filters have not
5111  * been found. Each of these is added to the delete list. Finally, the add and
5112  * delete lists are programmed to firmware to update the filters.
5113  *
5114  * @returns zero on success or an integer error code on failure.
5115  */
5116 int
5117 ice_sync_multicast_filters(struct ice_softc *sc)
5118 {
5119 	struct ice_hw *hw = &sc->hw;
5120 	struct ice_switch_info *sw = hw->switch_info;
5121 	struct ice_fltr_mgmt_list_entry *itr;
5122 	struct ice_mcast_sync_data data = {};
5123 	struct ice_list_head *rules, remove_list;
5124 	enum ice_status status;
5125 	int err = 0;
5126 
5127 	INIT_LIST_HEAD(&data.add_list);
5128 	INIT_LIST_HEAD(&remove_list);
5129 	data.sc = sc;
5130 	data.err = 0;
5131 
5132 	rules = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
5133 
5134 	/* Acquire the lock for the entire duration */
5135 	ice_acquire_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5136 
5137 	/* (1) Reset the marker state for all filters */
5138 	LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry)
5139 		itr->marker = ICE_FLTR_NOT_FOUND;
5140 
5141 	/* (2) determine which filters need to be added and removed */
5142 	if_foreach_llmaddr(sc->ifp, ice_sync_one_mcast_filter, (void *)&data);
5143 	if (data.err) {
5144 		/* ice_sync_one_mcast_filter already prints an error */
5145 		err = data.err;
5146 		ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5147 		goto free_filter_lists;
5148 	}
5149 
5150 	LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry) {
5151 		struct ice_fltr_info *info = &itr->fltr_info;
5152 		const u8 *addr = info->l_data.mac.mac_addr;
5153 
5154 		/* Only check multicast filters */
5155 		if (!ice_filter_is_mcast(&sc->pf_vsi, info))
5156 			continue;
5157 
5158 		/*
5159 		 * If the filter is not marked as found, then it must no
5160 		 * longer be in the ifp address list, so we need to remove it.
5161 		 */
5162 		if (itr->marker == ICE_FLTR_NOT_FOUND) {
5163 			err = ice_add_mac_to_list(&sc->pf_vsi, &remove_list,
5164 						  addr, ICE_FWD_TO_VSI);
5165 			if (err) {
5166 				device_printf(sc->dev,
5167 					      "Failed to place MAC %6D onto remove list, err %s\n",
5168 					      addr, ":", ice_err_str(err));
5169 				ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5170 				goto free_filter_lists;
5171 			}
5172 		}
5173 	}
5174 
5175 	ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5176 
5177 	status = ice_add_mac(hw, &data.add_list);
5178 	if (status) {
5179 		device_printf(sc->dev,
5180 			      "Could not add new MAC filters, err %s aq_err %s\n",
5181 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
5182 		err = (EIO);
5183 		goto free_filter_lists;
5184 	}
5185 
5186 	status = ice_remove_mac(hw, &remove_list);
5187 	if (status) {
5188 		device_printf(sc->dev,
5189 			      "Could not remove old MAC filters, err %s aq_err %s\n",
5190 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
5191 		err = (EIO);
5192 		goto free_filter_lists;
5193 	}
5194 
5195 free_filter_lists:
5196 	ice_free_fltr_list(&data.add_list);
5197 	ice_free_fltr_list(&remove_list);
5198 
5199 	return (err);
5200 }
5201 
5202 /**
5203  * ice_add_vlan_hw_filters - Add multiple VLAN filters for a given VSI
5204  * @vsi: The VSI to add the filter for
5205  * @vid: array of VLAN ids to add
5206  * @length: length of vid array
5207  *
5208  * Programs HW filters so that the given VSI will receive the specified VLANs.
5209  */
5210 enum ice_status
5211 ice_add_vlan_hw_filters(struct ice_vsi *vsi, u16 *vid, u16 length)
5212 {
5213 	struct ice_hw *hw = &vsi->sc->hw;
5214 	struct ice_list_head vlan_list;
5215 	struct ice_fltr_list_entry *vlan_entries;
5216 	enum ice_status status;
5217 
5218 	MPASS(length > 0);
5219 
5220 	INIT_LIST_HEAD(&vlan_list);
5221 
5222 	vlan_entries = (struct ice_fltr_list_entry *)
5223 	    malloc(sizeof(*vlan_entries) * length, M_ICE, M_NOWAIT | M_ZERO);
5224 	if (!vlan_entries)
5225 		return (ICE_ERR_NO_MEMORY);
5226 
5227 	for (u16 i = 0; i < length; i++) {
5228 		vlan_entries[i].fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
5229 		vlan_entries[i].fltr_info.fltr_act = ICE_FWD_TO_VSI;
5230 		vlan_entries[i].fltr_info.flag = ICE_FLTR_TX;
5231 		vlan_entries[i].fltr_info.src_id = ICE_SRC_ID_VSI;
5232 		vlan_entries[i].fltr_info.vsi_handle = vsi->idx;
5233 		vlan_entries[i].fltr_info.l_data.vlan.vlan_id = vid[i];
5234 
5235 		LIST_ADD(&vlan_entries[i].list_entry, &vlan_list);
5236 	}
5237 
5238 	status = ice_add_vlan(hw, &vlan_list);
5239 	if (!status)
5240 		goto done;
5241 
5242 	device_printf(vsi->sc->dev, "Failed to add VLAN filters:\n");
5243 	for (u16 i = 0; i < length; i++) {
5244 		device_printf(vsi->sc->dev,
5245 		    "- vlan %d, status %d\n",
5246 		    vlan_entries[i].fltr_info.l_data.vlan.vlan_id,
5247 		    vlan_entries[i].status);
5248 	}
5249 done:
5250 	free(vlan_entries, M_ICE);
5251 	return (status);
5252 }
5253 
5254 /**
5255  * ice_add_vlan_hw_filter - Add a VLAN filter for a given VSI
5256  * @vsi: The VSI to add the filter for
5257  * @vid: VLAN to add
5258  *
5259  * Programs a HW filter so that the given VSI will receive the specified VLAN.
5260  */
5261 enum ice_status
5262 ice_add_vlan_hw_filter(struct ice_vsi *vsi, u16 vid)
5263 {
5264 	return ice_add_vlan_hw_filters(vsi, &vid, 1);
5265 }
5266 
5267 /**
5268  * ice_remove_vlan_hw_filters - Remove multiple VLAN filters for a given VSI
5269  * @vsi: The VSI to remove the filters from
5270  * @vid: array of VLAN ids to remove
5271  * @length: length of vid array
5272  *
5273  * Removes previously programmed HW filters for the specified VSI.
5274  */
5275 enum ice_status
5276 ice_remove_vlan_hw_filters(struct ice_vsi *vsi, u16 *vid, u16 length)
5277 {
5278 	struct ice_hw *hw = &vsi->sc->hw;
5279 	struct ice_list_head vlan_list;
5280 	struct ice_fltr_list_entry *vlan_entries;
5281 	enum ice_status status;
5282 
5283 	MPASS(length > 0);
5284 
5285 	INIT_LIST_HEAD(&vlan_list);
5286 
5287 	vlan_entries = (struct ice_fltr_list_entry *)
5288 	    malloc(sizeof(*vlan_entries) * length, M_ICE, M_NOWAIT | M_ZERO);
5289 	if (!vlan_entries)
5290 		return (ICE_ERR_NO_MEMORY);
5291 
5292 	for (u16 i = 0; i < length; i++) {
5293 		vlan_entries[i].fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
5294 		vlan_entries[i].fltr_info.fltr_act = ICE_FWD_TO_VSI;
5295 		vlan_entries[i].fltr_info.flag = ICE_FLTR_TX;
5296 		vlan_entries[i].fltr_info.src_id = ICE_SRC_ID_VSI;
5297 		vlan_entries[i].fltr_info.vsi_handle = vsi->idx;
5298 		vlan_entries[i].fltr_info.l_data.vlan.vlan_id = vid[i];
5299 
5300 		LIST_ADD(&vlan_entries[i].list_entry, &vlan_list);
5301 	}
5302 
5303 	status = ice_remove_vlan(hw, &vlan_list);
5304 	if (!status)
5305 		goto done;
5306 
5307 	device_printf(vsi->sc->dev, "Failed to remove VLAN filters:\n");
5308 	for (u16 i = 0; i < length; i++) {
5309 		device_printf(vsi->sc->dev,
5310 		    "- vlan %d, status %d\n",
5311 		    vlan_entries[i].fltr_info.l_data.vlan.vlan_id,
5312 		    vlan_entries[i].status);
5313 	}
5314 done:
5315 	free(vlan_entries, M_ICE);
5316 	return (status);
5317 }
5318 
5319 /**
5320  * ice_remove_vlan_hw_filter - Remove a VLAN filter for a given VSI
5321  * @vsi: The VSI to remove the filter from
5322  * @vid: VLAN to remove
5323  *
5324  * Removes a previously programmed HW filter for the specified VSI.
5325  */
5326 enum ice_status
5327 ice_remove_vlan_hw_filter(struct ice_vsi *vsi, u16 vid)
5328 {
5329 	return ice_remove_vlan_hw_filters(vsi, &vid, 1);
5330 }
5331 
5332 #define ICE_SYSCTL_HELP_RX_ITR			\
5333 "\nControl Rx interrupt throttle rate."		\
5334 "\n\t0-8160 - sets interrupt rate in usecs"	\
5335 "\n\t    -1 - reset the Rx itr to default"
5336 
5337 /**
5338  * ice_sysctl_rx_itr - Display or change the Rx ITR for a VSI
5339  * @oidp: sysctl oid structure
5340  * @arg1: pointer to private data structure
5341  * @arg2: unused
5342  * @req: sysctl request pointer
5343  *
5344  * On read: Displays the current Rx ITR value
5345  * on write: Sets the Rx ITR value, reconfiguring device if it is up
5346  */
5347 static int
5348 ice_sysctl_rx_itr(SYSCTL_HANDLER_ARGS)
5349 {
5350 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
5351 	struct ice_softc *sc = vsi->sc;
5352 	int increment, ret;
5353 
5354 	UNREFERENCED_PARAMETER(arg2);
5355 
5356 	if (ice_driver_is_detaching(sc))
5357 		return (ESHUTDOWN);
5358 
5359 	ret = sysctl_handle_16(oidp, &vsi->rx_itr, 0, req);
5360 	if ((ret) || (req->newptr == NULL))
5361 		return (ret);
5362 
5363 	if (vsi->rx_itr < 0)
5364 		vsi->rx_itr = ICE_DFLT_RX_ITR;
5365 	if (vsi->rx_itr > ICE_ITR_MAX)
5366 		vsi->rx_itr = ICE_ITR_MAX;
5367 
5368 	/* Assume 2usec increment if it hasn't been loaded yet */
5369 	increment = sc->hw.itr_gran ? : 2;
5370 
5371 	/* We need to round the value to the hardware's ITR granularity */
5372 	vsi->rx_itr = (vsi->rx_itr / increment ) * increment;
5373 
5374 	/* If the driver has finished initializing, then we need to reprogram
5375 	 * the ITR registers now. Otherwise, they will be programmed during
5376 	 * driver initialization.
5377 	 */
5378 	if (ice_test_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
5379 		ice_configure_rx_itr(vsi);
5380 
5381 	return (0);
5382 }
5383 
5384 #define ICE_SYSCTL_HELP_TX_ITR			\
5385 "\nControl Tx interrupt throttle rate."		\
5386 "\n\t0-8160 - sets interrupt rate in usecs"	\
5387 "\n\t    -1 - reset the Tx itr to default"
5388 
5389 /**
5390  * ice_sysctl_tx_itr - Display or change the Tx ITR for a VSI
5391  * @oidp: sysctl oid structure
5392  * @arg1: pointer to private data structure
5393  * @arg2: unused
5394  * @req: sysctl request pointer
5395  *
5396  * On read: Displays the current Tx ITR value
5397  * on write: Sets the Tx ITR value, reconfiguring device if it is up
5398  */
5399 static int
5400 ice_sysctl_tx_itr(SYSCTL_HANDLER_ARGS)
5401 {
5402 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
5403 	struct ice_softc *sc = vsi->sc;
5404 	int increment, ret;
5405 
5406 	UNREFERENCED_PARAMETER(arg2);
5407 
5408 	if (ice_driver_is_detaching(sc))
5409 		return (ESHUTDOWN);
5410 
5411 	ret = sysctl_handle_16(oidp, &vsi->tx_itr, 0, req);
5412 	if ((ret) || (req->newptr == NULL))
5413 		return (ret);
5414 
5415 	/* Allow configuring a negative value to reset to the default */
5416 	if (vsi->tx_itr < 0)
5417 		vsi->tx_itr = ICE_DFLT_TX_ITR;
5418 	if (vsi->tx_itr > ICE_ITR_MAX)
5419 		vsi->tx_itr = ICE_ITR_MAX;
5420 
5421 	/* Assume 2usec increment if it hasn't been loaded yet */
5422 	increment = sc->hw.itr_gran ? : 2;
5423 
5424 	/* We need to round the value to the hardware's ITR granularity */
5425 	vsi->tx_itr = (vsi->tx_itr / increment ) * increment;
5426 
5427 	/* If the driver has finished initializing, then we need to reprogram
5428 	 * the ITR registers now. Otherwise, they will be programmed during
5429 	 * driver initialization.
5430 	 */
5431 	if (ice_test_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
5432 		ice_configure_tx_itr(vsi);
5433 
5434 	return (0);
5435 }
5436 
5437 /**
5438  * ice_add_vsi_tunables - Add tunables and nodes for a VSI
5439  * @vsi: pointer to VSI structure
5440  * @parent: parent node to add the tunables under
5441  *
5442  * Create a sysctl context for the VSI, so that sysctls for the VSI can be
5443  * dynamically removed upon VSI removal.
5444  *
5445  * Add various tunables and set up the basic node structure for the VSI. Must
5446  * be called *prior* to ice_add_vsi_sysctls. It should be called as soon as
5447  * possible after the VSI memory is initialized.
5448  *
5449  * VSI specific sysctls with CTLFLAG_TUN should be initialized here so that
5450  * their values can be read from loader.conf prior to their first use in the
5451  * driver.
5452  */
5453 void
5454 ice_add_vsi_tunables(struct ice_vsi *vsi, struct sysctl_oid *parent)
5455 {
5456 	struct sysctl_oid_list *vsi_list;
5457 	char vsi_name[32], vsi_desc[32];
5458 
5459 	struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);
5460 
5461 	/* Initialize the sysctl context for this VSI */
5462 	sysctl_ctx_init(&vsi->ctx);
5463 
5464 	/* Add a node to collect this VSI's statistics together */
5465 	snprintf(vsi_name, sizeof(vsi_name), "%u", vsi->idx);
5466 	snprintf(vsi_desc, sizeof(vsi_desc), "VSI %u", vsi->idx);
5467 	vsi->vsi_node = SYSCTL_ADD_NODE(&vsi->ctx, parent_list, OID_AUTO, vsi_name,
5468 					CTLFLAG_RD, NULL, vsi_desc);
5469 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
5470 
5471 	vsi->rx_itr = ICE_DFLT_TX_ITR;
5472 	SYSCTL_ADD_PROC(&vsi->ctx, vsi_list, OID_AUTO, "rx_itr",
5473 			CTLTYPE_S16 | CTLFLAG_RWTUN,
5474 			vsi, 0, ice_sysctl_rx_itr, "S",
5475 			ICE_SYSCTL_HELP_RX_ITR);
5476 
5477 	vsi->tx_itr = ICE_DFLT_TX_ITR;
5478 	SYSCTL_ADD_PROC(&vsi->ctx, vsi_list, OID_AUTO, "tx_itr",
5479 			CTLTYPE_S16 | CTLFLAG_RWTUN,
5480 			vsi, 0, ice_sysctl_tx_itr, "S",
5481 			ICE_SYSCTL_HELP_TX_ITR);
5482 }
5483 
5484 /**
5485  * ice_del_vsi_sysctl_ctx - Delete the sysctl context(s) of a VSI
5486  * @vsi: the VSI to remove contexts for
5487  *
5488  * Free the context for the VSI sysctls. This includes the main context, as
5489  * well as the per-queue sysctls.
5490  */
5491 void
5492 ice_del_vsi_sysctl_ctx(struct ice_vsi *vsi)
5493 {
5494 	device_t dev = vsi->sc->dev;
5495 	int err;
5496 
5497 	if (vsi->vsi_node) {
5498 		err = sysctl_ctx_free(&vsi->ctx);
5499 		if (err)
5500 			device_printf(dev, "failed to free VSI %d sysctl context, err %s\n",
5501 				      vsi->idx, ice_err_str(err));
5502 		vsi->vsi_node = NULL;
5503 	}
5504 }
5505 
5506 /**
5507  * ice_add_dscp2tc_map_sysctls - Add sysctl tree for DSCP to TC mapping
5508  * @sc: pointer to device private softc
5509  * @ctx: the sysctl ctx to use
5510  * @ctx_list: list of sysctl children for device (to add sysctl tree to)
5511  *
5512  * Add a sysctl tree for individual dscp2tc_map sysctls. Each child of this
5513  * node can map 8 DSCPs to TC values; there are 8 of these in turn for a total
5514  * of 64 DSCP to TC map values that the user can configure.
5515  */
5516 void
5517 ice_add_dscp2tc_map_sysctls(struct ice_softc *sc,
5518 			    struct sysctl_ctx_list *ctx,
5519 			    struct sysctl_oid_list *ctx_list)
5520 {
5521 	struct sysctl_oid_list *node_list;
5522 	struct sysctl_oid *node;
5523 	struct sbuf *namebuf, *descbuf;
5524 	int first_dscp_val, last_dscp_val;
5525 
5526 	node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "dscp2tc_map", CTLFLAG_RD,
5527 			       NULL, "Map of DSCP values to DCB TCs");
5528 	node_list = SYSCTL_CHILDREN(node);
5529 
5530 	namebuf = sbuf_new_auto();
5531 	descbuf = sbuf_new_auto();
5532 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
5533 		sbuf_clear(namebuf);
5534 		sbuf_clear(descbuf);
5535 
5536 		first_dscp_val = i * 8;
5537 		last_dscp_val = first_dscp_val + 7;
5538 
5539 		sbuf_printf(namebuf, "%d-%d", first_dscp_val, last_dscp_val);
5540 		sbuf_printf(descbuf, "Map DSCP values %d to %d to TCs",
5541 			    first_dscp_val, last_dscp_val);
5542 
5543 		sbuf_finish(namebuf);
5544 		sbuf_finish(descbuf);
5545 
5546 		SYSCTL_ADD_PROC(ctx, node_list,
5547 		    OID_AUTO, sbuf_data(namebuf), CTLTYPE_STRING | CTLFLAG_RW,
5548 		    sc, i, ice_sysctl_dscp2tc_map, "A", sbuf_data(descbuf));
5549 	}
5550 
5551 	sbuf_delete(namebuf);
5552 	sbuf_delete(descbuf);
5553 }
5554 
5555 /**
5556  * ice_add_device_tunables - Add early tunable sysctls and sysctl nodes
5557  * @sc: device private structure
5558  *
5559  * Add per-device dynamic tunable sysctls, and setup the general sysctl trees
5560  * for re-use by ice_add_device_sysctls.
5561  *
5562  * In order for the sysctl fields to be initialized before use, this function
5563  * should be called as early as possible during attach activities.
5564  *
5565  * Any non-global sysctl marked as CTLFLAG_TUN should likely be initialized
5566  * here in this function, rather than later in ice_add_device_sysctls.
5567  *
5568  * To make things easier, this function is also expected to setup the various
5569  * sysctl nodes in addition to tunables so that other sysctls which can't be
5570  * initialized early can hook into the same nodes.
5571  */
5572 void
5573 ice_add_device_tunables(struct ice_softc *sc)
5574 {
5575 	device_t dev = sc->dev;
5576 
5577 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5578 	struct sysctl_oid_list *ctx_list =
5579 		SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
5580 
5581 	sc->enable_health_events = ice_enable_health_events;
5582 
5583 	SYSCTL_ADD_BOOL(ctx, ctx_list, OID_AUTO, "enable_health_events",
5584 			CTLFLAG_RDTUN, &sc->enable_health_events, 0,
5585 			"Enable FW health event reporting for this PF");
5586 
5587 	/* Add a node to track VSI sysctls. Keep track of the node in the
5588 	 * softc so that we can hook other sysctls into it later. This
5589 	 * includes both the VSI statistics, as well as potentially dynamic
5590 	 * VSIs in the future.
5591 	 */
5592 
5593 	sc->vsi_sysctls = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "vsi",
5594 					  CTLFLAG_RD, NULL, "VSI Configuration and Statistics");
5595 
5596 	/* Add debug tunables */
5597 	ice_add_debug_tunables(sc);
5598 }
5599 
5600 /**
5601  * ice_sysctl_dump_mac_filters - Dump a list of all HW MAC Filters
5602  * @oidp: sysctl oid structure
5603  * @arg1: pointer to private data structure
5604  * @arg2: unused
5605  * @req: sysctl request pointer
5606  *
5607  * Callback for "mac_filters" sysctl to dump the programmed MAC filters.
5608  */
5609 static int
5610 ice_sysctl_dump_mac_filters(SYSCTL_HANDLER_ARGS)
5611 {
5612 	struct ice_softc *sc = (struct ice_softc *)arg1;
5613 	struct ice_hw *hw = &sc->hw;
5614 	struct ice_switch_info *sw = hw->switch_info;
5615 	struct ice_fltr_mgmt_list_entry *fm_entry;
5616 	struct ice_list_head *rule_head;
5617 	struct ice_lock *rule_lock;
5618 	struct ice_fltr_info *fi;
5619 	struct sbuf *sbuf;
5620 	int ret;
5621 
5622 	UNREFERENCED_PARAMETER(oidp);
5623 	UNREFERENCED_PARAMETER(arg2);
5624 
5625 	if (ice_driver_is_detaching(sc))
5626 		return (ESHUTDOWN);
5627 
5628 	/* Wire the old buffer so we can take a non-sleepable lock */
5629 	ret = sysctl_wire_old_buffer(req, 0);
5630 	if (ret)
5631 		return (ret);
5632 
5633 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5634 
5635 	rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock;
5636 	rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
5637 
5638 	sbuf_printf(sbuf, "MAC Filter List");
5639 
5640 	ice_acquire_lock(rule_lock);
5641 
5642 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5643 		fi = &fm_entry->fltr_info;
5644 
5645 		sbuf_printf(sbuf,
5646 			    "\nmac = %6D, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %d",
5647 			    fi->l_data.mac.mac_addr, ":", fi->vsi_handle,
5648 			    ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en,
5649 			    ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id);
5650 
5651 		/* if we have a vsi_list_info, print some information about that */
5652 		if (fm_entry->vsi_list_info) {
5653 			sbuf_printf(sbuf,
5654 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
5655 				    fm_entry->vsi_count,
5656 				    fm_entry->vsi_list_info->vsi_list_id,
5657 				    fm_entry->vsi_list_info->ref_cnt);
5658 		}
5659 	}
5660 
5661 	ice_release_lock(rule_lock);
5662 
5663 	sbuf_finish(sbuf);
5664 	sbuf_delete(sbuf);
5665 
5666 	return (0);
5667 }
5668 
5669 /**
5670  * ice_sysctl_dump_vlan_filters - Dump a list of all HW VLAN Filters
5671  * @oidp: sysctl oid structure
5672  * @arg1: pointer to private data structure
5673  * @arg2: unused
5674  * @req: sysctl request pointer
5675  *
5676  * Callback for "vlan_filters" sysctl to dump the programmed VLAN filters.
5677  */
5678 static int
5679 ice_sysctl_dump_vlan_filters(SYSCTL_HANDLER_ARGS)
5680 {
5681 	struct ice_softc *sc = (struct ice_softc *)arg1;
5682 	struct ice_hw *hw = &sc->hw;
5683 	struct ice_switch_info *sw = hw->switch_info;
5684 	struct ice_fltr_mgmt_list_entry *fm_entry;
5685 	struct ice_list_head *rule_head;
5686 	struct ice_lock *rule_lock;
5687 	struct ice_fltr_info *fi;
5688 	struct sbuf *sbuf;
5689 	int ret;
5690 
5691 	UNREFERENCED_PARAMETER(oidp);
5692 	UNREFERENCED_PARAMETER(arg2);
5693 
5694 	if (ice_driver_is_detaching(sc))
5695 		return (ESHUTDOWN);
5696 
5697 	/* Wire the old buffer so we can take a non-sleepable lock */
5698 	ret = sysctl_wire_old_buffer(req, 0);
5699 	if (ret)
5700 		return (ret);
5701 
5702 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5703 
5704 	rule_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock;
5705 	rule_head = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rules;
5706 
5707 	sbuf_printf(sbuf, "VLAN Filter List");
5708 
5709 	ice_acquire_lock(rule_lock);
5710 
5711 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5712 		fi = &fm_entry->fltr_info;
5713 
5714 		sbuf_printf(sbuf,
5715 			    "\nvlan_id = %4d, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d",
5716 			    fi->l_data.vlan.vlan_id, fi->vsi_handle,
5717 			    ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en,
5718 			    ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id);
5719 
5720 		/* if we have a vsi_list_info, print some information about that */
5721 		if (fm_entry->vsi_list_info) {
5722 			sbuf_printf(sbuf,
5723 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
5724 				    fm_entry->vsi_count,
5725 				    fm_entry->vsi_list_info->vsi_list_id,
5726 				    fm_entry->vsi_list_info->ref_cnt);
5727 		}
5728 	}
5729 
5730 	ice_release_lock(rule_lock);
5731 
5732 	sbuf_finish(sbuf);
5733 	sbuf_delete(sbuf);
5734 
5735 	return (0);
5736 }
5737 
5738 /**
5739  * ice_sysctl_dump_ethertype_filters - Dump a list of all HW Ethertype filters
5740  * @oidp: sysctl oid structure
5741  * @arg1: pointer to private data structure
5742  * @arg2: unused
5743  * @req: sysctl request pointer
5744  *
5745  * Callback for "ethertype_filters" sysctl to dump the programmed Ethertype
5746  * filters.
5747  */
5748 static int
5749 ice_sysctl_dump_ethertype_filters(SYSCTL_HANDLER_ARGS)
5750 {
5751 	struct ice_softc *sc = (struct ice_softc *)arg1;
5752 	struct ice_hw *hw = &sc->hw;
5753 	struct ice_switch_info *sw = hw->switch_info;
5754 	struct ice_fltr_mgmt_list_entry *fm_entry;
5755 	struct ice_list_head *rule_head;
5756 	struct ice_lock *rule_lock;
5757 	struct ice_fltr_info *fi;
5758 	struct sbuf *sbuf;
5759 	int ret;
5760 
5761 	UNREFERENCED_PARAMETER(oidp);
5762 	UNREFERENCED_PARAMETER(arg2);
5763 
5764 	if (ice_driver_is_detaching(sc))
5765 		return (ESHUTDOWN);
5766 
5767 	/* Wire the old buffer so we can take a non-sleepable lock */
5768 	ret = sysctl_wire_old_buffer(req, 0);
5769 	if (ret)
5770 		return (ret);
5771 
5772 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5773 
5774 	rule_lock = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE].filt_rule_lock;
5775 	rule_head = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE].filt_rules;
5776 
5777 	sbuf_printf(sbuf, "Ethertype Filter List");
5778 
5779 	ice_acquire_lock(rule_lock);
5780 
5781 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5782 		fi = &fm_entry->fltr_info;
5783 
5784 		sbuf_printf(sbuf,
5785 			    "\nethertype = 0x%04x, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d",
5786 			fi->l_data.ethertype_mac.ethertype,
5787 			fi->vsi_handle, ice_fltr_flag_str(fi->flag),
5788 			fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act),
5789 			fi->fltr_rule_id);
5790 
5791 		/* if we have a vsi_list_info, print some information about that */
5792 		if (fm_entry->vsi_list_info) {
5793 			sbuf_printf(sbuf,
5794 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
5795 				    fm_entry->vsi_count,
5796 				    fm_entry->vsi_list_info->vsi_list_id,
5797 				    fm_entry->vsi_list_info->ref_cnt);
5798 		}
5799 	}
5800 
5801 	ice_release_lock(rule_lock);
5802 
5803 	sbuf_finish(sbuf);
5804 	sbuf_delete(sbuf);
5805 
5806 	return (0);
5807 }
5808 
5809 /**
5810  * ice_sysctl_dump_ethertype_mac_filters - Dump a list of all HW Ethertype/MAC filters
5811  * @oidp: sysctl oid structure
5812  * @arg1: pointer to private data structure
5813  * @arg2: unused
5814  * @req: sysctl request pointer
5815  *
5816  * Callback for "ethertype_mac_filters" sysctl to dump the programmed
5817  * Ethertype/MAC filters.
5818  */
5819 static int
5820 ice_sysctl_dump_ethertype_mac_filters(SYSCTL_HANDLER_ARGS)
5821 {
5822 	struct ice_softc *sc = (struct ice_softc *)arg1;
5823 	struct ice_hw *hw = &sc->hw;
5824 	struct ice_switch_info *sw = hw->switch_info;
5825 	struct ice_fltr_mgmt_list_entry *fm_entry;
5826 	struct ice_list_head *rule_head;
5827 	struct ice_lock *rule_lock;
5828 	struct ice_fltr_info *fi;
5829 	struct sbuf *sbuf;
5830 	int ret;
5831 
5832 	UNREFERENCED_PARAMETER(oidp);
5833 	UNREFERENCED_PARAMETER(arg2);
5834 
5835 	if (ice_driver_is_detaching(sc))
5836 		return (ESHUTDOWN);
5837 
5838 	/* Wire the old buffer so we can take a non-sleepable lock */
5839 	ret = sysctl_wire_old_buffer(req, 0);
5840 	if (ret)
5841 		return (ret);
5842 
5843 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5844 
5845 	rule_lock = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE_MAC].filt_rule_lock;
5846 	rule_head = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE_MAC].filt_rules;
5847 
5848 	sbuf_printf(sbuf, "Ethertype/MAC Filter List");
5849 
5850 	ice_acquire_lock(rule_lock);
5851 
5852 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5853 		fi = &fm_entry->fltr_info;
5854 
5855 		sbuf_printf(sbuf,
5856 			    "\nethertype = 0x%04x, mac = %6D, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d",
5857 			    fi->l_data.ethertype_mac.ethertype,
5858 			    fi->l_data.ethertype_mac.mac_addr, ":",
5859 			    fi->vsi_handle, ice_fltr_flag_str(fi->flag),
5860 			    fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act),
5861 			    fi->fltr_rule_id);
5862 
5863 		/* if we have a vsi_list_info, print some information about that */
5864 		if (fm_entry->vsi_list_info) {
5865 			sbuf_printf(sbuf,
5866 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
5867 				    fm_entry->vsi_count,
5868 				    fm_entry->vsi_list_info->vsi_list_id,
5869 				    fm_entry->vsi_list_info->ref_cnt);
5870 		}
5871 	}
5872 
5873 	ice_release_lock(rule_lock);
5874 
5875 	sbuf_finish(sbuf);
5876 	sbuf_delete(sbuf);
5877 
5878 	return (0);
5879 }
5880 
5881 /**
5882  * ice_sysctl_dump_state_flags - Dump device driver state flags
5883  * @oidp: sysctl oid structure
5884  * @arg1: pointer to private data structure
5885  * @arg2: unused
5886  * @req: sysctl request pointer
5887  *
5888  * Callback for "state" sysctl to display currently set driver state flags.
5889  */
5890 static int
5891 ice_sysctl_dump_state_flags(SYSCTL_HANDLER_ARGS)
5892 {
5893 	struct ice_softc *sc = (struct ice_softc *)arg1;
5894 	struct sbuf *sbuf;
5895 	u32 copied_state;
5896 	unsigned int i;
5897 	bool at_least_one = false;
5898 
5899 	UNREFERENCED_PARAMETER(oidp);
5900 	UNREFERENCED_PARAMETER(arg2);
5901 
5902 	if (ice_driver_is_detaching(sc))
5903 		return (ESHUTDOWN);
5904 
5905 	/* Make a copy of the state to ensure we display coherent values */
5906 	copied_state = atomic_load_acq_32(&sc->state);
5907 
5908 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5909 
5910 	/* Add the string for each set state to the sbuf */
5911 	for (i = 0; i < 32; i++) {
5912 		if (copied_state & BIT(i)) {
5913 			const char *str = ice_state_to_str((enum ice_state)i);
5914 
5915 			at_least_one = true;
5916 
5917 			if (str)
5918 				sbuf_printf(sbuf, "\n%s", str);
5919 			else
5920 				sbuf_printf(sbuf, "\nBIT(%u)", i);
5921 		}
5922 	}
5923 
5924 	if (!at_least_one)
5925 		sbuf_printf(sbuf, "Nothing set");
5926 
5927 	sbuf_finish(sbuf);
5928 	sbuf_delete(sbuf);
5929 
5930 	return (0);
5931 }
5932 
/*
 * Description string for the "debug_mask" sysctl: lists the debug flag bits
 * shown to the user. The text is assembled from adjacent string literals,
 * one per output line.
 */
#define ICE_SYSCTL_DEBUG_MASK_HELP \
	"\nSelect debug statements to print to kernel messages" \
	"\nFlags:" \
	"\n\t        0x1 - Function Tracing" \
	"\n\t        0x2 - Driver Initialization" \
	"\n\t        0x4 - Release" \
	"\n\t        0x8 - FW Logging" \
	"\n\t       0x10 - Link" \
	"\n\t       0x20 - PHY" \
	"\n\t       0x40 - Queue Context" \
	"\n\t       0x80 - NVM" \
	"\n\t      0x100 - LAN" \
	"\n\t      0x200 - Flow" \
	"\n\t      0x400 - DCB" \
	"\n\t      0x800 - Diagnostics" \
	"\n\t     0x1000 - Flow Director" \
	"\n\t     0x2000 - Switch" \
	"\n\t     0x4000 - Scheduler" \
	"\n\t     0x8000 - RDMA" \
	"\n\t    0x10000 - DDP Package" \
	"\n\t    0x20000 - Resources" \
	"\n\t    0x40000 - ACL" \
	"\n\t    0x80000 - PTP" \
	"\n\t   0x100000 - Admin Queue messages" \
	"\n\t   0x200000 - Admin Queue descriptors" \
	"\n\t   0x400000 - Admin Queue descriptor buffers" \
	"\n\t   0x800000 - Admin Queue commands" \
	"\n\t  0x1000000 - Parser" \
	"\n\t  ..." \
	"\n\t  0x8000000 - (Reserved for user)" \
	"\n\t" \
	"\nUse \"sysctl -x\" to view flags properly."
5965 
5966 /**
5967  * ice_add_debug_tunables - Add tunables helpful for debugging the device driver
5968  * @sc: device private structure
5969  *
5970  * Add sysctl tunable values related to debugging the device driver. For now,
5971  * this means a tunable to set the debug mask early during driver load.
5972  *
5973  * The debug node will be marked CTLFLAG_SKIP unless INVARIANTS is defined, so
5974  * that in normal kernel builds, these will all be hidden, but on a debug
5975  * kernel they will be more easily visible.
5976  */
5977 static void
5978 ice_add_debug_tunables(struct ice_softc *sc)
5979 {
5980 	struct sysctl_oid_list *debug_list;
5981 	device_t dev = sc->dev;
5982 
5983 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5984 	struct sysctl_oid_list *ctx_list =
5985 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
5986 
5987 	sc->debug_sysctls = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "debug",
5988 					    ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
5989 					    NULL, "Debug Sysctls");
5990 	debug_list = SYSCTL_CHILDREN(sc->debug_sysctls);
5991 
5992 	SYSCTL_ADD_U64(ctx, debug_list, OID_AUTO, "debug_mask",
5993 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RWTUN,
5994 		       &sc->hw.debug_mask, 0,
5995 		       ICE_SYSCTL_DEBUG_MASK_HELP);
5996 
5997 	/* Load the default value from the global sysctl first */
5998 	sc->enable_tx_fc_filter = ice_enable_tx_fc_filter;
5999 
6000 	SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "enable_tx_fc_filter",
6001 			ICE_CTLFLAG_DEBUG | CTLFLAG_RDTUN,
6002 			&sc->enable_tx_fc_filter, 0,
6003 			"Drop Ethertype 0x8808 control frames originating from software on this PF");
6004 
6005 	sc->tx_balance_en = ice_tx_balance_en;
6006 	SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "tx_balance",
6007 			ICE_CTLFLAG_DEBUG | CTLFLAG_RWTUN,
6008 			&sc->tx_balance_en, 0,
6009 			"Enable 5-layer scheduler topology");
6010 
6011 	/* Load the default value from the global sysctl first */
6012 	sc->enable_tx_lldp_filter = ice_enable_tx_lldp_filter;
6013 
6014 	SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "enable_tx_lldp_filter",
6015 			ICE_CTLFLAG_DEBUG | CTLFLAG_RDTUN,
6016 			&sc->enable_tx_lldp_filter, 0,
6017 			"Drop Ethertype 0x88cc LLDP frames originating from software on this PF");
6018 
6019 	ice_add_fw_logging_tunables(sc, sc->debug_sysctls);
6020 }
6021 
/*
 * Description string for the "request_reset" sysctl, listing the reset
 * keywords described to the user.
 */
#define ICE_SYSCTL_HELP_REQUEST_RESET \
	"\nRequest the driver to initiate a reset." \
	"\n\tpfr - Initiate a PF reset" \
	"\n\tcorer - Initiate a CORE reset" \
	"\n\tglobr - Initiate a GLOBAL reset"
6027 
6028 /**
6029  * @var rl_sysctl_ticks
6030  * @brief timestamp for latest reset request sysctl call
6031  *
6032  * Helps rate-limit the call to the sysctl which resets the device
6033  */
6034 int rl_sysctl_ticks = 0;
6035 
6036 /**
6037  * ice_sysctl_request_reset - Request that the driver initiate a reset
6038  * @oidp: sysctl oid structure
6039  * @arg1: pointer to private data structure
6040  * @arg2: unused
6041  * @req: sysctl request pointer
6042  *
6043  * Callback for "request_reset" sysctl to request that the driver initiate
6044  * a reset. Expects to be passed one of the following strings
6045  *
6046  * "pfr" - Initiate a PF reset
6047  * "corer" - Initiate a CORE reset
6048  * "globr" - Initiate a Global reset
6049  */
6050 static int
6051 ice_sysctl_request_reset(SYSCTL_HANDLER_ARGS)
6052 {
6053 	struct ice_softc *sc = (struct ice_softc *)arg1;
6054 	struct ice_hw *hw = &sc->hw;
6055 	enum ice_status status;
6056 	enum ice_reset_req reset_type = ICE_RESET_INVAL;
6057 	const char *reset_message;
6058 	int ret;
6059 
6060 	/* Buffer to store the requested reset string. Must contain enough
6061 	 * space to store the largest expected reset string, which currently
6062 	 * means 6 bytes of space.
6063 	 */
6064 	char reset[6] = "";
6065 
6066 	UNREFERENCED_PARAMETER(arg2);
6067 
6068 	ret = priv_check(curthread, PRIV_DRIVER);
6069 	if (ret)
6070 		return (ret);
6071 
6072 	if (ice_driver_is_detaching(sc))
6073 		return (ESHUTDOWN);
6074 
6075 	/* Read in the requested reset type. */
6076 	ret = sysctl_handle_string(oidp, reset, sizeof(reset), req);
6077 	if ((ret) || (req->newptr == NULL))
6078 		return (ret);
6079 
6080 	if (strcmp(reset, "pfr") == 0) {
6081 		reset_message = "Requesting a PF reset";
6082 		reset_type = ICE_RESET_PFR;
6083 	} else if (strcmp(reset, "corer") == 0) {
6084 		reset_message = "Initiating a CORE reset";
6085 		reset_type = ICE_RESET_CORER;
6086 	} else if (strcmp(reset, "globr") == 0) {
6087 		reset_message = "Initiating a GLOBAL reset";
6088 		reset_type = ICE_RESET_GLOBR;
6089 	} else if (strcmp(reset, "empr") == 0) {
6090 		device_printf(sc->dev, "Triggering an EMP reset via software is not currently supported\n");
6091 		return (EOPNOTSUPP);
6092 	}
6093 
6094 	if (reset_type == ICE_RESET_INVAL) {
6095 		device_printf(sc->dev, "%s is not a valid reset request\n", reset);
6096 		return (EINVAL);
6097 	}
6098 
6099 	/*
6100 	 * Rate-limit the frequency at which this function is called.
6101 	 * Assuming this is called successfully once, typically,
6102 	 * everything should be handled within the allotted time frame.
6103 	 * However, in the odd setup situations, we've also put in
6104 	 * guards for when the reset has finished, but we're in the
6105 	 * process of rebuilding. And instead of queueing an intent,
6106 	 * simply error out and let the caller retry, if so desired.
6107 	 */
6108 	if (TICKS_2_MSEC(ticks - rl_sysctl_ticks) < 500) {
6109 		device_printf(sc->dev,
6110 		    "Call frequency too high. Operation aborted.\n");
6111 		return (EBUSY);
6112 	}
6113 	rl_sysctl_ticks = ticks;
6114 
6115 	if (TICKS_2_MSEC(ticks - sc->rebuild_ticks) < 100) {
6116 		device_printf(sc->dev, "Device rebuilding. Operation aborted.\n");
6117 		return (EBUSY);
6118 	}
6119 
6120 	if (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_DEVSTATE_M) {
6121 		device_printf(sc->dev, "Device in reset. Operation aborted.\n");
6122 		return (EBUSY);
6123 	}
6124 
6125 	device_printf(sc->dev, "%s\n", reset_message);
6126 
6127 	/* Initiate the PF reset during the admin status task */
6128 	if (reset_type == ICE_RESET_PFR) {
6129 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
6130 		return (0);
6131 	}
6132 
6133 	/*
6134 	 * Other types of resets including CORE and GLOBAL resets trigger an
6135 	 * interrupt on all PFs. Initiate the reset now. Preparation and
6136 	 * rebuild logic will be handled by the admin status task.
6137 	 */
6138 	status = ice_reset(hw, reset_type);
6139 
6140 	/*
6141 	 * Resets can take a long time and we still don't want another call
6142 	 * to this function before we settle down.
6143 	 */
6144 	rl_sysctl_ticks = ticks;
6145 
6146 	if (status) {
6147 		device_printf(sc->dev, "failed to initiate device reset, err %s\n",
6148 			      ice_status_str(status));
6149 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
6150 		return (EFAULT);
6151 	}
6152 
6153 	return (0);
6154 }
6155 
/*
 * Help text describing the cluster selection flags for the FW debug dump.
 * The text is assembled from adjacent string literals, one per output line.
 */
#define ICE_SYSCTL_HELP_FW_DEBUG_DUMP_CLUSTER_SETTING \
	"\nSelect clusters to dump with \"dump\" sysctl" \
	"\nFlags:" \
	"\n\t   0x1 - Switch" \
	"\n\t   0x2 - ACL" \
	"\n\t   0x4 - Tx Scheduler" \
	"\n\t   0x8 - Profile Configuration" \
	"\n\t  0x20 - Link" \
	"\n\t  0x80 - DCB" \
	"\n\t 0x100 - L2P" \
	"\n\t" \
	"\nUse \"sysctl -x\" to view flags properly."
6168 
6169 /**
6170  * ice_sysctl_fw_debug_dump_cluster_setting - Set which clusters to dump
6171  *     from FW when FW debug dump occurs
6172  * @oidp: sysctl oid structure
6173  * @arg1: pointer to private data structure
6174  * @arg2: unused
6175  * @req: sysctl request pointer
6176  */
6177 static int
6178 ice_sysctl_fw_debug_dump_cluster_setting(SYSCTL_HANDLER_ARGS)
6179 {
6180 	struct ice_softc *sc = (struct ice_softc *)arg1;
6181 	device_t dev = sc->dev;
6182 	u16 clusters;
6183 	int ret;
6184 
6185 	UNREFERENCED_PARAMETER(arg2);
6186 
6187 	ret = priv_check(curthread, PRIV_DRIVER);
6188 	if (ret)
6189 		return (ret);
6190 
6191 	if (ice_driver_is_detaching(sc))
6192 		return (ESHUTDOWN);
6193 
6194 	clusters = sc->fw_debug_dump_cluster_mask;
6195 
6196 	ret = sysctl_handle_16(oidp, &clusters, 0, req);
6197 	if ((ret) || (req->newptr == NULL))
6198 		return (ret);
6199 
6200 	if (!clusters ||
6201 	    (clusters & ~(ICE_FW_DEBUG_DUMP_VALID_CLUSTER_MASK))) {
6202 		device_printf(dev,
6203 		    "%s: ERROR: Incorrect settings requested\n",
6204 		    __func__);
6205 		return (EINVAL);
6206 	}
6207 
6208 	sc->fw_debug_dump_cluster_mask = clusters;
6209 
6210 	return (0);
6211 }
6212 
6213 #define ICE_FW_DUMP_AQ_COUNT_LIMIT	(10000)
6214 
6215 /**
6216  * ice_fw_debug_dump_print_cluster - Print formatted cluster data from FW
6217  * @sc: the device softc
6218  * @sbuf: initialized sbuf to print data to
6219  * @cluster_id: FW cluster ID to print data from
6220  *
6221  * Reads debug data from the specified cluster id in the FW and prints it to
6222  * the input sbuf. This function issues multiple AQ commands to the FW in
6223  * order to get all of the data in the cluster.
6224  *
6225  * @remark Only intended to be used by the sysctl handler
6226  * ice_sysctl_fw_debug_dump_do_dump
6227  */
6228 static void
6229 ice_fw_debug_dump_print_cluster(struct ice_softc *sc, struct sbuf *sbuf, u16 cluster_id)
6230 {
6231 	struct ice_hw *hw = &sc->hw;
6232 	device_t dev = sc->dev;
6233 	u16 data_buf_size = ICE_AQ_MAX_BUF_LEN;
6234 	const u8 reserved_buf[8] = {};
6235 	enum ice_status status;
6236 	int counter = 0;
6237 	u8 *data_buf;
6238 
6239 	/* Other setup */
6240 	data_buf = (u8 *)malloc(data_buf_size, M_ICE, M_NOWAIT | M_ZERO);
6241 	if (!data_buf)
6242 		return;
6243 
6244 	/* Input parameters / loop variables */
6245 	u16 table_id = 0;
6246 	u32 offset = 0;
6247 
6248 	/* Output from the Get Internal Data AQ command */
6249 	u16 ret_buf_size = 0;
6250 	u16 ret_next_table = 0;
6251 	u32 ret_next_index = 0;
6252 
6253 	ice_debug(hw, ICE_DBG_DIAG, "%s: dumping cluster id %d\n", __func__,
6254 	    cluster_id);
6255 
6256 	for (;;) {
6257 		/* Do not trust the FW behavior to be completely correct */
6258 		if (counter++ >= ICE_FW_DUMP_AQ_COUNT_LIMIT) {
6259 			device_printf(dev,
6260 			    "%s: Exceeded counter limit for cluster %d\n",
6261 			    __func__, cluster_id);
6262 			break;
6263 		}
6264 
6265 		ice_debug(hw, ICE_DBG_DIAG, "---\n");
6266 		ice_debug(hw, ICE_DBG_DIAG,
6267 		    "table_id 0x%04x offset 0x%08x buf_size %d\n",
6268 		    table_id, offset, data_buf_size);
6269 
6270 		status = ice_aq_get_internal_data(hw, cluster_id, table_id,
6271 		    offset, data_buf, data_buf_size, &ret_buf_size,
6272 		    &ret_next_table, &ret_next_index, NULL);
6273 		if (status) {
6274 			device_printf(dev,
6275 			    "%s: ice_aq_get_internal_data in cluster %d: err %s aq_err %s\n",
6276 			    __func__, cluster_id, ice_status_str(status),
6277 			    ice_aq_str(hw->adminq.sq_last_status));
6278 			break;
6279 		}
6280 
6281 		ice_debug(hw, ICE_DBG_DIAG,
6282 		    "ret_table_id 0x%04x ret_offset 0x%08x ret_buf_size %d\n",
6283 		    ret_next_table, ret_next_index, ret_buf_size);
6284 
6285 		/* Print cluster id */
6286 		u32 print_cluster_id = (u32)cluster_id;
6287 		sbuf_bcat(sbuf, &print_cluster_id, sizeof(print_cluster_id));
6288 		/* Print table id */
6289 		u32 print_table_id = (u32)table_id;
6290 		sbuf_bcat(sbuf, &print_table_id, sizeof(print_table_id));
6291 		/* Print table length */
6292 		u32 print_table_length = (u32)ret_buf_size;
6293 		sbuf_bcat(sbuf, &print_table_length, sizeof(print_table_length));
6294 		/* Print current offset */
6295 		u32 print_curr_offset = offset;
6296 		sbuf_bcat(sbuf, &print_curr_offset, sizeof(print_curr_offset));
6297 		/* Print reserved bytes */
6298 		sbuf_bcat(sbuf, reserved_buf, sizeof(reserved_buf));
6299 		/* Print data */
6300 		sbuf_bcat(sbuf, data_buf, ret_buf_size);
6301 
6302 		/* Adjust loop variables */
6303 		memset(data_buf, 0, data_buf_size);
6304 		bool same_table_next = (table_id == ret_next_table);
6305 		bool last_table_next = (ret_next_table == 0xff || ret_next_table == 0xffff);
6306 		bool last_offset_next = (ret_next_index == 0xffffffff || ret_next_index == 0);
6307 
6308 		if ((!same_table_next && !last_offset_next) ||
6309 		    (same_table_next && last_table_next)) {
6310 			device_printf(dev,
6311 			    "%s: Unexpected conditions for same_table_next(%d) last_table_next(%d) last_offset_next(%d), ending cluster (%d)\n",
6312 			    __func__, same_table_next, last_table_next, last_offset_next, cluster_id);
6313 			break;
6314 		}
6315 
6316 		if (!same_table_next && !last_table_next && last_offset_next) {
6317 			/* We've hit the end of the table */
6318 			table_id = ret_next_table;
6319 			offset = 0;
6320 		}
6321 		else if (!same_table_next && last_table_next && last_offset_next) {
6322 			/* We've hit the end of the cluster */
6323 			break;
6324 		}
6325 		else if (same_table_next && !last_table_next && last_offset_next) {
6326 			if (cluster_id == 0x1 && table_id < 39)
6327 				table_id += 1;
6328 			else
6329 				break;
6330 		}
6331 		else { /* if (same_table_next && !last_table_next && !last_offset_next) */
6332 			/* More data left in the table */
6333 			offset = ret_next_index;
6334 		}
6335 	}
6336 
6337 	free(data_buf, M_ICE);
6338 }
6339 
/* Help text explaining how to trigger and read the FW debug dump */
#define ICE_SYSCTL_HELP_FW_DEBUG_DUMP_DO_DUMP \
	"\nWrite 1 to output a FW debug dump containing the clusters specified by the \"clusters\" sysctl" \
	"\nThe \"-b\" flag must be used in order to dump this data as binary data because" \
	"\nthis data is opaque and not a string."

/*
 * Output size estimates, in bytes, that ice_sysctl_fw_debug_dump_do_dump
 * adds up when a caller only asks for an upper bound on the dump size:
 * a base amount, plus one amount for each of the cluster mask bits 0x1,
 * 0x2 and 0x4 that is selected.
 */
#define ICE_FW_DUMP_BASE_TEXT_SIZE	(1024 * 1024)
#define ICE_FW_DUMP_CLUST0_TEXT_SIZE	(2 * 1024 * 1024)
#define ICE_FW_DUMP_CLUST1_TEXT_SIZE	(128 * 1024)
#define ICE_FW_DUMP_CLUST2_TEXT_SIZE	(2 * 1024 * 1024)
6349 
6350 /**
6351  * ice_sysctl_fw_debug_dump_do_dump - Dump data from FW to sysctl output
6352  * @oidp: sysctl oid structure
6353  * @arg1: pointer to private data structure
6354  * @arg2: unused
6355  * @req: sysctl request pointer
6356  *
6357  * Sysctl handler for the debug.dump.dump sysctl. Prints out a specially-
6358  * formatted dump of some debug FW data intended to be processed by a special
6359  * Intel tool. Prints out the cluster data specified by the "clusters"
6360  * sysctl.
6361  *
6362  * @remark The actual AQ calls and printing are handled by a helper
6363  * function above.
6364  */
6365 static int
6366 ice_sysctl_fw_debug_dump_do_dump(SYSCTL_HANDLER_ARGS)
6367 {
6368 	struct ice_softc *sc = (struct ice_softc *)arg1;
6369 	device_t dev = sc->dev;
6370 	struct sbuf *sbuf;
6371 	int bit, ret;
6372 
6373 	UNREFERENCED_PARAMETER(arg2);
6374 
6375 	ret = priv_check(curthread, PRIV_DRIVER);
6376 	if (ret)
6377 		return (ret);
6378 
6379 	if (ice_driver_is_detaching(sc))
6380 		return (ESHUTDOWN);
6381 
6382 	/* If the user hasn't written "1" to this sysctl yet: */
6383 	if (!ice_test_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP)) {
6384 		/* Avoid output on the first set of reads to this sysctl in
6385 		 * order to prevent a null byte from being written to the
6386 		 * end result when called via sysctl(8).
6387 		 */
6388 		if (req->oldptr == NULL && req->newptr == NULL) {
6389 			ret = SYSCTL_OUT(req, 0, 0);
6390 			return (ret);
6391 		}
6392 
6393 		char input_buf[2] = "";
6394 		ret = sysctl_handle_string(oidp, input_buf, sizeof(input_buf), req);
6395 		if ((ret) || (req->newptr == NULL))
6396 			return (ret);
6397 
6398 		/* If we get '1', then indicate we'll do a dump in the next
6399 		 * sysctl read call.
6400 		 */
6401 		if (input_buf[0] == '1') {
6402 			ice_set_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP);
6403 			return (0);
6404 		}
6405 
6406 		return (EINVAL);
6407 	}
6408 
6409 	/* --- FW debug dump state is set --- */
6410 
6411 	if (!sc->fw_debug_dump_cluster_mask) {
6412 		device_printf(dev,
6413 		    "%s: Debug Dump failed because no cluster was specified.\n",
6414 		    __func__);
6415 		ret = EINVAL;
6416 		goto out;
6417 	}
6418 
6419 	/* Caller just wants the upper bound for size */
6420 	if (req->oldptr == NULL && req->newptr == NULL) {
6421 		size_t est_output_len = ICE_FW_DUMP_BASE_TEXT_SIZE;
6422 		if (sc->fw_debug_dump_cluster_mask & 0x1)
6423 			est_output_len += ICE_FW_DUMP_CLUST0_TEXT_SIZE;
6424 		if (sc->fw_debug_dump_cluster_mask & 0x2)
6425 			est_output_len += ICE_FW_DUMP_CLUST1_TEXT_SIZE;
6426 		if (sc->fw_debug_dump_cluster_mask & 0x4)
6427 			est_output_len += ICE_FW_DUMP_CLUST2_TEXT_SIZE;
6428 
6429 		ret = SYSCTL_OUT(req, 0, est_output_len);
6430 		return (ret);
6431 	}
6432 
6433 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6434 	sbuf_clear_flags(sbuf, SBUF_INCLUDENUL);
6435 
6436 	ice_debug(&sc->hw, ICE_DBG_DIAG, "%s: Debug Dump running...\n", __func__);
6437 
6438 	for_each_set_bit(bit, &sc->fw_debug_dump_cluster_mask,
6439 	    sizeof(sc->fw_debug_dump_cluster_mask) * 8)
6440 		ice_fw_debug_dump_print_cluster(sc, sbuf, bit);
6441 
6442 	sbuf_finish(sbuf);
6443 	sbuf_delete(sbuf);
6444 
6445 out:
6446 	ice_clear_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP);
6447 	return (ret);
6448 }
6449 
6450 /**
6451  * ice_add_debug_sysctls - Add sysctls helpful for debugging the device driver
6452  * @sc: device private structure
6453  *
6454  * Add sysctls related to debugging the device driver. Generally these should
6455  * simply be sysctls which dump internal driver state, to aid in understanding
6456  * what the driver is doing.
6457  */
6458 static void
6459 ice_add_debug_sysctls(struct ice_softc *sc)
6460 {
6461 	struct sysctl_oid *sw_node, *dump_node;
6462 	struct sysctl_oid_list *debug_list, *sw_list, *dump_list;
6463 	device_t dev = sc->dev;
6464 
6465 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
6466 
6467 	debug_list = SYSCTL_CHILDREN(sc->debug_sysctls);
6468 
6469 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "request_reset",
6470 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_WR, sc, 0,
6471 			ice_sysctl_request_reset, "A",
6472 			ICE_SYSCTL_HELP_REQUEST_RESET);
6473 
6474 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "pfr_count",
6475 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6476 		       &sc->soft_stats.pfr_count, 0,
6477 		       "# of PF resets handled");
6478 
6479 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "corer_count",
6480 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6481 		       &sc->soft_stats.corer_count, 0,
6482 		       "# of CORE resets handled");
6483 
6484 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "globr_count",
6485 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6486 		       &sc->soft_stats.globr_count, 0,
6487 		       "# of Global resets handled");
6488 
6489 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "empr_count",
6490 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6491 		       &sc->soft_stats.empr_count, 0,
6492 		       "# of EMP resets handled");
6493 
6494 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "tx_mdd_count",
6495 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6496 		       &sc->soft_stats.tx_mdd_count, 0,
6497 		       "# of Tx MDD events detected");
6498 
6499 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "rx_mdd_count",
6500 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6501 		       &sc->soft_stats.rx_mdd_count, 0,
6502 		       "# of Rx MDD events detected");
6503 
6504 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "state",
6505 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6506 			ice_sysctl_dump_state_flags, "A",
6507 			"Driver State Flags");
6508 
6509 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_type_low",
6510 			ICE_CTLFLAG_DEBUG | CTLTYPE_U64 | CTLFLAG_RW, sc, 0,
6511 			ice_sysctl_phy_type_low, "QU",
6512 			"PHY type Low from Get PHY Caps/Set PHY Cfg");
6513 
6514 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_type_high",
6515 			ICE_CTLFLAG_DEBUG | CTLTYPE_U64 | CTLFLAG_RW, sc, 0,
6516 			ice_sysctl_phy_type_high, "QU",
6517 			"PHY type High from Get PHY Caps/Set PHY Cfg");
6518 
6519 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_sw_caps",
6520 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6521 			ice_sysctl_phy_sw_caps, "",
6522 			"Get PHY Capabilities (Software configuration)");
6523 
6524 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_nvm_caps",
6525 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6526 			ice_sysctl_phy_nvm_caps, "",
6527 			"Get PHY Capabilities (NVM configuration)");
6528 
6529 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_topo_caps",
6530 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6531 			ice_sysctl_phy_topo_caps, "",
6532 			"Get PHY Capabilities (Topology configuration)");
6533 
6534 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_link_status",
6535 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6536 			ice_sysctl_phy_link_status, "",
6537 			"Get PHY Link Status");
6538 
6539 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "read_i2c_diag_data",
6540 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6541 			ice_sysctl_read_i2c_diag_data, "A",
6542 			"Dump selected diagnostic data from FW");
6543 
6544 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "fw_build",
6545 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD, &sc->hw.fw_build, 0,
6546 		       "FW Build ID");
6547 
6548 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "os_ddp_version",
6549 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6550 			ice_sysctl_os_pkg_version, "A",
6551 			"DDP package name and version found in ice_ddp");
6552 
6553 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "cur_lldp_persist_status",
6554 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6555 			ice_sysctl_fw_cur_lldp_persist_status, "A",
6556 			"Current LLDP persistent status");
6557 
6558 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "dflt_lldp_persist_status",
6559 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6560 			ice_sysctl_fw_dflt_lldp_persist_status, "A",
6561 			"Default LLDP persistent status");
6562 
6563 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "negotiated_fc",
6564 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6565 			ice_sysctl_negotiated_fc, "A",
6566 			"Current Negotiated Flow Control mode");
6567 
6568 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "local_dcbx_cfg",
6569 			CTLTYPE_STRING | CTLFLAG_RD, sc, ICE_AQ_LLDP_MIB_LOCAL,
6570 			ice_sysctl_dump_dcbx_cfg, "A",
6571 			"Dumps Local MIB information from firmware");
6572 
6573 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "remote_dcbx_cfg",
6574 			CTLTYPE_STRING | CTLFLAG_RD, sc, ICE_AQ_LLDP_MIB_REMOTE,
6575 			ice_sysctl_dump_dcbx_cfg, "A",
6576 			"Dumps Remote MIB information from firmware");
6577 
6578 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "pf_vsi_cfg", CTLTYPE_STRING | CTLFLAG_RD,
6579 			sc, 0, ice_sysctl_dump_vsi_cfg, "A",
6580 			"Dumps Selected PF VSI parameters from firmware");
6581 
6582 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "query_port_ets", CTLTYPE_STRING | CTLFLAG_RD,
6583 			sc, 0, ice_sysctl_query_port_ets, "A",
6584 			"Prints selected output from Query Port ETS AQ command");
6585 
6586 	sw_node = SYSCTL_ADD_NODE(ctx, debug_list, OID_AUTO, "switch",
6587 				  ICE_CTLFLAG_DEBUG | CTLFLAG_RD, NULL,
6588 				  "Switch Configuration");
6589 	sw_list = SYSCTL_CHILDREN(sw_node);
6590 
6591 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "mac_filters",
6592 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6593 			ice_sysctl_dump_mac_filters, "A",
6594 			"MAC Filters");
6595 
6596 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "vlan_filters",
6597 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6598 			ice_sysctl_dump_vlan_filters, "A",
6599 			"VLAN Filters");
6600 
6601 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "ethertype_filters",
6602 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6603 			ice_sysctl_dump_ethertype_filters, "A",
6604 			"Ethertype Filters");
6605 
6606 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "ethertype_mac_filters",
6607 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6608 			ice_sysctl_dump_ethertype_mac_filters, "A",
6609 			"Ethertype/MAC Filters");
6610 
6611 	dump_node = SYSCTL_ADD_NODE(ctx, debug_list, OID_AUTO, "dump",
6612 				  ICE_CTLFLAG_DEBUG | CTLFLAG_RD, NULL,
6613 				  "Internal FW Dump");
6614 	dump_list = SYSCTL_CHILDREN(dump_node);
6615 
6616 	SYSCTL_ADD_PROC(ctx, dump_list, OID_AUTO, "clusters",
6617 			ICE_CTLFLAG_DEBUG | CTLTYPE_U16 | CTLFLAG_RW, sc, 0,
6618 			ice_sysctl_fw_debug_dump_cluster_setting, "SU",
6619 			ICE_SYSCTL_HELP_FW_DEBUG_DUMP_CLUSTER_SETTING);
6620 
6621 	SYSCTL_ADD_PROC(ctx, dump_list, OID_AUTO, "dump",
6622 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
6623 			ice_sysctl_fw_debug_dump_do_dump, "",
6624 			ICE_SYSCTL_HELP_FW_DEBUG_DUMP_DO_DUMP);
6625 }
6626 
6627 /**
6628  * ice_vsi_disable_tx - Disable (unconfigure) Tx queues for a VSI
6629  * @vsi: the VSI to disable
6630  *
6631  * Disables the Tx queues associated with this VSI. Essentially the opposite
6632  * of ice_cfg_vsi_for_tx.
6633  */
6634 int
6635 ice_vsi_disable_tx(struct ice_vsi *vsi)
6636 {
6637 	struct ice_softc *sc = vsi->sc;
6638 	struct ice_hw *hw = &sc->hw;
6639 	enum ice_status status;
6640 	u32 *q_teids;
6641 	u16 *q_ids, *q_handles;
6642 	size_t q_teids_size, q_ids_size, q_handles_size;
6643 	int tc, j, buf_idx, err = 0;
6644 
6645 	if (vsi->num_tx_queues > 255)
6646 		return (ENOSYS);
6647 
6648 	q_teids_size = sizeof(*q_teids) * vsi->num_tx_queues;
6649 	q_teids = (u32 *)malloc(q_teids_size, M_ICE, M_NOWAIT|M_ZERO);
6650 	if (!q_teids)
6651 		return (ENOMEM);
6652 
6653 	q_ids_size = sizeof(*q_ids) * vsi->num_tx_queues;
6654 	q_ids = (u16 *)malloc(q_ids_size, M_ICE, M_NOWAIT|M_ZERO);
6655 	if (!q_ids) {
6656 		err = (ENOMEM);
6657 		goto free_q_teids;
6658 	}
6659 
6660 	q_handles_size = sizeof(*q_handles) * vsi->num_tx_queues;
6661 	q_handles = (u16 *)malloc(q_handles_size, M_ICE, M_NOWAIT|M_ZERO);
6662 	if (!q_handles) {
6663 		err = (ENOMEM);
6664 		goto free_q_ids;
6665 	}
6666 
6667 	ice_for_each_traffic_class(tc) {
6668 		struct ice_tc_info *tc_info = &vsi->tc_info[tc];
6669 		u16 start_idx, end_idx;
6670 
6671 		/* Skip rest of disabled TCs once the first
6672 		 * disabled TC is found */
6673 		if (!(vsi->tc_map & BIT(tc)))
6674 			break;
6675 
6676 		/* Fill out TX queue information for this TC */
6677 		start_idx = tc_info->qoffset;
6678 		end_idx = start_idx + tc_info->qcount_tx;
6679 		buf_idx = 0;
6680 		for (j = start_idx; j < end_idx; j++) {
6681 			struct ice_tx_queue *txq = &vsi->tx_queues[j];
6682 
6683 			q_ids[buf_idx] = vsi->tx_qmap[j];
6684 			q_handles[buf_idx] = txq->q_handle;
6685 			q_teids[buf_idx] = txq->q_teid;
6686 			buf_idx++;
6687 		}
6688 
6689 		status = ice_dis_vsi_txq(hw->port_info, vsi->idx, tc, buf_idx,
6690 					 q_handles, q_ids, q_teids, ICE_NO_RESET, 0, NULL);
6691 		if (status == ICE_ERR_DOES_NOT_EXIST) {
6692 			; /* Queues have already been disabled, no need to report this as an error */
6693 		} else if (status == ICE_ERR_RESET_ONGOING) {
6694 			device_printf(sc->dev,
6695 				      "Reset in progress. LAN Tx queues already disabled\n");
6696 			break;
6697 		} else if (status) {
6698 			device_printf(sc->dev,
6699 				      "Failed to disable LAN Tx queues: err %s aq_err %s\n",
6700 				      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
6701 			err = (ENODEV);
6702 			break;
6703 		}
6704 
6705 		/* Clear buffers */
6706 		memset(q_teids, 0, q_teids_size);
6707 		memset(q_ids, 0, q_ids_size);
6708 		memset(q_handles, 0, q_handles_size);
6709 	}
6710 
6711 /* free_q_handles: */
6712 	free(q_handles, M_ICE);
6713 free_q_ids:
6714 	free(q_ids, M_ICE);
6715 free_q_teids:
6716 	free(q_teids, M_ICE);
6717 
6718 	return err;
6719 }
6720 
6721 /**
6722  * ice_vsi_set_rss_params - Set the RSS parameters for the VSI
6723  * @vsi: the VSI to configure
6724  *
6725  * Sets the RSS table size and lookup table type for the VSI based on its
6726  * VSI type.
6727  */
6728 static void
6729 ice_vsi_set_rss_params(struct ice_vsi *vsi)
6730 {
6731 	struct ice_softc *sc = vsi->sc;
6732 	struct ice_hw_common_caps *cap;
6733 
6734 	cap = &sc->hw.func_caps.common_cap;
6735 
6736 	switch (vsi->type) {
6737 	case ICE_VSI_PF:
6738 		/* The PF VSI inherits RSS instance of the PF */
6739 		vsi->rss_table_size = cap->rss_table_size;
6740 		vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF;
6741 		break;
6742 	case ICE_VSI_VF:
6743 		vsi->rss_table_size = ICE_VSIQF_HLUT_ARRAY_SIZE;
6744 		vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI;
6745 		break;
6746 	default:
6747 		device_printf(sc->dev,
6748 			      "VSI %d: RSS not supported for VSI type %d\n",
6749 			      vsi->idx, vsi->type);
6750 		break;
6751 	}
6752 }
6753 
6754 /**
6755  * ice_vsi_add_txqs_ctx - Create a sysctl context and node to store txq sysctls
6756  * @vsi: The VSI to add the context for
6757  *
6758  * Creates a sysctl context for storing txq sysctls. Additionally creates
6759  * a node rooted at the given VSI's main sysctl node. This context will be
6760  * used to store per-txq sysctls which may need to be released during the
6761  * driver's lifetime.
6762  */
6763 void
6764 ice_vsi_add_txqs_ctx(struct ice_vsi *vsi)
6765 {
6766 	struct sysctl_oid_list *vsi_list;
6767 
6768 	sysctl_ctx_init(&vsi->txqs_ctx);
6769 
6770 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
6771 
6772 	vsi->txqs_node = SYSCTL_ADD_NODE(&vsi->txqs_ctx, vsi_list, OID_AUTO, "txqs",
6773 					 CTLFLAG_RD, NULL, "Tx Queues");
6774 }
6775 
6776 /**
6777  * ice_vsi_add_rxqs_ctx - Create a sysctl context and node to store rxq sysctls
6778  * @vsi: The VSI to add the context for
6779  *
6780  * Creates a sysctl context for storing rxq sysctls. Additionally creates
6781  * a node rooted at the given VSI's main sysctl node. This context will be
6782  * used to store per-rxq sysctls which may need to be released during the
6783  * driver's lifetime.
6784  */
6785 void
6786 ice_vsi_add_rxqs_ctx(struct ice_vsi *vsi)
6787 {
6788 	struct sysctl_oid_list *vsi_list;
6789 
6790 	sysctl_ctx_init(&vsi->rxqs_ctx);
6791 
6792 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
6793 
6794 	vsi->rxqs_node = SYSCTL_ADD_NODE(&vsi->rxqs_ctx, vsi_list, OID_AUTO, "rxqs",
6795 					 CTLFLAG_RD, NULL, "Rx Queues");
6796 }
6797 
6798 /**
6799  * ice_vsi_del_txqs_ctx - Delete the Tx queue sysctl context for this VSI
6800  * @vsi: The VSI to delete from
6801  *
6802  * Frees the txq sysctl context created for storing the per-queue Tx sysctls.
6803  * Must be called prior to freeing the Tx queue memory, in order to avoid
6804  * having sysctls point at stale memory.
6805  */
6806 void
6807 ice_vsi_del_txqs_ctx(struct ice_vsi *vsi)
6808 {
6809 	device_t dev = vsi->sc->dev;
6810 	int err;
6811 
6812 	if (vsi->txqs_node) {
6813 		err = sysctl_ctx_free(&vsi->txqs_ctx);
6814 		if (err)
6815 			device_printf(dev, "failed to free VSI %d txqs_ctx, err %s\n",
6816 				      vsi->idx, ice_err_str(err));
6817 		vsi->txqs_node = NULL;
6818 	}
6819 }
6820 
6821 /**
6822  * ice_vsi_del_rxqs_ctx - Delete the Rx queue sysctl context for this VSI
6823  * @vsi: The VSI to delete from
6824  *
6825  * Frees the rxq sysctl context created for storing the per-queue Rx sysctls.
6826  * Must be called prior to freeing the Rx queue memory, in order to avoid
6827  * having sysctls point at stale memory.
6828  */
6829 void
6830 ice_vsi_del_rxqs_ctx(struct ice_vsi *vsi)
6831 {
6832 	device_t dev = vsi->sc->dev;
6833 	int err;
6834 
6835 	if (vsi->rxqs_node) {
6836 		err = sysctl_ctx_free(&vsi->rxqs_ctx);
6837 		if (err)
6838 			device_printf(dev, "failed to free VSI %d rxqs_ctx, err %s\n",
6839 				      vsi->idx, ice_err_str(err));
6840 		vsi->rxqs_node = NULL;
6841 	}
6842 }
6843 
6844 /**
6845  * ice_add_txq_sysctls - Add per-queue sysctls for a Tx queue
6846  * @txq: pointer to the Tx queue
6847  *
6848 * Add per-queue sysctls for a given Tx queue. Can't be called during
6849 * ice_add_vsi_sysctls, since the queue memory has not yet been setup.
6850  */
6851 void
6852 ice_add_txq_sysctls(struct ice_tx_queue *txq)
6853 {
6854 	struct ice_vsi *vsi = txq->vsi;
6855 	struct sysctl_ctx_list *ctx = &vsi->txqs_ctx;
6856 	struct sysctl_oid_list *txqs_list, *this_txq_list;
6857 	struct sysctl_oid *txq_node;
6858 	char txq_name[32], txq_desc[32];
6859 
6860 	const struct ice_sysctl_info ctls[] = {
6861 		{ &txq->stats.tx_packets, "tx_packets", "Queue Packets Transmitted" },
6862 		{ &txq->stats.tx_bytes, "tx_bytes", "Queue Bytes Transmitted" },
6863 		{ &txq->stats.mss_too_small, "mss_too_small", "TSO sends with an MSS less than 64" },
6864 		{ 0, 0, 0 }
6865 	};
6866 
6867 	const struct ice_sysctl_info *entry = ctls;
6868 
6869 	txqs_list = SYSCTL_CHILDREN(vsi->txqs_node);
6870 
6871 	snprintf(txq_name, sizeof(txq_name), "%u", txq->me);
6872 	snprintf(txq_desc, sizeof(txq_desc), "Tx Queue %u", txq->me);
6873 	txq_node = SYSCTL_ADD_NODE(ctx, txqs_list, OID_AUTO, txq_name,
6874 				   CTLFLAG_RD, NULL, txq_desc);
6875 	this_txq_list = SYSCTL_CHILDREN(txq_node);
6876 
6877 	/* Add the Tx queue statistics */
6878 	while (entry->stat != 0) {
6879 		SYSCTL_ADD_U64(ctx, this_txq_list, OID_AUTO, entry->name,
6880 			       CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
6881 			       entry->description);
6882 		entry++;
6883 	}
6884 
6885 	SYSCTL_ADD_U8(ctx, this_txq_list, OID_AUTO, "tc",
6886 		       CTLFLAG_RD, &txq->tc, 0,
6887 		       "Traffic Class that Queue belongs to");
6888 }
6889 
6890 /**
6891  * ice_add_rxq_sysctls - Add per-queue sysctls for an Rx queue
6892  * @rxq: pointer to the Rx queue
6893  *
6894  * Add per-queue sysctls for a given Rx queue. Can't be called during
6895  * ice_add_vsi_sysctls, since the queue memory has not yet been setup.
6896  */
6897 void
6898 ice_add_rxq_sysctls(struct ice_rx_queue *rxq)
6899 {
6900 	struct ice_vsi *vsi = rxq->vsi;
6901 	struct sysctl_ctx_list *ctx = &vsi->rxqs_ctx;
6902 	struct sysctl_oid_list *rxqs_list, *this_rxq_list;
6903 	struct sysctl_oid *rxq_node;
6904 	char rxq_name[32], rxq_desc[32];
6905 
6906 	const struct ice_sysctl_info ctls[] = {
6907 		{ &rxq->stats.rx_packets, "rx_packets", "Queue Packets Received" },
6908 		{ &rxq->stats.rx_bytes, "rx_bytes", "Queue Bytes Received" },
6909 		{ &rxq->stats.desc_errs, "rx_desc_errs", "Queue Rx Descriptor Errors" },
6910 		{ 0, 0, 0 }
6911 	};
6912 
6913 	const struct ice_sysctl_info *entry = ctls;
6914 
6915 	rxqs_list = SYSCTL_CHILDREN(vsi->rxqs_node);
6916 
6917 	snprintf(rxq_name, sizeof(rxq_name), "%u", rxq->me);
6918 	snprintf(rxq_desc, sizeof(rxq_desc), "Rx Queue %u", rxq->me);
6919 	rxq_node = SYSCTL_ADD_NODE(ctx, rxqs_list, OID_AUTO, rxq_name,
6920 				   CTLFLAG_RD, NULL, rxq_desc);
6921 	this_rxq_list = SYSCTL_CHILDREN(rxq_node);
6922 
6923 	/* Add the Rx queue statistics */
6924 	while (entry->stat != 0) {
6925 		SYSCTL_ADD_U64(ctx, this_rxq_list, OID_AUTO, entry->name,
6926 			       CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
6927 			       entry->description);
6928 		entry++;
6929 	}
6930 
6931 	SYSCTL_ADD_U8(ctx, this_rxq_list, OID_AUTO, "tc",
6932 		       CTLFLAG_RD, &rxq->tc, 0,
6933 		       "Traffic Class that Queue belongs to");
6934 }
6935 
6936 /**
6937  * ice_get_default_rss_key - Obtain a default RSS key
6938  * @seed: storage for the RSS key data
6939  *
6940  * Copies a pre-generated RSS key into the seed memory. The seed pointer must
6941  * point to a block of memory that is at least 40 bytes in size.
6942  *
6943  * The key isn't randomly generated each time this function is called because
6944  * that makes the RSS key change every time we reconfigure RSS. This does mean
6945  * that we're hard coding a possibly 'well known' key. We might want to
6946  * investigate randomly generating this key once during the first call.
6947  */
6948 static void
6949 ice_get_default_rss_key(u8 *seed)
6950 {
6951 	const u8 default_seed[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE] = {
6952 		0x39, 0xed, 0xff, 0x4d, 0x43, 0x58, 0x42, 0xc3, 0x5f, 0xb8,
6953 		0xa5, 0x32, 0x95, 0x65, 0x81, 0xcd, 0x36, 0x79, 0x71, 0x97,
6954 		0xde, 0xa4, 0x41, 0x40, 0x6f, 0x27, 0xe9, 0x81, 0x13, 0xa0,
6955 		0x95, 0x93, 0x5b, 0x1e, 0x9d, 0x27, 0x9d, 0x24, 0x84, 0xb5,
6956 	};
6957 
6958 	bcopy(default_seed, seed, ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE);
6959 }
6960 
6961 /**
6962  * ice_set_rss_key - Configure a given VSI with the default RSS key
6963  * @vsi: the VSI to configure
6964  *
6965  * Program the hardware RSS key. We use rss_getkey to grab the kernel RSS key.
6966  * If the kernel RSS interface is not available, this will fall back to our
6967  * pre-generated hash seed from ice_get_default_rss_key().
6968  */
6969 static int
6970 ice_set_rss_key(struct ice_vsi *vsi)
6971 {
6972 	struct ice_aqc_get_set_rss_keys keydata = { .standard_rss_key = {0} };
6973 	struct ice_softc *sc = vsi->sc;
6974 	struct ice_hw *hw = &sc->hw;
6975 	enum ice_status status;
6976 
6977 	/*
6978 	 * If the RSS kernel interface is disabled, this will return the
6979 	 * default RSS key above.
6980 	 */
6981 	rss_getkey(keydata.standard_rss_key);
6982 
6983 	status = ice_aq_set_rss_key(hw, vsi->idx, &keydata);
6984 	if (status) {
6985 		device_printf(sc->dev,
6986 			      "ice_aq_set_rss_key status %s, error %s\n",
6987 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
6988 		return (EIO);
6989 	}
6990 
6991 	return (0);
6992 }
6993 
6994 /**
6995  * ice_set_rss_flow_flds - Program the RSS hash flows after package init
6996  * @vsi: the VSI to configure
6997  *
6998  * If the package file is initialized, the default RSS flows are reset. We
6999  * need to reprogram the expected hash configuration. We'll use
7000  * rss_gethashconfig() to determine which flows to enable. If RSS kernel
7001  * support is not enabled, this macro will fall back to suitable defaults.
7002  */
7003 static void
7004 ice_set_rss_flow_flds(struct ice_vsi *vsi)
7005 {
7006 	struct ice_softc *sc = vsi->sc;
7007 	struct ice_hw *hw = &sc->hw;
7008 	struct ice_rss_hash_cfg rss_cfg = { 0, 0, ICE_RSS_ANY_HEADERS, false };
7009 	device_t dev = sc->dev;
7010 	enum ice_status status;
7011 	u_int rss_hash_config;
7012 
7013 	rss_hash_config = rss_gethashconfig();
7014 
7015 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) {
7016 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4;
7017 		rss_cfg.hash_flds = ICE_FLOW_HASH_IPV4;
7018 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7019 		if (status)
7020 			device_printf(dev,
7021 				      "ice_add_rss_cfg on VSI %d failed for ipv4 flow, err %s aq_err %s\n",
7022 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7023 	}
7024 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) {
7025 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_TCP;
7026 		rss_cfg.hash_flds = ICE_HASH_TCP_IPV4;
7027 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7028 		if (status)
7029 			device_printf(dev,
7030 				      "ice_add_rss_cfg on VSI %d failed for tcp4 flow, err %s aq_err %s\n",
7031 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7032 	}
7033 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) {
7034 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_UDP;
7035 		rss_cfg.hash_flds = ICE_HASH_UDP_IPV4;
7036 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7037 		if (status)
7038 			device_printf(dev,
7039 				      "ice_add_rss_cfg on VSI %d failed for udp4 flow, err %s aq_err %s\n",
7040 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7041 	}
7042 	if (rss_hash_config & (RSS_HASHTYPE_RSS_IPV6 | RSS_HASHTYPE_RSS_IPV6_EX)) {
7043 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6;
7044 		rss_cfg.hash_flds = ICE_FLOW_HASH_IPV6;
7045 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7046 		if (status)
7047 			device_printf(dev,
7048 				      "ice_add_rss_cfg on VSI %d failed for ipv6 flow, err %s aq_err %s\n",
7049 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7050 	}
7051 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) {
7052 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_TCP;
7053 		rss_cfg.hash_flds = ICE_HASH_TCP_IPV6;
7054 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7055 		if (status)
7056 			device_printf(dev,
7057 				      "ice_add_rss_cfg on VSI %d failed for tcp6 flow, err %s aq_err %s\n",
7058 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7059 	}
7060 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) {
7061 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_UDP;
7062 		rss_cfg.hash_flds = ICE_HASH_UDP_IPV6;
7063 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7064 		if (status)
7065 			device_printf(dev,
7066 				      "ice_add_rss_cfg on VSI %d failed for udp6 flow, err %s aq_err %s\n",
7067 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7068 	}
7069 
7070 	/* Warn about RSS hash types which are not supported */
7071 	/* coverity[dead_error_condition] */
7072 	if (rss_hash_config & ~ICE_DEFAULT_RSS_HASH_CONFIG) {
7073 		device_printf(dev,
7074 			      "ice_add_rss_cfg on VSI %d could not configure every requested hash type\n",
7075 			      vsi->idx);
7076 	}
7077 }
7078 
7079 /**
7080  * ice_set_rss_lut - Program the RSS lookup table for a VSI
7081  * @vsi: the VSI to configure
7082  *
7083  * Programs the RSS lookup table for a given VSI. We use
7084  * rss_get_indirection_to_bucket which will use the indirection table provided
7085  * by the kernel RSS interface when available. If the kernel RSS interface is
7086  * not available, we will fall back to a simple round-robin fashion queue
7087  * assignment.
7088  */
7089 static int
7090 ice_set_rss_lut(struct ice_vsi *vsi)
7091 {
7092 	struct ice_softc *sc = vsi->sc;
7093 	struct ice_hw *hw = &sc->hw;
7094 	device_t dev = sc->dev;
7095 	struct ice_aq_get_set_rss_lut_params lut_params;
7096 	enum ice_status status;
7097 	int i, err = 0;
7098 	u8 *lut;
7099 
7100 	lut = (u8 *)malloc(vsi->rss_table_size, M_ICE, M_NOWAIT|M_ZERO);
7101 	if (!lut) {
7102 		device_printf(dev, "Failed to allocate RSS lut memory\n");
7103 		return (ENOMEM);
7104 	}
7105 
7106 	/* Populate the LUT with max no. of queues. If the RSS kernel
7107 	 * interface is disabled, this will assign the lookup table in
7108 	 * a simple round robin fashion
7109 	 */
7110 	for (i = 0; i < vsi->rss_table_size; i++) {
7111 		/* XXX: this needs to be changed if num_rx_queues ever counts
7112 		 * more than just the RSS queues */
7113 		lut[i] = rss_get_indirection_to_bucket(i) % vsi->num_rx_queues;
7114 	}
7115 
7116 	lut_params.vsi_handle = vsi->idx;
7117 	lut_params.lut_size = vsi->rss_table_size;
7118 	lut_params.lut_type = vsi->rss_lut_type;
7119 	lut_params.lut = lut;
7120 	lut_params.global_lut_id = 0;
7121 	status = ice_aq_set_rss_lut(hw, &lut_params);
7122 	if (status) {
7123 		device_printf(dev,
7124 			      "Cannot set RSS lut, err %s aq_err %s\n",
7125 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7126 		err = (EIO);
7127 	}
7128 
7129 	free(lut, M_ICE);
7130 	return err;
7131 }
7132 
7133 /**
7134  * ice_config_rss - Configure RSS for a VSI
7135  * @vsi: the VSI to configure
7136  *
7137  * If FEATURE_RSS is enabled, configures the RSS lookup table and hash key for
7138  * a given VSI.
7139  */
7140 int
7141 ice_config_rss(struct ice_vsi *vsi)
7142 {
7143 	int err;
7144 
7145 	/* Nothing to do, if RSS is not enabled */
7146 	if (!ice_is_bit_set(vsi->sc->feat_en, ICE_FEATURE_RSS))
7147 		return 0;
7148 
7149 	err = ice_set_rss_key(vsi);
7150 	if (err)
7151 		return err;
7152 
7153 	ice_set_rss_flow_flds(vsi);
7154 
7155 	return ice_set_rss_lut(vsi);
7156 }
7157 
7158 /**
7159  * ice_log_pkg_init - Log a message about status of DDP initialization
7160  * @sc: the device softc pointer
7161  * @pkg_status: the status result of ice_copy_and_init_pkg
7162  *
7163  * Called by ice_load_pkg after an attempt to download the DDP package
7164  * contents to the device to log an appropriate message for the system
7165  * administrator about download status.
7166  *
7167  * @post ice_is_init_pkg_successful function is used to determine
7168  * whether the download was successful and DDP package is compatible
7169  * with this driver. Otherwise driver will transition to Safe Mode.
7170  */
7171 void
7172 ice_log_pkg_init(struct ice_softc *sc, enum ice_ddp_state pkg_status)
7173 {
7174 	struct ice_hw *hw = &sc->hw;
7175 	device_t dev = sc->dev;
7176 	struct sbuf *active_pkg, *os_pkg;
7177 
7178 	active_pkg = sbuf_new_auto();
7179 	ice_active_pkg_version_str(hw, active_pkg);
7180 	sbuf_finish(active_pkg);
7181 
7182 	os_pkg = sbuf_new_auto();
7183 	ice_os_pkg_version_str(hw, os_pkg);
7184 	sbuf_finish(os_pkg);
7185 
7186 	switch (pkg_status) {
7187 	case ICE_DDP_PKG_SUCCESS:
7188 		device_printf(dev,
7189 			      "The DDP package was successfully loaded: %s.\n",
7190 			      sbuf_data(active_pkg));
7191 		break;
7192 	case ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED:
7193 	case ICE_DDP_PKG_ALREADY_LOADED:
7194 		device_printf(dev,
7195 			      "DDP package already present on device: %s.\n",
7196 			      sbuf_data(active_pkg));
7197 		break;
7198 	case ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED:
7199 		device_printf(dev,
7200 			      "The driver could not load the DDP package file because a compatible DDP package is already present on the device.  The device has package %s.  The ice_ddp module has package: %s.\n",
7201 			      sbuf_data(active_pkg),
7202 			      sbuf_data(os_pkg));
7203 		break;
7204 	case ICE_DDP_PKG_FILE_VERSION_TOO_HIGH:
7205 		device_printf(dev,
7206 			      "The device has a DDP package that is higher than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7207 			      sbuf_data(active_pkg),
7208 			      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7209 		break;
7210 	case ICE_DDP_PKG_FILE_VERSION_TOO_LOW:
7211 		device_printf(dev,
7212 			      "The device has a DDP package that is lower than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7213 			      sbuf_data(active_pkg),
7214 			      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7215 		break;
7216 	case ICE_DDP_PKG_ALREADY_LOADED_NOT_SUPPORTED:
7217 		/*
7218 		 * This assumes that the active_pkg_ver will not be
7219 		 * initialized if the ice_ddp package version is not
7220 		 * supported.
7221 		 */
7222 		if (pkg_ver_empty(&hw->active_pkg_ver, hw->active_pkg_name)) {
7223 			/* The ice_ddp version is not supported */
7224 			if (pkg_ver_compatible(&hw->pkg_ver) > 0) {
7225 				device_printf(dev,
7226 					      "The DDP package in the ice_ddp module is higher than the driver supports.  The ice_ddp module has package %s.  The driver requires version %d.%d.x.x.  Please use an updated driver.  Entering Safe Mode.\n",
7227 					      sbuf_data(os_pkg),
7228 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7229 			} else if (pkg_ver_compatible(&hw->pkg_ver) < 0) {
7230 				device_printf(dev,
7231 					      "The DDP package in the ice_ddp module is lower than the driver supports.  The ice_ddp module has package %s.  The driver requires version %d.%d.x.x.  Please use an updated ice_ddp module.  Entering Safe Mode.\n",
7232 					      sbuf_data(os_pkg),
7233 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7234 			} else {
7235 				device_printf(dev,
7236 					      "An unknown error occurred when loading the DDP package.  The ice_ddp module has package %s.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7237 					      sbuf_data(os_pkg),
7238 					      sbuf_data(active_pkg),
7239 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7240 			}
7241 		} else {
7242 			if (pkg_ver_compatible(&hw->active_pkg_ver) > 0) {
7243 				device_printf(dev,
7244 					      "The device has a DDP package that is higher than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7245 					      sbuf_data(active_pkg),
7246 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7247 			} else if (pkg_ver_compatible(&hw->active_pkg_ver) < 0) {
7248 				device_printf(dev,
7249 					      "The device has a DDP package that is lower than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7250 					      sbuf_data(active_pkg),
7251 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7252 			} else {
7253 				device_printf(dev,
7254 					      "An unknown error occurred when loading the DDP package.  The ice_ddp module has package %s.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7255 					      sbuf_data(os_pkg),
7256 					      sbuf_data(active_pkg),
7257 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7258 			}
7259 		}
7260 		break;
7261 	case ICE_DDP_PKG_INVALID_FILE:
7262 		device_printf(dev,
7263 			      "The DDP package in the ice_ddp module is invalid.  Entering Safe Mode\n");
7264 		break;
7265 	case ICE_DDP_PKG_FW_MISMATCH:
7266 		device_printf(dev,
7267 			      "The firmware loaded on the device is not compatible with the DDP package.  Please update the device's NVM.  Entering safe mode.\n");
7268 		break;
7269 	case ICE_DDP_PKG_NO_SEC_MANIFEST:
7270 	case ICE_DDP_PKG_FILE_SIGNATURE_INVALID:
7271 		device_printf(dev,
7272 			      "The DDP package in the ice_ddp module cannot be loaded because its signature is not valid.  Please use a valid ice_ddp module.  Entering Safe Mode.\n");
7273 		break;
7274 	case ICE_DDP_PKG_SECURE_VERSION_NBR_TOO_LOW:
7275 		device_printf(dev,
7276 			      "The DDP package in the ice_ddp module could not be loaded because its security revision is too low.  Please use an updated ice_ddp module.  Entering Safe Mode.\n");
7277 		break;
7278 	case ICE_DDP_PKG_MANIFEST_INVALID:
7279 	case ICE_DDP_PKG_BUFFER_INVALID:
7280 		device_printf(dev,
7281 			      "An error occurred on the device while loading the DDP package.  Entering Safe Mode.\n");
7282 		break;
7283 	default:
7284 		device_printf(dev,
7285 			 "An unknown error occurred when loading the DDP package.  Entering Safe Mode.\n");
7286 		break;
7287 	}
7288 
7289 	sbuf_delete(active_pkg);
7290 	sbuf_delete(os_pkg);
7291 }
7292 
7293 /**
7294  * ice_load_pkg_file - Load the DDP package file using firmware_get
7295  * @sc: device private softc
7296  *
7297  * Use firmware_get to load the DDP package memory and then request that
7298  * firmware download the package contents and program the relevant hardware
7299  * bits.
7300  *
7301  * This function makes a copy of the DDP package memory which is tracked in
7302  * the ice_hw structure. The copy will be managed and released by
7303  * ice_deinit_hw(). This allows the firmware reference to be immediately
7304  * released using firmware_put.
7305  */
7306 enum ice_status
7307 ice_load_pkg_file(struct ice_softc *sc)
7308 {
7309 	struct ice_hw *hw = &sc->hw;
7310 	device_t dev = sc->dev;
7311 	enum ice_ddp_state state;
7312 	const struct firmware *pkg;
7313 	enum ice_status status = ICE_SUCCESS;
7314 	u8 cached_layer_count;
7315 	u8 *buf_copy;
7316 
7317 	pkg = firmware_get("ice_ddp");
7318 	if (!pkg) {
7319 		device_printf(dev,
7320 		    "The DDP package module (ice_ddp) failed to load or could not be found. Entering Safe Mode.\n");
7321 		if (cold)
7322 			device_printf(dev,
7323 			    "The DDP package module cannot be automatically loaded while booting. You may want to specify ice_ddp_load=\"YES\" in your loader.conf\n");
7324 		status = ICE_ERR_CFG;
7325 		goto err_load_pkg;
7326 	}
7327 
7328 	/* Check for topology change */
7329 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_TX_BALANCE)) {
7330 		cached_layer_count = hw->num_tx_sched_layers;
7331 		buf_copy = (u8 *)malloc(pkg->datasize, M_ICE, M_NOWAIT);
7332 		if (buf_copy == NULL)
7333 			return ICE_ERR_NO_MEMORY;
7334 		memcpy(buf_copy, pkg->data, pkg->datasize);
7335 		status = ice_cfg_tx_topo(&sc->hw, buf_copy, pkg->datasize);
7336 		free(buf_copy, M_ICE);
7337 		/* Success indicates a change was made */
7338 		if (status == ICE_SUCCESS) {
7339 			/* 9 -> 5 */
7340 			if (cached_layer_count == 9)
7341 				device_printf(dev,
7342 				    "Transmit balancing feature enabled\n");
7343 			else
7344 				device_printf(dev,
7345 				    "Transmit balancing feature disabled\n");
7346 			ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_en);
7347 			return (status);
7348 		}
7349 	}
7350 
7351 	/* Copy and download the pkg contents */
7352 	state = ice_copy_and_init_pkg(hw, (const u8 *)pkg->data, pkg->datasize);
7353 
7354 	/* Release the firmware reference */
7355 	firmware_put(pkg, FIRMWARE_UNLOAD);
7356 
7357 	/* Check the active DDP package version and log a message */
7358 	ice_log_pkg_init(sc, state);
7359 
7360 	/* Place the driver into safe mode */
7361 	if (ice_is_init_pkg_successful(state))
7362 		return (ICE_ERR_ALREADY_EXISTS);
7363 
7364 err_load_pkg:
7365 	ice_zero_bitmap(sc->feat_cap, ICE_FEATURE_COUNT);
7366 	ice_zero_bitmap(sc->feat_en, ICE_FEATURE_COUNT);
7367 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
7368 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
7369 
7370 	return (status);
7371 }
7372 
7373 /**
7374  * ice_get_ifnet_counter - Retrieve counter value for a given ifnet counter
7375  * @vsi: the vsi to retrieve the value for
7376  * @counter: the counter type to retrieve
7377  *
7378  * Returns the value for a given ifnet counter. To do so, we calculate the
7379  * value based on the matching hardware statistics.
7380  */
7381 uint64_t
7382 ice_get_ifnet_counter(struct ice_vsi *vsi, ift_counter counter)
7383 {
7384 	struct ice_hw_port_stats *hs = &vsi->sc->stats.cur;
7385 	struct ice_eth_stats *es = &vsi->hw_stats.cur;
7386 
7387 	/* For some statistics, especially those related to error flows, we do
7388 	 * not have per-VSI counters. In this case, we just report the global
7389 	 * counters.
7390 	 */
7391 
7392 	switch (counter) {
7393 	case IFCOUNTER_IPACKETS:
7394 		return (es->rx_unicast + es->rx_multicast + es->rx_broadcast);
7395 	case IFCOUNTER_IERRORS:
7396 		return (hs->crc_errors + hs->illegal_bytes +
7397 			hs->mac_local_faults + hs->mac_remote_faults +
7398 			hs->rx_len_errors + hs->rx_undersize +
7399 			hs->rx_oversize + hs->rx_fragments + hs->rx_jabber);
7400 	case IFCOUNTER_OPACKETS:
7401 		return (es->tx_unicast + es->tx_multicast + es->tx_broadcast);
7402 	case IFCOUNTER_OERRORS:
7403 		return (es->tx_errors);
7404 	case IFCOUNTER_COLLISIONS:
7405 		return (0);
7406 	case IFCOUNTER_IBYTES:
7407 		return (es->rx_bytes);
7408 	case IFCOUNTER_OBYTES:
7409 		return (es->tx_bytes);
7410 	case IFCOUNTER_IMCASTS:
7411 		return (es->rx_multicast);
7412 	case IFCOUNTER_OMCASTS:
7413 		return (es->tx_multicast);
7414 	case IFCOUNTER_IQDROPS:
7415 		return (es->rx_discards);
7416 	case IFCOUNTER_OQDROPS:
7417 		return (hs->tx_dropped_link_down);
7418 	case IFCOUNTER_NOPROTO:
7419 		return (es->rx_unknown_protocol);
7420 	default:
7421 		return if_get_counter_default(vsi->sc->ifp, counter);
7422 	}
7423 }
7424 
7425 /**
7426  * ice_save_pci_info - Save PCI configuration fields in HW struct
7427  * @hw: the ice_hw struct to save the PCI information in
7428  * @dev: the device to get the PCI information from
7429  *
7430  * This should only be called once, early in the device attach
7431  * process.
7432  */
7433 void
7434 ice_save_pci_info(struct ice_hw *hw, device_t dev)
7435 {
7436 	hw->vendor_id = pci_get_vendor(dev);
7437 	hw->device_id = pci_get_device(dev);
7438 	hw->subsystem_vendor_id = pci_get_subvendor(dev);
7439 	hw->subsystem_device_id = pci_get_subdevice(dev);
7440 	hw->revision_id = pci_get_revid(dev);
7441 	hw->bus.device = pci_get_slot(dev);
7442 	hw->bus.func = pci_get_function(dev);
7443 }
7444 
7445 /**
7446  * ice_replay_all_vsi_cfg - Replace configuration for all VSIs after reset
7447  * @sc: the device softc
7448  *
7449  * Replace the configuration for each VSI, and then cleanup replay
7450  * information. Called after a hardware reset in order to reconfigure the
7451  * active VSIs.
7452  */
7453 int
7454 ice_replay_all_vsi_cfg(struct ice_softc *sc)
7455 {
7456 	struct ice_hw *hw = &sc->hw;
7457 	enum ice_status status;
7458 	int i;
7459 
7460 	for (i = 0 ; i < sc->num_available_vsi; i++) {
7461 		struct ice_vsi *vsi = sc->all_vsi[i];
7462 
7463 		if (!vsi)
7464 			continue;
7465 
7466 		status = ice_replay_vsi(hw, vsi->idx);
7467 		if (status) {
7468 			device_printf(sc->dev, "Failed to replay VSI %d, err %s aq_err %s\n",
7469 				      vsi->idx, ice_status_str(status),
7470 				      ice_aq_str(hw->adminq.sq_last_status));
7471 			return (EIO);
7472 		}
7473 	}
7474 
7475 	/* Cleanup replay filters after successful reconfiguration */
7476 	ice_replay_post(hw);
7477 	return (0);
7478 }
7479 
7480 /**
7481  * ice_clean_vsi_rss_cfg - Cleanup RSS configuration for a given VSI
7482  * @vsi: pointer to the VSI structure
7483  *
7484  * Cleanup the advanced RSS configuration for a given VSI. This is necessary
7485  * during driver removal to ensure that all RSS resources are properly
7486  * released.
7487  *
7488  * @remark this function doesn't report an error as it is expected to be
7489  * called during driver reset and unload, and there isn't much the driver can
7490  * do if freeing RSS resources fails.
7491  */
7492 static void
7493 ice_clean_vsi_rss_cfg(struct ice_vsi *vsi)
7494 {
7495 	struct ice_softc *sc = vsi->sc;
7496 	struct ice_hw *hw = &sc->hw;
7497 	device_t dev = sc->dev;
7498 	enum ice_status status;
7499 
7500 	status = ice_rem_vsi_rss_cfg(hw, vsi->idx);
7501 	if (status)
7502 		device_printf(dev,
7503 			      "Failed to remove RSS configuration for VSI %d, err %s\n",
7504 			      vsi->idx, ice_status_str(status));
7505 
7506 	/* Remove this VSI from the RSS list */
7507 	ice_rem_vsi_rss_list(hw, vsi->idx);
7508 }
7509 
7510 /**
7511  * ice_clean_all_vsi_rss_cfg - Cleanup RSS configuration for all VSIs
7512  * @sc: the device softc pointer
7513  *
7514  * Cleanup the advanced RSS configuration for all VSIs on a given PF
7515  * interface.
7516  *
7517  * @remark This should be called while preparing for a reset, to cleanup stale
7518  * RSS configuration for all VSIs.
7519  */
7520 void
7521 ice_clean_all_vsi_rss_cfg(struct ice_softc *sc)
7522 {
7523 	int i;
7524 
7525 	/* No need to cleanup if RSS is not enabled */
7526 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_RSS))
7527 		return;
7528 
7529 	for (i = 0; i < sc->num_available_vsi; i++) {
7530 		struct ice_vsi *vsi = sc->all_vsi[i];
7531 
7532 		if (vsi)
7533 			ice_clean_vsi_rss_cfg(vsi);
7534 	}
7535 }
7536 
7537 /**
7538  * ice_requested_fec_mode - Return the requested FEC mode as a string
7539  * @pi: The port info structure
7540  *
7541  * Return a string representing the requested FEC mode.
7542  */
7543 static const char *
7544 ice_requested_fec_mode(struct ice_port_info *pi)
7545 {
7546 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
7547 	enum ice_status status;
7548 
7549 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
7550 				     &pcaps, NULL);
7551 	if (status)
7552 		/* Just report unknown if we can't get capabilities */
7553 		return "Unknown";
7554 
7555 	/* Check if RS-FEC has been requested first */
7556 	if (pcaps.link_fec_options & (ICE_AQC_PHY_FEC_25G_RS_528_REQ |
7557 				      ICE_AQC_PHY_FEC_25G_RS_544_REQ))
7558 		return ice_fec_str(ICE_FEC_RS);
7559 
7560 	/* If RS FEC has not been requested, then check BASE-R */
7561 	if (pcaps.link_fec_options & (ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ |
7562 				      ICE_AQC_PHY_FEC_25G_KR_REQ))
7563 		return ice_fec_str(ICE_FEC_BASER);
7564 
7565 	return ice_fec_str(ICE_FEC_NONE);
7566 }
7567 
7568 /**
7569  * ice_negotiated_fec_mode - Return the negotiated FEC mode as a string
7570  * @pi: The port info structure
7571  *
7572  * Return a string representing the current FEC mode.
7573  */
7574 static const char *
7575 ice_negotiated_fec_mode(struct ice_port_info *pi)
7576 {
7577 	/* First, check if RS has been requested first */
7578 	if (pi->phy.link_info.fec_info & (ICE_AQ_LINK_25G_RS_528_FEC_EN |
7579 					  ICE_AQ_LINK_25G_RS_544_FEC_EN))
7580 		return ice_fec_str(ICE_FEC_RS);
7581 
7582 	/* If RS FEC has not been requested, then check BASE-R */
7583 	if (pi->phy.link_info.fec_info & ICE_AQ_LINK_25G_KR_FEC_EN)
7584 		return ice_fec_str(ICE_FEC_BASER);
7585 
7586 	return ice_fec_str(ICE_FEC_NONE);
7587 }
7588 
7589 /**
7590  * ice_autoneg_mode - Return string indicating of autoneg completed
7591  * @pi: The port info structure
7592  *
7593  * Return "True" if autonegotiation is completed, "False" otherwise.
7594  */
7595 static const char *
7596 ice_autoneg_mode(struct ice_port_info *pi)
7597 {
7598 	if (pi->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)
7599 		return "True";
7600 	else
7601 		return "False";
7602 }
7603 
7604 /**
7605  * ice_flowcontrol_mode - Return string indicating the Flow Control mode
7606  * @pi: The port info structure
7607  *
7608  * Returns the current Flow Control mode as a string.
7609  */
7610 static const char *
7611 ice_flowcontrol_mode(struct ice_port_info *pi)
7612 {
7613 	return ice_fc_str(pi->fc.current_mode);
7614 }
7615 
7616 /**
7617  * ice_link_up_msg - Log a link up message with associated info
7618  * @sc: the device private softc
7619  *
7620  * Log a link up message with LOG_NOTICE message level. Include information
7621  * about the duplex, FEC mode, autonegotiation and flow control.
7622  */
7623 void
7624 ice_link_up_msg(struct ice_softc *sc)
7625 {
7626 	struct ice_hw *hw = &sc->hw;
7627 	struct ifnet *ifp = sc->ifp;
7628 	const char *speed, *req_fec, *neg_fec, *autoneg, *flowcontrol;
7629 
7630 	speed = ice_aq_speed_to_str(hw->port_info);
7631 	req_fec = ice_requested_fec_mode(hw->port_info);
7632 	neg_fec = ice_negotiated_fec_mode(hw->port_info);
7633 	autoneg = ice_autoneg_mode(hw->port_info);
7634 	flowcontrol = ice_flowcontrol_mode(hw->port_info);
7635 
7636 	log(LOG_NOTICE, "%s: Link is up, %s Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
7637 	    if_name(ifp), speed, req_fec, neg_fec, autoneg, flowcontrol);
7638 }
7639 
7640 /**
7641  * ice_update_laa_mac - Update MAC address if Locally Administered
7642  * @sc: the device softc
7643  *
7644  * Update the device MAC address when a Locally Administered Address is
7645  * assigned.
7646  *
7647  * This function does *not* update the MAC filter list itself. Instead, it
7648  * should be called after ice_rm_pf_default_mac_filters, so that the previous
7649  * address filter will be removed, and before ice_cfg_pf_default_mac_filters,
7650  * so that the new address filter will be assigned.
7651  */
7652 int
7653 ice_update_laa_mac(struct ice_softc *sc)
7654 {
7655 	const u8 *lladdr = (const u8 *)if_getlladdr(sc->ifp);
7656 	struct ice_hw *hw = &sc->hw;
7657 	enum ice_status status;
7658 
7659 	/* If the address is the same, then there is nothing to update */
7660 	if (!memcmp(lladdr, hw->port_info->mac.lan_addr, ETHER_ADDR_LEN))
7661 		return (0);
7662 
7663 	/* Reject Multicast addresses */
7664 	if (ETHER_IS_MULTICAST(lladdr))
7665 		return (EINVAL);
7666 
7667 	status = ice_aq_manage_mac_write(hw, lladdr, ICE_AQC_MAN_MAC_UPDATE_LAA_WOL, NULL);
7668 	if (status) {
7669 		device_printf(sc->dev, "Failed to write mac %6D to firmware, err %s aq_err %s\n",
7670 			      lladdr, ":", ice_status_str(status),
7671 			      ice_aq_str(hw->adminq.sq_last_status));
7672 		return (EFAULT);
7673 	}
7674 
7675 	/* Copy the address into place of the LAN address. */
7676 	bcopy(lladdr, hw->port_info->mac.lan_addr, ETHER_ADDR_LEN);
7677 
7678 	return (0);
7679 }
7680 
7681 /**
7682  * ice_get_and_print_bus_info - Save (PCI) bus info and print messages
7683  * @sc: device softc
7684  *
7685  * This will potentially print out a warning message if bus bandwidth
7686  * is insufficient for full-speed operation.
7687  *
7688  * This should only be called once, during the attach process, after
7689  * hw->port_info has been filled out with port link topology information
7690  * (from the Get PHY Capabilities Admin Queue command).
7691  */
7692 void
7693 ice_get_and_print_bus_info(struct ice_softc *sc)
7694 {
7695 	struct ice_hw *hw = &sc->hw;
7696 	device_t dev = sc->dev;
7697 	u16 pci_link_status;
7698 	int offset;
7699 
7700 	pci_find_cap(dev, PCIY_EXPRESS, &offset);
7701 	pci_link_status = pci_read_config(dev, offset + PCIER_LINK_STA, 2);
7702 
7703 	/* Fill out hw struct with PCIE link status info */
7704 	ice_set_pci_link_status_data(hw, pci_link_status);
7705 
7706 	/* Use info to print out bandwidth messages */
7707 	ice_print_bus_link_data(dev, hw);
7708 
7709 	if (ice_pcie_bandwidth_check(sc)) {
7710 		device_printf(dev,
7711 		    "PCI-Express bandwidth available for this device may be insufficient for optimal performance.\n");
7712 		device_printf(dev,
7713 		    "Please move the device to a different PCI-e link with more lanes and/or higher transfer rate.\n");
7714 	}
7715 }
7716 
7717 /**
7718  * ice_pcie_bus_speed_to_rate - Convert driver bus speed enum value to
7719  * a 64-bit baudrate.
7720  * @speed: enum value to convert
7721  *
7722  * This only goes up to PCIE Gen 4.
7723  */
7724 static uint64_t
7725 ice_pcie_bus_speed_to_rate(enum ice_pcie_bus_speed speed)
7726 {
7727 	/* If the PCI-E speed is Gen1 or Gen2, then report
7728 	 * only 80% of bus speed to account for encoding overhead.
7729 	 */
7730 	switch (speed) {
7731 	case ice_pcie_speed_2_5GT:
7732 		return IF_Gbps(2);
7733 	case ice_pcie_speed_5_0GT:
7734 		return IF_Gbps(4);
7735 	case ice_pcie_speed_8_0GT:
7736 		return IF_Gbps(8);
7737 	case ice_pcie_speed_16_0GT:
7738 		return IF_Gbps(16);
7739 	case ice_pcie_speed_unknown:
7740 	default:
7741 		return 0;
7742 	}
7743 }
7744 
7745 /**
7746  * ice_pcie_lnk_width_to_int - Convert driver pci-e width enum value to
7747  * a 32-bit number.
7748  * @width: enum value to convert
7749  */
7750 static int
7751 ice_pcie_lnk_width_to_int(enum ice_pcie_link_width width)
7752 {
7753 	switch (width) {
7754 	case ice_pcie_lnk_x1:
7755 		return (1);
7756 	case ice_pcie_lnk_x2:
7757 		return (2);
7758 	case ice_pcie_lnk_x4:
7759 		return (4);
7760 	case ice_pcie_lnk_x8:
7761 		return (8);
7762 	case ice_pcie_lnk_x12:
7763 		return (12);
7764 	case ice_pcie_lnk_x16:
7765 		return (16);
7766 	case ice_pcie_lnk_x32:
7767 		return (32);
7768 	case ice_pcie_lnk_width_resrv:
7769 	case ice_pcie_lnk_width_unknown:
7770 	default:
7771 		return (0);
7772 	}
7773 }
7774 
7775 /**
7776  * ice_pcie_bandwidth_check - Check if PCI-E bandwidth is sufficient for
7777  * full-speed device operation.
7778  * @sc: adapter softc
7779  *
7780  * Returns 0 if sufficient; 1 if not.
7781  */
7782 static uint8_t
7783 ice_pcie_bandwidth_check(struct ice_softc *sc)
7784 {
7785 	struct ice_hw *hw = &sc->hw;
7786 	int num_ports, pcie_width;
7787 	u64 pcie_speed, port_speed;
7788 
7789 	MPASS(hw->port_info);
7790 
7791 	num_ports = bitcount32(hw->func_caps.common_cap.valid_functions);
7792 	port_speed = ice_phy_types_to_max_rate(hw->port_info);
7793 	pcie_speed = ice_pcie_bus_speed_to_rate(hw->bus.speed);
7794 	pcie_width = ice_pcie_lnk_width_to_int(hw->bus.width);
7795 
7796 	/*
7797 	 * If 2x100, clamp ports to 1 -- 2nd port is intended for
7798 	 * failover.
7799 	 */
7800 	if (port_speed == IF_Gbps(100))
7801 		num_ports = 1;
7802 
7803 	return !!((num_ports * port_speed) > pcie_speed * pcie_width);
7804 }
7805 
7806 /**
7807  * ice_print_bus_link_data - Print PCI-E bandwidth information
7808  * @dev: device to print string for
7809  * @hw: hw struct with PCI-e link information
7810  */
7811 static void
7812 ice_print_bus_link_data(device_t dev, struct ice_hw *hw)
7813 {
7814         device_printf(dev, "PCI Express Bus: Speed %s %s\n",
7815             ((hw->bus.speed == ice_pcie_speed_16_0GT) ? "16.0GT/s" :
7816             (hw->bus.speed == ice_pcie_speed_8_0GT) ? "8.0GT/s" :
7817             (hw->bus.speed == ice_pcie_speed_5_0GT) ? "5.0GT/s" :
7818             (hw->bus.speed == ice_pcie_speed_2_5GT) ? "2.5GT/s" : "Unknown"),
7819             (hw->bus.width == ice_pcie_lnk_x32) ? "Width x32" :
7820             (hw->bus.width == ice_pcie_lnk_x16) ? "Width x16" :
7821             (hw->bus.width == ice_pcie_lnk_x12) ? "Width x12" :
7822             (hw->bus.width == ice_pcie_lnk_x8) ? "Width x8" :
7823             (hw->bus.width == ice_pcie_lnk_x4) ? "Width x4" :
7824             (hw->bus.width == ice_pcie_lnk_x2) ? "Width x2" :
7825             (hw->bus.width == ice_pcie_lnk_x1) ? "Width x1" : "Width Unknown");
7826 }
7827 
7828 /**
7829  * ice_set_pci_link_status_data - store PCI bus info
7830  * @hw: pointer to hardware structure
7831  * @link_status: the link status word from PCI config space
7832  *
7833  * Stores the PCI bus info (speed, width, type) within the ice_hw structure
7834  **/
7835 static void
7836 ice_set_pci_link_status_data(struct ice_hw *hw, u16 link_status)
7837 {
7838 	u16 reg;
7839 
7840 	hw->bus.type = ice_bus_pci_express;
7841 
7842 	reg = (link_status & PCIEM_LINK_STA_WIDTH) >> 4;
7843 
7844 	switch (reg) {
7845 	case ice_pcie_lnk_x1:
7846 	case ice_pcie_lnk_x2:
7847 	case ice_pcie_lnk_x4:
7848 	case ice_pcie_lnk_x8:
7849 	case ice_pcie_lnk_x12:
7850 	case ice_pcie_lnk_x16:
7851 	case ice_pcie_lnk_x32:
7852 		hw->bus.width = (enum ice_pcie_link_width)reg;
7853 		break;
7854 	default:
7855 		hw->bus.width = ice_pcie_lnk_width_unknown;
7856 		break;
7857 	}
7858 
7859 	reg = (link_status & PCIEM_LINK_STA_SPEED) + 0x13;
7860 
7861 	switch (reg) {
7862 	case ice_pcie_speed_2_5GT:
7863 	case ice_pcie_speed_5_0GT:
7864 	case ice_pcie_speed_8_0GT:
7865 	case ice_pcie_speed_16_0GT:
7866 		hw->bus.speed = (enum ice_pcie_bus_speed)reg;
7867 		break;
7868 	default:
7869 		hw->bus.speed = ice_pcie_speed_unknown;
7870 		break;
7871 	}
7872 }
7873 
7874 /**
7875  * ice_init_link_events - Initialize Link Status Events mask
7876  * @sc: the device softc
7877  *
7878  * Initialize the Link Status Events mask to disable notification of link
7879  * events we don't care about in software. Also request that link status
7880  * events be enabled.
7881  */
7882 int
7883 ice_init_link_events(struct ice_softc *sc)
7884 {
7885 	struct ice_hw *hw = &sc->hw;
7886 	enum ice_status status;
7887 	u16 wanted_events;
7888 
7889 	/* Set the bits for the events that we want to be notified by */
7890 	wanted_events = (ICE_AQ_LINK_EVENT_UPDOWN |
7891 			 ICE_AQ_LINK_EVENT_MEDIA_NA |
7892 			 ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL);
7893 
7894 	/* request that every event except the wanted events be masked */
7895 	status = ice_aq_set_event_mask(hw, hw->port_info->lport, ~wanted_events, NULL);
7896 	if (status) {
7897 		device_printf(sc->dev,
7898 			      "Failed to set link status event mask, err %s aq_err %s\n",
7899 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7900 		return (EIO);
7901 	}
7902 
7903 	/* Request link info with the LSE bit set to enable link status events */
7904 	status = ice_aq_get_link_info(hw->port_info, true, NULL, NULL);
7905 	if (status) {
7906 		device_printf(sc->dev,
7907 			      "Failed to enable link status events, err %s aq_err %s\n",
7908 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7909 		return (EIO);
7910 	}
7911 
7912 	return (0);
7913 }
7914 
/**
 * ice_handle_mdd_event - Handle possibly malicious events
 * @sc: the device softc
 *
 * Called by the admin task if an MDD detection interrupt is triggered.
 * Identifies possibly malicious events coming from VFs. Also triggers for
 * similar incorrect behavior from the PF as well.
 *
 * The function first inspects the three global MDD registers (Tx descriptor
 * check, Tx quanta check and Rx), logging each valid event. If any global
 * event was seen, the matching per-PF registers are checked; a valid per-PF
 * event bumps the corresponding soft statistic and causes a request for the
 * stack to re-initialize the queues.
 */
void
ice_handle_mdd_event(struct ice_softc *sc)
{
	struct ice_hw *hw = &sc->hw;
	bool mdd_detected = false, request_reinit = false;
	device_t dev = sc->dev;
	u32 reg;

	/* Nothing to do unless an MDD event was flagged as pending.
	 * NOTE(review): going by its name, ice_testandclear_state() also
	 * clears the pending flag here -- confirm.
	 */
	if (!ice_testandclear_state(&sc->state, ICE_STATE_MDD_PENDING))
		return;

	/* Determine what triggered the MDD event, starting with the Tx
	 * descriptor check register.
	 */
	reg = rd32(hw, GL_MDET_TX_TCLAN);
	if (reg & GL_MDET_TX_TCLAN_VALID_M) {
		u8 pf_num  = (reg & GL_MDET_TX_TCLAN_PF_NUM_M) >> GL_MDET_TX_TCLAN_PF_NUM_S;
		u16 vf_num = (reg & GL_MDET_TX_TCLAN_VF_NUM_M) >> GL_MDET_TX_TCLAN_VF_NUM_S;
		u8 event   = (reg & GL_MDET_TX_TCLAN_MAL_TYPE_M) >> GL_MDET_TX_TCLAN_MAL_TYPE_S;
		u16 queue  = (reg & GL_MDET_TX_TCLAN_QNUM_M) >> GL_MDET_TX_TCLAN_QNUM_S;

		device_printf(dev, "Malicious Driver Detection Tx Descriptor check event '%s' on Tx queue %u PF# %u VF# %u\n",
			      ice_mdd_tx_tclan_str(event), queue, pf_num, vf_num);

		/* Only clear this event if it matches this PF, that way other
		 * PFs can read the event and determine VF and queue number.
		 */
		if (pf_num == hw->pf_id)
			wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff);

		mdd_detected = true;
	}

	/* Next, the Tx quanta check register */
	reg = rd32(hw, GL_MDET_TX_PQM);
	if (reg & GL_MDET_TX_PQM_VALID_M) {
		u8 pf_num  = (reg & GL_MDET_TX_PQM_PF_NUM_M) >> GL_MDET_TX_PQM_PF_NUM_S;
		u16 vf_num = (reg & GL_MDET_TX_PQM_VF_NUM_M) >> GL_MDET_TX_PQM_VF_NUM_S;
		u8 event   = (reg & GL_MDET_TX_PQM_MAL_TYPE_M) >> GL_MDET_TX_PQM_MAL_TYPE_S;
		u16 queue  = (reg & GL_MDET_TX_PQM_QNUM_M) >> GL_MDET_TX_PQM_QNUM_S;

		device_printf(dev, "Malicious Driver Detection Tx Quanta check event '%s' on Tx queue %u PF# %u VF# %u\n",
			      ice_mdd_tx_pqm_str(event), queue, pf_num, vf_num);

		/* Only clear this event if it matches this PF, that way other
		 * PFs can read the event and determine VF and queue number.
		 */
		if (pf_num == hw->pf_id)
			wr32(hw, GL_MDET_TX_PQM, 0xffffffff);

		mdd_detected = true;
	}

	/* Finally, the Rx register */
	reg = rd32(hw, GL_MDET_RX);
	if (reg & GL_MDET_RX_VALID_M) {
		u8 pf_num  = (reg & GL_MDET_RX_PF_NUM_M) >> GL_MDET_RX_PF_NUM_S;
		u16 vf_num = (reg & GL_MDET_RX_VF_NUM_M) >> GL_MDET_RX_VF_NUM_S;
		u8 event   = (reg & GL_MDET_RX_MAL_TYPE_M) >> GL_MDET_RX_MAL_TYPE_S;
		u16 queue  = (reg & GL_MDET_RX_QNUM_M) >> GL_MDET_RX_QNUM_S;

		device_printf(dev, "Malicious Driver Detection Rx event '%s' on Rx queue %u PF# %u VF# %u\n",
			      ice_mdd_rx_str(event), queue, pf_num, vf_num);

		/* Only clear this event if it matches this PF, that way other
		 * PFs can read the event and determine VF and queue number.
		 */
		if (pf_num == hw->pf_id)
			wr32(hw, GL_MDET_RX, 0xffffffff);

		mdd_detected = true;
	}

	/* Now, confirm that this event actually affects this PF, by checking
	 * the PF registers.
	 *
	 * NOTE(review): the 0xffff writes below presumably clear the per-PF
	 * event, mirroring the global registers above -- confirm against the
	 * register specification.
	 */
	if (mdd_detected) {
		reg = rd32(hw, PF_MDET_TX_TCLAN);
		if (reg & PF_MDET_TX_TCLAN_VALID_M) {
			wr32(hw, PF_MDET_TX_TCLAN, 0xffff);
			sc->soft_stats.tx_mdd_count++;
			request_reinit = true;
		}

		/* Both Tx checks are accounted in the same Tx MDD counter */
		reg = rd32(hw, PF_MDET_TX_PQM);
		if (reg & PF_MDET_TX_PQM_VALID_M) {
			wr32(hw, PF_MDET_TX_PQM, 0xffff);
			sc->soft_stats.tx_mdd_count++;
			request_reinit = true;
		}

		reg = rd32(hw, PF_MDET_RX);
		if (reg & PF_MDET_RX_VALID_M) {
			wr32(hw, PF_MDET_RX, 0xffff);
			sc->soft_stats.rx_mdd_count++;
			request_reinit = true;
		}
	}

	/* TODO: Implement logic to detect and handle events caused by VFs. */

	/* request that the upper stack re-initialize the Tx/Rx queues */
	if (request_reinit)
		ice_request_stack_reinit(sc);

	ice_flush(hw);
}
8026 
8027 /**
8028  * ice_start_dcbx_agent - Start DCBX agent in FW via AQ command
8029  * @sc: the device softc
8030  *
8031  * @pre device is DCB capable and the FW LLDP agent has started
8032  *
8033  * Checks DCBX status and starts the DCBX agent if it is not in
8034  * a valid state via an AQ command.
8035  */
8036 static void
8037 ice_start_dcbx_agent(struct ice_softc *sc)
8038 {
8039 	struct ice_hw *hw = &sc->hw;
8040 	device_t dev = sc->dev;
8041 	bool dcbx_agent_status;
8042 	enum ice_status status;
8043 
8044 	hw->port_info->qos_cfg.dcbx_status = ice_get_dcbx_status(hw);
8045 
8046 	if (hw->port_info->qos_cfg.dcbx_status != ICE_DCBX_STATUS_DONE &&
8047 	    hw->port_info->qos_cfg.dcbx_status != ICE_DCBX_STATUS_IN_PROGRESS) {
8048 		/*
8049 		 * Start DCBX agent, but not LLDP. The return value isn't
8050 		 * checked here because a more detailed dcbx agent status is
8051 		 * retrieved and checked in ice_init_dcb() and elsewhere.
8052 		 */
8053 		status = ice_aq_start_stop_dcbx(hw, true, &dcbx_agent_status, NULL);
8054 		if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EPERM)
8055 			device_printf(dev,
8056 			    "start_stop_dcbx failed, err %s aq_err %s\n",
8057 			    ice_status_str(status),
8058 			    ice_aq_str(hw->adminq.sq_last_status));
8059 	}
8060 }
8061 
8062 /**
8063  * ice_init_dcb_setup - Initialize DCB settings for HW
8064  * @sc: the device softc
8065  *
8066  * This needs to be called after the fw_lldp_agent sysctl is added, since that
8067  * can update the device's LLDP agent status if a tunable value is set.
8068  *
8069  * Get and store the initial state of DCB settings on driver load. Print out
8070  * informational messages as well.
8071  */
8072 void
8073 ice_init_dcb_setup(struct ice_softc *sc)
8074 {
8075 	struct ice_dcbx_cfg *local_dcbx_cfg;
8076 	struct ice_hw *hw = &sc->hw;
8077 	device_t dev = sc->dev;
8078 	enum ice_status status;
8079 	u8 pfcmode_ret;
8080 
8081 	/* Don't do anything if DCB isn't supported */
8082 	if (!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_DCB)) {
8083 		device_printf(dev, "%s: No DCB support\n", __func__);
8084 		return;
8085 	}
8086 
8087 	/* Starts DCBX agent if it needs starting */
8088 	ice_start_dcbx_agent(sc);
8089 
8090 	/* This sets hw->port_info->qos_cfg.is_sw_lldp */
8091 	status = ice_init_dcb(hw, true);
8092 
8093 	/* If there is an error, then FW LLDP is not in a usable state */
8094 	if (status != 0 && status != ICE_ERR_NOT_READY) {
8095 		/* Don't print an error message if the return code from the AQ
8096 		 * cmd performed in ice_init_dcb() is EPERM; that means the
8097 		 * FW LLDP engine is disabled, and that is a valid state.
8098 		 */
8099 		if (!(status == ICE_ERR_AQ_ERROR &&
8100 		      hw->adminq.sq_last_status == ICE_AQ_RC_EPERM)) {
8101 			device_printf(dev, "DCB init failed, err %s aq_err %s\n",
8102 				      ice_status_str(status),
8103 				      ice_aq_str(hw->adminq.sq_last_status));
8104 		}
8105 		hw->port_info->qos_cfg.dcbx_status = ICE_DCBX_STATUS_NOT_STARTED;
8106 	}
8107 
8108 	switch (hw->port_info->qos_cfg.dcbx_status) {
8109 	case ICE_DCBX_STATUS_DIS:
8110 		ice_debug(hw, ICE_DBG_DCB, "DCBX disabled\n");
8111 		break;
8112 	case ICE_DCBX_STATUS_NOT_STARTED:
8113 		ice_debug(hw, ICE_DBG_DCB, "DCBX not started\n");
8114 		break;
8115 	case ICE_DCBX_STATUS_MULTIPLE_PEERS:
8116 		ice_debug(hw, ICE_DBG_DCB, "DCBX detected multiple peers\n");
8117 		break;
8118 	default:
8119 		break;
8120 	}
8121 
8122 	/* LLDP disabled in FW */
8123 	if (hw->port_info->qos_cfg.is_sw_lldp) {
8124 		ice_add_rx_lldp_filter(sc);
8125 		device_printf(dev, "Firmware LLDP agent disabled\n");
8126 	}
8127 
8128 	/* Query and cache PFC mode */
8129 	status = ice_aq_query_pfc_mode(hw, &pfcmode_ret, NULL);
8130 	if (status) {
8131 		device_printf(dev, "PFC mode query failed, err %s aq_err %s\n",
8132 			      ice_status_str(status),
8133 			      ice_aq_str(hw->adminq.sq_last_status));
8134 	}
8135 	local_dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
8136 	switch (pfcmode_ret) {
8137 	case ICE_AQC_PFC_VLAN_BASED_PFC:
8138 		local_dcbx_cfg->pfc_mode = ICE_QOS_MODE_VLAN;
8139 		break;
8140 	case ICE_AQC_PFC_DSCP_BASED_PFC:
8141 		local_dcbx_cfg->pfc_mode = ICE_QOS_MODE_DSCP;
8142 		break;
8143 	default:
8144 		/* DCB is disabled, but we shouldn't get here */
8145 		break;
8146 	}
8147 
8148 	/* Set default SW MIB for init */
8149 	ice_set_default_local_mib_settings(sc);
8150 
8151 	ice_set_bit(ICE_FEATURE_DCB, sc->feat_en);
8152 }
8153 
8154 /**
8155  * ice_dcb_get_tc_map - Scans config to get bitmap of enabled TCs
8156  * @dcbcfg: DCB configuration to examine
8157  *
8158  * Scans a TC mapping table inside dcbcfg to find traffic classes
8159  * enabled and @returns a bitmask of enabled TCs
8160  */
8161 u8
8162 ice_dcb_get_tc_map(const struct ice_dcbx_cfg *dcbcfg)
8163 {
8164 	u8 tc_map = 0;
8165 	int i = 0;
8166 
8167 	switch (dcbcfg->pfc_mode) {
8168 	case ICE_QOS_MODE_VLAN:
8169 		/* XXX: "i" is actually "User Priority" here, not
8170 		 * Traffic Class, but the max for both is 8, so it works
8171 		 * out here.
8172 		 */
8173 		for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
8174 			tc_map |= BIT(dcbcfg->etscfg.prio_table[i]);
8175 		break;
8176 	case ICE_QOS_MODE_DSCP:
8177 		for (i = 0; i < ICE_DSCP_NUM_VAL; i++)
8178 			tc_map |= BIT(dcbcfg->dscp_map[i]);
8179 		break;
8180 	default:
8181 		/* Invalid Mode */
8182 		tc_map = ICE_DFLT_TRAFFIC_CLASS;
8183 		break;
8184 	}
8185 
8186 	return (tc_map);
8187 }
8188 
8189 /**
8190  * ice_dcb_get_num_tc - Get the number of TCs from DCBX config
8191  * @dcbcfg: config to retrieve number of TCs from
8192  *
8193  * @return number of contiguous TCs found in dcbcfg's ETS Configuration
8194  * Priority Assignment Table, a value from 1 to 8. If there are
8195  * non-contiguous TCs used (e.g. assigning 1 and 3 without using 2),
8196  * then returns 0.
8197  */
8198 static u8
8199 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg)
8200 {
8201 	u8 tc_map;
8202 
8203 	tc_map = ice_dcb_get_tc_map(dcbcfg);
8204 
8205 	return (ice_dcb_tc_contig(tc_map));
8206 }
8207 
8208 /**
8209  * ice_debug_print_mib_change_event - helper function to log LLDP MIB change events
8210  * @sc: the device private softc
8211  * @event: event received on a control queue
8212  *
8213  * Prints out the type and contents of an LLDP MIB change event in a DCB debug message.
8214  */
8215 static void
8216 ice_debug_print_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event)
8217 {
8218 	struct ice_aqc_lldp_get_mib *params =
8219 	    (struct ice_aqc_lldp_get_mib *)&event->desc.params.lldp_get_mib;
8220 	u8 mib_type, bridge_type, tx_status;
8221 
8222 	static const char* mib_type_strings[] = {
8223 	    "Local MIB",
8224 	    "Remote MIB",
8225 	    "Reserved",
8226 	    "Reserved"
8227 	};
8228 	static const char* bridge_type_strings[] = {
8229 	    "Nearest Bridge",
8230 	    "Non-TPMR Bridge",
8231 	    "Reserved",
8232 	    "Reserved"
8233 	};
8234 	static const char* tx_status_strings[] = {
8235 	    "Port's TX active",
8236 	    "Port's TX suspended and drained",
8237 	    "Reserved",
8238 	    "Port's TX suspended and drained; blocked TC pipe flushed"
8239 	};
8240 
8241 	mib_type = (params->type & ICE_AQ_LLDP_MIB_TYPE_M) >>
8242 	    ICE_AQ_LLDP_MIB_TYPE_S;
8243 	bridge_type = (params->type & ICE_AQ_LLDP_BRID_TYPE_M) >>
8244 	    ICE_AQ_LLDP_BRID_TYPE_S;
8245 	tx_status = (params->type & ICE_AQ_LLDP_TX_M) >>
8246 	    ICE_AQ_LLDP_TX_S;
8247 
8248 	ice_debug(&sc->hw, ICE_DBG_DCB, "LLDP MIB Change Event (%s, %s, %s)\n",
8249 	    mib_type_strings[mib_type], bridge_type_strings[bridge_type],
8250 	    tx_status_strings[tx_status]);
8251 
8252 	/* Nothing else to report */
8253 	if (!event->msg_buf)
8254 		return;
8255 
8256 	ice_debug(&sc->hw, ICE_DBG_DCB, "- %s contents:\n", mib_type_strings[mib_type]);
8257 	ice_debug_array(&sc->hw, ICE_DBG_DCB, 16, 1, event->msg_buf,
8258 			event->msg_len);
8259 }
8260 
8261 /**
8262  * ice_dcb_needs_reconfig - Returns true if driver needs to reconfigure
8263  * @sc: the device private softc
8264  * @old_cfg: Old DCBX configuration to compare against
8265  * @new_cfg: New DCBX configuration to check
8266  *
8267  * @return true if something changed in new_cfg that requires the driver
8268  * to do some reconfiguration.
8269  */
8270 static bool
8271 ice_dcb_needs_reconfig(struct ice_softc *sc, struct ice_dcbx_cfg *old_cfg,
8272     struct ice_dcbx_cfg *new_cfg)
8273 {
8274 	struct ice_hw *hw = &sc->hw;
8275 	bool needs_reconfig = false;
8276 
8277 	/* No change detected in DCBX config */
8278 	if (!memcmp(old_cfg, new_cfg, sizeof(*old_cfg))) {
8279 		ice_debug(hw, ICE_DBG_DCB,
8280 		    "No change detected in local DCBX configuration\n");
8281 		return (false);
8282 	}
8283 
8284 	/* Check if ETS config has changed */
8285 	if (memcmp(&new_cfg->etscfg, &old_cfg->etscfg,
8286 		   sizeof(new_cfg->etscfg))) {
8287 		/* If Priority Table has changed, then driver reconfig is needed */
8288 		if (memcmp(&new_cfg->etscfg.prio_table,
8289 			   &old_cfg->etscfg.prio_table,
8290 			   sizeof(new_cfg->etscfg.prio_table))) {
8291 			ice_debug(hw, ICE_DBG_DCB, "ETS UP2TC changed\n");
8292 			needs_reconfig = true;
8293 		}
8294 
8295 		/* These are just informational */
8296 		if (memcmp(&new_cfg->etscfg.tcbwtable,
8297 			   &old_cfg->etscfg.tcbwtable,
8298 			   sizeof(new_cfg->etscfg.tcbwtable))) {
8299 			ice_debug(hw, ICE_DBG_DCB, "ETS TCBW table changed\n");
8300 			needs_reconfig = true;
8301 		}
8302 
8303 		if (memcmp(&new_cfg->etscfg.tsatable,
8304 			   &old_cfg->etscfg.tsatable,
8305 			   sizeof(new_cfg->etscfg.tsatable))) {
8306 			ice_debug(hw, ICE_DBG_DCB, "ETS TSA table changed\n");
8307 			needs_reconfig = true;
8308 		}
8309 	}
8310 
8311 	/* Check if PFC config has changed */
8312 	if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) {
8313 		ice_debug(hw, ICE_DBG_DCB, "PFC config changed\n");
8314 		needs_reconfig = true;
8315 	}
8316 
8317 	/* Check if APP table has changed */
8318 	if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app)))
8319 		ice_debug(hw, ICE_DBG_DCB, "APP Table changed\n");
8320 
8321 	ice_debug(hw, ICE_DBG_DCB, "%s result: %d\n", __func__, needs_reconfig);
8322 
8323 	return (needs_reconfig);
8324 }
8325 
8326 /**
8327  * ice_stop_pf_vsi - Stop queues for PF LAN VSI
8328  * @sc: the device private softc
8329  *
8330  * Flushes interrupts and stops the queues associated with the PF LAN VSI.
8331  */
8332 static void
8333 ice_stop_pf_vsi(struct ice_softc *sc)
8334 {
8335 	/* Dissociate the Tx and Rx queues from the interrupts */
8336 	ice_flush_txq_interrupts(&sc->pf_vsi);
8337 	ice_flush_rxq_interrupts(&sc->pf_vsi);
8338 
8339 	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
8340 		return;
8341 
8342 	/* Disable the Tx and Rx queues */
8343 	ice_vsi_disable_tx(&sc->pf_vsi);
8344 	ice_control_all_rx_queues(&sc->pf_vsi, false);
8345 }
8346 
/**
 * ice_vsi_setup_q_map - Setup a VSI queue map
 * @vsi: the VSI being configured
 * @ctxt: VSI context structure
 *
 * Splits the VSI's Rx queues between the VSI's traffic classes, records
 * the per-TC queue offset and counts in vsi->tc_info[], tags each Tx/Rx
 * queue structure with its TC, and fills in the TC mapping and Rx queue
 * mapping fields of @ctxt->info.
 *
 * If vsi->num_tcs is 0 on entry, the VSI is changed to use TC0 only.
 */
static void
ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
{
	u16 qcounts[ICE_MAX_TRAFFIC_CLASS] = {};
	u16 offset = 0, qmap = 0, pow = 0;
	u16 num_q_per_tc, qcount_rx, rem_queues;
	int i, j, k;

	if (vsi->num_tcs == 0) {
		/* at least TC0 should be enabled by default */
		vsi->num_tcs = 1;
		vsi->tc_map = 0x1;
	}

	/* Even share of Rx queues per TC, capped at ICE_MAX_RXQS_PER_TC */
	qcount_rx = vsi->num_rx_queues;
	num_q_per_tc = min(qcount_rx / vsi->num_tcs, ICE_MAX_RXQS_PER_TC);

	/* Fewer queues than TCs: still give every TC one queue */
	if (!num_q_per_tc)
		num_q_per_tc = 1;

	/* Set initial values for # of queues to use for each active TC */
	ice_for_each_traffic_class(i)
		if (i < vsi->num_tcs)
			qcounts[i] = num_q_per_tc;

	/* If any queues are unassigned, add them to TC 0 */
	rem_queues = qcount_rx % vsi->num_tcs;
	if (rem_queues > 0)
		qcounts[0] += rem_queues;

	/* NOTE(review): when num_rx_queues < num_tcs, every TC gets one queue
	 * and the remainder is still added to TC 0, so the qcounts[] total can
	 * exceed num_rx_queues (e.g. 2 queues and 4 TCs gives 3+1+1+1). The
	 * loop below indexes vsi->tx_queues[] and vsi->rx_queues[] with those
	 * counts -- confirm that callers guarantee enough queues.
	 */

	/* TC mapping is a function of the number of Rx queues assigned to the
	 * VSI for each traffic class and the offset of these queues.
	 * The first 10 bits are for queue offset for TC0, next 4 bits for no:of
	 * queues allocated to TC0. No:of queues is a power-of-2.
	 *
	 * If TC is not enabled, the queue offset is set to 0, and allocate one
	 * queue, this way, traffic for the given TC will be sent to the default
	 * queue.
	 *
	 * Setup number and offset of Rx queues for all TCs for the VSI
	 */
	ice_for_each_traffic_class(i) {
		if (!(vsi->tc_map & BIT(i))) {
			/* TC is not enabled */
			vsi->tc_info[i].qoffset = 0;
			vsi->tc_info[i].qcount_rx = 1;
			vsi->tc_info[i].qcount_tx = 1;

			ctxt->info.tc_mapping[i] = 0;
			continue;
		}

		/* TC is enabled */
		vsi->tc_info[i].qoffset = offset;
		vsi->tc_info[i].qcount_rx = qcounts[i];
		vsi->tc_info[i].qcount_tx = qcounts[i];

		/* find the (rounded up) log-2 of queue count for current TC */
		pow = fls(qcounts[i] - 1);

		/* Pack the queue offset and the log-2 queue count into one word */
		qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
			ICE_AQ_VSI_TC_Q_OFFSET_M) |
			((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
			 ICE_AQ_VSI_TC_Q_NUM_M);
		ctxt->info.tc_mapping[i] = CPU_TO_LE16(qmap);

		/* Store traffic class and handle data in queue structures;
		 * j is the queue index within the VSI, k restarts at 0 for
		 * each TC and is stored as the Tx queue handle.
		 */
		for (j = offset, k = 0; j < offset + qcounts[i]; j++, k++) {
			vsi->tx_queues[j].q_handle = k;
			vsi->tx_queues[j].tc = i;

			vsi->rx_queues[j].tc = i;
		}

		/* The next enabled TC starts right after this one's queues */
		offset += qcounts[i];
	}

	/* Rx queue mapping: contiguous, first queue from rx_qmap[0] plus the
	 * number of Rx queues
	 */
	ctxt->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_CONTIG);
	ctxt->info.q_mapping[0] = CPU_TO_LE16(vsi->rx_qmap[0]);
	ctxt->info.q_mapping[1] = CPU_TO_LE16(vsi->num_rx_queues);
}
8434 
8435 /**
8436  * ice_pf_vsi_cfg_tc - Configure PF VSI for a given TC map
8437  * @sc: the device private softc
8438  * @tc_map: traffic class bitmap
8439  *
8440  * @pre VSI queues are stopped
8441  *
8442  * @return 0 if configuration is successful
8443  * @return EIO if Update VSI AQ cmd fails
8444  * @return ENODEV if updating Tx Scheduler fails
8445  */
8446 static int
8447 ice_pf_vsi_cfg_tc(struct ice_softc *sc, u8 tc_map)
8448 {
8449 	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
8450 	struct ice_vsi *vsi = &sc->pf_vsi;
8451 	struct ice_hw *hw = &sc->hw;
8452 	struct ice_vsi_ctx ctx = { 0 };
8453 	device_t dev = sc->dev;
8454 	enum ice_status status;
8455 	u8 num_tcs = 0;
8456 	int i = 0;
8457 
8458 	/* Count the number of enabled Traffic Classes */
8459 	ice_for_each_traffic_class(i)
8460 		if (tc_map & BIT(i))
8461 			num_tcs++;
8462 
8463 	vsi->tc_map = tc_map;
8464 	vsi->num_tcs = num_tcs;
8465 
8466 	/* Set default parameters for context */
8467 	ctx.vf_num = 0;
8468 	ctx.info = vsi->info;
8469 
8470 	/* Setup queue map */
8471 	ice_vsi_setup_q_map(vsi, &ctx);
8472 
8473 	/* Update VSI configuration in firmware (RX queues) */
8474 	ctx.info.valid_sections = CPU_TO_LE16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
8475 	status = ice_update_vsi(hw, vsi->idx, &ctx, NULL);
8476 	if (status) {
8477 		device_printf(dev,
8478 		    "%s: Update VSI AQ call failed, err %s aq_err %s\n",
8479 		    __func__, ice_status_str(status),
8480 		    ice_aq_str(hw->adminq.sq_last_status));
8481 		return (EIO);
8482 	}
8483 	vsi->info = ctx.info;
8484 
8485 	/* Use values derived in ice_vsi_setup_q_map() */
8486 	for (i = 0; i < num_tcs; i++)
8487 		max_txqs[i] = vsi->tc_info[i].qcount_tx;
8488 
8489 	if (hw->debug_mask & ICE_DBG_DCB) {
8490 		device_printf(dev, "%s: max_txqs:", __func__);
8491 		ice_for_each_traffic_class(i)
8492 			printf(" %d", max_txqs[i]);
8493 		printf("\n");
8494 	}
8495 
8496 	/* Update LAN Tx queue info in firmware */
8497 	status = ice_cfg_vsi_lan(hw->port_info, vsi->idx, vsi->tc_map,
8498 				 max_txqs);
8499 	if (status) {
8500 		device_printf(dev,
8501 		    "%s: Failed VSI lan queue config, err %s aq_err %s\n",
8502 		    __func__, ice_status_str(status),
8503 		    ice_aq_str(hw->adminq.sq_last_status));
8504 		return (ENODEV);
8505 	}
8506 
8507 	vsi->info.valid_sections = 0;
8508 
8509 	return (0);
8510 }
8511 
8512 /**
8513  * ice_dcb_tc_contig - Count TCs if they're contiguous
8514  * @tc_map: pointer to priority table
8515  *
8516  * @return The number of traffic classes in
8517  * an 8-bit TC bitmap, or if there is a gap, then returns 0.
8518  */
8519 static u8
8520 ice_dcb_tc_contig(u8 tc_map)
8521 {
8522 	bool tc_unused = false;
8523 	u8 ret = 0;
8524 
8525 	/* Scan bitmask for contiguous TCs starting with TC0 */
8526 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
8527 		if (tc_map & BIT(i)) {
8528 			if (!tc_unused) {
8529 				ret++;
8530 			} else {
8531 				/* Non-contiguous TCs detected */
8532 				return (0);
8533 			}
8534 		} else
8535 			tc_unused = true;
8536 	}
8537 
8538 	return (ret);
8539 }
8540 
8541 /**
8542  * ice_dcb_recfg - Reconfigure VSI with new DCB settings
8543  * @sc: the device private softc
8544  *
8545  * @pre All VSIs have been disabled/stopped
8546  *
8547  * Reconfigures VSI settings based on local_dcbx_cfg.
8548  */
8549 static void
8550 ice_dcb_recfg(struct ice_softc *sc)
8551 {
8552 	struct ice_dcbx_cfg *dcbcfg =
8553 	    &sc->hw.port_info->qos_cfg.local_dcbx_cfg;
8554 	device_t dev = sc->dev;
8555 	u8 tc_map = 0;
8556 	int ret;
8557 
8558 	tc_map = ice_dcb_get_tc_map(dcbcfg);
8559 
8560 	/* If non-contiguous TCs are used, then configure
8561 	 * the default TC instead. There's no support for
8562 	 * non-contiguous TCs being used.
8563 	 */
8564 	if (ice_dcb_tc_contig(tc_map) == 0) {
8565 		tc_map = ICE_DFLT_TRAFFIC_CLASS;
8566 		ice_set_default_local_lldp_mib(sc);
8567 	}
8568 
8569 	/* Reconfigure VSI queues to add/remove traffic classes */
8570 	ret = ice_pf_vsi_cfg_tc(sc, tc_map);
8571 	if (ret)
8572 		device_printf(dev,
8573 		    "Failed to configure TCs for PF VSI, err %s\n",
8574 		    ice_err_str(ret));
8575 
8576 }
8577 
8578 /**
8579  * ice_set_default_local_mib_settings - Set Local LLDP MIB to default settings
8580  * @sc: device softc structure
8581  *
8582  * Overwrites the driver's SW local LLDP MIB with default settings. This
8583  * ensures the driver has a valid MIB when it next uses the Set Local LLDP MIB
8584  * admin queue command.
8585  */
8586 static void
8587 ice_set_default_local_mib_settings(struct ice_softc *sc)
8588 {
8589 	struct ice_dcbx_cfg *dcbcfg;
8590 	struct ice_hw *hw = &sc->hw;
8591 	struct ice_port_info *pi;
8592 	u8 maxtcs, maxtcs_ets, old_pfc_mode;
8593 
8594 	pi = hw->port_info;
8595 
8596 	dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
8597 
8598 	maxtcs = hw->func_caps.common_cap.maxtc;
8599 	/* This value is only 3 bits; 8 TCs maps to 0 */
8600 	maxtcs_ets = maxtcs & ICE_IEEE_ETS_MAXTC_M;
8601 
8602 	/* VLAN vs DSCP mode needs to be preserved */
8603 	old_pfc_mode = dcbcfg->pfc_mode;
8604 
8605 	/**
8606 	 * Setup the default settings used by the driver for the Set Local
8607 	 * LLDP MIB Admin Queue command (0x0A08). (1TC w/ 100% BW, ETS, no
8608 	 * PFC, TSA=2).
8609 	 */
8610 	memset(dcbcfg, 0, sizeof(*dcbcfg));
8611 
8612 	dcbcfg->etscfg.willing = 1;
8613 	dcbcfg->etscfg.tcbwtable[0] = 100;
8614 	dcbcfg->etscfg.maxtcs = maxtcs_ets;
8615 	dcbcfg->etscfg.tsatable[0] = 2;
8616 
8617 	dcbcfg->etsrec = dcbcfg->etscfg;
8618 	dcbcfg->etsrec.willing = 0;
8619 
8620 	dcbcfg->pfc.willing = 1;
8621 	dcbcfg->pfc.pfccap = maxtcs;
8622 
8623 	dcbcfg->pfc_mode = old_pfc_mode;
8624 }
8625 
8626 /**
8627  * ice_do_dcb_reconfig - notify RDMA and reconfigure PF LAN VSI
8628  * @sc: the device private softc
8629  * @pending_mib: FW has a pending MIB change to execute
8630  *
8631  * @pre Determined that the DCB configuration requires a change
8632  *
8633  * Reconfigures the PF LAN VSI based on updated DCB configuration
8634  * found in the hw struct's/port_info's/ local dcbx configuration.
8635  */
8636 static void
8637 ice_do_dcb_reconfig(struct ice_softc *sc, bool pending_mib)
8638 {
8639 	struct ice_aqc_port_ets_elem port_ets = { 0 };
8640 	struct ice_dcbx_cfg *local_dcbx_cfg;
8641 	struct ice_hw *hw = &sc->hw;
8642 	struct ice_port_info *pi;
8643 	device_t dev = sc->dev;
8644 	enum ice_status status;
8645 
8646 	pi = sc->hw.port_info;
8647 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
8648 
8649 	ice_rdma_notify_dcb_qos_change(sc);
8650 	/* If there's a pending MIB, tell the FW to execute the MIB change
8651 	 * now.
8652 	 */
8653 	if (pending_mib) {
8654 		status = ice_lldp_execute_pending_mib(hw);
8655 		if ((status == ICE_ERR_AQ_ERROR) &&
8656 		    (hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)) {
8657 			device_printf(dev,
8658 			    "Execute Pending LLDP MIB AQ call failed, no pending MIB\n");
8659 		} else if (status) {
8660 			device_printf(dev,
8661 			    "Execute Pending LLDP MIB AQ call failed, err %s aq_err %s\n",
8662 			    ice_status_str(status),
8663 			    ice_aq_str(hw->adminq.sq_last_status));
8664 			/* This won't break traffic, but QoS will not work as expected */
8665 		}
8666 	}
8667 
8668 	/* Set state when there's more than one TC */
8669 	if (ice_dcb_get_num_tc(local_dcbx_cfg) > 1) {
8670 		device_printf(dev, "Multiple traffic classes enabled\n");
8671 		ice_set_state(&sc->state, ICE_STATE_MULTIPLE_TCS);
8672 	} else {
8673 		device_printf(dev, "Multiple traffic classes disabled\n");
8674 		ice_clear_state(&sc->state, ICE_STATE_MULTIPLE_TCS);
8675 	}
8676 
8677 	/* Disable PF VSI since it's going to be reconfigured */
8678 	ice_stop_pf_vsi(sc);
8679 
8680 	/* Query ETS configuration and update SW Tx scheduler info */
8681 	status = ice_query_port_ets(pi, &port_ets, sizeof(port_ets), NULL);
8682 	if (status != ICE_SUCCESS) {
8683 		device_printf(dev,
8684 		    "Query Port ETS AQ call failed, err %s aq_err %s\n",
8685 		    ice_status_str(status),
8686 		    ice_aq_str(hw->adminq.sq_last_status));
8687 		/* This won't break traffic, but QoS will not work as expected */
8688 	}
8689 
8690 	/* Change PF VSI configuration */
8691 	ice_dcb_recfg(sc);
8692 
8693 	/* Send new configuration to RDMA client driver */
8694 	ice_rdma_dcb_qos_update(sc, pi);
8695 
8696 	ice_request_stack_reinit(sc);
8697 }
8698 
8699 /**
8700  * ice_handle_mib_change_event - helper function to handle LLDP MIB change events
8701  * @sc: the device private softc
8702  * @event: event received on a control queue
8703  *
8704  * Checks the updated MIB it receives and possibly reconfigures the PF LAN
8705  * VSI depending on what has changed. This will also print out some debug
8706  * information about the MIB event if ICE_DBG_DCB is enabled in the debug_mask.
8707  */
8708 static void
8709 ice_handle_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event)
8710 {
8711 	struct ice_aqc_lldp_get_mib *params =
8712 	    (struct ice_aqc_lldp_get_mib *)&event->desc.params.lldp_get_mib;
8713 	struct ice_dcbx_cfg tmp_dcbx_cfg, *local_dcbx_cfg;
8714 	struct ice_port_info *pi;
8715 	device_t dev = sc->dev;
8716 	struct ice_hw *hw = &sc->hw;
8717 	bool needs_reconfig, mib_is_pending;
8718 	enum ice_status status;
8719 	u8 mib_type, bridge_type;
8720 
8721 	ASSERT_CFG_LOCKED(sc);
8722 
8723 	ice_debug_print_mib_change_event(sc, event);
8724 
8725 	pi = sc->hw.port_info;
8726 
8727 	mib_type = (params->type & ICE_AQ_LLDP_MIB_TYPE_M) >>
8728 	    ICE_AQ_LLDP_MIB_TYPE_S;
8729 	bridge_type = (params->type & ICE_AQ_LLDP_BRID_TYPE_M) >>
8730 	    ICE_AQ_LLDP_BRID_TYPE_S;
8731 	mib_is_pending = (params->state & ICE_AQ_LLDP_MIB_CHANGE_STATE_M) >>
8732 	    ICE_AQ_LLDP_MIB_CHANGE_STATE_S;
8733 
8734 	/* Ignore if event is not for Nearest Bridge */
8735 	if (bridge_type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID)
8736 		return;
8737 
8738 	/* Check MIB Type and return if event for Remote MIB update */
8739 	if (mib_type == ICE_AQ_LLDP_MIB_REMOTE) {
8740 		/* Update the cached remote MIB and return */
8741 		status = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
8742 					 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID,
8743 					 &pi->qos_cfg.remote_dcbx_cfg);
8744 		if (status)
8745 			device_printf(dev,
8746 			    "%s: Failed to get Remote DCB config; status %s, aq_err %s\n",
8747 			    __func__, ice_status_str(status),
8748 			    ice_aq_str(hw->adminq.sq_last_status));
8749 		/* Not fatal if this fails */
8750 		return;
8751 	}
8752 
8753 	/* Save line length by aliasing the local dcbx cfg */
8754 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
8755 	/* Save off the old configuration and clear current config */
8756 	tmp_dcbx_cfg = *local_dcbx_cfg;
8757 	memset(local_dcbx_cfg, 0, sizeof(*local_dcbx_cfg));
8758 
8759 	/* Update the current local_dcbx_cfg with new data */
8760 	if (mib_is_pending) {
8761 		ice_get_dcb_cfg_from_mib_change(pi, event);
8762 	} else {
8763 		/* Get updated DCBX data from firmware */
8764 		status = ice_get_dcb_cfg(pi);
8765 		if (status) {
8766 			device_printf(dev,
8767 			    "%s: Failed to get Local DCB config; status %s, aq_err %s\n",
8768 			    __func__, ice_status_str(status),
8769 			    ice_aq_str(hw->adminq.sq_last_status));
8770 			return;
8771 		}
8772 	}
8773 
8774 	/* Check to see if DCB needs reconfiguring */
8775 	needs_reconfig = ice_dcb_needs_reconfig(sc, &tmp_dcbx_cfg,
8776 	    local_dcbx_cfg);
8777 
8778 	if (!needs_reconfig && !mib_is_pending)
8779 		return;
8780 
8781 	/* Reconfigure -- this will also notify FW that configuration is done,
8782 	 * if the FW MIB change is only pending instead of executed.
8783 	 */
8784 	ice_do_dcb_reconfig(sc, mib_is_pending);
8785 }
8786 
8787 /**
8788  * ice_send_version - Send driver version to firmware
8789  * @sc: the device private softc
8790  *
8791  * Send the driver version to the firmware. This must be called as early as
8792  * possible after ice_init_hw().
8793  */
8794 int
8795 ice_send_version(struct ice_softc *sc)
8796 {
8797 	struct ice_driver_ver driver_version = {0};
8798 	struct ice_hw *hw = &sc->hw;
8799 	device_t dev = sc->dev;
8800 	enum ice_status status;
8801 
8802 	driver_version.major_ver = ice_major_version;
8803 	driver_version.minor_ver = ice_minor_version;
8804 	driver_version.build_ver = ice_patch_version;
8805 	driver_version.subbuild_ver = ice_rc_version;
8806 
8807 	strlcpy((char *)driver_version.driver_string, ice_driver_version,
8808 		sizeof(driver_version.driver_string));
8809 
8810 	status = ice_aq_send_driver_ver(hw, &driver_version, NULL);
8811 	if (status) {
8812 		device_printf(dev, "Unable to send driver version to firmware, err %s aq_err %s\n",
8813 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
8814 		return (EIO);
8815 	}
8816 
8817 	return (0);
8818 }
8819 
8820 /**
8821  * ice_handle_lan_overflow_event - helper function to log LAN overflow events
8822  * @sc: device softc
8823  * @event: event received on a control queue
8824  *
8825  * Prints out a message when a LAN overflow event is detected on a receive
8826  * queue.
8827  */
8828 static void
8829 ice_handle_lan_overflow_event(struct ice_softc *sc, struct ice_rq_event_info *event)
8830 {
8831 	struct ice_aqc_event_lan_overflow *params =
8832 	    (struct ice_aqc_event_lan_overflow *)&event->desc.params.lan_overflow;
8833 	struct ice_hw *hw = &sc->hw;
8834 
8835 	ice_debug(hw, ICE_DBG_DCB, "LAN overflow event detected, prtdcb_ruptq=0x%08x, qtx_ctl=0x%08x\n",
8836 		  LE32_TO_CPU(params->prtdcb_ruptq),
8837 		  LE32_TO_CPU(params->qtx_ctl));
8838 }
8839 
8840 /**
8841  * ice_add_ethertype_to_list - Add an Ethertype filter to a filter list
8842  * @vsi: the VSI to target packets to
8843  * @list: the list to add the filter to
8844  * @ethertype: the Ethertype to filter on
8845  * @direction: The direction of the filter (Tx or Rx)
8846  * @action: the action to take
8847  *
8848  * Add an Ethertype filter to a filter list. Used to forward a series of
8849  * filters to the firmware for configuring the switch.
8850  *
8851  * Returns 0 on success, and an error code on failure.
8852  */
8853 static int
8854 ice_add_ethertype_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
8855 			  u16 ethertype, u16 direction,
8856 			  enum ice_sw_fwd_act_type action)
8857 {
8858 	struct ice_fltr_list_entry *entry;
8859 
8860 	MPASS((direction == ICE_FLTR_TX) || (direction == ICE_FLTR_RX));
8861 
8862 	entry = (__typeof(entry))malloc(sizeof(*entry), M_ICE, M_NOWAIT|M_ZERO);
8863 	if (!entry)
8864 		return (ENOMEM);
8865 
8866 	entry->fltr_info.flag = direction;
8867 	entry->fltr_info.src_id = ICE_SRC_ID_VSI;
8868 	entry->fltr_info.lkup_type = ICE_SW_LKUP_ETHERTYPE;
8869 	entry->fltr_info.fltr_act = action;
8870 	entry->fltr_info.vsi_handle = vsi->idx;
8871 	entry->fltr_info.l_data.ethertype_mac.ethertype = ethertype;
8872 
8873 	LIST_ADD(&entry->list_entry, list);
8874 
8875 	return 0;
8876 }
8877 
8878 #define ETHERTYPE_PAUSE_FRAMES 0x8808
8879 #define ETHERTYPE_LLDP_FRAMES 0x88cc
8880 
8881 /**
8882  * ice_cfg_pf_ethertype_filters - Configure switch to drop ethertypes
8883  * @sc: the device private softc
8884  *
8885  * Configure the switch to drop PAUSE frames and LLDP frames transmitted from
8886  * the host. This prevents malicious VFs from sending these frames and being
8887  * able to control or configure the network.
8888  */
8889 int
8890 ice_cfg_pf_ethertype_filters(struct ice_softc *sc)
8891 {
8892 	struct ice_list_head ethertype_list;
8893 	struct ice_vsi *vsi = &sc->pf_vsi;
8894 	struct ice_hw *hw = &sc->hw;
8895 	device_t dev = sc->dev;
8896 	enum ice_status status;
8897 	int err = 0;
8898 
8899 	INIT_LIST_HEAD(&ethertype_list);
8900 
8901 	/*
8902 	 * Note that the switch filters will ignore the VSI index for the drop
8903 	 * action, so we only need to program drop filters once for the main
8904 	 * VSI.
8905 	 */
8906 
8907 	/* Configure switch to drop all Tx pause frames coming from any VSI. */
8908 	if (sc->enable_tx_fc_filter) {
8909 		err = ice_add_ethertype_to_list(vsi, &ethertype_list,
8910 						ETHERTYPE_PAUSE_FRAMES,
8911 						ICE_FLTR_TX, ICE_DROP_PACKET);
8912 		if (err)
8913 			goto free_ethertype_list;
8914 	}
8915 
8916 	/* Configure switch to drop LLDP frames coming from any VSI */
8917 	if (sc->enable_tx_lldp_filter) {
8918 		err = ice_add_ethertype_to_list(vsi, &ethertype_list,
8919 						ETHERTYPE_LLDP_FRAMES,
8920 						ICE_FLTR_TX, ICE_DROP_PACKET);
8921 		if (err)
8922 			goto free_ethertype_list;
8923 	}
8924 
8925 	status = ice_add_eth_mac(hw, &ethertype_list);
8926 	if (status) {
8927 		device_printf(dev,
8928 			      "Failed to add Tx Ethertype filters, err %s aq_err %s\n",
8929 			      ice_status_str(status),
8930 			      ice_aq_str(hw->adminq.sq_last_status));
8931 		err = (EIO);
8932 	}
8933 
8934 free_ethertype_list:
8935 	ice_free_fltr_list(&ethertype_list);
8936 	return err;
8937 }
8938 
8939 /**
8940  * ice_add_rx_lldp_filter - add ethertype filter for Rx LLDP frames
8941  * @sc: the device private structure
8942  *
8943  * Add a switch ethertype filter which forwards the LLDP frames to the main PF
8944  * VSI. Called when the fw_lldp_agent is disabled, to allow the LLDP frames to
8945  * be forwarded to the stack.
8946  */
8947 static void
8948 ice_add_rx_lldp_filter(struct ice_softc *sc)
8949 {
8950 	struct ice_list_head ethertype_list;
8951 	struct ice_vsi *vsi = &sc->pf_vsi;
8952 	struct ice_hw *hw = &sc->hw;
8953 	device_t dev = sc->dev;
8954 	enum ice_status status;
8955 	int err;
8956 	u16 vsi_num;
8957 
8958 	/*
8959 	 * If FW is new enough, use a direct AQ command to perform the filter
8960 	 * addition.
8961 	 */
8962 	if (ice_fw_supports_lldp_fltr_ctrl(hw)) {
8963 		vsi_num = ice_get_hw_vsi_num(hw, vsi->idx);
8964 		status = ice_lldp_fltr_add_remove(hw, vsi_num, true);
8965 		if (status) {
8966 			device_printf(dev,
8967 			    "Failed to add Rx LLDP filter, err %s aq_err %s\n",
8968 			    ice_status_str(status),
8969 			    ice_aq_str(hw->adminq.sq_last_status));
8970 		} else
8971 			ice_set_state(&sc->state,
8972 			    ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER);
8973 		return;
8974 	}
8975 
8976 	INIT_LIST_HEAD(&ethertype_list);
8977 
8978 	/* Forward Rx LLDP frames to the stack */
8979 	err = ice_add_ethertype_to_list(vsi, &ethertype_list,
8980 					ETHERTYPE_LLDP_FRAMES,
8981 					ICE_FLTR_RX, ICE_FWD_TO_VSI);
8982 	if (err) {
8983 		device_printf(dev,
8984 			      "Failed to add Rx LLDP filter, err %s\n",
8985 			      ice_err_str(err));
8986 		goto free_ethertype_list;
8987 	}
8988 
8989 	status = ice_add_eth_mac(hw, &ethertype_list);
8990 	if (status && status != ICE_ERR_ALREADY_EXISTS) {
8991 		device_printf(dev,
8992 			      "Failed to add Rx LLDP filter, err %s aq_err %s\n",
8993 			      ice_status_str(status),
8994 			      ice_aq_str(hw->adminq.sq_last_status));
8995 	} else {
8996 		/*
8997 		 * If status == ICE_ERR_ALREADY_EXISTS, we won't treat an
8998 		 * already existing filter as an error case.
8999 		 */
9000 		ice_set_state(&sc->state, ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER);
9001 	}
9002 
9003 free_ethertype_list:
9004 	ice_free_fltr_list(&ethertype_list);
9005 }
9006 
9007 /**
9008  * ice_del_rx_lldp_filter - Remove ethertype filter for Rx LLDP frames
9009  * @sc: the device private structure
9010  *
9011  * Remove the switch filter forwarding LLDP frames to the main PF VSI, called
9012  * when the firmware LLDP agent is enabled, to stop routing LLDP frames to the
9013  * stack.
9014  */
9015 static void
9016 ice_del_rx_lldp_filter(struct ice_softc *sc)
9017 {
9018 	struct ice_list_head ethertype_list;
9019 	struct ice_vsi *vsi = &sc->pf_vsi;
9020 	struct ice_hw *hw = &sc->hw;
9021 	device_t dev = sc->dev;
9022 	enum ice_status status;
9023 	int err;
9024 	u16 vsi_num;
9025 
9026 	/*
9027 	 * Only in the scenario where the driver added the filter during
9028 	 * this session (while the driver was loaded) would we be able to
9029 	 * delete this filter.
9030 	 */
9031 	if (!ice_test_state(&sc->state, ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER))
9032 		return;
9033 
9034 	/*
9035 	 * If FW is new enough, use a direct AQ command to perform the filter
9036 	 * removal.
9037 	 */
9038 	if (ice_fw_supports_lldp_fltr_ctrl(hw)) {
9039 		vsi_num = ice_get_hw_vsi_num(hw, vsi->idx);
9040 		status = ice_lldp_fltr_add_remove(hw, vsi_num, false);
9041 		if (status) {
9042 			device_printf(dev,
9043 			    "Failed to remove Rx LLDP filter, err %s aq_err %s\n",
9044 			    ice_status_str(status),
9045 			    ice_aq_str(hw->adminq.sq_last_status));
9046 		}
9047 		return;
9048 	}
9049 
9050 	INIT_LIST_HEAD(&ethertype_list);
9051 
9052 	/* Remove filter forwarding Rx LLDP frames to the stack */
9053 	err = ice_add_ethertype_to_list(vsi, &ethertype_list,
9054 					ETHERTYPE_LLDP_FRAMES,
9055 					ICE_FLTR_RX, ICE_FWD_TO_VSI);
9056 	if (err) {
9057 		device_printf(dev,
9058 			      "Failed to remove Rx LLDP filter, err %s\n",
9059 			      ice_err_str(err));
9060 		goto free_ethertype_list;
9061 	}
9062 
9063 	status = ice_remove_eth_mac(hw, &ethertype_list);
9064 	if (status == ICE_ERR_DOES_NOT_EXIST) {
9065 		; /* Don't complain if we try to remove a filter that doesn't exist */
9066 	} else if (status) {
9067 		device_printf(dev,
9068 			      "Failed to remove Rx LLDP filter, err %s aq_err %s\n",
9069 			      ice_status_str(status),
9070 			      ice_aq_str(hw->adminq.sq_last_status));
9071 	}
9072 
9073 free_ethertype_list:
9074 	ice_free_fltr_list(&ethertype_list);
9075 }
9076 
9077 /**
9078  * ice_init_link_configuration -- Setup link in different ways depending
9079  * on whether media is available or not.
9080  * @sc: device private structure
9081  *
9082  * Called at the end of the attach process to either set default link
9083  * parameters if there is media available, or force HW link down and
9084  * set a state bit if there is no media.
9085  */
9086 void
9087 ice_init_link_configuration(struct ice_softc *sc)
9088 {
9089 	struct ice_port_info *pi = sc->hw.port_info;
9090 	struct ice_hw *hw = &sc->hw;
9091 	device_t dev = sc->dev;
9092 	enum ice_status status;
9093 
9094 	pi->phy.get_link_info = true;
9095 	status = ice_get_link_status(pi, &sc->link_up);
9096 	if (status != ICE_SUCCESS) {
9097 		device_printf(dev,
9098 		    "%s: ice_get_link_status failed; status %s, aq_err %s\n",
9099 		    __func__, ice_status_str(status),
9100 		    ice_aq_str(hw->adminq.sq_last_status));
9101 		return;
9102 	}
9103 
9104 	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
9105 		ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
9106 		/* Apply default link settings */
9107 		ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
9108 	} else {
9109 		 /* Set link down, and poll for media available in timer. This prevents the
9110 		  * driver from receiving spurious link-related events.
9111 		  */
9112 		ice_set_state(&sc->state, ICE_STATE_NO_MEDIA);
9113 		status = ice_aq_set_link_restart_an(pi, false, NULL);
9114 		if (status != ICE_SUCCESS)
9115 			device_printf(dev,
9116 			    "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n",
9117 			    __func__, ice_status_str(status),
9118 			    ice_aq_str(hw->adminq.sq_last_status));
9119 	}
9120 }
9121 
9122 /**
9123  * ice_apply_saved_phy_req_to_cfg -- Write saved user PHY settings to cfg data
9124  * @sc: device private structure
9125  * @cfg: new PHY config data to be modified
9126  *
9127  * Applies user settings for advertised speeds to the PHY type fields in the
9128  * supplied PHY config struct. It uses the data from pcaps to check if the
9129  * saved settings are invalid and uses the pcaps data instead if they are
9130  * invalid.
9131  */
9132 static int
9133 ice_apply_saved_phy_req_to_cfg(struct ice_softc *sc,
9134 			       struct ice_aqc_set_phy_cfg_data *cfg)
9135 {
9136 	struct ice_phy_data phy_data = { 0 };
9137 	struct ice_port_info *pi = sc->hw.port_info;
9138 	u64 phy_low = 0, phy_high = 0;
9139 	u16 link_speeds;
9140 	int ret;
9141 
9142 	link_speeds = pi->phy.curr_user_speed_req;
9143 
9144 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LINK_MGMT_VER_2)) {
9145 		memset(&phy_data, 0, sizeof(phy_data));
9146 		phy_data.report_mode = ICE_AQC_REPORT_DFLT_CFG;
9147 		phy_data.user_speeds_orig = link_speeds;
9148 		ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9149 		if (ret != 0) {
9150 			/* Error message already printed within function */
9151 			return (ret);
9152 		}
9153 		phy_low = phy_data.phy_low_intr;
9154 		phy_high = phy_data.phy_high_intr;
9155 
9156 		if (link_speeds == 0 || phy_data.user_speeds_intr)
9157 			goto finalize_link_speed;
9158 		if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE)) {
9159 			memset(&phy_data, 0, sizeof(phy_data));
9160 			phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA;
9161 			phy_data.user_speeds_orig = link_speeds;
9162 			ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9163 			if (ret != 0) {
9164 				/* Error message already printed within function */
9165 				return (ret);
9166 			}
9167 			phy_low = phy_data.phy_low_intr;
9168 			phy_high = phy_data.phy_high_intr;
9169 
9170 			if (!phy_data.user_speeds_intr) {
9171 				phy_low = phy_data.phy_low_orig;
9172 				phy_high = phy_data.phy_high_orig;
9173 			}
9174 			goto finalize_link_speed;
9175 		}
9176 		/* If we're here, then it means the benefits of Version 2
9177 		 * link management aren't utilized.  We fall through to
9178 		 * handling Strict Link Mode the same as Version 1 link
9179 		 * management.
9180 		 */
9181 	}
9182 
9183 	memset(&phy_data, 0, sizeof(phy_data));
9184 	if ((link_speeds == 0) &&
9185 	    (sc->ldo_tlv.phy_type_low || sc->ldo_tlv.phy_type_high))
9186 		phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA;
9187 	else
9188 		phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_MEDIA;
9189 	phy_data.user_speeds_orig = link_speeds;
9190 	ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9191 	if (ret != 0) {
9192 		/* Error message already printed within function */
9193 		return (ret);
9194 	}
9195 	phy_low = phy_data.phy_low_intr;
9196 	phy_high = phy_data.phy_high_intr;
9197 
9198 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE)) {
9199 		if (phy_low == 0 && phy_high == 0) {
9200 			device_printf(sc->dev,
9201 			    "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
9202 			return (EINVAL);
9203 		}
9204 	} else {
9205 		if (link_speeds == 0) {
9206 			if (sc->ldo_tlv.phy_type_low & phy_low ||
9207 			    sc->ldo_tlv.phy_type_high & phy_high) {
9208 				phy_low &= sc->ldo_tlv.phy_type_low;
9209 				phy_high &= sc->ldo_tlv.phy_type_high;
9210 			}
9211 		} else if (phy_low == 0 && phy_high == 0) {
9212 			memset(&phy_data, 0, sizeof(phy_data));
9213 			phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA;
9214 			phy_data.user_speeds_orig = link_speeds;
9215 			ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9216 			if (ret != 0) {
9217 				/* Error message already printed within function */
9218 				return (ret);
9219 			}
9220 			phy_low = phy_data.phy_low_intr;
9221 			phy_high = phy_data.phy_high_intr;
9222 
9223 			if (!phy_data.user_speeds_intr) {
9224 				phy_low = phy_data.phy_low_orig;
9225 				phy_high = phy_data.phy_high_orig;
9226 			}
9227 		}
9228 	}
9229 
9230 finalize_link_speed:
9231 
9232 	/* Cache new user settings for speeds */
9233 	pi->phy.curr_user_speed_req = phy_data.user_speeds_intr;
9234 	cfg->phy_type_low = htole64(phy_low);
9235 	cfg->phy_type_high = htole64(phy_high);
9236 
9237 	return (ret);
9238 }
9239 
9240 /**
9241  * ice_apply_saved_fec_req_to_cfg -- Write saved user FEC mode to cfg data
9242  * @sc: device private structure
9243  * @cfg: new PHY config data to be modified
9244  *
9245  * Applies user setting for FEC mode to PHY config struct. It uses the data
9246  * from pcaps to check if the saved settings are invalid and uses the pcaps
9247  * data instead if they are invalid.
9248  */
9249 static int
9250 ice_apply_saved_fec_req_to_cfg(struct ice_softc *sc,
9251 			       struct ice_aqc_set_phy_cfg_data *cfg)
9252 {
9253 	struct ice_port_info *pi = sc->hw.port_info;
9254 	enum ice_status status;
9255 
9256 	cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC;
9257 	status = ice_cfg_phy_fec(pi, cfg, pi->phy.curr_user_fec_req);
9258 	if (status)
9259 		return (EIO);
9260 
9261 	return (0);
9262 }
9263 
9264 /**
9265  * ice_apply_saved_fc_req_to_cfg -- Write saved user flow control mode to cfg data
9266  * @pi: port info struct
9267  * @cfg: new PHY config data to be modified
9268  *
9269  * Applies user setting for flow control mode to PHY config struct. There are
9270  * no invalid flow control mode settings; if there are, then this function
9271  * treats them like "ICE_FC_NONE".
9272  */
9273 static void
9274 ice_apply_saved_fc_req_to_cfg(struct ice_port_info *pi,
9275 			      struct ice_aqc_set_phy_cfg_data *cfg)
9276 {
9277 	cfg->caps &= ~(ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY |
9278 		       ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY);
9279 
9280 	switch (pi->phy.curr_user_fc_req) {
9281 	case ICE_FC_FULL:
9282 		cfg->caps |= ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY |
9283 			     ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY;
9284 		break;
9285 	case ICE_FC_RX_PAUSE:
9286 		cfg->caps |= ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY;
9287 		break;
9288 	case ICE_FC_TX_PAUSE:
9289 		cfg->caps |= ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY;
9290 		break;
9291 	default:
9292 		/* ICE_FC_NONE */
9293 		break;
9294 	}
9295 }
9296 
9297 /**
9298  * ice_apply_saved_phy_cfg -- Re-apply user PHY config settings
9299  * @sc: device private structure
9300  * @settings: which settings to apply
9301  *
9302  * Applies user settings for advertised speeds, FEC mode, and flow
9303  * control mode to a PHY config struct; it uses the data from pcaps
9304  * to check if the saved settings are invalid and uses the pcaps
9305  * data instead if they are invalid.
9306  *
9307  * For things like sysctls where only one setting needs to be
9308  * updated, the bitmap allows the caller to specify which setting
9309  * to update.
9310  */
9311 int
9312 ice_apply_saved_phy_cfg(struct ice_softc *sc, u8 settings)
9313 {
9314 	struct ice_aqc_set_phy_cfg_data cfg = { 0 };
9315 	struct ice_port_info *pi = sc->hw.port_info;
9316 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
9317 	struct ice_hw *hw = &sc->hw;
9318 	device_t dev = sc->dev;
9319 	u64 phy_low, phy_high;
9320 	enum ice_status status;
9321 	enum ice_fec_mode dflt_fec_mode;
9322 	u16 dflt_user_speed;
9323 
9324 	if (!settings || settings > ICE_APPLY_LS_FEC_FC) {
9325 		ice_debug(hw, ICE_DBG_LINK, "Settings out-of-bounds: %u\n",
9326 		    settings);
9327 	}
9328 
9329 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
9330 				     &pcaps, NULL);
9331 	if (status != ICE_SUCCESS) {
9332 		device_printf(dev,
9333 		    "%s: ice_aq_get_phy_caps (ACTIVE) failed; status %s, aq_err %s\n",
9334 		    __func__, ice_status_str(status),
9335 		    ice_aq_str(hw->adminq.sq_last_status));
9336 		return (EIO);
9337 	}
9338 
9339 	phy_low = le64toh(pcaps.phy_type_low);
9340 	phy_high = le64toh(pcaps.phy_type_high);
9341 
9342 	/* Save off initial config parameters */
9343 	dflt_user_speed = ice_aq_phy_types_to_link_speeds(phy_low, phy_high);
9344 	dflt_fec_mode = ice_caps_to_fec_mode(pcaps.caps, pcaps.link_fec_options);
9345 
9346 	/* Setup new PHY config */
9347 	ice_copy_phy_caps_to_cfg(pi, &pcaps, &cfg);
9348 
9349 	/* On error, restore active configuration values */
9350 	if ((settings & ICE_APPLY_LS) &&
9351 	    ice_apply_saved_phy_req_to_cfg(sc, &cfg)) {
9352 		pi->phy.curr_user_speed_req = dflt_user_speed;
9353 		cfg.phy_type_low = pcaps.phy_type_low;
9354 		cfg.phy_type_high = pcaps.phy_type_high;
9355 	}
9356 	if ((settings & ICE_APPLY_FEC) &&
9357 	    ice_apply_saved_fec_req_to_cfg(sc, &cfg)) {
9358 		pi->phy.curr_user_fec_req = dflt_fec_mode;
9359 	}
9360 	if (settings & ICE_APPLY_FC) {
9361 		/* No real error indicators for this process,
9362 		 * so we'll just have to assume it works. */
9363 		ice_apply_saved_fc_req_to_cfg(pi, &cfg);
9364 	}
9365 
9366 	/* Enable link and re-negotiate it */
9367 	cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK;
9368 
9369 	status = ice_aq_set_phy_cfg(hw, pi, &cfg, NULL);
9370 	if (status != ICE_SUCCESS) {
9371 		/* Don't indicate failure if there's no media in the port.
9372 		 * The settings have been saved and will apply when media
9373 		 * is inserted.
9374 		 */
9375 		if ((status == ICE_ERR_AQ_ERROR) &&
9376 		    (hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY)) {
9377 			device_printf(dev,
9378 			    "%s: Setting will be applied when media is inserted\n",
9379 			    __func__);
9380 			return (0);
9381 		} else {
9382 			device_printf(dev,
9383 			    "%s: ice_aq_set_phy_cfg failed; status %s, aq_err %s\n",
9384 			    __func__, ice_status_str(status),
9385 			    ice_aq_str(hw->adminq.sq_last_status));
9386 			return (EIO);
9387 		}
9388 	}
9389 
9390 	return (0);
9391 }
9392 
9393 /**
9394  * ice_print_ldo_tlv - Print out LDO TLV information
9395  * @sc: device private structure
9396  * @tlv: LDO TLV information from the adapter NVM
9397  *
9398  * Dump out the information in tlv to the kernel message buffer; intended for
9399  * debugging purposes.
9400  */
9401 static void
9402 ice_print_ldo_tlv(struct ice_softc *sc, struct ice_link_default_override_tlv *tlv)
9403 {
9404 	device_t dev = sc->dev;
9405 
9406 	device_printf(dev, "TLV: -options     0x%02x\n", tlv->options);
9407 	device_printf(dev, "     -phy_config  0x%02x\n", tlv->phy_config);
9408 	device_printf(dev, "     -fec_options 0x%02x\n", tlv->fec_options);
9409 	device_printf(dev, "     -phy_high    0x%016llx\n",
9410 	    (unsigned long long)tlv->phy_type_high);
9411 	device_printf(dev, "     -phy_low     0x%016llx\n",
9412 	    (unsigned long long)tlv->phy_type_low);
9413 }
9414 
9415 /**
9416  * ice_set_link_management_mode -- Strict or lenient link management
9417  * @sc: device private structure
9418  *
9419  * Some NVMs give the adapter the option to advertise a superset of link
9420  * configurations.  This checks to see if that option is enabled.
9421  * Further, the NVM could also provide a specific set of configurations
9422  * to try; these are cached in the driver's private structure if they
9423  * are available.
9424  */
9425 void
9426 ice_set_link_management_mode(struct ice_softc *sc)
9427 {
9428 	struct ice_port_info *pi = sc->hw.port_info;
9429 	device_t dev = sc->dev;
9430 	struct ice_link_default_override_tlv tlv = { 0 };
9431 	enum ice_status status;
9432 
9433 	/* Port must be in strict mode if FW version is below a certain
9434 	 * version. (i.e. Don't set lenient mode features)
9435 	 */
9436 	if (!(ice_fw_supports_link_override(&sc->hw)))
9437 		return;
9438 
9439 	status = ice_get_link_default_override(&tlv, pi);
9440 	if (status != ICE_SUCCESS) {
9441 		device_printf(dev,
9442 		    "%s: ice_get_link_default_override failed; status %s, aq_err %s\n",
9443 		    __func__, ice_status_str(status),
9444 		    ice_aq_str(sc->hw.adminq.sq_last_status));
9445 		return;
9446 	}
9447 
9448 	if (sc->hw.debug_mask & ICE_DBG_LINK)
9449 		ice_print_ldo_tlv(sc, &tlv);
9450 
9451 	/* Set lenient link mode */
9452 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LENIENT_LINK_MODE) &&
9453 	    (!(tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE)))
9454 		ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_en);
9455 
9456 	/* FW supports reporting a default configuration */
9457 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LINK_MGMT_VER_2) &&
9458 	    ice_fw_supports_report_dflt_cfg(&sc->hw)) {
9459 		ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_en);
9460 		/* Knowing we're at a high enough firmware revision to
9461 		 * support this link management configuration, we don't
9462 		 * need to check/support earlier versions.
9463 		 */
9464 		return;
9465 	}
9466 
9467 	/* Default overrides only work if in lenient link mode */
9468 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LINK_MGMT_VER_1) &&
9469 	    ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE) &&
9470 	    (tlv.options & ICE_LINK_OVERRIDE_EN))
9471 		ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_en);
9472 
9473 	/* Cache the LDO TLV structure in the driver, since it
9474 	 * won't change during the driver's lifetime.
9475 	 */
9476 	sc->ldo_tlv = tlv;
9477 }
9478 
9479 /**
9480  * ice_init_saved_phy_cfg -- Set cached user PHY cfg settings with NVM defaults
9481  * @sc: device private structure
9482  *
9483  * This should be called before the tunables for these link settings
9484  * (e.g. advertise_speed) are added -- so that these defaults don't overwrite
9485  * the cached values that the sysctl handlers will write.
9486  *
9487  * This also needs to be called before ice_init_link_configuration, to ensure
9488  * that there are sane values that can be written if there is media available
9489  * in the port.
9490  */
9491 void
9492 ice_init_saved_phy_cfg(struct ice_softc *sc)
9493 {
9494 	struct ice_port_info *pi = sc->hw.port_info;
9495 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
9496 	struct ice_hw *hw = &sc->hw;
9497 	device_t dev = sc->dev;
9498 	enum ice_status status;
9499 	u64 phy_low, phy_high;
9500 	u8 report_mode = ICE_AQC_REPORT_TOPO_CAP_MEDIA;
9501 
9502 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LINK_MGMT_VER_2))
9503 		report_mode = ICE_AQC_REPORT_DFLT_CFG;
9504 	status = ice_aq_get_phy_caps(pi, false, report_mode, &pcaps, NULL);
9505 	if (status != ICE_SUCCESS) {
9506 		device_printf(dev,
9507 		    "%s: ice_aq_get_phy_caps (%s) failed; status %s, aq_err %s\n",
9508 		    __func__,
9509 		    report_mode == ICE_AQC_REPORT_DFLT_CFG ? "DFLT" : "w/MEDIA",
9510 		    ice_status_str(status),
9511 		    ice_aq_str(hw->adminq.sq_last_status));
9512 		return;
9513 	}
9514 
9515 	phy_low = le64toh(pcaps.phy_type_low);
9516 	phy_high = le64toh(pcaps.phy_type_high);
9517 
9518 	/* Save off initial config parameters */
9519 	pi->phy.curr_user_speed_req =
9520 	   ice_aq_phy_types_to_link_speeds(phy_low, phy_high);
9521 	pi->phy.curr_user_fec_req = ice_caps_to_fec_mode(pcaps.caps,
9522 	    pcaps.link_fec_options);
9523 	pi->phy.curr_user_fc_req = ice_caps_to_fc_mode(pcaps.caps);
9524 }
9525 
/**
 * ice_module_init - Driver callback to handle module load
 *
 * Runs once when the module is loaded and sets up state that lives for as
 * long as the driver does. At present that is only the RDMA support set up
 * by ice_rdma_init().
 *
 * @returns 0 unconditionally.
 */
static int
ice_module_init(void)
{
	/* Driver-lifetime setup; nothing here reports a failure */
	ice_rdma_init();

	return (0);
}
9538 
/**
 * ice_module_exit - Driver callback to handle module exit
 *
 * Runs when the module is unloaded and undoes what ice_module_init() set up,
 * which at present is only the RDMA support torn down by ice_rdma_exit().
 *
 * A non-zero return value would prevent the module from being unloaded, so
 * one should only be returned if unloading is truly impossible, such as due
 * to outstanding memory references that cannot be revoked.
 *
 * @returns 0 unconditionally.
 */
static int
ice_module_exit(void)
{
	/* Counterpart of the setup performed in ice_module_init() */
	ice_rdma_exit();

	return (0);
}
9555 
9556 /**
9557  * ice_module_event_handler - Callback for module events
9558  * @mod: unused module_t parameter
9559  * @what: the event requested
9560  * @arg: unused event argument
9561  *
9562  * Callback used to handle module events from the stack. Used to allow the
9563  * driver to define custom behavior that should happen at module load and
9564  * unload.
9565  */
9566 int
9567 ice_module_event_handler(module_t __unused mod, int what, void __unused *arg)
9568 {
9569 	switch (what) {
9570 	case MOD_LOAD:
9571 		return ice_module_init();
9572 	case MOD_UNLOAD:
9573 		return ice_module_exit();
9574 	default:
9575 		/* TODO: do we need to handle MOD_QUIESCE and MOD_SHUTDOWN? */
9576 		return (EOPNOTSUPP);
9577 	}
9578 }
9579 
9580 /**
9581  * ice_handle_nvm_access_ioctl - Handle an NVM access ioctl request
9582  * @sc: the device private softc
9583  * @ifd: ifdrv ioctl request pointer
9584  */
9585 int
9586 ice_handle_nvm_access_ioctl(struct ice_softc *sc, struct ifdrv *ifd)
9587 {
9588 	union ice_nvm_access_data *data;
9589 	struct ice_nvm_access_cmd *cmd;
9590 	size_t ifd_len = ifd->ifd_len, malloc_len;
9591 	struct ice_hw *hw = &sc->hw;
9592 	device_t dev = sc->dev;
9593 	enum ice_status status;
9594 	u8 *nvm_buffer;
9595 	int err;
9596 
9597 	/*
9598 	 * ifioctl forwards SIOCxDRVSPEC to iflib without performing
9599 	 * a privilege check. In turn, iflib forwards the ioctl to the driver
9600 	 * without performing a privilege check. Perform one here to ensure
9601 	 * that non-privileged threads cannot access this interface.
9602 	 */
9603 	err = priv_check(curthread, PRIV_DRIVER);
9604 	if (err)
9605 		return (err);
9606 
9607 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
9608 		device_printf(dev, "%s: Driver must rebuild data structures after a reset. Operation aborted.\n",
9609 			      __func__);
9610 		return (EBUSY);
9611 	}
9612 
9613 	if (ifd_len < sizeof(struct ice_nvm_access_cmd)) {
9614 		device_printf(dev, "%s: ifdrv length is too small. Got %zu, but expected %zu\n",
9615 			      __func__, ifd_len, sizeof(struct ice_nvm_access_cmd));
9616 		return (EINVAL);
9617 	}
9618 
9619 	if (ifd->ifd_data == NULL) {
9620 		device_printf(dev, "%s: ifd data buffer not present.\n",
9621 			      __func__);
9622 		return (EINVAL);
9623 	}
9624 
9625 	/*
9626 	 * If everything works correctly, ice_handle_nvm_access should not
9627 	 * modify data past the size of the ioctl length. However, it could
9628 	 * lead to memory corruption if it did. Make sure to allocate at least
9629 	 * enough space for the command and data regardless. This
9630 	 * ensures that any access to the data union will not access invalid
9631 	 * memory.
9632 	 */
9633 	malloc_len = max(ifd_len, sizeof(*data) + sizeof(*cmd));
9634 
9635 	nvm_buffer = (u8 *)malloc(malloc_len, M_ICE, M_ZERO | M_WAITOK);
9636 	if (!nvm_buffer)
9637 		return (ENOMEM);
9638 
9639 	/* Copy the NVM access command and data in from user space */
9640 	/* coverity[tainted_data_argument] */
9641 	err = copyin(ifd->ifd_data, nvm_buffer, ifd_len);
9642 	if (err) {
9643 		device_printf(dev, "%s: Copying request from user space failed, err %s\n",
9644 			      __func__, ice_err_str(err));
9645 		goto cleanup_free_nvm_buffer;
9646 	}
9647 
9648 	/*
9649 	 * The NVM command structure is immediately followed by data which
9650 	 * varies in size based on the command.
9651 	 */
9652 	cmd = (struct ice_nvm_access_cmd *)nvm_buffer;
9653 	data = (union ice_nvm_access_data *)(nvm_buffer + sizeof(struct ice_nvm_access_cmd));
9654 
9655 	/* Handle the NVM access request */
9656 	status = ice_handle_nvm_access(hw, cmd, data);
9657 	if (status)
9658 		ice_debug(hw, ICE_DBG_NVM,
9659 			  "NVM access request failed, err %s\n",
9660 			  ice_status_str(status));
9661 
9662 	/* Copy the possibly modified contents of the handled request out */
9663 	err = copyout(nvm_buffer, ifd->ifd_data, ifd_len);
9664 	if (err) {
9665 		device_printf(dev, "%s: Copying response back to user space failed, err %s\n",
9666 			      __func__, ice_err_str(err));
9667 		goto cleanup_free_nvm_buffer;
9668 	}
9669 
9670 	/* Convert private status to an error code for proper ioctl response */
9671 	switch (status) {
9672 	case ICE_SUCCESS:
9673 		err = (0);
9674 		break;
9675 	case ICE_ERR_NO_MEMORY:
9676 		err = (ENOMEM);
9677 		break;
9678 	case ICE_ERR_OUT_OF_RANGE:
9679 		err = (ENOTTY);
9680 		break;
9681 	case ICE_ERR_PARAM:
9682 	default:
9683 		err = (EINVAL);
9684 		break;
9685 	}
9686 
9687 cleanup_free_nvm_buffer:
9688 	free(nvm_buffer, M_ICE);
9689 	return err;
9690 }
9691 
9692 /**
9693  * ice_read_sff_eeprom - Read data from SFF eeprom
9694  * @sc: device softc
9695  * @dev_addr: I2C device address (typically 0xA0 or 0xA2)
9696  * @offset: offset into the eeprom
9697  * @data: pointer to data buffer to store read data in
9698  * @length: length to read; max length is 16
9699  *
9700  * Read from the SFF eeprom in the module for this PF's port. For more details
9701  * on the contents of an SFF eeprom, refer to SFF-8724 (SFP), SFF-8636 (QSFP),
9702  * and SFF-8024 (both).
9703  */
9704 int
9705 ice_read_sff_eeprom(struct ice_softc *sc, u16 dev_addr, u16 offset, u8* data, u16 length)
9706 {
9707 	struct ice_hw *hw = &sc->hw;
9708 	int ret = 0, retries = 0;
9709 	enum ice_status status;
9710 
9711 	if (length > 16)
9712 		return (EINVAL);
9713 
9714 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
9715 		return (ENOSYS);
9716 
9717 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA))
9718 		return (ENXIO);
9719 
9720 	do {
9721 		status = ice_aq_sff_eeprom(hw, 0, dev_addr,
9722 					   offset, 0, 0, data, length,
9723 					   false, NULL);
9724 		if (!status) {
9725 			ret = 0;
9726 			break;
9727 		}
9728 		if (status == ICE_ERR_AQ_ERROR &&
9729 		    hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY) {
9730 			ret = EBUSY;
9731 			continue;
9732 		}
9733 		if (status == ICE_ERR_AQ_ERROR &&
9734 		    hw->adminq.sq_last_status == ICE_AQ_RC_EACCES) {
9735 			/* FW says I2C access isn't supported */
9736 			ret = EACCES;
9737 			break;
9738 		}
9739 		if (status == ICE_ERR_AQ_ERROR &&
9740 		    hw->adminq.sq_last_status == ICE_AQ_RC_EPERM) {
9741 			device_printf(sc->dev,
9742 				  "%s: Module pointer location specified in command does not permit the required operation.\n",
9743 				  __func__);
9744 			ret = EPERM;
9745 			break;
9746 		} else {
9747 			device_printf(sc->dev,
9748 				  "%s: Error reading I2C data: err %s aq_err %s\n",
9749 				  __func__, ice_status_str(status),
9750 				  ice_aq_str(hw->adminq.sq_last_status));
9751 			ret = EIO;
9752 			break;
9753 		}
9754 	} while (retries++ < ICE_I2C_MAX_RETRIES);
9755 
9756 	if (ret == EBUSY)
9757 		device_printf(sc->dev,
9758 			  "%s: Error reading I2C data after %d retries\n",
9759 			  __func__, ICE_I2C_MAX_RETRIES);
9760 
9761 	return (ret);
9762 }
9763 
9764 /**
9765  * ice_handle_i2c_req - Driver independent I2C request handler
9766  * @sc: device softc
9767  * @req: The I2C parameters to use
9768  *
9769  * Read from the port's I2C eeprom using the parameters from the ioctl.
9770  */
9771 int
9772 ice_handle_i2c_req(struct ice_softc *sc, struct ifi2creq *req)
9773 {
9774 	return ice_read_sff_eeprom(sc, req->dev_addr, req->offset, req->data, req->len);
9775 }
9776 
9777 /**
9778  * ice_sysctl_read_i2c_diag_data - Read some module diagnostic data via i2c
9779  * @oidp: sysctl oid structure
9780  * @arg1: pointer to private data structure
9781  * @arg2: unused
9782  * @req: sysctl request pointer
9783  *
9784  * Read 8 bytes of diagnostic data from the SFF eeprom in the (Q)SFP module
9785  * inserted into the port.
9786  *
9787  *             | SFP A2  | QSFP Lower Page
9788  * ------------|---------|----------------
9789  * Temperature | 96-97	 | 22-23
9790  * Vcc         | 98-99   | 26-27
9791  * TX power    | 102-103 | 34-35..40-41
9792  * RX power    | 104-105 | 50-51..56-57
9793  */
9794 static int
9795 ice_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS)
9796 {
9797 	struct ice_softc *sc = (struct ice_softc *)arg1;
9798 	device_t dev = sc->dev;
9799 	struct sbuf *sbuf;
9800 	int ret;
9801 	u8 data[16];
9802 
9803 	UNREFERENCED_PARAMETER(arg2);
9804 	UNREFERENCED_PARAMETER(oidp);
9805 
9806 	if (ice_driver_is_detaching(sc))
9807 		return (ESHUTDOWN);
9808 
9809 	if (req->oldptr == NULL) {
9810 		ret = SYSCTL_OUT(req, 0, 128);
9811 		return (ret);
9812 	}
9813 
9814 	ret = ice_read_sff_eeprom(sc, 0xA0, 0, data, 1);
9815 	if (ret)
9816 		return (ret);
9817 
9818 	/* 0x3 for SFP; 0xD/0x11 for QSFP+/QSFP28 */
9819 	if (data[0] == 0x3) {
9820 		/*
9821 		 * Check for:
9822 		 * - Internally calibrated data
9823 		 * - Diagnostic monitoring is implemented
9824 		 */
9825 		ice_read_sff_eeprom(sc, 0xA0, 92, data, 1);
9826 		if (!(data[0] & 0x60)) {
9827 			device_printf(dev, "Module doesn't support diagnostics: 0xA0[92] = %02X\n", data[0]);
9828 			return (ENODEV);
9829 		}
9830 
9831 		sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
9832 
9833 		ice_read_sff_eeprom(sc, 0xA2, 96, data, 4);
9834 		for (int i = 0; i < 4; i++)
9835 			sbuf_printf(sbuf, "%02X ", data[i]);
9836 
9837 		ice_read_sff_eeprom(sc, 0xA2, 102, data, 4);
9838 		for (int i = 0; i < 4; i++)
9839 			sbuf_printf(sbuf, "%02X ", data[i]);
9840 	} else if (data[0] == 0xD || data[0] == 0x11) {
9841 		/*
9842 		 * QSFP+ modules are always internally calibrated, and must indicate
9843 		 * what types of diagnostic monitoring are implemented
9844 		 */
9845 		sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
9846 
9847 		ice_read_sff_eeprom(sc, 0xA0, 22, data, 2);
9848 		for (int i = 0; i < 2; i++)
9849 			sbuf_printf(sbuf, "%02X ", data[i]);
9850 
9851 		ice_read_sff_eeprom(sc, 0xA0, 26, data, 2);
9852 		for (int i = 0; i < 2; i++)
9853 			sbuf_printf(sbuf, "%02X ", data[i]);
9854 
9855 		ice_read_sff_eeprom(sc, 0xA0, 34, data, 2);
9856 		for (int i = 0; i < 2; i++)
9857 			sbuf_printf(sbuf, "%02X ", data[i]);
9858 
9859 		ice_read_sff_eeprom(sc, 0xA0, 50, data, 2);
9860 		for (int i = 0; i < 2; i++)
9861 			sbuf_printf(sbuf, "%02X ", data[i]);
9862 	} else {
9863 		device_printf(dev, "Module is not SFP/SFP+/SFP28/QSFP+ (%02X)\n", data[0]);
9864 		return (ENODEV);
9865 	}
9866 
9867 	sbuf_finish(sbuf);
9868 	sbuf_delete(sbuf);
9869 
9870 	return (0);
9871 }
9872 
9873 /**
9874  * ice_alloc_intr_tracking - Setup interrupt tracking structures
9875  * @sc: device softc structure
9876  *
9877  * Sets up the resource manager for keeping track of interrupt allocations,
9878  * and initializes the tracking maps for the PF's interrupt allocations.
9879  *
9880  * Unlike the scheme for queues, this is done in one step since both the
9881  * manager and the maps both have the same lifetime.
9882  *
9883  * @returns 0 on success, or an error code on failure.
9884  */
9885 int
9886 ice_alloc_intr_tracking(struct ice_softc *sc)
9887 {
9888 	struct ice_hw *hw = &sc->hw;
9889 	device_t dev = sc->dev;
9890 	int err;
9891 
9892 	/* Initialize the interrupt allocation manager */
9893 	err = ice_resmgr_init_contig_only(&sc->imgr,
9894 	    hw->func_caps.common_cap.num_msix_vectors);
9895 	if (err) {
9896 		device_printf(dev, "Unable to initialize PF interrupt manager: %s\n",
9897 			      ice_err_str(err));
9898 		return (err);
9899 	}
9900 
9901 	/* Allocate PF interrupt mapping storage */
9902 	if (!(sc->pf_imap =
9903 	      (u16 *)malloc(sizeof(u16) * hw->func_caps.common_cap.num_msix_vectors,
9904 	      M_ICE, M_NOWAIT))) {
9905 		device_printf(dev, "Unable to allocate PF imap memory\n");
9906 		err = ENOMEM;
9907 		goto free_imgr;
9908 	}
9909 	if (!(sc->rdma_imap =
9910 	      (u16 *)malloc(sizeof(u16) * hw->func_caps.common_cap.num_msix_vectors,
9911 	      M_ICE, M_NOWAIT))) {
9912 		device_printf(dev, "Unable to allocate RDMA imap memory\n");
9913 		err = ENOMEM;
9914 		free(sc->pf_imap, M_ICE);
9915 		goto free_imgr;
9916 	}
9917 	for (u32 i = 0; i < hw->func_caps.common_cap.num_msix_vectors; i++) {
9918 		sc->pf_imap[i] = ICE_INVALID_RES_IDX;
9919 		sc->rdma_imap[i] = ICE_INVALID_RES_IDX;
9920 	}
9921 
9922 	return (0);
9923 
9924 free_imgr:
9925 	ice_resmgr_destroy(&sc->imgr);
9926 	return (err);
9927 }
9928 
9929 /**
9930  * ice_free_intr_tracking - Free PF interrupt tracking structures
9931  * @sc: device softc structure
9932  *
9933  * Frees the interrupt resource allocation manager and the PF's owned maps.
9934  *
9935  * VF maps are released when the owning VF's are destroyed, which should always
9936  * happen before this function is called.
9937  */
9938 void
9939 ice_free_intr_tracking(struct ice_softc *sc)
9940 {
9941 	if (sc->pf_imap) {
9942 		ice_resmgr_release_map(&sc->imgr, sc->pf_imap,
9943 				       sc->lan_vectors);
9944 		free(sc->pf_imap, M_ICE);
9945 		sc->pf_imap = NULL;
9946 	}
9947 	if (sc->rdma_imap) {
9948 		ice_resmgr_release_map(&sc->imgr, sc->rdma_imap,
9949 				       sc->lan_vectors);
9950 		free(sc->rdma_imap, M_ICE);
9951 		sc->rdma_imap = NULL;
9952 	}
9953 
9954 	ice_resmgr_destroy(&sc->imgr);
9955 }
9956 
9957 /**
9958  * ice_apply_supported_speed_filter - Mask off unsupported speeds
9959  * @report_speeds: bit-field for the desired link speeds
9960  * @mod_type: type of module/sgmii connection we have
9961  *
9962  * Given a bitmap of the desired lenient mode link speeds,
9963  * this function will mask off the speeds that are not currently
9964  * supported by the device.
9965  */
9966 static u16
9967 ice_apply_supported_speed_filter(u16 report_speeds, u8 mod_type)
9968 {
9969 	u16 speed_mask;
9970 	enum { IS_SGMII, IS_SFP, IS_QSFP } module;
9971 
9972 	/*
9973 	 * The SFF specification says 0 is unknown, so we'll
9974 	 * treat it like we're connected through SGMII for now.
9975 	 * This may need revisiting if a new type is supported
9976 	 * in the future.
9977 	 */
9978 	switch (mod_type) {
9979 	case 0:
9980 		module = IS_SGMII;
9981 		break;
9982 	case 3:
9983 		module = IS_SFP;
9984 		break;
9985 	default:
9986 		module = IS_QSFP;
9987 		break;
9988 	}
9989 
9990 	/* We won't offer anything lower than 100M for any part,
9991 	 * but we'll need to mask off other speeds based on the
9992 	 * device and module type.
9993 	 */
9994 	speed_mask = ~((u16)ICE_AQ_LINK_SPEED_100MB - 1);
9995 	if ((report_speeds & ICE_AQ_LINK_SPEED_10GB) && (module == IS_SFP))
9996 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1);
9997 	if (report_speeds & ICE_AQ_LINK_SPEED_25GB)
9998 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1);
9999 	if (report_speeds & ICE_AQ_LINK_SPEED_50GB) {
10000 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1);
10001 		if (module == IS_QSFP)
10002 			speed_mask = ~((u16)ICE_AQ_LINK_SPEED_10GB - 1);
10003 	}
10004 	if (report_speeds & ICE_AQ_LINK_SPEED_100GB)
10005 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_25GB - 1);
10006 	return (report_speeds & speed_mask);
10007 }
10008 
10009 /**
10010  * ice_init_health_events - Enable FW health event reporting
10011  * @sc: device softc
10012  *
10013  * Will try to enable firmware health event reporting, but shouldn't
10014  * cause any grief (to the caller) if this fails.
10015  */
10016 void
10017 ice_init_health_events(struct ice_softc *sc)
10018 {
10019 	enum ice_status status;
10020 	u8 health_mask;
10021 
10022 	if ((!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_HEALTH_STATUS)) ||
10023 		(!sc->enable_health_events))
10024 		return;
10025 
10026 	health_mask = ICE_AQC_HEALTH_STATUS_SET_PF_SPECIFIC_MASK |
10027 		      ICE_AQC_HEALTH_STATUS_SET_GLOBAL_MASK;
10028 
10029 	status = ice_aq_set_health_status_config(&sc->hw, health_mask, NULL);
10030 	if (status)
10031 		device_printf(sc->dev,
10032 		    "Failed to enable firmware health events, err %s aq_err %s\n",
10033 		    ice_status_str(status),
10034 		    ice_aq_str(sc->hw.adminq.sq_last_status));
10035 	else
10036 		ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_en);
10037 }
10038 
10039 /**
10040  * ice_print_health_status_string - Print message for given FW health event
10041  * @dev: the PCIe device
10042  * @elem: health status element containing status code
10043  *
10044  * A rather large list of possible health status codes and their associated
10045  * messages.
10046  */
10047 static void
10048 ice_print_health_status_string(device_t dev,
10049 			       struct ice_aqc_health_status_elem *elem)
10050 {
10051 	u16 status_code = le16toh(elem->health_status_code);
10052 
10053 	switch (status_code) {
10054 	case ICE_AQC_HEALTH_STATUS_INFO_RECOVERY:
10055 		device_printf(dev, "The device is in firmware recovery mode.\n");
10056 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10057 		break;
10058 	case ICE_AQC_HEALTH_STATUS_ERR_FLASH_ACCESS:
10059 		device_printf(dev, "The flash chip cannot be accessed.\n");
10060 		device_printf(dev, "Possible Solution: If issue persists, call customer support.\n");
10061 		break;
10062 	case ICE_AQC_HEALTH_STATUS_ERR_NVM_AUTH:
10063 		device_printf(dev, "NVM authentication failed.\n");
10064 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10065 		break;
10066 	case ICE_AQC_HEALTH_STATUS_ERR_OROM_AUTH:
10067 		device_printf(dev, "Option ROM authentication failed.\n");
10068 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10069 		break;
10070 	case ICE_AQC_HEALTH_STATUS_ERR_DDP_AUTH:
10071 		device_printf(dev, "DDP package failed.\n");
10072 		device_printf(dev, "Possible Solution: Update to latest base driver and DDP package.\n");
10073 		break;
10074 	case ICE_AQC_HEALTH_STATUS_ERR_NVM_COMPAT:
10075 		device_printf(dev, "NVM image is incompatible.\n");
10076 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10077 		break;
10078 	case ICE_AQC_HEALTH_STATUS_ERR_OROM_COMPAT:
10079 		device_printf(dev, "Option ROM is incompatible.\n");
10080 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10081 		break;
10082 	case ICE_AQC_HEALTH_STATUS_ERR_DCB_MIB:
10083 		device_printf(dev, "Supplied MIB file is invalid. DCB reverted to default configuration.\n");
10084 		device_printf(dev, "Possible Solution: Disable FW-LLDP and check DCBx system configuration.\n");
10085 		break;
10086 	case ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_STRICT:
10087 		device_printf(dev, "An unsupported module was detected.\n");
10088 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10089 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10090 		break;
10091 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_TYPE:
10092 		device_printf(dev, "Module type is not supported.\n");
10093 		device_printf(dev, "Possible Solution: Change or replace the module or cable.\n");
10094 		break;
10095 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_QUAL:
10096 		device_printf(dev, "Module is not qualified.\n");
10097 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10098 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10099 		device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n");
10100 		break;
10101 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_COMM:
10102 		device_printf(dev, "Device cannot communicate with the module.\n");
10103 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10104 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10105 		device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n");
10106 		break;
10107 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_CONFLICT:
10108 		device_printf(dev, "Unresolved module conflict.\n");
10109 		device_printf(dev, "Possible Solution 1: Manually set speed/duplex or use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10110 		device_printf(dev, "Possible Solution 2: If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.\n");
10111 		break;
10112 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_NOT_PRESENT:
10113 		device_printf(dev, "Module is not present.\n");
10114 		device_printf(dev, "Possible Solution 1: Check that the module is inserted correctly.\n");
10115 		device_printf(dev, "Possible Solution 2: If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.\n");
10116 		break;
10117 	case ICE_AQC_HEALTH_STATUS_INFO_MOD_UNDERUTILIZED:
10118 		device_printf(dev, "Underutilized module.\n");
10119 		device_printf(dev, "Possible Solution 1: Change or replace the module or cable.\n");
10120 		device_printf(dev, "Possible Solution 2: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10121 		break;
10122 	case ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_LENIENT:
10123 		device_printf(dev, "An unsupported module was detected.\n");
10124 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10125 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10126 		device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n");
10127 		break;
10128 	case ICE_AQC_HEALTH_STATUS_ERR_INVALID_LINK_CFG:
10129 		device_printf(dev, "Invalid link configuration.\n");
10130 		break;
10131 	case ICE_AQC_HEALTH_STATUS_ERR_PORT_ACCESS:
10132 		device_printf(dev, "Port hardware access error.\n");
10133 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10134 		break;
10135 	case ICE_AQC_HEALTH_STATUS_ERR_PORT_UNREACHABLE:
10136 		device_printf(dev, "A port is unreachable.\n");
10137 		device_printf(dev, "Possible Solution 1: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10138 		device_printf(dev, "Possible Solution 2: Update to the latest NVM image.\n");
10139 		break;
10140 	case ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_MOD_LIMITED:
10141 		device_printf(dev, "Port speed is limited due to module.\n");
10142 		device_printf(dev, "Possible Solution: Change the module or use Intel(R) Ethernet Port Configuration Tool to configure the port option to match the current module speed.\n");
10143 		break;
10144 	case ICE_AQC_HEALTH_STATUS_ERR_PARALLEL_FAULT:
10145 		device_printf(dev, "A parallel fault was detected.\n");
10146 		device_printf(dev, "Possible Solution: Check link partner connection and configuration.\n");
10147 		break;
10148 	case ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_PHY_LIMITED:
10149 		device_printf(dev, "Port speed is limited by PHY capabilities.\n");
10150 		device_printf(dev, "Possible Solution 1: Change the module to align to port option.\n");
10151 		device_printf(dev, "Possible Solution 2: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10152 		break;
10153 	case ICE_AQC_HEALTH_STATUS_ERR_NETLIST_TOPO:
10154 		device_printf(dev, "LOM topology netlist is corrupted.\n");
10155 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10156 		break;
10157 	case ICE_AQC_HEALTH_STATUS_ERR_NETLIST:
10158 		device_printf(dev, "Unrecoverable netlist error.\n");
10159 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10160 		break;
10161 	case ICE_AQC_HEALTH_STATUS_ERR_TOPO_CONFLICT:
10162 		device_printf(dev, "Port topology conflict.\n");
10163 		device_printf(dev, "Possible Solution 1: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10164 		device_printf(dev, "Possible Solution 2: Update to the latest NVM image.\n");
10165 		break;
10166 	case ICE_AQC_HEALTH_STATUS_ERR_LINK_HW_ACCESS:
10167 		device_printf(dev, "Unrecoverable hardware access error.\n");
10168 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10169 		break;
10170 	case ICE_AQC_HEALTH_STATUS_ERR_LINK_RUNTIME:
10171 		device_printf(dev, "Unrecoverable runtime error.\n");
10172 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10173 		break;
10174 	case ICE_AQC_HEALTH_STATUS_ERR_DNL_INIT:
10175 		device_printf(dev, "Link management engine failed to initialize.\n");
10176 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10177 		break;
10178 	default:
10179 		break;
10180 	}
10181 }
10182 
10183 /**
10184  * ice_handle_health_status_event - helper function to output health status
10185  * @sc: device softc structure
10186  * @event: event received on a control queue
10187  *
10188  * Prints out the appropriate string based on the given Health Status Event
10189  * code.
10190  */
10191 static void
10192 ice_handle_health_status_event(struct ice_softc *sc,
10193 			       struct ice_rq_event_info *event)
10194 {
10195 	struct ice_aqc_health_status_elem *health_info;
10196 	u16 status_count;
10197 	int i;
10198 
10199 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_HEALTH_STATUS))
10200 		return;
10201 
10202 	health_info = (struct ice_aqc_health_status_elem *)event->msg_buf;
10203 	status_count = le16toh(event->desc.params.get_health_status.health_status_count);
10204 
10205 	if (status_count > (event->buf_len / sizeof(*health_info))) {
10206 		device_printf(sc->dev, "Received a health status event with invalid event count\n");
10207 		return;
10208 	}
10209 
10210 	for (i = 0; i < status_count; i++) {
10211 		ice_print_health_status_string(sc->dev, health_info);
10212 		health_info++;
10213 	}
10214 }
10215 
10216 /**
10217  * ice_set_default_local_lldp_mib - Possibly apply local LLDP MIB to FW
10218  * @sc: device softc structure
10219  *
10220  * This function needs to be called after link up; it makes sure the FW has
10221  * certain PFC/DCB settings. In certain configurations this will re-apply a
10222  * default local LLDP MIB configuration; this is intended to workaround a FW
10223  * behavior where these settings seem to be cleared on link up.
10224  */
10225 void
10226 ice_set_default_local_lldp_mib(struct ice_softc *sc)
10227 {
10228 	struct ice_hw *hw = &sc->hw;
10229 	struct ice_port_info *pi;
10230 	device_t dev = sc->dev;
10231 	enum ice_status status;
10232 
10233 	/* Set Local MIB can disrupt flow control settings for
10234 	 * non-DCB-supported devices.
10235 	 */
10236 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_DCB))
10237 		return;
10238 
10239 	pi = hw->port_info;
10240 
10241 	/* Don't overwrite a custom SW configuration */
10242 	if (!pi->qos_cfg.is_sw_lldp &&
10243 	    !ice_test_state(&sc->state, ICE_STATE_MULTIPLE_TCS))
10244 		ice_set_default_local_mib_settings(sc);
10245 
10246 	status = ice_set_dcb_cfg(pi);
10247 
10248 	if (status)
10249 		device_printf(dev,
10250 		    "Error setting Local LLDP MIB: %s aq_err %s\n",
10251 		    ice_status_str(status),
10252 		    ice_aq_str(hw->adminq.sq_last_status));
10253 }
10254 
10255 /**
10256  * ice_sbuf_print_ets_cfg - Helper function to print ETS cfg
10257  * @sbuf: string buffer to print to
10258  * @name: prefix string to use
10259  * @ets: structure to pull values from
10260  *
10261  * A helper function for ice_sysctl_dump_dcbx_cfg(), this
10262  * formats the ETS rec and cfg TLVs into text.
10263  */
10264 static void
10265 ice_sbuf_print_ets_cfg(struct sbuf *sbuf, const char *name, struct ice_dcb_ets_cfg *ets)
10266 {
10267 	sbuf_printf(sbuf, "%s.willing: %u\n", name, ets->willing);
10268 	sbuf_printf(sbuf, "%s.cbs: %u\n", name, ets->cbs);
10269 	sbuf_printf(sbuf, "%s.maxtcs: %u\n", name, ets->maxtcs);
10270 
10271 	sbuf_printf(sbuf, "%s.prio_table:", name);
10272 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10273 		sbuf_printf(sbuf, " %d", ets->prio_table[i]);
10274 	sbuf_printf(sbuf, "\n");
10275 
10276 	sbuf_printf(sbuf, "%s.tcbwtable:", name);
10277 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10278 		sbuf_printf(sbuf, " %d", ets->tcbwtable[i]);
10279 	sbuf_printf(sbuf, "\n");
10280 
10281 	sbuf_printf(sbuf, "%s.tsatable:", name);
10282 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10283 		sbuf_printf(sbuf, " %d", ets->tsatable[i]);
10284 	sbuf_printf(sbuf, "\n");
10285 }
10286 
10287 /**
10288  * ice_sysctl_dump_dcbx_cfg - Print out DCBX/DCB config info
10289  * @oidp: sysctl oid structure
10290  * @arg1: pointer to private data structure
10291  * @arg2: AQ define for either Local or Remote MIB
10292  * @req: sysctl request pointer
10293  *
10294  * Prints out DCB/DCBX configuration, including the contents
10295  * of either the local or remote MIB, depending on the value
10296  * used in arg2.
10297  */
10298 static int
10299 ice_sysctl_dump_dcbx_cfg(SYSCTL_HANDLER_ARGS)
10300 {
10301 	struct ice_softc *sc = (struct ice_softc *)arg1;
10302 	struct ice_aqc_get_cee_dcb_cfg_resp cee_cfg = {};
10303 	struct ice_dcbx_cfg dcb_buf = {};
10304 	struct ice_dcbx_cfg *dcbcfg;
10305 	struct ice_hw *hw = &sc->hw;
10306 	device_t dev = sc->dev;
10307 	struct sbuf *sbuf;
10308 	enum ice_status status;
10309 	u8 maxtcs, dcbx_status, is_sw_lldp;
10310 
10311 	UNREFERENCED_PARAMETER(oidp);
10312 
10313 	if (ice_driver_is_detaching(sc))
10314 		return (ESHUTDOWN);
10315 
10316 	is_sw_lldp = hw->port_info->qos_cfg.is_sw_lldp;
10317 
10318 	/* The driver doesn't receive a Remote MIB via SW */
10319 	if (is_sw_lldp && arg2 == ICE_AQ_LLDP_MIB_REMOTE)
10320 		return (ENOENT);
10321 
10322 	dcbcfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
10323 	if (!is_sw_lldp) {
10324 		/* Collect information from the FW in FW LLDP mode */
10325 		dcbcfg = &dcb_buf;
10326 		status = ice_aq_get_dcb_cfg(hw, (u8)arg2,
10327 		    ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbcfg);
10328 		if (status && arg2 == ICE_AQ_LLDP_MIB_REMOTE &&
10329 		    hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) {
10330 			device_printf(dev,
10331 			    "Unable to query Remote MIB; port has not received one yet\n");
10332 			return (ENOENT);
10333 		}
10334 		if (status) {
10335 			device_printf(dev, "Unable to query LLDP MIB, err %s aq_err %s\n",
10336 			    ice_status_str(status),
10337 			    ice_aq_str(hw->adminq.sq_last_status));
10338 			return (EIO);
10339 		}
10340 	}
10341 
10342 	status = ice_aq_get_cee_dcb_cfg(hw, &cee_cfg, NULL);
10343 	if (status == ICE_SUCCESS)
10344 		dcbcfg->dcbx_mode = ICE_DCBX_MODE_CEE;
10345 	else if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)
10346 		dcbcfg->dcbx_mode = ICE_DCBX_MODE_IEEE;
10347 	else
10348 		device_printf(dev, "Get CEE DCB Cfg AQ cmd err %s aq_err %s\n",
10349 		    ice_status_str(status),
10350 		    ice_aq_str(hw->adminq.sq_last_status));
10351 
10352 	maxtcs = hw->func_caps.common_cap.maxtc;
10353 	dcbx_status = ice_get_dcbx_status(hw);
10354 
10355 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10356 
10357 	/* Do the actual printing */
10358 	sbuf_printf(sbuf, "\n");
10359 	sbuf_printf(sbuf, "SW LLDP mode: %d\n", is_sw_lldp);
10360 	sbuf_printf(sbuf, "Function caps maxtcs: %d\n", maxtcs);
10361 	sbuf_printf(sbuf, "dcbx_status: %d\n", dcbx_status);
10362 
10363 	sbuf_printf(sbuf, "numapps: %u\n", dcbcfg->numapps);
10364 	sbuf_printf(sbuf, "CEE TLV status: %u\n", dcbcfg->tlv_status);
10365 	sbuf_printf(sbuf, "pfc_mode: %s\n", (dcbcfg->pfc_mode == ICE_QOS_MODE_DSCP) ?
10366 	    "DSCP" : "VLAN");
10367 	sbuf_printf(sbuf, "dcbx_mode: %s\n",
10368 	    (dcbcfg->dcbx_mode == ICE_DCBX_MODE_IEEE) ? "IEEE" :
10369 	    (dcbcfg->dcbx_mode == ICE_DCBX_MODE_CEE) ? "CEE" :
10370 	    "Unknown");
10371 
10372 	ice_sbuf_print_ets_cfg(sbuf, "etscfg", &dcbcfg->etscfg);
10373 	ice_sbuf_print_ets_cfg(sbuf, "etsrec", &dcbcfg->etsrec);
10374 
10375 	sbuf_printf(sbuf, "pfc.willing: %u\n", dcbcfg->pfc.willing);
10376 	sbuf_printf(sbuf, "pfc.mbc: %u\n", dcbcfg->pfc.mbc);
10377 	sbuf_printf(sbuf, "pfc.pfccap: 0x%0x\n", dcbcfg->pfc.pfccap);
10378 	sbuf_printf(sbuf, "pfc.pfcena: 0x%0x\n", dcbcfg->pfc.pfcena);
10379 
10380 	if (arg2 == ICE_AQ_LLDP_MIB_LOCAL) {
10381 		sbuf_printf(sbuf, "dscp_map:\n");
10382 		for (int i = 0; i < 8; i++) {
10383 			for (int j = 0; j < 8; j++)
10384 				sbuf_printf(sbuf, " %d",
10385 					    dcbcfg->dscp_map[i * 8 + j]);
10386 			sbuf_printf(sbuf, "\n");
10387 		}
10388 
10389 		sbuf_printf(sbuf, "\nLocal registers:\n");
10390 		sbuf_printf(sbuf, "PRTDCB_GENC.NUMTC: %d\n",
10391 		    (rd32(hw, PRTDCB_GENC) & PRTDCB_GENC_NUMTC_M)
10392 		        >> PRTDCB_GENC_NUMTC_S);
10393 		sbuf_printf(sbuf, "PRTDCB_TUP2TC: 0x%0x\n",
10394 		    (rd32(hw, PRTDCB_TUP2TC)));
10395 		sbuf_printf(sbuf, "PRTDCB_RUP2TC: 0x%0x\n",
10396 		    (rd32(hw, PRTDCB_RUP2TC)));
10397 		sbuf_printf(sbuf, "GLDCB_TC2PFC: 0x%0x\n",
10398 		    (rd32(hw, GLDCB_TC2PFC)));
10399 	}
10400 
10401 	/* Finish */
10402 	sbuf_finish(sbuf);
10403 	sbuf_delete(sbuf);
10404 
10405 	return (0);
10406 }
10407 
10408 /**
10409  * ice_sysctl_dump_vsi_cfg - print PF LAN VSI configuration
10410  * @oidp: sysctl oid structure
10411  * @arg1: pointer to private data structure
10412  * @arg2: unused
10413  * @req: sysctl request pointer
10414  *
10415  * XXX: This could be extended to apply to arbitrary PF-owned VSIs,
10416  * but for simplicity, this only works on the PF's LAN VSI.
10417  */
10418 static int
10419 ice_sysctl_dump_vsi_cfg(SYSCTL_HANDLER_ARGS)
10420 {
10421 	struct ice_softc *sc = (struct ice_softc *)arg1;
10422 	struct ice_vsi_ctx ctx = { 0 };
10423 	struct ice_hw *hw = &sc->hw;
10424 	device_t dev = sc->dev;
10425 	struct sbuf *sbuf;
10426 	enum ice_status status;
10427 
10428 	UNREFERENCED_PARAMETER(oidp);
10429 	UNREFERENCED_PARAMETER(arg2);
10430 
10431 	if (ice_driver_is_detaching(sc))
10432 		return (ESHUTDOWN);
10433 
10434 	/* Get HW absolute index of a VSI */
10435 	ctx.vsi_num = ice_get_hw_vsi_num(hw, sc->pf_vsi.idx);
10436 
10437 	status = ice_aq_get_vsi_params(hw, &ctx, NULL);
10438 	if (status != ICE_SUCCESS) {
10439 		device_printf(dev,
10440 		    "Get VSI AQ call failed, err %s aq_err %s\n",
10441 		    ice_status_str(status),
10442 		    ice_aq_str(hw->adminq.sq_last_status));
10443 		return (EIO);
10444 	}
10445 
10446 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10447 
10448 	/* Do the actual printing */
10449 	sbuf_printf(sbuf, "\n");
10450 
10451 	sbuf_printf(sbuf, "VSI NUM: %d\n", ctx.vsi_num);
10452 	sbuf_printf(sbuf, "VF  NUM: %d\n", ctx.vf_num);
10453 	sbuf_printf(sbuf, "VSIs allocated: %d\n", ctx.vsis_allocd);
10454 	sbuf_printf(sbuf, "VSIs unallocated: %d\n", ctx.vsis_unallocated);
10455 
10456 	sbuf_printf(sbuf, "Rx Queue Map method: %d\n",
10457 	    LE16_TO_CPU(ctx.info.mapping_flags));
10458 	/* The PF VSI is always contiguous, so there's no if-statement here */
10459 	sbuf_printf(sbuf, "Rx Queue base: %d\n",
10460 	    LE16_TO_CPU(ctx.info.q_mapping[0]));
10461 	sbuf_printf(sbuf, "Rx Queue count: %d\n",
10462 	    LE16_TO_CPU(ctx.info.q_mapping[1]));
10463 
10464 	sbuf_printf(sbuf, "TC qbases  :");
10465 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10466 		sbuf_printf(sbuf, " %4d",
10467 		    ctx.info.tc_mapping[i] & ICE_AQ_VSI_TC_Q_OFFSET_M);
10468 	}
10469 	sbuf_printf(sbuf, "\n");
10470 
10471 	sbuf_printf(sbuf, "TC qcounts :");
10472 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10473 		sbuf_printf(sbuf, " %4d",
10474 		    1 << (ctx.info.tc_mapping[i] >> ICE_AQ_VSI_TC_Q_NUM_S));
10475 	}
10476 
10477 	/* Finish */
10478 	sbuf_finish(sbuf);
10479 	sbuf_delete(sbuf);
10480 
10481 	return (0);
10482 }
10483 
10484 /**
10485  * ice_ets_str_to_tbl - Parse string into ETS table
10486  * @str: input string to parse
10487  * @table: output eight values used for ETS values
10488  * @limit: max valid value to accept for ETS values
10489  *
10490  * Parses a string and converts the eight values within
10491  * into a table that can be used in setting ETS settings
10492  * in a MIB.
10493  *
10494  * @return 0 on success, EINVAL if a parsed value is
10495  * not between 0 and limit.
10496  */
10497 static int
10498 ice_ets_str_to_tbl(const char *str, u8 *table, u8 limit)
10499 {
10500 	const char *str_start = str;
10501 	char *str_end;
10502 	long token;
10503 
10504 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10505 		token = strtol(str_start, &str_end, 0);
10506 		if (token < 0 || token > limit)
10507 			return (EINVAL);
10508 
10509 		table[i] = (u8)token;
10510 		str_start = (str_end + 1);
10511 	}
10512 
10513 	return (0);
10514 }
10515 
10516 /**
10517  * ice_check_ets_bw - Check if ETS bw vals are valid
10518  * @table: eight values used for ETS bandwidth
10519  *
10520  * @return true if the sum of all 8 values in table
10521  * equals 100.
10522  */
10523 static bool
10524 ice_check_ets_bw(u8 *table)
10525 {
10526 	int sum = 0;
10527 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10528 		sum += (int)table[i];
10529 
10530 	return (sum == 100);
10531 }
10532 
10533 /**
10534  * ice_cfg_pba_num - Determine if PBA Number is retrievable
10535  * @sc: the device private softc structure
10536  *
10537  * Sets the feature flag for the existence of a PBA number
10538  * based on the success of the read command.  This does not
10539  * cache the result.
10540  */
10541 void
10542 ice_cfg_pba_num(struct ice_softc *sc)
10543 {
10544 	u8 pba_string[32] = "";
10545 
10546 	if ((ice_is_bit_set(sc->feat_cap, ICE_FEATURE_HAS_PBA)) &&
10547 	    (ice_read_pba_string(&sc->hw, pba_string, sizeof(pba_string)) == 0))
10548 		ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_en);
10549 }
10550 
10551 /**
10552  * ice_sysctl_query_port_ets - print Port ETS Config from AQ
10553  * @oidp: sysctl oid structure
10554  * @arg1: pointer to private data structure
10555  * @arg2: unused
10556  * @req: sysctl request pointer
10557  */
10558 static int
10559 ice_sysctl_query_port_ets(SYSCTL_HANDLER_ARGS)
10560 {
10561 	struct ice_softc *sc = (struct ice_softc *)arg1;
10562 	struct ice_aqc_port_ets_elem port_ets = { 0 };
10563 	struct ice_hw *hw = &sc->hw;
10564 	struct ice_port_info *pi;
10565 	device_t dev = sc->dev;
10566 	struct sbuf *sbuf;
10567 	enum ice_status status;
10568 	int i = 0;
10569 
10570 	UNREFERENCED_PARAMETER(oidp);
10571 	UNREFERENCED_PARAMETER(arg2);
10572 
10573 	if (ice_driver_is_detaching(sc))
10574 		return (ESHUTDOWN);
10575 
10576 	pi = hw->port_info;
10577 
10578 	status = ice_aq_query_port_ets(pi, &port_ets, sizeof(port_ets), NULL);
10579 	if (status != ICE_SUCCESS) {
10580 		device_printf(dev,
10581 		    "Query Port ETS AQ call failed, err %s aq_err %s\n",
10582 		    ice_status_str(status),
10583 		    ice_aq_str(hw->adminq.sq_last_status));
10584 		return (EIO);
10585 	}
10586 
10587 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10588 
10589 	/* Do the actual printing */
10590 	sbuf_printf(sbuf, "\n");
10591 
10592 	sbuf_printf(sbuf, "Valid TC map: 0x%x\n", port_ets.tc_valid_bits);
10593 
10594 	sbuf_printf(sbuf, "TC BW %%:");
10595 	ice_for_each_traffic_class(i) {
10596 		sbuf_printf(sbuf, " %3d", port_ets.tc_bw_share[i]);
10597 	}
10598 	sbuf_printf(sbuf, "\n");
10599 
10600 	sbuf_printf(sbuf, "EIR profile ID: %d\n", port_ets.port_eir_prof_id);
10601 	sbuf_printf(sbuf, "CIR profile ID: %d\n", port_ets.port_cir_prof_id);
10602 	sbuf_printf(sbuf, "TC Node prio: 0x%x\n", port_ets.tc_node_prio);
10603 
10604 	sbuf_printf(sbuf, "TC Node TEIDs:\n");
10605 	ice_for_each_traffic_class(i) {
10606 		sbuf_printf(sbuf, "%d: %d\n", i, port_ets.tc_node_teid[i]);
10607 	}
10608 
10609 	/* Finish */
10610 	sbuf_finish(sbuf);
10611 	sbuf_delete(sbuf);
10612 
10613 	return (0);
10614 }
10615 
10616 /**
10617  * ice_sysctl_dscp2tc_map - Map DSCP to hardware TCs
10618  * @oidp: sysctl oid structure
10619  * @arg1: pointer to private data structure
10620  * @arg2: which eight DSCP to UP mappings to configure (0 - 7)
10621  * @req: sysctl request pointer
10622  *
10623  * Gets or sets the current DSCP to UP table cached by the driver. Since there
10624  * are 64 possible DSCP values to configure, this sysctl only configures
10625  * chunks of 8 in that space at a time.
10626  *
10627  * This sysctl is only relevant in DSCP mode, and will only function in SW DCB
10628  * mode.
10629  */
10630 static int
10631 ice_sysctl_dscp2tc_map(SYSCTL_HANDLER_ARGS)
10632 {
10633 	struct ice_softc *sc = (struct ice_softc *)arg1;
10634 	struct ice_dcbx_cfg *local_dcbx_cfg;
10635 	struct ice_port_info *pi;
10636 	struct ice_hw *hw = &sc->hw;
10637 	device_t dev = sc->dev;
10638 	enum ice_status status;
10639 	struct sbuf *sbuf;
10640 	int ret;
10641 
10642 	/* Store input rates from user */
10643 	char dscp_user_buf[128] = "";
10644 	u8 new_dscp_table_seg[ICE_MAX_TRAFFIC_CLASS] = {};
10645 
10646 	if (ice_driver_is_detaching(sc))
10647 		return (ESHUTDOWN);
10648 
10649 	if (req->oldptr == NULL && req->newptr == NULL) {
10650 		ret = SYSCTL_OUT(req, 0, 128);
10651 		return (ret);
10652 	}
10653 
10654 	pi = hw->port_info;
10655 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
10656 
10657 	sbuf = sbuf_new(NULL, dscp_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
10658 
10659 	/* Format DSCP-to-UP data for output */
10660 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10661 		sbuf_printf(sbuf, "%d", local_dcbx_cfg->dscp_map[arg2 * 8 + i]);
10662 		if (i != ICE_MAX_TRAFFIC_CLASS - 1)
10663 			sbuf_printf(sbuf, ",");
10664 	}
10665 
10666 	sbuf_finish(sbuf);
10667 	sbuf_delete(sbuf);
10668 
10669 	/* Read in the new DSCP mapping values */
10670 	ret = sysctl_handle_string(oidp, dscp_user_buf, sizeof(dscp_user_buf), req);
10671 	if ((ret) || (req->newptr == NULL))
10672 		return (ret);
10673 
10674 	/* Don't allow setting changes in FW DCB mode */
10675 	if (!hw->port_info->qos_cfg.is_sw_lldp) {
10676 		device_printf(dev, "%s: DSCP mapping is not allowed in FW DCBX mode\n",
10677 		    __func__);
10678 		return (EINVAL);
10679 	}
10680 
10681 	/* Convert 8 values in a string to a table; this is similar to what
10682 	 * needs to be done for ETS settings, so this function can be re-used
10683 	 * for that purpose.
10684 	 */
10685 	ret = ice_ets_str_to_tbl(dscp_user_buf, new_dscp_table_seg, 8);
10686 	if (ret) {
10687 		device_printf(dev, "%s: Could not parse input DSCP2TC table: %s\n",
10688 		    __func__, dscp_user_buf);
10689 		return (ret);
10690 	}
10691 
10692 	memcpy(&local_dcbx_cfg->dscp_map[arg2 * 8], new_dscp_table_seg,
10693 	    sizeof(new_dscp_table_seg));
10694 
10695 	local_dcbx_cfg->app_mode = ICE_DCBX_APPS_NON_WILLING;
10696 
10697 	status = ice_set_dcb_cfg(pi);
10698 	if (status) {
10699 		device_printf(dev,
10700 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
10701 		    __func__, ice_status_str(status),
10702 		    ice_aq_str(hw->adminq.sq_last_status));
10703 		return (EIO);
10704 	}
10705 
10706 	ice_do_dcb_reconfig(sc, false);
10707 
10708 	return (0);
10709 }
10710 
10711 /**
10712  * ice_handle_debug_dump_ioctl - Handle a debug dump ioctl request
10713  * @sc: the device private softc
10714  * @ifd: ifdrv ioctl request pointer
10715  */
10716 int
10717 ice_handle_debug_dump_ioctl(struct ice_softc *sc, struct ifdrv *ifd)
10718 {
10719 	size_t ifd_len = ifd->ifd_len;
10720 	struct ice_hw *hw = &sc->hw;
10721 	device_t dev = sc->dev;
10722 	struct ice_debug_dump_cmd *ddc;
10723 	enum ice_status status;
10724 	int err = 0;
10725 
10726 	/* Returned arguments from the Admin Queue */
10727 	u16 ret_buf_size = 0;
10728 	u16 ret_next_table = 0;
10729 	u32 ret_next_index = 0;
10730 
10731 	/*
10732 	 * ifioctl forwards SIOCxDRVSPEC to iflib without performing
10733 	 * a privilege check. In turn, iflib forwards the ioctl to the driver
10734 	 * without performing a privilege check. Perform one here to ensure
10735 	 * that non-privileged threads cannot access this interface.
10736 	 */
10737 	err = priv_check(curthread, PRIV_DRIVER);
10738 	if (err)
10739 		return (err);
10740 
10741 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
10742 		device_printf(dev,
10743 		    "%s: Driver must rebuild data structures after a reset. Operation aborted.\n",
10744 		    __func__);
10745 		return (EBUSY);
10746 	}
10747 
10748 	if (ifd_len < sizeof(*ddc)) {
10749 		device_printf(dev,
10750 		    "%s: ifdrv length is too small. Got %zu, but expected %zu\n",
10751 		    __func__, ifd_len, sizeof(*ddc));
10752 		return (EINVAL);
10753 	}
10754 
10755 	if (ifd->ifd_data == NULL) {
10756 		device_printf(dev, "%s: ifd data buffer not present.\n",
10757 		     __func__);
10758 		return (EINVAL);
10759 	}
10760 
10761 	ddc = (struct ice_debug_dump_cmd *)malloc(ifd_len, M_ICE, M_ZERO | M_NOWAIT);
10762 	if (!ddc)
10763 		return (ENOMEM);
10764 
10765 	/* Copy the NVM access command and data in from user space */
10766 	/* coverity[tainted_data_argument] */
10767 	err = copyin(ifd->ifd_data, ddc, ifd_len);
10768 	if (err) {
10769 		device_printf(dev, "%s: Copying request from user space failed, err %s\n",
10770 			      __func__, ice_err_str(err));
10771 		goto out;
10772 	}
10773 
10774 	/* The data_size arg must be at least 1 for the AQ cmd to work */
10775 	if (ddc->data_size == 0) {
10776 		device_printf(dev,
10777 		    "%s: data_size must be greater than 0\n", __func__);
10778 		err = EINVAL;
10779 		goto out;
10780 	}
10781 	/* ...and it can't be too long */
10782 	if (ddc->data_size > (ifd_len - sizeof(*ddc))) {
10783 		device_printf(dev,
10784 		    "%s: data_size (%d) is larger than ifd_len space (%zu)?\n", __func__,
10785 		    ddc->data_size, ifd_len - sizeof(*ddc));
10786 		err = EINVAL;
10787 		goto out;
10788 	}
10789 
10790 	/* Make sure any possible data buffer space is zeroed */
10791 	memset(ddc->data, 0, ifd_len - sizeof(*ddc));
10792 
10793 	status = ice_aq_get_internal_data(hw, ddc->cluster_id, ddc->table_id, ddc->offset,
10794 	    (u8 *)ddc->data, ddc->data_size, &ret_buf_size, &ret_next_table, &ret_next_index, NULL);
10795 	ice_debug(hw, ICE_DBG_DIAG, "%s: ret_buf_size %d, ret_next_table %d, ret_next_index %d\n",
10796 	    __func__, ret_buf_size, ret_next_table, ret_next_index);
10797 	if (status) {
10798 		device_printf(dev,
10799 		    "%s: Get Internal Data AQ command failed, err %s aq_err %s\n",
10800 		    __func__,
10801 		    ice_status_str(status),
10802 		    ice_aq_str(hw->adminq.sq_last_status));
10803 		goto aq_error;
10804 	}
10805 
10806 	ddc->table_id = ret_next_table;
10807 	ddc->offset = ret_next_index;
10808 	ddc->data_size = ret_buf_size;
10809 
10810 	/* Copy the possibly modified contents of the handled request out */
10811 	err = copyout(ddc, ifd->ifd_data, ifd->ifd_len);
10812 	if (err) {
10813 		device_printf(dev, "%s: Copying response back to user space failed, err %s\n",
10814 			      __func__, ice_err_str(err));
10815 		goto out;
10816 	}
10817 
10818 aq_error:
10819 	/* Convert private status to an error code for proper ioctl response */
10820 	switch (status) {
10821 	case ICE_SUCCESS:
10822 		err = (0);
10823 		break;
10824 	case ICE_ERR_NO_MEMORY:
10825 		err = (ENOMEM);
10826 		break;
10827 	case ICE_ERR_OUT_OF_RANGE:
10828 		err = (ENOTTY);
10829 		break;
10830 	case ICE_ERR_AQ_ERROR:
10831 		err = (EIO);
10832 		break;
10833 	case ICE_ERR_PARAM:
10834 	default:
10835 		err = (EINVAL);
10836 		break;
10837 	}
10838 
10839 out:
10840 	free(ddc, M_ICE);
10841 	return (err);
10842 }
10843 
10844 /**
10845  * ice_sysctl_allow_no_fec_mod_in_auto - Change Auto FEC behavior
10846  * @oidp: sysctl oid structure
10847  * @arg1: pointer to private data structure
10848  * @arg2: unused
10849  * @req: sysctl request pointer
10850  *
10851  * Allows user to let "No FEC" mode to be used in "Auto"
10852  * FEC mode during FEC negotiation. This is only supported
10853  * on newer firmware versions.
10854  */
10855 static int
10856 ice_sysctl_allow_no_fec_mod_in_auto(SYSCTL_HANDLER_ARGS)
10857 {
10858 	struct ice_softc *sc = (struct ice_softc *)arg1;
10859 	struct ice_hw *hw = &sc->hw;
10860 	device_t dev = sc->dev;
10861 	u8 user_flag;
10862 	int ret;
10863 
10864 	UNREFERENCED_PARAMETER(arg2);
10865 
10866 	ret = priv_check(curthread, PRIV_DRIVER);
10867 	if (ret)
10868 		return (ret);
10869 
10870 	if (ice_driver_is_detaching(sc))
10871 		return (ESHUTDOWN);
10872 
10873 	user_flag = (u8)sc->allow_no_fec_mod_in_auto;
10874 
10875 	ret = sysctl_handle_bool(oidp, &user_flag, 0, req);
10876 	if ((ret) || (req->newptr == NULL))
10877 		return (ret);
10878 
10879 	if (!ice_fw_supports_fec_dis_auto(hw)) {
10880 		log(LOG_INFO,
10881 		    "%s: Enabling or disabling of auto configuration of modules that don't support FEC is unsupported by the current firmware\n",
10882 		    device_get_nameunit(dev));
10883 		return (ENODEV);
10884 	}
10885 
10886 	if (user_flag == (bool)sc->allow_no_fec_mod_in_auto)
10887 		return (0);
10888 
10889 	sc->allow_no_fec_mod_in_auto = (u8)user_flag;
10890 
10891 	if (sc->allow_no_fec_mod_in_auto)
10892 		log(LOG_INFO, "%s: Enabled auto configuration of No FEC modules\n",
10893 		    device_get_nameunit(dev));
10894 	else
10895 		log(LOG_INFO,
10896 		    "%s: Auto configuration of No FEC modules reset to NVM defaults\n",
10897 		    device_get_nameunit(dev));
10898 
10899 	return (0);
10900 }
10901 
10902