xref: /freebsd/sys/dev/ice/ice_lib.c (revision 734e82fe33aa764367791a7d603b383996c6b40b)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2023, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /**
33  * @file ice_lib.c
34  * @brief Generic device setup and sysctl functions
35  *
36  * Library of generic device functions not specific to the networking stack.
37  *
38  * This includes hardware initialization functions, as well as handlers for
39  * many of the device sysctls used to probe driver status or tune specific
40  * behaviors.
41  */
42 
43 #include "ice_lib.h"
44 #include "ice_iflib.h"
45 #include <dev/pci/pcivar.h>
46 #include <dev/pci/pcireg.h>
47 #include <machine/resource.h>
48 #include <net/if_dl.h>
49 #include <sys/firmware.h>
50 #include <sys/priv.h>
51 #include <sys/limits.h>
52 
/**
 * @var M_ICE
 * @brief Main ice driver allocation type
 *
 * malloc(9) allocation type used by the majority of memory allocations in the
 * ice driver. The VSI structure and queue map allocations later in this file
 * pass M_ICE as the type argument to both malloc() and free().
 */
MALLOC_DEFINE(M_ICE, "ice", "Intel(R) 100Gb Network Driver lib allocations");
61 
/*
 * Helper function prototypes
 *
 * Forward declarations, so that these helpers can be referenced before the
 * point in the file where they are defined.
 */
static int ice_get_next_vsi(struct ice_vsi **all_vsi, int size);
static void ice_set_default_vsi_ctx(struct ice_vsi_ctx *ctx);
static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctx, enum ice_vsi_type type);
static int ice_setup_vsi_qmap(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx);
static int ice_setup_tx_ctx(struct ice_tx_queue *txq,
			    struct ice_tlan_ctx *tlan_ctx, u16 pf_q);
static int ice_setup_rx_ctx(struct ice_rx_queue *rxq);
static int ice_is_rxq_ready(struct ice_hw *hw, int pf_q, u32 *reg);
static void ice_free_fltr_list(struct ice_list_head *list);
static int ice_add_mac_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
			       const u8 *addr, enum ice_sw_fwd_act_type action);
static void ice_check_ctrlq_errors(struct ice_softc *sc, const char *qname,
				   struct ice_ctl_q_info *cq);
static void ice_process_link_event(struct ice_softc *sc, struct ice_rq_event_info *e);
static void ice_process_ctrlq_event(struct ice_softc *sc, const char *qname,
				    struct ice_rq_event_info *event);
static void ice_nvm_version_str(struct ice_hw *hw, struct sbuf *buf);
static void ice_active_pkg_version_str(struct ice_hw *hw, struct sbuf *buf);
static void ice_os_pkg_version_str(struct ice_hw *hw, struct sbuf *buf);
static bool ice_filter_is_mcast(struct ice_vsi *vsi, struct ice_fltr_info *info);
static u_int ice_sync_one_mcast_filter(void *p, struct sockaddr_dl *sdl, u_int errors);
static void ice_add_debug_tunables(struct ice_softc *sc);
static void ice_add_debug_sysctls(struct ice_softc *sc);
static void ice_vsi_set_rss_params(struct ice_vsi *vsi);
static void ice_get_default_rss_key(u8 *seed);
static int  ice_set_rss_key(struct ice_vsi *vsi);
static int  ice_set_rss_lut(struct ice_vsi *vsi);
static void ice_set_rss_flow_flds(struct ice_vsi *vsi);
static void ice_clean_vsi_rss_cfg(struct ice_vsi *vsi);
static const char *ice_aq_speed_to_str(struct ice_port_info *pi);
static const char *ice_requested_fec_mode(struct ice_port_info *pi);
static const char *ice_negotiated_fec_mode(struct ice_port_info *pi);
static const char *ice_autoneg_mode(struct ice_port_info *pi);
static const char *ice_flowcontrol_mode(struct ice_port_info *pi);
static void ice_print_bus_link_data(device_t dev, struct ice_hw *hw);
static void ice_set_pci_link_status_data(struct ice_hw *hw, u16 link_status);
static uint8_t ice_pcie_bandwidth_check(struct ice_softc *sc);
static uint64_t ice_pcie_bus_speed_to_rate(enum ice_pcie_bus_speed speed);
static int ice_pcie_lnk_width_to_int(enum ice_pcie_link_width width);
static uint64_t ice_phy_types_to_max_rate(struct ice_port_info *pi);
static void ice_add_sysctls_sw_stats(struct ice_vsi *vsi,
				     struct sysctl_ctx_list *ctx,
				     struct sysctl_oid *parent);
static void
ice_add_sysctls_mac_pfc_one_stat(struct sysctl_ctx_list *ctx,
				 struct sysctl_oid_list *parent_list,
				 u64* pfc_stat_location,
				 const char *node_name,
				 const char *descr);
static void ice_add_sysctls_mac_pfc_stats(struct sysctl_ctx_list *ctx,
					  struct sysctl_oid *parent,
					  struct ice_hw_port_stats *stats);
static void ice_setup_vsi_common(struct ice_softc *sc, struct ice_vsi *vsi,
				 enum ice_vsi_type type, int idx,
				 bool dynamic);
static void ice_handle_mib_change_event(struct ice_softc *sc,
				 struct ice_rq_event_info *event);
static void
ice_handle_lan_overflow_event(struct ice_softc *sc,
			      struct ice_rq_event_info *event);
static int ice_add_ethertype_to_list(struct ice_vsi *vsi,
				     struct ice_list_head *list,
				     u16 ethertype, u16 direction,
				     enum ice_sw_fwd_act_type action);
static void ice_add_rx_lldp_filter(struct ice_softc *sc);
static void ice_del_rx_lldp_filter(struct ice_softc *sc);
static u16 ice_aq_phy_types_to_link_speeds(u64 phy_type_low,
					   u64 phy_type_high);
/*
 * Declare the struct tag up front so the prototype below can take a pointer
 * to it before the structure itself has been defined.
 */
struct ice_phy_data;
static int
ice_intersect_phy_types_and_speeds(struct ice_softc *sc,
				   struct ice_phy_data *phy_data);
static int
ice_apply_saved_phy_req_to_cfg(struct ice_softc *sc,
			       struct ice_aqc_set_phy_cfg_data *cfg);
static int
ice_apply_saved_fec_req_to_cfg(struct ice_softc *sc,
			       struct ice_aqc_set_phy_cfg_data *cfg);
static void
ice_apply_saved_fc_req_to_cfg(struct ice_port_info *pi,
			      struct ice_aqc_set_phy_cfg_data *cfg);
static void
ice_print_ldo_tlv(struct ice_softc *sc,
		  struct ice_link_default_override_tlv *tlv);
static void
ice_sysctl_speeds_to_aq_phy_types(u16 sysctl_speeds, u64 *phy_type_low,
				  u64 *phy_type_high);
static u16 ice_apply_supported_speed_filter(u16 report_speeds, u8 mod_type);
static void
ice_handle_health_status_event(struct ice_softc *sc,
			       struct ice_rq_event_info *event);
static void
ice_print_health_status_string(device_t dev,
			       struct ice_aqc_health_status_elem *elem);
static void
ice_debug_print_mib_change_event(struct ice_softc *sc,
				 struct ice_rq_event_info *event);
static bool ice_check_ets_bw(u8 *table);
static u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg);
static bool
ice_dcb_needs_reconfig(struct ice_softc *sc, struct ice_dcbx_cfg *old_cfg,
		       struct ice_dcbx_cfg *new_cfg);
static void ice_dcb_recfg(struct ice_softc *sc);
static u8 ice_dcb_tc_contig(u8 tc_map);
static int ice_ets_str_to_tbl(const char *str, u8 *table, u8 limit);
static int ice_pf_vsi_cfg_tc(struct ice_softc *sc, u8 tc_map);
static void ice_sbuf_print_ets_cfg(struct sbuf *sbuf, const char *name,
				   struct ice_dcb_ets_cfg *ets);
static void ice_stop_pf_vsi(struct ice_softc *sc);
static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt);
static void ice_do_dcb_reconfig(struct ice_softc *sc, bool pending_mib);
static int ice_config_pfc(struct ice_softc *sc, u8 new_mode);
/*
 * NOTE(review): unlike every other prototype in this list, this one is not
 * declared static. Confirm whether external linkage is intended here.
 */
void
ice_add_dscp2tc_map_sysctls(struct ice_softc *sc,
			    struct sysctl_ctx_list *ctx,
			    struct sysctl_oid_list *ctx_list);
static void ice_set_default_local_mib_settings(struct ice_softc *sc);
static bool ice_dscp_is_mapped(struct ice_dcbx_cfg *dcbcfg);
static void ice_start_dcbx_agent(struct ice_softc *sc);
static void ice_fw_debug_dump_print_cluster(struct ice_softc *sc,
					    struct sbuf *sbuf, u16 cluster_id);

/* Module init/exit routines; both take no arguments and return an int */
static int ice_module_init(void);
static int ice_module_exit(void);
189 
/*
 * Package version comparison functions
 *
 * Both helpers take a pointer to a struct ice_pkg_ver; pkg_ver_empty
 * additionally takes the package name buffer.
 */
static bool pkg_ver_empty(struct ice_pkg_ver *pkg_ver, u8 *pkg_name);
static int pkg_ver_compatible(struct ice_pkg_ver *pkg_ver);
195 
/*
 * Dynamic sysctl handlers
 *
 * Every function below takes the standard SYSCTL_HANDLER_ARGS parameter
 * list. Two of them (__ice_sysctl_phy_type_handler and ice_sysctl_phy_caps)
 * take one extra trailing argument on top of it.
 * NOTE(review): those two presumably serve as shared back ends for the plain
 * handlers declared next to them -- confirm against their definitions.
 */
static int ice_sysctl_show_fw(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_pkg_version(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_os_pkg_version(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_dump_mac_filters(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_dump_vlan_filters(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_dump_ethertype_filters(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_dump_ethertype_mac_filters(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_current_speed(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_request_reset(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_dump_state_flags(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_fec_config(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_fc_config(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_negotiated_fc(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_negotiated_fec(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_phy_type_low(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_phy_type_high(SYSCTL_HANDLER_ARGS);
static int __ice_sysctl_phy_type_handler(SYSCTL_HANDLER_ARGS,
					 bool is_phy_type_high);
static int ice_sysctl_advertise_speed(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_rx_itr(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_tx_itr(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_fw_lldp_agent(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_fw_cur_lldp_persist_status(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_fw_dflt_lldp_persist_status(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_phy_caps(SYSCTL_HANDLER_ARGS, u8 report_mode);
static int ice_sysctl_phy_sw_caps(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_phy_nvm_caps(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_phy_topo_caps(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_phy_link_status(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_tx_cso_stat(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_rx_cso_stat(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_pba_number(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_rx_errors_stat(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_dump_dcbx_cfg(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_dump_vsi_cfg(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_ets_min_rate(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_up2tc_map(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_pfc_config(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_query_port_ets(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_dscp2tc_map(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_pfc_mode(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_fw_debug_dump_cluster_setting(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_fw_debug_dump_do_dump(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_allow_no_fec_mod_in_auto(SYSCTL_HANDLER_ARGS);
244 
245 /**
246  * ice_map_bar - Map PCIe BAR memory
247  * @dev: the PCIe device
248  * @bar: the BAR info structure
249  * @bar_num: PCIe BAR number
250  *
251  * Maps the specified PCIe BAR. Stores the mapping data in struct
252  * ice_bar_info.
253  */
254 int
255 ice_map_bar(device_t dev, struct ice_bar_info *bar, int bar_num)
256 {
257 	if (bar->res != NULL) {
258 		device_printf(dev, "PCI BAR%d already mapped\n", bar_num);
259 		return (EDOOFUS);
260 	}
261 
262 	bar->rid = PCIR_BAR(bar_num);
263 	bar->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar->rid,
264 					  RF_ACTIVE);
265 	if (!bar->res) {
266 		device_printf(dev, "PCI BAR%d mapping failed\n", bar_num);
267 		return (ENXIO);
268 	}
269 
270 	bar->tag = rman_get_bustag(bar->res);
271 	bar->handle = rman_get_bushandle(bar->res);
272 	bar->size = rman_get_size(bar->res);
273 
274 	return (0);
275 }
276 
277 /**
278  * ice_free_bar - Free PCIe BAR memory
279  * @dev: the PCIe device
280  * @bar: the BAR info structure
281  *
282  * Frees the specified PCIe BAR, releasing its resources.
283  */
284 void
285 ice_free_bar(device_t dev, struct ice_bar_info *bar)
286 {
287 	if (bar->res != NULL)
288 		bus_release_resource(dev, SYS_RES_MEMORY, bar->rid, bar->res);
289 	bar->res = NULL;
290 }
291 
292 /**
293  * ice_set_ctrlq_len - Configure ctrlq lengths for a device
294  * @hw: the device hardware structure
295  *
296  * Configures the control queues for the given device, setting up the
297  * specified lengths, prior to initializing hardware.
298  */
299 void
300 ice_set_ctrlq_len(struct ice_hw *hw)
301 {
302 	hw->adminq.num_rq_entries = ICE_AQ_LEN;
303 	hw->adminq.num_sq_entries = ICE_AQ_LEN;
304 	hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;
305 	hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
306 
307 	hw->mailboxq.num_rq_entries = ICE_MBXQ_LEN;
308 	hw->mailboxq.num_sq_entries = ICE_MBXQ_LEN;
309 	hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
310 	hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
311 
312 }
313 
/**
 * ice_get_next_vsi - Find the first unused VSI slot
 * @all_vsi: array of VSI pointers to search
 * @size: number of entries in @all_vsi
 *
 * Scans @all_vsi from index zero and returns the index of the first NULL
 * entry. When every entry is occupied, or when @size is not positive, the
 * value of @size itself is returned.
 */
static int
ice_get_next_vsi(struct ice_vsi **all_vsi, int size)
{
	int slot = 0;

	/* Skip over occupied slots */
	while (slot < size && all_vsi[slot] != NULL)
		slot++;

	/* Either a free slot was found or the scan ran off the end */
	return ((slot < size) ? slot : size);
}
334 
335 /**
336  * ice_setup_vsi_common - Common VSI setup for both dynamic and static VSIs
337  * @sc: the device private softc structure
338  * @vsi: the VSI to setup
339  * @type: the VSI type of the new VSI
340  * @idx: the index in the all_vsi array to use
341  * @dynamic: whether this VSI memory was dynamically allocated
342  *
343  * Perform setup for a VSI that is common to both dynamically allocated VSIs
344  * and the static PF VSI which is embedded in the softc structure.
345  */
346 static void
347 ice_setup_vsi_common(struct ice_softc *sc, struct ice_vsi *vsi,
348 		     enum ice_vsi_type type, int idx, bool dynamic)
349 {
350 	/* Store important values in VSI struct */
351 	vsi->type = type;
352 	vsi->sc = sc;
353 	vsi->idx = idx;
354 	sc->all_vsi[idx] = vsi;
355 	vsi->dynamic = dynamic;
356 
357 	/* Setup the VSI tunables now */
358 	ice_add_vsi_tunables(vsi, sc->vsi_sysctls);
359 }
360 
361 /**
362  * ice_alloc_vsi - Allocate a dynamic VSI
363  * @sc: device softc structure
364  * @type: VSI type
365  *
366  * Allocates a new dynamic VSI structure and inserts it into the VSI list.
367  */
368 struct ice_vsi *
369 ice_alloc_vsi(struct ice_softc *sc, enum ice_vsi_type type)
370 {
371 	struct ice_vsi *vsi;
372 	int idx;
373 
374 	/* Find an open index for a new VSI to be allocated. If the returned
375 	 * index is >= the num_available_vsi then it means no slot is
376 	 * available.
377 	 */
378 	idx = ice_get_next_vsi(sc->all_vsi, sc->num_available_vsi);
379 	if (idx >= sc->num_available_vsi) {
380 		device_printf(sc->dev, "No available VSI slots\n");
381 		return NULL;
382 	}
383 
384 	vsi = (struct ice_vsi *)malloc(sizeof(*vsi), M_ICE, M_WAITOK|M_ZERO);
385 	if (!vsi) {
386 		device_printf(sc->dev, "Unable to allocate VSI memory\n");
387 		return NULL;
388 	}
389 
390 	ice_setup_vsi_common(sc, vsi, type, idx, true);
391 
392 	return vsi;
393 }
394 
395 /**
396  * ice_setup_pf_vsi - Setup the PF VSI
397  * @sc: the device private softc
398  *
399  * Setup the PF VSI structure which is embedded as sc->pf_vsi in the device
400  * private softc. Unlike other VSIs, the PF VSI memory is allocated as part of
401  * the softc memory, instead of being dynamically allocated at creation.
402  */
403 void
404 ice_setup_pf_vsi(struct ice_softc *sc)
405 {
406 	ice_setup_vsi_common(sc, &sc->pf_vsi, ICE_VSI_PF, 0, false);
407 }
408 
409 /**
410  * ice_alloc_vsi_qmap
411  * @vsi: VSI structure
412  * @max_tx_queues: Number of transmit queues to identify
413  * @max_rx_queues: Number of receive queues to identify
414  *
415  * Allocates a max_[t|r]x_queues array of words for the VSI where each
416  * word contains the index of the queue it represents.  In here, all
417  * words are initialized to an index of ICE_INVALID_RES_IDX, indicating
418  * all queues for this VSI are not yet assigned an index and thus,
419  * not ready for use.
420  *
421  * Returns an error code on failure.
422  */
423 int
424 ice_alloc_vsi_qmap(struct ice_vsi *vsi, const int max_tx_queues,
425 		   const int max_rx_queues)
426 {
427 	struct ice_softc *sc = vsi->sc;
428 	int i;
429 
430 	MPASS(max_tx_queues > 0);
431 	MPASS(max_rx_queues > 0);
432 
433 	/* Allocate Tx queue mapping memory */
434 	if (!(vsi->tx_qmap =
435 	      (u16 *) malloc(sizeof(u16) * max_tx_queues, M_ICE, M_WAITOK))) {
436 		device_printf(sc->dev, "Unable to allocate Tx qmap memory\n");
437 		return (ENOMEM);
438 	}
439 
440 	/* Allocate Rx queue mapping memory */
441 	if (!(vsi->rx_qmap =
442 	      (u16 *) malloc(sizeof(u16) * max_rx_queues, M_ICE, M_WAITOK))) {
443 		device_printf(sc->dev, "Unable to allocate Rx qmap memory\n");
444 		goto free_tx_qmap;
445 	}
446 
447 	/* Mark every queue map as invalid to start with */
448 	for (i = 0; i < max_tx_queues; i++) {
449 		vsi->tx_qmap[i] = ICE_INVALID_RES_IDX;
450 	}
451 	for (i = 0; i < max_rx_queues; i++) {
452 		vsi->rx_qmap[i] = ICE_INVALID_RES_IDX;
453 	}
454 
455 	return 0;
456 
457 free_tx_qmap:
458 	free(vsi->tx_qmap, M_ICE);
459 	vsi->tx_qmap = NULL;
460 
461 	return (ENOMEM);
462 }
463 
464 /**
465  * ice_free_vsi_qmaps - Free the PF qmaps associated with a VSI
466  * @vsi: the VSI private structure
467  *
468  * Frees the PF qmaps associated with the given VSI. Generally this will be
469  * called by ice_release_vsi, but may need to be called during attach cleanup,
470  * depending on when the qmaps were allocated.
471  */
472 void
473 ice_free_vsi_qmaps(struct ice_vsi *vsi)
474 {
475 	struct ice_softc *sc = vsi->sc;
476 
477 	if (vsi->tx_qmap) {
478 		ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap,
479 					   vsi->num_tx_queues);
480 		free(vsi->tx_qmap, M_ICE);
481 		vsi->tx_qmap = NULL;
482 	}
483 
484 	if (vsi->rx_qmap) {
485 		ice_resmgr_release_map(&sc->rx_qmgr, vsi->rx_qmap,
486 					   vsi->num_rx_queues);
487 		free(vsi->rx_qmap, M_ICE);
488 		vsi->rx_qmap = NULL;
489 	}
490 }
491 
492 /**
493  * ice_set_default_vsi_ctx - Setup default VSI context parameters
494  * @ctx: the VSI context to initialize
495  *
496  * Initialize and prepare a default VSI context for configuring a new VSI.
497  */
498 static void
499 ice_set_default_vsi_ctx(struct ice_vsi_ctx *ctx)
500 {
501 	u32 table = 0;
502 
503 	memset(&ctx->info, 0, sizeof(ctx->info));
504 	/* VSI will be allocated from shared pool */
505 	ctx->alloc_from_pool = true;
506 	/* Enable source pruning by default */
507 	ctx->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE;
508 	/* Traffic from VSI can be sent to LAN */
509 	ctx->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA;
510 	/* Allow all packets untagged/tagged */
511 	ctx->info.inner_vlan_flags = ((ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL &
512 				       ICE_AQ_VSI_INNER_VLAN_TX_MODE_M) >>
513 				       ICE_AQ_VSI_INNER_VLAN_TX_MODE_S);
514 	/* Show VLAN/UP from packets in Rx descriptors */
515 	ctx->info.inner_vlan_flags |= ((ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH &
516 					ICE_AQ_VSI_INNER_VLAN_EMODE_M) >>
517 					ICE_AQ_VSI_INNER_VLAN_EMODE_S);
518 	/* Have 1:1 UP mapping for both ingress/egress tables */
519 	table |= ICE_UP_TABLE_TRANSLATE(0, 0);
520 	table |= ICE_UP_TABLE_TRANSLATE(1, 1);
521 	table |= ICE_UP_TABLE_TRANSLATE(2, 2);
522 	table |= ICE_UP_TABLE_TRANSLATE(3, 3);
523 	table |= ICE_UP_TABLE_TRANSLATE(4, 4);
524 	table |= ICE_UP_TABLE_TRANSLATE(5, 5);
525 	table |= ICE_UP_TABLE_TRANSLATE(6, 6);
526 	table |= ICE_UP_TABLE_TRANSLATE(7, 7);
527 	ctx->info.ingress_table = CPU_TO_LE32(table);
528 	ctx->info.egress_table = CPU_TO_LE32(table);
529 	/* Have 1:1 UP mapping for outer to inner UP table */
530 	ctx->info.outer_up_table = CPU_TO_LE32(table);
531 	/* No Outer tag support, so outer_vlan_flags remains zero */
532 }
533 
534 /**
535  * ice_set_rss_vsi_ctx - Setup VSI context parameters for RSS
536  * @ctx: the VSI context to configure
537  * @type: the VSI type
538  *
539  * Configures the VSI context for RSS, based on the VSI type.
540  */
541 static void
542 ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctx, enum ice_vsi_type type)
543 {
544 	u8 lut_type, hash_type;
545 
546 	switch (type) {
547 	case ICE_VSI_PF:
548 		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF;
549 		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
550 		break;
551 	case ICE_VSI_VF:
552 		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI;
553 		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
554 		break;
555 	default:
556 		/* Other VSI types do not support RSS */
557 		return;
558 	}
559 
560 	ctx->info.q_opt_rss = (((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) &
561 				 ICE_AQ_VSI_Q_OPT_RSS_LUT_M) |
562 				((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) &
563 				 ICE_AQ_VSI_Q_OPT_RSS_HASH_M));
564 }
565 
566 /**
567  * ice_setup_vsi_qmap - Setup the queue mapping for a VSI
568  * @vsi: the VSI to configure
569  * @ctx: the VSI context to configure
570  *
571  * Configures the context for the given VSI, setting up how the firmware
572  * should map the queues for this VSI.
573  */
574 static int
575 ice_setup_vsi_qmap(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx)
576 {
577 	int pow = 0;
578 	u16 qmap;
579 
580 	MPASS(vsi->rx_qmap != NULL);
581 
582 	switch (vsi->qmap_type) {
583 	case ICE_RESMGR_ALLOC_CONTIGUOUS:
584 		ctx->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_CONTIG);
585 
586 		ctx->info.q_mapping[0] = CPU_TO_LE16(vsi->rx_qmap[0]);
587 		ctx->info.q_mapping[1] = CPU_TO_LE16(vsi->num_rx_queues);
588 
589 		break;
590 	case ICE_RESMGR_ALLOC_SCATTERED:
591 		ctx->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_NONCONTIG);
592 
593 		for (int i = 0; i < vsi->num_rx_queues; i++)
594 			ctx->info.q_mapping[i] = CPU_TO_LE16(vsi->rx_qmap[i]);
595 		break;
596 	default:
597 		return (EOPNOTSUPP);
598 	}
599 
600 	/* Calculate the next power-of-2 of number of queues */
601 	if (vsi->num_rx_queues)
602 		pow = flsl(vsi->num_rx_queues - 1);
603 
604 	/* Assign all the queues to traffic class zero */
605 	qmap = (pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M;
606 	ctx->info.tc_mapping[0] = CPU_TO_LE16(qmap);
607 
608 	/* Fill out default driver TC queue info for VSI */
609 	vsi->tc_info[0].qoffset = 0;
610 	vsi->tc_info[0].qcount_rx = vsi->num_rx_queues;
611 	vsi->tc_info[0].qcount_tx = vsi->num_tx_queues;
612 	for (int i = 1; i < ICE_MAX_TRAFFIC_CLASS; i++) {
613 		vsi->tc_info[i].qoffset = 0;
614 		vsi->tc_info[i].qcount_rx = 1;
615 		vsi->tc_info[i].qcount_tx = 1;
616 	}
617 	vsi->tc_map = 0x1;
618 
619 	return 0;
620 }
621 
622 /**
623  * ice_initialize_vsi - Initialize a VSI for use
624  * @vsi: the vsi to initialize
625  *
626  * Initialize a VSI over the adminq and prepare it for operation.
627  */
628 int
629 ice_initialize_vsi(struct ice_vsi *vsi)
630 {
631 	struct ice_vsi_ctx ctx = { 0 };
632 	struct ice_hw *hw = &vsi->sc->hw;
633 	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
634 	enum ice_status status;
635 	int err;
636 
637 	/* For now, we only have code supporting PF VSIs */
638 	switch (vsi->type) {
639 	case ICE_VSI_PF:
640 		ctx.flags = ICE_AQ_VSI_TYPE_PF;
641 		break;
642 	default:
643 		return (ENODEV);
644 	}
645 
646 	ice_set_default_vsi_ctx(&ctx);
647 	ice_set_rss_vsi_ctx(&ctx, vsi->type);
648 
649 	/* XXX: VSIs of other types may need different port info? */
650 	ctx.info.sw_id = hw->port_info->sw_id;
651 
652 	/* Set some RSS parameters based on the VSI type */
653 	ice_vsi_set_rss_params(vsi);
654 
655 	/* Initialize the Rx queue mapping for this VSI */
656 	err = ice_setup_vsi_qmap(vsi, &ctx);
657 	if (err) {
658 		return err;
659 	}
660 
661 	/* (Re-)add VSI to HW VSI handle list */
662 	status = ice_add_vsi(hw, vsi->idx, &ctx, NULL);
663 	if (status != 0) {
664 		device_printf(vsi->sc->dev,
665 		    "Add VSI AQ call failed, err %s aq_err %s\n",
666 		    ice_status_str(status),
667 		    ice_aq_str(hw->adminq.sq_last_status));
668 		return (EIO);
669 	}
670 	vsi->info = ctx.info;
671 
672 	/* Initialize VSI with just 1 TC to start */
673 	max_txqs[0] = vsi->num_tx_queues;
674 
675 	status = ice_cfg_vsi_lan(hw->port_info, vsi->idx,
676 			      ICE_DFLT_TRAFFIC_CLASS, max_txqs);
677 	if (status) {
678 		device_printf(vsi->sc->dev,
679 		    "Failed VSI lan queue config, err %s aq_err %s\n",
680 		    ice_status_str(status),
681 		    ice_aq_str(hw->adminq.sq_last_status));
682 		ice_deinit_vsi(vsi);
683 		return (ENODEV);
684 	}
685 
686 	/* Reset VSI stats */
687 	ice_reset_vsi_stats(vsi);
688 
689 	return 0;
690 }
691 
692 /**
693  * ice_deinit_vsi - Tell firmware to release resources for a VSI
694  * @vsi: the VSI to release
695  *
696  * Helper function which requests the firmware to release the hardware
697  * resources associated with a given VSI.
698  */
699 void
700 ice_deinit_vsi(struct ice_vsi *vsi)
701 {
702 	struct ice_vsi_ctx ctx = { 0 };
703 	struct ice_softc *sc = vsi->sc;
704 	struct ice_hw *hw = &sc->hw;
705 	enum ice_status status;
706 
707 	/* Assert that the VSI pointer matches in the list */
708 	MPASS(vsi == sc->all_vsi[vsi->idx]);
709 
710 	ctx.info = vsi->info;
711 
712 	status = ice_rm_vsi_lan_cfg(hw->port_info, vsi->idx);
713 	if (status) {
714 		/*
715 		 * This should only fail if the VSI handle is invalid, or if
716 		 * any of the nodes have leaf nodes which are still in use.
717 		 */
718 		device_printf(sc->dev,
719 			      "Unable to remove scheduler nodes for VSI %d, err %s\n",
720 			      vsi->idx, ice_status_str(status));
721 	}
722 
723 	/* Tell firmware to release the VSI resources */
724 	status = ice_free_vsi(hw, vsi->idx, &ctx, false, NULL);
725 	if (status != 0) {
726 		device_printf(sc->dev,
727 		    "Free VSI %u AQ call failed, err %s aq_err %s\n",
728 		    vsi->idx, ice_status_str(status),
729 		    ice_aq_str(hw->adminq.sq_last_status));
730 	}
731 }
732 
733 /**
734  * ice_release_vsi - Release resources associated with a VSI
735  * @vsi: the VSI to release
736  *
737  * Release software and firmware resources associated with a VSI. Release the
738  * queue managers associated with this VSI. Also free the VSI structure memory
739  * if the VSI was allocated dynamically using ice_alloc_vsi().
740  */
741 void
742 ice_release_vsi(struct ice_vsi *vsi)
743 {
744 	struct ice_softc *sc = vsi->sc;
745 	int idx = vsi->idx;
746 
747 	/* Assert that the VSI pointer matches in the list */
748 	MPASS(vsi == sc->all_vsi[idx]);
749 
750 	/* Cleanup RSS configuration */
751 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_RSS))
752 		ice_clean_vsi_rss_cfg(vsi);
753 
754 	ice_del_vsi_sysctl_ctx(vsi);
755 
756 	/*
757 	 * If we unload the driver after a reset fails, we do not need to do
758 	 * this step.
759 	 */
760 	if (!ice_test_state(&sc->state, ICE_STATE_RESET_FAILED))
761 		ice_deinit_vsi(vsi);
762 
763 	ice_free_vsi_qmaps(vsi);
764 
765 	if (vsi->dynamic) {
766 		free(sc->all_vsi[idx], M_ICE);
767 	}
768 
769 	sc->all_vsi[idx] = NULL;
770 }
771 
772 /**
773  * ice_aq_speed_to_rate - Convert AdminQ speed enum to baudrate
774  * @pi: port info data
775  *
776  * Returns the baudrate value for the current link speed of a given port.
777  */
778 uint64_t
779 ice_aq_speed_to_rate(struct ice_port_info *pi)
780 {
781 	switch (pi->phy.link_info.link_speed) {
782 	case ICE_AQ_LINK_SPEED_100GB:
783 		return IF_Gbps(100);
784 	case ICE_AQ_LINK_SPEED_50GB:
785 		return IF_Gbps(50);
786 	case ICE_AQ_LINK_SPEED_40GB:
787 		return IF_Gbps(40);
788 	case ICE_AQ_LINK_SPEED_25GB:
789 		return IF_Gbps(25);
790 	case ICE_AQ_LINK_SPEED_10GB:
791 		return IF_Gbps(10);
792 	case ICE_AQ_LINK_SPEED_5GB:
793 		return IF_Gbps(5);
794 	case ICE_AQ_LINK_SPEED_2500MB:
795 		return IF_Mbps(2500);
796 	case ICE_AQ_LINK_SPEED_1000MB:
797 		return IF_Mbps(1000);
798 	case ICE_AQ_LINK_SPEED_100MB:
799 		return IF_Mbps(100);
800 	case ICE_AQ_LINK_SPEED_10MB:
801 		return IF_Mbps(10);
802 	case ICE_AQ_LINK_SPEED_UNKNOWN:
803 	default:
804 		/* return 0 if we don't know the link speed */
805 		return 0;
806 	}
807 }
808 
809 /**
810  * ice_aq_speed_to_str - Convert AdminQ speed enum to string representation
811  * @pi: port info data
812  *
813  * Returns the string representation of the current link speed for a given
814  * port.
815  */
816 static const char *
817 ice_aq_speed_to_str(struct ice_port_info *pi)
818 {
819 	switch (pi->phy.link_info.link_speed) {
820 	case ICE_AQ_LINK_SPEED_100GB:
821 		return "100 Gbps";
822 	case ICE_AQ_LINK_SPEED_50GB:
823 		return "50 Gbps";
824 	case ICE_AQ_LINK_SPEED_40GB:
825 		return "40 Gbps";
826 	case ICE_AQ_LINK_SPEED_25GB:
827 		return "25 Gbps";
828 	case ICE_AQ_LINK_SPEED_20GB:
829 		return "20 Gbps";
830 	case ICE_AQ_LINK_SPEED_10GB:
831 		return "10 Gbps";
832 	case ICE_AQ_LINK_SPEED_5GB:
833 		return "5 Gbps";
834 	case ICE_AQ_LINK_SPEED_2500MB:
835 		return "2.5 Gbps";
836 	case ICE_AQ_LINK_SPEED_1000MB:
837 		return "1 Gbps";
838 	case ICE_AQ_LINK_SPEED_100MB:
839 		return "100 Mbps";
840 	case ICE_AQ_LINK_SPEED_10MB:
841 		return "10 Mbps";
842 	case ICE_AQ_LINK_SPEED_UNKNOWN:
843 	default:
844 		return "Unknown speed";
845 	}
846 }
847 
848 /**
849  * ice_get_phy_type_low - Get media associated with phy_type_low
850  * @phy_type_low: the low 64bits of phy_type from the AdminQ
851  *
852  * Given the lower 64bits of the phy_type from the hardware, return the
853  * ifm_active bit associated. Return IFM_UNKNOWN when phy_type_low is unknown.
854  * Note that only one of ice_get_phy_type_low or ice_get_phy_type_high should
 * be called. If phy_type_low is zero, call ice_get_phy_type_high.
856  */
857 int
858 ice_get_phy_type_low(uint64_t phy_type_low)
859 {
860 	switch (phy_type_low) {
861 	case ICE_PHY_TYPE_LOW_100BASE_TX:
862 		return IFM_100_TX;
863 	case ICE_PHY_TYPE_LOW_100M_SGMII:
864 		return IFM_100_SGMII;
865 	case ICE_PHY_TYPE_LOW_1000BASE_T:
866 		return IFM_1000_T;
867 	case ICE_PHY_TYPE_LOW_1000BASE_SX:
868 		return IFM_1000_SX;
869 	case ICE_PHY_TYPE_LOW_1000BASE_LX:
870 		return IFM_1000_LX;
871 	case ICE_PHY_TYPE_LOW_1000BASE_KX:
872 		return IFM_1000_KX;
873 	case ICE_PHY_TYPE_LOW_1G_SGMII:
874 		return IFM_1000_SGMII;
875 	case ICE_PHY_TYPE_LOW_2500BASE_T:
876 		return IFM_2500_T;
877 	case ICE_PHY_TYPE_LOW_2500BASE_X:
878 		return IFM_2500_X;
879 	case ICE_PHY_TYPE_LOW_2500BASE_KX:
880 		return IFM_2500_KX;
881 	case ICE_PHY_TYPE_LOW_5GBASE_T:
882 		return IFM_5000_T;
883 	case ICE_PHY_TYPE_LOW_5GBASE_KR:
884 		return IFM_5000_KR;
885 	case ICE_PHY_TYPE_LOW_10GBASE_T:
886 		return IFM_10G_T;
887 	case ICE_PHY_TYPE_LOW_10G_SFI_DA:
888 		return IFM_10G_TWINAX;
889 	case ICE_PHY_TYPE_LOW_10GBASE_SR:
890 		return IFM_10G_SR;
891 	case ICE_PHY_TYPE_LOW_10GBASE_LR:
892 		return IFM_10G_LR;
893 	case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1:
894 		return IFM_10G_KR;
895 	case ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC:
896 		return IFM_10G_AOC;
897 	case ICE_PHY_TYPE_LOW_10G_SFI_C2C:
898 		return IFM_10G_SFI;
899 	case ICE_PHY_TYPE_LOW_25GBASE_T:
900 		return IFM_25G_T;
901 	case ICE_PHY_TYPE_LOW_25GBASE_CR:
902 		return IFM_25G_CR;
903 	case ICE_PHY_TYPE_LOW_25GBASE_CR_S:
904 		return IFM_25G_CR_S;
905 	case ICE_PHY_TYPE_LOW_25GBASE_CR1:
906 		return IFM_25G_CR1;
907 	case ICE_PHY_TYPE_LOW_25GBASE_SR:
908 		return IFM_25G_SR;
909 	case ICE_PHY_TYPE_LOW_25GBASE_LR:
910 		return IFM_25G_LR;
911 	case ICE_PHY_TYPE_LOW_25GBASE_KR:
912 		return IFM_25G_KR;
913 	case ICE_PHY_TYPE_LOW_25GBASE_KR_S:
914 		return IFM_25G_KR_S;
915 	case ICE_PHY_TYPE_LOW_25GBASE_KR1:
916 		return IFM_25G_KR1;
917 	case ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC:
918 		return IFM_25G_AOC;
919 	case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
920 		return IFM_25G_AUI;
921 	case ICE_PHY_TYPE_LOW_40GBASE_CR4:
922 		return IFM_40G_CR4;
923 	case ICE_PHY_TYPE_LOW_40GBASE_SR4:
924 		return IFM_40G_SR4;
925 	case ICE_PHY_TYPE_LOW_40GBASE_LR4:
926 		return IFM_40G_LR4;
927 	case ICE_PHY_TYPE_LOW_40GBASE_KR4:
928 		return IFM_40G_KR4;
929 	case ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC:
930 		return IFM_40G_XLAUI_AC;
931 	case ICE_PHY_TYPE_LOW_40G_XLAUI:
932 		return IFM_40G_XLAUI;
933 	case ICE_PHY_TYPE_LOW_50GBASE_CR2:
934 		return IFM_50G_CR2;
935 	case ICE_PHY_TYPE_LOW_50GBASE_SR2:
936 		return IFM_50G_SR2;
937 	case ICE_PHY_TYPE_LOW_50GBASE_LR2:
938 		return IFM_50G_LR2;
939 	case ICE_PHY_TYPE_LOW_50GBASE_KR2:
940 		return IFM_50G_KR2;
941 	case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC:
942 		return IFM_50G_LAUI2_AC;
943 	case ICE_PHY_TYPE_LOW_50G_LAUI2:
944 		return IFM_50G_LAUI2;
945 	case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC:
946 		return IFM_50G_AUI2_AC;
947 	case ICE_PHY_TYPE_LOW_50G_AUI2:
948 		return IFM_50G_AUI2;
949 	case ICE_PHY_TYPE_LOW_50GBASE_CP:
950 		return IFM_50G_CP;
951 	case ICE_PHY_TYPE_LOW_50GBASE_SR:
952 		return IFM_50G_SR;
953 	case ICE_PHY_TYPE_LOW_50GBASE_FR:
954 		return IFM_50G_FR;
955 	case ICE_PHY_TYPE_LOW_50GBASE_LR:
956 		return IFM_50G_LR;
957 	case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4:
958 		return IFM_50G_KR_PAM4;
959 	case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC:
960 		return IFM_50G_AUI1_AC;
961 	case ICE_PHY_TYPE_LOW_50G_AUI1:
962 		return IFM_50G_AUI1;
963 	case ICE_PHY_TYPE_LOW_100GBASE_CR4:
964 		return IFM_100G_CR4;
965 	case ICE_PHY_TYPE_LOW_100GBASE_SR4:
966 		return IFM_100G_SR4;
967 	case ICE_PHY_TYPE_LOW_100GBASE_LR4:
968 		return IFM_100G_LR4;
969 	case ICE_PHY_TYPE_LOW_100GBASE_KR4:
970 		return IFM_100G_KR4;
971 	case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC:
972 		return IFM_100G_CAUI4_AC;
973 	case ICE_PHY_TYPE_LOW_100G_CAUI4:
974 		return IFM_100G_CAUI4;
975 	case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC:
976 		return IFM_100G_AUI4_AC;
977 	case ICE_PHY_TYPE_LOW_100G_AUI4:
978 		return IFM_100G_AUI4;
979 	case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4:
980 		return IFM_100G_CR_PAM4;
981 	case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4:
982 		return IFM_100G_KR_PAM4;
983 	case ICE_PHY_TYPE_LOW_100GBASE_CP2:
984 		return IFM_100G_CP2;
985 	case ICE_PHY_TYPE_LOW_100GBASE_SR2:
986 		return IFM_100G_SR2;
987 	case ICE_PHY_TYPE_LOW_100GBASE_DR:
988 		return IFM_100G_DR;
989 	default:
990 		return IFM_UNKNOWN;
991 	}
992 }
993 
994 /**
995  * ice_get_phy_type_high - Get media associated with phy_type_high
996  * @phy_type_high: the upper 64bits of phy_type from the AdminQ
997  *
998  * Given the upper 64bits of the phy_type from the hardware, return the
999  * ifm_active bit associated. Return IFM_UNKNOWN on an unknown value. Note
1000  * that only one of ice_get_phy_type_low or ice_get_phy_type_high should be
1001  * called. If phy_type_high is zero, call ice_get_phy_type_low.
1002  */
1003 int
1004 ice_get_phy_type_high(uint64_t phy_type_high)
1005 {
1006 	switch (phy_type_high) {
1007 	case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4:
1008 		return IFM_100G_KR2_PAM4;
1009 	case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC:
1010 		return IFM_100G_CAUI2_AC;
1011 	case ICE_PHY_TYPE_HIGH_100G_CAUI2:
1012 		return IFM_100G_CAUI2;
1013 	case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC:
1014 		return IFM_100G_AUI2_AC;
1015 	case ICE_PHY_TYPE_HIGH_100G_AUI2:
1016 		return IFM_100G_AUI2;
1017 	default:
1018 		return IFM_UNKNOWN;
1019 	}
1020 }
1021 
1022 /**
1023  * ice_phy_types_to_max_rate - Returns port's max supported baudrate
1024  * @pi: port info struct
1025  *
1026  * ice_aq_get_phy_caps() w/ ICE_AQC_REPORT_TOPO_CAP_MEDIA parameter needs
1027  * to have been called before this function for it to work.
1028  */
1029 static uint64_t
1030 ice_phy_types_to_max_rate(struct ice_port_info *pi)
1031 {
1032 	uint64_t phy_low = pi->phy.phy_type_low;
1033 	uint64_t phy_high = pi->phy.phy_type_high;
1034 	uint64_t max_rate = 0;
1035 	int bit;
1036 
1037 	/*
1038 	 * These are based on the indices used in the BIT() macros for
1039 	 * ICE_PHY_TYPE_LOW_*
1040 	 */
1041 	static const uint64_t phy_rates[] = {
1042 	    IF_Mbps(100),
1043 	    IF_Mbps(100),
1044 	    IF_Gbps(1ULL),
1045 	    IF_Gbps(1ULL),
1046 	    IF_Gbps(1ULL),
1047 	    IF_Gbps(1ULL),
1048 	    IF_Gbps(1ULL),
1049 	    IF_Mbps(2500ULL),
1050 	    IF_Mbps(2500ULL),
1051 	    IF_Mbps(2500ULL),
1052 	    IF_Gbps(5ULL),
1053 	    IF_Gbps(5ULL),
1054 	    IF_Gbps(10ULL),
1055 	    IF_Gbps(10ULL),
1056 	    IF_Gbps(10ULL),
1057 	    IF_Gbps(10ULL),
1058 	    IF_Gbps(10ULL),
1059 	    IF_Gbps(10ULL),
1060 	    IF_Gbps(10ULL),
1061 	    IF_Gbps(25ULL),
1062 	    IF_Gbps(25ULL),
1063 	    IF_Gbps(25ULL),
1064 	    IF_Gbps(25ULL),
1065 	    IF_Gbps(25ULL),
1066 	    IF_Gbps(25ULL),
1067 	    IF_Gbps(25ULL),
1068 	    IF_Gbps(25ULL),
1069 	    IF_Gbps(25ULL),
1070 	    IF_Gbps(25ULL),
1071 	    IF_Gbps(25ULL),
1072 	    IF_Gbps(40ULL),
1073 	    IF_Gbps(40ULL),
1074 	    IF_Gbps(40ULL),
1075 	    IF_Gbps(40ULL),
1076 	    IF_Gbps(40ULL),
1077 	    IF_Gbps(40ULL),
1078 	    IF_Gbps(50ULL),
1079 	    IF_Gbps(50ULL),
1080 	    IF_Gbps(50ULL),
1081 	    IF_Gbps(50ULL),
1082 	    IF_Gbps(50ULL),
1083 	    IF_Gbps(50ULL),
1084 	    IF_Gbps(50ULL),
1085 	    IF_Gbps(50ULL),
1086 	    IF_Gbps(50ULL),
1087 	    IF_Gbps(50ULL),
1088 	    IF_Gbps(50ULL),
1089 	    IF_Gbps(50ULL),
1090 	    IF_Gbps(50ULL),
1091 	    IF_Gbps(50ULL),
1092 	    IF_Gbps(50ULL),
1093 	    IF_Gbps(100ULL),
1094 	    IF_Gbps(100ULL),
1095 	    IF_Gbps(100ULL),
1096 	    IF_Gbps(100ULL),
1097 	    IF_Gbps(100ULL),
1098 	    IF_Gbps(100ULL),
1099 	    IF_Gbps(100ULL),
1100 	    IF_Gbps(100ULL),
1101 	    IF_Gbps(100ULL),
1102 	    IF_Gbps(100ULL),
1103 	    IF_Gbps(100ULL),
1104 	    IF_Gbps(100ULL),
1105 	    IF_Gbps(100ULL),
1106 	    /* These rates are for ICE_PHY_TYPE_HIGH_* */
1107 	    IF_Gbps(100ULL),
1108 	    IF_Gbps(100ULL),
1109 	    IF_Gbps(100ULL),
1110 	    IF_Gbps(100ULL),
1111 	    IF_Gbps(100ULL)
1112 	};
1113 
1114 	/* coverity[address_of] */
1115 	for_each_set_bit(bit, &phy_high, 64)
1116 		if ((bit + 64) < (int)ARRAY_SIZE(phy_rates))
1117 			max_rate = uqmax(max_rate, phy_rates[(bit + 64)]);
1118 
1119 	/* coverity[address_of] */
1120 	for_each_set_bit(bit, &phy_low, 64)
1121 		max_rate = uqmax(max_rate, phy_rates[bit]);
1122 
1123 	return (max_rate);
1124 }
1125 
/*
 * An if_media type is stored in two pieces: the original 5 bit media variant
 * field, plus extended type bits that live in the options section. IFM_IDX
 * undoes the IFM_X calculation, folding both pieces back into one contiguous
 * number that can be used as a bitmap index.
 */
#define IFM_IDX(x) ((((x) & IFM_ETH_XTYPE) >> IFM_ETH_XSHIFT) | \
		    ((x) & IFM_TMASK))
1133 
1134 /**
1135  * ice_add_media_types - Add supported media types to the media structure
1136  * @sc: ice private softc structure
1137  * @media: ifmedia structure to setup
1138  *
1139  * Looks up the supported phy types, and initializes the various media types
1140  * available.
1141  *
1142  * @pre this function must be protected from being called while another thread
1143  * is accessing the ifmedia types.
1144  */
1145 enum ice_status
1146 ice_add_media_types(struct ice_softc *sc, struct ifmedia *media)
1147 {
1148 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
1149 	struct ice_port_info *pi = sc->hw.port_info;
1150 	enum ice_status status;
1151 	uint64_t phy_low, phy_high;
1152 	int bit;
1153 
1154 	ASSERT_CFG_LOCKED(sc);
1155 
1156 	/* the maximum possible media type index is 511. We probably don't
1157 	 * need most of this space, but this ensures future compatibility when
1158 	 * additional media types are used.
1159 	 */
1160 	ice_declare_bitmap(already_added, 511);
1161 
1162 	/* Remove all previous media types */
1163 	ifmedia_removeall(media);
1164 
1165 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
1166 				     &pcaps, NULL);
1167 	if (status != ICE_SUCCESS) {
1168 		device_printf(sc->dev,
1169 		    "%s: ice_aq_get_phy_caps (ACTIVE) failed; status %s, aq_err %s\n",
1170 		    __func__, ice_status_str(status),
1171 		    ice_aq_str(sc->hw.adminq.sq_last_status));
1172 		return (status);
1173 	}
1174 	phy_low = le64toh(pcaps.phy_type_low);
1175 	phy_high = le64toh(pcaps.phy_type_high);
1176 
1177 	/* make sure the added bitmap is zero'd */
1178 	memset(already_added, 0, sizeof(already_added));
1179 
1180 	/* coverity[address_of] */
1181 	for_each_set_bit(bit, &phy_low, 64) {
1182 		uint64_t type = BIT_ULL(bit);
1183 		int ostype;
1184 
1185 		/* get the OS media type */
1186 		ostype = ice_get_phy_type_low(type);
1187 
1188 		/* don't bother adding the unknown type */
1189 		if (ostype == IFM_UNKNOWN)
1190 			continue;
1191 
1192 		/* only add each media type to the list once */
1193 		if (ice_is_bit_set(already_added, IFM_IDX(ostype)))
1194 			continue;
1195 
1196 		ifmedia_add(media, IFM_ETHER | ostype, 0, NULL);
1197 		ice_set_bit(IFM_IDX(ostype), already_added);
1198 	}
1199 
1200 	/* coverity[address_of] */
1201 	for_each_set_bit(bit, &phy_high, 64) {
1202 		uint64_t type = BIT_ULL(bit);
1203 		int ostype;
1204 
1205 		/* get the OS media type */
1206 		ostype = ice_get_phy_type_high(type);
1207 
1208 		/* don't bother adding the unknown type */
1209 		if (ostype == IFM_UNKNOWN)
1210 			continue;
1211 
1212 		/* only add each media type to the list once */
1213 		if (ice_is_bit_set(already_added, IFM_IDX(ostype)))
1214 			continue;
1215 
1216 		ifmedia_add(media, IFM_ETHER | ostype, 0, NULL);
1217 		ice_set_bit(IFM_IDX(ostype), already_added);
1218 	}
1219 
1220 	/* Use autoselect media by default */
1221 	ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
1222 	ifmedia_set(media, IFM_ETHER | IFM_AUTO);
1223 
1224 	return (ICE_SUCCESS);
1225 }
1226 
1227 /**
1228  * ice_configure_rxq_interrupt - Configure HW Rx queue for an MSI-X interrupt
1229  * @hw: ice hw structure
1230  * @rxqid: Rx queue index in PF space
1231  * @vector: MSI-X vector index in PF/VF space
1232  * @itr_idx: ITR index to use for interrupt
1233  *
1234  * @remark ice_flush() may need to be called after this
1235  */
1236 void
1237 ice_configure_rxq_interrupt(struct ice_hw *hw, u16 rxqid, u16 vector, u8 itr_idx)
1238 {
1239 	u32 val;
1240 
1241 	MPASS(itr_idx <= ICE_ITR_NONE);
1242 
1243 	val = (QINT_RQCTL_CAUSE_ENA_M |
1244 	       (itr_idx << QINT_RQCTL_ITR_INDX_S) |
1245 	       (vector << QINT_RQCTL_MSIX_INDX_S));
1246 	wr32(hw, QINT_RQCTL(rxqid), val);
1247 }
1248 
1249 /**
1250  * ice_configure_all_rxq_interrupts - Configure HW Rx queues for MSI-X interrupts
1251  * @vsi: the VSI to configure
1252  *
1253  * Called when setting up MSI-X interrupts to configure the Rx hardware queues.
1254  */
1255 void
1256 ice_configure_all_rxq_interrupts(struct ice_vsi *vsi)
1257 {
1258 	struct ice_hw *hw = &vsi->sc->hw;
1259 	int i;
1260 
1261 	for (i = 0; i < vsi->num_rx_queues; i++) {
1262 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1263 
1264 		ice_configure_rxq_interrupt(hw, vsi->rx_qmap[rxq->me],
1265 					    rxq->irqv->me, ICE_RX_ITR);
1266 	}
1267 
1268 	ice_flush(hw);
1269 }
1270 
1271 /**
1272  * ice_configure_txq_interrupt - Configure HW Tx queue for an MSI-X interrupt
1273  * @hw: ice hw structure
1274  * @txqid: Tx queue index in PF space
1275  * @vector: MSI-X vector index in PF/VF space
1276  * @itr_idx: ITR index to use for interrupt
1277  *
1278  * @remark ice_flush() may need to be called after this
1279  */
1280 void
1281 ice_configure_txq_interrupt(struct ice_hw *hw, u16 txqid, u16 vector, u8 itr_idx)
1282 {
1283 	u32 val;
1284 
1285 	MPASS(itr_idx <= ICE_ITR_NONE);
1286 
1287 	val = (QINT_TQCTL_CAUSE_ENA_M |
1288 	       (itr_idx << QINT_TQCTL_ITR_INDX_S) |
1289 	       (vector << QINT_TQCTL_MSIX_INDX_S));
1290 	wr32(hw, QINT_TQCTL(txqid), val);
1291 }
1292 
1293 /**
1294  * ice_configure_all_txq_interrupts - Configure HW Tx queues for MSI-X interrupts
1295  * @vsi: the VSI to configure
1296  *
1297  * Called when setting up MSI-X interrupts to configure the Tx hardware queues.
1298  */
1299 void
1300 ice_configure_all_txq_interrupts(struct ice_vsi *vsi)
1301 {
1302 	struct ice_hw *hw = &vsi->sc->hw;
1303 	int i;
1304 
1305 	for (i = 0; i < vsi->num_tx_queues; i++) {
1306 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1307 
1308 		ice_configure_txq_interrupt(hw, vsi->tx_qmap[txq->me],
1309 					    txq->irqv->me, ICE_TX_ITR);
1310 	}
1311 
1312 	ice_flush(hw);
1313 }
1314 
1315 /**
1316  * ice_flush_rxq_interrupts - Unconfigure Hw Rx queues MSI-X interrupt cause
1317  * @vsi: the VSI to configure
1318  *
1319  * Unset the CAUSE_ENA flag of the TQCTL register for each queue, then trigger
1320  * a software interrupt on that cause. This is required as part of the Rx
1321  * queue disable logic to dissociate the Rx queue from the interrupt.
1322  *
1323  * Note: this function must be called prior to disabling Rx queues with
1324  * ice_control_all_rx_queues, otherwise the Rx queue may not be disabled properly.
1325  */
1326 void
1327 ice_flush_rxq_interrupts(struct ice_vsi *vsi)
1328 {
1329 	struct ice_hw *hw = &vsi->sc->hw;
1330 	int i;
1331 
1332 	for (i = 0; i < vsi->num_rx_queues; i++) {
1333 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1334 		u32 reg, val;
1335 
1336 		/* Clear the CAUSE_ENA flag */
1337 		reg = vsi->rx_qmap[rxq->me];
1338 		val = rd32(hw, QINT_RQCTL(reg));
1339 		val &= ~QINT_RQCTL_CAUSE_ENA_M;
1340 		wr32(hw, QINT_RQCTL(reg), val);
1341 
1342 		ice_flush(hw);
1343 
1344 		/* Trigger a software interrupt to complete interrupt
1345 		 * dissociation.
1346 		 */
1347 		wr32(hw, GLINT_DYN_CTL(rxq->irqv->me),
1348 		     GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M);
1349 	}
1350 }
1351 
1352 /**
1353  * ice_flush_txq_interrupts - Unconfigure Hw Tx queues MSI-X interrupt cause
1354  * @vsi: the VSI to configure
1355  *
1356  * Unset the CAUSE_ENA flag of the TQCTL register for each queue, then trigger
1357  * a software interrupt on that cause. This is required as part of the Tx
1358  * queue disable logic to dissociate the Tx queue from the interrupt.
1359  *
1360  * Note: this function must be called prior to ice_vsi_disable_tx, otherwise
1361  * the Tx queue disable may not complete properly.
1362  */
1363 void
1364 ice_flush_txq_interrupts(struct ice_vsi *vsi)
1365 {
1366 	struct ice_hw *hw = &vsi->sc->hw;
1367 	int i;
1368 
1369 	for (i = 0; i < vsi->num_tx_queues; i++) {
1370 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1371 		u32 reg, val;
1372 
1373 		/* Clear the CAUSE_ENA flag */
1374 		reg = vsi->tx_qmap[txq->me];
1375 		val = rd32(hw, QINT_TQCTL(reg));
1376 		val &= ~QINT_TQCTL_CAUSE_ENA_M;
1377 		wr32(hw, QINT_TQCTL(reg), val);
1378 
1379 		ice_flush(hw);
1380 
1381 		/* Trigger a software interrupt to complete interrupt
1382 		 * dissociation.
1383 		 */
1384 		wr32(hw, GLINT_DYN_CTL(txq->irqv->me),
1385 		     GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M);
1386 	}
1387 }
1388 
1389 /**
1390  * ice_configure_rx_itr - Configure the Rx ITR settings for this VSI
1391  * @vsi: the VSI to configure
1392  *
1393  * Program the hardware ITR registers with the settings for this VSI.
1394  */
1395 void
1396 ice_configure_rx_itr(struct ice_vsi *vsi)
1397 {
1398 	struct ice_hw *hw = &vsi->sc->hw;
1399 	int i;
1400 
1401 	/* TODO: Handle per-queue/per-vector ITR? */
1402 
1403 	for (i = 0; i < vsi->num_rx_queues; i++) {
1404 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1405 
1406 		wr32(hw, GLINT_ITR(ICE_RX_ITR, rxq->irqv->me),
1407 		     ice_itr_to_reg(hw, vsi->rx_itr));
1408 	}
1409 
1410 	ice_flush(hw);
1411 }
1412 
1413 /**
1414  * ice_configure_tx_itr - Configure the Tx ITR settings for this VSI
1415  * @vsi: the VSI to configure
1416  *
1417  * Program the hardware ITR registers with the settings for this VSI.
1418  */
1419 void
1420 ice_configure_tx_itr(struct ice_vsi *vsi)
1421 {
1422 	struct ice_hw *hw = &vsi->sc->hw;
1423 	int i;
1424 
1425 	/* TODO: Handle per-queue/per-vector ITR? */
1426 
1427 	for (i = 0; i < vsi->num_tx_queues; i++) {
1428 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1429 
1430 		wr32(hw, GLINT_ITR(ICE_TX_ITR, txq->irqv->me),
1431 		     ice_itr_to_reg(hw, vsi->tx_itr));
1432 	}
1433 
1434 	ice_flush(hw);
1435 }
1436 
1437 /**
1438  * ice_setup_tx_ctx - Setup an ice_tlan_ctx structure for a queue
1439  * @txq: the Tx queue to configure
1440  * @tlan_ctx: the Tx LAN queue context structure to initialize
1441  * @pf_q: real queue number
1442  */
1443 static int
1444 ice_setup_tx_ctx(struct ice_tx_queue *txq, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
1445 {
1446 	struct ice_vsi *vsi = txq->vsi;
1447 	struct ice_softc *sc = vsi->sc;
1448 	struct ice_hw *hw = &sc->hw;
1449 
1450 	tlan_ctx->port_num = hw->port_info->lport;
1451 
1452 	/* number of descriptors in the queue */
1453 	tlan_ctx->qlen = txq->desc_count;
1454 
1455 	/* set the transmit queue base address, defined in 128 byte units */
1456 	tlan_ctx->base = txq->tx_paddr >> 7;
1457 
1458 	tlan_ctx->pf_num = hw->pf_id;
1459 
1460 	switch (vsi->type) {
1461 	case ICE_VSI_PF:
1462 		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
1463 		break;
1464 	default:
1465 		return (ENODEV);
1466 	}
1467 
1468 	tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
1469 
1470 	/* Enable TSO */
1471 	tlan_ctx->tso_ena = 1;
1472 	tlan_ctx->internal_usage_flag = 1;
1473 
1474 	tlan_ctx->tso_qnum = pf_q;
1475 
1476 	/*
1477 	 * Stick with the older legacy Tx queue interface, instead of the new
1478 	 * advanced queue interface.
1479 	 */
1480 	tlan_ctx->legacy_int = 1;
1481 
1482 	/* Descriptor WB mode */
1483 	tlan_ctx->wb_mode = 0;
1484 
1485 	return (0);
1486 }
1487 
1488 /**
1489  * ice_cfg_vsi_for_tx - Configure the hardware for Tx
1490  * @vsi: the VSI to configure
1491  *
1492  * Configure the device Tx queues through firmware AdminQ commands. After
1493  * this, Tx queues will be ready for transmit.
1494  */
1495 int
1496 ice_cfg_vsi_for_tx(struct ice_vsi *vsi)
1497 {
1498 	struct ice_aqc_add_tx_qgrp *qg;
1499 	struct ice_hw *hw = &vsi->sc->hw;
1500 	device_t dev = vsi->sc->dev;
1501 	enum ice_status status;
1502 	int i;
1503 	int err = 0;
1504 	u16 qg_size, pf_q;
1505 
1506 	qg_size = ice_struct_size(qg, txqs, 1);
1507 	qg = (struct ice_aqc_add_tx_qgrp *)malloc(qg_size, M_ICE, M_NOWAIT|M_ZERO);
1508 	if (!qg)
1509 		return (ENOMEM);
1510 
1511 	qg->num_txqs = 1;
1512 
1513 	for (i = 0; i < vsi->num_tx_queues; i++) {
1514 		struct ice_tlan_ctx tlan_ctx = { 0 };
1515 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1516 
1517 		pf_q = vsi->tx_qmap[txq->me];
1518 		qg->txqs[0].txq_id = htole16(pf_q);
1519 
1520 		err = ice_setup_tx_ctx(txq, &tlan_ctx, pf_q);
1521 		if (err)
1522 			goto free_txqg;
1523 
1524 		ice_set_ctx(hw, (u8 *)&tlan_ctx, qg->txqs[0].txq_ctx,
1525 			    ice_tlan_ctx_info);
1526 
1527 		status = ice_ena_vsi_txq(hw->port_info, vsi->idx, txq->tc,
1528 					 txq->q_handle, 1, qg, qg_size, NULL);
1529 		if (status) {
1530 			device_printf(dev,
1531 				      "Failed to set LAN Tx queue %d (TC %d, handle %d) context, err %s aq_err %s\n",
1532 				      i, txq->tc, txq->q_handle,
1533 				      ice_status_str(status),
1534 				      ice_aq_str(hw->adminq.sq_last_status));
1535 			err = ENODEV;
1536 			goto free_txqg;
1537 		}
1538 
1539 		/* Keep track of the Tx queue TEID */
1540 		if (pf_q == le16toh(qg->txqs[0].txq_id))
1541 			txq->q_teid = le32toh(qg->txqs[0].q_teid);
1542 	}
1543 
1544 free_txqg:
1545 	free(qg, M_ICE);
1546 
1547 	return (err);
1548 }
1549 
1550 /**
1551  * ice_setup_rx_ctx - Setup an Rx context structure for a receive queue
1552  * @rxq: the receive queue to program
1553  *
1554  * Setup an Rx queue context structure and program it into the hardware
1555  * registers. This is a necessary step for enabling the Rx queue.
1556  *
1557  * @pre the VSI associated with this queue must have initialized mbuf_sz
1558  */
1559 static int
1560 ice_setup_rx_ctx(struct ice_rx_queue *rxq)
1561 {
1562 	struct ice_rlan_ctx rlan_ctx = {0};
1563 	struct ice_vsi *vsi = rxq->vsi;
1564 	struct ice_softc *sc = vsi->sc;
1565 	struct ice_hw *hw = &sc->hw;
1566 	enum ice_status status;
1567 	u32 rxdid = ICE_RXDID_FLEX_NIC;
1568 	u32 regval;
1569 	u16 pf_q;
1570 
1571 	pf_q = vsi->rx_qmap[rxq->me];
1572 
1573 	/* set the receive queue base address, defined in 128 byte units */
1574 	rlan_ctx.base = rxq->rx_paddr >> 7;
1575 
1576 	rlan_ctx.qlen = rxq->desc_count;
1577 
1578 	rlan_ctx.dbuf = vsi->mbuf_sz >> ICE_RLAN_CTX_DBUF_S;
1579 
1580 	/* use 32 byte descriptors */
1581 	rlan_ctx.dsize = 1;
1582 
1583 	/* Strip the Ethernet CRC bytes before the packet is posted to the
1584 	 * host memory.
1585 	 */
1586 	rlan_ctx.crcstrip = 1;
1587 
1588 	rlan_ctx.l2tsel = 1;
1589 
1590 	/* don't do header splitting */
1591 	rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
1592 	rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
1593 	rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
1594 
1595 	/* strip VLAN from inner headers */
1596 	rlan_ctx.showiv = 1;
1597 
1598 	rlan_ctx.rxmax = min(vsi->max_frame_size,
1599 			     ICE_MAX_RX_SEGS * vsi->mbuf_sz);
1600 
1601 	rlan_ctx.lrxqthresh = 1;
1602 
1603 	if (vsi->type != ICE_VSI_VF) {
1604 		regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
1605 		regval &= ~QRXFLXP_CNTXT_RXDID_IDX_M;
1606 		regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
1607 			QRXFLXP_CNTXT_RXDID_IDX_M;
1608 
1609 		regval &= ~QRXFLXP_CNTXT_RXDID_PRIO_M;
1610 		regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
1611 			QRXFLXP_CNTXT_RXDID_PRIO_M;
1612 
1613 		wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
1614 	}
1615 
1616 	status = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
1617 	if (status) {
1618 		device_printf(sc->dev,
1619 			      "Failed to set LAN Rx queue context, err %s aq_err %s\n",
1620 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
1621 		return (EIO);
1622 	}
1623 
1624 	wr32(hw, rxq->tail, 0);
1625 
1626 	return 0;
1627 }
1628 
1629 /**
1630  * ice_cfg_vsi_for_rx - Configure the hardware for Rx
1631  * @vsi: the VSI to configure
1632  *
1633  * Prepare an Rx context descriptor and configure the device to receive
1634  * traffic.
1635  *
1636  * @pre the VSI must have initialized mbuf_sz
1637  */
1638 int
1639 ice_cfg_vsi_for_rx(struct ice_vsi *vsi)
1640 {
1641 	int i, err;
1642 
1643 	for (i = 0; i < vsi->num_rx_queues; i++) {
1644 		MPASS(vsi->mbuf_sz > 0);
1645 		err = ice_setup_rx_ctx(&vsi->rx_queues[i]);
1646 		if (err)
1647 			return err;
1648 	}
1649 
1650 	return (0);
1651 }
1652 
1653 /**
1654  * ice_is_rxq_ready - Check if an Rx queue is ready
1655  * @hw: ice hw structure
1656  * @pf_q: absolute PF queue index to check
1657  * @reg: on successful return, contains qrx_ctrl contents
1658  *
1659  * Reads the QRX_CTRL register and verifies if the queue is in a consistent
1660  * state. That is, QENA_REQ matches QENA_STAT. Used to check before making
1661  * a request to change the queue, as well as to verify the request has
1662  * finished. The queue should change status within a few microseconds, so we
1663  * use a small delay while polling the register.
1664  *
1665  * Returns an error code if the queue does not update after a few retries.
1666  */
1667 static int
1668 ice_is_rxq_ready(struct ice_hw *hw, int pf_q, u32 *reg)
1669 {
1670 	u32 qrx_ctrl, qena_req, qena_stat;
1671 	int i;
1672 
1673 	for (i = 0; i < ICE_Q_WAIT_RETRY_LIMIT; i++) {
1674 		qrx_ctrl = rd32(hw, QRX_CTRL(pf_q));
1675 		qena_req = (qrx_ctrl >> QRX_CTRL_QENA_REQ_S) & 1;
1676 		qena_stat = (qrx_ctrl >> QRX_CTRL_QENA_STAT_S) & 1;
1677 
1678 		/* if the request and status bits equal, then the queue is
1679 		 * fully disabled or enabled.
1680 		 */
1681 		if (qena_req == qena_stat) {
1682 			*reg = qrx_ctrl;
1683 			return (0);
1684 		}
1685 
1686 		/* wait a few microseconds before we check again */
1687 		DELAY(10);
1688 	}
1689 
1690 	return (ETIMEDOUT);
1691 }
1692 
1693 /**
1694  * ice_control_rx_queue - Configure hardware to start or stop an Rx queue
1695  * @vsi: VSI containing queue to enable/disable
1696  * @qidx: Queue index in VSI space
1697  * @enable: true to enable queue, false to disable
1698  *
1699  * Control the Rx queue through the QRX_CTRL register, enabling or disabling
1700  * it. Wait for the appropriate time to ensure that the queue has actually
1701  * reached the expected state.
1702  */
1703 int
1704 ice_control_rx_queue(struct ice_vsi *vsi, u16 qidx, bool enable)
1705 {
1706 	struct ice_hw *hw = &vsi->sc->hw;
1707 	device_t dev = vsi->sc->dev;
1708 	u32 qrx_ctrl = 0;
1709 	int err;
1710 
1711 	struct ice_rx_queue *rxq = &vsi->rx_queues[qidx];
1712 	int pf_q = vsi->rx_qmap[rxq->me];
1713 
1714 	err = ice_is_rxq_ready(hw, pf_q, &qrx_ctrl);
1715 	if (err) {
1716 		device_printf(dev,
1717 			      "Rx queue %d is not ready\n",
1718 			      pf_q);
1719 		return err;
1720 	}
1721 
1722 	/* Skip if the queue is already in correct state */
1723 	if (enable == !!(qrx_ctrl & QRX_CTRL_QENA_STAT_M))
1724 		return (0);
1725 
1726 	if (enable)
1727 		qrx_ctrl |= QRX_CTRL_QENA_REQ_M;
1728 	else
1729 		qrx_ctrl &= ~QRX_CTRL_QENA_REQ_M;
1730 	wr32(hw, QRX_CTRL(pf_q), qrx_ctrl);
1731 
1732 	/* wait for the queue to finalize the request */
1733 	err = ice_is_rxq_ready(hw, pf_q, &qrx_ctrl);
1734 	if (err) {
1735 		device_printf(dev,
1736 			      "Rx queue %d %sable timeout\n",
1737 			      pf_q, (enable ? "en" : "dis"));
1738 		return err;
1739 	}
1740 
1741 	/* this should never happen */
1742 	if (enable != !!(qrx_ctrl & QRX_CTRL_QENA_STAT_M)) {
1743 		device_printf(dev,
1744 			      "Rx queue %d invalid state\n",
1745 			      pf_q);
1746 		return (EDOOFUS);
1747 	}
1748 
1749 	return (0);
1750 }
1751 
1752 /**
1753  * ice_control_all_rx_queues - Configure hardware to start or stop the Rx queues
1754  * @vsi: VSI to enable/disable queues
1755  * @enable: true to enable queues, false to disable
1756  *
1757  * Control the Rx queues through the QRX_CTRL register, enabling or disabling
1758  * them. Wait for the appropriate time to ensure that the queues have actually
1759  * reached the expected state.
1760  */
1761 int
1762 ice_control_all_rx_queues(struct ice_vsi *vsi, bool enable)
1763 {
1764 	int i, err;
1765 
1766 	/* TODO: amortize waits by changing all queues up front and then
1767 	 * checking their status afterwards. This will become more necessary
1768 	 * when we have a large number of queues.
1769 	 */
1770 	for (i = 0; i < vsi->num_rx_queues; i++) {
1771 		err = ice_control_rx_queue(vsi, i, enable);
1772 		if (err)
1773 			break;
1774 	}
1775 
1776 	return (0);
1777 }
1778 
1779 /**
1780  * ice_add_mac_to_list - Add MAC filter to a MAC filter list
1781  * @vsi: the VSI to forward to
1782  * @list: list which contains MAC filter entries
1783  * @addr: the MAC address to be added
1784  * @action: filter action to perform on match
1785  *
1786  * Adds a MAC address filter to the list which will be forwarded to firmware
1787  * to add a series of MAC address filters.
1788  *
1789  * Returns 0 on success, and an error code on failure.
1790  *
1791  */
1792 static int
1793 ice_add_mac_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
1794 		    const u8 *addr, enum ice_sw_fwd_act_type action)
1795 {
1796 	struct ice_fltr_list_entry *entry;
1797 
1798 	entry = (__typeof(entry))malloc(sizeof(*entry), M_ICE, M_NOWAIT|M_ZERO);
1799 	if (!entry)
1800 		return (ENOMEM);
1801 
1802 	entry->fltr_info.flag = ICE_FLTR_TX;
1803 	entry->fltr_info.src_id = ICE_SRC_ID_VSI;
1804 	entry->fltr_info.lkup_type = ICE_SW_LKUP_MAC;
1805 	entry->fltr_info.fltr_act = action;
1806 	entry->fltr_info.vsi_handle = vsi->idx;
1807 	bcopy(addr, entry->fltr_info.l_data.mac.mac_addr, ETHER_ADDR_LEN);
1808 
1809 	LIST_ADD(&entry->list_entry, list);
1810 
1811 	return 0;
1812 }
1813 
1814 /**
1815  * ice_free_fltr_list - Free memory associated with a MAC address list
1816  * @list: the list to free
1817  *
1818  * Free the memory of each entry associated with the list.
1819  */
1820 static void
1821 ice_free_fltr_list(struct ice_list_head *list)
1822 {
1823 	struct ice_fltr_list_entry *e, *tmp;
1824 
1825 	LIST_FOR_EACH_ENTRY_SAFE(e, tmp, list, ice_fltr_list_entry, list_entry) {
1826 		LIST_DEL(&e->list_entry);
1827 		free(e, M_ICE);
1828 	}
1829 }
1830 
1831 /**
1832  * ice_add_vsi_mac_filter - Add a MAC address filter for a VSI
1833  * @vsi: the VSI to add the filter for
1834  * @addr: MAC address to add a filter for
1835  *
1836  * Add a MAC address filter for a given VSI. This is a wrapper around
1837  * ice_add_mac to simplify the interface. First, it only accepts a single
1838  * address, so we don't have to mess around with the list setup in other
1839  * functions. Second, it ignores the ICE_ERR_ALREADY_EXISTS error, so that
1840  * callers don't need to worry about attempting to add the same filter twice.
1841  */
1842 int
1843 ice_add_vsi_mac_filter(struct ice_vsi *vsi, const u8 *addr)
1844 {
1845 	struct ice_list_head mac_addr_list;
1846 	struct ice_hw *hw = &vsi->sc->hw;
1847 	device_t dev = vsi->sc->dev;
1848 	enum ice_status status;
1849 	int err = 0;
1850 
1851 	INIT_LIST_HEAD(&mac_addr_list);
1852 
1853 	err = ice_add_mac_to_list(vsi, &mac_addr_list, addr, ICE_FWD_TO_VSI);
1854 	if (err)
1855 		goto free_mac_list;
1856 
1857 	status = ice_add_mac(hw, &mac_addr_list);
1858 	if (status == ICE_ERR_ALREADY_EXISTS) {
1859 		; /* Don't complain if we try to add a filter that already exists */
1860 	} else if (status) {
1861 		device_printf(dev,
1862 			      "Failed to add a filter for MAC %6D, err %s aq_err %s\n",
1863 			      addr, ":",
1864 			      ice_status_str(status),
1865 			      ice_aq_str(hw->adminq.sq_last_status));
1866 		err = (EIO);
1867 	}
1868 
1869 free_mac_list:
1870 	ice_free_fltr_list(&mac_addr_list);
1871 	return err;
1872 }
1873 
1874 /**
1875  * ice_cfg_pf_default_mac_filters - Setup default unicast and broadcast addrs
1876  * @sc: device softc structure
1877  *
1878  * Program the default unicast and broadcast filters for the PF VSI.
1879  */
1880 int
1881 ice_cfg_pf_default_mac_filters(struct ice_softc *sc)
1882 {
1883 	struct ice_vsi *vsi = &sc->pf_vsi;
1884 	struct ice_hw *hw = &sc->hw;
1885 	int err;
1886 
1887 	/* Add the LAN MAC address */
1888 	err = ice_add_vsi_mac_filter(vsi, hw->port_info->mac.lan_addr);
1889 	if (err)
1890 		return err;
1891 
1892 	/* Add the broadcast address */
1893 	err = ice_add_vsi_mac_filter(vsi, broadcastaddr);
1894 	if (err)
1895 		return err;
1896 
1897 	return (0);
1898 }
1899 
1900 /**
1901  * ice_remove_vsi_mac_filter - Remove a MAC address filter for a VSI
1902  * @vsi: the VSI to add the filter for
1903  * @addr: MAC address to remove a filter for
1904  *
1905  * Remove a MAC address filter from a given VSI. This is a wrapper around
1906  * ice_remove_mac to simplify the interface. First, it only accepts a single
1907  * address, so we don't have to mess around with the list setup in other
1908  * functions. Second, it ignores the ICE_ERR_DOES_NOT_EXIST error, so that
1909  * callers don't need to worry about attempting to remove filters which
1910  * haven't yet been added.
1911  */
1912 int
1913 ice_remove_vsi_mac_filter(struct ice_vsi *vsi, const u8 *addr)
1914 {
1915 	struct ice_list_head mac_addr_list;
1916 	struct ice_hw *hw = &vsi->sc->hw;
1917 	device_t dev = vsi->sc->dev;
1918 	enum ice_status status;
1919 	int err = 0;
1920 
1921 	INIT_LIST_HEAD(&mac_addr_list);
1922 
1923 	err = ice_add_mac_to_list(vsi, &mac_addr_list, addr, ICE_FWD_TO_VSI);
1924 	if (err)
1925 		goto free_mac_list;
1926 
1927 	status = ice_remove_mac(hw, &mac_addr_list);
1928 	if (status == ICE_ERR_DOES_NOT_EXIST) {
1929 		; /* Don't complain if we try to remove a filter that doesn't exist */
1930 	} else if (status) {
1931 		device_printf(dev,
1932 			      "Failed to remove a filter for MAC %6D, err %s aq_err %s\n",
1933 			      addr, ":",
1934 			      ice_status_str(status),
1935 			      ice_aq_str(hw->adminq.sq_last_status));
1936 		err = (EIO);
1937 	}
1938 
1939 free_mac_list:
1940 	ice_free_fltr_list(&mac_addr_list);
1941 	return err;
1942 }
1943 
1944 /**
1945  * ice_rm_pf_default_mac_filters - Remove default unicast and broadcast addrs
1946  * @sc: device softc structure
1947  *
1948  * Remove the default unicast and broadcast filters from the PF VSI.
1949  */
1950 int
1951 ice_rm_pf_default_mac_filters(struct ice_softc *sc)
1952 {
1953 	struct ice_vsi *vsi = &sc->pf_vsi;
1954 	struct ice_hw *hw = &sc->hw;
1955 	int err;
1956 
1957 	/* Remove the LAN MAC address */
1958 	err = ice_remove_vsi_mac_filter(vsi, hw->port_info->mac.lan_addr);
1959 	if (err)
1960 		return err;
1961 
1962 	/* Remove the broadcast address */
1963 	err = ice_remove_vsi_mac_filter(vsi, broadcastaddr);
1964 	if (err)
1965 		return (EIO);
1966 
1967 	return (0);
1968 }
1969 
1970 /**
1971  * ice_check_ctrlq_errors - Check for and report controlq errors
1972  * @sc: device private structure
1973  * @qname: name of the controlq
1974  * @cq: the controlq to check
1975  *
1976  * Check and report controlq errors. Currently all we do is report them to the
1977  * kernel message log, but we might want to improve this in the future, such
1978  * as to keep track of statistics.
1979  */
1980 static void
1981 ice_check_ctrlq_errors(struct ice_softc *sc, const char *qname,
1982 		       struct ice_ctl_q_info *cq)
1983 {
1984 	struct ice_hw *hw = &sc->hw;
1985 	u32 val;
1986 
1987 	/* Check for error indications. Note that all the controlqs use the
1988 	 * same register layout, so we use the PF_FW_AxQLEN defines only.
1989 	 */
1990 	val = rd32(hw, cq->rq.len);
1991 	if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
1992 		   PF_FW_ARQLEN_ARQCRIT_M)) {
1993 		if (val & PF_FW_ARQLEN_ARQVFE_M)
1994 			device_printf(sc->dev,
1995 				"%s Receive Queue VF Error detected\n", qname);
1996 		if (val & PF_FW_ARQLEN_ARQOVFL_M)
1997 			device_printf(sc->dev,
1998 				"%s Receive Queue Overflow Error detected\n",
1999 				qname);
2000 		if (val & PF_FW_ARQLEN_ARQCRIT_M)
2001 			device_printf(sc->dev,
2002 				"%s Receive Queue Critical Error detected\n",
2003 				qname);
2004 		val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
2005 			 PF_FW_ARQLEN_ARQCRIT_M);
2006 		wr32(hw, cq->rq.len, val);
2007 	}
2008 
2009 	val = rd32(hw, cq->sq.len);
2010 	if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
2011 		   PF_FW_ATQLEN_ATQCRIT_M)) {
2012 		if (val & PF_FW_ATQLEN_ATQVFE_M)
2013 			device_printf(sc->dev,
2014 				"%s Send Queue VF Error detected\n", qname);
2015 		if (val & PF_FW_ATQLEN_ATQOVFL_M)
2016 			device_printf(sc->dev,
2017 				"%s Send Queue Overflow Error detected\n",
2018 				qname);
2019 		if (val & PF_FW_ATQLEN_ATQCRIT_M)
2020 			device_printf(sc->dev,
2021 				"%s Send Queue Critical Error detected\n",
2022 				qname);
2023 		val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
2024 			 PF_FW_ATQLEN_ATQCRIT_M);
2025 		wr32(hw, cq->sq.len, val);
2026 	}
2027 }
2028 
2029 /**
2030  * ice_process_link_event - Process a link event indication from firmware
2031  * @sc: device softc structure
2032  * @e: the received event data
2033  *
2034  * Gets the current link status from hardware, and may print a message if an
2035  * unqualified is detected.
2036  */
2037 static void
2038 ice_process_link_event(struct ice_softc *sc,
2039 		       struct ice_rq_event_info __invariant_only *e)
2040 {
2041 	struct ice_port_info *pi = sc->hw.port_info;
2042 	struct ice_hw *hw = &sc->hw;
2043 	device_t dev = sc->dev;
2044 	enum ice_status status;
2045 
2046 	/* Sanity check that the data length isn't too small */
2047 	MPASS(le16toh(e->desc.datalen) >= ICE_GET_LINK_STATUS_DATALEN_V1);
2048 
2049 	/*
2050 	 * Even though the adapter gets link status information inside the
2051 	 * event, it needs to send a Get Link Status AQ command in order
2052 	 * to re-enable link events.
2053 	 */
2054 	pi->phy.get_link_info = true;
2055 	ice_get_link_status(pi, &sc->link_up);
2056 
2057 	if (pi->phy.link_info.topo_media_conflict &
2058 	   (ICE_AQ_LINK_TOPO_CONFLICT | ICE_AQ_LINK_MEDIA_CONFLICT |
2059 	    ICE_AQ_LINK_TOPO_CORRUPT))
2060 		device_printf(dev,
2061 		    "Possible mis-configuration of the Ethernet port detected; please use the Intel (R) Ethernet Port Configuration Tool utility to address the issue.\n");
2062 
2063 	if ((pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) &&
2064 	    !(pi->phy.link_info.link_info & ICE_AQ_LINK_UP)) {
2065 		if (!(pi->phy.link_info.an_info & ICE_AQ_QUALIFIED_MODULE))
2066 			device_printf(dev,
2067 			    "Link is disabled on this device because an unsupported module type was detected! Refer to the Intel (R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
2068 		if (pi->phy.link_info.link_cfg_err & ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED)
2069 			device_printf(dev,
2070 			    "The module's power requirements exceed the device's power supply. Cannot start link.\n");
2071 		if (pi->phy.link_info.link_cfg_err & ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT)
2072 			device_printf(dev,
2073 			    "The installed module is incompatible with the device's NVM image. Cannot start link.\n");
2074 	}
2075 
2076 	if (!(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
2077 		if (!ice_testandset_state(&sc->state, ICE_STATE_NO_MEDIA)) {
2078 			status = ice_aq_set_link_restart_an(pi, false, NULL);
2079 			if (status != ICE_SUCCESS)
2080 				device_printf(dev,
2081 				    "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n",
2082 				    __func__, ice_status_str(status),
2083 				    ice_aq_str(hw->adminq.sq_last_status));
2084 		}
2085 	}
2086 	/* ICE_STATE_NO_MEDIA is cleared when polling task detects media */
2087 
2088 	/* Indicate that link status must be reported again */
2089 	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
2090 
2091 	/* OS link info is updated elsewhere */
2092 }
2093 
2094 /**
2095  * ice_process_ctrlq_event - Respond to a controlq event
2096  * @sc: device private structure
2097  * @qname: the name for this controlq
2098  * @event: the event to process
2099  *
2100  * Perform actions in response to various controlq event notifications.
2101  */
2102 static void
2103 ice_process_ctrlq_event(struct ice_softc *sc, const char *qname,
2104 			struct ice_rq_event_info *event)
2105 {
2106 	u16 opcode;
2107 
2108 	opcode = le16toh(event->desc.opcode);
2109 
2110 	switch (opcode) {
2111 	case ice_aqc_opc_get_link_status:
2112 		ice_process_link_event(sc, event);
2113 		break;
2114 	case ice_mbx_opc_send_msg_to_pf:
2115 		/* TODO: handle IOV event */
2116 		break;
2117 	case ice_aqc_opc_fw_logs_event:
2118 		ice_handle_fw_log_event(sc, &event->desc, event->msg_buf);
2119 		break;
2120 	case ice_aqc_opc_lldp_set_mib_change:
2121 		ice_handle_mib_change_event(sc, event);
2122 		break;
2123 	case ice_aqc_opc_event_lan_overflow:
2124 		ice_handle_lan_overflow_event(sc, event);
2125 		break;
2126 	case ice_aqc_opc_get_health_status:
2127 		ice_handle_health_status_event(sc, event);
2128 		break;
2129 	default:
2130 		device_printf(sc->dev,
2131 			      "%s Receive Queue unhandled event 0x%04x ignored\n",
2132 			      qname, opcode);
2133 	}
2134 }
2135 
2136 /**
2137  * ice_process_ctrlq - helper function to process controlq rings
2138  * @sc: device private structure
2139  * @q_type: specific control queue type
2140  * @pending: return parameter to track remaining events
2141  *
2142  * Process controlq events for a given control queue type. Returns zero on
2143  * success, and an error code on failure. If successful, pending is the number
2144  * of remaining events left in the queue.
2145  */
2146 int
2147 ice_process_ctrlq(struct ice_softc *sc, enum ice_ctl_q q_type, u16 *pending)
2148 {
2149 	struct ice_rq_event_info event = { { 0 } };
2150 	struct ice_hw *hw = &sc->hw;
2151 	struct ice_ctl_q_info *cq;
2152 	enum ice_status status;
2153 	const char *qname;
2154 	int loop = 0;
2155 
2156 	switch (q_type) {
2157 	case ICE_CTL_Q_ADMIN:
2158 		cq = &hw->adminq;
2159 		qname = "Admin";
2160 		break;
2161 	case ICE_CTL_Q_MAILBOX:
2162 		cq = &hw->mailboxq;
2163 		qname = "Mailbox";
2164 		break;
2165 	default:
2166 		device_printf(sc->dev,
2167 			      "Unknown control queue type 0x%x\n",
2168 			      q_type);
2169 		return 0;
2170 	}
2171 
2172 	ice_check_ctrlq_errors(sc, qname, cq);
2173 
2174 	/*
2175 	 * Control queue processing happens during the admin task which may be
2176 	 * holding a non-sleepable lock, so we *must* use M_NOWAIT here.
2177 	 */
2178 	event.buf_len = cq->rq_buf_size;
2179 	event.msg_buf = (u8 *)malloc(event.buf_len, M_ICE, M_ZERO | M_NOWAIT);
2180 	if (!event.msg_buf) {
2181 		device_printf(sc->dev,
2182 			      "Unable to allocate memory for %s Receive Queue event\n",
2183 			      qname);
2184 		return (ENOMEM);
2185 	}
2186 
2187 	do {
2188 		status = ice_clean_rq_elem(hw, cq, &event, pending);
2189 		if (status == ICE_ERR_AQ_NO_WORK)
2190 			break;
2191 		if (status) {
2192 			if (q_type == ICE_CTL_Q_ADMIN)
2193 				device_printf(sc->dev,
2194 					      "%s Receive Queue event error %s\n",
2195 					      qname, ice_status_str(status));
2196 			else
2197 				device_printf(sc->dev,
2198 					      "%s Receive Queue event error %s\n",
2199 					      qname, ice_status_str(status));
2200 			free(event.msg_buf, M_ICE);
2201 			return (EIO);
2202 		}
2203 		/* XXX should we separate this handler by controlq type? */
2204 		ice_process_ctrlq_event(sc, qname, &event);
2205 	} while (*pending && (++loop < ICE_CTRLQ_WORK_LIMIT));
2206 
2207 	free(event.msg_buf, M_ICE);
2208 
2209 	return 0;
2210 }
2211 
2212 /**
2213  * pkg_ver_empty - Check if a package version is empty
2214  * @pkg_ver: the package version to check
2215  * @pkg_name: the package name to check
2216  *
2217  * Checks if the package version structure is empty. We consider a package
2218  * version as empty if none of the versions are non-zero and the name string
2219  * is null as well.
2220  *
2221  * This is used to check if the package version was initialized by the driver,
2222  * as we do not expect an actual DDP package file to have a zero'd version and
2223  * name.
2224  *
2225  * @returns true if the package version is valid, or false otherwise.
2226  */
2227 static bool
2228 pkg_ver_empty(struct ice_pkg_ver *pkg_ver, u8 *pkg_name)
2229 {
2230 	return (pkg_name[0] == '\0' &&
2231 		pkg_ver->major == 0 &&
2232 		pkg_ver->minor == 0 &&
2233 		pkg_ver->update == 0 &&
2234 		pkg_ver->draft == 0);
2235 }
2236 
2237 /**
2238  * pkg_ver_compatible - Check if the package version is compatible
2239  * @pkg_ver: the package version to check
2240  *
2241  * Compares the package version number to the driver's expected major/minor
2242  * version. Returns an integer indicating whether the version is older, newer,
2243  * or compatible with the driver.
2244  *
2245  * @returns 0 if the package version is compatible, -1 if the package version
2246  * is older, and 1 if the package version is newer than the driver version.
2247  */
2248 static int
2249 pkg_ver_compatible(struct ice_pkg_ver *pkg_ver)
2250 {
2251 	if (pkg_ver->major > ICE_PKG_SUPP_VER_MAJ)
2252 		return (1); /* newer */
2253 	else if ((pkg_ver->major == ICE_PKG_SUPP_VER_MAJ) &&
2254 		 (pkg_ver->minor > ICE_PKG_SUPP_VER_MNR))
2255 		return (1); /* newer */
2256 	else if ((pkg_ver->major == ICE_PKG_SUPP_VER_MAJ) &&
2257 		 (pkg_ver->minor == ICE_PKG_SUPP_VER_MNR))
2258 		return (0); /* compatible */
2259 	else
2260 		return (-1); /* older */
2261 }
2262 
2263 /**
2264  * ice_os_pkg_version_str - Format OS package version info into a sbuf
2265  * @hw: device hw structure
2266  * @buf: string buffer to store name/version string
2267  *
2268  * Formats the name and version of the OS DDP package as found in the ice_ddp
2269  * module into a string.
2270  *
2271  * @remark This will almost always be the same as the active package, but
2272  * could be different in some cases. Use ice_active_pkg_version_str to get the
2273  * version of the active DDP package.
2274  */
2275 static void
2276 ice_os_pkg_version_str(struct ice_hw *hw, struct sbuf *buf)
2277 {
2278 	char name_buf[ICE_PKG_NAME_SIZE];
2279 
2280 	/* If the OS DDP package info is empty, use "None" */
2281 	if (pkg_ver_empty(&hw->pkg_ver, hw->pkg_name)) {
2282 		sbuf_printf(buf, "None");
2283 		return;
2284 	}
2285 
2286 	/*
2287 	 * This should already be null-terminated, but since this is a raw
2288 	 * value from an external source, strlcpy() into a new buffer to
2289 	 * make sure.
2290 	 */
2291 	bzero(name_buf, sizeof(name_buf));
2292 	strlcpy(name_buf, (char *)hw->pkg_name, ICE_PKG_NAME_SIZE);
2293 
2294 	sbuf_printf(buf, "%s version %u.%u.%u.%u",
2295 	    name_buf,
2296 	    hw->pkg_ver.major,
2297 	    hw->pkg_ver.minor,
2298 	    hw->pkg_ver.update,
2299 	    hw->pkg_ver.draft);
2300 }
2301 
2302 /**
2303  * ice_active_pkg_version_str - Format active package version info into a sbuf
2304  * @hw: device hw structure
2305  * @buf: string buffer to store name/version string
2306  *
2307  * Formats the name and version of the active DDP package info into a string
2308  * buffer for use.
2309  */
2310 static void
2311 ice_active_pkg_version_str(struct ice_hw *hw, struct sbuf *buf)
2312 {
2313 	char name_buf[ICE_PKG_NAME_SIZE];
2314 
2315 	/* If the active DDP package info is empty, use "None" */
2316 	if (pkg_ver_empty(&hw->active_pkg_ver, hw->active_pkg_name)) {
2317 		sbuf_printf(buf, "None");
2318 		return;
2319 	}
2320 
2321 	/*
2322 	 * This should already be null-terminated, but since this is a raw
2323 	 * value from an external source, strlcpy() into a new buffer to
2324 	 * make sure.
2325 	 */
2326 	bzero(name_buf, sizeof(name_buf));
2327 	strlcpy(name_buf, (char *)hw->active_pkg_name, ICE_PKG_NAME_SIZE);
2328 
2329 	sbuf_printf(buf, "%s version %u.%u.%u.%u",
2330 	    name_buf,
2331 	    hw->active_pkg_ver.major,
2332 	    hw->active_pkg_ver.minor,
2333 	    hw->active_pkg_ver.update,
2334 	    hw->active_pkg_ver.draft);
2335 
2336 	if (hw->active_track_id != 0)
2337 		sbuf_printf(buf, ", track id 0x%08x", hw->active_track_id);
2338 }
2339 
2340 /**
2341  * ice_nvm_version_str - Format the NVM version information into a sbuf
2342  * @hw: device hw structure
2343  * @buf: string buffer to store version string
2344  *
2345  * Formats the NVM information including firmware version, API version, NVM
2346  * version, the EETRACK id, and OEM specific version information into a string
2347  * buffer.
2348  */
2349 static void
2350 ice_nvm_version_str(struct ice_hw *hw, struct sbuf *buf)
2351 {
2352 	struct ice_nvm_info *nvm = &hw->flash.nvm;
2353 	struct ice_orom_info *orom = &hw->flash.orom;
2354 	struct ice_netlist_info *netlist = &hw->flash.netlist;
2355 
2356 	/* Note that the netlist versions are stored in packed Binary Coded
2357 	 * Decimal format. The use of '%x' will correctly display these as
2358 	 * decimal numbers. This works because every 4 bits will be displayed
2359 	 * as a hexadecimal digit, and the BCD format will only use the values
2360 	 * 0-9.
2361 	 */
2362 	sbuf_printf(buf,
2363 		    "fw %u.%u.%u api %u.%u nvm %x.%02x etid %08x netlist %x.%x.%x-%x.%x.%x.%04x oem %u.%u.%u",
2364 		    hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch,
2365 		    hw->api_maj_ver, hw->api_min_ver,
2366 		    nvm->major, nvm->minor, nvm->eetrack,
2367 		    netlist->major, netlist->minor,
2368 		    netlist->type >> 16, netlist->type & 0xFFFF,
2369 		    netlist->rev, netlist->cust_ver, netlist->hash,
2370 		    orom->major, orom->build, orom->patch);
2371 }
2372 
2373 /**
2374  * ice_print_nvm_version - Print the NVM info to the kernel message log
2375  * @sc: the device softc structure
2376  *
2377  * Format and print an NVM version string using ice_nvm_version_str().
2378  */
2379 void
2380 ice_print_nvm_version(struct ice_softc *sc)
2381 {
2382 	struct ice_hw *hw = &sc->hw;
2383 	device_t dev = sc->dev;
2384 	struct sbuf *sbuf;
2385 
2386 	sbuf = sbuf_new_auto();
2387 	ice_nvm_version_str(hw, sbuf);
2388 	sbuf_finish(sbuf);
2389 	device_printf(dev, "%s\n", sbuf_data(sbuf));
2390 	sbuf_delete(sbuf);
2391 }
2392 
2393 /**
2394  * ice_update_vsi_hw_stats - Update VSI-specific ethernet statistics counters
2395  * @vsi: the VSI to be updated
2396  *
2397  * Reads hardware stats and updates the ice_vsi_hw_stats tracking structure with
2398  * the updated values.
2399  */
2400 void
2401 ice_update_vsi_hw_stats(struct ice_vsi *vsi)
2402 {
2403 	struct ice_eth_stats *prev_es, *cur_es;
2404 	struct ice_hw *hw = &vsi->sc->hw;
2405 	u16 vsi_num;
2406 
2407 	if (!ice_is_vsi_valid(hw, vsi->idx))
2408 		return;
2409 
2410 	vsi_num = ice_get_hw_vsi_num(hw, vsi->idx); /* HW absolute index of a VSI */
2411 	prev_es = &vsi->hw_stats.prev;
2412 	cur_es = &vsi->hw_stats.cur;
2413 
2414 #define ICE_VSI_STAT40(name, location) \
2415 	ice_stat_update40(hw, name ## L(vsi_num), \
2416 			  vsi->hw_stats.offsets_loaded, \
2417 			  &prev_es->location, &cur_es->location)
2418 
2419 #define ICE_VSI_STAT32(name, location) \
2420 	ice_stat_update32(hw, name(vsi_num), \
2421 			  vsi->hw_stats.offsets_loaded, \
2422 			  &prev_es->location, &cur_es->location)
2423 
2424 	ICE_VSI_STAT40(GLV_GORC, rx_bytes);
2425 	ICE_VSI_STAT40(GLV_UPRC, rx_unicast);
2426 	ICE_VSI_STAT40(GLV_MPRC, rx_multicast);
2427 	ICE_VSI_STAT40(GLV_BPRC, rx_broadcast);
2428 	ICE_VSI_STAT32(GLV_RDPC, rx_discards);
2429 	ICE_VSI_STAT40(GLV_GOTC, tx_bytes);
2430 	ICE_VSI_STAT40(GLV_UPTC, tx_unicast);
2431 	ICE_VSI_STAT40(GLV_MPTC, tx_multicast);
2432 	ICE_VSI_STAT40(GLV_BPTC, tx_broadcast);
2433 	ICE_VSI_STAT32(GLV_TEPC, tx_errors);
2434 
2435 	ice_stat_update_repc(hw, vsi->idx, vsi->hw_stats.offsets_loaded,
2436 			     cur_es);
2437 
2438 #undef ICE_VSI_STAT40
2439 #undef ICE_VSI_STAT32
2440 
2441 	vsi->hw_stats.offsets_loaded = true;
2442 }
2443 
2444 /**
2445  * ice_reset_vsi_stats - Reset VSI statistics counters
2446  * @vsi: VSI structure
2447  *
2448  * Resets the software tracking counters for the VSI statistics, and indicate
2449  * that the offsets haven't been loaded. This is intended to be called
2450  * post-reset so that VSI statistics count from zero again.
2451  */
2452 void
2453 ice_reset_vsi_stats(struct ice_vsi *vsi)
2454 {
2455 	/* Reset HW stats */
2456 	memset(&vsi->hw_stats.prev, 0, sizeof(vsi->hw_stats.prev));
2457 	memset(&vsi->hw_stats.cur, 0, sizeof(vsi->hw_stats.cur));
2458 	vsi->hw_stats.offsets_loaded = false;
2459 }
2460 
2461 /**
2462  * ice_update_pf_stats - Update port stats counters
2463  * @sc: device private softc structure
2464  *
2465  * Reads hardware statistics registers and updates the software tracking
2466  * structure with new values.
2467  */
2468 void
2469 ice_update_pf_stats(struct ice_softc *sc)
2470 {
2471 	struct ice_hw_port_stats *prev_ps, *cur_ps;
2472 	struct ice_hw *hw = &sc->hw;
2473 	u8 lport;
2474 
2475 	MPASS(hw->port_info);
2476 
2477 	prev_ps = &sc->stats.prev;
2478 	cur_ps = &sc->stats.cur;
2479 	lport = hw->port_info->lport;
2480 
2481 #define ICE_PF_STAT_PFC(name, location, index) \
2482 	ice_stat_update40(hw, name(lport, index), \
2483 			  sc->stats.offsets_loaded, \
2484 			  &prev_ps->location[index], &cur_ps->location[index])
2485 
2486 #define ICE_PF_STAT40(name, location) \
2487 	ice_stat_update40(hw, name ## L(lport), \
2488 			  sc->stats.offsets_loaded, \
2489 			  &prev_ps->location, &cur_ps->location)
2490 
2491 #define ICE_PF_STAT32(name, location) \
2492 	ice_stat_update32(hw, name(lport), \
2493 			  sc->stats.offsets_loaded, \
2494 			  &prev_ps->location, &cur_ps->location)
2495 
2496 	ICE_PF_STAT40(GLPRT_GORC, eth.rx_bytes);
2497 	ICE_PF_STAT40(GLPRT_UPRC, eth.rx_unicast);
2498 	ICE_PF_STAT40(GLPRT_MPRC, eth.rx_multicast);
2499 	ICE_PF_STAT40(GLPRT_BPRC, eth.rx_broadcast);
2500 	ICE_PF_STAT40(GLPRT_GOTC, eth.tx_bytes);
2501 	ICE_PF_STAT40(GLPRT_UPTC, eth.tx_unicast);
2502 	ICE_PF_STAT40(GLPRT_MPTC, eth.tx_multicast);
2503 	ICE_PF_STAT40(GLPRT_BPTC, eth.tx_broadcast);
2504 	/* This stat register doesn't have an lport */
2505 	ice_stat_update32(hw, PRTRPB_RDPC,
2506 			  sc->stats.offsets_loaded,
2507 			  &prev_ps->eth.rx_discards, &cur_ps->eth.rx_discards);
2508 
2509 	ICE_PF_STAT32(GLPRT_TDOLD, tx_dropped_link_down);
2510 	ICE_PF_STAT40(GLPRT_PRC64, rx_size_64);
2511 	ICE_PF_STAT40(GLPRT_PRC127, rx_size_127);
2512 	ICE_PF_STAT40(GLPRT_PRC255, rx_size_255);
2513 	ICE_PF_STAT40(GLPRT_PRC511, rx_size_511);
2514 	ICE_PF_STAT40(GLPRT_PRC1023, rx_size_1023);
2515 	ICE_PF_STAT40(GLPRT_PRC1522, rx_size_1522);
2516 	ICE_PF_STAT40(GLPRT_PRC9522, rx_size_big);
2517 	ICE_PF_STAT40(GLPRT_PTC64, tx_size_64);
2518 	ICE_PF_STAT40(GLPRT_PTC127, tx_size_127);
2519 	ICE_PF_STAT40(GLPRT_PTC255, tx_size_255);
2520 	ICE_PF_STAT40(GLPRT_PTC511, tx_size_511);
2521 	ICE_PF_STAT40(GLPRT_PTC1023, tx_size_1023);
2522 	ICE_PF_STAT40(GLPRT_PTC1522, tx_size_1522);
2523 	ICE_PF_STAT40(GLPRT_PTC9522, tx_size_big);
2524 
2525 	/* Update Priority Flow Control Stats */
2526 	for (int i = 0; i <= GLPRT_PXOFFRXC_MAX_INDEX; i++) {
2527 		ICE_PF_STAT_PFC(GLPRT_PXONRXC, priority_xon_rx, i);
2528 		ICE_PF_STAT_PFC(GLPRT_PXOFFRXC, priority_xoff_rx, i);
2529 		ICE_PF_STAT_PFC(GLPRT_PXONTXC, priority_xon_tx, i);
2530 		ICE_PF_STAT_PFC(GLPRT_PXOFFTXC, priority_xoff_tx, i);
2531 		ICE_PF_STAT_PFC(GLPRT_RXON2OFFCNT, priority_xon_2_xoff, i);
2532 	}
2533 
2534 	ICE_PF_STAT32(GLPRT_LXONRXC, link_xon_rx);
2535 	ICE_PF_STAT32(GLPRT_LXOFFRXC, link_xoff_rx);
2536 	ICE_PF_STAT32(GLPRT_LXONTXC, link_xon_tx);
2537 	ICE_PF_STAT32(GLPRT_LXOFFTXC, link_xoff_tx);
2538 	ICE_PF_STAT32(GLPRT_CRCERRS, crc_errors);
2539 	ICE_PF_STAT32(GLPRT_ILLERRC, illegal_bytes);
2540 	ICE_PF_STAT32(GLPRT_MLFC, mac_local_faults);
2541 	ICE_PF_STAT32(GLPRT_MRFC, mac_remote_faults);
2542 	ICE_PF_STAT32(GLPRT_RLEC, rx_len_errors);
2543 	ICE_PF_STAT32(GLPRT_RUC, rx_undersize);
2544 	ICE_PF_STAT32(GLPRT_RFC, rx_fragments);
2545 	ICE_PF_STAT32(GLPRT_ROC, rx_oversize);
2546 	ICE_PF_STAT32(GLPRT_RJC, rx_jabber);
2547 
2548 #undef ICE_PF_STAT40
2549 #undef ICE_PF_STAT32
2550 #undef ICE_PF_STAT_PFC
2551 
2552 	sc->stats.offsets_loaded = true;
2553 }
2554 
2555 /**
2556  * ice_reset_pf_stats - Reset port stats counters
2557  * @sc: Device private softc structure
2558  *
2559  * Reset software tracking values for statistics to zero, and indicate that
2560  * offsets haven't been loaded. Intended to be called after a device reset so
2561  * that statistics count from zero again.
2562  */
2563 void
2564 ice_reset_pf_stats(struct ice_softc *sc)
2565 {
2566 	memset(&sc->stats.prev, 0, sizeof(sc->stats.prev));
2567 	memset(&sc->stats.cur, 0, sizeof(sc->stats.cur));
2568 	sc->stats.offsets_loaded = false;
2569 }
2570 
2571 /**
2572  * ice_sysctl_show_fw - sysctl callback to show firmware information
2573  * @oidp: sysctl oid structure
2574  * @arg1: pointer to private data structure
2575  * @arg2: unused
2576  * @req: sysctl request pointer
2577  *
2578  * Callback for the fw_version sysctl, to display the current firmware
2579  * information found at hardware init time.
2580  */
2581 static int
2582 ice_sysctl_show_fw(SYSCTL_HANDLER_ARGS)
2583 {
2584 	struct ice_softc *sc = (struct ice_softc *)arg1;
2585 	struct ice_hw *hw = &sc->hw;
2586 	struct sbuf *sbuf;
2587 
2588 	UNREFERENCED_PARAMETER(oidp);
2589 	UNREFERENCED_PARAMETER(arg2);
2590 
2591 	if (ice_driver_is_detaching(sc))
2592 		return (ESHUTDOWN);
2593 
2594 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
2595 	ice_nvm_version_str(hw, sbuf);
2596 	sbuf_finish(sbuf);
2597 	sbuf_delete(sbuf);
2598 
2599 	return (0);
2600 }
2601 
2602 /**
2603  * ice_sysctl_pba_number - sysctl callback to show PBA number
2604  * @oidp: sysctl oid structure
2605  * @arg1: pointer to private data structure
2606  * @arg2: unused
2607  * @req: sysctl request pointer
2608  *
2609  * Callback for the pba_number sysctl, used to read the Product Board Assembly
2610  * number for this device.
2611  */
2612 static int
2613 ice_sysctl_pba_number(SYSCTL_HANDLER_ARGS)
2614 {
2615 	struct ice_softc *sc = (struct ice_softc *)arg1;
2616 	struct ice_hw *hw = &sc->hw;
2617 	device_t dev = sc->dev;
2618 	u8 pba_string[32] = "";
2619 	enum ice_status status;
2620 
2621 	UNREFERENCED_PARAMETER(arg2);
2622 
2623 	if (ice_driver_is_detaching(sc))
2624 		return (ESHUTDOWN);
2625 
2626 	status = ice_read_pba_string(hw, pba_string, sizeof(pba_string));
2627 	if (status) {
2628 		device_printf(dev,
2629 		    "%s: failed to read PBA string from NVM; status %s, aq_err %s\n",
2630 		    __func__, ice_status_str(status),
2631 		    ice_aq_str(hw->adminq.sq_last_status));
2632 		return (EIO);
2633 	}
2634 
2635 	return sysctl_handle_string(oidp, pba_string, sizeof(pba_string), req);
2636 }
2637 
2638 /**
2639  * ice_sysctl_pkg_version - sysctl to show the active package version info
2640  * @oidp: sysctl oid structure
2641  * @arg1: pointer to private data structure
2642  * @arg2: unused
2643  * @req: sysctl request pointer
2644  *
2645  * Callback for the pkg_version sysctl, to display the active DDP package name
2646  * and version information.
2647  */
2648 static int
2649 ice_sysctl_pkg_version(SYSCTL_HANDLER_ARGS)
2650 {
2651 	struct ice_softc *sc = (struct ice_softc *)arg1;
2652 	struct ice_hw *hw = &sc->hw;
2653 	struct sbuf *sbuf;
2654 
2655 	UNREFERENCED_PARAMETER(oidp);
2656 	UNREFERENCED_PARAMETER(arg2);
2657 
2658 	if (ice_driver_is_detaching(sc))
2659 		return (ESHUTDOWN);
2660 
2661 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
2662 	ice_active_pkg_version_str(hw, sbuf);
2663 	sbuf_finish(sbuf);
2664 	sbuf_delete(sbuf);
2665 
2666 	return (0);
2667 }
2668 
2669 /**
2670  * ice_sysctl_os_pkg_version - sysctl to show the OS package version info
2671  * @oidp: sysctl oid structure
2672  * @arg1: pointer to private data structure
2673  * @arg2: unused
2674  * @req: sysctl request pointer
2675  *
2676  * Callback for the pkg_version sysctl, to display the OS DDP package name and
2677  * version info found in the ice_ddp module.
2678  */
2679 static int
2680 ice_sysctl_os_pkg_version(SYSCTL_HANDLER_ARGS)
2681 {
2682 	struct ice_softc *sc = (struct ice_softc *)arg1;
2683 	struct ice_hw *hw = &sc->hw;
2684 	struct sbuf *sbuf;
2685 
2686 	UNREFERENCED_PARAMETER(oidp);
2687 	UNREFERENCED_PARAMETER(arg2);
2688 
2689 	if (ice_driver_is_detaching(sc))
2690 		return (ESHUTDOWN);
2691 
2692 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
2693 	ice_os_pkg_version_str(hw, sbuf);
2694 	sbuf_finish(sbuf);
2695 	sbuf_delete(sbuf);
2696 
2697 	return (0);
2698 }
2699 
2700 /**
2701  * ice_sysctl_current_speed - sysctl callback to show current link speed
2702  * @oidp: sysctl oid structure
2703  * @arg1: pointer to private data structure
2704  * @arg2: unused
2705  * @req: sysctl request pointer
2706  *
2707  * Callback for the current_speed sysctl, to display the string representing
2708  * the current link speed.
2709  */
2710 static int
2711 ice_sysctl_current_speed(SYSCTL_HANDLER_ARGS)
2712 {
2713 	struct ice_softc *sc = (struct ice_softc *)arg1;
2714 	struct ice_hw *hw = &sc->hw;
2715 	struct sbuf *sbuf;
2716 
2717 	UNREFERENCED_PARAMETER(oidp);
2718 	UNREFERENCED_PARAMETER(arg2);
2719 
2720 	if (ice_driver_is_detaching(sc))
2721 		return (ESHUTDOWN);
2722 
2723 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 10, req);
2724 	sbuf_printf(sbuf, "%s", ice_aq_speed_to_str(hw->port_info));
2725 	sbuf_finish(sbuf);
2726 	sbuf_delete(sbuf);
2727 
2728 	return (0);
2729 }
2730 
2731 /**
2732  * @var phy_link_speeds
2733  * @brief PHY link speed conversion array
2734  *
2735  * Array of link speeds to convert ICE_PHY_TYPE_LOW and ICE_PHY_TYPE_HIGH into
2736  * link speeds used by the link speed sysctls.
2737  *
2738  * @remark these are based on the indices used in the BIT() macros for the
2739  * ICE_PHY_TYPE_LOW_* and ICE_PHY_TYPE_HIGH_* definitions.
2740  */
2741 static const uint16_t phy_link_speeds[] = {
2742     ICE_AQ_LINK_SPEED_100MB,
2743     ICE_AQ_LINK_SPEED_100MB,
2744     ICE_AQ_LINK_SPEED_1000MB,
2745     ICE_AQ_LINK_SPEED_1000MB,
2746     ICE_AQ_LINK_SPEED_1000MB,
2747     ICE_AQ_LINK_SPEED_1000MB,
2748     ICE_AQ_LINK_SPEED_1000MB,
2749     ICE_AQ_LINK_SPEED_2500MB,
2750     ICE_AQ_LINK_SPEED_2500MB,
2751     ICE_AQ_LINK_SPEED_2500MB,
2752     ICE_AQ_LINK_SPEED_5GB,
2753     ICE_AQ_LINK_SPEED_5GB,
2754     ICE_AQ_LINK_SPEED_10GB,
2755     ICE_AQ_LINK_SPEED_10GB,
2756     ICE_AQ_LINK_SPEED_10GB,
2757     ICE_AQ_LINK_SPEED_10GB,
2758     ICE_AQ_LINK_SPEED_10GB,
2759     ICE_AQ_LINK_SPEED_10GB,
2760     ICE_AQ_LINK_SPEED_10GB,
2761     ICE_AQ_LINK_SPEED_25GB,
2762     ICE_AQ_LINK_SPEED_25GB,
2763     ICE_AQ_LINK_SPEED_25GB,
2764     ICE_AQ_LINK_SPEED_25GB,
2765     ICE_AQ_LINK_SPEED_25GB,
2766     ICE_AQ_LINK_SPEED_25GB,
2767     ICE_AQ_LINK_SPEED_25GB,
2768     ICE_AQ_LINK_SPEED_25GB,
2769     ICE_AQ_LINK_SPEED_25GB,
2770     ICE_AQ_LINK_SPEED_25GB,
2771     ICE_AQ_LINK_SPEED_25GB,
2772     ICE_AQ_LINK_SPEED_40GB,
2773     ICE_AQ_LINK_SPEED_40GB,
2774     ICE_AQ_LINK_SPEED_40GB,
2775     ICE_AQ_LINK_SPEED_40GB,
2776     ICE_AQ_LINK_SPEED_40GB,
2777     ICE_AQ_LINK_SPEED_40GB,
2778     ICE_AQ_LINK_SPEED_50GB,
2779     ICE_AQ_LINK_SPEED_50GB,
2780     ICE_AQ_LINK_SPEED_50GB,
2781     ICE_AQ_LINK_SPEED_50GB,
2782     ICE_AQ_LINK_SPEED_50GB,
2783     ICE_AQ_LINK_SPEED_50GB,
2784     ICE_AQ_LINK_SPEED_50GB,
2785     ICE_AQ_LINK_SPEED_50GB,
2786     ICE_AQ_LINK_SPEED_50GB,
2787     ICE_AQ_LINK_SPEED_50GB,
2788     ICE_AQ_LINK_SPEED_50GB,
2789     ICE_AQ_LINK_SPEED_50GB,
2790     ICE_AQ_LINK_SPEED_50GB,
2791     ICE_AQ_LINK_SPEED_50GB,
2792     ICE_AQ_LINK_SPEED_50GB,
2793     ICE_AQ_LINK_SPEED_100GB,
2794     ICE_AQ_LINK_SPEED_100GB,
2795     ICE_AQ_LINK_SPEED_100GB,
2796     ICE_AQ_LINK_SPEED_100GB,
2797     ICE_AQ_LINK_SPEED_100GB,
2798     ICE_AQ_LINK_SPEED_100GB,
2799     ICE_AQ_LINK_SPEED_100GB,
2800     ICE_AQ_LINK_SPEED_100GB,
2801     ICE_AQ_LINK_SPEED_100GB,
2802     ICE_AQ_LINK_SPEED_100GB,
2803     ICE_AQ_LINK_SPEED_100GB,
2804     ICE_AQ_LINK_SPEED_100GB,
2805     ICE_AQ_LINK_SPEED_100GB,
2806     /* These rates are for ICE_PHY_TYPE_HIGH_* */
2807     ICE_AQ_LINK_SPEED_100GB,
2808     ICE_AQ_LINK_SPEED_100GB,
2809     ICE_AQ_LINK_SPEED_100GB,
2810     ICE_AQ_LINK_SPEED_100GB,
2811     ICE_AQ_LINK_SPEED_100GB
2812 };
2813 
/*
 * Help text describing the advertised link speed flags. One string literal
 * per output line; adjacent literals are concatenated by the compiler.
 */
#define ICE_SYSCTL_HELP_ADVERTISE_SPEED \
	"\nControl advertised link speed." \
	"\nFlags:" \
	"\n\t   0x0 - Auto" \
	"\n\t   0x1 - 10 Mb" \
	"\n\t   0x2 - 100 Mb" \
	"\n\t   0x4 - 1G" \
	"\n\t   0x8 - 2.5G" \
	"\n\t  0x10 - 5G" \
	"\n\t  0x20 - 10G" \
	"\n\t  0x40 - 20G" \
	"\n\t  0x80 - 25G" \
	"\n\t 0x100 - 40G" \
	"\n\t 0x200 - 50G" \
	"\n\t 0x400 - 100G" \
	"\n\t0x8000 - Unknown" \
	"\n\t" \
	"\nUse \"sysctl -x\" to view flags properly."
2832 
/*
 * PHY type bit groups, one per link speed.
 *
 * ice_sysctl_speeds_to_aq_phy_types ORs the matching group into the output
 * PHY type bitmask for every ICE_AQ_LINK_SPEED_* flag that is requested.
 * All groups are built from ICE_PHY_TYPE_LOW_* bits and go into the low
 * bitmask, except ICE_PHYS_100GB_HIGH which is built from
 * ICE_PHY_TYPE_HIGH_* bits and goes into the high bitmask.
 */
#define ICE_PHYS_100MB			\
    (ICE_PHY_TYPE_LOW_100BASE_TX |	\
     ICE_PHY_TYPE_LOW_100M_SGMII)
#define ICE_PHYS_1000MB			\
    (ICE_PHY_TYPE_LOW_1000BASE_T |	\
     ICE_PHY_TYPE_LOW_1000BASE_SX |	\
     ICE_PHY_TYPE_LOW_1000BASE_LX |	\
     ICE_PHY_TYPE_LOW_1000BASE_KX |	\
     ICE_PHY_TYPE_LOW_1G_SGMII)
#define ICE_PHYS_2500MB			\
    (ICE_PHY_TYPE_LOW_2500BASE_T |	\
     ICE_PHY_TYPE_LOW_2500BASE_X |	\
     ICE_PHY_TYPE_LOW_2500BASE_KX)
#define ICE_PHYS_5GB			\
    (ICE_PHY_TYPE_LOW_5GBASE_T |	\
     ICE_PHY_TYPE_LOW_5GBASE_KR)
#define ICE_PHYS_10GB			\
    (ICE_PHY_TYPE_LOW_10GBASE_T |	\
     ICE_PHY_TYPE_LOW_10G_SFI_DA |	\
     ICE_PHY_TYPE_LOW_10GBASE_SR |	\
     ICE_PHY_TYPE_LOW_10GBASE_LR |	\
     ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 |	\
     ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC |	\
     ICE_PHY_TYPE_LOW_10G_SFI_C2C)
#define ICE_PHYS_25GB			\
    (ICE_PHY_TYPE_LOW_25GBASE_T |	\
     ICE_PHY_TYPE_LOW_25GBASE_CR |	\
     ICE_PHY_TYPE_LOW_25GBASE_CR_S |	\
     ICE_PHY_TYPE_LOW_25GBASE_CR1 |	\
     ICE_PHY_TYPE_LOW_25GBASE_SR |	\
     ICE_PHY_TYPE_LOW_25GBASE_LR |	\
     ICE_PHY_TYPE_LOW_25GBASE_KR |	\
     ICE_PHY_TYPE_LOW_25GBASE_KR_S |	\
     ICE_PHY_TYPE_LOW_25GBASE_KR1 |	\
     ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC |	\
     ICE_PHY_TYPE_LOW_25G_AUI_C2C)
#define ICE_PHYS_40GB			\
    (ICE_PHY_TYPE_LOW_40GBASE_CR4 |	\
     ICE_PHY_TYPE_LOW_40GBASE_SR4 |	\
     ICE_PHY_TYPE_LOW_40GBASE_LR4 |	\
     ICE_PHY_TYPE_LOW_40GBASE_KR4 |	\
     ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC | \
     ICE_PHY_TYPE_LOW_40G_XLAUI)
#define ICE_PHYS_50GB			\
    (ICE_PHY_TYPE_LOW_50GBASE_CR2 |	\
     ICE_PHY_TYPE_LOW_50GBASE_SR2 |	\
     ICE_PHY_TYPE_LOW_50GBASE_LR2 |	\
     ICE_PHY_TYPE_LOW_50GBASE_KR2 |	\
     ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC | \
     ICE_PHY_TYPE_LOW_50G_LAUI2 |	\
     ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC | \
     ICE_PHY_TYPE_LOW_50G_AUI2 |	\
     ICE_PHY_TYPE_LOW_50GBASE_CP |	\
     ICE_PHY_TYPE_LOW_50GBASE_SR |	\
     ICE_PHY_TYPE_LOW_50GBASE_FR |	\
     ICE_PHY_TYPE_LOW_50GBASE_LR |	\
     ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4 |	\
     ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC | \
     ICE_PHY_TYPE_LOW_50G_AUI1)
#define ICE_PHYS_100GB_LOW		\
    (ICE_PHY_TYPE_LOW_100GBASE_CR4 |	\
     ICE_PHY_TYPE_LOW_100GBASE_SR4 |	\
     ICE_PHY_TYPE_LOW_100GBASE_LR4 |	\
     ICE_PHY_TYPE_LOW_100GBASE_KR4 |	\
     ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC | \
     ICE_PHY_TYPE_LOW_100G_CAUI4 |	\
     ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC | \
     ICE_PHY_TYPE_LOW_100G_AUI4 |	\
     ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 | \
     ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 | \
     ICE_PHY_TYPE_LOW_100GBASE_CP2 |	\
     ICE_PHY_TYPE_LOW_100GBASE_SR2 |	\
     ICE_PHY_TYPE_LOW_100GBASE_DR)
#define ICE_PHYS_100GB_HIGH		\
    (ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4 | \
     ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC | \
     ICE_PHY_TYPE_HIGH_100G_CAUI2 |	\
     ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC | \
     ICE_PHY_TYPE_HIGH_100G_AUI2)
2912 
2913 /**
2914  * ice_aq_phy_types_to_link_speeds - Convert the PHY Types to speeds
2915  * @phy_type_low: lower 64-bit PHY Type bitmask
2916  * @phy_type_high: upper 64-bit PHY Type bitmask
2917  *
2918  * Convert the PHY Type fields from Get PHY Abilities and Set PHY Config into
2919  * link speed flags. If phy_type_high has an unknown PHY type, then the return
2920  * value will include the "ICE_AQ_LINK_SPEED_UNKNOWN" flag as well.
2921  */
2922 static u16
2923 ice_aq_phy_types_to_link_speeds(u64 phy_type_low, u64 phy_type_high)
2924 {
2925 	u16 sysctl_speeds = 0;
2926 	int bit;
2927 
2928 	/* coverity[address_of] */
2929 	for_each_set_bit(bit, &phy_type_low, 64)
2930 		sysctl_speeds |= phy_link_speeds[bit];
2931 
2932 	/* coverity[address_of] */
2933 	for_each_set_bit(bit, &phy_type_high, 64) {
2934 		if ((bit + 64) < (int)ARRAY_SIZE(phy_link_speeds))
2935 			sysctl_speeds |= phy_link_speeds[bit + 64];
2936 		else
2937 			sysctl_speeds |= ICE_AQ_LINK_SPEED_UNKNOWN;
2938 	}
2939 
2940 	return (sysctl_speeds);
2941 }
2942 
2943 /**
2944  * ice_sysctl_speeds_to_aq_phy_types - Convert sysctl speed flags to AQ PHY flags
2945  * @sysctl_speeds: 16-bit sysctl speeds or AQ_LINK_SPEED flags
2946  * @phy_type_low: output parameter for lower AQ PHY flags
2947  * @phy_type_high: output parameter for higher AQ PHY flags
2948  *
2949  * Converts the given link speed flags into AQ PHY type flag sets appropriate
2950  * for use in a Set PHY Config command.
2951  */
2952 static void
2953 ice_sysctl_speeds_to_aq_phy_types(u16 sysctl_speeds, u64 *phy_type_low,
2954 				  u64 *phy_type_high)
2955 {
2956 	*phy_type_low = 0, *phy_type_high = 0;
2957 
2958 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_100MB)
2959 		*phy_type_low |= ICE_PHYS_100MB;
2960 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_1000MB)
2961 		*phy_type_low |= ICE_PHYS_1000MB;
2962 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_2500MB)
2963 		*phy_type_low |= ICE_PHYS_2500MB;
2964 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_5GB)
2965 		*phy_type_low |= ICE_PHYS_5GB;
2966 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_10GB)
2967 		*phy_type_low |= ICE_PHYS_10GB;
2968 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_25GB)
2969 		*phy_type_low |= ICE_PHYS_25GB;
2970 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_40GB)
2971 		*phy_type_low |= ICE_PHYS_40GB;
2972 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_50GB)
2973 		*phy_type_low |= ICE_PHYS_50GB;
2974 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_100GB) {
2975 		*phy_type_low |= ICE_PHYS_100GB_LOW;
2976 		*phy_type_high |= ICE_PHYS_100GB_HIGH;
2977 	}
2978 }
2979 
/**
 * @struct ice_phy_data
 * @brief PHY caps and link speeds
 *
 * In/out buffer for ice_intersect_phy_types_and_speeds. The caller provides
 * report_mode and user_speeds_orig; the function stores the PHY types from
 * the Get PHY Caps report in the *_orig fields and the intersection of the
 * report with the requested speeds in the *_intr fields.
 */
struct ice_phy_data {
	u64 phy_low_orig;     /* PHY low quad from report */
	u64 phy_high_orig;    /* PHY high quad from report */
	u64 phy_low_intr;     /* PHY low quad intersection with user speeds */
	u64 phy_high_intr;    /* PHY high quad intersection with user speeds */
	/*
	 * Input from caller - See ICE_AQ_LINK_SPEED_*. A value of 0 is
	 * overwritten with USHRT_MAX by ice_intersect_phy_types_and_speeds.
	 */
	u16 user_speeds_orig;
	u16 user_speeds_intr; /* Intersect with report speeds */
	u8 report_mode;       /* Input from caller - See ICE_AQC_REPORT_* */
};
2996 
2997 /**
2998  * ice_intersect_phy_types_and_speeds - Return intersection of link speeds
2999  * @sc: device private structure
3000  * @phy_data: device PHY data
3001  *
3002  * On read: Displays the currently supported speeds
3003  * On write: Sets the device's supported speeds
3004  * Valid input flags: see ICE_SYSCTL_HELP_ADVERTISE_SPEED
3005  */
3006 static int
3007 ice_intersect_phy_types_and_speeds(struct ice_softc *sc,
3008 				   struct ice_phy_data *phy_data)
3009 {
3010 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3011 	const char *report_types[5] = { "w/o MEDIA",
3012 					"w/MEDIA",
3013 					"ACTIVE",
3014 					"EDOOFUS", /* Not used */
3015 					"DFLT" };
3016 	struct ice_hw *hw = &sc->hw;
3017 	struct ice_port_info *pi = hw->port_info;
3018 	enum ice_status status;
3019 	u16 report_speeds, temp_speeds;
3020 	u8 report_type;
3021 	bool apply_speed_filter = false;
3022 
3023 	switch (phy_data->report_mode) {
3024 	case ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA:
3025 	case ICE_AQC_REPORT_TOPO_CAP_MEDIA:
3026 	case ICE_AQC_REPORT_ACTIVE_CFG:
3027 	case ICE_AQC_REPORT_DFLT_CFG:
3028 		report_type = phy_data->report_mode >> 1;
3029 		break;
3030 	default:
3031 		device_printf(sc->dev,
3032 		    "%s: phy_data.report_mode \"%u\" doesn't exist\n",
3033 		    __func__, phy_data->report_mode);
3034 		return (EINVAL);
3035 	}
3036 
3037 	/* 0 is treated as "Auto"; the driver will handle selecting the
3038 	 * correct speeds. Including, in some cases, applying an override
3039 	 * if provided.
3040 	 */
3041 	if (phy_data->user_speeds_orig == 0)
3042 		phy_data->user_speeds_orig = USHRT_MAX;
3043 	else if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE))
3044 		apply_speed_filter = true;
3045 
3046 	status = ice_aq_get_phy_caps(pi, false, phy_data->report_mode, &pcaps, NULL);
3047 	if (status != ICE_SUCCESS) {
3048 		device_printf(sc->dev,
3049 		    "%s: ice_aq_get_phy_caps (%s) failed; status %s, aq_err %s\n",
3050 		    __func__, report_types[report_type],
3051 		    ice_status_str(status),
3052 		    ice_aq_str(sc->hw.adminq.sq_last_status));
3053 		return (EIO);
3054 	}
3055 
3056 	phy_data->phy_low_orig = le64toh(pcaps.phy_type_low);
3057 	phy_data->phy_high_orig = le64toh(pcaps.phy_type_high);
3058 	report_speeds = ice_aq_phy_types_to_link_speeds(phy_data->phy_low_orig,
3059 	    phy_data->phy_high_orig);
3060 	if (apply_speed_filter) {
3061 		temp_speeds = ice_apply_supported_speed_filter(report_speeds,
3062 		    pcaps.module_type[0]);
3063 		if ((phy_data->user_speeds_orig & temp_speeds) == 0) {
3064 			device_printf(sc->dev,
3065 			    "User-specified speeds (\"0x%04X\") not supported\n",
3066 			    phy_data->user_speeds_orig);
3067 			return (EINVAL);
3068 		}
3069 		report_speeds = temp_speeds;
3070 	}
3071 	ice_sysctl_speeds_to_aq_phy_types(phy_data->user_speeds_orig,
3072 	    &phy_data->phy_low_intr, &phy_data->phy_high_intr);
3073 	phy_data->user_speeds_intr = phy_data->user_speeds_orig & report_speeds;
3074 	phy_data->phy_low_intr &= phy_data->phy_low_orig;
3075 	phy_data->phy_high_intr &= phy_data->phy_high_orig;
3076 
3077 	return (0);
3078  }
3079 
3080 /**
3081  * ice_sysctl_advertise_speed - Display/change link speeds supported by port
3082  * @oidp: sysctl oid structure
3083  * @arg1: pointer to private data structure
3084  * @arg2: unused
3085  * @req: sysctl request pointer
3086  *
3087  * On read: Displays the currently supported speeds
3088  * On write: Sets the device's supported speeds
3089  * Valid input flags: see ICE_SYSCTL_HELP_ADVERTISE_SPEED
3090  */
3091 static int
3092 ice_sysctl_advertise_speed(SYSCTL_HANDLER_ARGS)
3093 {
3094 	struct ice_softc *sc = (struct ice_softc *)arg1;
3095 	struct ice_port_info *pi = sc->hw.port_info;
3096 	struct ice_phy_data phy_data = { 0 };
3097 	device_t dev = sc->dev;
3098 	u16 sysctl_speeds;
3099 	int ret;
3100 
3101 	UNREFERENCED_PARAMETER(arg2);
3102 
3103 	if (ice_driver_is_detaching(sc))
3104 		return (ESHUTDOWN);
3105 
3106 	/* Get the current speeds from the adapter's "active" configuration. */
3107 	phy_data.report_mode = ICE_AQC_REPORT_ACTIVE_CFG;
3108 	ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
3109 	if (ret) {
3110 		/* Error message already printed within function */
3111 		return (ret);
3112 	}
3113 
3114 	sysctl_speeds = phy_data.user_speeds_intr;
3115 
3116 	ret = sysctl_handle_16(oidp, &sysctl_speeds, 0, req);
3117 	if ((ret) || (req->newptr == NULL))
3118 		return (ret);
3119 
3120 	if (sysctl_speeds > 0x7FF) {
3121 		device_printf(dev,
3122 			      "%s: \"%u\" is outside of the range of acceptable values.\n",
3123 			      __func__, sysctl_speeds);
3124 		return (EINVAL);
3125 	}
3126 
3127 	pi->phy.curr_user_speed_req = sysctl_speeds;
3128 
3129 	/* Apply settings requested by user */
3130 	return ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS);
3131 }
3132 
/**
 * @def ICE_SYSCTL_HELP_FEC_CONFIG
 * @brief Help text listing the FEC mode strings
 *
 * Referenced by ice_sysctl_fec_config as the list of valid input strings.
 * The keywords on the left ("auto", "fc", "rs", "none") are the literals
 * that handler compares its input against.
 */
#define ICE_SYSCTL_HELP_FEC_CONFIG			\
"\nDisplay or set the port's requested FEC mode."	\
"\n\tauto - " ICE_FEC_STRING_AUTO			\
"\n\tfc - " ICE_FEC_STRING_BASER			\
"\n\trs - " ICE_FEC_STRING_RS				\
"\n\tnone - " ICE_FEC_STRING_NONE			\
"\nEither of the left or right strings above can be used to set the requested mode."
3140 
3141 /**
3142  * ice_sysctl_fec_config - Display/change the configured FEC mode
3143  * @oidp: sysctl oid structure
3144  * @arg1: pointer to private data structure
3145  * @arg2: unused
3146  * @req: sysctl request pointer
3147  *
3148  * On read: Displays the configured FEC mode
3149  * On write: Sets the device's FEC mode to the input string, if it's valid.
3150  * Valid input strings: see ICE_SYSCTL_HELP_FEC_CONFIG
3151  */
3152 static int
3153 ice_sysctl_fec_config(SYSCTL_HANDLER_ARGS)
3154 {
3155 	struct ice_softc *sc = (struct ice_softc *)arg1;
3156 	struct ice_port_info *pi = sc->hw.port_info;
3157 	enum ice_fec_mode new_mode;
3158 	device_t dev = sc->dev;
3159 	char req_fec[32];
3160 	int ret;
3161 
3162 	UNREFERENCED_PARAMETER(arg2);
3163 
3164 	if (ice_driver_is_detaching(sc))
3165 		return (ESHUTDOWN);
3166 
3167 	bzero(req_fec, sizeof(req_fec));
3168 	strlcpy(req_fec, ice_requested_fec_mode(pi), sizeof(req_fec));
3169 
3170 	ret = sysctl_handle_string(oidp, req_fec, sizeof(req_fec), req);
3171 	if ((ret) || (req->newptr == NULL))
3172 		return (ret);
3173 
3174 	if (strcmp(req_fec, "auto") == 0 ||
3175 	    strcmp(req_fec, ice_fec_str(ICE_FEC_AUTO)) == 0) {
3176 		if (sc->allow_no_fec_mod_in_auto)
3177 			new_mode = ICE_FEC_DIS_AUTO;
3178 		else
3179 			new_mode = ICE_FEC_AUTO;
3180 	} else if (strcmp(req_fec, "fc") == 0 ||
3181 	    strcmp(req_fec, ice_fec_str(ICE_FEC_BASER)) == 0) {
3182 		new_mode = ICE_FEC_BASER;
3183 	} else if (strcmp(req_fec, "rs") == 0 ||
3184 	    strcmp(req_fec, ice_fec_str(ICE_FEC_RS)) == 0) {
3185 		new_mode = ICE_FEC_RS;
3186 	} else if (strcmp(req_fec, "none") == 0 ||
3187 	    strcmp(req_fec, ice_fec_str(ICE_FEC_NONE)) == 0) {
3188 		new_mode = ICE_FEC_NONE;
3189 	} else {
3190 		device_printf(dev,
3191 		    "%s: \"%s\" is not a valid FEC mode\n",
3192 		    __func__, req_fec);
3193 		return (EINVAL);
3194 	}
3195 
3196 	/* Cache user FEC mode for later link ups */
3197 	pi->phy.curr_user_fec_req = new_mode;
3198 
3199 	/* Apply settings requested by user */
3200 	return ice_apply_saved_phy_cfg(sc, ICE_APPLY_FEC);
3201 }
3202 
3203 /**
3204  * ice_sysctl_negotiated_fec - Display the negotiated FEC mode on the link
3205  * @oidp: sysctl oid structure
3206  * @arg1: pointer to private data structure
3207  * @arg2: unused
3208  * @req: sysctl request pointer
3209  *
3210  * On read: Displays the negotiated FEC mode, in a string
3211  */
3212 static int
3213 ice_sysctl_negotiated_fec(SYSCTL_HANDLER_ARGS)
3214 {
3215 	struct ice_softc *sc = (struct ice_softc *)arg1;
3216 	struct ice_hw *hw = &sc->hw;
3217 	char neg_fec[32];
3218 	int ret;
3219 
3220 	UNREFERENCED_PARAMETER(arg2);
3221 
3222 	if (ice_driver_is_detaching(sc))
3223 		return (ESHUTDOWN);
3224 
3225 	/* Copy const string into a buffer to drop const qualifier */
3226 	bzero(neg_fec, sizeof(neg_fec));
3227 	strlcpy(neg_fec, ice_negotiated_fec_mode(hw->port_info), sizeof(neg_fec));
3228 
3229 	ret = sysctl_handle_string(oidp, neg_fec, 0, req);
3230 	if (req->newptr != NULL)
3231 		return (EPERM);
3232 
3233 	return (ret);
3234 }
3235 
/**
 * @def ICE_SYSCTL_HELP_FC_CONFIG
 * @brief Help text listing the flow control modes
 *
 * Referenced by ice_sysctl_fc_config as the list of valid inputs. That
 * handler accepts the numbers 0 through 3 as well as the mode names returned
 * by ice_fc_str().
 */
#define ICE_SYSCTL_HELP_FC_CONFIG				\
"\nDisplay or set the port's advertised flow control mode.\n"	\
"\t0 - " ICE_FC_STRING_NONE					\
"\n\t1 - " ICE_FC_STRING_RX					\
"\n\t2 - " ICE_FC_STRING_TX					\
"\n\t3 - " ICE_FC_STRING_FULL					\
"\nEither the numbers or the strings above can be used to set the advertised mode."
3243 
3244 /**
3245  * ice_sysctl_fc_config - Display/change the advertised flow control mode
3246  * @oidp: sysctl oid structure
3247  * @arg1: pointer to private data structure
3248  * @arg2: unused
3249  * @req: sysctl request pointer
3250  *
3251  * On read: Displays the configured flow control mode
3252  * On write: Sets the device's flow control mode to the input, if it's valid.
3253  * Valid input strings: see ICE_SYSCTL_HELP_FC_CONFIG
3254  */
3255 static int
3256 ice_sysctl_fc_config(SYSCTL_HANDLER_ARGS)
3257 {
3258 	struct ice_softc *sc = (struct ice_softc *)arg1;
3259 	struct ice_port_info *pi = sc->hw.port_info;
3260 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3261 	enum ice_fc_mode old_mode, new_mode;
3262 	struct ice_hw *hw = &sc->hw;
3263 	device_t dev = sc->dev;
3264 	enum ice_status status;
3265 	int ret, fc_num;
3266 	bool mode_set = false;
3267 	struct sbuf buf;
3268 	char *fc_str_end;
3269 	char fc_str[32];
3270 
3271 	UNREFERENCED_PARAMETER(arg2);
3272 
3273 	if (ice_driver_is_detaching(sc))
3274 		return (ESHUTDOWN);
3275 
3276 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
3277 				     &pcaps, NULL);
3278 	if (status != ICE_SUCCESS) {
3279 		device_printf(dev,
3280 		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
3281 		    __func__, ice_status_str(status),
3282 		    ice_aq_str(hw->adminq.sq_last_status));
3283 		return (EIO);
3284 	}
3285 
3286 	/* Convert HW response format to SW enum value */
3287 	if ((pcaps.caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) &&
3288 	    (pcaps.caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE))
3289 		old_mode = ICE_FC_FULL;
3290 	else if (pcaps.caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE)
3291 		old_mode = ICE_FC_TX_PAUSE;
3292 	else if (pcaps.caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)
3293 		old_mode = ICE_FC_RX_PAUSE;
3294 	else
3295 		old_mode = ICE_FC_NONE;
3296 
3297 	/* Create "old" string for output */
3298 	bzero(fc_str, sizeof(fc_str));
3299 	sbuf_new_for_sysctl(&buf, fc_str, sizeof(fc_str), req);
3300 	sbuf_printf(&buf, "%d<%s>", old_mode, ice_fc_str(old_mode));
3301 	sbuf_finish(&buf);
3302 	sbuf_delete(&buf);
3303 
3304 	ret = sysctl_handle_string(oidp, fc_str, sizeof(fc_str), req);
3305 	if ((ret) || (req->newptr == NULL))
3306 		return (ret);
3307 
3308 	/* Try to parse input as a string, first */
3309 	if (strcasecmp(ice_fc_str(ICE_FC_FULL), fc_str) == 0) {
3310 		new_mode = ICE_FC_FULL;
3311 		mode_set = true;
3312 	}
3313 	else if (strcasecmp(ice_fc_str(ICE_FC_TX_PAUSE), fc_str) == 0) {
3314 		new_mode = ICE_FC_TX_PAUSE;
3315 		mode_set = true;
3316 	}
3317 	else if (strcasecmp(ice_fc_str(ICE_FC_RX_PAUSE), fc_str) == 0) {
3318 		new_mode = ICE_FC_RX_PAUSE;
3319 		mode_set = true;
3320 	}
3321 	else if (strcasecmp(ice_fc_str(ICE_FC_NONE), fc_str) == 0) {
3322 		new_mode = ICE_FC_NONE;
3323 		mode_set = true;
3324 	}
3325 
3326 	/*
3327 	 * Then check if it's an integer, for compatibility with the method
3328 	 * used in older drivers.
3329 	 */
3330 	if (!mode_set) {
3331 		fc_num = strtol(fc_str, &fc_str_end, 0);
3332 		if (fc_str_end == fc_str)
3333 			fc_num = -1;
3334 		switch (fc_num) {
3335 		case 3:
3336 			new_mode = ICE_FC_FULL;
3337 			break;
3338 		case 2:
3339 			new_mode = ICE_FC_TX_PAUSE;
3340 			break;
3341 		case 1:
3342 			new_mode = ICE_FC_RX_PAUSE;
3343 			break;
3344 		case 0:
3345 			new_mode = ICE_FC_NONE;
3346 			break;
3347 		default:
3348 			device_printf(dev,
3349 			    "%s: \"%s\" is not a valid flow control mode\n",
3350 			    __func__, fc_str);
3351 			return (EINVAL);
3352 		}
3353 	}
3354 
3355 	/* Save flow control mode from user */
3356 	pi->phy.curr_user_fc_req = new_mode;
3357 
3358 	/* Turn off Priority Flow Control when Link Flow Control is enabled */
3359 	if ((hw->port_info->qos_cfg.is_sw_lldp) &&
3360 	    (hw->port_info->qos_cfg.local_dcbx_cfg.pfc.pfcena != 0) &&
3361 	    (new_mode != ICE_FC_NONE)) {
3362 		ret = ice_config_pfc(sc, 0x0);
3363 		if (ret)
3364 			return (ret);
3365 	}
3366 
3367 	/* Apply settings requested by user */
3368 	return ice_apply_saved_phy_cfg(sc, ICE_APPLY_FC);
3369 }
3370 
3371 /**
3372  * ice_sysctl_negotiated_fc - Display currently negotiated FC mode
3373  * @oidp: sysctl oid structure
3374  * @arg1: pointer to private data structure
3375  * @arg2: unused
3376  * @req: sysctl request pointer
3377  *
3378  * On read: Displays the currently negotiated flow control settings.
3379  *
3380  * If link is not established, this will report ICE_FC_NONE, as no flow
3381  * control is negotiated while link is down.
3382  */
3383 static int
3384 ice_sysctl_negotiated_fc(SYSCTL_HANDLER_ARGS)
3385 {
3386 	struct ice_softc *sc = (struct ice_softc *)arg1;
3387 	struct ice_port_info *pi = sc->hw.port_info;
3388 	const char *negotiated_fc;
3389 
3390 	UNREFERENCED_PARAMETER(arg2);
3391 
3392 	if (ice_driver_is_detaching(sc))
3393 		return (ESHUTDOWN);
3394 
3395 	negotiated_fc = ice_flowcontrol_mode(pi);
3396 
3397 	return sysctl_handle_string(oidp, __DECONST(char *, negotiated_fc), 0, req);
3398 }
3399 
3400 /**
3401  * __ice_sysctl_phy_type_handler - Display/change supported PHY types/speeds
3402  * @oidp: sysctl oid structure
3403  * @arg1: pointer to private data structure
3404  * @arg2: unused
3405  * @req: sysctl request pointer
3406  * @is_phy_type_high: if true, handle the high PHY type instead of the low PHY type
3407  *
3408  * Private handler for phy_type_high and phy_type_low sysctls.
3409  */
3410 static int
3411 __ice_sysctl_phy_type_handler(SYSCTL_HANDLER_ARGS, bool is_phy_type_high)
3412 {
3413 	struct ice_softc *sc = (struct ice_softc *)arg1;
3414 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3415 	struct ice_aqc_set_phy_cfg_data cfg = { 0 };
3416 	struct ice_hw *hw = &sc->hw;
3417 	device_t dev = sc->dev;
3418 	enum ice_status status;
3419 	uint64_t types;
3420 	int ret;
3421 
3422 	UNREFERENCED_PARAMETER(arg2);
3423 
3424 	if (ice_driver_is_detaching(sc))
3425 		return (ESHUTDOWN);
3426 
3427 	status = ice_aq_get_phy_caps(hw->port_info, false, ICE_AQC_REPORT_ACTIVE_CFG,
3428 				     &pcaps, NULL);
3429 	if (status != ICE_SUCCESS) {
3430 		device_printf(dev,
3431 		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
3432 		    __func__, ice_status_str(status),
3433 		    ice_aq_str(hw->adminq.sq_last_status));
3434 		return (EIO);
3435 	}
3436 
3437 	if (is_phy_type_high)
3438 		types = pcaps.phy_type_high;
3439 	else
3440 		types = pcaps.phy_type_low;
3441 
3442 	ret = sysctl_handle_64(oidp, &types, sizeof(types), req);
3443 	if ((ret) || (req->newptr == NULL))
3444 		return (ret);
3445 
3446 	ice_copy_phy_caps_to_cfg(hw->port_info, &pcaps, &cfg);
3447 
3448 	if (is_phy_type_high)
3449 		cfg.phy_type_high = types & hw->port_info->phy.phy_type_high;
3450 	else
3451 		cfg.phy_type_low = types & hw->port_info->phy.phy_type_low;
3452 	cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
3453 
3454 	status = ice_aq_set_phy_cfg(hw, hw->port_info, &cfg, NULL);
3455 	if (status != ICE_SUCCESS) {
3456 		device_printf(dev,
3457 		    "%s: ice_aq_set_phy_cfg failed; status %s, aq_err %s\n",
3458 		    __func__, ice_status_str(status),
3459 		    ice_aq_str(hw->adminq.sq_last_status));
3460 		return (EIO);
3461 	}
3462 
3463 	return (0);
3464 
3465 }
3466 
3467 /**
3468  * ice_sysctl_phy_type_low - Display/change supported lower PHY types/speeds
3469  * @oidp: sysctl oid structure
3470  * @arg1: pointer to private data structure
3471  * @arg2: unused
3472  * @req: sysctl request pointer
3473  *
3474  * On read: Displays the currently supported lower PHY types
3475  * On write: Sets the device's supported low PHY types
3476  */
3477 static int
3478 ice_sysctl_phy_type_low(SYSCTL_HANDLER_ARGS)
3479 {
3480 	return __ice_sysctl_phy_type_handler(oidp, arg1, arg2, req, false);
3481 }
3482 
3483 /**
3484  * ice_sysctl_phy_type_high - Display/change supported higher PHY types/speeds
3485  * @oidp: sysctl oid structure
3486  * @arg1: pointer to private data structure
3487  * @arg2: unused
3488  * @req: sysctl request pointer
3489  *
3490  * On read: Displays the currently supported higher PHY types
3491  * On write: Sets the device's supported high PHY types
3492  */
3493 static int
3494 ice_sysctl_phy_type_high(SYSCTL_HANDLER_ARGS)
3495 {
3496 	return __ice_sysctl_phy_type_handler(oidp, arg1, arg2, req, true);
3497 }
3498 
3499 /**
3500  * ice_sysctl_phy_caps - Display response from Get PHY abililties
3501  * @oidp: sysctl oid structure
3502  * @arg1: pointer to private data structure
3503  * @arg2: unused
3504  * @req: sysctl request pointer
3505  * @report_mode: the mode to report
3506  *
3507  * On read: Display the response from Get PHY abillities with the given report
3508  * mode.
3509  */
3510 static int
3511 ice_sysctl_phy_caps(SYSCTL_HANDLER_ARGS, u8 report_mode)
3512 {
3513 	struct ice_softc *sc = (struct ice_softc *)arg1;
3514 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3515 	struct ice_hw *hw = &sc->hw;
3516 	struct ice_port_info *pi = hw->port_info;
3517 	device_t dev = sc->dev;
3518 	enum ice_status status;
3519 	int ret;
3520 
3521 	UNREFERENCED_PARAMETER(arg2);
3522 
3523 	ret = priv_check(curthread, PRIV_DRIVER);
3524 	if (ret)
3525 		return (ret);
3526 
3527 	if (ice_driver_is_detaching(sc))
3528 		return (ESHUTDOWN);
3529 
3530 	status = ice_aq_get_phy_caps(pi, true, report_mode, &pcaps, NULL);
3531 	if (status != ICE_SUCCESS) {
3532 		device_printf(dev,
3533 		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
3534 		    __func__, ice_status_str(status),
3535 		    ice_aq_str(hw->adminq.sq_last_status));
3536 		return (EIO);
3537 	}
3538 
3539 	ret = sysctl_handle_opaque(oidp, &pcaps, sizeof(pcaps), req);
3540 	if (req->newptr != NULL)
3541 		return (EPERM);
3542 
3543 	return (ret);
3544 }
3545 
3546 /**
3547  * ice_sysctl_phy_sw_caps - Display response from Get PHY abililties
3548  * @oidp: sysctl oid structure
3549  * @arg1: pointer to private data structure
3550  * @arg2: unused
3551  * @req: sysctl request pointer
3552  *
3553  * On read: Display the response from Get PHY abillities reporting the last
3554  * software configuration.
3555  */
3556 static int
3557 ice_sysctl_phy_sw_caps(SYSCTL_HANDLER_ARGS)
3558 {
3559 	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
3560 				   ICE_AQC_REPORT_ACTIVE_CFG);
3561 }
3562 
3563 /**
3564  * ice_sysctl_phy_nvm_caps - Display response from Get PHY abililties
3565  * @oidp: sysctl oid structure
3566  * @arg1: pointer to private data structure
3567  * @arg2: unused
3568  * @req: sysctl request pointer
3569  *
3570  * On read: Display the response from Get PHY abillities reporting the NVM
3571  * configuration.
3572  */
3573 static int
3574 ice_sysctl_phy_nvm_caps(SYSCTL_HANDLER_ARGS)
3575 {
3576 	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
3577 				   ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA);
3578 }
3579 
3580 /**
3581  * ice_sysctl_phy_topo_caps - Display response from Get PHY abililties
3582  * @oidp: sysctl oid structure
3583  * @arg1: pointer to private data structure
3584  * @arg2: unused
3585  * @req: sysctl request pointer
3586  *
3587  * On read: Display the response from Get PHY abillities reporting the
3588  * topology configuration.
3589  */
3590 static int
3591 ice_sysctl_phy_topo_caps(SYSCTL_HANDLER_ARGS)
3592 {
3593 	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
3594 				   ICE_AQC_REPORT_TOPO_CAP_MEDIA);
3595 }
3596 
3597 /**
3598  * ice_sysctl_phy_link_status - Display response from Get Link Status
3599  * @oidp: sysctl oid structure
3600  * @arg1: pointer to private data structure
3601  * @arg2: unused
3602  * @req: sysctl request pointer
3603  *
3604  * On read: Display the response from firmware for the Get Link Status
3605  * request.
3606  */
3607 static int
3608 ice_sysctl_phy_link_status(SYSCTL_HANDLER_ARGS)
3609 {
3610 	struct ice_aqc_get_link_status_data link_data = { 0 };
3611 	struct ice_softc *sc = (struct ice_softc *)arg1;
3612 	struct ice_hw *hw = &sc->hw;
3613 	struct ice_port_info *pi = hw->port_info;
3614 	struct ice_aqc_get_link_status *resp;
3615 	struct ice_aq_desc desc;
3616 	device_t dev = sc->dev;
3617 	enum ice_status status;
3618 	int ret;
3619 
3620 	UNREFERENCED_PARAMETER(arg2);
3621 
3622 	/*
3623 	 * Ensure that only contexts with driver privilege are allowed to
3624 	 * access this information
3625 	 */
3626 	ret = priv_check(curthread, PRIV_DRIVER);
3627 	if (ret)
3628 		return (ret);
3629 
3630 	if (ice_driver_is_detaching(sc))
3631 		return (ESHUTDOWN);
3632 
3633 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_status);
3634 	resp = &desc.params.get_link_status;
3635 	resp->lport_num = pi->lport;
3636 
3637 	status = ice_aq_send_cmd(hw, &desc, &link_data, sizeof(link_data), NULL);
3638 	if (status != ICE_SUCCESS) {
3639 		device_printf(dev,
3640 		    "%s: ice_aq_send_cmd failed; status %s, aq_err %s\n",
3641 		    __func__, ice_status_str(status),
3642 		    ice_aq_str(hw->adminq.sq_last_status));
3643 		return (EIO);
3644 	}
3645 
3646 	ret = sysctl_handle_opaque(oidp, &link_data, sizeof(link_data), req);
3647 	if (req->newptr != NULL)
3648 		return (EPERM);
3649 
3650 	return (ret);
3651 }
3652 
3653 /**
3654  * ice_sysctl_fw_cur_lldp_persist_status - Display current FW LLDP status
3655  * @oidp: sysctl oid structure
3656  * @arg1: pointer to private softc structure
3657  * @arg2: unused
3658  * @req: sysctl request pointer
3659  *
3660  * On read: Displays current persistent LLDP status.
3661  */
3662 static int
3663 ice_sysctl_fw_cur_lldp_persist_status(SYSCTL_HANDLER_ARGS)
3664 {
3665 	struct ice_softc *sc = (struct ice_softc *)arg1;
3666 	struct ice_hw *hw = &sc->hw;
3667 	device_t dev = sc->dev;
3668 	enum ice_status status;
3669 	struct sbuf *sbuf;
3670 	u32 lldp_state;
3671 
3672 	UNREFERENCED_PARAMETER(arg2);
3673 	UNREFERENCED_PARAMETER(oidp);
3674 
3675 	if (ice_driver_is_detaching(sc))
3676 		return (ESHUTDOWN);
3677 
3678 	status = ice_get_cur_lldp_persist_status(hw, &lldp_state);
3679 	if (status) {
3680 		device_printf(dev,
3681 		    "Could not acquire current LLDP persistence status, err %s aq_err %s\n",
3682 		    ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
3683 		return (EIO);
3684 	}
3685 
3686 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
3687 	sbuf_printf(sbuf, "%s", ice_fw_lldp_status(lldp_state));
3688 	sbuf_finish(sbuf);
3689 	sbuf_delete(sbuf);
3690 
3691 	return (0);
3692 }
3693 
3694 /**
3695  * ice_sysctl_fw_dflt_lldp_persist_status - Display default FW LLDP status
3696  * @oidp: sysctl oid structure
3697  * @arg1: pointer to private softc structure
3698  * @arg2: unused
3699  * @req: sysctl request pointer
3700  *
3701  * On read: Displays default persistent LLDP status.
3702  */
3703 static int
3704 ice_sysctl_fw_dflt_lldp_persist_status(SYSCTL_HANDLER_ARGS)
3705 {
3706 	struct ice_softc *sc = (struct ice_softc *)arg1;
3707 	struct ice_hw *hw = &sc->hw;
3708 	device_t dev = sc->dev;
3709 	enum ice_status status;
3710 	struct sbuf *sbuf;
3711 	u32 lldp_state;
3712 
3713 	UNREFERENCED_PARAMETER(arg2);
3714 	UNREFERENCED_PARAMETER(oidp);
3715 
3716 	if (ice_driver_is_detaching(sc))
3717 		return (ESHUTDOWN);
3718 
3719 	status = ice_get_dflt_lldp_persist_status(hw, &lldp_state);
3720 	if (status) {
3721 		device_printf(dev,
3722 		    "Could not acquire default LLDP persistence status, err %s aq_err %s\n",
3723 		    ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
3724 		return (EIO);
3725 	}
3726 
3727 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
3728 	sbuf_printf(sbuf, "%s", ice_fw_lldp_status(lldp_state));
3729 	sbuf_finish(sbuf);
3730 	sbuf_delete(sbuf);
3731 
3732 	return (0);
3733 }
3734 
3735 /**
3736  * ice_dscp_is_mapped - Check for non-zero DSCP to TC mappings
3737  * @dcbcfg: Configuration struct to check for mappings in
3738  *
3739  * @return true if there exists a non-zero DSCP to TC mapping
3740  * inside the input DCB configuration struct.
3741  */
3742 static bool
3743 ice_dscp_is_mapped(struct ice_dcbx_cfg *dcbcfg)
3744 {
3745 	for (int i = 0; i < ICE_DSCP_NUM_VAL; i++)
3746 		if (dcbcfg->dscp_map[i] != 0)
3747 			return (true);
3748 
3749 	return (false);
3750 }
3751 
/**
 * @def ICE_SYSCTL_HELP_FW_LLDP_AGENT
 * @brief Help text listing the FW LLDP agent states
 *
 * Describes the two values of the FW LLDP agent state; see
 * ice_sysctl_fw_lldp_agent for the handler that displays or changes it.
 */
#define ICE_SYSCTL_HELP_FW_LLDP_AGENT	\
"\nDisplay or change FW LLDP agent state:" \
"\n\t0 - disabled"			\
"\n\t1 - enabled"
3756 
3757 /**
3758  * ice_sysctl_fw_lldp_agent - Display or change the FW LLDP agent status
3759  * @oidp: sysctl oid structure
3760  * @arg1: pointer to private softc structure
3761  * @arg2: unused
3762  * @req: sysctl request pointer
3763  *
3764  * On read: Displays whether the FW LLDP agent is running
3765  * On write: Persistently enables or disables the FW LLDP agent
3766  */
3767 static int
3768 ice_sysctl_fw_lldp_agent(SYSCTL_HANDLER_ARGS)
3769 {
3770 	struct ice_softc *sc = (struct ice_softc *)arg1;
3771 	struct ice_dcbx_cfg *local_dcbx_cfg;
3772 	struct ice_hw *hw = &sc->hw;
3773 	device_t dev = sc->dev;
3774 	enum ice_status status;
3775 	int ret;
3776 	u32 old_state;
3777 	u8 fw_lldp_enabled;
3778 	bool retried_start_lldp = false;
3779 
3780 	UNREFERENCED_PARAMETER(arg2);
3781 
3782 	if (ice_driver_is_detaching(sc))
3783 		return (ESHUTDOWN);
3784 
3785 	status = ice_get_cur_lldp_persist_status(hw, &old_state);
3786 	if (status) {
3787 		device_printf(dev,
3788 		    "Could not acquire current LLDP persistence status, err %s aq_err %s\n",
3789 		    ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
3790 		return (EIO);
3791 	}
3792 
3793 	if (old_state > ICE_LLDP_ADMINSTATUS_ENA_RXTX) {
3794 		status = ice_get_dflt_lldp_persist_status(hw, &old_state);
3795 		if (status) {
3796 			device_printf(dev,
3797 			    "Could not acquire default LLDP persistence status, err %s aq_err %s\n",
3798 			    ice_status_str(status),
3799 			    ice_aq_str(hw->adminq.sq_last_status));
3800 			return (EIO);
3801 		}
3802 	}
3803 	if (old_state == 0)
3804 		fw_lldp_enabled = false;
3805 	else
3806 		fw_lldp_enabled = true;
3807 
3808 	ret = sysctl_handle_bool(oidp, &fw_lldp_enabled, 0, req);
3809 	if ((ret) || (req->newptr == NULL))
3810 		return (ret);
3811 
3812 	if (old_state == 0 && fw_lldp_enabled == false)
3813 		return (0);
3814 
3815 	if (old_state != 0 && fw_lldp_enabled == true)
3816 		return (0);
3817 
3818 	/* Block transition to FW LLDP if DSCP mode is enabled */
3819 	local_dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
3820 	if ((local_dcbx_cfg->pfc_mode == ICE_QOS_MODE_DSCP) &&
3821 	    ice_dscp_is_mapped(local_dcbx_cfg)) {
3822 		device_printf(dev,
3823 			      "Cannot enable FW-LLDP agent while DSCP QoS is active.\n");
3824 		return (EOPNOTSUPP);
3825 	}
3826 
3827 	if (fw_lldp_enabled == false) {
3828 		status = ice_aq_stop_lldp(hw, true, true, NULL);
3829 		/* EPERM is returned if the LLDP agent is already shutdown */
3830 		if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EPERM) {
3831 			device_printf(dev,
3832 			    "%s: ice_aq_stop_lldp failed; status %s, aq_err %s\n",
3833 			    __func__, ice_status_str(status),
3834 			    ice_aq_str(hw->adminq.sq_last_status));
3835 			return (EIO);
3836 		}
3837 		ice_aq_set_dcb_parameters(hw, true, NULL);
3838 		hw->port_info->qos_cfg.is_sw_lldp = true;
3839 		ice_add_rx_lldp_filter(sc);
3840 	} else {
3841 		ice_del_rx_lldp_filter(sc);
3842 retry_start_lldp:
3843 		status = ice_aq_start_lldp(hw, true, NULL);
3844 		if (status) {
3845 			switch (hw->adminq.sq_last_status) {
3846 			/* EEXIST is returned if the LLDP agent is already started */
3847 			case ICE_AQ_RC_EEXIST:
3848 				break;
3849 			case ICE_AQ_RC_EAGAIN:
3850 				/* Retry command after a 2 second wait */
3851 				if (retried_start_lldp == false) {
3852 					retried_start_lldp = true;
3853 					pause("slldp", ICE_START_LLDP_RETRY_WAIT);
3854 					goto retry_start_lldp;
3855 				}
3856 				/* Fallthrough */
3857 			default:
3858 				device_printf(dev,
3859 				    "%s: ice_aq_start_lldp failed; status %s, aq_err %s\n",
3860 				    __func__, ice_status_str(status),
3861 				    ice_aq_str(hw->adminq.sq_last_status));
3862 				return (EIO);
3863 			}
3864 		}
3865 		ice_start_dcbx_agent(sc);
3866 		hw->port_info->qos_cfg.is_sw_lldp = false;
3867 	}
3868 
3869 	return (ret);
3870 }
3871 
3872 #define ICE_SYSCTL_HELP_ETS_MIN_RATE \
3873 "\nIn FW DCB mode (fw_lldp_agent=1), displays the current ETS bandwidth table." \
3874 "\nIn SW DCB mode, displays and allows setting the table." \
3875 "\nInput must be in the format e.g. 30,10,10,10,10,10,10,10" \
3876 "\nWhere the bandwidth total must add up to 100"
3877 
3878 /**
3879  * ice_sysctl_ets_min_rate - Report/configure ETS bandwidth
3880  * @oidp: sysctl oid structure
3881  * @arg1: pointer to private data structure
3882  * @arg2: unused
3883  * @req: sysctl request pointer
3884  *
3885  * Returns the current ETS TC bandwidth table
3886  * cached by the driver.
3887  *
3888  * In SW DCB mode this sysctl also accepts a value that will
3889  * be sent to the firmware for configuration.
3890  */
3891 static int
3892 ice_sysctl_ets_min_rate(SYSCTL_HANDLER_ARGS)
3893 {
3894 	struct ice_softc *sc = (struct ice_softc *)arg1;
3895 	struct ice_dcbx_cfg *local_dcbx_cfg;
3896 	struct ice_port_info *pi;
3897 	struct ice_hw *hw = &sc->hw;
3898 	device_t dev = sc->dev;
3899 	enum ice_status status;
3900 	struct sbuf *sbuf;
3901 	int ret;
3902 
3903 	/* Store input rates from user */
3904 	char ets_user_buf[128] = "";
3905 	u8 new_ets_table[ICE_MAX_TRAFFIC_CLASS] = {};
3906 
3907 	UNREFERENCED_PARAMETER(arg2);
3908 
3909 	if (ice_driver_is_detaching(sc))
3910 		return (ESHUTDOWN);
3911 
3912 	if (req->oldptr == NULL && req->newptr == NULL) {
3913 		ret = SYSCTL_OUT(req, 0, 128);
3914 		return (ret);
3915 	}
3916 
3917 	pi = hw->port_info;
3918 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
3919 
3920 	sbuf = sbuf_new(NULL, ets_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
3921 
3922 	/* Format ETS BW data for output */
3923 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
3924 		sbuf_printf(sbuf, "%d", local_dcbx_cfg->etscfg.tcbwtable[i]);
3925 		if (i != ICE_MAX_TRAFFIC_CLASS - 1)
3926 			sbuf_printf(sbuf, ",");
3927 	}
3928 
3929 	sbuf_finish(sbuf);
3930 	sbuf_delete(sbuf);
3931 
3932 	/* Read in the new ETS values */
3933 	ret = sysctl_handle_string(oidp, ets_user_buf, sizeof(ets_user_buf), req);
3934 	if ((ret) || (req->newptr == NULL))
3935 		return (ret);
3936 
3937 	/* Don't allow setting changes in FW DCB mode */
3938 	if (!hw->port_info->qos_cfg.is_sw_lldp)
3939 		return (EPERM);
3940 
3941 	ret = ice_ets_str_to_tbl(ets_user_buf, new_ets_table, 100);
3942 	if (ret) {
3943 		device_printf(dev, "%s: Could not parse input BW table: %s\n",
3944 		    __func__, ets_user_buf);
3945 		return (ret);
3946 	}
3947 
3948 	if (!ice_check_ets_bw(new_ets_table)) {
3949 		device_printf(dev, "%s: Bandwidth sum does not equal 100: %s\n",
3950 		    __func__, ets_user_buf);
3951 		return (EINVAL);
3952 	}
3953 
3954 	memcpy(local_dcbx_cfg->etscfg.tcbwtable, new_ets_table,
3955 	    sizeof(new_ets_table));
3956 
3957 	/* If BW > 0, then set TSA entry to 2 */
3958 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
3959 		if (new_ets_table[i] > 0)
3960 			local_dcbx_cfg->etscfg.tsatable[i] = 2;
3961 		else
3962 			local_dcbx_cfg->etscfg.tsatable[i] = 0;
3963 	}
3964 	local_dcbx_cfg->etscfg.willing = 0;
3965 	local_dcbx_cfg->etsrec = local_dcbx_cfg->etscfg;
3966 	local_dcbx_cfg->app_mode = ICE_DCBX_APPS_NON_WILLING;
3967 
3968 	status = ice_set_dcb_cfg(pi);
3969 	if (status) {
3970 		device_printf(dev,
3971 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
3972 		    __func__, ice_status_str(status),
3973 		    ice_aq_str(hw->adminq.sq_last_status));
3974 		return (EIO);
3975 	}
3976 
3977 	ice_do_dcb_reconfig(sc, false);
3978 
3979 	return (0);
3980 }
3981 
3982 #define ICE_SYSCTL_HELP_UP2TC_MAP \
3983 "\nIn FW DCB mode (fw_lldp_agent=1), displays the current ETS priority assignment table." \
3984 "\nIn SW DCB mode, displays and allows setting the table." \
3985 "\nInput must be in this format: 0,1,2,3,4,5,6,7" \
3986 "\nWhere the 1st number is the TC for UP0, 2nd number is the TC for UP1, etc"
3987 
3988 /**
3989  * ice_sysctl_up2tc_map - Report or configure UP2TC mapping
3990  * @oidp: sysctl oid structure
3991  * @arg1: pointer to private data structure
3992  * @arg2: unused
3993  * @req: sysctl request pointer
3994  *
3995  * In FW DCB mode, returns the current ETS prio table /
3996  * UP2TC mapping from the local MIB.
3997  *
3998  * In SW DCB mode this sysctl also accepts a value that will
3999  * be sent to the firmware for configuration.
4000  */
4001 static int
4002 ice_sysctl_up2tc_map(SYSCTL_HANDLER_ARGS)
4003 {
4004 	struct ice_softc *sc = (struct ice_softc *)arg1;
4005 	struct ice_dcbx_cfg *local_dcbx_cfg;
4006 	struct ice_port_info *pi;
4007 	struct ice_hw *hw = &sc->hw;
4008 	device_t dev = sc->dev;
4009 	enum ice_status status;
4010 	struct sbuf *sbuf;
4011 	int ret;
4012 
4013 	/* Store input rates from user */
4014 	char up2tc_user_buf[128] = "";
4015 	/* This array is indexed by UP, not TC */
4016 	u8 new_up2tc[ICE_MAX_TRAFFIC_CLASS] = {};
4017 
4018 	UNREFERENCED_PARAMETER(arg2);
4019 
4020 	if (ice_driver_is_detaching(sc))
4021 		return (ESHUTDOWN);
4022 
4023 	if (req->oldptr == NULL && req->newptr == NULL) {
4024 		ret = SYSCTL_OUT(req, 0, 128);
4025 		return (ret);
4026 	}
4027 
4028 	pi = hw->port_info;
4029 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4030 
4031 	sbuf = sbuf_new(NULL, up2tc_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
4032 
4033 	/* Format ETS Priority Mapping Table for output */
4034 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
4035 		sbuf_printf(sbuf, "%d", local_dcbx_cfg->etscfg.prio_table[i]);
4036 		if (i != ICE_MAX_TRAFFIC_CLASS - 1)
4037 			sbuf_printf(sbuf, ",");
4038 	}
4039 
4040 	sbuf_finish(sbuf);
4041 	sbuf_delete(sbuf);
4042 
4043 	/* Read in the new ETS priority mapping */
4044 	ret = sysctl_handle_string(oidp, up2tc_user_buf, sizeof(up2tc_user_buf), req);
4045 	if ((ret) || (req->newptr == NULL))
4046 		return (ret);
4047 
4048 	/* Don't allow setting changes in FW DCB mode */
4049 	if (!hw->port_info->qos_cfg.is_sw_lldp)
4050 		return (EPERM);
4051 
4052 	ret = ice_ets_str_to_tbl(up2tc_user_buf, new_up2tc, 7);
4053 	if (ret) {
4054 		device_printf(dev, "%s: Could not parse input priority assignment table: %s\n",
4055 		    __func__, up2tc_user_buf);
4056 		return (ret);
4057 	}
4058 
4059 	/* Prepare updated ETS CFG/REC TLVs */
4060 	memcpy(local_dcbx_cfg->etscfg.prio_table, new_up2tc,
4061 	    sizeof(new_up2tc));
4062 	memcpy(local_dcbx_cfg->etsrec.prio_table, new_up2tc,
4063 	    sizeof(new_up2tc));
4064 
4065 	status = ice_set_dcb_cfg(pi);
4066 	if (status) {
4067 		device_printf(dev,
4068 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
4069 		    __func__, ice_status_str(status),
4070 		    ice_aq_str(hw->adminq.sq_last_status));
4071 		return (EIO);
4072 	}
4073 
4074 	ice_do_dcb_reconfig(sc, false);
4075 
4076 	return (0);
4077 }
4078 
4079 /**
4080  * ice_config_pfc - helper function to set PFC config in FW
4081  * @sc: device private structure
4082  * @new_mode: bit flags indicating PFC status for TCs
4083  *
4084  * @pre must be in SW DCB mode
4085  *
4086  * Configures the driver's local PFC TLV and sends it to the
4087  * FW for configuration, then reconfigures the driver/VSI
4088  * for DCB if needed.
4089  */
4090 static int
4091 ice_config_pfc(struct ice_softc *sc, u8 new_mode)
4092 {
4093 	struct ice_dcbx_cfg *local_dcbx_cfg;
4094 	struct ice_hw *hw = &sc->hw;
4095 	struct ice_port_info *pi;
4096 	device_t dev = sc->dev;
4097 	enum ice_status status;
4098 
4099 	pi = hw->port_info;
4100 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4101 
4102 	/* Prepare updated PFC TLV */
4103 	local_dcbx_cfg->pfc.pfcena = new_mode;
4104 	local_dcbx_cfg->pfc.pfccap = ICE_MAX_TRAFFIC_CLASS;
4105 	local_dcbx_cfg->pfc.willing = 0;
4106 	local_dcbx_cfg->pfc.mbc = 0;
4107 
4108 	/* Warn if PFC is being disabled with RoCE v2 in use */
4109 	if (new_mode == 0 && sc->rdma_entry.attached)
4110 		device_printf(dev,
4111 		    "WARNING: Recommended that Priority Flow Control is enabled when RoCEv2 is in use\n");
4112 
4113 	status = ice_set_dcb_cfg(pi);
4114 	if (status) {
4115 		device_printf(dev,
4116 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
4117 		    __func__, ice_status_str(status),
4118 		    ice_aq_str(hw->adminq.sq_last_status));
4119 		return (EIO);
4120 	}
4121 
4122 	ice_do_dcb_reconfig(sc, false);
4123 
4124 	return (0);
4125 }
4126 
4127 #define ICE_SYSCTL_HELP_PFC_CONFIG \
4128 "\nIn FW DCB mode (fw_lldp_agent=1), displays the current Priority Flow Control configuration" \
4129 "\nIn SW DCB mode, displays and allows setting the configuration" \
4130 "\nInput/Output is in this format: 0xff" \
4131 "\nWhere bit position # enables/disables PFC for that Traffic Class #"
4132 
4133 /**
4134  * ice_sysctl_pfc_config - Report or configure enabled PFC TCs
4135  * @oidp: sysctl oid structure
4136  * @arg1: pointer to private data structure
4137  * @arg2: unused
4138  * @req: sysctl request pointer
4139  *
4140  * In FW DCB mode, returns a bitmap containing the current TCs
4141  * that have PFC enabled on them.
4142  *
4143  * In SW DCB mode this sysctl also accepts a value that will
4144  * be sent to the firmware for configuration.
4145  */
4146 static int
4147 ice_sysctl_pfc_config(SYSCTL_HANDLER_ARGS)
4148 {
4149 	struct ice_softc *sc = (struct ice_softc *)arg1;
4150 	struct ice_dcbx_cfg *local_dcbx_cfg;
4151 	struct ice_port_info *pi;
4152 	struct ice_hw *hw = &sc->hw;
4153 	int ret;
4154 
4155 	/* Store input flags from user */
4156 	u8 user_pfc;
4157 
4158 	UNREFERENCED_PARAMETER(arg2);
4159 
4160 	if (ice_driver_is_detaching(sc))
4161 		return (ESHUTDOWN);
4162 
4163 	if (req->oldptr == NULL && req->newptr == NULL) {
4164 		ret = SYSCTL_OUT(req, 0, sizeof(u8));
4165 		return (ret);
4166 	}
4167 
4168 	pi = hw->port_info;
4169 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4170 
4171 	/* Format current PFC enable setting for output */
4172 	user_pfc = local_dcbx_cfg->pfc.pfcena;
4173 
4174 	/* Read in the new PFC config */
4175 	ret = sysctl_handle_8(oidp, &user_pfc, 0, req);
4176 	if ((ret) || (req->newptr == NULL))
4177 		return (ret);
4178 
4179 	/* Don't allow setting changes in FW DCB mode */
4180 	if (!hw->port_info->qos_cfg.is_sw_lldp)
4181 		return (EPERM);
4182 
4183 	/* If LFC is active and PFC is going to be turned on, turn LFC off */
4184 	if (user_pfc != 0 && pi->phy.curr_user_fc_req != ICE_FC_NONE) {
4185 		pi->phy.curr_user_fc_req = ICE_FC_NONE;
4186 		ret = ice_apply_saved_phy_cfg(sc, ICE_APPLY_FC);
4187 		if (ret)
4188 			return (ret);
4189 	}
4190 
4191 	return ice_config_pfc(sc, user_pfc);
4192 }
4193 
4194 #define ICE_SYSCTL_HELP_PFC_MODE \
4195 "\nDisplay and set the current QoS mode for the firmware" \
4196 "\n\t0: VLAN UP mode" \
4197 "\n\t1: DSCP mode"
4198 
4199 /**
4200  * ice_sysctl_pfc_mode
4201  * @oidp: sysctl oid structure
4202  * @arg1: pointer to private data structure
4203  * @arg2: unused
4204  * @req: sysctl request pointer
4205  *
4206  * Gets and sets whether the port is in DSCP or VLAN PCP-based
4207  * PFC mode. This is also used to set whether DSCP or VLAN PCP
4208  * -based settings are configured for DCB.
4209  */
4210 static int
4211 ice_sysctl_pfc_mode(SYSCTL_HANDLER_ARGS)
4212 {
4213 	struct ice_softc *sc = (struct ice_softc *)arg1;
4214 	struct ice_dcbx_cfg *local_dcbx_cfg;
4215 	struct ice_port_info *pi;
4216 	struct ice_hw *hw = &sc->hw;
4217 	device_t dev = sc->dev;
4218 	enum ice_status status;
4219 	u8 user_pfc_mode, aq_pfc_mode;
4220 	int ret;
4221 
4222 	UNREFERENCED_PARAMETER(arg2);
4223 
4224 	if (ice_driver_is_detaching(sc))
4225 		return (ESHUTDOWN);
4226 
4227 	if (req->oldptr == NULL && req->newptr == NULL) {
4228 		ret = SYSCTL_OUT(req, 0, sizeof(u8));
4229 		return (ret);
4230 	}
4231 
4232 	pi = hw->port_info;
4233 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4234 
4235 	user_pfc_mode = local_dcbx_cfg->pfc_mode;
4236 
4237 	/* Read in the new mode */
4238 	ret = sysctl_handle_8(oidp, &user_pfc_mode, 0, req);
4239 	if ((ret) || (req->newptr == NULL))
4240 		return (ret);
4241 
4242 	/* Don't allow setting changes in FW DCB mode */
4243 	if (!hw->port_info->qos_cfg.is_sw_lldp)
4244 		return (EPERM);
4245 
4246 	/* Currently, there are only two modes */
4247 	switch (user_pfc_mode) {
4248 	case 0:
4249 		aq_pfc_mode = ICE_AQC_PFC_VLAN_BASED_PFC;
4250 		break;
4251 	case 1:
4252 		aq_pfc_mode = ICE_AQC_PFC_DSCP_BASED_PFC;
4253 		break;
4254 	default:
4255 		device_printf(dev,
4256 		    "%s: Valid input range is 0-1 (input %d)\n",
4257 		    __func__, user_pfc_mode);
4258 		return (EINVAL);
4259 	}
4260 
4261 	status = ice_aq_set_pfc_mode(hw, aq_pfc_mode, NULL);
4262 	if (status == ICE_ERR_NOT_SUPPORTED) {
4263 		device_printf(dev,
4264 		    "%s: Failed to set PFC mode; DCB not supported\n",
4265 		    __func__);
4266 		return (ENODEV);
4267 	}
4268 	if (status) {
4269 		device_printf(dev,
4270 		    "%s: Failed to set PFC mode; status %s, aq_err %s\n",
4271 		    __func__, ice_status_str(status),
4272 		    ice_aq_str(hw->adminq.sq_last_status));
4273 		return (EIO);
4274 	}
4275 
4276 	/* Reset settings to default when mode is changed */
4277 	ice_set_default_local_mib_settings(sc);
4278 	/* Cache current settings and reconfigure */
4279 	local_dcbx_cfg->pfc_mode = user_pfc_mode;
4280 	ice_do_dcb_reconfig(sc, false);
4281 
4282 	return (0);
4283 }
4284 
4285 /**
4286  * ice_add_device_sysctls - add device specific dynamic sysctls
4287  * @sc: device private structure
4288  *
4289  * Add per-device dynamic sysctls which show device configuration or enable
4290  * configuring device functionality. For tunable values which can be set prior
4291  * to load, see ice_add_device_tunables.
4292  *
4293  * This function depends on the sysctl layout setup by ice_add_device_tunables,
4294  * and likely should be called near the end of the attach process.
4295  */
4296 void
4297 ice_add_device_sysctls(struct ice_softc *sc)
4298 {
4299 	struct sysctl_oid *hw_node;
4300 	device_t dev = sc->dev;
4301 
4302 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
4303 	struct sysctl_oid_list *ctx_list =
4304 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
4305 
4306 	SYSCTL_ADD_PROC(ctx, ctx_list,
4307 	    OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD,
4308 	    sc, 0, ice_sysctl_show_fw, "A", "Firmware version");
4309 
4310 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_HAS_PBA)) {
4311 		SYSCTL_ADD_PROC(ctx, ctx_list,
4312 		    OID_AUTO, "pba_number", CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4313 		    ice_sysctl_pba_number, "A", "Product Board Assembly Number");
4314 	}
4315 
4316 	SYSCTL_ADD_PROC(ctx, ctx_list,
4317 	    OID_AUTO, "ddp_version", CTLTYPE_STRING | CTLFLAG_RD,
4318 	    sc, 0, ice_sysctl_pkg_version, "A", "Active DDP package name and version");
4319 
4320 	SYSCTL_ADD_PROC(ctx, ctx_list,
4321 	    OID_AUTO, "current_speed", CTLTYPE_STRING | CTLFLAG_RD,
4322 	    sc, 0, ice_sysctl_current_speed, "A", "Current Port Link Speed");
4323 
4324 	SYSCTL_ADD_PROC(ctx, ctx_list,
4325 	    OID_AUTO, "requested_fec", CTLTYPE_STRING | CTLFLAG_RW,
4326 	    sc, 0, ice_sysctl_fec_config, "A", ICE_SYSCTL_HELP_FEC_CONFIG);
4327 
4328 	SYSCTL_ADD_PROC(ctx, ctx_list,
4329 	    OID_AUTO, "negotiated_fec", CTLTYPE_STRING | CTLFLAG_RD,
4330 	    sc, 0, ice_sysctl_negotiated_fec, "A", "Current Negotiated FEC mode");
4331 
4332 	SYSCTL_ADD_PROC(ctx, ctx_list,
4333 	    OID_AUTO, "fc", CTLTYPE_STRING | CTLFLAG_RW,
4334 	    sc, 0, ice_sysctl_fc_config, "A", ICE_SYSCTL_HELP_FC_CONFIG);
4335 
4336 	SYSCTL_ADD_PROC(ctx, ctx_list,
4337 	    OID_AUTO, "advertise_speed", CTLTYPE_U16 | CTLFLAG_RW,
4338 	    sc, 0, ice_sysctl_advertise_speed, "SU", ICE_SYSCTL_HELP_ADVERTISE_SPEED);
4339 
4340 	SYSCTL_ADD_PROC(ctx, ctx_list,
4341 	    OID_AUTO, "fw_lldp_agent", CTLTYPE_U8 | CTLFLAG_RWTUN,
4342 	    sc, 0, ice_sysctl_fw_lldp_agent, "CU", ICE_SYSCTL_HELP_FW_LLDP_AGENT);
4343 
4344 	SYSCTL_ADD_PROC(ctx, ctx_list,
4345 	    OID_AUTO, "ets_min_rate", CTLTYPE_STRING | CTLFLAG_RW,
4346 	    sc, 0, ice_sysctl_ets_min_rate, "A", ICE_SYSCTL_HELP_ETS_MIN_RATE);
4347 
4348 	SYSCTL_ADD_PROC(ctx, ctx_list,
4349 	    OID_AUTO, "up2tc_map", CTLTYPE_STRING | CTLFLAG_RW,
4350 	    sc, 0, ice_sysctl_up2tc_map, "A", ICE_SYSCTL_HELP_UP2TC_MAP);
4351 
4352 	SYSCTL_ADD_PROC(ctx, ctx_list,
4353 	    OID_AUTO, "pfc", CTLTYPE_U8 | CTLFLAG_RW,
4354 	    sc, 0, ice_sysctl_pfc_config, "CU", ICE_SYSCTL_HELP_PFC_CONFIG);
4355 
4356 	SYSCTL_ADD_PROC(ctx, ctx_list,
4357 	    OID_AUTO, "pfc_mode", CTLTYPE_U8 | CTLFLAG_RWTUN,
4358 	    sc, 0, ice_sysctl_pfc_mode, "CU", ICE_SYSCTL_HELP_PFC_MODE);
4359 
4360 	SYSCTL_ADD_PROC(ctx, ctx_list,
4361 	    OID_AUTO, "allow_no_fec_modules_in_auto",
4362 	    CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
4363 	    sc, 0, ice_sysctl_allow_no_fec_mod_in_auto, "CU",
4364 	    "Allow \"No FEC\" mode in FEC auto-negotiation");
4365 
4366 	ice_add_dscp2tc_map_sysctls(sc, ctx, ctx_list);
4367 
4368 	/* Differentiate software and hardware statistics, by keeping hw stats
4369 	 * in their own node. This isn't in ice_add_device_tunables, because
4370 	 * we won't have any CTLFLAG_TUN sysctls under this node.
4371 	 */
4372 	hw_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "hw", CTLFLAG_RD,
4373 				  NULL, "Port Hardware Statistics");
4374 
4375 	ice_add_sysctls_mac_stats(ctx, hw_node, &sc->stats.cur);
4376 
4377 	/* Add the main PF VSI stats now. Other VSIs will add their own stats
4378 	 * during creation
4379 	 */
4380 	ice_add_vsi_sysctls(&sc->pf_vsi);
4381 
4382 	/* Add sysctls related to debugging the device driver. This includes
4383 	 * sysctls which display additional internal driver state for use in
4384 	 * understanding what is happening within the driver.
4385 	 */
4386 	ice_add_debug_sysctls(sc);
4387 }
4388 
/**
 * @enum hmc_error_type
 * @brief enumeration of HMC errors
 *
 * Error type codes reported for an HMC error. ice_log_hmc_error() compares
 * the error type field it extracts from PFHMC_ERRORINFO against these
 * values. Codes marked reserved have no enumerator.
 */
enum hmc_error_type {
	HMC_ERR_PMF_INVALID			= 0,
	HMC_ERR_VF_IDX_INVALID			= 1,
	HMC_ERR_VF_PARENT_PF_INVALID		= 2,
	/* code 3: reserved */
	HMC_ERR_INDEX_TOO_BIG			= 4,
	HMC_ERR_ADDRESS_TOO_LARGE		= 5,
	HMC_ERR_SEGMENT_DESC_INVALID		= 6,
	HMC_ERR_SEGMENT_DESC_TOO_SMALL		= 7,
	HMC_ERR_PAGE_DESC_INVALID		= 8,
	HMC_ERR_UNSUPPORTED_REQUEST_COMPLETION	= 9,
	/* code 10: reserved */
	HMC_ERR_INVALID_OBJECT_TYPE		= 11,
	/* code 12: reserved */
};
4410 
4411 /**
4412  * ice_log_hmc_error - Log an HMC error message
4413  * @hw: device hw structure
4414  * @dev: the device to pass to device_printf()
4415  *
4416  * Log a message when an HMC error interrupt is triggered.
4417  */
4418 void
4419 ice_log_hmc_error(struct ice_hw *hw, device_t dev)
4420 {
4421 	u32 info, data;
4422 	u8 index, errtype, objtype;
4423 	bool isvf;
4424 
4425 	info = rd32(hw, PFHMC_ERRORINFO);
4426 	data = rd32(hw, PFHMC_ERRORDATA);
4427 
4428 	index = (u8)(info & PFHMC_ERRORINFO_PMF_INDEX_M);
4429 	errtype = (u8)((info & PFHMC_ERRORINFO_HMC_ERROR_TYPE_M) >>
4430 		       PFHMC_ERRORINFO_HMC_ERROR_TYPE_S);
4431 	objtype = (u8)((info & PFHMC_ERRORINFO_HMC_OBJECT_TYPE_M) >>
4432 		       PFHMC_ERRORINFO_HMC_OBJECT_TYPE_S);
4433 
4434 	isvf = info & PFHMC_ERRORINFO_PMF_ISVF_M;
4435 
4436 	device_printf(dev, "%s HMC Error detected on PMF index %d:\n",
4437 		      isvf ? "VF" : "PF", index);
4438 
4439 	device_printf(dev, "error type %d, object type %d, data 0x%08x\n",
4440 		      errtype, objtype, data);
4441 
4442 	switch (errtype) {
4443 	case HMC_ERR_PMF_INVALID:
4444 		device_printf(dev, "Private Memory Function is not valid\n");
4445 		break;
4446 	case HMC_ERR_VF_IDX_INVALID:
4447 		device_printf(dev, "Invalid Private Memory Function index for PE enabled VF\n");
4448 		break;
4449 	case HMC_ERR_VF_PARENT_PF_INVALID:
4450 		device_printf(dev, "Invalid parent PF for PE enabled VF\n");
4451 		break;
4452 	case HMC_ERR_INDEX_TOO_BIG:
4453 		device_printf(dev, "Object index too big\n");
4454 		break;
4455 	case HMC_ERR_ADDRESS_TOO_LARGE:
4456 		device_printf(dev, "Address extends beyond segment descriptor limit\n");
4457 		break;
4458 	case HMC_ERR_SEGMENT_DESC_INVALID:
4459 		device_printf(dev, "Segment descriptor is invalid\n");
4460 		break;
4461 	case HMC_ERR_SEGMENT_DESC_TOO_SMALL:
4462 		device_printf(dev, "Segment descriptor is too small\n");
4463 		break;
4464 	case HMC_ERR_PAGE_DESC_INVALID:
4465 		device_printf(dev, "Page descriptor is invalid\n");
4466 		break;
4467 	case HMC_ERR_UNSUPPORTED_REQUEST_COMPLETION:
4468 		device_printf(dev, "Unsupported Request completion received from PCIe\n");
4469 		break;
4470 	case HMC_ERR_INVALID_OBJECT_TYPE:
4471 		device_printf(dev, "Invalid object type\n");
4472 		break;
4473 	default:
4474 		device_printf(dev, "Unknown HMC error\n");
4475 	}
4476 
4477 	/* Clear the error indication */
4478 	wr32(hw, PFHMC_ERRORINFO, 0);
4479 }
4480 
4481 /**
4482  * @struct ice_sysctl_info
4483  * @brief sysctl information
4484  *
4485  * Structure used to simplify the process of defining the many similar
4486  * statistics sysctls.
4487  */
4488 struct ice_sysctl_info {
4489 	u64		*stat;
4490 	const char	*name;
4491 	const char	*description;
4492 };
4493 
4494 /**
4495  * ice_add_sysctls_eth_stats - Add sysctls for ethernet statistics
4496  * @ctx: sysctl ctx to use
4497  * @parent: the parent node to add sysctls under
4498  * @stats: the ethernet stats structure to source values from
4499  *
4500  * Adds statistics sysctls for the ethernet statistics of the MAC or a VSI.
4501  * Will add them under the parent node specified.
4502  *
4503  * Note that tx_errors is only meaningful for VSIs and not the global MAC/PF
4504  * statistics, so it is not included here. Similarly, rx_discards has different
4505  * descriptions for VSIs and MAC/PF stats, so it is also not included here.
4506  */
4507 void
4508 ice_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx,
4509 			  struct sysctl_oid *parent,
4510 			  struct ice_eth_stats *stats)
4511 {
4512 	const struct ice_sysctl_info ctls[] = {
4513 		/* Rx Stats */
4514 		{ &stats->rx_bytes, "good_octets_rcvd", "Good Octets Received" },
4515 		{ &stats->rx_unicast, "ucast_pkts_rcvd", "Unicast Packets Received" },
4516 		{ &stats->rx_multicast, "mcast_pkts_rcvd", "Multicast Packets Received" },
4517 		{ &stats->rx_broadcast, "bcast_pkts_rcvd", "Broadcast Packets Received" },
4518 		/* Tx Stats */
4519 		{ &stats->tx_bytes, "good_octets_txd", "Good Octets Transmitted" },
4520 		{ &stats->tx_unicast, "ucast_pkts_txd", "Unicast Packets Transmitted" },
4521 		{ &stats->tx_multicast, "mcast_pkts_txd", "Multicast Packets Transmitted" },
4522 		{ &stats->tx_broadcast, "bcast_pkts_txd", "Broadcast Packets Transmitted" },
4523 		/* End */
4524 		{ 0, 0, 0 }
4525 	};
4526 
4527 	struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);
4528 
4529 	const struct ice_sysctl_info *entry = ctls;
4530 	while (entry->stat != 0) {
4531 		SYSCTL_ADD_U64(ctx, parent_list, OID_AUTO, entry->name,
4532 			       CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
4533 			       entry->description);
4534 		entry++;
4535 	}
4536 }
4537 
4538 /**
4539  * ice_sysctl_tx_cso_stat - Display Tx checksum offload statistic
4540  * @oidp: sysctl oid structure
4541  * @arg1: pointer to private data structure
4542  * @arg2: Tx CSO stat to read
4543  * @req: sysctl request pointer
4544  *
4545  * On read: Sums the per-queue Tx CSO stat and displays it.
4546  */
4547 static int
4548 ice_sysctl_tx_cso_stat(SYSCTL_HANDLER_ARGS)
4549 {
4550 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
4551 	enum ice_tx_cso_stat type = (enum ice_tx_cso_stat)arg2;
4552 	u64 stat = 0;
4553 	int i;
4554 
4555 	if (ice_driver_is_detaching(vsi->sc))
4556 		return (ESHUTDOWN);
4557 
4558 	/* Check that the type is valid */
4559 	if (type >= ICE_CSO_STAT_TX_COUNT)
4560 		return (EDOOFUS);
4561 
4562 	/* Sum the stat for each of the Tx queues */
4563 	for (i = 0; i < vsi->num_tx_queues; i++)
4564 		stat += vsi->tx_queues[i].stats.cso[type];
4565 
4566 	return sysctl_handle_64(oidp, NULL, stat, req);
4567 }
4568 
4569 /**
4570  * ice_sysctl_rx_cso_stat - Display Rx checksum offload statistic
4571  * @oidp: sysctl oid structure
4572  * @arg1: pointer to private data structure
4573  * @arg2: Rx CSO stat to read
4574  * @req: sysctl request pointer
4575  *
4576  * On read: Sums the per-queue Rx CSO stat and displays it.
4577  */
4578 static int
4579 ice_sysctl_rx_cso_stat(SYSCTL_HANDLER_ARGS)
4580 {
4581 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
4582 	enum ice_rx_cso_stat type = (enum ice_rx_cso_stat)arg2;
4583 	u64 stat = 0;
4584 	int i;
4585 
4586 	if (ice_driver_is_detaching(vsi->sc))
4587 		return (ESHUTDOWN);
4588 
4589 	/* Check that the type is valid */
4590 	if (type >= ICE_CSO_STAT_RX_COUNT)
4591 		return (EDOOFUS);
4592 
4593 	/* Sum the stat for each of the Rx queues */
4594 	for (i = 0; i < vsi->num_rx_queues; i++)
4595 		stat += vsi->rx_queues[i].stats.cso[type];
4596 
4597 	return sysctl_handle_64(oidp, NULL, stat, req);
4598 }
4599 
4600 /**
4601  * ice_sysctl_rx_errors_stat - Display aggregate of Rx errors
4602  * @oidp: sysctl oid structure
4603  * @arg1: pointer to private data structure
4604  * @arg2: unused
4605  * @req: sysctl request pointer
4606  *
4607  * On read: Sums current values of Rx error statistics and
4608  * displays it.
4609  */
4610 static int
4611 ice_sysctl_rx_errors_stat(SYSCTL_HANDLER_ARGS)
4612 {
4613 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
4614 	struct ice_hw_port_stats *hs = &vsi->sc->stats.cur;
4615 	u64 stat = 0;
4616 	int i, type;
4617 
4618 	UNREFERENCED_PARAMETER(arg2);
4619 
4620 	if (ice_driver_is_detaching(vsi->sc))
4621 		return (ESHUTDOWN);
4622 
4623 	stat += hs->rx_undersize;
4624 	stat += hs->rx_fragments;
4625 	stat += hs->rx_oversize;
4626 	stat += hs->rx_jabber;
4627 	stat += hs->rx_len_errors;
4628 	stat += hs->crc_errors;
4629 	stat += hs->illegal_bytes;
4630 
4631 	/* Checksum error stats */
4632 	for (i = 0; i < vsi->num_rx_queues; i++)
4633 		for (type = ICE_CSO_STAT_RX_IP4_ERR;
4634 		     type < ICE_CSO_STAT_RX_COUNT;
4635 		     type++)
4636 			stat += vsi->rx_queues[i].stats.cso[type];
4637 
4638 	return sysctl_handle_64(oidp, NULL, stat, req);
4639 }
4640 
4641 /**
4642  * @struct ice_rx_cso_stat_info
4643  * @brief sysctl information for an Rx checksum offload statistic
4644  *
4645  * Structure used to simplify the process of defining the checksum offload
4646  * statistics.
4647  */
4648 struct ice_rx_cso_stat_info {
4649 	enum ice_rx_cso_stat	type;
4650 	const char		*name;
4651 	const char		*description;
4652 };
4653 
4654 /**
4655  * @struct ice_tx_cso_stat_info
4656  * @brief sysctl information for a Tx checksum offload statistic
4657  *
4658  * Structure used to simplify the process of defining the checksum offload
4659  * statistics.
4660  */
4661 struct ice_tx_cso_stat_info {
4662 	enum ice_tx_cso_stat	type;
4663 	const char		*name;
4664 	const char		*description;
4665 };
4666 
4667 /**
4668  * ice_add_sysctls_sw_stats - Add sysctls for software statistics
4669  * @vsi: pointer to the VSI to add sysctls for
4670  * @ctx: sysctl ctx to use
4671  * @parent: the parent node to add sysctls under
4672  *
4673  * Add statistics sysctls for software tracked statistics of a VSI.
4674  *
4675  * Currently this only adds checksum offload statistics, but more counters may
4676  * be added in the future.
4677  */
4678 static void
4679 ice_add_sysctls_sw_stats(struct ice_vsi *vsi,
4680 			 struct sysctl_ctx_list *ctx,
4681 			 struct sysctl_oid *parent)
4682 {
4683 	struct sysctl_oid *cso_node;
4684 	struct sysctl_oid_list *cso_list;
4685 
4686 	/* Tx CSO Stats */
4687 	const struct ice_tx_cso_stat_info tx_ctls[] = {
4688 		{ ICE_CSO_STAT_TX_TCP, "tx_tcp", "Transmit TCP Packets marked for HW checksum" },
4689 		{ ICE_CSO_STAT_TX_UDP, "tx_udp", "Transmit UDP Packets marked for HW checksum" },
4690 		{ ICE_CSO_STAT_TX_SCTP, "tx_sctp", "Transmit SCTP Packets marked for HW checksum" },
4691 		{ ICE_CSO_STAT_TX_IP4, "tx_ip4", "Transmit IPv4 Packets marked for HW checksum" },
4692 		{ ICE_CSO_STAT_TX_IP6, "tx_ip6", "Transmit IPv6 Packets marked for HW checksum" },
4693 		{ ICE_CSO_STAT_TX_L3_ERR, "tx_l3_err", "Transmit packets that driver failed to set L3 HW CSO bits for" },
4694 		{ ICE_CSO_STAT_TX_L4_ERR, "tx_l4_err", "Transmit packets that driver failed to set L4 HW CSO bits for" },
4695 		/* End */
4696 		{ ICE_CSO_STAT_TX_COUNT, 0, 0 }
4697 	};
4698 
4699 	/* Rx CSO Stats */
4700 	const struct ice_rx_cso_stat_info rx_ctls[] = {
4701 		{ ICE_CSO_STAT_RX_IP4_ERR, "rx_ip4_err", "Received packets with invalid IPv4 checksum indicated by HW" },
4702 		{ ICE_CSO_STAT_RX_IP6_ERR, "rx_ip6_err", "Received IPv6 packets with extension headers" },
4703 		{ ICE_CSO_STAT_RX_L3_ERR, "rx_l3_err", "Received packets with an unexpected invalid L3 checksum indicated by HW" },
4704 		{ ICE_CSO_STAT_RX_TCP_ERR, "rx_tcp_err", "Received packets with invalid TCP checksum indicated by HW" },
4705 		{ ICE_CSO_STAT_RX_UDP_ERR, "rx_udp_err", "Received packets with invalid UDP checksum indicated by HW" },
4706 		{ ICE_CSO_STAT_RX_SCTP_ERR, "rx_sctp_err", "Received packets with invalid SCTP checksum indicated by HW" },
4707 		{ ICE_CSO_STAT_RX_L4_ERR, "rx_l4_err", "Received packets with an unexpected invalid L4 checksum indicated by HW" },
4708 		/* End */
4709 		{ ICE_CSO_STAT_RX_COUNT, 0, 0 }
4710 	};
4711 
4712 	struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);
4713 
4714 	/* Add a node for statistics tracked by software. */
4715 	cso_node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, "cso", CTLFLAG_RD,
4716 				  NULL, "Checksum offload Statistics");
4717 	cso_list = SYSCTL_CHILDREN(cso_node);
4718 
4719 	const struct ice_tx_cso_stat_info *tx_entry = tx_ctls;
4720 	while (tx_entry->name && tx_entry->description) {
4721 		SYSCTL_ADD_PROC(ctx, cso_list, OID_AUTO, tx_entry->name,
4722 				CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4723 				vsi, tx_entry->type, ice_sysctl_tx_cso_stat, "QU",
4724 				tx_entry->description);
4725 		tx_entry++;
4726 	}
4727 
4728 	const struct ice_rx_cso_stat_info *rx_entry = rx_ctls;
4729 	while (rx_entry->name && rx_entry->description) {
4730 		SYSCTL_ADD_PROC(ctx, cso_list, OID_AUTO, rx_entry->name,
4731 				CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4732 				vsi, rx_entry->type, ice_sysctl_rx_cso_stat, "QU",
4733 				rx_entry->description);
4734 		rx_entry++;
4735 	}
4736 }
4737 
4738 /**
4739  * ice_add_vsi_sysctls - Add sysctls for a VSI
4740  * @vsi: pointer to VSI structure
4741  *
4742  * Add various sysctls for a given VSI.
4743  */
4744 void
4745 ice_add_vsi_sysctls(struct ice_vsi *vsi)
4746 {
4747 	struct sysctl_ctx_list *ctx = &vsi->ctx;
4748 	struct sysctl_oid *hw_node, *sw_node;
4749 	struct sysctl_oid_list *vsi_list, *hw_list;
4750 
4751 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
4752 
4753 	/* Keep hw stats in their own node. */
4754 	hw_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, "hw", CTLFLAG_RD,
4755 				  NULL, "VSI Hardware Statistics");
4756 	hw_list = SYSCTL_CHILDREN(hw_node);
4757 
4758 	/* Add the ethernet statistics for this VSI */
4759 	ice_add_sysctls_eth_stats(ctx, hw_node, &vsi->hw_stats.cur);
4760 
4761 	SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "rx_discards",
4762 			CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.rx_discards,
4763 			0, "Discarded Rx Packets (see rx_errors or rx_no_desc)");
4764 
4765 	SYSCTL_ADD_PROC(ctx, hw_list, OID_AUTO, "rx_errors",
4766 			CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4767 			vsi, 0, ice_sysctl_rx_errors_stat, "QU",
4768 			"Aggregate of all Rx errors");
4769 
4770 	SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "rx_no_desc",
4771 		       CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.rx_no_desc,
4772 		       0, "Rx Packets Discarded Due To Lack Of Descriptors");
4773 
4774 	SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "tx_errors",
4775 			CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.tx_errors,
4776 			0, "Tx Packets Discarded Due To Error");
4777 
4778 	/* Add a node for statistics tracked by software. */
4779 	sw_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, "sw", CTLFLAG_RD,
4780 				  NULL, "VSI Software Statistics");
4781 
4782 	ice_add_sysctls_sw_stats(vsi, ctx, sw_node);
4783 }
4784 
4785 /**
4786  * ice_add_sysctls_mac_pfc_one_stat - Add sysctl node for a PFC statistic
4787  * @ctx: sysctl ctx to use
4788  * @parent_list: parent sysctl list to add sysctls under
4789  * @pfc_stat_location: address of statistic for sysctl to display
4790  * @node_name: Name for statistic node
4791  * @descr: Description used for nodes added in this function
4792  *
4793  * A helper function for ice_add_sysctls_mac_pfc_stats that adds a node
4794  * for a stat and leaves for each traffic class for that stat.
4795  */
4796 static void
4797 ice_add_sysctls_mac_pfc_one_stat(struct sysctl_ctx_list *ctx,
4798 				 struct sysctl_oid_list *parent_list,
4799 				 u64* pfc_stat_location,
4800 				 const char *node_name,
4801 				 const char *descr)
4802 {
4803 	struct sysctl_oid_list *node_list;
4804 	struct sysctl_oid *node;
4805 	struct sbuf *namebuf, *descbuf;
4806 
4807 	node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, node_name, CTLFLAG_RD,
4808 				   NULL, descr);
4809 	node_list = SYSCTL_CHILDREN(node);
4810 
4811 	namebuf = sbuf_new_auto();
4812 	descbuf = sbuf_new_auto();
4813 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
4814 		sbuf_clear(namebuf);
4815 		sbuf_clear(descbuf);
4816 
4817 		sbuf_printf(namebuf, "%d", i);
4818 		sbuf_printf(descbuf, "%s for TC %d", descr, i);
4819 
4820 		sbuf_finish(namebuf);
4821 		sbuf_finish(descbuf);
4822 
4823 		SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, sbuf_data(namebuf),
4824 			CTLFLAG_RD | CTLFLAG_STATS, &pfc_stat_location[i], 0,
4825 			sbuf_data(descbuf));
4826 	}
4827 
4828 	sbuf_delete(namebuf);
4829 	sbuf_delete(descbuf);
4830 }
4831 
4832 /**
4833  * ice_add_sysctls_mac_pfc_stats - Add sysctls for MAC PFC statistics
4834  * @ctx: the sysctl ctx to use
4835  * @parent: parent node to add the sysctls under
4836  * @stats: the hw ports stat structure to pull values from
4837  *
4838  * Add global Priority Flow Control MAC statistics sysctls. These are
4839  * structured as a node with the PFC statistic, where there are eight
4840  * nodes for each traffic class.
4841  */
4842 static void
4843 ice_add_sysctls_mac_pfc_stats(struct sysctl_ctx_list *ctx,
4844 			      struct sysctl_oid *parent,
4845 			      struct ice_hw_port_stats *stats)
4846 {
4847 	struct sysctl_oid_list *parent_list;
4848 
4849 	parent_list = SYSCTL_CHILDREN(parent);
4850 
4851 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xon_rx,
4852 	    "p_xon_recvd", "PFC XON received");
4853 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xoff_rx,
4854 	    "p_xoff_recvd", "PFC XOFF received");
4855 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xon_tx,
4856 	    "p_xon_txd", "PFC XON transmitted");
4857 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xoff_tx,
4858 	    "p_xoff_txd", "PFC XOFF transmitted");
4859 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xon_2_xoff,
4860 	    "p_xon2xoff", "PFC XON to XOFF transitions");
4861 }
4862 
4863 /**
4864  * ice_add_sysctls_mac_stats - Add sysctls for global MAC statistics
4865  * @ctx: the sysctl ctx to use
4866  * @parent: parent node to add the sysctls under
4867  * @stats: the hw ports stat structure to pull values from
4868  *
4869  * Add global MAC statistics sysctls.
4870  */
4871 void
4872 ice_add_sysctls_mac_stats(struct sysctl_ctx_list *ctx,
4873 			  struct sysctl_oid *parent,
4874 			  struct ice_hw_port_stats *stats)
4875 {
4876 	struct sysctl_oid *mac_node;
4877 	struct sysctl_oid_list *parent_list, *mac_list;
4878 
4879 	parent_list = SYSCTL_CHILDREN(parent);
4880 
4881 	mac_node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, "mac", CTLFLAG_RD,
4882 				   NULL, "Mac Hardware Statistics");
4883 	mac_list = SYSCTL_CHILDREN(mac_node);
4884 
4885 	/* Add the ethernet statistics common to VSI and MAC */
4886 	ice_add_sysctls_eth_stats(ctx, mac_node, &stats->eth);
4887 
4888 	/* Add PFC stats that add per-TC counters */
4889 	ice_add_sysctls_mac_pfc_stats(ctx, mac_node, stats);
4890 
4891 	const struct ice_sysctl_info ctls[] = {
4892 		/* Packet Reception Stats */
4893 		{&stats->rx_size_64, "rx_frames_64", "64 byte frames received"},
4894 		{&stats->rx_size_127, "rx_frames_65_127", "65-127 byte frames received"},
4895 		{&stats->rx_size_255, "rx_frames_128_255", "128-255 byte frames received"},
4896 		{&stats->rx_size_511, "rx_frames_256_511", "256-511 byte frames received"},
4897 		{&stats->rx_size_1023, "rx_frames_512_1023", "512-1023 byte frames received"},
4898 		{&stats->rx_size_1522, "rx_frames_1024_1522", "1024-1522 byte frames received"},
4899 		{&stats->rx_size_big, "rx_frames_big", "1523-9522 byte frames received"},
4900 		{&stats->rx_undersize, "rx_undersize", "Undersized packets received"},
4901 		{&stats->rx_fragments, "rx_fragmented", "Fragmented packets received"},
4902 		{&stats->rx_oversize, "rx_oversized", "Oversized packets received"},
4903 		{&stats->rx_jabber, "rx_jabber", "Received Jabber"},
4904 		{&stats->rx_len_errors, "rx_length_errors", "Receive Length Errors"},
4905 		{&stats->eth.rx_discards, "rx_discards",
4906 		    "Discarded Rx Packets by Port (shortage of storage space)"},
4907 		/* Packet Transmission Stats */
4908 		{&stats->tx_size_64, "tx_frames_64", "64 byte frames transmitted"},
4909 		{&stats->tx_size_127, "tx_frames_65_127", "65-127 byte frames transmitted"},
4910 		{&stats->tx_size_255, "tx_frames_128_255", "128-255 byte frames transmitted"},
4911 		{&stats->tx_size_511, "tx_frames_256_511", "256-511 byte frames transmitted"},
4912 		{&stats->tx_size_1023, "tx_frames_512_1023", "512-1023 byte frames transmitted"},
4913 		{&stats->tx_size_1522, "tx_frames_1024_1522", "1024-1522 byte frames transmitted"},
4914 		{&stats->tx_size_big, "tx_frames_big", "1523-9522 byte frames transmitted"},
4915 		{&stats->tx_dropped_link_down, "tx_dropped", "Tx Dropped Due To Link Down"},
4916 		/* Flow control */
4917 		{&stats->link_xon_tx, "xon_txd", "Link XON transmitted"},
4918 		{&stats->link_xon_rx, "xon_recvd", "Link XON received"},
4919 		{&stats->link_xoff_tx, "xoff_txd", "Link XOFF transmitted"},
4920 		{&stats->link_xoff_rx, "xoff_recvd", "Link XOFF received"},
4921 		/* Other */
4922 		{&stats->crc_errors, "crc_errors", "CRC Errors"},
4923 		{&stats->illegal_bytes, "illegal_bytes", "Illegal Byte Errors"},
4924 		{&stats->mac_local_faults, "local_faults", "MAC Local Faults"},
4925 		{&stats->mac_remote_faults, "remote_faults", "MAC Remote Faults"},
4926 		/* End */
4927 		{ 0, 0, 0 }
4928 	};
4929 
4930 	const struct ice_sysctl_info *entry = ctls;
4931 	while (entry->stat != 0) {
4932 		SYSCTL_ADD_U64(ctx, mac_list, OID_AUTO, entry->name,
4933 			CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
4934 			entry->description);
4935 		entry++;
4936 	}
4937 }
4938 
4939 /**
4940  * ice_configure_misc_interrupts - enable 'other' interrupt causes
4941  * @sc: pointer to device private softc
4942  *
4943  * Enable various "other" interrupt causes, and associate them to interrupt 0,
4944  * which is our administrative interrupt.
4945  */
4946 void
4947 ice_configure_misc_interrupts(struct ice_softc *sc)
4948 {
4949 	struct ice_hw *hw = &sc->hw;
4950 	u32 val;
4951 
4952 	/* Read the OICR register to clear it */
4953 	rd32(hw, PFINT_OICR);
4954 
4955 	/* Enable useful "other" interrupt causes */
4956 	val = (PFINT_OICR_ECC_ERR_M |
4957 	       PFINT_OICR_MAL_DETECT_M |
4958 	       PFINT_OICR_GRST_M |
4959 	       PFINT_OICR_PCI_EXCEPTION_M |
4960 	       PFINT_OICR_VFLR_M |
4961 	       PFINT_OICR_HMC_ERR_M |
4962 	       PFINT_OICR_PE_CRITERR_M);
4963 
4964 	wr32(hw, PFINT_OICR_ENA, val);
4965 
4966 	/* Note that since we're using MSI-X index 0, and ITR index 0, we do
4967 	 * not explicitly program them when writing to the PFINT_*_CTL
4968 	 * registers. Nevertheless, these writes are associating the
4969 	 * interrupts with the ITR 0 vector
4970 	 */
4971 
4972 	/* Associate the OICR interrupt with ITR 0, and enable it */
4973 	wr32(hw, PFINT_OICR_CTL, PFINT_OICR_CTL_CAUSE_ENA_M);
4974 
4975 	/* Associate the Mailbox interrupt with ITR 0, and enable it */
4976 	wr32(hw, PFINT_MBX_CTL, PFINT_MBX_CTL_CAUSE_ENA_M);
4977 
4978 	/* Associate the AdminQ interrupt with ITR 0, and enable it */
4979 	wr32(hw, PFINT_FW_CTL, PFINT_FW_CTL_CAUSE_ENA_M);
4980 }
4981 
4982 /**
4983  * ice_filter_is_mcast - Check if info is a multicast filter
4984  * @vsi: vsi structure addresses are targeted towards
4985  * @info: filter info
4986  *
4987  * @returns true if the provided info is a multicast filter, and false
4988  * otherwise.
4989  */
4990 static bool
4991 ice_filter_is_mcast(struct ice_vsi *vsi, struct ice_fltr_info *info)
4992 {
4993 	const u8 *addr = info->l_data.mac.mac_addr;
4994 
4995 	/*
4996 	 * Check if this info matches a multicast filter added by
4997 	 * ice_add_mac_to_list
4998 	 */
4999 	if ((info->flag == ICE_FLTR_TX) &&
5000 	    (info->src_id == ICE_SRC_ID_VSI) &&
5001 	    (info->lkup_type == ICE_SW_LKUP_MAC) &&
5002 	    (info->vsi_handle == vsi->idx) &&
5003 	    ETHER_IS_MULTICAST(addr) && !ETHER_IS_BROADCAST(addr))
5004 		return true;
5005 
5006 	return false;
5007 }
5008 
/**
 * @struct ice_mcast_sync_data
 * @brief data used by ice_sync_one_mcast_filter function
 *
 * State shared between ice_sync_multicast_filters and its per-address
 * callback ice_sync_one_mcast_filter. It carries the list of filters that
 * still need to be added, a pointer to the device softc, and an error
 * indication.
 */
struct ice_mcast_sync_data {
	/* Filters that were not found in the internal list and must be added */
	struct ice_list_head add_list;
	/* Device softc the synchronization runs against */
	struct ice_softc *sc;
	/* First error hit by the callback; non-zero makes later calls no-ops */
	int err;
};
5022 
5023 /**
5024  * ice_sync_one_mcast_filter - Check if we need to program the filter
5025  * @p: void pointer to algorithm data
5026  * @sdl: link level socket address
5027  * @count: unused count value
5028  *
5029  * Called by if_foreach_llmaddr to operate on each filter in the ifp filter
5030  * list. For the given address, search our internal list to see if we have
5031  * found the filter. If not, add it to our list of filters that need to be
5032  * programmed.
5033  *
5034  * @returns (1) if we've actually setup the filter to be added
5035  */
5036 static u_int
5037 ice_sync_one_mcast_filter(void *p, struct sockaddr_dl *sdl,
5038 			  u_int __unused count)
5039 {
5040 	struct ice_mcast_sync_data *data = (struct ice_mcast_sync_data *)p;
5041 	struct ice_softc *sc = data->sc;
5042 	struct ice_hw *hw = &sc->hw;
5043 	struct ice_switch_info *sw = hw->switch_info;
5044 	const u8 *sdl_addr = (const u8 *)LLADDR(sdl);
5045 	struct ice_fltr_mgmt_list_entry *itr;
5046 	struct ice_list_head *rules;
5047 	int err;
5048 
5049 	rules = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
5050 
5051 	/*
5052 	 * If a previous filter already indicated an error, there is no need
5053 	 * for us to finish processing the rest of the filters.
5054 	 */
5055 	if (data->err)
5056 		return (0);
5057 
5058 	/* See if this filter has already been programmed */
5059 	LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry) {
5060 		struct ice_fltr_info *info = &itr->fltr_info;
5061 		const u8 *addr = info->l_data.mac.mac_addr;
5062 
5063 		/* Only check multicast filters */
5064 		if (!ice_filter_is_mcast(&sc->pf_vsi, info))
5065 			continue;
5066 
5067 		/*
5068 		 * If this filter matches, mark the internal filter as
5069 		 * "found", and exit.
5070 		 */
5071 		if (bcmp(addr, sdl_addr, ETHER_ADDR_LEN) == 0) {
5072 			itr->marker = ICE_FLTR_FOUND;
5073 			return (1);
5074 		}
5075 	}
5076 
5077 	/*
5078 	 * If we failed to locate the filter in our internal list, we need to
5079 	 * place it into our add list.
5080 	 */
5081 	err = ice_add_mac_to_list(&sc->pf_vsi, &data->add_list, sdl_addr,
5082 				  ICE_FWD_TO_VSI);
5083 	if (err) {
5084 		device_printf(sc->dev,
5085 			      "Failed to place MAC %6D onto add list, err %s\n",
5086 			      sdl_addr, ":", ice_err_str(err));
5087 		data->err = err;
5088 
5089 		return (0);
5090 	}
5091 
5092 	return (1);
5093 }
5094 
/**
 * ice_sync_multicast_filters - Synchronize OS and internal filter list
 * @sc: device private structure
 *
 * Called in response to SIOCDELMULTI to synchronize the operating system
 * multicast address list with the internal list of filters programmed to
 * firmware.
 *
 * Added and deleted filters are both found using a marker on each entry of
 * the internal list, while the MAC filter rule lock is held:
 *
 * 1. A loop over the internal list resets every marker to "not found".
 * 2. Each address in the ifp list is checked by ice_sync_one_mcast_filter.
 *    If it is in the internal list its marker is set to "found"; otherwise
 *    the address is placed on the add list.
 * 3. A second loop over the internal list places every multicast filter
 *    still marked "not found" on the remove list.
 *
 * The lock is then released and the add list, followed by the remove list,
 * is programmed to firmware. Both lists are freed before returning, on
 * success and on every error path.
 *
 * @returns zero on success or an integer error code on failure: the error
 * from building either list, or EIO if programming the filters fails.
 */
int
ice_sync_multicast_filters(struct ice_softc *sc)
{
	struct ice_hw *hw = &sc->hw;
	struct ice_switch_info *sw = hw->switch_info;
	struct ice_fltr_mgmt_list_entry *itr;
	struct ice_mcast_sync_data data = {};
	struct ice_list_head *rules, remove_list;
	enum ice_status status;
	int err = 0;

	INIT_LIST_HEAD(&data.add_list);
	INIT_LIST_HEAD(&remove_list);
	data.sc = sc;
	data.err = 0;

	rules = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;

	/* Acquire the lock for the entire duration */
	ice_acquire_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);

	/* (1) Reset the marker state for all filters */
	LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry)
		itr->marker = ICE_FLTR_NOT_FOUND;

	/* (2) determine which filters need to be added and removed */
	if_foreach_llmaddr(sc->ifp, ice_sync_one_mcast_filter, (void *)&data);
	if (data.err) {
		/* ice_sync_one_mcast_filter already prints an error */
		err = data.err;
		/* Drop the lock here; the cleanup label does not release it */
		ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
		goto free_filter_lists;
	}

	/* (3) collect the multicast filters that were not marked as found */
	LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry) {
		struct ice_fltr_info *info = &itr->fltr_info;
		const u8 *addr = info->l_data.mac.mac_addr;

		/* Only check multicast filters */
		if (!ice_filter_is_mcast(&sc->pf_vsi, info))
			continue;

		/*
		 * If the filter is not marked as found, then it must no
		 * longer be in the ifp address list, so we need to remove it.
		 */
		if (itr->marker == ICE_FLTR_NOT_FOUND) {
			err = ice_add_mac_to_list(&sc->pf_vsi, &remove_list,
						  addr, ICE_FWD_TO_VSI);
			if (err) {
				device_printf(sc->dev,
					      "Failed to place MAC %6D onto remove list, err %s\n",
					      addr, ":", ice_err_str(err));
				/* Drop the lock here; the cleanup label does not release it */
				ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
				goto free_filter_lists;
			}
		}
	}

	/* Both lists are complete; the lock is released before programming */
	ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);

	/* Program the new filters first; a failure skips the removal below */
	status = ice_add_mac(hw, &data.add_list);
	if (status) {
		device_printf(sc->dev,
			      "Could not add new MAC filters, err %s aq_err %s\n",
			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
		err = (EIO);
		goto free_filter_lists;
	}

	/* Then remove the filters that are no longer in the ifp list */
	status = ice_remove_mac(hw, &remove_list);
	if (status) {
		device_printf(sc->dev,
			      "Could not remove old MAC filters, err %s aq_err %s\n",
			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
		err = (EIO);
		goto free_filter_lists;
	}

	/* Common exit: both temporary lists are freed on every path */
free_filter_lists:
	ice_free_fltr_list(&data.add_list);
	ice_free_fltr_list(&remove_list);

	return (err);
}
5200 
5201 /**
5202  * ice_add_vlan_hw_filters - Add multiple VLAN filters for a given VSI
5203  * @vsi: The VSI to add the filter for
5204  * @vid: array of VLAN ids to add
5205  * @length: length of vid array
5206  *
5207  * Programs HW filters so that the given VSI will receive the specified VLANs.
5208  */
5209 enum ice_status
5210 ice_add_vlan_hw_filters(struct ice_vsi *vsi, u16 *vid, u16 length)
5211 {
5212 	struct ice_hw *hw = &vsi->sc->hw;
5213 	struct ice_list_head vlan_list;
5214 	struct ice_fltr_list_entry *vlan_entries;
5215 	enum ice_status status;
5216 
5217 	MPASS(length > 0);
5218 
5219 	INIT_LIST_HEAD(&vlan_list);
5220 
5221 	vlan_entries = (struct ice_fltr_list_entry *)
5222 	    malloc(sizeof(*vlan_entries) * length, M_ICE, M_NOWAIT | M_ZERO);
5223 	if (!vlan_entries)
5224 		return (ICE_ERR_NO_MEMORY);
5225 
5226 	for (u16 i = 0; i < length; i++) {
5227 		vlan_entries[i].fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
5228 		vlan_entries[i].fltr_info.fltr_act = ICE_FWD_TO_VSI;
5229 		vlan_entries[i].fltr_info.flag = ICE_FLTR_TX;
5230 		vlan_entries[i].fltr_info.src_id = ICE_SRC_ID_VSI;
5231 		vlan_entries[i].fltr_info.vsi_handle = vsi->idx;
5232 		vlan_entries[i].fltr_info.l_data.vlan.vlan_id = vid[i];
5233 
5234 		LIST_ADD(&vlan_entries[i].list_entry, &vlan_list);
5235 	}
5236 
5237 	status = ice_add_vlan(hw, &vlan_list);
5238 	if (!status)
5239 		goto done;
5240 
5241 	device_printf(vsi->sc->dev, "Failed to add VLAN filters:\n");
5242 	for (u16 i = 0; i < length; i++) {
5243 		device_printf(vsi->sc->dev,
5244 		    "- vlan %d, status %d\n",
5245 		    vlan_entries[i].fltr_info.l_data.vlan.vlan_id,
5246 		    vlan_entries[i].status);
5247 	}
5248 done:
5249 	free(vlan_entries, M_ICE);
5250 	return (status);
5251 }
5252 
5253 /**
5254  * ice_add_vlan_hw_filter - Add a VLAN filter for a given VSI
5255  * @vsi: The VSI to add the filter for
5256  * @vid: VLAN to add
5257  *
5258  * Programs a HW filter so that the given VSI will receive the specified VLAN.
5259  */
5260 enum ice_status
5261 ice_add_vlan_hw_filter(struct ice_vsi *vsi, u16 vid)
5262 {
5263 	return ice_add_vlan_hw_filters(vsi, &vid, 1);
5264 }
5265 
5266 /**
5267  * ice_remove_vlan_hw_filters - Remove multiple VLAN filters for a given VSI
5268  * @vsi: The VSI to remove the filters from
5269  * @vid: array of VLAN ids to remove
5270  * @length: length of vid array
5271  *
5272  * Removes previously programmed HW filters for the specified VSI.
5273  */
5274 enum ice_status
5275 ice_remove_vlan_hw_filters(struct ice_vsi *vsi, u16 *vid, u16 length)
5276 {
5277 	struct ice_hw *hw = &vsi->sc->hw;
5278 	struct ice_list_head vlan_list;
5279 	struct ice_fltr_list_entry *vlan_entries;
5280 	enum ice_status status;
5281 
5282 	MPASS(length > 0);
5283 
5284 	INIT_LIST_HEAD(&vlan_list);
5285 
5286 	vlan_entries = (struct ice_fltr_list_entry *)
5287 	    malloc(sizeof(*vlan_entries) * length, M_ICE, M_NOWAIT | M_ZERO);
5288 	if (!vlan_entries)
5289 		return (ICE_ERR_NO_MEMORY);
5290 
5291 	for (u16 i = 0; i < length; i++) {
5292 		vlan_entries[i].fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
5293 		vlan_entries[i].fltr_info.fltr_act = ICE_FWD_TO_VSI;
5294 		vlan_entries[i].fltr_info.flag = ICE_FLTR_TX;
5295 		vlan_entries[i].fltr_info.src_id = ICE_SRC_ID_VSI;
5296 		vlan_entries[i].fltr_info.vsi_handle = vsi->idx;
5297 		vlan_entries[i].fltr_info.l_data.vlan.vlan_id = vid[i];
5298 
5299 		LIST_ADD(&vlan_entries[i].list_entry, &vlan_list);
5300 	}
5301 
5302 	status = ice_remove_vlan(hw, &vlan_list);
5303 	if (!status)
5304 		goto done;
5305 
5306 	device_printf(vsi->sc->dev, "Failed to remove VLAN filters:\n");
5307 	for (u16 i = 0; i < length; i++) {
5308 		device_printf(vsi->sc->dev,
5309 		    "- vlan %d, status %d\n",
5310 		    vlan_entries[i].fltr_info.l_data.vlan.vlan_id,
5311 		    vlan_entries[i].status);
5312 	}
5313 done:
5314 	free(vlan_entries, M_ICE);
5315 	return (status);
5316 }
5317 
5318 /**
5319  * ice_remove_vlan_hw_filter - Remove a VLAN filter for a given VSI
5320  * @vsi: The VSI to remove the filter from
5321  * @vid: VLAN to remove
5322  *
5323  * Removes a previously programmed HW filter for the specified VSI.
5324  */
5325 enum ice_status
5326 ice_remove_vlan_hw_filter(struct ice_vsi *vsi, u16 vid)
5327 {
5328 	return ice_remove_vlan_hw_filters(vsi, &vid, 1);
5329 }
5330 
5331 #define ICE_SYSCTL_HELP_RX_ITR			\
5332 "\nControl Rx interrupt throttle rate."		\
5333 "\n\t0-8160 - sets interrupt rate in usecs"	\
5334 "\n\t    -1 - reset the Rx itr to default"
5335 
5336 /**
5337  * ice_sysctl_rx_itr - Display or change the Rx ITR for a VSI
5338  * @oidp: sysctl oid structure
5339  * @arg1: pointer to private data structure
5340  * @arg2: unused
5341  * @req: sysctl request pointer
5342  *
5343  * On read: Displays the current Rx ITR value
5344  * on write: Sets the Rx ITR value, reconfiguring device if it is up
5345  */
5346 static int
5347 ice_sysctl_rx_itr(SYSCTL_HANDLER_ARGS)
5348 {
5349 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
5350 	struct ice_softc *sc = vsi->sc;
5351 	int increment, ret;
5352 
5353 	UNREFERENCED_PARAMETER(arg2);
5354 
5355 	if (ice_driver_is_detaching(sc))
5356 		return (ESHUTDOWN);
5357 
5358 	ret = sysctl_handle_16(oidp, &vsi->rx_itr, 0, req);
5359 	if ((ret) || (req->newptr == NULL))
5360 		return (ret);
5361 
5362 	if (vsi->rx_itr < 0)
5363 		vsi->rx_itr = ICE_DFLT_RX_ITR;
5364 	if (vsi->rx_itr > ICE_ITR_MAX)
5365 		vsi->rx_itr = ICE_ITR_MAX;
5366 
5367 	/* Assume 2usec increment if it hasn't been loaded yet */
5368 	increment = sc->hw.itr_gran ? : 2;
5369 
5370 	/* We need to round the value to the hardware's ITR granularity */
5371 	vsi->rx_itr = (vsi->rx_itr / increment ) * increment;
5372 
5373 	/* If the driver has finished initializing, then we need to reprogram
5374 	 * the ITR registers now. Otherwise, they will be programmed during
5375 	 * driver initialization.
5376 	 */
5377 	if (ice_test_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
5378 		ice_configure_rx_itr(vsi);
5379 
5380 	return (0);
5381 }
5382 
5383 #define ICE_SYSCTL_HELP_TX_ITR			\
5384 "\nControl Tx interrupt throttle rate."		\
5385 "\n\t0-8160 - sets interrupt rate in usecs"	\
5386 "\n\t    -1 - reset the Tx itr to default"
5387 
5388 /**
5389  * ice_sysctl_tx_itr - Display or change the Tx ITR for a VSI
5390  * @oidp: sysctl oid structure
5391  * @arg1: pointer to private data structure
5392  * @arg2: unused
5393  * @req: sysctl request pointer
5394  *
5395  * On read: Displays the current Tx ITR value
5396  * on write: Sets the Tx ITR value, reconfiguring device if it is up
5397  */
5398 static int
5399 ice_sysctl_tx_itr(SYSCTL_HANDLER_ARGS)
5400 {
5401 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
5402 	struct ice_softc *sc = vsi->sc;
5403 	int increment, ret;
5404 
5405 	UNREFERENCED_PARAMETER(arg2);
5406 
5407 	if (ice_driver_is_detaching(sc))
5408 		return (ESHUTDOWN);
5409 
5410 	ret = sysctl_handle_16(oidp, &vsi->tx_itr, 0, req);
5411 	if ((ret) || (req->newptr == NULL))
5412 		return (ret);
5413 
5414 	/* Allow configuring a negative value to reset to the default */
5415 	if (vsi->tx_itr < 0)
5416 		vsi->tx_itr = ICE_DFLT_TX_ITR;
5417 	if (vsi->tx_itr > ICE_ITR_MAX)
5418 		vsi->tx_itr = ICE_ITR_MAX;
5419 
5420 	/* Assume 2usec increment if it hasn't been loaded yet */
5421 	increment = sc->hw.itr_gran ? : 2;
5422 
5423 	/* We need to round the value to the hardware's ITR granularity */
5424 	vsi->tx_itr = (vsi->tx_itr / increment ) * increment;
5425 
5426 	/* If the driver has finished initializing, then we need to reprogram
5427 	 * the ITR registers now. Otherwise, they will be programmed during
5428 	 * driver initialization.
5429 	 */
5430 	if (ice_test_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
5431 		ice_configure_tx_itr(vsi);
5432 
5433 	return (0);
5434 }
5435 
5436 /**
5437  * ice_add_vsi_tunables - Add tunables and nodes for a VSI
5438  * @vsi: pointer to VSI structure
5439  * @parent: parent node to add the tunables under
5440  *
5441  * Create a sysctl context for the VSI, so that sysctls for the VSI can be
5442  * dynamically removed upon VSI removal.
5443  *
5444  * Add various tunables and set up the basic node structure for the VSI. Must
5445  * be called *prior* to ice_add_vsi_sysctls. It should be called as soon as
5446  * possible after the VSI memory is initialized.
5447  *
5448  * VSI specific sysctls with CTLFLAG_TUN should be initialized here so that
5449  * their values can be read from loader.conf prior to their first use in the
5450  * driver.
5451  */
5452 void
5453 ice_add_vsi_tunables(struct ice_vsi *vsi, struct sysctl_oid *parent)
5454 {
5455 	struct sysctl_oid_list *vsi_list;
5456 	char vsi_name[32], vsi_desc[32];
5457 
5458 	struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);
5459 
5460 	/* Initialize the sysctl context for this VSI */
5461 	sysctl_ctx_init(&vsi->ctx);
5462 
5463 	/* Add a node to collect this VSI's statistics together */
5464 	snprintf(vsi_name, sizeof(vsi_name), "%u", vsi->idx);
5465 	snprintf(vsi_desc, sizeof(vsi_desc), "VSI %u", vsi->idx);
5466 	vsi->vsi_node = SYSCTL_ADD_NODE(&vsi->ctx, parent_list, OID_AUTO, vsi_name,
5467 					CTLFLAG_RD, NULL, vsi_desc);
5468 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
5469 
5470 	vsi->rx_itr = ICE_DFLT_TX_ITR;
5471 	SYSCTL_ADD_PROC(&vsi->ctx, vsi_list, OID_AUTO, "rx_itr",
5472 			CTLTYPE_S16 | CTLFLAG_RWTUN,
5473 			vsi, 0, ice_sysctl_rx_itr, "S",
5474 			ICE_SYSCTL_HELP_RX_ITR);
5475 
5476 	vsi->tx_itr = ICE_DFLT_TX_ITR;
5477 	SYSCTL_ADD_PROC(&vsi->ctx, vsi_list, OID_AUTO, "tx_itr",
5478 			CTLTYPE_S16 | CTLFLAG_RWTUN,
5479 			vsi, 0, ice_sysctl_tx_itr, "S",
5480 			ICE_SYSCTL_HELP_TX_ITR);
5481 }
5482 
5483 /**
5484  * ice_del_vsi_sysctl_ctx - Delete the sysctl context(s) of a VSI
5485  * @vsi: the VSI to remove contexts for
5486  *
5487  * Free the context for the VSI sysctls. This includes the main context, as
5488  * well as the per-queue sysctls.
5489  */
5490 void
5491 ice_del_vsi_sysctl_ctx(struct ice_vsi *vsi)
5492 {
5493 	device_t dev = vsi->sc->dev;
5494 	int err;
5495 
5496 	if (vsi->vsi_node) {
5497 		err = sysctl_ctx_free(&vsi->ctx);
5498 		if (err)
5499 			device_printf(dev, "failed to free VSI %d sysctl context, err %s\n",
5500 				      vsi->idx, ice_err_str(err));
5501 		vsi->vsi_node = NULL;
5502 	}
5503 }
5504 
5505 /**
5506  * ice_add_dscp2tc_map_sysctls - Add sysctl tree for DSCP to TC mapping
5507  * @sc: pointer to device private softc
5508  * @ctx: the sysctl ctx to use
5509  * @ctx_list: list of sysctl children for device (to add sysctl tree to)
5510  *
5511  * Add a sysctl tree for individual dscp2tc_map sysctls. Each child of this
5512  * node can map 8 DSCPs to TC values; there are 8 of these in turn for a total
5513  * of 64 DSCP to TC map values that the user can configure.
5514  */
5515 void
5516 ice_add_dscp2tc_map_sysctls(struct ice_softc *sc,
5517 			    struct sysctl_ctx_list *ctx,
5518 			    struct sysctl_oid_list *ctx_list)
5519 {
5520 	struct sysctl_oid_list *node_list;
5521 	struct sysctl_oid *node;
5522 	struct sbuf *namebuf, *descbuf;
5523 	int first_dscp_val, last_dscp_val;
5524 
5525 	node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "dscp2tc_map", CTLFLAG_RD,
5526 			       NULL, "Map of DSCP values to DCB TCs");
5527 	node_list = SYSCTL_CHILDREN(node);
5528 
5529 	namebuf = sbuf_new_auto();
5530 	descbuf = sbuf_new_auto();
5531 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
5532 		sbuf_clear(namebuf);
5533 		sbuf_clear(descbuf);
5534 
5535 		first_dscp_val = i * 8;
5536 		last_dscp_val = first_dscp_val + 7;
5537 
5538 		sbuf_printf(namebuf, "%d-%d", first_dscp_val, last_dscp_val);
5539 		sbuf_printf(descbuf, "Map DSCP values %d to %d to TCs",
5540 			    first_dscp_val, last_dscp_val);
5541 
5542 		sbuf_finish(namebuf);
5543 		sbuf_finish(descbuf);
5544 
5545 		SYSCTL_ADD_PROC(ctx, node_list,
5546 		    OID_AUTO, sbuf_data(namebuf), CTLTYPE_STRING | CTLFLAG_RW,
5547 		    sc, i, ice_sysctl_dscp2tc_map, "A", sbuf_data(descbuf));
5548 	}
5549 
5550 	sbuf_delete(namebuf);
5551 	sbuf_delete(descbuf);
5552 }
5553 
5554 /**
5555  * ice_add_device_tunables - Add early tunable sysctls and sysctl nodes
5556  * @sc: device private structure
5557  *
5558  * Add per-device dynamic tunable sysctls, and setup the general sysctl trees
5559  * for re-use by ice_add_device_sysctls.
5560  *
5561  * In order for the sysctl fields to be initialized before use, this function
5562  * should be called as early as possible during attach activities.
5563  *
5564  * Any non-global sysctl marked as CTLFLAG_TUN should likely be initialized
5565  * here in this function, rather than later in ice_add_device_sysctls.
5566  *
5567  * To make things easier, this function is also expected to setup the various
5568  * sysctl nodes in addition to tunables so that other sysctls which can't be
5569  * initialized early can hook into the same nodes.
5570  */
5571 void
5572 ice_add_device_tunables(struct ice_softc *sc)
5573 {
5574 	device_t dev = sc->dev;
5575 
5576 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5577 	struct sysctl_oid_list *ctx_list =
5578 		SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
5579 
5580 	sc->enable_health_events = ice_enable_health_events;
5581 
5582 	SYSCTL_ADD_BOOL(ctx, ctx_list, OID_AUTO, "enable_health_events",
5583 			CTLFLAG_RDTUN, &sc->enable_health_events, 0,
5584 			"Enable FW health event reporting for this PF");
5585 
5586 	/* Add a node to track VSI sysctls. Keep track of the node in the
5587 	 * softc so that we can hook other sysctls into it later. This
5588 	 * includes both the VSI statistics, as well as potentially dynamic
5589 	 * VSIs in the future.
5590 	 */
5591 
5592 	sc->vsi_sysctls = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "vsi",
5593 					  CTLFLAG_RD, NULL, "VSI Configuration and Statistics");
5594 
5595 	/* Add debug tunables */
5596 	ice_add_debug_tunables(sc);
5597 }
5598 
5599 /**
5600  * ice_sysctl_dump_mac_filters - Dump a list of all HW MAC Filters
5601  * @oidp: sysctl oid structure
5602  * @arg1: pointer to private data structure
5603  * @arg2: unused
5604  * @req: sysctl request pointer
5605  *
5606  * Callback for "mac_filters" sysctl to dump the programmed MAC filters.
5607  */
5608 static int
5609 ice_sysctl_dump_mac_filters(SYSCTL_HANDLER_ARGS)
5610 {
5611 	struct ice_softc *sc = (struct ice_softc *)arg1;
5612 	struct ice_hw *hw = &sc->hw;
5613 	struct ice_switch_info *sw = hw->switch_info;
5614 	struct ice_fltr_mgmt_list_entry *fm_entry;
5615 	struct ice_list_head *rule_head;
5616 	struct ice_lock *rule_lock;
5617 	struct ice_fltr_info *fi;
5618 	struct sbuf *sbuf;
5619 	int ret;
5620 
5621 	UNREFERENCED_PARAMETER(oidp);
5622 	UNREFERENCED_PARAMETER(arg2);
5623 
5624 	if (ice_driver_is_detaching(sc))
5625 		return (ESHUTDOWN);
5626 
5627 	/* Wire the old buffer so we can take a non-sleepable lock */
5628 	ret = sysctl_wire_old_buffer(req, 0);
5629 	if (ret)
5630 		return (ret);
5631 
5632 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5633 
5634 	rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock;
5635 	rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
5636 
5637 	sbuf_printf(sbuf, "MAC Filter List");
5638 
5639 	ice_acquire_lock(rule_lock);
5640 
5641 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5642 		fi = &fm_entry->fltr_info;
5643 
5644 		sbuf_printf(sbuf,
5645 			    "\nmac = %6D, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %d",
5646 			    fi->l_data.mac.mac_addr, ":", fi->vsi_handle,
5647 			    ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en,
5648 			    ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id);
5649 
5650 		/* if we have a vsi_list_info, print some information about that */
5651 		if (fm_entry->vsi_list_info) {
5652 			sbuf_printf(sbuf,
5653 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
5654 				    fm_entry->vsi_count,
5655 				    fm_entry->vsi_list_info->vsi_list_id,
5656 				    fm_entry->vsi_list_info->ref_cnt);
5657 		}
5658 	}
5659 
5660 	ice_release_lock(rule_lock);
5661 
5662 	sbuf_finish(sbuf);
5663 	sbuf_delete(sbuf);
5664 
5665 	return (0);
5666 }
5667 
5668 /**
5669  * ice_sysctl_dump_vlan_filters - Dump a list of all HW VLAN Filters
5670  * @oidp: sysctl oid structure
5671  * @arg1: pointer to private data structure
5672  * @arg2: unused
5673  * @req: sysctl request pointer
5674  *
5675  * Callback for "vlan_filters" sysctl to dump the programmed VLAN filters.
5676  */
5677 static int
5678 ice_sysctl_dump_vlan_filters(SYSCTL_HANDLER_ARGS)
5679 {
5680 	struct ice_softc *sc = (struct ice_softc *)arg1;
5681 	struct ice_hw *hw = &sc->hw;
5682 	struct ice_switch_info *sw = hw->switch_info;
5683 	struct ice_fltr_mgmt_list_entry *fm_entry;
5684 	struct ice_list_head *rule_head;
5685 	struct ice_lock *rule_lock;
5686 	struct ice_fltr_info *fi;
5687 	struct sbuf *sbuf;
5688 	int ret;
5689 
5690 	UNREFERENCED_PARAMETER(oidp);
5691 	UNREFERENCED_PARAMETER(arg2);
5692 
5693 	if (ice_driver_is_detaching(sc))
5694 		return (ESHUTDOWN);
5695 
5696 	/* Wire the old buffer so we can take a non-sleepable lock */
5697 	ret = sysctl_wire_old_buffer(req, 0);
5698 	if (ret)
5699 		return (ret);
5700 
5701 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5702 
5703 	rule_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock;
5704 	rule_head = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rules;
5705 
5706 	sbuf_printf(sbuf, "VLAN Filter List");
5707 
5708 	ice_acquire_lock(rule_lock);
5709 
5710 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5711 		fi = &fm_entry->fltr_info;
5712 
5713 		sbuf_printf(sbuf,
5714 			    "\nvlan_id = %4d, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d",
5715 			    fi->l_data.vlan.vlan_id, fi->vsi_handle,
5716 			    ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en,
5717 			    ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id);
5718 
5719 		/* if we have a vsi_list_info, print some information about that */
5720 		if (fm_entry->vsi_list_info) {
5721 			sbuf_printf(sbuf,
5722 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
5723 				    fm_entry->vsi_count,
5724 				    fm_entry->vsi_list_info->vsi_list_id,
5725 				    fm_entry->vsi_list_info->ref_cnt);
5726 		}
5727 	}
5728 
5729 	ice_release_lock(rule_lock);
5730 
5731 	sbuf_finish(sbuf);
5732 	sbuf_delete(sbuf);
5733 
5734 	return (0);
5735 }
5736 
5737 /**
5738  * ice_sysctl_dump_ethertype_filters - Dump a list of all HW Ethertype filters
5739  * @oidp: sysctl oid structure
5740  * @arg1: pointer to private data structure
5741  * @arg2: unused
5742  * @req: sysctl request pointer
5743  *
5744  * Callback for "ethertype_filters" sysctl to dump the programmed Ethertype
5745  * filters.
5746  */
5747 static int
5748 ice_sysctl_dump_ethertype_filters(SYSCTL_HANDLER_ARGS)
5749 {
5750 	struct ice_softc *sc = (struct ice_softc *)arg1;
5751 	struct ice_hw *hw = &sc->hw;
5752 	struct ice_switch_info *sw = hw->switch_info;
5753 	struct ice_fltr_mgmt_list_entry *fm_entry;
5754 	struct ice_list_head *rule_head;
5755 	struct ice_lock *rule_lock;
5756 	struct ice_fltr_info *fi;
5757 	struct sbuf *sbuf;
5758 	int ret;
5759 
5760 	UNREFERENCED_PARAMETER(oidp);
5761 	UNREFERENCED_PARAMETER(arg2);
5762 
5763 	if (ice_driver_is_detaching(sc))
5764 		return (ESHUTDOWN);
5765 
5766 	/* Wire the old buffer so we can take a non-sleepable lock */
5767 	ret = sysctl_wire_old_buffer(req, 0);
5768 	if (ret)
5769 		return (ret);
5770 
5771 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5772 
5773 	rule_lock = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE].filt_rule_lock;
5774 	rule_head = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE].filt_rules;
5775 
5776 	sbuf_printf(sbuf, "Ethertype Filter List");
5777 
5778 	ice_acquire_lock(rule_lock);
5779 
5780 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5781 		fi = &fm_entry->fltr_info;
5782 
5783 		sbuf_printf(sbuf,
5784 			    "\nethertype = 0x%04x, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d",
5785 			fi->l_data.ethertype_mac.ethertype,
5786 			fi->vsi_handle, ice_fltr_flag_str(fi->flag),
5787 			fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act),
5788 			fi->fltr_rule_id);
5789 
5790 		/* if we have a vsi_list_info, print some information about that */
5791 		if (fm_entry->vsi_list_info) {
5792 			sbuf_printf(sbuf,
5793 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
5794 				    fm_entry->vsi_count,
5795 				    fm_entry->vsi_list_info->vsi_list_id,
5796 				    fm_entry->vsi_list_info->ref_cnt);
5797 		}
5798 	}
5799 
5800 	ice_release_lock(rule_lock);
5801 
5802 	sbuf_finish(sbuf);
5803 	sbuf_delete(sbuf);
5804 
5805 	return (0);
5806 }
5807 
5808 /**
5809  * ice_sysctl_dump_ethertype_mac_filters - Dump a list of all HW Ethertype/MAC filters
5810  * @oidp: sysctl oid structure
5811  * @arg1: pointer to private data structure
5812  * @arg2: unused
5813  * @req: sysctl request pointer
5814  *
5815  * Callback for "ethertype_mac_filters" sysctl to dump the programmed
5816  * Ethertype/MAC filters.
5817  */
5818 static int
5819 ice_sysctl_dump_ethertype_mac_filters(SYSCTL_HANDLER_ARGS)
5820 {
5821 	struct ice_softc *sc = (struct ice_softc *)arg1;
5822 	struct ice_hw *hw = &sc->hw;
5823 	struct ice_switch_info *sw = hw->switch_info;
5824 	struct ice_fltr_mgmt_list_entry *fm_entry;
5825 	struct ice_list_head *rule_head;
5826 	struct ice_lock *rule_lock;
5827 	struct ice_fltr_info *fi;
5828 	struct sbuf *sbuf;
5829 	int ret;
5830 
5831 	UNREFERENCED_PARAMETER(oidp);
5832 	UNREFERENCED_PARAMETER(arg2);
5833 
5834 	if (ice_driver_is_detaching(sc))
5835 		return (ESHUTDOWN);
5836 
5837 	/* Wire the old buffer so we can take a non-sleepable lock */
5838 	ret = sysctl_wire_old_buffer(req, 0);
5839 	if (ret)
5840 		return (ret);
5841 
5842 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5843 
5844 	rule_lock = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE_MAC].filt_rule_lock;
5845 	rule_head = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE_MAC].filt_rules;
5846 
5847 	sbuf_printf(sbuf, "Ethertype/MAC Filter List");
5848 
5849 	ice_acquire_lock(rule_lock);
5850 
5851 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5852 		fi = &fm_entry->fltr_info;
5853 
5854 		sbuf_printf(sbuf,
5855 			    "\nethertype = 0x%04x, mac = %6D, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d",
5856 			    fi->l_data.ethertype_mac.ethertype,
5857 			    fi->l_data.ethertype_mac.mac_addr, ":",
5858 			    fi->vsi_handle, ice_fltr_flag_str(fi->flag),
5859 			    fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act),
5860 			    fi->fltr_rule_id);
5861 
5862 		/* if we have a vsi_list_info, print some information about that */
5863 		if (fm_entry->vsi_list_info) {
5864 			sbuf_printf(sbuf,
5865 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
5866 				    fm_entry->vsi_count,
5867 				    fm_entry->vsi_list_info->vsi_list_id,
5868 				    fm_entry->vsi_list_info->ref_cnt);
5869 		}
5870 	}
5871 
5872 	ice_release_lock(rule_lock);
5873 
5874 	sbuf_finish(sbuf);
5875 	sbuf_delete(sbuf);
5876 
5877 	return (0);
5878 }
5879 
5880 /**
5881  * ice_sysctl_dump_state_flags - Dump device driver state flags
5882  * @oidp: sysctl oid structure
5883  * @arg1: pointer to private data structure
5884  * @arg2: unused
5885  * @req: sysctl request pointer
5886  *
5887  * Callback for "state" sysctl to display currently set driver state flags.
5888  */
5889 static int
5890 ice_sysctl_dump_state_flags(SYSCTL_HANDLER_ARGS)
5891 {
5892 	struct ice_softc *sc = (struct ice_softc *)arg1;
5893 	struct sbuf *sbuf;
5894 	u32 copied_state;
5895 	unsigned int i;
5896 	bool at_least_one = false;
5897 
5898 	UNREFERENCED_PARAMETER(oidp);
5899 	UNREFERENCED_PARAMETER(arg2);
5900 
5901 	if (ice_driver_is_detaching(sc))
5902 		return (ESHUTDOWN);
5903 
5904 	/* Make a copy of the state to ensure we display coherent values */
5905 	copied_state = atomic_load_acq_32(&sc->state);
5906 
5907 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5908 
5909 	/* Add the string for each set state to the sbuf */
5910 	for (i = 0; i < 32; i++) {
5911 		if (copied_state & BIT(i)) {
5912 			const char *str = ice_state_to_str((enum ice_state)i);
5913 
5914 			at_least_one = true;
5915 
5916 			if (str)
5917 				sbuf_printf(sbuf, "\n%s", str);
5918 			else
5919 				sbuf_printf(sbuf, "\nBIT(%u)", i);
5920 		}
5921 	}
5922 
5923 	if (!at_least_one)
5924 		sbuf_printf(sbuf, "Nothing set");
5925 
5926 	sbuf_finish(sbuf);
5927 	sbuf_delete(sbuf);
5928 
5929 	return (0);
5930 }
5931 
/*
 * Description string for the "debug_mask" sysctl registered in
 * ice_add_debug_tunables. Lists each debug message category next to the
 * flag value that selects it.
 */
#define ICE_SYSCTL_DEBUG_MASK_HELP \
"\nSelect debug statements to print to kernel messages"		\
"\nFlags:"							\
"\n\t        0x1 - Function Tracing"				\
"\n\t        0x2 - Driver Initialization"			\
"\n\t        0x4 - Release"					\
"\n\t        0x8 - FW Logging"					\
"\n\t       0x10 - Link"					\
"\n\t       0x20 - PHY"						\
"\n\t       0x40 - Queue Context"				\
"\n\t       0x80 - NVM"						\
"\n\t      0x100 - LAN"						\
"\n\t      0x200 - Flow"					\
"\n\t      0x400 - DCB"						\
"\n\t      0x800 - Diagnostics"					\
"\n\t     0x1000 - Flow Director"				\
"\n\t     0x2000 - Switch"					\
"\n\t     0x4000 - Scheduler"					\
"\n\t     0x8000 - RDMA"					\
"\n\t    0x10000 - DDP Package"					\
"\n\t    0x20000 - Resources"					\
"\n\t    0x40000 - ACL"						\
"\n\t    0x80000 - PTP"						\
"\n\t   0x100000 - Admin Queue messages"			\
"\n\t   0x200000 - Admin Queue descriptors"			\
"\n\t   0x400000 - Admin Queue descriptor buffers"		\
"\n\t   0x800000 - Admin Queue commands"			\
"\n\t  0x1000000 - Parser"					\
"\n\t  ..."							\
"\n\t  0x8000000 - (Reserved for user)"				\
"\n\t"								\
"\nUse \"sysctl -x\" to view flags properly."
5964 
5965 /**
5966  * ice_add_debug_tunables - Add tunables helpful for debugging the device driver
5967  * @sc: device private structure
5968  *
5969  * Add sysctl tunable values related to debugging the device driver. For now,
5970  * this means a tunable to set the debug mask early during driver load.
5971  *
5972  * The debug node will be marked CTLFLAG_SKIP unless INVARIANTS is defined, so
5973  * that in normal kernel builds, these will all be hidden, but on a debug
5974  * kernel they will be more easily visible.
5975  */
5976 static void
5977 ice_add_debug_tunables(struct ice_softc *sc)
5978 {
5979 	struct sysctl_oid_list *debug_list;
5980 	device_t dev = sc->dev;
5981 
5982 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5983 	struct sysctl_oid_list *ctx_list =
5984 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
5985 
5986 	sc->debug_sysctls = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "debug",
5987 					    ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
5988 					    NULL, "Debug Sysctls");
5989 	debug_list = SYSCTL_CHILDREN(sc->debug_sysctls);
5990 
5991 	SYSCTL_ADD_U64(ctx, debug_list, OID_AUTO, "debug_mask",
5992 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RWTUN,
5993 		       &sc->hw.debug_mask, 0,
5994 		       ICE_SYSCTL_DEBUG_MASK_HELP);
5995 
5996 	/* Load the default value from the global sysctl first */
5997 	sc->enable_tx_fc_filter = ice_enable_tx_fc_filter;
5998 
5999 	SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "enable_tx_fc_filter",
6000 			ICE_CTLFLAG_DEBUG | CTLFLAG_RDTUN,
6001 			&sc->enable_tx_fc_filter, 0,
6002 			"Drop Ethertype 0x8808 control frames originating from software on this PF");
6003 
6004 	sc->tx_balance_en = ice_tx_balance_en;
6005 	SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "tx_balance",
6006 			ICE_CTLFLAG_DEBUG | CTLFLAG_RWTUN,
6007 			&sc->tx_balance_en, 0,
6008 			"Enable 5-layer scheduler topology");
6009 
6010 	/* Load the default value from the global sysctl first */
6011 	sc->enable_tx_lldp_filter = ice_enable_tx_lldp_filter;
6012 
6013 	SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "enable_tx_lldp_filter",
6014 			ICE_CTLFLAG_DEBUG | CTLFLAG_RDTUN,
6015 			&sc->enable_tx_lldp_filter, 0,
6016 			"Drop Ethertype 0x88cc LLDP frames originating from software on this PF");
6017 
6018 	ice_add_fw_logging_tunables(sc, sc->debug_sysctls);
6019 }
6020 
/*
 * Description string for the "request_reset" sysctl. Lists the reset
 * keywords that ice_sysctl_request_reset acts on.
 */
#define ICE_SYSCTL_HELP_REQUEST_RESET		\
"\nRequest the driver to initiate a reset."	\
"\n\tpfr - Initiate a PF reset"			\
"\n\tcorer - Initiate a CORE reset"		\
"\n\tglobr - Initiate a GLOBAL reset"
6026 
/**
 * @var rl_sysctl_ticks
 * @brief timestamp for latest reset request sysctl call
 *
 * Helps rate-limit the call to the sysctl which resets the device.
 *
 * Holds the value of the kernel tick counter recorded by
 * ice_sysctl_request_reset. This is a single file-scope variable rather
 * than a softc field, so the same timestamp is used for every request
 * handled by this code regardless of which device it targets.
 */
int rl_sysctl_ticks = 0;
6034 
6035 /**
6036  * ice_sysctl_request_reset - Request that the driver initiate a reset
6037  * @oidp: sysctl oid structure
6038  * @arg1: pointer to private data structure
6039  * @arg2: unused
6040  * @req: sysctl request pointer
6041  *
6042  * Callback for "request_reset" sysctl to request that the driver initiate
6043  * a reset. Expects to be passed one of the following strings
6044  *
6045  * "pfr" - Initiate a PF reset
6046  * "corer" - Initiate a CORE reset
6047  * "globr" - Initiate a Global reset
6048  */
6049 static int
6050 ice_sysctl_request_reset(SYSCTL_HANDLER_ARGS)
6051 {
6052 	struct ice_softc *sc = (struct ice_softc *)arg1;
6053 	struct ice_hw *hw = &sc->hw;
6054 	enum ice_status status;
6055 	enum ice_reset_req reset_type = ICE_RESET_INVAL;
6056 	const char *reset_message;
6057 	int ret;
6058 
6059 	/* Buffer to store the requested reset string. Must contain enough
6060 	 * space to store the largest expected reset string, which currently
6061 	 * means 6 bytes of space.
6062 	 */
6063 	char reset[6] = "";
6064 
6065 	UNREFERENCED_PARAMETER(arg2);
6066 
6067 	ret = priv_check(curthread, PRIV_DRIVER);
6068 	if (ret)
6069 		return (ret);
6070 
6071 	if (ice_driver_is_detaching(sc))
6072 		return (ESHUTDOWN);
6073 
6074 	/* Read in the requested reset type. */
6075 	ret = sysctl_handle_string(oidp, reset, sizeof(reset), req);
6076 	if ((ret) || (req->newptr == NULL))
6077 		return (ret);
6078 
6079 	if (strcmp(reset, "pfr") == 0) {
6080 		reset_message = "Requesting a PF reset";
6081 		reset_type = ICE_RESET_PFR;
6082 	} else if (strcmp(reset, "corer") == 0) {
6083 		reset_message = "Initiating a CORE reset";
6084 		reset_type = ICE_RESET_CORER;
6085 	} else if (strcmp(reset, "globr") == 0) {
6086 		reset_message = "Initiating a GLOBAL reset";
6087 		reset_type = ICE_RESET_GLOBR;
6088 	} else if (strcmp(reset, "empr") == 0) {
6089 		device_printf(sc->dev, "Triggering an EMP reset via software is not currently supported\n");
6090 		return (EOPNOTSUPP);
6091 	}
6092 
6093 	if (reset_type == ICE_RESET_INVAL) {
6094 		device_printf(sc->dev, "%s is not a valid reset request\n", reset);
6095 		return (EINVAL);
6096 	}
6097 
6098 	/*
6099 	 * Rate-limit the frequency at which this function is called.
6100 	 * Assuming this is called successfully once, typically,
6101 	 * everything should be handled within the allotted time frame.
6102 	 * However, in the odd setup situations, we've also put in
6103 	 * guards for when the reset has finished, but we're in the
6104 	 * process of rebuilding. And instead of queueing an intent,
6105 	 * simply error out and let the caller retry, if so desired.
6106 	 */
6107 	if (TICKS_2_MSEC(ticks - rl_sysctl_ticks) < 500) {
6108 		device_printf(sc->dev,
6109 		    "Call frequency too high. Operation aborted.\n");
6110 		return (EBUSY);
6111 	}
6112 	rl_sysctl_ticks = ticks;
6113 
6114 	if (TICKS_2_MSEC(ticks - sc->rebuild_ticks) < 100) {
6115 		device_printf(sc->dev, "Device rebuilding. Operation aborted.\n");
6116 		return (EBUSY);
6117 	}
6118 
6119 	if (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_DEVSTATE_M) {
6120 		device_printf(sc->dev, "Device in reset. Operation aborted.\n");
6121 		return (EBUSY);
6122 	}
6123 
6124 	device_printf(sc->dev, "%s\n", reset_message);
6125 
6126 	/* Initiate the PF reset during the admin status task */
6127 	if (reset_type == ICE_RESET_PFR) {
6128 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
6129 		return (0);
6130 	}
6131 
6132 	/*
6133 	 * Other types of resets including CORE and GLOBAL resets trigger an
6134 	 * interrupt on all PFs. Initiate the reset now. Preparation and
6135 	 * rebuild logic will be handled by the admin status task.
6136 	 */
6137 	status = ice_reset(hw, reset_type);
6138 
6139 	/*
6140 	 * Resets can take a long time and we still don't want another call
6141 	 * to this function before we settle down.
6142 	 */
6143 	rl_sysctl_ticks = ticks;
6144 
6145 	if (status) {
6146 		device_printf(sc->dev, "failed to initiate device reset, err %s\n",
6147 			      ice_status_str(status));
6148 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
6149 		return (EFAULT);
6150 	}
6151 
6152 	return (0);
6153 }
6154 
/*
 * Description string listing the FW cluster flags that can be selected for
 * a FW debug dump.
 * NOTE(review): presumably attached to the sysctl serviced by
 * ice_sysctl_fw_debug_dump_cluster_setting; the registration is not in view.
 */
#define ICE_SYSCTL_HELP_FW_DEBUG_DUMP_CLUSTER_SETTING		\
"\nSelect clusters to dump with \"dump\" sysctl"		\
"\nFlags:"							\
"\n\t   0x1 - Switch"						\
"\n\t   0x2 - ACL"						\
"\n\t   0x4 - Tx Scheduler"					\
"\n\t   0x8 - Profile Configuration"				\
"\n\t  0x20 - Link"						\
"\n\t  0x80 - DCB"						\
"\n\t 0x100 - L2P"						\
"\n\t"								\
"\nUse \"sysctl -x\" to view flags properly."
6167 
6168 /**
6169  * ice_sysctl_fw_debug_dump_cluster_setting - Set which clusters to dump
6170  *     from FW when FW debug dump occurs
6171  * @oidp: sysctl oid structure
6172  * @arg1: pointer to private data structure
6173  * @arg2: unused
6174  * @req: sysctl request pointer
6175  */
6176 static int
6177 ice_sysctl_fw_debug_dump_cluster_setting(SYSCTL_HANDLER_ARGS)
6178 {
6179 	struct ice_softc *sc = (struct ice_softc *)arg1;
6180 	device_t dev = sc->dev;
6181 	u16 clusters;
6182 	int ret;
6183 
6184 	UNREFERENCED_PARAMETER(arg2);
6185 
6186 	ret = priv_check(curthread, PRIV_DRIVER);
6187 	if (ret)
6188 		return (ret);
6189 
6190 	if (ice_driver_is_detaching(sc))
6191 		return (ESHUTDOWN);
6192 
6193 	clusters = sc->fw_debug_dump_cluster_mask;
6194 
6195 	ret = sysctl_handle_16(oidp, &clusters, 0, req);
6196 	if ((ret) || (req->newptr == NULL))
6197 		return (ret);
6198 
6199 	if (!clusters ||
6200 	    (clusters & ~(ICE_FW_DEBUG_DUMP_VALID_CLUSTER_MASK))) {
6201 		device_printf(dev,
6202 		    "%s: ERROR: Incorrect settings requested\n",
6203 		    __func__);
6204 		return (EINVAL);
6205 	}
6206 
6207 	sc->fw_debug_dump_cluster_mask = clusters;
6208 
6209 	return (0);
6210 }
6211 
/*
 * Upper bound on the number of loop iterations (one AQ command each) that
 * ice_fw_debug_dump_print_cluster performs for a single cluster.
 */
#define ICE_FW_DUMP_AQ_COUNT_LIMIT	(10000)
6213 
6214 /**
6215  * ice_fw_debug_dump_print_cluster - Print formatted cluster data from FW
6216  * @sc: the device softc
6217  * @sbuf: initialized sbuf to print data to
6218  * @cluster_id: FW cluster ID to print data from
6219  *
6220  * Reads debug data from the specified cluster id in the FW and prints it to
6221  * the input sbuf. This function issues multiple AQ commands to the FW in
6222  * order to get all of the data in the cluster.
6223  *
6224  * @remark Only intended to be used by the sysctl handler
6225  * ice_sysctl_fw_debug_dump_do_dump
6226  */
6227 static void
6228 ice_fw_debug_dump_print_cluster(struct ice_softc *sc, struct sbuf *sbuf, u16 cluster_id)
6229 {
6230 	struct ice_hw *hw = &sc->hw;
6231 	device_t dev = sc->dev;
6232 	u16 data_buf_size = ICE_AQ_MAX_BUF_LEN;
6233 	const u8 reserved_buf[8] = {};
6234 	enum ice_status status;
6235 	int counter = 0;
6236 	u8 *data_buf;
6237 
6238 	/* Other setup */
6239 	data_buf = (u8 *)malloc(data_buf_size, M_ICE, M_NOWAIT | M_ZERO);
6240 	if (!data_buf)
6241 		return;
6242 
6243 	/* Input parameters / loop variables */
6244 	u16 table_id = 0;
6245 	u32 offset = 0;
6246 
6247 	/* Output from the Get Internal Data AQ command */
6248 	u16 ret_buf_size = 0;
6249 	u16 ret_next_table = 0;
6250 	u32 ret_next_index = 0;
6251 
6252 	ice_debug(hw, ICE_DBG_DIAG, "%s: dumping cluster id %d\n", __func__,
6253 	    cluster_id);
6254 
6255 	for (;;) {
6256 		/* Do not trust the FW behavior to be completely correct */
6257 		if (counter++ >= ICE_FW_DUMP_AQ_COUNT_LIMIT) {
6258 			device_printf(dev,
6259 			    "%s: Exceeded counter limit for cluster %d\n",
6260 			    __func__, cluster_id);
6261 			break;
6262 		}
6263 
6264 		ice_debug(hw, ICE_DBG_DIAG, "---\n");
6265 		ice_debug(hw, ICE_DBG_DIAG,
6266 		    "table_id 0x%04x offset 0x%08x buf_size %d\n",
6267 		    table_id, offset, data_buf_size);
6268 
6269 		status = ice_aq_get_internal_data(hw, cluster_id, table_id,
6270 		    offset, data_buf, data_buf_size, &ret_buf_size,
6271 		    &ret_next_table, &ret_next_index, NULL);
6272 		if (status) {
6273 			device_printf(dev,
6274 			    "%s: ice_aq_get_internal_data in cluster %d: err %s aq_err %s\n",
6275 			    __func__, cluster_id, ice_status_str(status),
6276 			    ice_aq_str(hw->adminq.sq_last_status));
6277 			break;
6278 		}
6279 
6280 		ice_debug(hw, ICE_DBG_DIAG,
6281 		    "ret_table_id 0x%04x ret_offset 0x%08x ret_buf_size %d\n",
6282 		    ret_next_table, ret_next_index, ret_buf_size);
6283 
6284 		/* Print cluster id */
6285 		u32 print_cluster_id = (u32)cluster_id;
6286 		sbuf_bcat(sbuf, &print_cluster_id, sizeof(print_cluster_id));
6287 		/* Print table id */
6288 		u32 print_table_id = (u32)table_id;
6289 		sbuf_bcat(sbuf, &print_table_id, sizeof(print_table_id));
6290 		/* Print table length */
6291 		u32 print_table_length = (u32)ret_buf_size;
6292 		sbuf_bcat(sbuf, &print_table_length, sizeof(print_table_length));
6293 		/* Print current offset */
6294 		u32 print_curr_offset = offset;
6295 		sbuf_bcat(sbuf, &print_curr_offset, sizeof(print_curr_offset));
6296 		/* Print reserved bytes */
6297 		sbuf_bcat(sbuf, reserved_buf, sizeof(reserved_buf));
6298 		/* Print data */
6299 		sbuf_bcat(sbuf, data_buf, ret_buf_size);
6300 
6301 		/* Adjust loop variables */
6302 		memset(data_buf, 0, data_buf_size);
6303 		bool same_table_next = (table_id == ret_next_table);
6304 		bool last_table_next = (ret_next_table == 0xff || ret_next_table == 0xffff);
6305 		bool last_offset_next = (ret_next_index == 0xffffffff || ret_next_index == 0);
6306 
6307 		if ((!same_table_next && !last_offset_next) ||
6308 		    (same_table_next && last_table_next)) {
6309 			device_printf(dev,
6310 			    "%s: Unexpected conditions for same_table_next(%d) last_table_next(%d) last_offset_next(%d), ending cluster (%d)\n",
6311 			    __func__, same_table_next, last_table_next, last_offset_next, cluster_id);
6312 			break;
6313 		}
6314 
6315 		if (!same_table_next && !last_table_next && last_offset_next) {
6316 			/* We've hit the end of the table */
6317 			table_id = ret_next_table;
6318 			offset = 0;
6319 		}
6320 		else if (!same_table_next && last_table_next && last_offset_next) {
6321 			/* We've hit the end of the cluster */
6322 			break;
6323 		}
6324 		else if (same_table_next && !last_table_next && last_offset_next) {
6325 			if (cluster_id == 0x1 && table_id < 39)
6326 				table_id += 1;
6327 			else
6328 				break;
6329 		}
6330 		else { /* if (same_table_next && !last_table_next && !last_offset_next) */
6331 			/* More data left in the table */
6332 			offset = ret_next_index;
6333 		}
6334 	}
6335 
6336 	free(data_buf, M_ICE);
6337 }
6338 
/*
 * Description string explaining how to trigger and read a FW debug dump.
 * NOTE(review): presumably attached to the sysctl serviced by
 * ice_sysctl_fw_debug_dump_do_dump; the registration is not in view.
 */
#define ICE_SYSCTL_HELP_FW_DEBUG_DUMP_DO_DUMP \
"\nWrite 1 to output a FW debug dump containing the clusters specified by the \"clusters\" sysctl" \
"\nThe \"-b\" flag must be used in order to dump this data as binary data because" \
"\nthis data is opaque and not a string."

/*
 * Output size estimates, in bytes, that ice_sysctl_fw_debug_dump_do_dump
 * reports when the caller only asks for a length: a base amount plus one
 * amount for each of the cluster mask bits 0x1, 0x2 and 0x4 that is set.
 */
#define ICE_FW_DUMP_BASE_TEXT_SIZE	(1024 * 1024)
#define ICE_FW_DUMP_CLUST0_TEXT_SIZE	(2 * 1024 * 1024)
#define ICE_FW_DUMP_CLUST1_TEXT_SIZE	(128 * 1024)
#define ICE_FW_DUMP_CLUST2_TEXT_SIZE	(2 * 1024 * 1024)
6348 
6349 /**
6350  * ice_sysctl_fw_debug_dump_do_dump - Dump data from FW to sysctl output
6351  * @oidp: sysctl oid structure
6352  * @arg1: pointer to private data structure
6353  * @arg2: unused
6354  * @req: sysctl request pointer
6355  *
6356  * Sysctl handler for the debug.dump.dump sysctl. Prints out a specially-
6357  * formatted dump of some debug FW data intended to be processed by a special
6358  * Intel tool. Prints out the cluster data specified by the "clusters"
6359  * sysctl.
6360  *
6361  * @remark The actual AQ calls and printing are handled by a helper
6362  * function above.
6363  */
6364 static int
6365 ice_sysctl_fw_debug_dump_do_dump(SYSCTL_HANDLER_ARGS)
6366 {
6367 	struct ice_softc *sc = (struct ice_softc *)arg1;
6368 	device_t dev = sc->dev;
6369 	struct sbuf *sbuf;
6370 	int bit, ret;
6371 
6372 	UNREFERENCED_PARAMETER(arg2);
6373 
6374 	ret = priv_check(curthread, PRIV_DRIVER);
6375 	if (ret)
6376 		return (ret);
6377 
6378 	if (ice_driver_is_detaching(sc))
6379 		return (ESHUTDOWN);
6380 
6381 	/* If the user hasn't written "1" to this sysctl yet: */
6382 	if (!ice_test_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP)) {
6383 		/* Avoid output on the first set of reads to this sysctl in
6384 		 * order to prevent a null byte from being written to the
6385 		 * end result when called via sysctl(8).
6386 		 */
6387 		if (req->oldptr == NULL && req->newptr == NULL) {
6388 			ret = SYSCTL_OUT(req, 0, 0);
6389 			return (ret);
6390 		}
6391 
6392 		char input_buf[2] = "";
6393 		ret = sysctl_handle_string(oidp, input_buf, sizeof(input_buf), req);
6394 		if ((ret) || (req->newptr == NULL))
6395 			return (ret);
6396 
6397 		/* If we get '1', then indicate we'll do a dump in the next
6398 		 * sysctl read call.
6399 		 */
6400 		if (input_buf[0] == '1') {
6401 			ice_set_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP);
6402 			return (0);
6403 		}
6404 
6405 		return (EINVAL);
6406 	}
6407 
6408 	/* --- FW debug dump state is set --- */
6409 
6410 	if (!sc->fw_debug_dump_cluster_mask) {
6411 		device_printf(dev,
6412 		    "%s: Debug Dump failed because no cluster was specified.\n",
6413 		    __func__);
6414 		ret = EINVAL;
6415 		goto out;
6416 	}
6417 
6418 	/* Caller just wants the upper bound for size */
6419 	if (req->oldptr == NULL && req->newptr == NULL) {
6420 		size_t est_output_len = ICE_FW_DUMP_BASE_TEXT_SIZE;
6421 		if (sc->fw_debug_dump_cluster_mask & 0x1)
6422 			est_output_len += ICE_FW_DUMP_CLUST0_TEXT_SIZE;
6423 		if (sc->fw_debug_dump_cluster_mask & 0x2)
6424 			est_output_len += ICE_FW_DUMP_CLUST1_TEXT_SIZE;
6425 		if (sc->fw_debug_dump_cluster_mask & 0x4)
6426 			est_output_len += ICE_FW_DUMP_CLUST2_TEXT_SIZE;
6427 
6428 		ret = SYSCTL_OUT(req, 0, est_output_len);
6429 		return (ret);
6430 	}
6431 
6432 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6433 	sbuf_clear_flags(sbuf, SBUF_INCLUDENUL);
6434 
6435 	ice_debug(&sc->hw, ICE_DBG_DIAG, "%s: Debug Dump running...\n", __func__);
6436 
6437 	for_each_set_bit(bit, &sc->fw_debug_dump_cluster_mask,
6438 	    sizeof(sc->fw_debug_dump_cluster_mask) * 8)
6439 		ice_fw_debug_dump_print_cluster(sc, sbuf, bit);
6440 
6441 	sbuf_finish(sbuf);
6442 	sbuf_delete(sbuf);
6443 
6444 out:
6445 	ice_clear_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP);
6446 	return (ret);
6447 }
6448 
6449 /**
6450  * ice_add_debug_sysctls - Add sysctls helpful for debugging the device driver
6451  * @sc: device private structure
6452  *
6453  * Add sysctls related to debugging the device driver. Generally these should
6454  * simply be sysctls which dump internal driver state, to aid in understanding
6455  * what the driver is doing.
6456  */
6457 static void
6458 ice_add_debug_sysctls(struct ice_softc *sc)
6459 {
6460 	struct sysctl_oid *sw_node, *dump_node;
6461 	struct sysctl_oid_list *debug_list, *sw_list, *dump_list;
6462 	device_t dev = sc->dev;
6463 
6464 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
6465 
6466 	debug_list = SYSCTL_CHILDREN(sc->debug_sysctls);
6467 
6468 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "request_reset",
6469 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_WR, sc, 0,
6470 			ice_sysctl_request_reset, "A",
6471 			ICE_SYSCTL_HELP_REQUEST_RESET);
6472 
6473 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "pfr_count",
6474 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6475 		       &sc->soft_stats.pfr_count, 0,
6476 		       "# of PF resets handled");
6477 
6478 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "corer_count",
6479 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6480 		       &sc->soft_stats.corer_count, 0,
6481 		       "# of CORE resets handled");
6482 
6483 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "globr_count",
6484 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6485 		       &sc->soft_stats.globr_count, 0,
6486 		       "# of Global resets handled");
6487 
6488 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "empr_count",
6489 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6490 		       &sc->soft_stats.empr_count, 0,
6491 		       "# of EMP resets handled");
6492 
6493 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "tx_mdd_count",
6494 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6495 		       &sc->soft_stats.tx_mdd_count, 0,
6496 		       "# of Tx MDD events detected");
6497 
6498 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "rx_mdd_count",
6499 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6500 		       &sc->soft_stats.rx_mdd_count, 0,
6501 		       "# of Rx MDD events detected");
6502 
6503 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "state",
6504 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6505 			ice_sysctl_dump_state_flags, "A",
6506 			"Driver State Flags");
6507 
6508 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_type_low",
6509 			ICE_CTLFLAG_DEBUG | CTLTYPE_U64 | CTLFLAG_RW, sc, 0,
6510 			ice_sysctl_phy_type_low, "QU",
6511 			"PHY type Low from Get PHY Caps/Set PHY Cfg");
6512 
6513 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_type_high",
6514 			ICE_CTLFLAG_DEBUG | CTLTYPE_U64 | CTLFLAG_RW, sc, 0,
6515 			ice_sysctl_phy_type_high, "QU",
6516 			"PHY type High from Get PHY Caps/Set PHY Cfg");
6517 
6518 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_sw_caps",
6519 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6520 			ice_sysctl_phy_sw_caps, "",
6521 			"Get PHY Capabilities (Software configuration)");
6522 
6523 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_nvm_caps",
6524 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6525 			ice_sysctl_phy_nvm_caps, "",
6526 			"Get PHY Capabilities (NVM configuration)");
6527 
6528 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_topo_caps",
6529 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6530 			ice_sysctl_phy_topo_caps, "",
6531 			"Get PHY Capabilities (Topology configuration)");
6532 
6533 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_link_status",
6534 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6535 			ice_sysctl_phy_link_status, "",
6536 			"Get PHY Link Status");
6537 
6538 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "read_i2c_diag_data",
6539 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6540 			ice_sysctl_read_i2c_diag_data, "A",
6541 			"Dump selected diagnostic data from FW");
6542 
6543 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "fw_build",
6544 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD, &sc->hw.fw_build, 0,
6545 		       "FW Build ID");
6546 
6547 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "os_ddp_version",
6548 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6549 			ice_sysctl_os_pkg_version, "A",
6550 			"DDP package name and version found in ice_ddp");
6551 
6552 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "cur_lldp_persist_status",
6553 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6554 			ice_sysctl_fw_cur_lldp_persist_status, "A",
6555 			"Current LLDP persistent status");
6556 
6557 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "dflt_lldp_persist_status",
6558 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6559 			ice_sysctl_fw_dflt_lldp_persist_status, "A",
6560 			"Default LLDP persistent status");
6561 
6562 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "negotiated_fc",
6563 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6564 			ice_sysctl_negotiated_fc, "A",
6565 			"Current Negotiated Flow Control mode");
6566 
6567 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "local_dcbx_cfg",
6568 			CTLTYPE_STRING | CTLFLAG_RD, sc, ICE_AQ_LLDP_MIB_LOCAL,
6569 			ice_sysctl_dump_dcbx_cfg, "A",
6570 			"Dumps Local MIB information from firmware");
6571 
6572 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "remote_dcbx_cfg",
6573 			CTLTYPE_STRING | CTLFLAG_RD, sc, ICE_AQ_LLDP_MIB_REMOTE,
6574 			ice_sysctl_dump_dcbx_cfg, "A",
6575 			"Dumps Remote MIB information from firmware");
6576 
6577 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "pf_vsi_cfg", CTLTYPE_STRING | CTLFLAG_RD,
6578 			sc, 0, ice_sysctl_dump_vsi_cfg, "A",
6579 			"Dumps Selected PF VSI parameters from firmware");
6580 
6581 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "query_port_ets", CTLTYPE_STRING | CTLFLAG_RD,
6582 			sc, 0, ice_sysctl_query_port_ets, "A",
6583 			"Prints selected output from Query Port ETS AQ command");
6584 
6585 	sw_node = SYSCTL_ADD_NODE(ctx, debug_list, OID_AUTO, "switch",
6586 				  ICE_CTLFLAG_DEBUG | CTLFLAG_RD, NULL,
6587 				  "Switch Configuration");
6588 	sw_list = SYSCTL_CHILDREN(sw_node);
6589 
6590 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "mac_filters",
6591 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6592 			ice_sysctl_dump_mac_filters, "A",
6593 			"MAC Filters");
6594 
6595 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "vlan_filters",
6596 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6597 			ice_sysctl_dump_vlan_filters, "A",
6598 			"VLAN Filters");
6599 
6600 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "ethertype_filters",
6601 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6602 			ice_sysctl_dump_ethertype_filters, "A",
6603 			"Ethertype Filters");
6604 
6605 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "ethertype_mac_filters",
6606 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6607 			ice_sysctl_dump_ethertype_mac_filters, "A",
6608 			"Ethertype/MAC Filters");
6609 
6610 	dump_node = SYSCTL_ADD_NODE(ctx, debug_list, OID_AUTO, "dump",
6611 				  ICE_CTLFLAG_DEBUG | CTLFLAG_RD, NULL,
6612 				  "Internal FW Dump");
6613 	dump_list = SYSCTL_CHILDREN(dump_node);
6614 
6615 	SYSCTL_ADD_PROC(ctx, dump_list, OID_AUTO, "clusters",
6616 			ICE_CTLFLAG_DEBUG | CTLTYPE_U16 | CTLFLAG_RW, sc, 0,
6617 			ice_sysctl_fw_debug_dump_cluster_setting, "SU",
6618 			ICE_SYSCTL_HELP_FW_DEBUG_DUMP_CLUSTER_SETTING);
6619 
6620 	SYSCTL_ADD_PROC(ctx, dump_list, OID_AUTO, "dump",
6621 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
6622 			ice_sysctl_fw_debug_dump_do_dump, "",
6623 			ICE_SYSCTL_HELP_FW_DEBUG_DUMP_DO_DUMP);
6624 }
6625 
6626 /**
6627  * ice_vsi_disable_tx - Disable (unconfigure) Tx queues for a VSI
6628  * @vsi: the VSI to disable
6629  *
6630  * Disables the Tx queues associated with this VSI. Essentially the opposite
6631  * of ice_cfg_vsi_for_tx.
6632  */
6633 int
6634 ice_vsi_disable_tx(struct ice_vsi *vsi)
6635 {
6636 	struct ice_softc *sc = vsi->sc;
6637 	struct ice_hw *hw = &sc->hw;
6638 	enum ice_status status;
6639 	u32 *q_teids;
6640 	u16 *q_ids, *q_handles;
6641 	size_t q_teids_size, q_ids_size, q_handles_size;
6642 	int tc, j, buf_idx, err = 0;
6643 
6644 	if (vsi->num_tx_queues > 255)
6645 		return (ENOSYS);
6646 
6647 	q_teids_size = sizeof(*q_teids) * vsi->num_tx_queues;
6648 	q_teids = (u32 *)malloc(q_teids_size, M_ICE, M_NOWAIT|M_ZERO);
6649 	if (!q_teids)
6650 		return (ENOMEM);
6651 
6652 	q_ids_size = sizeof(*q_ids) * vsi->num_tx_queues;
6653 	q_ids = (u16 *)malloc(q_ids_size, M_ICE, M_NOWAIT|M_ZERO);
6654 	if (!q_ids) {
6655 		err = (ENOMEM);
6656 		goto free_q_teids;
6657 	}
6658 
6659 	q_handles_size = sizeof(*q_handles) * vsi->num_tx_queues;
6660 	q_handles = (u16 *)malloc(q_handles_size, M_ICE, M_NOWAIT|M_ZERO);
6661 	if (!q_handles) {
6662 		err = (ENOMEM);
6663 		goto free_q_ids;
6664 	}
6665 
6666 	ice_for_each_traffic_class(tc) {
6667 		struct ice_tc_info *tc_info = &vsi->tc_info[tc];
6668 		u16 start_idx, end_idx;
6669 
6670 		/* Skip rest of disabled TCs once the first
6671 		 * disabled TC is found */
6672 		if (!(vsi->tc_map & BIT(tc)))
6673 			break;
6674 
6675 		/* Fill out TX queue information for this TC */
6676 		start_idx = tc_info->qoffset;
6677 		end_idx = start_idx + tc_info->qcount_tx;
6678 		buf_idx = 0;
6679 		for (j = start_idx; j < end_idx; j++) {
6680 			struct ice_tx_queue *txq = &vsi->tx_queues[j];
6681 
6682 			q_ids[buf_idx] = vsi->tx_qmap[j];
6683 			q_handles[buf_idx] = txq->q_handle;
6684 			q_teids[buf_idx] = txq->q_teid;
6685 			buf_idx++;
6686 		}
6687 
6688 		status = ice_dis_vsi_txq(hw->port_info, vsi->idx, tc, buf_idx,
6689 					 q_handles, q_ids, q_teids, ICE_NO_RESET, 0, NULL);
6690 		if (status == ICE_ERR_DOES_NOT_EXIST) {
6691 			; /* Queues have already been disabled, no need to report this as an error */
6692 		} else if (status == ICE_ERR_RESET_ONGOING) {
6693 			device_printf(sc->dev,
6694 				      "Reset in progress. LAN Tx queues already disabled\n");
6695 			break;
6696 		} else if (status) {
6697 			device_printf(sc->dev,
6698 				      "Failed to disable LAN Tx queues: err %s aq_err %s\n",
6699 				      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
6700 			err = (ENODEV);
6701 			break;
6702 		}
6703 
6704 		/* Clear buffers */
6705 		memset(q_teids, 0, q_teids_size);
6706 		memset(q_ids, 0, q_ids_size);
6707 		memset(q_handles, 0, q_handles_size);
6708 	}
6709 
6710 /* free_q_handles: */
6711 	free(q_handles, M_ICE);
6712 free_q_ids:
6713 	free(q_ids, M_ICE);
6714 free_q_teids:
6715 	free(q_teids, M_ICE);
6716 
6717 	return err;
6718 }
6719 
6720 /**
6721  * ice_vsi_set_rss_params - Set the RSS parameters for the VSI
6722  * @vsi: the VSI to configure
6723  *
6724  * Sets the RSS table size and lookup table type for the VSI based on its
6725  * VSI type.
6726  */
6727 static void
6728 ice_vsi_set_rss_params(struct ice_vsi *vsi)
6729 {
6730 	struct ice_softc *sc = vsi->sc;
6731 	struct ice_hw_common_caps *cap;
6732 
6733 	cap = &sc->hw.func_caps.common_cap;
6734 
6735 	switch (vsi->type) {
6736 	case ICE_VSI_PF:
6737 		/* The PF VSI inherits RSS instance of the PF */
6738 		vsi->rss_table_size = cap->rss_table_size;
6739 		vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF;
6740 		break;
6741 	case ICE_VSI_VF:
6742 		vsi->rss_table_size = ICE_VSIQF_HLUT_ARRAY_SIZE;
6743 		vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI;
6744 		break;
6745 	default:
6746 		device_printf(sc->dev,
6747 			      "VSI %d: RSS not supported for VSI type %d\n",
6748 			      vsi->idx, vsi->type);
6749 		break;
6750 	}
6751 }
6752 
6753 /**
6754  * ice_vsi_add_txqs_ctx - Create a sysctl context and node to store txq sysctls
6755  * @vsi: The VSI to add the context for
6756  *
6757  * Creates a sysctl context for storing txq sysctls. Additionally creates
6758  * a node rooted at the given VSI's main sysctl node. This context will be
6759  * used to store per-txq sysctls which may need to be released during the
6760  * driver's lifetime.
6761  */
6762 void
6763 ice_vsi_add_txqs_ctx(struct ice_vsi *vsi)
6764 {
6765 	struct sysctl_oid_list *vsi_list;
6766 
6767 	sysctl_ctx_init(&vsi->txqs_ctx);
6768 
6769 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
6770 
6771 	vsi->txqs_node = SYSCTL_ADD_NODE(&vsi->txqs_ctx, vsi_list, OID_AUTO, "txqs",
6772 					 CTLFLAG_RD, NULL, "Tx Queues");
6773 }
6774 
6775 /**
6776  * ice_vsi_add_rxqs_ctx - Create a sysctl context and node to store rxq sysctls
6777  * @vsi: The VSI to add the context for
6778  *
6779  * Creates a sysctl context for storing rxq sysctls. Additionally creates
6780  * a node rooted at the given VSI's main sysctl node. This context will be
6781  * used to store per-rxq sysctls which may need to be released during the
6782  * driver's lifetime.
6783  */
6784 void
6785 ice_vsi_add_rxqs_ctx(struct ice_vsi *vsi)
6786 {
6787 	struct sysctl_oid_list *vsi_list;
6788 
6789 	sysctl_ctx_init(&vsi->rxqs_ctx);
6790 
6791 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
6792 
6793 	vsi->rxqs_node = SYSCTL_ADD_NODE(&vsi->rxqs_ctx, vsi_list, OID_AUTO, "rxqs",
6794 					 CTLFLAG_RD, NULL, "Rx Queues");
6795 }
6796 
6797 /**
6798  * ice_vsi_del_txqs_ctx - Delete the Tx queue sysctl context for this VSI
6799  * @vsi: The VSI to delete from
6800  *
6801  * Frees the txq sysctl context created for storing the per-queue Tx sysctls.
6802  * Must be called prior to freeing the Tx queue memory, in order to avoid
6803  * having sysctls point at stale memory.
6804  */
6805 void
6806 ice_vsi_del_txqs_ctx(struct ice_vsi *vsi)
6807 {
6808 	device_t dev = vsi->sc->dev;
6809 	int err;
6810 
6811 	if (vsi->txqs_node) {
6812 		err = sysctl_ctx_free(&vsi->txqs_ctx);
6813 		if (err)
6814 			device_printf(dev, "failed to free VSI %d txqs_ctx, err %s\n",
6815 				      vsi->idx, ice_err_str(err));
6816 		vsi->txqs_node = NULL;
6817 	}
6818 }
6819 
6820 /**
6821  * ice_vsi_del_rxqs_ctx - Delete the Rx queue sysctl context for this VSI
6822  * @vsi: The VSI to delete from
6823  *
6824  * Frees the rxq sysctl context created for storing the per-queue Rx sysctls.
6825  * Must be called prior to freeing the Rx queue memory, in order to avoid
6826  * having sysctls point at stale memory.
6827  */
6828 void
6829 ice_vsi_del_rxqs_ctx(struct ice_vsi *vsi)
6830 {
6831 	device_t dev = vsi->sc->dev;
6832 	int err;
6833 
6834 	if (vsi->rxqs_node) {
6835 		err = sysctl_ctx_free(&vsi->rxqs_ctx);
6836 		if (err)
6837 			device_printf(dev, "failed to free VSI %d rxqs_ctx, err %s\n",
6838 				      vsi->idx, ice_err_str(err));
6839 		vsi->rxqs_node = NULL;
6840 	}
6841 }
6842 
6843 /**
6844  * ice_add_txq_sysctls - Add per-queue sysctls for a Tx queue
6845  * @txq: pointer to the Tx queue
6846  *
6847 * Add per-queue sysctls for a given Tx queue. Can't be called during
6848 * ice_add_vsi_sysctls, since the queue memory has not yet been setup.
6849  */
6850 void
6851 ice_add_txq_sysctls(struct ice_tx_queue *txq)
6852 {
6853 	struct ice_vsi *vsi = txq->vsi;
6854 	struct sysctl_ctx_list *ctx = &vsi->txqs_ctx;
6855 	struct sysctl_oid_list *txqs_list, *this_txq_list;
6856 	struct sysctl_oid *txq_node;
6857 	char txq_name[32], txq_desc[32];
6858 
6859 	const struct ice_sysctl_info ctls[] = {
6860 		{ &txq->stats.tx_packets, "tx_packets", "Queue Packets Transmitted" },
6861 		{ &txq->stats.tx_bytes, "tx_bytes", "Queue Bytes Transmitted" },
6862 		{ &txq->stats.mss_too_small, "mss_too_small", "TSO sends with an MSS less than 64" },
6863 		{ 0, 0, 0 }
6864 	};
6865 
6866 	const struct ice_sysctl_info *entry = ctls;
6867 
6868 	txqs_list = SYSCTL_CHILDREN(vsi->txqs_node);
6869 
6870 	snprintf(txq_name, sizeof(txq_name), "%u", txq->me);
6871 	snprintf(txq_desc, sizeof(txq_desc), "Tx Queue %u", txq->me);
6872 	txq_node = SYSCTL_ADD_NODE(ctx, txqs_list, OID_AUTO, txq_name,
6873 				   CTLFLAG_RD, NULL, txq_desc);
6874 	this_txq_list = SYSCTL_CHILDREN(txq_node);
6875 
6876 	/* Add the Tx queue statistics */
6877 	while (entry->stat != 0) {
6878 		SYSCTL_ADD_U64(ctx, this_txq_list, OID_AUTO, entry->name,
6879 			       CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
6880 			       entry->description);
6881 		entry++;
6882 	}
6883 
6884 	SYSCTL_ADD_U8(ctx, this_txq_list, OID_AUTO, "tc",
6885 		       CTLFLAG_RD, &txq->tc, 0,
6886 		       "Traffic Class that Queue belongs to");
6887 }
6888 
6889 /**
6890  * ice_add_rxq_sysctls - Add per-queue sysctls for an Rx queue
6891  * @rxq: pointer to the Rx queue
6892  *
6893  * Add per-queue sysctls for a given Rx queue. Can't be called during
6894  * ice_add_vsi_sysctls, since the queue memory has not yet been setup.
6895  */
6896 void
6897 ice_add_rxq_sysctls(struct ice_rx_queue *rxq)
6898 {
6899 	struct ice_vsi *vsi = rxq->vsi;
6900 	struct sysctl_ctx_list *ctx = &vsi->rxqs_ctx;
6901 	struct sysctl_oid_list *rxqs_list, *this_rxq_list;
6902 	struct sysctl_oid *rxq_node;
6903 	char rxq_name[32], rxq_desc[32];
6904 
6905 	const struct ice_sysctl_info ctls[] = {
6906 		{ &rxq->stats.rx_packets, "rx_packets", "Queue Packets Received" },
6907 		{ &rxq->stats.rx_bytes, "rx_bytes", "Queue Bytes Received" },
6908 		{ &rxq->stats.desc_errs, "rx_desc_errs", "Queue Rx Descriptor Errors" },
6909 		{ 0, 0, 0 }
6910 	};
6911 
6912 	const struct ice_sysctl_info *entry = ctls;
6913 
6914 	rxqs_list = SYSCTL_CHILDREN(vsi->rxqs_node);
6915 
6916 	snprintf(rxq_name, sizeof(rxq_name), "%u", rxq->me);
6917 	snprintf(rxq_desc, sizeof(rxq_desc), "Rx Queue %u", rxq->me);
6918 	rxq_node = SYSCTL_ADD_NODE(ctx, rxqs_list, OID_AUTO, rxq_name,
6919 				   CTLFLAG_RD, NULL, rxq_desc);
6920 	this_rxq_list = SYSCTL_CHILDREN(rxq_node);
6921 
6922 	/* Add the Rx queue statistics */
6923 	while (entry->stat != 0) {
6924 		SYSCTL_ADD_U64(ctx, this_rxq_list, OID_AUTO, entry->name,
6925 			       CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
6926 			       entry->description);
6927 		entry++;
6928 	}
6929 
6930 	SYSCTL_ADD_U8(ctx, this_rxq_list, OID_AUTO, "tc",
6931 		       CTLFLAG_RD, &rxq->tc, 0,
6932 		       "Traffic Class that Queue belongs to");
6933 }
6934 
6935 /**
6936  * ice_get_default_rss_key - Obtain a default RSS key
6937  * @seed: storage for the RSS key data
6938  *
6939  * Copies a pre-generated RSS key into the seed memory. The seed pointer must
6940  * point to a block of memory that is at least 40 bytes in size.
6941  *
6942  * The key isn't randomly generated each time this function is called because
6943  * that makes the RSS key change every time we reconfigure RSS. This does mean
6944  * that we're hard coding a possibly 'well known' key. We might want to
6945  * investigate randomly generating this key once during the first call.
6946  */
6947 static void
6948 ice_get_default_rss_key(u8 *seed)
6949 {
6950 	const u8 default_seed[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE] = {
6951 		0x39, 0xed, 0xff, 0x4d, 0x43, 0x58, 0x42, 0xc3, 0x5f, 0xb8,
6952 		0xa5, 0x32, 0x95, 0x65, 0x81, 0xcd, 0x36, 0x79, 0x71, 0x97,
6953 		0xde, 0xa4, 0x41, 0x40, 0x6f, 0x27, 0xe9, 0x81, 0x13, 0xa0,
6954 		0x95, 0x93, 0x5b, 0x1e, 0x9d, 0x27, 0x9d, 0x24, 0x84, 0xb5,
6955 	};
6956 
6957 	bcopy(default_seed, seed, ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE);
6958 }
6959 
6960 /**
6961  * ice_set_rss_key - Configure a given VSI with the default RSS key
6962  * @vsi: the VSI to configure
6963  *
6964  * Program the hardware RSS key. We use rss_getkey to grab the kernel RSS key.
6965  * If the kernel RSS interface is not available, this will fall back to our
6966  * pre-generated hash seed from ice_get_default_rss_key().
6967  */
6968 static int
6969 ice_set_rss_key(struct ice_vsi *vsi)
6970 {
6971 	struct ice_aqc_get_set_rss_keys keydata = { .standard_rss_key = {0} };
6972 	struct ice_softc *sc = vsi->sc;
6973 	struct ice_hw *hw = &sc->hw;
6974 	enum ice_status status;
6975 
6976 	/*
6977 	 * If the RSS kernel interface is disabled, this will return the
6978 	 * default RSS key above.
6979 	 */
6980 	rss_getkey(keydata.standard_rss_key);
6981 
6982 	status = ice_aq_set_rss_key(hw, vsi->idx, &keydata);
6983 	if (status) {
6984 		device_printf(sc->dev,
6985 			      "ice_aq_set_rss_key status %s, error %s\n",
6986 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
6987 		return (EIO);
6988 	}
6989 
6990 	return (0);
6991 }
6992 
6993 /**
6994  * ice_set_rss_flow_flds - Program the RSS hash flows after package init
6995  * @vsi: the VSI to configure
6996  *
6997  * If the package file is initialized, the default RSS flows are reset. We
6998  * need to reprogram the expected hash configuration. We'll use
6999  * rss_gethashconfig() to determine which flows to enable. If RSS kernel
7000  * support is not enabled, this macro will fall back to suitable defaults.
7001  */
7002 static void
7003 ice_set_rss_flow_flds(struct ice_vsi *vsi)
7004 {
7005 	struct ice_softc *sc = vsi->sc;
7006 	struct ice_hw *hw = &sc->hw;
7007 	struct ice_rss_hash_cfg rss_cfg = { 0, 0, ICE_RSS_ANY_HEADERS, false };
7008 	device_t dev = sc->dev;
7009 	enum ice_status status;
7010 	u_int rss_hash_config;
7011 
7012 	rss_hash_config = rss_gethashconfig();
7013 
7014 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) {
7015 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4;
7016 		rss_cfg.hash_flds = ICE_FLOW_HASH_IPV4;
7017 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7018 		if (status)
7019 			device_printf(dev,
7020 				      "ice_add_rss_cfg on VSI %d failed for ipv4 flow, err %s aq_err %s\n",
7021 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7022 	}
7023 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) {
7024 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_TCP;
7025 		rss_cfg.hash_flds = ICE_HASH_TCP_IPV4;
7026 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7027 		if (status)
7028 			device_printf(dev,
7029 				      "ice_add_rss_cfg on VSI %d failed for tcp4 flow, err %s aq_err %s\n",
7030 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7031 	}
7032 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) {
7033 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_UDP;
7034 		rss_cfg.hash_flds = ICE_HASH_UDP_IPV4;
7035 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7036 		if (status)
7037 			device_printf(dev,
7038 				      "ice_add_rss_cfg on VSI %d failed for udp4 flow, err %s aq_err %s\n",
7039 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7040 	}
7041 	if (rss_hash_config & (RSS_HASHTYPE_RSS_IPV6 | RSS_HASHTYPE_RSS_IPV6_EX)) {
7042 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6;
7043 		rss_cfg.hash_flds = ICE_FLOW_HASH_IPV6;
7044 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7045 		if (status)
7046 			device_printf(dev,
7047 				      "ice_add_rss_cfg on VSI %d failed for ipv6 flow, err %s aq_err %s\n",
7048 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7049 	}
7050 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) {
7051 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_TCP;
7052 		rss_cfg.hash_flds = ICE_HASH_TCP_IPV6;
7053 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7054 		if (status)
7055 			device_printf(dev,
7056 				      "ice_add_rss_cfg on VSI %d failed for tcp6 flow, err %s aq_err %s\n",
7057 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7058 	}
7059 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) {
7060 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_UDP;
7061 		rss_cfg.hash_flds = ICE_HASH_UDP_IPV6;
7062 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7063 		if (status)
7064 			device_printf(dev,
7065 				      "ice_add_rss_cfg on VSI %d failed for udp6 flow, err %s aq_err %s\n",
7066 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7067 	}
7068 
7069 	/* Warn about RSS hash types which are not supported */
7070 	/* coverity[dead_error_condition] */
7071 	if (rss_hash_config & ~ICE_DEFAULT_RSS_HASH_CONFIG) {
7072 		device_printf(dev,
7073 			      "ice_add_rss_cfg on VSI %d could not configure every requested hash type\n",
7074 			      vsi->idx);
7075 	}
7076 }
7077 
7078 /**
7079  * ice_set_rss_lut - Program the RSS lookup table for a VSI
7080  * @vsi: the VSI to configure
7081  *
7082  * Programs the RSS lookup table for a given VSI. We use
7083  * rss_get_indirection_to_bucket which will use the indirection table provided
7084  * by the kernel RSS interface when available. If the kernel RSS interface is
7085  * not available, we will fall back to a simple round-robin fashion queue
7086  * assignment.
7087  */
7088 static int
7089 ice_set_rss_lut(struct ice_vsi *vsi)
7090 {
7091 	struct ice_softc *sc = vsi->sc;
7092 	struct ice_hw *hw = &sc->hw;
7093 	device_t dev = sc->dev;
7094 	struct ice_aq_get_set_rss_lut_params lut_params;
7095 	enum ice_status status;
7096 	int i, err = 0;
7097 	u8 *lut;
7098 
7099 	lut = (u8 *)malloc(vsi->rss_table_size, M_ICE, M_NOWAIT|M_ZERO);
7100 	if (!lut) {
7101 		device_printf(dev, "Failed to allocate RSS lut memory\n");
7102 		return (ENOMEM);
7103 	}
7104 
7105 	/* Populate the LUT with max no. of queues. If the RSS kernel
7106 	 * interface is disabled, this will assign the lookup table in
7107 	 * a simple round robin fashion
7108 	 */
7109 	for (i = 0; i < vsi->rss_table_size; i++) {
7110 		/* XXX: this needs to be changed if num_rx_queues ever counts
7111 		 * more than just the RSS queues */
7112 		lut[i] = rss_get_indirection_to_bucket(i) % vsi->num_rx_queues;
7113 	}
7114 
7115 	lut_params.vsi_handle = vsi->idx;
7116 	lut_params.lut_size = vsi->rss_table_size;
7117 	lut_params.lut_type = vsi->rss_lut_type;
7118 	lut_params.lut = lut;
7119 	lut_params.global_lut_id = 0;
7120 	status = ice_aq_set_rss_lut(hw, &lut_params);
7121 	if (status) {
7122 		device_printf(dev,
7123 			      "Cannot set RSS lut, err %s aq_err %s\n",
7124 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7125 		err = (EIO);
7126 	}
7127 
7128 	free(lut, M_ICE);
7129 	return err;
7130 }
7131 
7132 /**
7133  * ice_config_rss - Configure RSS for a VSI
7134  * @vsi: the VSI to configure
7135  *
7136  * If FEATURE_RSS is enabled, configures the RSS lookup table and hash key for
7137  * a given VSI.
7138  */
7139 int
7140 ice_config_rss(struct ice_vsi *vsi)
7141 {
7142 	int err;
7143 
7144 	/* Nothing to do, if RSS is not enabled */
7145 	if (!ice_is_bit_set(vsi->sc->feat_en, ICE_FEATURE_RSS))
7146 		return 0;
7147 
7148 	err = ice_set_rss_key(vsi);
7149 	if (err)
7150 		return err;
7151 
7152 	ice_set_rss_flow_flds(vsi);
7153 
7154 	return ice_set_rss_lut(vsi);
7155 }
7156 
7157 /**
7158  * ice_log_pkg_init - Log a message about status of DDP initialization
7159  * @sc: the device softc pointer
7160  * @pkg_status: the status result of ice_copy_and_init_pkg
7161  *
7162  * Called by ice_load_pkg after an attempt to download the DDP package
7163  * contents to the device to log an appropriate message for the system
7164  * administrator about download status.
7165  *
7166  * @post ice_is_init_pkg_successful function is used to determine
7167  * whether the download was successful and DDP package is compatible
7168  * with this driver. Otherwise driver will transition to Safe Mode.
7169  */
7170 void
7171 ice_log_pkg_init(struct ice_softc *sc, enum ice_ddp_state pkg_status)
7172 {
7173 	struct ice_hw *hw = &sc->hw;
7174 	device_t dev = sc->dev;
7175 	struct sbuf *active_pkg, *os_pkg;
7176 
7177 	active_pkg = sbuf_new_auto();
7178 	ice_active_pkg_version_str(hw, active_pkg);
7179 	sbuf_finish(active_pkg);
7180 
7181 	os_pkg = sbuf_new_auto();
7182 	ice_os_pkg_version_str(hw, os_pkg);
7183 	sbuf_finish(os_pkg);
7184 
7185 	switch (pkg_status) {
7186 	case ICE_DDP_PKG_SUCCESS:
7187 		device_printf(dev,
7188 			      "The DDP package was successfully loaded: %s.\n",
7189 			      sbuf_data(active_pkg));
7190 		break;
7191 	case ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED:
7192 	case ICE_DDP_PKG_ALREADY_LOADED:
7193 		device_printf(dev,
7194 			      "DDP package already present on device: %s.\n",
7195 			      sbuf_data(active_pkg));
7196 		break;
7197 	case ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED:
7198 		device_printf(dev,
7199 			      "The driver could not load the DDP package file because a compatible DDP package is already present on the device.  The device has package %s.  The ice_ddp module has package: %s.\n",
7200 			      sbuf_data(active_pkg),
7201 			      sbuf_data(os_pkg));
7202 		break;
7203 	case ICE_DDP_PKG_FILE_VERSION_TOO_HIGH:
7204 		device_printf(dev,
7205 			      "The device has a DDP package that is higher than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7206 			      sbuf_data(active_pkg),
7207 			      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7208 		break;
7209 	case ICE_DDP_PKG_FILE_VERSION_TOO_LOW:
7210 		device_printf(dev,
7211 			      "The device has a DDP package that is lower than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7212 			      sbuf_data(active_pkg),
7213 			      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7214 		break;
7215 	case ICE_DDP_PKG_ALREADY_LOADED_NOT_SUPPORTED:
7216 		/*
7217 		 * This assumes that the active_pkg_ver will not be
7218 		 * initialized if the ice_ddp package version is not
7219 		 * supported.
7220 		 */
7221 		if (pkg_ver_empty(&hw->active_pkg_ver, hw->active_pkg_name)) {
7222 			/* The ice_ddp version is not supported */
7223 			if (pkg_ver_compatible(&hw->pkg_ver) > 0) {
7224 				device_printf(dev,
7225 					      "The DDP package in the ice_ddp module is higher than the driver supports.  The ice_ddp module has package %s.  The driver requires version %d.%d.x.x.  Please use an updated driver.  Entering Safe Mode.\n",
7226 					      sbuf_data(os_pkg),
7227 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7228 			} else if (pkg_ver_compatible(&hw->pkg_ver) < 0) {
7229 				device_printf(dev,
7230 					      "The DDP package in the ice_ddp module is lower than the driver supports.  The ice_ddp module has package %s.  The driver requires version %d.%d.x.x.  Please use an updated ice_ddp module.  Entering Safe Mode.\n",
7231 					      sbuf_data(os_pkg),
7232 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7233 			} else {
7234 				device_printf(dev,
7235 					      "An unknown error occurred when loading the DDP package.  The ice_ddp module has package %s.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7236 					      sbuf_data(os_pkg),
7237 					      sbuf_data(active_pkg),
7238 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7239 			}
7240 		} else {
7241 			if (pkg_ver_compatible(&hw->active_pkg_ver) > 0) {
7242 				device_printf(dev,
7243 					      "The device has a DDP package that is higher than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7244 					      sbuf_data(active_pkg),
7245 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7246 			} else if (pkg_ver_compatible(&hw->active_pkg_ver) < 0) {
7247 				device_printf(dev,
7248 					      "The device has a DDP package that is lower than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7249 					      sbuf_data(active_pkg),
7250 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7251 			} else {
7252 				device_printf(dev,
7253 					      "An unknown error occurred when loading the DDP package.  The ice_ddp module has package %s.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7254 					      sbuf_data(os_pkg),
7255 					      sbuf_data(active_pkg),
7256 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7257 			}
7258 		}
7259 		break;
7260 	case ICE_DDP_PKG_INVALID_FILE:
7261 		device_printf(dev,
7262 			      "The DDP package in the ice_ddp module is invalid.  Entering Safe Mode\n");
7263 		break;
7264 	case ICE_DDP_PKG_FW_MISMATCH:
7265 		device_printf(dev,
7266 			      "The firmware loaded on the device is not compatible with the DDP package.  Please update the device's NVM.  Entering safe mode.\n");
7267 		break;
7268 	case ICE_DDP_PKG_NO_SEC_MANIFEST:
7269 	case ICE_DDP_PKG_FILE_SIGNATURE_INVALID:
7270 		device_printf(dev,
7271 			      "The DDP package in the ice_ddp module cannot be loaded because its signature is not valid.  Please use a valid ice_ddp module.  Entering Safe Mode.\n");
7272 		break;
7273 	case ICE_DDP_PKG_SECURE_VERSION_NBR_TOO_LOW:
7274 		device_printf(dev,
7275 			      "The DDP package in the ice_ddp module could not be loaded because its security revision is too low.  Please use an updated ice_ddp module.  Entering Safe Mode.\n");
7276 		break;
7277 	case ICE_DDP_PKG_MANIFEST_INVALID:
7278 	case ICE_DDP_PKG_BUFFER_INVALID:
7279 		device_printf(dev,
7280 			      "An error occurred on the device while loading the DDP package.  Entering Safe Mode.\n");
7281 		break;
7282 	default:
7283 		device_printf(dev,
7284 			 "An unknown error occurred when loading the DDP package.  Entering Safe Mode.\n");
7285 		break;
7286 	}
7287 
7288 	sbuf_delete(active_pkg);
7289 	sbuf_delete(os_pkg);
7290 }
7291 
7292 /**
7293  * ice_load_pkg_file - Load the DDP package file using firmware_get
7294  * @sc: device private softc
7295  *
7296  * Use firmware_get to load the DDP package memory and then request that
7297  * firmware download the package contents and program the relevant hardware
7298  * bits.
7299  *
7300  * This function makes a copy of the DDP package memory which is tracked in
7301  * the ice_hw structure. The copy will be managed and released by
7302  * ice_deinit_hw(). This allows the firmware reference to be immediately
7303  * released using firmware_put.
7304  */
7305 enum ice_status
7306 ice_load_pkg_file(struct ice_softc *sc)
7307 {
7308 	struct ice_hw *hw = &sc->hw;
7309 	device_t dev = sc->dev;
7310 	enum ice_ddp_state state;
7311 	const struct firmware *pkg;
7312 	enum ice_status status = ICE_SUCCESS;
7313 	u8 cached_layer_count;
7314 	u8 *buf_copy;
7315 
7316 	pkg = firmware_get("ice_ddp");
7317 	if (!pkg) {
7318 		device_printf(dev,
7319 		    "The DDP package module (ice_ddp) failed to load or could not be found. Entering Safe Mode.\n");
7320 		if (cold)
7321 			device_printf(dev,
7322 			    "The DDP package module cannot be automatically loaded while booting. You may want to specify ice_ddp_load=\"YES\" in your loader.conf\n");
7323 		status = ICE_ERR_CFG;
7324 		goto err_load_pkg;
7325 	}
7326 
7327 	/* Check for topology change */
7328 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_TX_BALANCE)) {
7329 		cached_layer_count = hw->num_tx_sched_layers;
7330 		buf_copy = (u8 *)malloc(pkg->datasize, M_ICE, M_NOWAIT);
7331 		if (buf_copy == NULL)
7332 			return ICE_ERR_NO_MEMORY;
7333 		memcpy(buf_copy, pkg->data, pkg->datasize);
7334 		status = ice_cfg_tx_topo(&sc->hw, buf_copy, pkg->datasize);
7335 		free(buf_copy, M_ICE);
7336 		/* Success indicates a change was made */
7337 		if (status == ICE_SUCCESS) {
7338 			/* 9 -> 5 */
7339 			if (cached_layer_count == 9)
7340 				device_printf(dev,
7341 				    "Transmit balancing feature enabled\n");
7342 			else
7343 				device_printf(dev,
7344 				    "Transmit balancing feature disabled\n");
7345 			ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_en);
7346 			return (status);
7347 		}
7348 	}
7349 
7350 	/* Copy and download the pkg contents */
7351 	state = ice_copy_and_init_pkg(hw, (const u8 *)pkg->data, pkg->datasize);
7352 
7353 	/* Release the firmware reference */
7354 	firmware_put(pkg, FIRMWARE_UNLOAD);
7355 
7356 	/* Check the active DDP package version and log a message */
7357 	ice_log_pkg_init(sc, state);
7358 
7359 	/* Place the driver into safe mode */
7360 	if (ice_is_init_pkg_successful(state))
7361 		return (ICE_ERR_ALREADY_EXISTS);
7362 
7363 err_load_pkg:
7364 	ice_zero_bitmap(sc->feat_cap, ICE_FEATURE_COUNT);
7365 	ice_zero_bitmap(sc->feat_en, ICE_FEATURE_COUNT);
7366 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
7367 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
7368 
7369 	return (status);
7370 }
7371 
7372 /**
7373  * ice_get_ifnet_counter - Retrieve counter value for a given ifnet counter
7374  * @vsi: the vsi to retrieve the value for
7375  * @counter: the counter type to retrieve
7376  *
7377  * Returns the value for a given ifnet counter. To do so, we calculate the
7378  * value based on the matching hardware statistics.
7379  */
7380 uint64_t
7381 ice_get_ifnet_counter(struct ice_vsi *vsi, ift_counter counter)
7382 {
7383 	struct ice_hw_port_stats *hs = &vsi->sc->stats.cur;
7384 	struct ice_eth_stats *es = &vsi->hw_stats.cur;
7385 
7386 	/* For some statistics, especially those related to error flows, we do
7387 	 * not have per-VSI counters. In this case, we just report the global
7388 	 * counters.
7389 	 */
7390 
7391 	switch (counter) {
7392 	case IFCOUNTER_IPACKETS:
7393 		return (es->rx_unicast + es->rx_multicast + es->rx_broadcast);
7394 	case IFCOUNTER_IERRORS:
7395 		return (hs->crc_errors + hs->illegal_bytes +
7396 			hs->mac_local_faults + hs->mac_remote_faults +
7397 			hs->rx_len_errors + hs->rx_undersize +
7398 			hs->rx_oversize + hs->rx_fragments + hs->rx_jabber);
7399 	case IFCOUNTER_OPACKETS:
7400 		return (es->tx_unicast + es->tx_multicast + es->tx_broadcast);
7401 	case IFCOUNTER_OERRORS:
7402 		return (es->tx_errors);
7403 	case IFCOUNTER_COLLISIONS:
7404 		return (0);
7405 	case IFCOUNTER_IBYTES:
7406 		return (es->rx_bytes);
7407 	case IFCOUNTER_OBYTES:
7408 		return (es->tx_bytes);
7409 	case IFCOUNTER_IMCASTS:
7410 		return (es->rx_multicast);
7411 	case IFCOUNTER_OMCASTS:
7412 		return (es->tx_multicast);
7413 	case IFCOUNTER_IQDROPS:
7414 		return (es->rx_discards);
7415 	case IFCOUNTER_OQDROPS:
7416 		return (hs->tx_dropped_link_down);
7417 	case IFCOUNTER_NOPROTO:
7418 		return (es->rx_unknown_protocol);
7419 	default:
7420 		return if_get_counter_default(vsi->sc->ifp, counter);
7421 	}
7422 }
7423 
7424 /**
7425  * ice_save_pci_info - Save PCI configuration fields in HW struct
7426  * @hw: the ice_hw struct to save the PCI information in
7427  * @dev: the device to get the PCI information from
7428  *
7429  * This should only be called once, early in the device attach
7430  * process.
7431  */
7432 void
7433 ice_save_pci_info(struct ice_hw *hw, device_t dev)
7434 {
7435 	hw->vendor_id = pci_get_vendor(dev);
7436 	hw->device_id = pci_get_device(dev);
7437 	hw->subsystem_vendor_id = pci_get_subvendor(dev);
7438 	hw->subsystem_device_id = pci_get_subdevice(dev);
7439 	hw->revision_id = pci_get_revid(dev);
7440 	hw->bus.device = pci_get_slot(dev);
7441 	hw->bus.func = pci_get_function(dev);
7442 }
7443 
7444 /**
7445  * ice_replay_all_vsi_cfg - Replace configuration for all VSIs after reset
7446  * @sc: the device softc
7447  *
7448  * Replace the configuration for each VSI, and then cleanup replay
7449  * information. Called after a hardware reset in order to reconfigure the
7450  * active VSIs.
7451  */
7452 int
7453 ice_replay_all_vsi_cfg(struct ice_softc *sc)
7454 {
7455 	struct ice_hw *hw = &sc->hw;
7456 	enum ice_status status;
7457 	int i;
7458 
7459 	for (i = 0 ; i < sc->num_available_vsi; i++) {
7460 		struct ice_vsi *vsi = sc->all_vsi[i];
7461 
7462 		if (!vsi)
7463 			continue;
7464 
7465 		status = ice_replay_vsi(hw, vsi->idx);
7466 		if (status) {
7467 			device_printf(sc->dev, "Failed to replay VSI %d, err %s aq_err %s\n",
7468 				      vsi->idx, ice_status_str(status),
7469 				      ice_aq_str(hw->adminq.sq_last_status));
7470 			return (EIO);
7471 		}
7472 	}
7473 
7474 	/* Cleanup replay filters after successful reconfiguration */
7475 	ice_replay_post(hw);
7476 	return (0);
7477 }
7478 
7479 /**
7480  * ice_clean_vsi_rss_cfg - Cleanup RSS configuration for a given VSI
7481  * @vsi: pointer to the VSI structure
7482  *
7483  * Cleanup the advanced RSS configuration for a given VSI. This is necessary
7484  * during driver removal to ensure that all RSS resources are properly
7485  * released.
7486  *
7487  * @remark this function doesn't report an error as it is expected to be
7488  * called during driver reset and unload, and there isn't much the driver can
7489  * do if freeing RSS resources fails.
7490  */
7491 static void
7492 ice_clean_vsi_rss_cfg(struct ice_vsi *vsi)
7493 {
7494 	struct ice_softc *sc = vsi->sc;
7495 	struct ice_hw *hw = &sc->hw;
7496 	device_t dev = sc->dev;
7497 	enum ice_status status;
7498 
7499 	status = ice_rem_vsi_rss_cfg(hw, vsi->idx);
7500 	if (status)
7501 		device_printf(dev,
7502 			      "Failed to remove RSS configuration for VSI %d, err %s\n",
7503 			      vsi->idx, ice_status_str(status));
7504 
7505 	/* Remove this VSI from the RSS list */
7506 	ice_rem_vsi_rss_list(hw, vsi->idx);
7507 }
7508 
7509 /**
7510  * ice_clean_all_vsi_rss_cfg - Cleanup RSS configuration for all VSIs
7511  * @sc: the device softc pointer
7512  *
7513  * Cleanup the advanced RSS configuration for all VSIs on a given PF
7514  * interface.
7515  *
7516  * @remark This should be called while preparing for a reset, to cleanup stale
7517  * RSS configuration for all VSIs.
7518  */
7519 void
7520 ice_clean_all_vsi_rss_cfg(struct ice_softc *sc)
7521 {
7522 	int i;
7523 
7524 	/* No need to cleanup if RSS is not enabled */
7525 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_RSS))
7526 		return;
7527 
7528 	for (i = 0; i < sc->num_available_vsi; i++) {
7529 		struct ice_vsi *vsi = sc->all_vsi[i];
7530 
7531 		if (vsi)
7532 			ice_clean_vsi_rss_cfg(vsi);
7533 	}
7534 }
7535 
7536 /**
7537  * ice_requested_fec_mode - Return the requested FEC mode as a string
7538  * @pi: The port info structure
7539  *
7540  * Return a string representing the requested FEC mode.
7541  */
7542 static const char *
7543 ice_requested_fec_mode(struct ice_port_info *pi)
7544 {
7545 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
7546 	enum ice_status status;
7547 
7548 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
7549 				     &pcaps, NULL);
7550 	if (status)
7551 		/* Just report unknown if we can't get capabilities */
7552 		return "Unknown";
7553 
7554 	/* Check if RS-FEC has been requested first */
7555 	if (pcaps.link_fec_options & (ICE_AQC_PHY_FEC_25G_RS_528_REQ |
7556 				      ICE_AQC_PHY_FEC_25G_RS_544_REQ))
7557 		return ice_fec_str(ICE_FEC_RS);
7558 
7559 	/* If RS FEC has not been requested, then check BASE-R */
7560 	if (pcaps.link_fec_options & (ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ |
7561 				      ICE_AQC_PHY_FEC_25G_KR_REQ))
7562 		return ice_fec_str(ICE_FEC_BASER);
7563 
7564 	return ice_fec_str(ICE_FEC_NONE);
7565 }
7566 
7567 /**
7568  * ice_negotiated_fec_mode - Return the negotiated FEC mode as a string
7569  * @pi: The port info structure
7570  *
7571  * Return a string representing the current FEC mode.
7572  */
7573 static const char *
7574 ice_negotiated_fec_mode(struct ice_port_info *pi)
7575 {
7576 	/* First, check if RS has been requested first */
7577 	if (pi->phy.link_info.fec_info & (ICE_AQ_LINK_25G_RS_528_FEC_EN |
7578 					  ICE_AQ_LINK_25G_RS_544_FEC_EN))
7579 		return ice_fec_str(ICE_FEC_RS);
7580 
7581 	/* If RS FEC has not been requested, then check BASE-R */
7582 	if (pi->phy.link_info.fec_info & ICE_AQ_LINK_25G_KR_FEC_EN)
7583 		return ice_fec_str(ICE_FEC_BASER);
7584 
7585 	return ice_fec_str(ICE_FEC_NONE);
7586 }
7587 
7588 /**
7589  * ice_autoneg_mode - Return string indicating of autoneg completed
7590  * @pi: The port info structure
7591  *
7592  * Return "True" if autonegotiation is completed, "False" otherwise.
7593  */
7594 static const char *
7595 ice_autoneg_mode(struct ice_port_info *pi)
7596 {
7597 	if (pi->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)
7598 		return "True";
7599 	else
7600 		return "False";
7601 }
7602 
7603 /**
7604  * ice_flowcontrol_mode - Return string indicating the Flow Control mode
7605  * @pi: The port info structure
7606  *
7607  * Returns the current Flow Control mode as a string.
7608  */
7609 static const char *
7610 ice_flowcontrol_mode(struct ice_port_info *pi)
7611 {
7612 	return ice_fc_str(pi->fc.current_mode);
7613 }
7614 
7615 /**
7616  * ice_link_up_msg - Log a link up message with associated info
7617  * @sc: the device private softc
7618  *
7619  * Log a link up message with LOG_NOTICE message level. Include information
7620  * about the duplex, FEC mode, autonegotiation and flow control.
7621  */
7622 void
7623 ice_link_up_msg(struct ice_softc *sc)
7624 {
7625 	struct ice_hw *hw = &sc->hw;
7626 	struct ifnet *ifp = sc->ifp;
7627 	const char *speed, *req_fec, *neg_fec, *autoneg, *flowcontrol;
7628 
7629 	speed = ice_aq_speed_to_str(hw->port_info);
7630 	req_fec = ice_requested_fec_mode(hw->port_info);
7631 	neg_fec = ice_negotiated_fec_mode(hw->port_info);
7632 	autoneg = ice_autoneg_mode(hw->port_info);
7633 	flowcontrol = ice_flowcontrol_mode(hw->port_info);
7634 
7635 	log(LOG_NOTICE, "%s: Link is up, %s Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
7636 	    if_name(ifp), speed, req_fec, neg_fec, autoneg, flowcontrol);
7637 }
7638 
7639 /**
7640  * ice_update_laa_mac - Update MAC address if Locally Administered
7641  * @sc: the device softc
7642  *
7643  * Update the device MAC address when a Locally Administered Address is
7644  * assigned.
7645  *
7646  * This function does *not* update the MAC filter list itself. Instead, it
7647  * should be called after ice_rm_pf_default_mac_filters, so that the previous
7648  * address filter will be removed, and before ice_cfg_pf_default_mac_filters,
7649  * so that the new address filter will be assigned.
7650  */
7651 int
7652 ice_update_laa_mac(struct ice_softc *sc)
7653 {
7654 	const u8 *lladdr = (const u8 *)if_getlladdr(sc->ifp);
7655 	struct ice_hw *hw = &sc->hw;
7656 	enum ice_status status;
7657 
7658 	/* If the address is the same, then there is nothing to update */
7659 	if (!memcmp(lladdr, hw->port_info->mac.lan_addr, ETHER_ADDR_LEN))
7660 		return (0);
7661 
7662 	/* Reject Multicast addresses */
7663 	if (ETHER_IS_MULTICAST(lladdr))
7664 		return (EINVAL);
7665 
7666 	status = ice_aq_manage_mac_write(hw, lladdr, ICE_AQC_MAN_MAC_UPDATE_LAA_WOL, NULL);
7667 	if (status) {
7668 		device_printf(sc->dev, "Failed to write mac %6D to firmware, err %s aq_err %s\n",
7669 			      lladdr, ":", ice_status_str(status),
7670 			      ice_aq_str(hw->adminq.sq_last_status));
7671 		return (EFAULT);
7672 	}
7673 
7674 	/* Copy the address into place of the LAN address. */
7675 	bcopy(lladdr, hw->port_info->mac.lan_addr, ETHER_ADDR_LEN);
7676 
7677 	return (0);
7678 }
7679 
7680 /**
7681  * ice_get_and_print_bus_info - Save (PCI) bus info and print messages
7682  * @sc: device softc
7683  *
7684  * This will potentially print out a warning message if bus bandwidth
7685  * is insufficient for full-speed operation.
7686  *
7687  * This should only be called once, during the attach process, after
7688  * hw->port_info has been filled out with port link topology information
7689  * (from the Get PHY Capabilities Admin Queue command).
7690  */
7691 void
7692 ice_get_and_print_bus_info(struct ice_softc *sc)
7693 {
7694 	struct ice_hw *hw = &sc->hw;
7695 	device_t dev = sc->dev;
7696 	u16 pci_link_status;
7697 	int offset;
7698 
7699 	pci_find_cap(dev, PCIY_EXPRESS, &offset);
7700 	pci_link_status = pci_read_config(dev, offset + PCIER_LINK_STA, 2);
7701 
7702 	/* Fill out hw struct with PCIE link status info */
7703 	ice_set_pci_link_status_data(hw, pci_link_status);
7704 
7705 	/* Use info to print out bandwidth messages */
7706 	ice_print_bus_link_data(dev, hw);
7707 
7708 	if (ice_pcie_bandwidth_check(sc)) {
7709 		device_printf(dev,
7710 		    "PCI-Express bandwidth available for this device may be insufficient for optimal performance.\n");
7711 		device_printf(dev,
7712 		    "Please move the device to a different PCI-e link with more lanes and/or higher transfer rate.\n");
7713 	}
7714 }
7715 
7716 /**
7717  * ice_pcie_bus_speed_to_rate - Convert driver bus speed enum value to
7718  * a 64-bit baudrate.
7719  * @speed: enum value to convert
7720  *
7721  * This only goes up to PCIE Gen 4.
7722  */
7723 static uint64_t
7724 ice_pcie_bus_speed_to_rate(enum ice_pcie_bus_speed speed)
7725 {
7726 	/* If the PCI-E speed is Gen1 or Gen2, then report
7727 	 * only 80% of bus speed to account for encoding overhead.
7728 	 */
7729 	switch (speed) {
7730 	case ice_pcie_speed_2_5GT:
7731 		return IF_Gbps(2);
7732 	case ice_pcie_speed_5_0GT:
7733 		return IF_Gbps(4);
7734 	case ice_pcie_speed_8_0GT:
7735 		return IF_Gbps(8);
7736 	case ice_pcie_speed_16_0GT:
7737 		return IF_Gbps(16);
7738 	case ice_pcie_speed_unknown:
7739 	default:
7740 		return 0;
7741 	}
7742 }
7743 
7744 /**
7745  * ice_pcie_lnk_width_to_int - Convert driver pci-e width enum value to
7746  * a 32-bit number.
7747  * @width: enum value to convert
7748  */
7749 static int
7750 ice_pcie_lnk_width_to_int(enum ice_pcie_link_width width)
7751 {
7752 	switch (width) {
7753 	case ice_pcie_lnk_x1:
7754 		return (1);
7755 	case ice_pcie_lnk_x2:
7756 		return (2);
7757 	case ice_pcie_lnk_x4:
7758 		return (4);
7759 	case ice_pcie_lnk_x8:
7760 		return (8);
7761 	case ice_pcie_lnk_x12:
7762 		return (12);
7763 	case ice_pcie_lnk_x16:
7764 		return (16);
7765 	case ice_pcie_lnk_x32:
7766 		return (32);
7767 	case ice_pcie_lnk_width_resrv:
7768 	case ice_pcie_lnk_width_unknown:
7769 	default:
7770 		return (0);
7771 	}
7772 }
7773 
7774 /**
7775  * ice_pcie_bandwidth_check - Check if PCI-E bandwidth is sufficient for
7776  * full-speed device operation.
7777  * @sc: adapter softc
7778  *
7779  * Returns 0 if sufficient; 1 if not.
7780  */
7781 static uint8_t
7782 ice_pcie_bandwidth_check(struct ice_softc *sc)
7783 {
7784 	struct ice_hw *hw = &sc->hw;
7785 	int num_ports, pcie_width;
7786 	u64 pcie_speed, port_speed;
7787 
7788 	MPASS(hw->port_info);
7789 
7790 	num_ports = bitcount32(hw->func_caps.common_cap.valid_functions);
7791 	port_speed = ice_phy_types_to_max_rate(hw->port_info);
7792 	pcie_speed = ice_pcie_bus_speed_to_rate(hw->bus.speed);
7793 	pcie_width = ice_pcie_lnk_width_to_int(hw->bus.width);
7794 
7795 	/*
7796 	 * If 2x100, clamp ports to 1 -- 2nd port is intended for
7797 	 * failover.
7798 	 */
7799 	if (port_speed == IF_Gbps(100))
7800 		num_ports = 1;
7801 
7802 	return !!((num_ports * port_speed) > pcie_speed * pcie_width);
7803 }
7804 
7805 /**
7806  * ice_print_bus_link_data - Print PCI-E bandwidth information
7807  * @dev: device to print string for
7808  * @hw: hw struct with PCI-e link information
7809  */
7810 static void
7811 ice_print_bus_link_data(device_t dev, struct ice_hw *hw)
7812 {
7813         device_printf(dev, "PCI Express Bus: Speed %s %s\n",
7814             ((hw->bus.speed == ice_pcie_speed_16_0GT) ? "16.0GT/s" :
7815             (hw->bus.speed == ice_pcie_speed_8_0GT) ? "8.0GT/s" :
7816             (hw->bus.speed == ice_pcie_speed_5_0GT) ? "5.0GT/s" :
7817             (hw->bus.speed == ice_pcie_speed_2_5GT) ? "2.5GT/s" : "Unknown"),
7818             (hw->bus.width == ice_pcie_lnk_x32) ? "Width x32" :
7819             (hw->bus.width == ice_pcie_lnk_x16) ? "Width x16" :
7820             (hw->bus.width == ice_pcie_lnk_x12) ? "Width x12" :
7821             (hw->bus.width == ice_pcie_lnk_x8) ? "Width x8" :
7822             (hw->bus.width == ice_pcie_lnk_x4) ? "Width x4" :
7823             (hw->bus.width == ice_pcie_lnk_x2) ? "Width x2" :
7824             (hw->bus.width == ice_pcie_lnk_x1) ? "Width x1" : "Width Unknown");
7825 }
7826 
7827 /**
7828  * ice_set_pci_link_status_data - store PCI bus info
7829  * @hw: pointer to hardware structure
7830  * @link_status: the link status word from PCI config space
7831  *
7832  * Stores the PCI bus info (speed, width, type) within the ice_hw structure
7833  **/
7834 static void
7835 ice_set_pci_link_status_data(struct ice_hw *hw, u16 link_status)
7836 {
7837 	u16 reg;
7838 
7839 	hw->bus.type = ice_bus_pci_express;
7840 
7841 	reg = (link_status & PCIEM_LINK_STA_WIDTH) >> 4;
7842 
7843 	switch (reg) {
7844 	case ice_pcie_lnk_x1:
7845 	case ice_pcie_lnk_x2:
7846 	case ice_pcie_lnk_x4:
7847 	case ice_pcie_lnk_x8:
7848 	case ice_pcie_lnk_x12:
7849 	case ice_pcie_lnk_x16:
7850 	case ice_pcie_lnk_x32:
7851 		hw->bus.width = (enum ice_pcie_link_width)reg;
7852 		break;
7853 	default:
7854 		hw->bus.width = ice_pcie_lnk_width_unknown;
7855 		break;
7856 	}
7857 
7858 	reg = (link_status & PCIEM_LINK_STA_SPEED) + 0x13;
7859 
7860 	switch (reg) {
7861 	case ice_pcie_speed_2_5GT:
7862 	case ice_pcie_speed_5_0GT:
7863 	case ice_pcie_speed_8_0GT:
7864 	case ice_pcie_speed_16_0GT:
7865 		hw->bus.speed = (enum ice_pcie_bus_speed)reg;
7866 		break;
7867 	default:
7868 		hw->bus.speed = ice_pcie_speed_unknown;
7869 		break;
7870 	}
7871 }
7872 
7873 /**
7874  * ice_init_link_events - Initialize Link Status Events mask
7875  * @sc: the device softc
7876  *
7877  * Initialize the Link Status Events mask to disable notification of link
7878  * events we don't care about in software. Also request that link status
7879  * events be enabled.
7880  */
7881 int
7882 ice_init_link_events(struct ice_softc *sc)
7883 {
7884 	struct ice_hw *hw = &sc->hw;
7885 	enum ice_status status;
7886 	u16 wanted_events;
7887 
7888 	/* Set the bits for the events that we want to be notified by */
7889 	wanted_events = (ICE_AQ_LINK_EVENT_UPDOWN |
7890 			 ICE_AQ_LINK_EVENT_MEDIA_NA |
7891 			 ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL);
7892 
7893 	/* request that every event except the wanted events be masked */
7894 	status = ice_aq_set_event_mask(hw, hw->port_info->lport, ~wanted_events, NULL);
7895 	if (status) {
7896 		device_printf(sc->dev,
7897 			      "Failed to set link status event mask, err %s aq_err %s\n",
7898 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7899 		return (EIO);
7900 	}
7901 
7902 	/* Request link info with the LSE bit set to enable link status events */
7903 	status = ice_aq_get_link_info(hw->port_info, true, NULL, NULL);
7904 	if (status) {
7905 		device_printf(sc->dev,
7906 			      "Failed to enable link status events, err %s aq_err %s\n",
7907 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7908 		return (EIO);
7909 	}
7910 
7911 	return (0);
7912 }
7913 
7914 /**
7915  * ice_handle_mdd_event - Handle possibly malicious events
7916  * @sc: the device softc
7917  *
7918  * Called by the admin task if an MDD detection interrupt is triggered.
7919  * Identifies possibly malicious events coming from VFs. Also triggers for
7920  * similar incorrect behavior from the PF as well.
7921  */
7922 void
7923 ice_handle_mdd_event(struct ice_softc *sc)
7924 {
7925 	struct ice_hw *hw = &sc->hw;
7926 	bool mdd_detected = false, request_reinit = false;
7927 	device_t dev = sc->dev;
7928 	u32 reg;
7929 
7930 	if (!ice_testandclear_state(&sc->state, ICE_STATE_MDD_PENDING))
7931 		return;
7932 
7933 	reg = rd32(hw, GL_MDET_TX_TCLAN);
7934 	if (reg & GL_MDET_TX_TCLAN_VALID_M) {
7935 		u8 pf_num  = (reg & GL_MDET_TX_TCLAN_PF_NUM_M) >> GL_MDET_TX_TCLAN_PF_NUM_S;
7936 		u16 vf_num = (reg & GL_MDET_TX_TCLAN_VF_NUM_M) >> GL_MDET_TX_TCLAN_VF_NUM_S;
7937 		u8 event   = (reg & GL_MDET_TX_TCLAN_MAL_TYPE_M) >> GL_MDET_TX_TCLAN_MAL_TYPE_S;
7938 		u16 queue  = (reg & GL_MDET_TX_TCLAN_QNUM_M) >> GL_MDET_TX_TCLAN_QNUM_S;
7939 
7940 		device_printf(dev, "Malicious Driver Detection Tx Descriptor check event '%s' on Tx queue %u PF# %u VF# %u\n",
7941 			      ice_mdd_tx_tclan_str(event), queue, pf_num, vf_num);
7942 
7943 		/* Only clear this event if it matches this PF, that way other
7944 		 * PFs can read the event and determine VF and queue number.
7945 		 */
7946 		if (pf_num == hw->pf_id)
7947 			wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff);
7948 
7949 		mdd_detected = true;
7950 	}
7951 
7952 	/* Determine what triggered the MDD event */
7953 	reg = rd32(hw, GL_MDET_TX_PQM);
7954 	if (reg & GL_MDET_TX_PQM_VALID_M) {
7955 		u8 pf_num  = (reg & GL_MDET_TX_PQM_PF_NUM_M) >> GL_MDET_TX_PQM_PF_NUM_S;
7956 		u16 vf_num = (reg & GL_MDET_TX_PQM_VF_NUM_M) >> GL_MDET_TX_PQM_VF_NUM_S;
7957 		u8 event   = (reg & GL_MDET_TX_PQM_MAL_TYPE_M) >> GL_MDET_TX_PQM_MAL_TYPE_S;
7958 		u16 queue  = (reg & GL_MDET_TX_PQM_QNUM_M) >> GL_MDET_TX_PQM_QNUM_S;
7959 
7960 		device_printf(dev, "Malicious Driver Detection Tx Quanta check event '%s' on Tx queue %u PF# %u VF# %u\n",
7961 			      ice_mdd_tx_pqm_str(event), queue, pf_num, vf_num);
7962 
7963 		/* Only clear this event if it matches this PF, that way other
7964 		 * PFs can read the event and determine VF and queue number.
7965 		 */
7966 		if (pf_num == hw->pf_id)
7967 			wr32(hw, GL_MDET_TX_PQM, 0xffffffff);
7968 
7969 		mdd_detected = true;
7970 	}
7971 
7972 	reg = rd32(hw, GL_MDET_RX);
7973 	if (reg & GL_MDET_RX_VALID_M) {
7974 		u8 pf_num  = (reg & GL_MDET_RX_PF_NUM_M) >> GL_MDET_RX_PF_NUM_S;
7975 		u16 vf_num = (reg & GL_MDET_RX_VF_NUM_M) >> GL_MDET_RX_VF_NUM_S;
7976 		u8 event   = (reg & GL_MDET_RX_MAL_TYPE_M) >> GL_MDET_RX_MAL_TYPE_S;
7977 		u16 queue  = (reg & GL_MDET_RX_QNUM_M) >> GL_MDET_RX_QNUM_S;
7978 
7979 		device_printf(dev, "Malicious Driver Detection Rx event '%s' on Rx queue %u PF# %u VF# %u\n",
7980 			      ice_mdd_rx_str(event), queue, pf_num, vf_num);
7981 
7982 		/* Only clear this event if it matches this PF, that way other
7983 		 * PFs can read the event and determine VF and queue number.
7984 		 */
7985 		if (pf_num == hw->pf_id)
7986 			wr32(hw, GL_MDET_RX, 0xffffffff);
7987 
7988 		mdd_detected = true;
7989 	}
7990 
7991 	/* Now, confirm that this event actually affects this PF, by checking
7992 	 * the PF registers.
7993 	 */
7994 	if (mdd_detected) {
7995 		reg = rd32(hw, PF_MDET_TX_TCLAN);
7996 		if (reg & PF_MDET_TX_TCLAN_VALID_M) {
7997 			wr32(hw, PF_MDET_TX_TCLAN, 0xffff);
7998 			sc->soft_stats.tx_mdd_count++;
7999 			request_reinit = true;
8000 		}
8001 
8002 		reg = rd32(hw, PF_MDET_TX_PQM);
8003 		if (reg & PF_MDET_TX_PQM_VALID_M) {
8004 			wr32(hw, PF_MDET_TX_PQM, 0xffff);
8005 			sc->soft_stats.tx_mdd_count++;
8006 			request_reinit = true;
8007 		}
8008 
8009 		reg = rd32(hw, PF_MDET_RX);
8010 		if (reg & PF_MDET_RX_VALID_M) {
8011 			wr32(hw, PF_MDET_RX, 0xffff);
8012 			sc->soft_stats.rx_mdd_count++;
8013 			request_reinit = true;
8014 		}
8015 	}
8016 
8017 	/* TODO: Implement logic to detect and handle events caused by VFs. */
8018 
8019 	/* request that the upper stack re-initialize the Tx/Rx queues */
8020 	if (request_reinit)
8021 		ice_request_stack_reinit(sc);
8022 
8023 	ice_flush(hw);
8024 }
8025 
8026 /**
8027  * ice_start_dcbx_agent - Start DCBX agent in FW via AQ command
8028  * @sc: the device softc
8029  *
8030  * @pre device is DCB capable and the FW LLDP agent has started
8031  *
8032  * Checks DCBX status and starts the DCBX agent if it is not in
8033  * a valid state via an AQ command.
8034  */
8035 static void
8036 ice_start_dcbx_agent(struct ice_softc *sc)
8037 {
8038 	struct ice_hw *hw = &sc->hw;
8039 	device_t dev = sc->dev;
8040 	bool dcbx_agent_status;
8041 	enum ice_status status;
8042 
8043 	hw->port_info->qos_cfg.dcbx_status = ice_get_dcbx_status(hw);
8044 
8045 	if (hw->port_info->qos_cfg.dcbx_status != ICE_DCBX_STATUS_DONE &&
8046 	    hw->port_info->qos_cfg.dcbx_status != ICE_DCBX_STATUS_IN_PROGRESS) {
8047 		/*
8048 		 * Start DCBX agent, but not LLDP. The return value isn't
8049 		 * checked here because a more detailed dcbx agent status is
8050 		 * retrieved and checked in ice_init_dcb() and elsewhere.
8051 		 */
8052 		status = ice_aq_start_stop_dcbx(hw, true, &dcbx_agent_status, NULL);
8053 		if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EPERM)
8054 			device_printf(dev,
8055 			    "start_stop_dcbx failed, err %s aq_err %s\n",
8056 			    ice_status_str(status),
8057 			    ice_aq_str(hw->adminq.sq_last_status));
8058 	}
8059 }
8060 
8061 /**
8062  * ice_init_dcb_setup - Initialize DCB settings for HW
8063  * @sc: the device softc
8064  *
8065  * This needs to be called after the fw_lldp_agent sysctl is added, since that
8066  * can update the device's LLDP agent status if a tunable value is set.
8067  *
8068  * Get and store the initial state of DCB settings on driver load. Print out
8069  * informational messages as well.
8070  */
8071 void
8072 ice_init_dcb_setup(struct ice_softc *sc)
8073 {
8074 	struct ice_dcbx_cfg *local_dcbx_cfg;
8075 	struct ice_hw *hw = &sc->hw;
8076 	device_t dev = sc->dev;
8077 	enum ice_status status;
8078 	u8 pfcmode_ret;
8079 
8080 	/* Don't do anything if DCB isn't supported */
8081 	if (!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_DCB)) {
8082 		device_printf(dev, "%s: No DCB support\n", __func__);
8083 		return;
8084 	}
8085 
8086 	/* Starts DCBX agent if it needs starting */
8087 	ice_start_dcbx_agent(sc);
8088 
8089 	/* This sets hw->port_info->qos_cfg.is_sw_lldp */
8090 	status = ice_init_dcb(hw, true);
8091 
8092 	/* If there is an error, then FW LLDP is not in a usable state */
8093 	if (status != 0 && status != ICE_ERR_NOT_READY) {
8094 		/* Don't print an error message if the return code from the AQ
8095 		 * cmd performed in ice_init_dcb() is EPERM; that means the
8096 		 * FW LLDP engine is disabled, and that is a valid state.
8097 		 */
8098 		if (!(status == ICE_ERR_AQ_ERROR &&
8099 		      hw->adminq.sq_last_status == ICE_AQ_RC_EPERM)) {
8100 			device_printf(dev, "DCB init failed, err %s aq_err %s\n",
8101 				      ice_status_str(status),
8102 				      ice_aq_str(hw->adminq.sq_last_status));
8103 		}
8104 		hw->port_info->qos_cfg.dcbx_status = ICE_DCBX_STATUS_NOT_STARTED;
8105 	}
8106 
8107 	switch (hw->port_info->qos_cfg.dcbx_status) {
8108 	case ICE_DCBX_STATUS_DIS:
8109 		ice_debug(hw, ICE_DBG_DCB, "DCBX disabled\n");
8110 		break;
8111 	case ICE_DCBX_STATUS_NOT_STARTED:
8112 		ice_debug(hw, ICE_DBG_DCB, "DCBX not started\n");
8113 		break;
8114 	case ICE_DCBX_STATUS_MULTIPLE_PEERS:
8115 		ice_debug(hw, ICE_DBG_DCB, "DCBX detected multiple peers\n");
8116 		break;
8117 	default:
8118 		break;
8119 	}
8120 
8121 	/* LLDP disabled in FW */
8122 	if (hw->port_info->qos_cfg.is_sw_lldp) {
8123 		ice_add_rx_lldp_filter(sc);
8124 		device_printf(dev, "Firmware LLDP agent disabled\n");
8125 	}
8126 
8127 	/* Query and cache PFC mode */
8128 	status = ice_aq_query_pfc_mode(hw, &pfcmode_ret, NULL);
8129 	if (status) {
8130 		device_printf(dev, "PFC mode query failed, err %s aq_err %s\n",
8131 			      ice_status_str(status),
8132 			      ice_aq_str(hw->adminq.sq_last_status));
8133 	}
8134 	local_dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
8135 	switch (pfcmode_ret) {
8136 	case ICE_AQC_PFC_VLAN_BASED_PFC:
8137 		local_dcbx_cfg->pfc_mode = ICE_QOS_MODE_VLAN;
8138 		break;
8139 	case ICE_AQC_PFC_DSCP_BASED_PFC:
8140 		local_dcbx_cfg->pfc_mode = ICE_QOS_MODE_DSCP;
8141 		break;
8142 	default:
8143 		/* DCB is disabled, but we shouldn't get here */
8144 		break;
8145 	}
8146 
8147 	/* Set default SW MIB for init */
8148 	ice_set_default_local_mib_settings(sc);
8149 
8150 	ice_set_bit(ICE_FEATURE_DCB, sc->feat_en);
8151 }
8152 
8153 /**
8154  * ice_dcb_get_tc_map - Scans config to get bitmap of enabled TCs
8155  * @dcbcfg: DCB configuration to examine
8156  *
8157  * Scans a TC mapping table inside dcbcfg to find traffic classes
8158  * enabled and @returns a bitmask of enabled TCs
8159  */
8160 u8
8161 ice_dcb_get_tc_map(const struct ice_dcbx_cfg *dcbcfg)
8162 {
8163 	u8 tc_map = 0;
8164 	int i = 0;
8165 
8166 	switch (dcbcfg->pfc_mode) {
8167 	case ICE_QOS_MODE_VLAN:
8168 		/* XXX: "i" is actually "User Priority" here, not
8169 		 * Traffic Class, but the max for both is 8, so it works
8170 		 * out here.
8171 		 */
8172 		for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
8173 			tc_map |= BIT(dcbcfg->etscfg.prio_table[i]);
8174 		break;
8175 	case ICE_QOS_MODE_DSCP:
8176 		for (i = 0; i < ICE_DSCP_NUM_VAL; i++)
8177 			tc_map |= BIT(dcbcfg->dscp_map[i]);
8178 		break;
8179 	default:
8180 		/* Invalid Mode */
8181 		tc_map = ICE_DFLT_TRAFFIC_CLASS;
8182 		break;
8183 	}
8184 
8185 	return (tc_map);
8186 }
8187 
8188 /**
8189  * ice_dcb_get_num_tc - Get the number of TCs from DCBX config
8190  * @dcbcfg: config to retrieve number of TCs from
8191  *
8192  * @return number of contiguous TCs found in dcbcfg's ETS Configuration
8193  * Priority Assignment Table, a value from 1 to 8. If there are
8194  * non-contiguous TCs used (e.g. assigning 1 and 3 without using 2),
8195  * then returns 0.
8196  */
8197 static u8
8198 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg)
8199 {
8200 	u8 tc_map;
8201 
8202 	tc_map = ice_dcb_get_tc_map(dcbcfg);
8203 
8204 	return (ice_dcb_tc_contig(tc_map));
8205 }
8206 
8207 /**
8208  * ice_debug_print_mib_change_event - helper function to log LLDP MIB change events
8209  * @sc: the device private softc
8210  * @event: event received on a control queue
8211  *
8212  * Prints out the type and contents of an LLDP MIB change event in a DCB debug message.
8213  */
8214 static void
8215 ice_debug_print_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event)
8216 {
8217 	struct ice_aqc_lldp_get_mib *params =
8218 	    (struct ice_aqc_lldp_get_mib *)&event->desc.params.lldp_get_mib;
8219 	u8 mib_type, bridge_type, tx_status;
8220 
8221 	static const char* mib_type_strings[] = {
8222 	    "Local MIB",
8223 	    "Remote MIB",
8224 	    "Reserved",
8225 	    "Reserved"
8226 	};
8227 	static const char* bridge_type_strings[] = {
8228 	    "Nearest Bridge",
8229 	    "Non-TPMR Bridge",
8230 	    "Reserved",
8231 	    "Reserved"
8232 	};
8233 	static const char* tx_status_strings[] = {
8234 	    "Port's TX active",
8235 	    "Port's TX suspended and drained",
8236 	    "Reserved",
8237 	    "Port's TX suspended and drained; blocked TC pipe flushed"
8238 	};
8239 
8240 	mib_type = (params->type & ICE_AQ_LLDP_MIB_TYPE_M) >>
8241 	    ICE_AQ_LLDP_MIB_TYPE_S;
8242 	bridge_type = (params->type & ICE_AQ_LLDP_BRID_TYPE_M) >>
8243 	    ICE_AQ_LLDP_BRID_TYPE_S;
8244 	tx_status = (params->type & ICE_AQ_LLDP_TX_M) >>
8245 	    ICE_AQ_LLDP_TX_S;
8246 
8247 	ice_debug(&sc->hw, ICE_DBG_DCB, "LLDP MIB Change Event (%s, %s, %s)\n",
8248 	    mib_type_strings[mib_type], bridge_type_strings[bridge_type],
8249 	    tx_status_strings[tx_status]);
8250 
8251 	/* Nothing else to report */
8252 	if (!event->msg_buf)
8253 		return;
8254 
8255 	ice_debug(&sc->hw, ICE_DBG_DCB, "- %s contents:\n", mib_type_strings[mib_type]);
8256 	ice_debug_array(&sc->hw, ICE_DBG_DCB, 16, 1, event->msg_buf,
8257 			event->msg_len);
8258 }
8259 
8260 /**
8261  * ice_dcb_needs_reconfig - Returns true if driver needs to reconfigure
8262  * @sc: the device private softc
8263  * @old_cfg: Old DCBX configuration to compare against
8264  * @new_cfg: New DCBX configuration to check
8265  *
8266  * @return true if something changed in new_cfg that requires the driver
8267  * to do some reconfiguration.
8268  */
8269 static bool
8270 ice_dcb_needs_reconfig(struct ice_softc *sc, struct ice_dcbx_cfg *old_cfg,
8271     struct ice_dcbx_cfg *new_cfg)
8272 {
8273 	struct ice_hw *hw = &sc->hw;
8274 	bool needs_reconfig = false;
8275 
8276 	/* No change detected in DCBX config */
8277 	if (!memcmp(old_cfg, new_cfg, sizeof(*old_cfg))) {
8278 		ice_debug(hw, ICE_DBG_DCB,
8279 		    "No change detected in local DCBX configuration\n");
8280 		return (false);
8281 	}
8282 
8283 	/* Check if ETS config has changed */
8284 	if (memcmp(&new_cfg->etscfg, &old_cfg->etscfg,
8285 		   sizeof(new_cfg->etscfg))) {
8286 		/* If Priority Table has changed, then driver reconfig is needed */
8287 		if (memcmp(&new_cfg->etscfg.prio_table,
8288 			   &old_cfg->etscfg.prio_table,
8289 			   sizeof(new_cfg->etscfg.prio_table))) {
8290 			ice_debug(hw, ICE_DBG_DCB, "ETS UP2TC changed\n");
8291 			needs_reconfig = true;
8292 		}
8293 
8294 		/* These are just informational */
8295 		if (memcmp(&new_cfg->etscfg.tcbwtable,
8296 			   &old_cfg->etscfg.tcbwtable,
8297 			   sizeof(new_cfg->etscfg.tcbwtable))) {
8298 			ice_debug(hw, ICE_DBG_DCB, "ETS TCBW table changed\n");
8299 			needs_reconfig = true;
8300 		}
8301 
8302 		if (memcmp(&new_cfg->etscfg.tsatable,
8303 			   &old_cfg->etscfg.tsatable,
8304 			   sizeof(new_cfg->etscfg.tsatable))) {
8305 			ice_debug(hw, ICE_DBG_DCB, "ETS TSA table changed\n");
8306 			needs_reconfig = true;
8307 		}
8308 	}
8309 
8310 	/* Check if PFC config has changed */
8311 	if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) {
8312 		ice_debug(hw, ICE_DBG_DCB, "PFC config changed\n");
8313 		needs_reconfig = true;
8314 	}
8315 
8316 	/* Check if APP table has changed */
8317 	if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app)))
8318 		ice_debug(hw, ICE_DBG_DCB, "APP Table changed\n");
8319 
8320 	ice_debug(hw, ICE_DBG_DCB, "%s result: %d\n", __func__, needs_reconfig);
8321 
8322 	return (needs_reconfig);
8323 }
8324 
8325 /**
8326  * ice_stop_pf_vsi - Stop queues for PF LAN VSI
8327  * @sc: the device private softc
8328  *
8329  * Flushes interrupts and stops the queues associated with the PF LAN VSI.
8330  */
8331 static void
8332 ice_stop_pf_vsi(struct ice_softc *sc)
8333 {
8334 	/* Dissociate the Tx and Rx queues from the interrupts */
8335 	ice_flush_txq_interrupts(&sc->pf_vsi);
8336 	ice_flush_rxq_interrupts(&sc->pf_vsi);
8337 
8338 	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
8339 		return;
8340 
8341 	/* Disable the Tx and Rx queues */
8342 	ice_vsi_disable_tx(&sc->pf_vsi);
8343 	ice_control_all_rx_queues(&sc->pf_vsi, false);
8344 }
8345 
8346 /**
8347  * ice_vsi_setup_q_map - Setup a VSI queue map
8348  * @vsi: the VSI being configured
8349  * @ctxt: VSI context structure
8350  */
8351 static void
8352 ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
8353 {
8354 	u16 qcounts[ICE_MAX_TRAFFIC_CLASS] = {};
8355 	u16 offset = 0, qmap = 0, pow = 0;
8356 	u16 num_q_per_tc, qcount_rx, rem_queues;
8357 	int i, j, k;
8358 
8359 	if (vsi->num_tcs == 0) {
8360 		/* at least TC0 should be enabled by default */
8361 		vsi->num_tcs = 1;
8362 		vsi->tc_map = 0x1;
8363 	}
8364 
8365 	qcount_rx = vsi->num_rx_queues;
8366 	num_q_per_tc = min(qcount_rx / vsi->num_tcs, ICE_MAX_RXQS_PER_TC);
8367 
8368 	if (!num_q_per_tc)
8369 		num_q_per_tc = 1;
8370 
8371 	/* Set initial values for # of queues to use for each active TC */
8372 	ice_for_each_traffic_class(i)
8373 		if (i < vsi->num_tcs)
8374 			qcounts[i] = num_q_per_tc;
8375 
8376 	/* If any queues are unassigned, add them to TC 0 */
8377 	rem_queues = qcount_rx % vsi->num_tcs;
8378 	if (rem_queues > 0)
8379 		qcounts[0] += rem_queues;
8380 
8381 	/* TC mapping is a function of the number of Rx queues assigned to the
8382 	 * VSI for each traffic class and the offset of these queues.
8383 	 * The first 10 bits are for queue offset for TC0, next 4 bits for no:of
8384 	 * queues allocated to TC0. No:of queues is a power-of-2.
8385 	 *
8386 	 * If TC is not enabled, the queue offset is set to 0, and allocate one
8387 	 * queue, this way, traffic for the given TC will be sent to the default
8388 	 * queue.
8389 	 *
8390 	 * Setup number and offset of Rx queues for all TCs for the VSI
8391 	 */
8392 	ice_for_each_traffic_class(i) {
8393 		if (!(vsi->tc_map & BIT(i))) {
8394 			/* TC is not enabled */
8395 			vsi->tc_info[i].qoffset = 0;
8396 			vsi->tc_info[i].qcount_rx = 1;
8397 			vsi->tc_info[i].qcount_tx = 1;
8398 
8399 			ctxt->info.tc_mapping[i] = 0;
8400 			continue;
8401 		}
8402 
8403 		/* TC is enabled */
8404 		vsi->tc_info[i].qoffset = offset;
8405 		vsi->tc_info[i].qcount_rx = qcounts[i];
8406 		vsi->tc_info[i].qcount_tx = qcounts[i];
8407 
8408 		/* find the (rounded up) log-2 of queue count for current TC */
8409 		pow = fls(qcounts[i] - 1);
8410 
8411 		qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
8412 			ICE_AQ_VSI_TC_Q_OFFSET_M) |
8413 			((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
8414 			 ICE_AQ_VSI_TC_Q_NUM_M);
8415 		ctxt->info.tc_mapping[i] = CPU_TO_LE16(qmap);
8416 
8417 		/* Store traffic class and handle data in queue structures */
8418 		for (j = offset, k = 0; j < offset + qcounts[i]; j++, k++) {
8419 			vsi->tx_queues[j].q_handle = k;
8420 			vsi->tx_queues[j].tc = i;
8421 
8422 			vsi->rx_queues[j].tc = i;
8423 		}
8424 
8425 		offset += qcounts[i];
8426 	}
8427 
8428 	/* Rx queue mapping */
8429 	ctxt->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_CONTIG);
8430 	ctxt->info.q_mapping[0] = CPU_TO_LE16(vsi->rx_qmap[0]);
8431 	ctxt->info.q_mapping[1] = CPU_TO_LE16(vsi->num_rx_queues);
8432 }
8433 
8434 /**
8435  * ice_pf_vsi_cfg_tc - Configure PF VSI for a given TC map
8436  * @sc: the device private softc
8437  * @tc_map: traffic class bitmap
8438  *
8439  * @pre VSI queues are stopped
8440  *
8441  * @return 0 if configuration is successful
8442  * @return EIO if Update VSI AQ cmd fails
8443  * @return ENODEV if updating Tx Scheduler fails
8444  */
8445 static int
8446 ice_pf_vsi_cfg_tc(struct ice_softc *sc, u8 tc_map)
8447 {
8448 	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
8449 	struct ice_vsi *vsi = &sc->pf_vsi;
8450 	struct ice_hw *hw = &sc->hw;
8451 	struct ice_vsi_ctx ctx = { 0 };
8452 	device_t dev = sc->dev;
8453 	enum ice_status status;
8454 	u8 num_tcs = 0;
8455 	int i = 0;
8456 
8457 	/* Count the number of enabled Traffic Classes */
8458 	ice_for_each_traffic_class(i)
8459 		if (tc_map & BIT(i))
8460 			num_tcs++;
8461 
8462 	vsi->tc_map = tc_map;
8463 	vsi->num_tcs = num_tcs;
8464 
8465 	/* Set default parameters for context */
8466 	ctx.vf_num = 0;
8467 	ctx.info = vsi->info;
8468 
8469 	/* Setup queue map */
8470 	ice_vsi_setup_q_map(vsi, &ctx);
8471 
8472 	/* Update VSI configuration in firmware (RX queues) */
8473 	ctx.info.valid_sections = CPU_TO_LE16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
8474 	status = ice_update_vsi(hw, vsi->idx, &ctx, NULL);
8475 	if (status) {
8476 		device_printf(dev,
8477 		    "%s: Update VSI AQ call failed, err %s aq_err %s\n",
8478 		    __func__, ice_status_str(status),
8479 		    ice_aq_str(hw->adminq.sq_last_status));
8480 		return (EIO);
8481 	}
8482 	vsi->info = ctx.info;
8483 
8484 	/* Use values derived in ice_vsi_setup_q_map() */
8485 	for (i = 0; i < num_tcs; i++)
8486 		max_txqs[i] = vsi->tc_info[i].qcount_tx;
8487 
8488 	if (hw->debug_mask & ICE_DBG_DCB) {
8489 		device_printf(dev, "%s: max_txqs:", __func__);
8490 		ice_for_each_traffic_class(i)
8491 			printf(" %d", max_txqs[i]);
8492 		printf("\n");
8493 	}
8494 
8495 	/* Update LAN Tx queue info in firmware */
8496 	status = ice_cfg_vsi_lan(hw->port_info, vsi->idx, vsi->tc_map,
8497 				 max_txqs);
8498 	if (status) {
8499 		device_printf(dev,
8500 		    "%s: Failed VSI lan queue config, err %s aq_err %s\n",
8501 		    __func__, ice_status_str(status),
8502 		    ice_aq_str(hw->adminq.sq_last_status));
8503 		return (ENODEV);
8504 	}
8505 
8506 	vsi->info.valid_sections = 0;
8507 
8508 	return (0);
8509 }
8510 
8511 /**
8512  * ice_dcb_tc_contig - Count TCs if they're contiguous
8513  * @tc_map: pointer to priority table
8514  *
8515  * @return The number of traffic classes in
8516  * an 8-bit TC bitmap, or if there is a gap, then returns 0.
8517  */
8518 static u8
8519 ice_dcb_tc_contig(u8 tc_map)
8520 {
8521 	bool tc_unused = false;
8522 	u8 ret = 0;
8523 
8524 	/* Scan bitmask for contiguous TCs starting with TC0 */
8525 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
8526 		if (tc_map & BIT(i)) {
8527 			if (!tc_unused) {
8528 				ret++;
8529 			} else {
8530 				/* Non-contiguous TCs detected */
8531 				return (0);
8532 			}
8533 		} else
8534 			tc_unused = true;
8535 	}
8536 
8537 	return (ret);
8538 }
8539 
8540 /**
8541  * ice_dcb_recfg - Reconfigure VSI with new DCB settings
8542  * @sc: the device private softc
8543  *
8544  * @pre All VSIs have been disabled/stopped
8545  *
8546  * Reconfigures VSI settings based on local_dcbx_cfg.
8547  */
8548 static void
8549 ice_dcb_recfg(struct ice_softc *sc)
8550 {
8551 	struct ice_dcbx_cfg *dcbcfg =
8552 	    &sc->hw.port_info->qos_cfg.local_dcbx_cfg;
8553 	device_t dev = sc->dev;
8554 	u8 tc_map = 0;
8555 	int ret;
8556 
8557 	tc_map = ice_dcb_get_tc_map(dcbcfg);
8558 
8559 	/* If non-contiguous TCs are used, then configure
8560 	 * the default TC instead. There's no support for
8561 	 * non-contiguous TCs being used.
8562 	 */
8563 	if (ice_dcb_tc_contig(tc_map) == 0) {
8564 		tc_map = ICE_DFLT_TRAFFIC_CLASS;
8565 		ice_set_default_local_lldp_mib(sc);
8566 	}
8567 
8568 	/* Reconfigure VSI queues to add/remove traffic classes */
8569 	ret = ice_pf_vsi_cfg_tc(sc, tc_map);
8570 	if (ret)
8571 		device_printf(dev,
8572 		    "Failed to configure TCs for PF VSI, err %s\n",
8573 		    ice_err_str(ret));
8574 
8575 }
8576 
8577 /**
8578  * ice_set_default_local_mib_settings - Set Local LLDP MIB to default settings
8579  * @sc: device softc structure
8580  *
8581  * Overwrites the driver's SW local LLDP MIB with default settings. This
8582  * ensures the driver has a valid MIB when it next uses the Set Local LLDP MIB
8583  * admin queue command.
8584  */
8585 static void
8586 ice_set_default_local_mib_settings(struct ice_softc *sc)
8587 {
8588 	struct ice_dcbx_cfg *dcbcfg;
8589 	struct ice_hw *hw = &sc->hw;
8590 	struct ice_port_info *pi;
8591 	u8 maxtcs, maxtcs_ets, old_pfc_mode;
8592 
8593 	pi = hw->port_info;
8594 
8595 	dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
8596 
8597 	maxtcs = hw->func_caps.common_cap.maxtc;
8598 	/* This value is only 3 bits; 8 TCs maps to 0 */
8599 	maxtcs_ets = maxtcs & ICE_IEEE_ETS_MAXTC_M;
8600 
8601 	/* VLAN vs DSCP mode needs to be preserved */
8602 	old_pfc_mode = dcbcfg->pfc_mode;
8603 
8604 	/**
8605 	 * Setup the default settings used by the driver for the Set Local
8606 	 * LLDP MIB Admin Queue command (0x0A08). (1TC w/ 100% BW, ETS, no
8607 	 * PFC, TSA=2).
8608 	 */
8609 	memset(dcbcfg, 0, sizeof(*dcbcfg));
8610 
8611 	dcbcfg->etscfg.willing = 1;
8612 	dcbcfg->etscfg.tcbwtable[0] = 100;
8613 	dcbcfg->etscfg.maxtcs = maxtcs_ets;
8614 	dcbcfg->etscfg.tsatable[0] = 2;
8615 
8616 	dcbcfg->etsrec = dcbcfg->etscfg;
8617 	dcbcfg->etsrec.willing = 0;
8618 
8619 	dcbcfg->pfc.willing = 1;
8620 	dcbcfg->pfc.pfccap = maxtcs;
8621 
8622 	dcbcfg->pfc_mode = old_pfc_mode;
8623 }
8624 
8625 /**
8626  * ice_do_dcb_reconfig - notify RDMA and reconfigure PF LAN VSI
8627  * @sc: the device private softc
8628  * @pending_mib: FW has a pending MIB change to execute
8629  *
8630  * @pre Determined that the DCB configuration requires a change
8631  *
8632  * Reconfigures the PF LAN VSI based on updated DCB configuration
8633  * found in the hw struct's/port_info's/ local dcbx configuration.
8634  */
8635 static void
8636 ice_do_dcb_reconfig(struct ice_softc *sc, bool pending_mib)
8637 {
8638 	struct ice_aqc_port_ets_elem port_ets = { 0 };
8639 	struct ice_dcbx_cfg *local_dcbx_cfg;
8640 	struct ice_hw *hw = &sc->hw;
8641 	struct ice_port_info *pi;
8642 	device_t dev = sc->dev;
8643 	enum ice_status status;
8644 
8645 	pi = sc->hw.port_info;
8646 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
8647 
8648 	ice_rdma_notify_dcb_qos_change(sc);
8649 	/* If there's a pending MIB, tell the FW to execute the MIB change
8650 	 * now.
8651 	 */
8652 	if (pending_mib) {
8653 		status = ice_lldp_execute_pending_mib(hw);
8654 		if ((status == ICE_ERR_AQ_ERROR) &&
8655 		    (hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)) {
8656 			device_printf(dev,
8657 			    "Execute Pending LLDP MIB AQ call failed, no pending MIB\n");
8658 		} else if (status) {
8659 			device_printf(dev,
8660 			    "Execute Pending LLDP MIB AQ call failed, err %s aq_err %s\n",
8661 			    ice_status_str(status),
8662 			    ice_aq_str(hw->adminq.sq_last_status));
8663 			/* This won't break traffic, but QoS will not work as expected */
8664 		}
8665 	}
8666 
8667 	/* Set state when there's more than one TC */
8668 	if (ice_dcb_get_num_tc(local_dcbx_cfg) > 1) {
8669 		device_printf(dev, "Multiple traffic classes enabled\n");
8670 		ice_set_state(&sc->state, ICE_STATE_MULTIPLE_TCS);
8671 	} else {
8672 		device_printf(dev, "Multiple traffic classes disabled\n");
8673 		ice_clear_state(&sc->state, ICE_STATE_MULTIPLE_TCS);
8674 	}
8675 
8676 	/* Disable PF VSI since it's going to be reconfigured */
8677 	ice_stop_pf_vsi(sc);
8678 
8679 	/* Query ETS configuration and update SW Tx scheduler info */
8680 	status = ice_query_port_ets(pi, &port_ets, sizeof(port_ets), NULL);
8681 	if (status != ICE_SUCCESS) {
8682 		device_printf(dev,
8683 		    "Query Port ETS AQ call failed, err %s aq_err %s\n",
8684 		    ice_status_str(status),
8685 		    ice_aq_str(hw->adminq.sq_last_status));
8686 		/* This won't break traffic, but QoS will not work as expected */
8687 	}
8688 
8689 	/* Change PF VSI configuration */
8690 	ice_dcb_recfg(sc);
8691 
8692 	/* Send new configuration to RDMA client driver */
8693 	ice_rdma_dcb_qos_update(sc, pi);
8694 
8695 	ice_request_stack_reinit(sc);
8696 }
8697 
8698 /**
8699  * ice_handle_mib_change_event - helper function to handle LLDP MIB change events
8700  * @sc: the device private softc
8701  * @event: event received on a control queue
8702  *
8703  * Checks the updated MIB it receives and possibly reconfigures the PF LAN
8704  * VSI depending on what has changed. This will also print out some debug
8705  * information about the MIB event if ICE_DBG_DCB is enabled in the debug_mask.
8706  */
8707 static void
8708 ice_handle_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event)
8709 {
8710 	struct ice_aqc_lldp_get_mib *params =
8711 	    (struct ice_aqc_lldp_get_mib *)&event->desc.params.lldp_get_mib;
8712 	struct ice_dcbx_cfg tmp_dcbx_cfg, *local_dcbx_cfg;
8713 	struct ice_port_info *pi;
8714 	device_t dev = sc->dev;
8715 	struct ice_hw *hw = &sc->hw;
8716 	bool needs_reconfig, mib_is_pending;
8717 	enum ice_status status;
8718 	u8 mib_type, bridge_type;
8719 
8720 	ASSERT_CFG_LOCKED(sc);
8721 
8722 	ice_debug_print_mib_change_event(sc, event);
8723 
8724 	pi = sc->hw.port_info;
8725 
8726 	mib_type = (params->type & ICE_AQ_LLDP_MIB_TYPE_M) >>
8727 	    ICE_AQ_LLDP_MIB_TYPE_S;
8728 	bridge_type = (params->type & ICE_AQ_LLDP_BRID_TYPE_M) >>
8729 	    ICE_AQ_LLDP_BRID_TYPE_S;
8730 	mib_is_pending = (params->state & ICE_AQ_LLDP_MIB_CHANGE_STATE_M) >>
8731 	    ICE_AQ_LLDP_MIB_CHANGE_STATE_S;
8732 
8733 	/* Ignore if event is not for Nearest Bridge */
8734 	if (bridge_type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID)
8735 		return;
8736 
8737 	/* Check MIB Type and return if event for Remote MIB update */
8738 	if (mib_type == ICE_AQ_LLDP_MIB_REMOTE) {
8739 		/* Update the cached remote MIB and return */
8740 		status = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
8741 					 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID,
8742 					 &pi->qos_cfg.remote_dcbx_cfg);
8743 		if (status)
8744 			device_printf(dev,
8745 			    "%s: Failed to get Remote DCB config; status %s, aq_err %s\n",
8746 			    __func__, ice_status_str(status),
8747 			    ice_aq_str(hw->adminq.sq_last_status));
8748 		/* Not fatal if this fails */
8749 		return;
8750 	}
8751 
8752 	/* Save line length by aliasing the local dcbx cfg */
8753 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
8754 	/* Save off the old configuration and clear current config */
8755 	tmp_dcbx_cfg = *local_dcbx_cfg;
8756 	memset(local_dcbx_cfg, 0, sizeof(*local_dcbx_cfg));
8757 
8758 	/* Update the current local_dcbx_cfg with new data */
8759 	if (mib_is_pending) {
8760 		ice_get_dcb_cfg_from_mib_change(pi, event);
8761 	} else {
8762 		/* Get updated DCBX data from firmware */
8763 		status = ice_get_dcb_cfg(pi);
8764 		if (status) {
8765 			device_printf(dev,
8766 			    "%s: Failed to get Local DCB config; status %s, aq_err %s\n",
8767 			    __func__, ice_status_str(status),
8768 			    ice_aq_str(hw->adminq.sq_last_status));
8769 			return;
8770 		}
8771 	}
8772 
8773 	/* Check to see if DCB needs reconfiguring */
8774 	needs_reconfig = ice_dcb_needs_reconfig(sc, &tmp_dcbx_cfg,
8775 	    local_dcbx_cfg);
8776 
8777 	if (!needs_reconfig && !mib_is_pending)
8778 		return;
8779 
8780 	/* Reconfigure -- this will also notify FW that configuration is done,
8781 	 * if the FW MIB change is only pending instead of executed.
8782 	 */
8783 	ice_do_dcb_reconfig(sc, mib_is_pending);
8784 }
8785 
8786 /**
8787  * ice_send_version - Send driver version to firmware
8788  * @sc: the device private softc
8789  *
8790  * Send the driver version to the firmware. This must be called as early as
8791  * possible after ice_init_hw().
8792  */
8793 int
8794 ice_send_version(struct ice_softc *sc)
8795 {
8796 	struct ice_driver_ver driver_version = {0};
8797 	struct ice_hw *hw = &sc->hw;
8798 	device_t dev = sc->dev;
8799 	enum ice_status status;
8800 
8801 	driver_version.major_ver = ice_major_version;
8802 	driver_version.minor_ver = ice_minor_version;
8803 	driver_version.build_ver = ice_patch_version;
8804 	driver_version.subbuild_ver = ice_rc_version;
8805 
8806 	strlcpy((char *)driver_version.driver_string, ice_driver_version,
8807 		sizeof(driver_version.driver_string));
8808 
8809 	status = ice_aq_send_driver_ver(hw, &driver_version, NULL);
8810 	if (status) {
8811 		device_printf(dev, "Unable to send driver version to firmware, err %s aq_err %s\n",
8812 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
8813 		return (EIO);
8814 	}
8815 
8816 	return (0);
8817 }
8818 
8819 /**
8820  * ice_handle_lan_overflow_event - helper function to log LAN overflow events
8821  * @sc: device softc
8822  * @event: event received on a control queue
8823  *
8824  * Prints out a message when a LAN overflow event is detected on a receive
8825  * queue.
8826  */
8827 static void
8828 ice_handle_lan_overflow_event(struct ice_softc *sc, struct ice_rq_event_info *event)
8829 {
8830 	struct ice_aqc_event_lan_overflow *params =
8831 	    (struct ice_aqc_event_lan_overflow *)&event->desc.params.lan_overflow;
8832 	struct ice_hw *hw = &sc->hw;
8833 
8834 	ice_debug(hw, ICE_DBG_DCB, "LAN overflow event detected, prtdcb_ruptq=0x%08x, qtx_ctl=0x%08x\n",
8835 		  LE32_TO_CPU(params->prtdcb_ruptq),
8836 		  LE32_TO_CPU(params->qtx_ctl));
8837 }
8838 
8839 /**
8840  * ice_add_ethertype_to_list - Add an Ethertype filter to a filter list
8841  * @vsi: the VSI to target packets to
8842  * @list: the list to add the filter to
8843  * @ethertype: the Ethertype to filter on
8844  * @direction: The direction of the filter (Tx or Rx)
8845  * @action: the action to take
8846  *
8847  * Add an Ethertype filter to a filter list. Used to forward a series of
8848  * filters to the firmware for configuring the switch.
8849  *
8850  * Returns 0 on success, and an error code on failure.
8851  */
8852 static int
8853 ice_add_ethertype_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
8854 			  u16 ethertype, u16 direction,
8855 			  enum ice_sw_fwd_act_type action)
8856 {
8857 	struct ice_fltr_list_entry *entry;
8858 
8859 	MPASS((direction == ICE_FLTR_TX) || (direction == ICE_FLTR_RX));
8860 
8861 	entry = (__typeof(entry))malloc(sizeof(*entry), M_ICE, M_NOWAIT|M_ZERO);
8862 	if (!entry)
8863 		return (ENOMEM);
8864 
8865 	entry->fltr_info.flag = direction;
8866 	entry->fltr_info.src_id = ICE_SRC_ID_VSI;
8867 	entry->fltr_info.lkup_type = ICE_SW_LKUP_ETHERTYPE;
8868 	entry->fltr_info.fltr_act = action;
8869 	entry->fltr_info.vsi_handle = vsi->idx;
8870 	entry->fltr_info.l_data.ethertype_mac.ethertype = ethertype;
8871 
8872 	LIST_ADD(&entry->list_entry, list);
8873 
8874 	return 0;
8875 }
8876 
8877 #define ETHERTYPE_PAUSE_FRAMES 0x8808
8878 #define ETHERTYPE_LLDP_FRAMES 0x88cc
8879 
8880 /**
8881  * ice_cfg_pf_ethertype_filters - Configure switch to drop ethertypes
8882  * @sc: the device private softc
8883  *
8884  * Configure the switch to drop PAUSE frames and LLDP frames transmitted from
8885  * the host. This prevents malicious VFs from sending these frames and being
8886  * able to control or configure the network.
8887  */
8888 int
8889 ice_cfg_pf_ethertype_filters(struct ice_softc *sc)
8890 {
8891 	struct ice_list_head ethertype_list;
8892 	struct ice_vsi *vsi = &sc->pf_vsi;
8893 	struct ice_hw *hw = &sc->hw;
8894 	device_t dev = sc->dev;
8895 	enum ice_status status;
8896 	int err = 0;
8897 
8898 	INIT_LIST_HEAD(&ethertype_list);
8899 
8900 	/*
8901 	 * Note that the switch filters will ignore the VSI index for the drop
8902 	 * action, so we only need to program drop filters once for the main
8903 	 * VSI.
8904 	 */
8905 
8906 	/* Configure switch to drop all Tx pause frames coming from any VSI. */
8907 	if (sc->enable_tx_fc_filter) {
8908 		err = ice_add_ethertype_to_list(vsi, &ethertype_list,
8909 						ETHERTYPE_PAUSE_FRAMES,
8910 						ICE_FLTR_TX, ICE_DROP_PACKET);
8911 		if (err)
8912 			goto free_ethertype_list;
8913 	}
8914 
8915 	/* Configure switch to drop LLDP frames coming from any VSI */
8916 	if (sc->enable_tx_lldp_filter) {
8917 		err = ice_add_ethertype_to_list(vsi, &ethertype_list,
8918 						ETHERTYPE_LLDP_FRAMES,
8919 						ICE_FLTR_TX, ICE_DROP_PACKET);
8920 		if (err)
8921 			goto free_ethertype_list;
8922 	}
8923 
8924 	status = ice_add_eth_mac(hw, &ethertype_list);
8925 	if (status) {
8926 		device_printf(dev,
8927 			      "Failed to add Tx Ethertype filters, err %s aq_err %s\n",
8928 			      ice_status_str(status),
8929 			      ice_aq_str(hw->adminq.sq_last_status));
8930 		err = (EIO);
8931 	}
8932 
8933 free_ethertype_list:
8934 	ice_free_fltr_list(&ethertype_list);
8935 	return err;
8936 }
8937 
8938 /**
8939  * ice_add_rx_lldp_filter - add ethertype filter for Rx LLDP frames
8940  * @sc: the device private structure
8941  *
8942  * Add a switch ethertype filter which forwards the LLDP frames to the main PF
8943  * VSI. Called when the fw_lldp_agent is disabled, to allow the LLDP frames to
8944  * be forwarded to the stack.
8945  */
8946 static void
8947 ice_add_rx_lldp_filter(struct ice_softc *sc)
8948 {
8949 	struct ice_list_head ethertype_list;
8950 	struct ice_vsi *vsi = &sc->pf_vsi;
8951 	struct ice_hw *hw = &sc->hw;
8952 	device_t dev = sc->dev;
8953 	enum ice_status status;
8954 	int err;
8955 	u16 vsi_num;
8956 
8957 	/*
8958 	 * If FW is new enough, use a direct AQ command to perform the filter
8959 	 * addition.
8960 	 */
8961 	if (ice_fw_supports_lldp_fltr_ctrl(hw)) {
8962 		vsi_num = ice_get_hw_vsi_num(hw, vsi->idx);
8963 		status = ice_lldp_fltr_add_remove(hw, vsi_num, true);
8964 		if (status) {
8965 			device_printf(dev,
8966 			    "Failed to add Rx LLDP filter, err %s aq_err %s\n",
8967 			    ice_status_str(status),
8968 			    ice_aq_str(hw->adminq.sq_last_status));
8969 		} else
8970 			ice_set_state(&sc->state,
8971 			    ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER);
8972 		return;
8973 	}
8974 
8975 	INIT_LIST_HEAD(&ethertype_list);
8976 
8977 	/* Forward Rx LLDP frames to the stack */
8978 	err = ice_add_ethertype_to_list(vsi, &ethertype_list,
8979 					ETHERTYPE_LLDP_FRAMES,
8980 					ICE_FLTR_RX, ICE_FWD_TO_VSI);
8981 	if (err) {
8982 		device_printf(dev,
8983 			      "Failed to add Rx LLDP filter, err %s\n",
8984 			      ice_err_str(err));
8985 		goto free_ethertype_list;
8986 	}
8987 
8988 	status = ice_add_eth_mac(hw, &ethertype_list);
8989 	if (status && status != ICE_ERR_ALREADY_EXISTS) {
8990 		device_printf(dev,
8991 			      "Failed to add Rx LLDP filter, err %s aq_err %s\n",
8992 			      ice_status_str(status),
8993 			      ice_aq_str(hw->adminq.sq_last_status));
8994 	} else {
8995 		/*
8996 		 * If status == ICE_ERR_ALREADY_EXISTS, we won't treat an
8997 		 * already existing filter as an error case.
8998 		 */
8999 		ice_set_state(&sc->state, ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER);
9000 	}
9001 
9002 free_ethertype_list:
9003 	ice_free_fltr_list(&ethertype_list);
9004 }
9005 
9006 /**
9007  * ice_del_rx_lldp_filter - Remove ethertype filter for Rx LLDP frames
9008  * @sc: the device private structure
9009  *
9010  * Remove the switch filter forwarding LLDP frames to the main PF VSI, called
9011  * when the firmware LLDP agent is enabled, to stop routing LLDP frames to the
9012  * stack.
9013  */
9014 static void
9015 ice_del_rx_lldp_filter(struct ice_softc *sc)
9016 {
9017 	struct ice_list_head ethertype_list;
9018 	struct ice_vsi *vsi = &sc->pf_vsi;
9019 	struct ice_hw *hw = &sc->hw;
9020 	device_t dev = sc->dev;
9021 	enum ice_status status;
9022 	int err;
9023 	u16 vsi_num;
9024 
9025 	/*
9026 	 * Only in the scenario where the driver added the filter during
9027 	 * this session (while the driver was loaded) would we be able to
9028 	 * delete this filter.
9029 	 */
9030 	if (!ice_test_state(&sc->state, ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER))
9031 		return;
9032 
9033 	/*
9034 	 * If FW is new enough, use a direct AQ command to perform the filter
9035 	 * removal.
9036 	 */
9037 	if (ice_fw_supports_lldp_fltr_ctrl(hw)) {
9038 		vsi_num = ice_get_hw_vsi_num(hw, vsi->idx);
9039 		status = ice_lldp_fltr_add_remove(hw, vsi_num, false);
9040 		if (status) {
9041 			device_printf(dev,
9042 			    "Failed to remove Rx LLDP filter, err %s aq_err %s\n",
9043 			    ice_status_str(status),
9044 			    ice_aq_str(hw->adminq.sq_last_status));
9045 		}
9046 		return;
9047 	}
9048 
9049 	INIT_LIST_HEAD(&ethertype_list);
9050 
9051 	/* Remove filter forwarding Rx LLDP frames to the stack */
9052 	err = ice_add_ethertype_to_list(vsi, &ethertype_list,
9053 					ETHERTYPE_LLDP_FRAMES,
9054 					ICE_FLTR_RX, ICE_FWD_TO_VSI);
9055 	if (err) {
9056 		device_printf(dev,
9057 			      "Failed to remove Rx LLDP filter, err %s\n",
9058 			      ice_err_str(err));
9059 		goto free_ethertype_list;
9060 	}
9061 
9062 	status = ice_remove_eth_mac(hw, &ethertype_list);
9063 	if (status == ICE_ERR_DOES_NOT_EXIST) {
9064 		; /* Don't complain if we try to remove a filter that doesn't exist */
9065 	} else if (status) {
9066 		device_printf(dev,
9067 			      "Failed to remove Rx LLDP filter, err %s aq_err %s\n",
9068 			      ice_status_str(status),
9069 			      ice_aq_str(hw->adminq.sq_last_status));
9070 	}
9071 
9072 free_ethertype_list:
9073 	ice_free_fltr_list(&ethertype_list);
9074 }
9075 
9076 /**
9077  * ice_init_link_configuration -- Setup link in different ways depending
9078  * on whether media is available or not.
9079  * @sc: device private structure
9080  *
9081  * Called at the end of the attach process to either set default link
9082  * parameters if there is media available, or force HW link down and
9083  * set a state bit if there is no media.
9084  */
9085 void
9086 ice_init_link_configuration(struct ice_softc *sc)
9087 {
9088 	struct ice_port_info *pi = sc->hw.port_info;
9089 	struct ice_hw *hw = &sc->hw;
9090 	device_t dev = sc->dev;
9091 	enum ice_status status;
9092 
9093 	pi->phy.get_link_info = true;
9094 	status = ice_get_link_status(pi, &sc->link_up);
9095 	if (status != ICE_SUCCESS) {
9096 		device_printf(dev,
9097 		    "%s: ice_get_link_status failed; status %s, aq_err %s\n",
9098 		    __func__, ice_status_str(status),
9099 		    ice_aq_str(hw->adminq.sq_last_status));
9100 		return;
9101 	}
9102 
9103 	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
9104 		ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
9105 		/* Apply default link settings */
9106 		ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
9107 	} else {
9108 		 /* Set link down, and poll for media available in timer. This prevents the
9109 		  * driver from receiving spurious link-related events.
9110 		  */
9111 		ice_set_state(&sc->state, ICE_STATE_NO_MEDIA);
9112 		status = ice_aq_set_link_restart_an(pi, false, NULL);
9113 		if (status != ICE_SUCCESS)
9114 			device_printf(dev,
9115 			    "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n",
9116 			    __func__, ice_status_str(status),
9117 			    ice_aq_str(hw->adminq.sq_last_status));
9118 	}
9119 }
9120 
9121 /**
9122  * ice_apply_saved_phy_req_to_cfg -- Write saved user PHY settings to cfg data
9123  * @sc: device private structure
9124  * @cfg: new PHY config data to be modified
9125  *
9126  * Applies user settings for advertised speeds to the PHY type fields in the
9127  * supplied PHY config struct. It uses the data from pcaps to check if the
9128  * saved settings are invalid and uses the pcaps data instead if they are
9129  * invalid.
9130  */
9131 static int
9132 ice_apply_saved_phy_req_to_cfg(struct ice_softc *sc,
9133 			       struct ice_aqc_set_phy_cfg_data *cfg)
9134 {
9135 	struct ice_phy_data phy_data = { 0 };
9136 	struct ice_port_info *pi = sc->hw.port_info;
9137 	u64 phy_low = 0, phy_high = 0;
9138 	u16 link_speeds;
9139 	int ret;
9140 
9141 	link_speeds = pi->phy.curr_user_speed_req;
9142 
9143 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LINK_MGMT_VER_2)) {
9144 		memset(&phy_data, 0, sizeof(phy_data));
9145 		phy_data.report_mode = ICE_AQC_REPORT_DFLT_CFG;
9146 		phy_data.user_speeds_orig = link_speeds;
9147 		ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9148 		if (ret != 0) {
9149 			/* Error message already printed within function */
9150 			return (ret);
9151 		}
9152 		phy_low = phy_data.phy_low_intr;
9153 		phy_high = phy_data.phy_high_intr;
9154 
9155 		if (link_speeds == 0 || phy_data.user_speeds_intr)
9156 			goto finalize_link_speed;
9157 		if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE)) {
9158 			memset(&phy_data, 0, sizeof(phy_data));
9159 			phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA;
9160 			phy_data.user_speeds_orig = link_speeds;
9161 			ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9162 			if (ret != 0) {
9163 				/* Error message already printed within function */
9164 				return (ret);
9165 			}
9166 			phy_low = phy_data.phy_low_intr;
9167 			phy_high = phy_data.phy_high_intr;
9168 
9169 			if (!phy_data.user_speeds_intr) {
9170 				phy_low = phy_data.phy_low_orig;
9171 				phy_high = phy_data.phy_high_orig;
9172 			}
9173 			goto finalize_link_speed;
9174 		}
9175 		/* If we're here, then it means the benefits of Version 2
9176 		 * link management aren't utilized.  We fall through to
9177 		 * handling Strict Link Mode the same as Version 1 link
9178 		 * management.
9179 		 */
9180 	}
9181 
9182 	memset(&phy_data, 0, sizeof(phy_data));
9183 	if ((link_speeds == 0) &&
9184 	    (sc->ldo_tlv.phy_type_low || sc->ldo_tlv.phy_type_high))
9185 		phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA;
9186 	else
9187 		phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_MEDIA;
9188 	phy_data.user_speeds_orig = link_speeds;
9189 	ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9190 	if (ret != 0) {
9191 		/* Error message already printed within function */
9192 		return (ret);
9193 	}
9194 	phy_low = phy_data.phy_low_intr;
9195 	phy_high = phy_data.phy_high_intr;
9196 
9197 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE)) {
9198 		if (phy_low == 0 && phy_high == 0) {
9199 			device_printf(sc->dev,
9200 			    "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
9201 			return (EINVAL);
9202 		}
9203 	} else {
9204 		if (link_speeds == 0) {
9205 			if (sc->ldo_tlv.phy_type_low & phy_low ||
9206 			    sc->ldo_tlv.phy_type_high & phy_high) {
9207 				phy_low &= sc->ldo_tlv.phy_type_low;
9208 				phy_high &= sc->ldo_tlv.phy_type_high;
9209 			}
9210 		} else if (phy_low == 0 && phy_high == 0) {
9211 			memset(&phy_data, 0, sizeof(phy_data));
9212 			phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA;
9213 			phy_data.user_speeds_orig = link_speeds;
9214 			ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9215 			if (ret != 0) {
9216 				/* Error message already printed within function */
9217 				return (ret);
9218 			}
9219 			phy_low = phy_data.phy_low_intr;
9220 			phy_high = phy_data.phy_high_intr;
9221 
9222 			if (!phy_data.user_speeds_intr) {
9223 				phy_low = phy_data.phy_low_orig;
9224 				phy_high = phy_data.phy_high_orig;
9225 			}
9226 		}
9227 	}
9228 
9229 finalize_link_speed:
9230 
9231 	/* Cache new user settings for speeds */
9232 	pi->phy.curr_user_speed_req = phy_data.user_speeds_intr;
9233 	cfg->phy_type_low = htole64(phy_low);
9234 	cfg->phy_type_high = htole64(phy_high);
9235 
9236 	return (ret);
9237 }
9238 
9239 /**
9240  * ice_apply_saved_fec_req_to_cfg -- Write saved user FEC mode to cfg data
9241  * @sc: device private structure
9242  * @cfg: new PHY config data to be modified
9243  *
9244  * Applies user setting for FEC mode to PHY config struct. It uses the data
9245  * from pcaps to check if the saved settings are invalid and uses the pcaps
9246  * data instead if they are invalid.
9247  */
9248 static int
9249 ice_apply_saved_fec_req_to_cfg(struct ice_softc *sc,
9250 			       struct ice_aqc_set_phy_cfg_data *cfg)
9251 {
9252 	struct ice_port_info *pi = sc->hw.port_info;
9253 	enum ice_status status;
9254 
9255 	cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC;
9256 	status = ice_cfg_phy_fec(pi, cfg, pi->phy.curr_user_fec_req);
9257 	if (status)
9258 		return (EIO);
9259 
9260 	return (0);
9261 }
9262 
9263 /**
9264  * ice_apply_saved_fc_req_to_cfg -- Write saved user flow control mode to cfg data
9265  * @pi: port info struct
9266  * @cfg: new PHY config data to be modified
9267  *
9268  * Applies user setting for flow control mode to PHY config struct. There are
9269  * no invalid flow control mode settings; if there are, then this function
9270  * treats them like "ICE_FC_NONE".
9271  */
9272 static void
9273 ice_apply_saved_fc_req_to_cfg(struct ice_port_info *pi,
9274 			      struct ice_aqc_set_phy_cfg_data *cfg)
9275 {
9276 	cfg->caps &= ~(ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY |
9277 		       ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY);
9278 
9279 	switch (pi->phy.curr_user_fc_req) {
9280 	case ICE_FC_FULL:
9281 		cfg->caps |= ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY |
9282 			     ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY;
9283 		break;
9284 	case ICE_FC_RX_PAUSE:
9285 		cfg->caps |= ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY;
9286 		break;
9287 	case ICE_FC_TX_PAUSE:
9288 		cfg->caps |= ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY;
9289 		break;
9290 	default:
9291 		/* ICE_FC_NONE */
9292 		break;
9293 	}
9294 }
9295 
9296 /**
9297  * ice_apply_saved_phy_cfg -- Re-apply user PHY config settings
9298  * @sc: device private structure
9299  * @settings: which settings to apply
9300  *
9301  * Applies user settings for advertised speeds, FEC mode, and flow
9302  * control mode to a PHY config struct; it uses the data from pcaps
9303  * to check if the saved settings are invalid and uses the pcaps
9304  * data instead if they are invalid.
9305  *
9306  * For things like sysctls where only one setting needs to be
9307  * updated, the bitmap allows the caller to specify which setting
9308  * to update.
9309  */
9310 int
9311 ice_apply_saved_phy_cfg(struct ice_softc *sc, u8 settings)
9312 {
9313 	struct ice_aqc_set_phy_cfg_data cfg = { 0 };
9314 	struct ice_port_info *pi = sc->hw.port_info;
9315 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
9316 	struct ice_hw *hw = &sc->hw;
9317 	device_t dev = sc->dev;
9318 	u64 phy_low, phy_high;
9319 	enum ice_status status;
9320 	enum ice_fec_mode dflt_fec_mode;
9321 	u16 dflt_user_speed;
9322 
9323 	if (!settings || settings > ICE_APPLY_LS_FEC_FC) {
9324 		ice_debug(hw, ICE_DBG_LINK, "Settings out-of-bounds: %u\n",
9325 		    settings);
9326 	}
9327 
9328 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
9329 				     &pcaps, NULL);
9330 	if (status != ICE_SUCCESS) {
9331 		device_printf(dev,
9332 		    "%s: ice_aq_get_phy_caps (ACTIVE) failed; status %s, aq_err %s\n",
9333 		    __func__, ice_status_str(status),
9334 		    ice_aq_str(hw->adminq.sq_last_status));
9335 		return (EIO);
9336 	}
9337 
9338 	phy_low = le64toh(pcaps.phy_type_low);
9339 	phy_high = le64toh(pcaps.phy_type_high);
9340 
9341 	/* Save off initial config parameters */
9342 	dflt_user_speed = ice_aq_phy_types_to_link_speeds(phy_low, phy_high);
9343 	dflt_fec_mode = ice_caps_to_fec_mode(pcaps.caps, pcaps.link_fec_options);
9344 
9345 	/* Setup new PHY config */
9346 	ice_copy_phy_caps_to_cfg(pi, &pcaps, &cfg);
9347 
9348 	/* On error, restore active configuration values */
9349 	if ((settings & ICE_APPLY_LS) &&
9350 	    ice_apply_saved_phy_req_to_cfg(sc, &cfg)) {
9351 		pi->phy.curr_user_speed_req = dflt_user_speed;
9352 		cfg.phy_type_low = pcaps.phy_type_low;
9353 		cfg.phy_type_high = pcaps.phy_type_high;
9354 	}
9355 	if ((settings & ICE_APPLY_FEC) &&
9356 	    ice_apply_saved_fec_req_to_cfg(sc, &cfg)) {
9357 		pi->phy.curr_user_fec_req = dflt_fec_mode;
9358 	}
9359 	if (settings & ICE_APPLY_FC) {
9360 		/* No real error indicators for this process,
9361 		 * so we'll just have to assume it works. */
9362 		ice_apply_saved_fc_req_to_cfg(pi, &cfg);
9363 	}
9364 
9365 	/* Enable link and re-negotiate it */
9366 	cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK;
9367 
9368 	status = ice_aq_set_phy_cfg(hw, pi, &cfg, NULL);
9369 	if (status != ICE_SUCCESS) {
9370 		/* Don't indicate failure if there's no media in the port.
9371 		 * The settings have been saved and will apply when media
9372 		 * is inserted.
9373 		 */
9374 		if ((status == ICE_ERR_AQ_ERROR) &&
9375 		    (hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY)) {
9376 			device_printf(dev,
9377 			    "%s: Setting will be applied when media is inserted\n",
9378 			    __func__);
9379 			return (0);
9380 		} else {
9381 			device_printf(dev,
9382 			    "%s: ice_aq_set_phy_cfg failed; status %s, aq_err %s\n",
9383 			    __func__, ice_status_str(status),
9384 			    ice_aq_str(hw->adminq.sq_last_status));
9385 			return (EIO);
9386 		}
9387 	}
9388 
9389 	return (0);
9390 }
9391 
9392 /**
9393  * ice_print_ldo_tlv - Print out LDO TLV information
9394  * @sc: device private structure
9395  * @tlv: LDO TLV information from the adapter NVM
9396  *
9397  * Dump out the information in tlv to the kernel message buffer; intended for
9398  * debugging purposes.
9399  */
9400 static void
9401 ice_print_ldo_tlv(struct ice_softc *sc, struct ice_link_default_override_tlv *tlv)
9402 {
9403 	device_t dev = sc->dev;
9404 
9405 	device_printf(dev, "TLV: -options     0x%02x\n", tlv->options);
9406 	device_printf(dev, "     -phy_config  0x%02x\n", tlv->phy_config);
9407 	device_printf(dev, "     -fec_options 0x%02x\n", tlv->fec_options);
9408 	device_printf(dev, "     -phy_high    0x%016llx\n",
9409 	    (unsigned long long)tlv->phy_type_high);
9410 	device_printf(dev, "     -phy_low     0x%016llx\n",
9411 	    (unsigned long long)tlv->phy_type_low);
9412 }
9413 
9414 /**
9415  * ice_set_link_management_mode -- Strict or lenient link management
9416  * @sc: device private structure
9417  *
9418  * Some NVMs give the adapter the option to advertise a superset of link
9419  * configurations.  This checks to see if that option is enabled.
9420  * Further, the NVM could also provide a specific set of configurations
9421  * to try; these are cached in the driver's private structure if they
9422  * are available.
9423  */
9424 void
9425 ice_set_link_management_mode(struct ice_softc *sc)
9426 {
9427 	struct ice_port_info *pi = sc->hw.port_info;
9428 	device_t dev = sc->dev;
9429 	struct ice_link_default_override_tlv tlv = { 0 };
9430 	enum ice_status status;
9431 
9432 	/* Port must be in strict mode if FW version is below a certain
9433 	 * version. (i.e. Don't set lenient mode features)
9434 	 */
9435 	if (!(ice_fw_supports_link_override(&sc->hw)))
9436 		return;
9437 
9438 	status = ice_get_link_default_override(&tlv, pi);
9439 	if (status != ICE_SUCCESS) {
9440 		device_printf(dev,
9441 		    "%s: ice_get_link_default_override failed; status %s, aq_err %s\n",
9442 		    __func__, ice_status_str(status),
9443 		    ice_aq_str(sc->hw.adminq.sq_last_status));
9444 		return;
9445 	}
9446 
9447 	if (sc->hw.debug_mask & ICE_DBG_LINK)
9448 		ice_print_ldo_tlv(sc, &tlv);
9449 
9450 	/* Set lenient link mode */
9451 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LENIENT_LINK_MODE) &&
9452 	    (!(tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE)))
9453 		ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_en);
9454 
9455 	/* FW supports reporting a default configuration */
9456 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LINK_MGMT_VER_2) &&
9457 	    ice_fw_supports_report_dflt_cfg(&sc->hw)) {
9458 		ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_en);
9459 		/* Knowing we're at a high enough firmware revision to
9460 		 * support this link management configuration, we don't
9461 		 * need to check/support earlier versions.
9462 		 */
9463 		return;
9464 	}
9465 
9466 	/* Default overrides only work if in lenient link mode */
9467 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LINK_MGMT_VER_1) &&
9468 	    ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE) &&
9469 	    (tlv.options & ICE_LINK_OVERRIDE_EN))
9470 		ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_en);
9471 
9472 	/* Cache the LDO TLV structure in the driver, since it
9473 	 * won't change during the driver's lifetime.
9474 	 */
9475 	sc->ldo_tlv = tlv;
9476 }
9477 
9478 /**
9479  * ice_init_saved_phy_cfg -- Set cached user PHY cfg settings with NVM defaults
9480  * @sc: device private structure
9481  *
9482  * This should be called before the tunables for these link settings
9483  * (e.g. advertise_speed) are added -- so that these defaults don't overwrite
9484  * the cached values that the sysctl handlers will write.
9485  *
9486  * This also needs to be called before ice_init_link_configuration, to ensure
9487  * that there are sane values that can be written if there is media available
9488  * in the port.
9489  */
9490 void
9491 ice_init_saved_phy_cfg(struct ice_softc *sc)
9492 {
9493 	struct ice_port_info *pi = sc->hw.port_info;
9494 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
9495 	struct ice_hw *hw = &sc->hw;
9496 	device_t dev = sc->dev;
9497 	enum ice_status status;
9498 	u64 phy_low, phy_high;
9499 	u8 report_mode = ICE_AQC_REPORT_TOPO_CAP_MEDIA;
9500 
9501 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LINK_MGMT_VER_2))
9502 		report_mode = ICE_AQC_REPORT_DFLT_CFG;
9503 	status = ice_aq_get_phy_caps(pi, false, report_mode, &pcaps, NULL);
9504 	if (status != ICE_SUCCESS) {
9505 		device_printf(dev,
9506 		    "%s: ice_aq_get_phy_caps (%s) failed; status %s, aq_err %s\n",
9507 		    __func__,
9508 		    report_mode == ICE_AQC_REPORT_DFLT_CFG ? "DFLT" : "w/MEDIA",
9509 		    ice_status_str(status),
9510 		    ice_aq_str(hw->adminq.sq_last_status));
9511 		return;
9512 	}
9513 
9514 	phy_low = le64toh(pcaps.phy_type_low);
9515 	phy_high = le64toh(pcaps.phy_type_high);
9516 
9517 	/* Save off initial config parameters */
9518 	pi->phy.curr_user_speed_req =
9519 	   ice_aq_phy_types_to_link_speeds(phy_low, phy_high);
9520 	pi->phy.curr_user_fec_req = ice_caps_to_fec_mode(pcaps.caps,
9521 	    pcaps.link_fec_options);
9522 	pi->phy.curr_user_fc_req = ice_caps_to_fc_mode(pcaps.caps);
9523 }
9524 
/**
 * ice_module_init - Driver callback to handle module load
 *
 * Runs once when the module is loaded and sets up state that lives for as
 * long as the driver is loaded. Currently this only calls ice_rdma_init().
 *
 * @returns 0 unconditionally.
 */
static int
ice_module_init(void)
{
	/* The only module-lifetime state set up here is the RDMA side */
	ice_rdma_init();

	return (0);
}
9537 
/**
 * ice_module_exit - Driver callback to handle module exit
 *
 * Runs when the module is unloaded and is the counterpart of
 * ice_module_init(). Currently this only calls ice_rdma_exit().
 *
 * A non-zero return value would prevent the module from being unloaded, so
 * that should be reserved for cases where unloading is impossible, such as
 * outstanding memory references that cannot be revoked.
 *
 * @returns 0 unconditionally.
 */
static int
ice_module_exit(void)
{
	/* Counterpart of the ice_rdma_init() call made at module load */
	ice_rdma_exit();

	return (0);
}
9554 
9555 /**
9556  * ice_module_event_handler - Callback for module events
9557  * @mod: unused module_t parameter
9558  * @what: the event requested
9559  * @arg: unused event argument
9560  *
9561  * Callback used to handle module events from the stack. Used to allow the
9562  * driver to define custom behavior that should happen at module load and
9563  * unload.
9564  */
9565 int
9566 ice_module_event_handler(module_t __unused mod, int what, void __unused *arg)
9567 {
9568 	switch (what) {
9569 	case MOD_LOAD:
9570 		return ice_module_init();
9571 	case MOD_UNLOAD:
9572 		return ice_module_exit();
9573 	default:
9574 		/* TODO: do we need to handle MOD_QUIESCE and MOD_SHUTDOWN? */
9575 		return (EOPNOTSUPP);
9576 	}
9577 }
9578 
9579 /**
9580  * ice_handle_nvm_access_ioctl - Handle an NVM access ioctl request
9581  * @sc: the device private softc
9582  * @ifd: ifdrv ioctl request pointer
9583  */
9584 int
9585 ice_handle_nvm_access_ioctl(struct ice_softc *sc, struct ifdrv *ifd)
9586 {
9587 	union ice_nvm_access_data *data;
9588 	struct ice_nvm_access_cmd *cmd;
9589 	size_t ifd_len = ifd->ifd_len, malloc_len;
9590 	struct ice_hw *hw = &sc->hw;
9591 	device_t dev = sc->dev;
9592 	enum ice_status status;
9593 	u8 *nvm_buffer;
9594 	int err;
9595 
9596 	/*
9597 	 * ifioctl forwards SIOCxDRVSPEC to iflib without performing
9598 	 * a privilege check. In turn, iflib forwards the ioctl to the driver
9599 	 * without performing a privilege check. Perform one here to ensure
9600 	 * that non-privileged threads cannot access this interface.
9601 	 */
9602 	err = priv_check(curthread, PRIV_DRIVER);
9603 	if (err)
9604 		return (err);
9605 
9606 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
9607 		device_printf(dev, "%s: Driver must rebuild data structures after a reset. Operation aborted.\n",
9608 			      __func__);
9609 		return (EBUSY);
9610 	}
9611 
9612 	if (ifd_len < sizeof(struct ice_nvm_access_cmd)) {
9613 		device_printf(dev, "%s: ifdrv length is too small. Got %zu, but expected %zu\n",
9614 			      __func__, ifd_len, sizeof(struct ice_nvm_access_cmd));
9615 		return (EINVAL);
9616 	}
9617 
9618 	if (ifd->ifd_data == NULL) {
9619 		device_printf(dev, "%s: ifd data buffer not present.\n",
9620 			      __func__);
9621 		return (EINVAL);
9622 	}
9623 
9624 	/*
9625 	 * If everything works correctly, ice_handle_nvm_access should not
9626 	 * modify data past the size of the ioctl length. However, it could
9627 	 * lead to memory corruption if it did. Make sure to allocate at least
9628 	 * enough space for the command and data regardless. This
9629 	 * ensures that any access to the data union will not access invalid
9630 	 * memory.
9631 	 */
9632 	malloc_len = max(ifd_len, sizeof(*data) + sizeof(*cmd));
9633 
9634 	nvm_buffer = (u8 *)malloc(malloc_len, M_ICE, M_ZERO | M_WAITOK);
9635 	if (!nvm_buffer)
9636 		return (ENOMEM);
9637 
9638 	/* Copy the NVM access command and data in from user space */
9639 	/* coverity[tainted_data_argument] */
9640 	err = copyin(ifd->ifd_data, nvm_buffer, ifd_len);
9641 	if (err) {
9642 		device_printf(dev, "%s: Copying request from user space failed, err %s\n",
9643 			      __func__, ice_err_str(err));
9644 		goto cleanup_free_nvm_buffer;
9645 	}
9646 
9647 	/*
9648 	 * The NVM command structure is immediately followed by data which
9649 	 * varies in size based on the command.
9650 	 */
9651 	cmd = (struct ice_nvm_access_cmd *)nvm_buffer;
9652 	data = (union ice_nvm_access_data *)(nvm_buffer + sizeof(struct ice_nvm_access_cmd));
9653 
9654 	/* Handle the NVM access request */
9655 	status = ice_handle_nvm_access(hw, cmd, data);
9656 	if (status)
9657 		ice_debug(hw, ICE_DBG_NVM,
9658 			  "NVM access request failed, err %s\n",
9659 			  ice_status_str(status));
9660 
9661 	/* Copy the possibly modified contents of the handled request out */
9662 	err = copyout(nvm_buffer, ifd->ifd_data, ifd_len);
9663 	if (err) {
9664 		device_printf(dev, "%s: Copying response back to user space failed, err %s\n",
9665 			      __func__, ice_err_str(err));
9666 		goto cleanup_free_nvm_buffer;
9667 	}
9668 
9669 	/* Convert private status to an error code for proper ioctl response */
9670 	switch (status) {
9671 	case ICE_SUCCESS:
9672 		err = (0);
9673 		break;
9674 	case ICE_ERR_NO_MEMORY:
9675 		err = (ENOMEM);
9676 		break;
9677 	case ICE_ERR_OUT_OF_RANGE:
9678 		err = (ENOTTY);
9679 		break;
9680 	case ICE_ERR_PARAM:
9681 	default:
9682 		err = (EINVAL);
9683 		break;
9684 	}
9685 
9686 cleanup_free_nvm_buffer:
9687 	free(nvm_buffer, M_ICE);
9688 	return err;
9689 }
9690 
9691 /**
9692  * ice_read_sff_eeprom - Read data from SFF eeprom
9693  * @sc: device softc
9694  * @dev_addr: I2C device address (typically 0xA0 or 0xA2)
9695  * @offset: offset into the eeprom
9696  * @data: pointer to data buffer to store read data in
9697  * @length: length to read; max length is 16
9698  *
9699  * Read from the SFF eeprom in the module for this PF's port. For more details
9700  * on the contents of an SFF eeprom, refer to SFF-8724 (SFP), SFF-8636 (QSFP),
9701  * and SFF-8024 (both).
9702  */
9703 int
9704 ice_read_sff_eeprom(struct ice_softc *sc, u16 dev_addr, u16 offset, u8* data, u16 length)
9705 {
9706 	struct ice_hw *hw = &sc->hw;
9707 	int ret = 0, retries = 0;
9708 	enum ice_status status;
9709 
9710 	if (length > 16)
9711 		return (EINVAL);
9712 
9713 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
9714 		return (ENOSYS);
9715 
9716 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA))
9717 		return (ENXIO);
9718 
9719 	do {
9720 		status = ice_aq_sff_eeprom(hw, 0, dev_addr,
9721 					   offset, 0, 0, data, length,
9722 					   false, NULL);
9723 		if (!status) {
9724 			ret = 0;
9725 			break;
9726 		}
9727 		if (status == ICE_ERR_AQ_ERROR &&
9728 		    hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY) {
9729 			ret = EBUSY;
9730 			continue;
9731 		}
9732 		if (status == ICE_ERR_AQ_ERROR &&
9733 		    hw->adminq.sq_last_status == ICE_AQ_RC_EACCES) {
9734 			/* FW says I2C access isn't supported */
9735 			ret = EACCES;
9736 			break;
9737 		}
9738 		if (status == ICE_ERR_AQ_ERROR &&
9739 		    hw->adminq.sq_last_status == ICE_AQ_RC_EPERM) {
9740 			device_printf(sc->dev,
9741 				  "%s: Module pointer location specified in command does not permit the required operation.\n",
9742 				  __func__);
9743 			ret = EPERM;
9744 			break;
9745 		} else {
9746 			device_printf(sc->dev,
9747 				  "%s: Error reading I2C data: err %s aq_err %s\n",
9748 				  __func__, ice_status_str(status),
9749 				  ice_aq_str(hw->adminq.sq_last_status));
9750 			ret = EIO;
9751 			break;
9752 		}
9753 	} while (retries++ < ICE_I2C_MAX_RETRIES);
9754 
9755 	if (ret == EBUSY)
9756 		device_printf(sc->dev,
9757 			  "%s: Error reading I2C data after %d retries\n",
9758 			  __func__, ICE_I2C_MAX_RETRIES);
9759 
9760 	return (ret);
9761 }
9762 
9763 /**
9764  * ice_handle_i2c_req - Driver independent I2C request handler
9765  * @sc: device softc
9766  * @req: The I2C parameters to use
9767  *
9768  * Read from the port's I2C eeprom using the parameters from the ioctl.
9769  */
9770 int
9771 ice_handle_i2c_req(struct ice_softc *sc, struct ifi2creq *req)
9772 {
9773 	return ice_read_sff_eeprom(sc, req->dev_addr, req->offset, req->data, req->len);
9774 }
9775 
9776 /**
9777  * ice_sysctl_read_i2c_diag_data - Read some module diagnostic data via i2c
9778  * @oidp: sysctl oid structure
9779  * @arg1: pointer to private data structure
9780  * @arg2: unused
9781  * @req: sysctl request pointer
9782  *
9783  * Read 8 bytes of diagnostic data from the SFF eeprom in the (Q)SFP module
9784  * inserted into the port.
9785  *
9786  *             | SFP A2  | QSFP Lower Page
9787  * ------------|---------|----------------
9788  * Temperature | 96-97	 | 22-23
9789  * Vcc         | 98-99   | 26-27
9790  * TX power    | 102-103 | 34-35..40-41
9791  * RX power    | 104-105 | 50-51..56-57
9792  */
9793 static int
9794 ice_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS)
9795 {
9796 	struct ice_softc *sc = (struct ice_softc *)arg1;
9797 	device_t dev = sc->dev;
9798 	struct sbuf *sbuf;
9799 	int ret;
9800 	u8 data[16];
9801 
9802 	UNREFERENCED_PARAMETER(arg2);
9803 	UNREFERENCED_PARAMETER(oidp);
9804 
9805 	if (ice_driver_is_detaching(sc))
9806 		return (ESHUTDOWN);
9807 
9808 	if (req->oldptr == NULL) {
9809 		ret = SYSCTL_OUT(req, 0, 128);
9810 		return (ret);
9811 	}
9812 
9813 	ret = ice_read_sff_eeprom(sc, 0xA0, 0, data, 1);
9814 	if (ret)
9815 		return (ret);
9816 
9817 	/* 0x3 for SFP; 0xD/0x11 for QSFP+/QSFP28 */
9818 	if (data[0] == 0x3) {
9819 		/*
9820 		 * Check for:
9821 		 * - Internally calibrated data
9822 		 * - Diagnostic monitoring is implemented
9823 		 */
9824 		ice_read_sff_eeprom(sc, 0xA0, 92, data, 1);
9825 		if (!(data[0] & 0x60)) {
9826 			device_printf(dev, "Module doesn't support diagnostics: 0xA0[92] = %02X\n", data[0]);
9827 			return (ENODEV);
9828 		}
9829 
9830 		sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
9831 
9832 		ice_read_sff_eeprom(sc, 0xA2, 96, data, 4);
9833 		for (int i = 0; i < 4; i++)
9834 			sbuf_printf(sbuf, "%02X ", data[i]);
9835 
9836 		ice_read_sff_eeprom(sc, 0xA2, 102, data, 4);
9837 		for (int i = 0; i < 4; i++)
9838 			sbuf_printf(sbuf, "%02X ", data[i]);
9839 	} else if (data[0] == 0xD || data[0] == 0x11) {
9840 		/*
9841 		 * QSFP+ modules are always internally calibrated, and must indicate
9842 		 * what types of diagnostic monitoring are implemented
9843 		 */
9844 		sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
9845 
9846 		ice_read_sff_eeprom(sc, 0xA0, 22, data, 2);
9847 		for (int i = 0; i < 2; i++)
9848 			sbuf_printf(sbuf, "%02X ", data[i]);
9849 
9850 		ice_read_sff_eeprom(sc, 0xA0, 26, data, 2);
9851 		for (int i = 0; i < 2; i++)
9852 			sbuf_printf(sbuf, "%02X ", data[i]);
9853 
9854 		ice_read_sff_eeprom(sc, 0xA0, 34, data, 2);
9855 		for (int i = 0; i < 2; i++)
9856 			sbuf_printf(sbuf, "%02X ", data[i]);
9857 
9858 		ice_read_sff_eeprom(sc, 0xA0, 50, data, 2);
9859 		for (int i = 0; i < 2; i++)
9860 			sbuf_printf(sbuf, "%02X ", data[i]);
9861 	} else {
9862 		device_printf(dev, "Module is not SFP/SFP+/SFP28/QSFP+ (%02X)\n", data[0]);
9863 		return (ENODEV);
9864 	}
9865 
9866 	sbuf_finish(sbuf);
9867 	sbuf_delete(sbuf);
9868 
9869 	return (0);
9870 }
9871 
9872 /**
9873  * ice_alloc_intr_tracking - Setup interrupt tracking structures
9874  * @sc: device softc structure
9875  *
9876  * Sets up the resource manager for keeping track of interrupt allocations,
9877  * and initializes the tracking maps for the PF's interrupt allocations.
9878  *
9879  * Unlike the scheme for queues, this is done in one step since both the
9880  * manager and the maps both have the same lifetime.
9881  *
9882  * @returns 0 on success, or an error code on failure.
9883  */
9884 int
9885 ice_alloc_intr_tracking(struct ice_softc *sc)
9886 {
9887 	struct ice_hw *hw = &sc->hw;
9888 	device_t dev = sc->dev;
9889 	int err;
9890 
9891 	/* Initialize the interrupt allocation manager */
9892 	err = ice_resmgr_init_contig_only(&sc->imgr,
9893 	    hw->func_caps.common_cap.num_msix_vectors);
9894 	if (err) {
9895 		device_printf(dev, "Unable to initialize PF interrupt manager: %s\n",
9896 			      ice_err_str(err));
9897 		return (err);
9898 	}
9899 
9900 	/* Allocate PF interrupt mapping storage */
9901 	if (!(sc->pf_imap =
9902 	      (u16 *)malloc(sizeof(u16) * hw->func_caps.common_cap.num_msix_vectors,
9903 	      M_ICE, M_NOWAIT))) {
9904 		device_printf(dev, "Unable to allocate PF imap memory\n");
9905 		err = ENOMEM;
9906 		goto free_imgr;
9907 	}
9908 	if (!(sc->rdma_imap =
9909 	      (u16 *)malloc(sizeof(u16) * hw->func_caps.common_cap.num_msix_vectors,
9910 	      M_ICE, M_NOWAIT))) {
9911 		device_printf(dev, "Unable to allocate RDMA imap memory\n");
9912 		err = ENOMEM;
9913 		free(sc->pf_imap, M_ICE);
9914 		goto free_imgr;
9915 	}
9916 	for (u32 i = 0; i < hw->func_caps.common_cap.num_msix_vectors; i++) {
9917 		sc->pf_imap[i] = ICE_INVALID_RES_IDX;
9918 		sc->rdma_imap[i] = ICE_INVALID_RES_IDX;
9919 	}
9920 
9921 	return (0);
9922 
9923 free_imgr:
9924 	ice_resmgr_destroy(&sc->imgr);
9925 	return (err);
9926 }
9927 
9928 /**
9929  * ice_free_intr_tracking - Free PF interrupt tracking structures
9930  * @sc: device softc structure
9931  *
9932  * Frees the interrupt resource allocation manager and the PF's owned maps.
9933  *
9934  * VF maps are released when the owning VF's are destroyed, which should always
9935  * happen before this function is called.
9936  */
9937 void
9938 ice_free_intr_tracking(struct ice_softc *sc)
9939 {
9940 	if (sc->pf_imap) {
9941 		ice_resmgr_release_map(&sc->imgr, sc->pf_imap,
9942 				       sc->lan_vectors);
9943 		free(sc->pf_imap, M_ICE);
9944 		sc->pf_imap = NULL;
9945 	}
9946 	if (sc->rdma_imap) {
9947 		ice_resmgr_release_map(&sc->imgr, sc->rdma_imap,
9948 				       sc->lan_vectors);
9949 		free(sc->rdma_imap, M_ICE);
9950 		sc->rdma_imap = NULL;
9951 	}
9952 
9953 	ice_resmgr_destroy(&sc->imgr);
9954 }
9955 
9956 /**
9957  * ice_apply_supported_speed_filter - Mask off unsupported speeds
9958  * @report_speeds: bit-field for the desired link speeds
9959  * @mod_type: type of module/sgmii connection we have
9960  *
9961  * Given a bitmap of the desired lenient mode link speeds,
9962  * this function will mask off the speeds that are not currently
9963  * supported by the device.
9964  */
9965 static u16
9966 ice_apply_supported_speed_filter(u16 report_speeds, u8 mod_type)
9967 {
9968 	u16 speed_mask;
9969 	enum { IS_SGMII, IS_SFP, IS_QSFP } module;
9970 
9971 	/*
9972 	 * The SFF specification says 0 is unknown, so we'll
9973 	 * treat it like we're connected through SGMII for now.
9974 	 * This may need revisiting if a new type is supported
9975 	 * in the future.
9976 	 */
9977 	switch (mod_type) {
9978 	case 0:
9979 		module = IS_SGMII;
9980 		break;
9981 	case 3:
9982 		module = IS_SFP;
9983 		break;
9984 	default:
9985 		module = IS_QSFP;
9986 		break;
9987 	}
9988 
9989 	/* We won't offer anything lower than 100M for any part,
9990 	 * but we'll need to mask off other speeds based on the
9991 	 * device and module type.
9992 	 */
9993 	speed_mask = ~((u16)ICE_AQ_LINK_SPEED_100MB - 1);
9994 	if ((report_speeds & ICE_AQ_LINK_SPEED_10GB) && (module == IS_SFP))
9995 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1);
9996 	if (report_speeds & ICE_AQ_LINK_SPEED_25GB)
9997 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1);
9998 	if (report_speeds & ICE_AQ_LINK_SPEED_50GB) {
9999 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1);
10000 		if (module == IS_QSFP)
10001 			speed_mask = ~((u16)ICE_AQ_LINK_SPEED_10GB - 1);
10002 	}
10003 	if (report_speeds & ICE_AQ_LINK_SPEED_100GB)
10004 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_25GB - 1);
10005 	return (report_speeds & speed_mask);
10006 }
10007 
10008 /**
10009  * ice_init_health_events - Enable FW health event reporting
10010  * @sc: device softc
10011  *
10012  * Will try to enable firmware health event reporting, but shouldn't
10013  * cause any grief (to the caller) if this fails.
10014  */
10015 void
10016 ice_init_health_events(struct ice_softc *sc)
10017 {
10018 	enum ice_status status;
10019 	u8 health_mask;
10020 
10021 	if ((!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_HEALTH_STATUS)) ||
10022 		(!sc->enable_health_events))
10023 		return;
10024 
10025 	health_mask = ICE_AQC_HEALTH_STATUS_SET_PF_SPECIFIC_MASK |
10026 		      ICE_AQC_HEALTH_STATUS_SET_GLOBAL_MASK;
10027 
10028 	status = ice_aq_set_health_status_config(&sc->hw, health_mask, NULL);
10029 	if (status)
10030 		device_printf(sc->dev,
10031 		    "Failed to enable firmware health events, err %s aq_err %s\n",
10032 		    ice_status_str(status),
10033 		    ice_aq_str(sc->hw.adminq.sq_last_status));
10034 	else
10035 		ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_en);
10036 }
10037 
10038 /**
10039  * ice_print_health_status_string - Print message for given FW health event
10040  * @dev: the PCIe device
10041  * @elem: health status element containing status code
10042  *
10043  * A rather large list of possible health status codes and their associated
10044  * messages.
10045  */
10046 static void
10047 ice_print_health_status_string(device_t dev,
10048 			       struct ice_aqc_health_status_elem *elem)
10049 {
10050 	u16 status_code = le16toh(elem->health_status_code);
10051 
10052 	switch (status_code) {
10053 	case ICE_AQC_HEALTH_STATUS_INFO_RECOVERY:
10054 		device_printf(dev, "The device is in firmware recovery mode.\n");
10055 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10056 		break;
10057 	case ICE_AQC_HEALTH_STATUS_ERR_FLASH_ACCESS:
10058 		device_printf(dev, "The flash chip cannot be accessed.\n");
10059 		device_printf(dev, "Possible Solution: If issue persists, call customer support.\n");
10060 		break;
10061 	case ICE_AQC_HEALTH_STATUS_ERR_NVM_AUTH:
10062 		device_printf(dev, "NVM authentication failed.\n");
10063 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10064 		break;
10065 	case ICE_AQC_HEALTH_STATUS_ERR_OROM_AUTH:
10066 		device_printf(dev, "Option ROM authentication failed.\n");
10067 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10068 		break;
10069 	case ICE_AQC_HEALTH_STATUS_ERR_DDP_AUTH:
10070 		device_printf(dev, "DDP package failed.\n");
10071 		device_printf(dev, "Possible Solution: Update to latest base driver and DDP package.\n");
10072 		break;
10073 	case ICE_AQC_HEALTH_STATUS_ERR_NVM_COMPAT:
10074 		device_printf(dev, "NVM image is incompatible.\n");
10075 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10076 		break;
10077 	case ICE_AQC_HEALTH_STATUS_ERR_OROM_COMPAT:
10078 		device_printf(dev, "Option ROM is incompatible.\n");
10079 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10080 		break;
10081 	case ICE_AQC_HEALTH_STATUS_ERR_DCB_MIB:
10082 		device_printf(dev, "Supplied MIB file is invalid. DCB reverted to default configuration.\n");
10083 		device_printf(dev, "Possible Solution: Disable FW-LLDP and check DCBx system configuration.\n");
10084 		break;
10085 	case ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_STRICT:
10086 		device_printf(dev, "An unsupported module was detected.\n");
10087 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10088 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10089 		break;
10090 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_TYPE:
10091 		device_printf(dev, "Module type is not supported.\n");
10092 		device_printf(dev, "Possible Solution: Change or replace the module or cable.\n");
10093 		break;
10094 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_QUAL:
10095 		device_printf(dev, "Module is not qualified.\n");
10096 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10097 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10098 		device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n");
10099 		break;
10100 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_COMM:
10101 		device_printf(dev, "Device cannot communicate with the module.\n");
10102 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10103 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10104 		device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n");
10105 		break;
10106 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_CONFLICT:
10107 		device_printf(dev, "Unresolved module conflict.\n");
10108 		device_printf(dev, "Possible Solution 1: Manually set speed/duplex or use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10109 		device_printf(dev, "Possible Solution 2: If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.\n");
10110 		break;
10111 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_NOT_PRESENT:
10112 		device_printf(dev, "Module is not present.\n");
10113 		device_printf(dev, "Possible Solution 1: Check that the module is inserted correctly.\n");
10114 		device_printf(dev, "Possible Solution 2: If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.\n");
10115 		break;
10116 	case ICE_AQC_HEALTH_STATUS_INFO_MOD_UNDERUTILIZED:
10117 		device_printf(dev, "Underutilized module.\n");
10118 		device_printf(dev, "Possible Solution 1: Change or replace the module or cable.\n");
10119 		device_printf(dev, "Possible Solution 2: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10120 		break;
10121 	case ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_LENIENT:
10122 		device_printf(dev, "An unsupported module was detected.\n");
10123 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10124 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10125 		device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n");
10126 		break;
10127 	case ICE_AQC_HEALTH_STATUS_ERR_INVALID_LINK_CFG:
10128 		device_printf(dev, "Invalid link configuration.\n");
10129 		break;
10130 	case ICE_AQC_HEALTH_STATUS_ERR_PORT_ACCESS:
10131 		device_printf(dev, "Port hardware access error.\n");
10132 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10133 		break;
10134 	case ICE_AQC_HEALTH_STATUS_ERR_PORT_UNREACHABLE:
10135 		device_printf(dev, "A port is unreachable.\n");
10136 		device_printf(dev, "Possible Solution 1: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10137 		device_printf(dev, "Possible Solution 2: Update to the latest NVM image.\n");
10138 		break;
10139 	case ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_MOD_LIMITED:
10140 		device_printf(dev, "Port speed is limited due to module.\n");
10141 		device_printf(dev, "Possible Solution: Change the module or use Intel(R) Ethernet Port Configuration Tool to configure the port option to match the current module speed.\n");
10142 		break;
10143 	case ICE_AQC_HEALTH_STATUS_ERR_PARALLEL_FAULT:
10144 		device_printf(dev, "A parallel fault was detected.\n");
10145 		device_printf(dev, "Possible Solution: Check link partner connection and configuration.\n");
10146 		break;
10147 	case ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_PHY_LIMITED:
10148 		device_printf(dev, "Port speed is limited by PHY capabilities.\n");
10149 		device_printf(dev, "Possible Solution 1: Change the module to align to port option.\n");
10150 		device_printf(dev, "Possible Solution 2: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10151 		break;
10152 	case ICE_AQC_HEALTH_STATUS_ERR_NETLIST_TOPO:
10153 		device_printf(dev, "LOM topology netlist is corrupted.\n");
10154 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10155 		break;
10156 	case ICE_AQC_HEALTH_STATUS_ERR_NETLIST:
10157 		device_printf(dev, "Unrecoverable netlist error.\n");
10158 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10159 		break;
10160 	case ICE_AQC_HEALTH_STATUS_ERR_TOPO_CONFLICT:
10161 		device_printf(dev, "Port topology conflict.\n");
10162 		device_printf(dev, "Possible Solution 1: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10163 		device_printf(dev, "Possible Solution 2: Update to the latest NVM image.\n");
10164 		break;
10165 	case ICE_AQC_HEALTH_STATUS_ERR_LINK_HW_ACCESS:
10166 		device_printf(dev, "Unrecoverable hardware access error.\n");
10167 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10168 		break;
10169 	case ICE_AQC_HEALTH_STATUS_ERR_LINK_RUNTIME:
10170 		device_printf(dev, "Unrecoverable runtime error.\n");
10171 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10172 		break;
10173 	case ICE_AQC_HEALTH_STATUS_ERR_DNL_INIT:
10174 		device_printf(dev, "Link management engine failed to initialize.\n");
10175 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10176 		break;
10177 	default:
10178 		break;
10179 	}
10180 }
10181 
10182 /**
10183  * ice_handle_health_status_event - helper function to output health status
10184  * @sc: device softc structure
10185  * @event: event received on a control queue
10186  *
10187  * Prints out the appropriate string based on the given Health Status Event
10188  * code.
10189  */
10190 static void
10191 ice_handle_health_status_event(struct ice_softc *sc,
10192 			       struct ice_rq_event_info *event)
10193 {
10194 	struct ice_aqc_health_status_elem *health_info;
10195 	u16 status_count;
10196 	int i;
10197 
10198 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_HEALTH_STATUS))
10199 		return;
10200 
10201 	health_info = (struct ice_aqc_health_status_elem *)event->msg_buf;
10202 	status_count = le16toh(event->desc.params.get_health_status.health_status_count);
10203 
10204 	if (status_count > (event->buf_len / sizeof(*health_info))) {
10205 		device_printf(sc->dev, "Received a health status event with invalid event count\n");
10206 		return;
10207 	}
10208 
10209 	for (i = 0; i < status_count; i++) {
10210 		ice_print_health_status_string(sc->dev, health_info);
10211 		health_info++;
10212 	}
10213 }
10214 
10215 /**
10216  * ice_set_default_local_lldp_mib - Possibly apply local LLDP MIB to FW
10217  * @sc: device softc structure
10218  *
10219  * This function needs to be called after link up; it makes sure the FW has
10220  * certain PFC/DCB settings. In certain configurations this will re-apply a
10221  * default local LLDP MIB configuration; this is intended to workaround a FW
10222  * behavior where these settings seem to be cleared on link up.
10223  */
10224 void
10225 ice_set_default_local_lldp_mib(struct ice_softc *sc)
10226 {
10227 	struct ice_hw *hw = &sc->hw;
10228 	struct ice_port_info *pi;
10229 	device_t dev = sc->dev;
10230 	enum ice_status status;
10231 
10232 	/* Set Local MIB can disrupt flow control settings for
10233 	 * non-DCB-supported devices.
10234 	 */
10235 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_DCB))
10236 		return;
10237 
10238 	pi = hw->port_info;
10239 
10240 	/* Don't overwrite a custom SW configuration */
10241 	if (!pi->qos_cfg.is_sw_lldp &&
10242 	    !ice_test_state(&sc->state, ICE_STATE_MULTIPLE_TCS))
10243 		ice_set_default_local_mib_settings(sc);
10244 
10245 	status = ice_set_dcb_cfg(pi);
10246 
10247 	if (status)
10248 		device_printf(dev,
10249 		    "Error setting Local LLDP MIB: %s aq_err %s\n",
10250 		    ice_status_str(status),
10251 		    ice_aq_str(hw->adminq.sq_last_status));
10252 }
10253 
10254 /**
10255  * ice_sbuf_print_ets_cfg - Helper function to print ETS cfg
10256  * @sbuf: string buffer to print to
10257  * @name: prefix string to use
10258  * @ets: structure to pull values from
10259  *
10260  * A helper function for ice_sysctl_dump_dcbx_cfg(), this
10261  * formats the ETS rec and cfg TLVs into text.
10262  */
10263 static void
10264 ice_sbuf_print_ets_cfg(struct sbuf *sbuf, const char *name, struct ice_dcb_ets_cfg *ets)
10265 {
10266 	sbuf_printf(sbuf, "%s.willing: %u\n", name, ets->willing);
10267 	sbuf_printf(sbuf, "%s.cbs: %u\n", name, ets->cbs);
10268 	sbuf_printf(sbuf, "%s.maxtcs: %u\n", name, ets->maxtcs);
10269 
10270 	sbuf_printf(sbuf, "%s.prio_table:", name);
10271 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10272 		sbuf_printf(sbuf, " %d", ets->prio_table[i]);
10273 	sbuf_printf(sbuf, "\n");
10274 
10275 	sbuf_printf(sbuf, "%s.tcbwtable:", name);
10276 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10277 		sbuf_printf(sbuf, " %d", ets->tcbwtable[i]);
10278 	sbuf_printf(sbuf, "\n");
10279 
10280 	sbuf_printf(sbuf, "%s.tsatable:", name);
10281 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10282 		sbuf_printf(sbuf, " %d", ets->tsatable[i]);
10283 	sbuf_printf(sbuf, "\n");
10284 }
10285 
10286 /**
10287  * ice_sysctl_dump_dcbx_cfg - Print out DCBX/DCB config info
10288  * @oidp: sysctl oid structure
10289  * @arg1: pointer to private data structure
10290  * @arg2: AQ define for either Local or Remote MIB
10291  * @req: sysctl request pointer
10292  *
10293  * Prints out DCB/DCBX configuration, including the contents
10294  * of either the local or remote MIB, depending on the value
10295  * used in arg2.
10296  */
10297 static int
10298 ice_sysctl_dump_dcbx_cfg(SYSCTL_HANDLER_ARGS)
10299 {
10300 	struct ice_softc *sc = (struct ice_softc *)arg1;
10301 	struct ice_aqc_get_cee_dcb_cfg_resp cee_cfg = {};
10302 	struct ice_dcbx_cfg dcb_buf = {};
10303 	struct ice_dcbx_cfg *dcbcfg;
10304 	struct ice_hw *hw = &sc->hw;
10305 	device_t dev = sc->dev;
10306 	struct sbuf *sbuf;
10307 	enum ice_status status;
10308 	u8 maxtcs, dcbx_status, is_sw_lldp;
10309 
10310 	UNREFERENCED_PARAMETER(oidp);
10311 
10312 	if (ice_driver_is_detaching(sc))
10313 		return (ESHUTDOWN);
10314 
10315 	is_sw_lldp = hw->port_info->qos_cfg.is_sw_lldp;
10316 
10317 	/* The driver doesn't receive a Remote MIB via SW */
10318 	if (is_sw_lldp && arg2 == ICE_AQ_LLDP_MIB_REMOTE)
10319 		return (ENOENT);
10320 
10321 	dcbcfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
10322 	if (!is_sw_lldp) {
10323 		/* Collect information from the FW in FW LLDP mode */
10324 		dcbcfg = &dcb_buf;
10325 		status = ice_aq_get_dcb_cfg(hw, (u8)arg2,
10326 		    ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbcfg);
10327 		if (status && arg2 == ICE_AQ_LLDP_MIB_REMOTE &&
10328 		    hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) {
10329 			device_printf(dev,
10330 			    "Unable to query Remote MIB; port has not received one yet\n");
10331 			return (ENOENT);
10332 		}
10333 		if (status) {
10334 			device_printf(dev, "Unable to query LLDP MIB, err %s aq_err %s\n",
10335 			    ice_status_str(status),
10336 			    ice_aq_str(hw->adminq.sq_last_status));
10337 			return (EIO);
10338 		}
10339 	}
10340 
10341 	status = ice_aq_get_cee_dcb_cfg(hw, &cee_cfg, NULL);
10342 	if (status == ICE_SUCCESS)
10343 		dcbcfg->dcbx_mode = ICE_DCBX_MODE_CEE;
10344 	else if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)
10345 		dcbcfg->dcbx_mode = ICE_DCBX_MODE_IEEE;
10346 	else
10347 		device_printf(dev, "Get CEE DCB Cfg AQ cmd err %s aq_err %s\n",
10348 		    ice_status_str(status),
10349 		    ice_aq_str(hw->adminq.sq_last_status));
10350 
10351 	maxtcs = hw->func_caps.common_cap.maxtc;
10352 	dcbx_status = ice_get_dcbx_status(hw);
10353 
10354 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10355 
10356 	/* Do the actual printing */
10357 	sbuf_printf(sbuf, "\n");
10358 	sbuf_printf(sbuf, "SW LLDP mode: %d\n", is_sw_lldp);
10359 	sbuf_printf(sbuf, "Function caps maxtcs: %d\n", maxtcs);
10360 	sbuf_printf(sbuf, "dcbx_status: %d\n", dcbx_status);
10361 
10362 	sbuf_printf(sbuf, "numapps: %u\n", dcbcfg->numapps);
10363 	sbuf_printf(sbuf, "CEE TLV status: %u\n", dcbcfg->tlv_status);
10364 	sbuf_printf(sbuf, "pfc_mode: %s\n", (dcbcfg->pfc_mode == ICE_QOS_MODE_DSCP) ?
10365 	    "DSCP" : "VLAN");
10366 	sbuf_printf(sbuf, "dcbx_mode: %s\n",
10367 	    (dcbcfg->dcbx_mode == ICE_DCBX_MODE_IEEE) ? "IEEE" :
10368 	    (dcbcfg->dcbx_mode == ICE_DCBX_MODE_CEE) ? "CEE" :
10369 	    "Unknown");
10370 
10371 	ice_sbuf_print_ets_cfg(sbuf, "etscfg", &dcbcfg->etscfg);
10372 	ice_sbuf_print_ets_cfg(sbuf, "etsrec", &dcbcfg->etsrec);
10373 
10374 	sbuf_printf(sbuf, "pfc.willing: %u\n", dcbcfg->pfc.willing);
10375 	sbuf_printf(sbuf, "pfc.mbc: %u\n", dcbcfg->pfc.mbc);
10376 	sbuf_printf(sbuf, "pfc.pfccap: 0x%0x\n", dcbcfg->pfc.pfccap);
10377 	sbuf_printf(sbuf, "pfc.pfcena: 0x%0x\n", dcbcfg->pfc.pfcena);
10378 
10379 	if (arg2 == ICE_AQ_LLDP_MIB_LOCAL) {
10380 		sbuf_printf(sbuf, "dscp_map:\n");
10381 		for (int i = 0; i < 8; i++) {
10382 			for (int j = 0; j < 8; j++)
10383 				sbuf_printf(sbuf, " %d",
10384 					    dcbcfg->dscp_map[i * 8 + j]);
10385 			sbuf_printf(sbuf, "\n");
10386 		}
10387 
10388 		sbuf_printf(sbuf, "\nLocal registers:\n");
10389 		sbuf_printf(sbuf, "PRTDCB_GENC.NUMTC: %d\n",
10390 		    (rd32(hw, PRTDCB_GENC) & PRTDCB_GENC_NUMTC_M)
10391 		        >> PRTDCB_GENC_NUMTC_S);
10392 		sbuf_printf(sbuf, "PRTDCB_TUP2TC: 0x%0x\n",
10393 		    (rd32(hw, PRTDCB_TUP2TC)));
10394 		sbuf_printf(sbuf, "PRTDCB_RUP2TC: 0x%0x\n",
10395 		    (rd32(hw, PRTDCB_RUP2TC)));
10396 		sbuf_printf(sbuf, "GLDCB_TC2PFC: 0x%0x\n",
10397 		    (rd32(hw, GLDCB_TC2PFC)));
10398 	}
10399 
10400 	/* Finish */
10401 	sbuf_finish(sbuf);
10402 	sbuf_delete(sbuf);
10403 
10404 	return (0);
10405 }
10406 
10407 /**
10408  * ice_sysctl_dump_vsi_cfg - print PF LAN VSI configuration
10409  * @oidp: sysctl oid structure
10410  * @arg1: pointer to private data structure
10411  * @arg2: unused
10412  * @req: sysctl request pointer
10413  *
10414  * XXX: This could be extended to apply to arbitrary PF-owned VSIs,
10415  * but for simplicity, this only works on the PF's LAN VSI.
10416  */
10417 static int
10418 ice_sysctl_dump_vsi_cfg(SYSCTL_HANDLER_ARGS)
10419 {
10420 	struct ice_softc *sc = (struct ice_softc *)arg1;
10421 	struct ice_vsi_ctx ctx = { 0 };
10422 	struct ice_hw *hw = &sc->hw;
10423 	device_t dev = sc->dev;
10424 	struct sbuf *sbuf;
10425 	enum ice_status status;
10426 
10427 	UNREFERENCED_PARAMETER(oidp);
10428 	UNREFERENCED_PARAMETER(arg2);
10429 
10430 	if (ice_driver_is_detaching(sc))
10431 		return (ESHUTDOWN);
10432 
10433 	/* Get HW absolute index of a VSI */
10434 	ctx.vsi_num = ice_get_hw_vsi_num(hw, sc->pf_vsi.idx);
10435 
10436 	status = ice_aq_get_vsi_params(hw, &ctx, NULL);
10437 	if (status != ICE_SUCCESS) {
10438 		device_printf(dev,
10439 		    "Get VSI AQ call failed, err %s aq_err %s\n",
10440 		    ice_status_str(status),
10441 		    ice_aq_str(hw->adminq.sq_last_status));
10442 		return (EIO);
10443 	}
10444 
10445 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10446 
10447 	/* Do the actual printing */
10448 	sbuf_printf(sbuf, "\n");
10449 
10450 	sbuf_printf(sbuf, "VSI NUM: %d\n", ctx.vsi_num);
10451 	sbuf_printf(sbuf, "VF  NUM: %d\n", ctx.vf_num);
10452 	sbuf_printf(sbuf, "VSIs allocated: %d\n", ctx.vsis_allocd);
10453 	sbuf_printf(sbuf, "VSIs unallocated: %d\n", ctx.vsis_unallocated);
10454 
10455 	sbuf_printf(sbuf, "Rx Queue Map method: %d\n",
10456 	    LE16_TO_CPU(ctx.info.mapping_flags));
10457 	/* The PF VSI is always contiguous, so there's no if-statement here */
10458 	sbuf_printf(sbuf, "Rx Queue base: %d\n",
10459 	    LE16_TO_CPU(ctx.info.q_mapping[0]));
10460 	sbuf_printf(sbuf, "Rx Queue count: %d\n",
10461 	    LE16_TO_CPU(ctx.info.q_mapping[1]));
10462 
10463 	sbuf_printf(sbuf, "TC qbases  :");
10464 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10465 		sbuf_printf(sbuf, " %4d",
10466 		    ctx.info.tc_mapping[i] & ICE_AQ_VSI_TC_Q_OFFSET_M);
10467 	}
10468 	sbuf_printf(sbuf, "\n");
10469 
10470 	sbuf_printf(sbuf, "TC qcounts :");
10471 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10472 		sbuf_printf(sbuf, " %4d",
10473 		    1 << (ctx.info.tc_mapping[i] >> ICE_AQ_VSI_TC_Q_NUM_S));
10474 	}
10475 
10476 	/* Finish */
10477 	sbuf_finish(sbuf);
10478 	sbuf_delete(sbuf);
10479 
10480 	return (0);
10481 }
10482 
/**
 * ice_ets_str_to_tbl - Parse string into ETS table
 * @str: input string to parse
 * @table: output eight values used for ETS values
 * @limit: max valid value to accept for ETS values
 *
 * Parses a string and converts the eight values within
 * into a table that can be used in setting ETS settings
 * in a MIB.
 *
 * Values are parsed with strtol() using base 0 and must be separated by
 * exactly one separator character (for example a comma). The string must
 * contain all eight values; anything following the eighth value is ignored.
 * Parsing never reads beyond the string's NUL terminator.
 *
 * On failure, entries of @table that were parsed before the offending
 * value may already have been overwritten.
 *
 * @return 0 on success, EINVAL if a parsed value is
 * not between 0 and limit, if a value is missing or not a number, or if
 * the string ends before eight values were found.
 */
static int
ice_ets_str_to_tbl(const char *str, u8 *table, u8 limit)
{
	const char *str_start = str;
	char *str_end;
	long token;

	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
		token = strtol(str_start, &str_end, 0);

		/* strtol() leaves the end pointer at the start when no
		 * digits were consumed; that is an empty or non-numeric
		 * field, not a zero.
		 */
		if (str_end == str_start)
			return (EINVAL);

		if (token < 0 || token > limit)
			return (EINVAL);

		table[i] = (u8)token;

		/* Nothing more to look for after the last value */
		if (i == ICE_MAX_TRAFFIC_CLASS - 1)
			break;

		/* More values are required; stepping over the terminator
		 * would parse whatever happens to follow the string.
		 */
		if (*str_end == '\0')
			return (EINVAL);

		/* Skip the single separator character */
		str_start = (str_end + 1);
	}

	return (0);
}
10514 
/**
 * ice_check_ets_bw - Check if ETS bw vals are valid
 * @table: eight values used for ETS bandwidth
 *
 * Subtracts every entry of the table from a budget of 100 and checks that
 * the budget is used up exactly.
 *
 * @return true if the sum of all 8 values in table
 * equals 100.
 */
static bool
ice_check_ets_bw(u8 *table)
{
	int remaining = 100;
	int tc;

	for (tc = 0; tc < ICE_MAX_TRAFFIC_CLASS; tc++)
		remaining -= table[tc];

	return (remaining == 0);
}
10531 
10532 /**
10533  * ice_cfg_pba_num - Determine if PBA Number is retrievable
10534  * @sc: the device private softc structure
10535  *
10536  * Sets the feature flag for the existence of a PBA number
10537  * based on the success of the read command.  This does not
10538  * cache the result.
10539  */
10540 void
10541 ice_cfg_pba_num(struct ice_softc *sc)
10542 {
10543 	u8 pba_string[32] = "";
10544 
10545 	if ((ice_is_bit_set(sc->feat_cap, ICE_FEATURE_HAS_PBA)) &&
10546 	    (ice_read_pba_string(&sc->hw, pba_string, sizeof(pba_string)) == 0))
10547 		ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_en);
10548 }
10549 
10550 /**
10551  * ice_sysctl_query_port_ets - print Port ETS Config from AQ
10552  * @oidp: sysctl oid structure
10553  * @arg1: pointer to private data structure
10554  * @arg2: unused
10555  * @req: sysctl request pointer
10556  */
10557 static int
10558 ice_sysctl_query_port_ets(SYSCTL_HANDLER_ARGS)
10559 {
10560 	struct ice_softc *sc = (struct ice_softc *)arg1;
10561 	struct ice_aqc_port_ets_elem port_ets = { 0 };
10562 	struct ice_hw *hw = &sc->hw;
10563 	struct ice_port_info *pi;
10564 	device_t dev = sc->dev;
10565 	struct sbuf *sbuf;
10566 	enum ice_status status;
10567 	int i = 0;
10568 
10569 	UNREFERENCED_PARAMETER(oidp);
10570 	UNREFERENCED_PARAMETER(arg2);
10571 
10572 	if (ice_driver_is_detaching(sc))
10573 		return (ESHUTDOWN);
10574 
10575 	pi = hw->port_info;
10576 
10577 	status = ice_aq_query_port_ets(pi, &port_ets, sizeof(port_ets), NULL);
10578 	if (status != ICE_SUCCESS) {
10579 		device_printf(dev,
10580 		    "Query Port ETS AQ call failed, err %s aq_err %s\n",
10581 		    ice_status_str(status),
10582 		    ice_aq_str(hw->adminq.sq_last_status));
10583 		return (EIO);
10584 	}
10585 
10586 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10587 
10588 	/* Do the actual printing */
10589 	sbuf_printf(sbuf, "\n");
10590 
10591 	sbuf_printf(sbuf, "Valid TC map: 0x%x\n", port_ets.tc_valid_bits);
10592 
10593 	sbuf_printf(sbuf, "TC BW %%:");
10594 	ice_for_each_traffic_class(i) {
10595 		sbuf_printf(sbuf, " %3d", port_ets.tc_bw_share[i]);
10596 	}
10597 	sbuf_printf(sbuf, "\n");
10598 
10599 	sbuf_printf(sbuf, "EIR profile ID: %d\n", port_ets.port_eir_prof_id);
10600 	sbuf_printf(sbuf, "CIR profile ID: %d\n", port_ets.port_cir_prof_id);
10601 	sbuf_printf(sbuf, "TC Node prio: 0x%x\n", port_ets.tc_node_prio);
10602 
10603 	sbuf_printf(sbuf, "TC Node TEIDs:\n");
10604 	ice_for_each_traffic_class(i) {
10605 		sbuf_printf(sbuf, "%d: %d\n", i, port_ets.tc_node_teid[i]);
10606 	}
10607 
10608 	/* Finish */
10609 	sbuf_finish(sbuf);
10610 	sbuf_delete(sbuf);
10611 
10612 	return (0);
10613 }
10614 
10615 /**
10616  * ice_sysctl_dscp2tc_map - Map DSCP to hardware TCs
10617  * @oidp: sysctl oid structure
10618  * @arg1: pointer to private data structure
10619  * @arg2: which eight DSCP to UP mappings to configure (0 - 7)
10620  * @req: sysctl request pointer
10621  *
10622  * Gets or sets the current DSCP to UP table cached by the driver. Since there
10623  * are 64 possible DSCP values to configure, this sysctl only configures
10624  * chunks of 8 in that space at a time.
10625  *
10626  * This sysctl is only relevant in DSCP mode, and will only function in SW DCB
10627  * mode.
10628  */
10629 static int
10630 ice_sysctl_dscp2tc_map(SYSCTL_HANDLER_ARGS)
10631 {
10632 	struct ice_softc *sc = (struct ice_softc *)arg1;
10633 	struct ice_dcbx_cfg *local_dcbx_cfg;
10634 	struct ice_port_info *pi;
10635 	struct ice_hw *hw = &sc->hw;
10636 	device_t dev = sc->dev;
10637 	enum ice_status status;
10638 	struct sbuf *sbuf;
10639 	int ret;
10640 
10641 	/* Store input rates from user */
10642 	char dscp_user_buf[128] = "";
10643 	u8 new_dscp_table_seg[ICE_MAX_TRAFFIC_CLASS] = {};
10644 
10645 	if (ice_driver_is_detaching(sc))
10646 		return (ESHUTDOWN);
10647 
10648 	if (req->oldptr == NULL && req->newptr == NULL) {
10649 		ret = SYSCTL_OUT(req, 0, 128);
10650 		return (ret);
10651 	}
10652 
10653 	pi = hw->port_info;
10654 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
10655 
10656 	sbuf = sbuf_new(NULL, dscp_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
10657 
10658 	/* Format DSCP-to-UP data for output */
10659 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10660 		sbuf_printf(sbuf, "%d", local_dcbx_cfg->dscp_map[arg2 * 8 + i]);
10661 		if (i != ICE_MAX_TRAFFIC_CLASS - 1)
10662 			sbuf_printf(sbuf, ",");
10663 	}
10664 
10665 	sbuf_finish(sbuf);
10666 	sbuf_delete(sbuf);
10667 
10668 	/* Read in the new DSCP mapping values */
10669 	ret = sysctl_handle_string(oidp, dscp_user_buf, sizeof(dscp_user_buf), req);
10670 	if ((ret) || (req->newptr == NULL))
10671 		return (ret);
10672 
10673 	/* Don't allow setting changes in FW DCB mode */
10674 	if (!hw->port_info->qos_cfg.is_sw_lldp) {
10675 		device_printf(dev, "%s: DSCP mapping is not allowed in FW DCBX mode\n",
10676 		    __func__);
10677 		return (EINVAL);
10678 	}
10679 
10680 	/* Convert 8 values in a string to a table; this is similar to what
10681 	 * needs to be done for ETS settings, so this function can be re-used
10682 	 * for that purpose.
10683 	 */
10684 	ret = ice_ets_str_to_tbl(dscp_user_buf, new_dscp_table_seg, 8);
10685 	if (ret) {
10686 		device_printf(dev, "%s: Could not parse input DSCP2TC table: %s\n",
10687 		    __func__, dscp_user_buf);
10688 		return (ret);
10689 	}
10690 
10691 	memcpy(&local_dcbx_cfg->dscp_map[arg2 * 8], new_dscp_table_seg,
10692 	    sizeof(new_dscp_table_seg));
10693 
10694 	local_dcbx_cfg->app_mode = ICE_DCBX_APPS_NON_WILLING;
10695 
10696 	status = ice_set_dcb_cfg(pi);
10697 	if (status) {
10698 		device_printf(dev,
10699 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
10700 		    __func__, ice_status_str(status),
10701 		    ice_aq_str(hw->adminq.sq_last_status));
10702 		return (EIO);
10703 	}
10704 
10705 	ice_do_dcb_reconfig(sc, false);
10706 
10707 	return (0);
10708 }
10709 
10710 /**
10711  * ice_handle_debug_dump_ioctl - Handle a debug dump ioctl request
10712  * @sc: the device private softc
10713  * @ifd: ifdrv ioctl request pointer
10714  */
10715 int
10716 ice_handle_debug_dump_ioctl(struct ice_softc *sc, struct ifdrv *ifd)
10717 {
10718 	size_t ifd_len = ifd->ifd_len;
10719 	struct ice_hw *hw = &sc->hw;
10720 	device_t dev = sc->dev;
10721 	struct ice_debug_dump_cmd *ddc;
10722 	enum ice_status status;
10723 	int err = 0;
10724 
10725 	/* Returned arguments from the Admin Queue */
10726 	u16 ret_buf_size = 0;
10727 	u16 ret_next_table = 0;
10728 	u32 ret_next_index = 0;
10729 
10730 	/*
10731 	 * ifioctl forwards SIOCxDRVSPEC to iflib without performing
10732 	 * a privilege check. In turn, iflib forwards the ioctl to the driver
10733 	 * without performing a privilege check. Perform one here to ensure
10734 	 * that non-privileged threads cannot access this interface.
10735 	 */
10736 	err = priv_check(curthread, PRIV_DRIVER);
10737 	if (err)
10738 		return (err);
10739 
10740 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
10741 		device_printf(dev,
10742 		    "%s: Driver must rebuild data structures after a reset. Operation aborted.\n",
10743 		    __func__);
10744 		return (EBUSY);
10745 	}
10746 
10747 	if (ifd_len < sizeof(*ddc)) {
10748 		device_printf(dev,
10749 		    "%s: ifdrv length is too small. Got %zu, but expected %zu\n",
10750 		    __func__, ifd_len, sizeof(*ddc));
10751 		return (EINVAL);
10752 	}
10753 
10754 	if (ifd->ifd_data == NULL) {
10755 		device_printf(dev, "%s: ifd data buffer not present.\n",
10756 		     __func__);
10757 		return (EINVAL);
10758 	}
10759 
10760 	ddc = (struct ice_debug_dump_cmd *)malloc(ifd_len, M_ICE, M_ZERO | M_NOWAIT);
10761 	if (!ddc)
10762 		return (ENOMEM);
10763 
10764 	/* Copy the NVM access command and data in from user space */
10765 	/* coverity[tainted_data_argument] */
10766 	err = copyin(ifd->ifd_data, ddc, ifd_len);
10767 	if (err) {
10768 		device_printf(dev, "%s: Copying request from user space failed, err %s\n",
10769 			      __func__, ice_err_str(err));
10770 		goto out;
10771 	}
10772 
10773 	/* The data_size arg must be at least 1 for the AQ cmd to work */
10774 	if (ddc->data_size == 0) {
10775 		device_printf(dev,
10776 		    "%s: data_size must be greater than 0\n", __func__);
10777 		err = EINVAL;
10778 		goto out;
10779 	}
10780 	/* ...and it can't be too long */
10781 	if (ddc->data_size > (ifd_len - sizeof(*ddc))) {
10782 		device_printf(dev,
10783 		    "%s: data_size (%d) is larger than ifd_len space (%zu)?\n", __func__,
10784 		    ddc->data_size, ifd_len - sizeof(*ddc));
10785 		err = EINVAL;
10786 		goto out;
10787 	}
10788 
10789 	/* Make sure any possible data buffer space is zeroed */
10790 	memset(ddc->data, 0, ifd_len - sizeof(*ddc));
10791 
10792 	status = ice_aq_get_internal_data(hw, ddc->cluster_id, ddc->table_id, ddc->offset,
10793 	    (u8 *)ddc->data, ddc->data_size, &ret_buf_size, &ret_next_table, &ret_next_index, NULL);
10794 	ice_debug(hw, ICE_DBG_DIAG, "%s: ret_buf_size %d, ret_next_table %d, ret_next_index %d\n",
10795 	    __func__, ret_buf_size, ret_next_table, ret_next_index);
10796 	if (status) {
10797 		device_printf(dev,
10798 		    "%s: Get Internal Data AQ command failed, err %s aq_err %s\n",
10799 		    __func__,
10800 		    ice_status_str(status),
10801 		    ice_aq_str(hw->adminq.sq_last_status));
10802 		goto aq_error;
10803 	}
10804 
10805 	ddc->table_id = ret_next_table;
10806 	ddc->offset = ret_next_index;
10807 	ddc->data_size = ret_buf_size;
10808 
10809 	/* Copy the possibly modified contents of the handled request out */
10810 	err = copyout(ddc, ifd->ifd_data, ifd->ifd_len);
10811 	if (err) {
10812 		device_printf(dev, "%s: Copying response back to user space failed, err %s\n",
10813 			      __func__, ice_err_str(err));
10814 		goto out;
10815 	}
10816 
10817 aq_error:
10818 	/* Convert private status to an error code for proper ioctl response */
10819 	switch (status) {
10820 	case ICE_SUCCESS:
10821 		err = (0);
10822 		break;
10823 	case ICE_ERR_NO_MEMORY:
10824 		err = (ENOMEM);
10825 		break;
10826 	case ICE_ERR_OUT_OF_RANGE:
10827 		err = (ENOTTY);
10828 		break;
10829 	case ICE_ERR_AQ_ERROR:
10830 		err = (EIO);
10831 		break;
10832 	case ICE_ERR_PARAM:
10833 	default:
10834 		err = (EINVAL);
10835 		break;
10836 	}
10837 
10838 out:
10839 	free(ddc, M_ICE);
10840 	return (err);
10841 }
10842 
10843 /**
10844  * ice_sysctl_allow_no_fec_mod_in_auto - Change Auto FEC behavior
10845  * @oidp: sysctl oid structure
10846  * @arg1: pointer to private data structure
10847  * @arg2: unused
10848  * @req: sysctl request pointer
10849  *
10850  * Allows user to let "No FEC" mode to be used in "Auto"
10851  * FEC mode during FEC negotiation. This is only supported
10852  * on newer firmware versions.
10853  */
10854 static int
10855 ice_sysctl_allow_no_fec_mod_in_auto(SYSCTL_HANDLER_ARGS)
10856 {
10857 	struct ice_softc *sc = (struct ice_softc *)arg1;
10858 	struct ice_hw *hw = &sc->hw;
10859 	device_t dev = sc->dev;
10860 	u8 user_flag;
10861 	int ret;
10862 
10863 	UNREFERENCED_PARAMETER(arg2);
10864 
10865 	ret = priv_check(curthread, PRIV_DRIVER);
10866 	if (ret)
10867 		return (ret);
10868 
10869 	if (ice_driver_is_detaching(sc))
10870 		return (ESHUTDOWN);
10871 
10872 	user_flag = (u8)sc->allow_no_fec_mod_in_auto;
10873 
10874 	ret = sysctl_handle_bool(oidp, &user_flag, 0, req);
10875 	if ((ret) || (req->newptr == NULL))
10876 		return (ret);
10877 
10878 	if (!ice_fw_supports_fec_dis_auto(hw)) {
10879 		log(LOG_INFO,
10880 		    "%s: Enabling or disabling of auto configuration of modules that don't support FEC is unsupported by the current firmware\n",
10881 		    device_get_nameunit(dev));
10882 		return (ENODEV);
10883 	}
10884 
10885 	if (user_flag == (bool)sc->allow_no_fec_mod_in_auto)
10886 		return (0);
10887 
10888 	sc->allow_no_fec_mod_in_auto = (u8)user_flag;
10889 
10890 	if (sc->allow_no_fec_mod_in_auto)
10891 		log(LOG_INFO, "%s: Enabled auto configuration of No FEC modules\n",
10892 		    device_get_nameunit(dev));
10893 	else
10894 		log(LOG_INFO,
10895 		    "%s: Auto configuration of No FEC modules reset to NVM defaults\n",
10896 		    device_get_nameunit(dev));
10897 
10898 	return (0);
10899 }
10900 
10901