xref: /freebsd/sys/dev/ice/ice_lib.c (revision ff8da9b2bab43920a19c16855ac3d30b5ccb1df2)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2022, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 /*$FreeBSD$*/
32 
33 /**
34  * @file ice_lib.c
35  * @brief Generic device setup and sysctl functions
36  *
37  * Library of generic device functions not specific to the networking stack.
38  *
39  * This includes hardware initialization functions, as well as handlers for
40  * many of the device sysctls used to probe driver status or tune specific
41  * behaviors.
42  */
43 
44 #include "ice_lib.h"
45 #include "ice_iflib.h"
46 #include <dev/pci/pcivar.h>
47 #include <dev/pci/pcireg.h>
48 #include <machine/resource.h>
49 #include <net/if_dl.h>
50 #include <sys/firmware.h>
51 #include <sys/priv.h>
52 #include <sys/limits.h>
53 
54 /**
55  * @var M_ICE
56  * @brief main ice driver allocation type
57  *
58  * malloc(9) allocation type used by the majority of memory allocations in the
59  * ice driver.
60  */
61 MALLOC_DEFINE(M_ICE, "ice", "Intel(R) 100Gb Network Driver lib allocations");
62 
63 /*
64  * Helper function prototypes
65  */
66 static int ice_get_next_vsi(struct ice_vsi **all_vsi, int size);
67 static void ice_set_default_vsi_ctx(struct ice_vsi_ctx *ctx);
68 static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctx, enum ice_vsi_type type);
69 static int ice_setup_vsi_qmap(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx);
70 static int ice_setup_tx_ctx(struct ice_tx_queue *txq,
71 			    struct ice_tlan_ctx *tlan_ctx, u16 pf_q);
72 static int ice_setup_rx_ctx(struct ice_rx_queue *rxq);
73 static int ice_is_rxq_ready(struct ice_hw *hw, int pf_q, u32 *reg);
74 static void ice_free_fltr_list(struct ice_list_head *list);
75 static int ice_add_mac_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
76 			       const u8 *addr, enum ice_sw_fwd_act_type action);
77 static void ice_check_ctrlq_errors(struct ice_softc *sc, const char *qname,
78 				   struct ice_ctl_q_info *cq);
79 static void ice_process_link_event(struct ice_softc *sc, struct ice_rq_event_info *e);
80 static void ice_process_ctrlq_event(struct ice_softc *sc, const char *qname,
81 				    struct ice_rq_event_info *event);
82 static void ice_nvm_version_str(struct ice_hw *hw, struct sbuf *buf);
83 static void ice_active_pkg_version_str(struct ice_hw *hw, struct sbuf *buf);
84 static void ice_os_pkg_version_str(struct ice_hw *hw, struct sbuf *buf);
85 static bool ice_filter_is_mcast(struct ice_vsi *vsi, struct ice_fltr_info *info);
86 static u_int ice_sync_one_mcast_filter(void *p, struct sockaddr_dl *sdl, u_int errors);
87 static void ice_add_debug_tunables(struct ice_softc *sc);
88 static void ice_add_debug_sysctls(struct ice_softc *sc);
89 static void ice_vsi_set_rss_params(struct ice_vsi *vsi);
90 static void ice_get_default_rss_key(u8 *seed);
91 static int  ice_set_rss_key(struct ice_vsi *vsi);
92 static int  ice_set_rss_lut(struct ice_vsi *vsi);
93 static void ice_set_rss_flow_flds(struct ice_vsi *vsi);
94 static void ice_clean_vsi_rss_cfg(struct ice_vsi *vsi);
95 static const char *ice_aq_speed_to_str(struct ice_port_info *pi);
96 static const char *ice_requested_fec_mode(struct ice_port_info *pi);
97 static const char *ice_negotiated_fec_mode(struct ice_port_info *pi);
98 static const char *ice_autoneg_mode(struct ice_port_info *pi);
99 static const char *ice_flowcontrol_mode(struct ice_port_info *pi);
100 static void ice_print_bus_link_data(device_t dev, struct ice_hw *hw);
101 static void ice_set_pci_link_status_data(struct ice_hw *hw, u16 link_status);
102 static uint8_t ice_pcie_bandwidth_check(struct ice_softc *sc);
103 static uint64_t ice_pcie_bus_speed_to_rate(enum ice_pcie_bus_speed speed);
104 static int ice_pcie_lnk_width_to_int(enum ice_pcie_link_width width);
105 static uint64_t ice_phy_types_to_max_rate(struct ice_port_info *pi);
106 static void ice_add_sysctls_sw_stats(struct ice_vsi *vsi,
107 				     struct sysctl_ctx_list *ctx,
108 				     struct sysctl_oid *parent);
109 static void
110 ice_add_sysctls_mac_pfc_one_stat(struct sysctl_ctx_list *ctx,
111 				 struct sysctl_oid_list *parent_list,
112 				 u64* pfc_stat_location,
113 				 const char *node_name,
114 				 const char *descr);
115 static void ice_add_sysctls_mac_pfc_stats(struct sysctl_ctx_list *ctx,
116 					  struct sysctl_oid *parent,
117 					  struct ice_hw_port_stats *stats);
118 static void ice_setup_vsi_common(struct ice_softc *sc, struct ice_vsi *vsi,
119 				 enum ice_vsi_type type, int idx,
120 				 bool dynamic);
121 static void ice_handle_mib_change_event(struct ice_softc *sc,
122 				 struct ice_rq_event_info *event);
123 static void
124 ice_handle_lan_overflow_event(struct ice_softc *sc,
125 			      struct ice_rq_event_info *event);
126 static int ice_add_ethertype_to_list(struct ice_vsi *vsi,
127 				     struct ice_list_head *list,
128 				     u16 ethertype, u16 direction,
129 				     enum ice_sw_fwd_act_type action);
130 static void ice_add_rx_lldp_filter(struct ice_softc *sc);
131 static void ice_del_rx_lldp_filter(struct ice_softc *sc);
132 static u16 ice_aq_phy_types_to_link_speeds(u64 phy_type_low,
133 					   u64 phy_type_high);
134 struct ice_phy_data;
135 static int
136 ice_intersect_phy_types_and_speeds(struct ice_softc *sc,
137 				   struct ice_phy_data *phy_data);
138 static int
139 ice_apply_saved_phy_req_to_cfg(struct ice_softc *sc,
140 			       struct ice_aqc_set_phy_cfg_data *cfg);
141 static int
142 ice_apply_saved_fec_req_to_cfg(struct ice_softc *sc,
143 			       struct ice_aqc_set_phy_cfg_data *cfg);
144 static void
145 ice_apply_saved_fc_req_to_cfg(struct ice_port_info *pi,
146 			      struct ice_aqc_set_phy_cfg_data *cfg);
147 static void
148 ice_print_ldo_tlv(struct ice_softc *sc,
149 		  struct ice_link_default_override_tlv *tlv);
150 static void
151 ice_sysctl_speeds_to_aq_phy_types(u16 sysctl_speeds, u64 *phy_type_low,
152 				  u64 *phy_type_high);
153 static u16 ice_apply_supported_speed_filter(u16 report_speeds, u8 mod_type);
154 static void
155 ice_handle_health_status_event(struct ice_softc *sc,
156 			       struct ice_rq_event_info *event);
157 static void
158 ice_print_health_status_string(device_t dev,
159 			       struct ice_aqc_health_status_elem *elem);
160 static void
161 ice_debug_print_mib_change_event(struct ice_softc *sc,
162 				 struct ice_rq_event_info *event);
163 static bool ice_check_ets_bw(u8 *table);
164 static u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg);
165 static bool
166 ice_dcb_needs_reconfig(struct ice_softc *sc, struct ice_dcbx_cfg *old_cfg,
167 		       struct ice_dcbx_cfg *new_cfg);
168 static void ice_dcb_recfg(struct ice_softc *sc);
169 static u8 ice_dcb_tc_contig(u8 tc_map);
170 static int ice_ets_str_to_tbl(const char *str, u8 *table, u8 limit);
171 static int ice_pf_vsi_cfg_tc(struct ice_softc *sc, u8 tc_map);
172 static void ice_sbuf_print_ets_cfg(struct sbuf *sbuf, const char *name,
173 				   struct ice_dcb_ets_cfg *ets);
174 static void ice_stop_pf_vsi(struct ice_softc *sc);
175 static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt);
176 static void ice_do_dcb_reconfig(struct ice_softc *sc, bool pending_mib);
177 static int ice_config_pfc(struct ice_softc *sc, u8 new_mode);
178 void
179 ice_add_dscp2tc_map_sysctls(struct ice_softc *sc,
180 			    struct sysctl_ctx_list *ctx,
181 			    struct sysctl_oid_list *ctx_list);
182 static void ice_set_default_local_mib_settings(struct ice_softc *sc);
183 static bool ice_dscp_is_mapped(struct ice_dcbx_cfg *dcbcfg);
184 static void ice_start_dcbx_agent(struct ice_softc *sc);
185 static void ice_fw_debug_dump_print_cluster(struct ice_softc *sc,
186 					    struct sbuf *sbuf, u16 cluster_id);
187 
188 static int ice_module_init(void);
189 static int ice_module_exit(void);
190 
191 /*
192  * package version comparison functions
193  */
194 static bool pkg_ver_empty(struct ice_pkg_ver *pkg_ver, u8 *pkg_name);
195 static int pkg_ver_compatible(struct ice_pkg_ver *pkg_ver);
196 
197 /*
198  * dynamic sysctl handlers
199  */
200 static int ice_sysctl_show_fw(SYSCTL_HANDLER_ARGS);
201 static int ice_sysctl_pkg_version(SYSCTL_HANDLER_ARGS);
202 static int ice_sysctl_os_pkg_version(SYSCTL_HANDLER_ARGS);
203 static int ice_sysctl_dump_mac_filters(SYSCTL_HANDLER_ARGS);
204 static int ice_sysctl_dump_vlan_filters(SYSCTL_HANDLER_ARGS);
205 static int ice_sysctl_dump_ethertype_filters(SYSCTL_HANDLER_ARGS);
206 static int ice_sysctl_dump_ethertype_mac_filters(SYSCTL_HANDLER_ARGS);
207 static int ice_sysctl_current_speed(SYSCTL_HANDLER_ARGS);
208 static int ice_sysctl_request_reset(SYSCTL_HANDLER_ARGS);
209 static int ice_sysctl_dump_state_flags(SYSCTL_HANDLER_ARGS);
210 static int ice_sysctl_fec_config(SYSCTL_HANDLER_ARGS);
211 static int ice_sysctl_fc_config(SYSCTL_HANDLER_ARGS);
212 static int ice_sysctl_negotiated_fc(SYSCTL_HANDLER_ARGS);
213 static int ice_sysctl_negotiated_fec(SYSCTL_HANDLER_ARGS);
214 static int ice_sysctl_phy_type_low(SYSCTL_HANDLER_ARGS);
215 static int ice_sysctl_phy_type_high(SYSCTL_HANDLER_ARGS);
216 static int __ice_sysctl_phy_type_handler(SYSCTL_HANDLER_ARGS,
217 					 bool is_phy_type_high);
218 static int ice_sysctl_advertise_speed(SYSCTL_HANDLER_ARGS);
219 static int ice_sysctl_rx_itr(SYSCTL_HANDLER_ARGS);
220 static int ice_sysctl_tx_itr(SYSCTL_HANDLER_ARGS);
221 static int ice_sysctl_fw_lldp_agent(SYSCTL_HANDLER_ARGS);
222 static int ice_sysctl_fw_cur_lldp_persist_status(SYSCTL_HANDLER_ARGS);
223 static int ice_sysctl_fw_dflt_lldp_persist_status(SYSCTL_HANDLER_ARGS);
224 static int ice_sysctl_phy_caps(SYSCTL_HANDLER_ARGS, u8 report_mode);
225 static int ice_sysctl_phy_sw_caps(SYSCTL_HANDLER_ARGS);
226 static int ice_sysctl_phy_nvm_caps(SYSCTL_HANDLER_ARGS);
227 static int ice_sysctl_phy_topo_caps(SYSCTL_HANDLER_ARGS);
228 static int ice_sysctl_phy_link_status(SYSCTL_HANDLER_ARGS);
229 static int ice_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS);
230 static int ice_sysctl_tx_cso_stat(SYSCTL_HANDLER_ARGS);
231 static int ice_sysctl_rx_cso_stat(SYSCTL_HANDLER_ARGS);
232 static int ice_sysctl_pba_number(SYSCTL_HANDLER_ARGS);
233 static int ice_sysctl_rx_errors_stat(SYSCTL_HANDLER_ARGS);
234 static int ice_sysctl_dump_dcbx_cfg(SYSCTL_HANDLER_ARGS);
235 static int ice_sysctl_dump_vsi_cfg(SYSCTL_HANDLER_ARGS);
236 static int ice_sysctl_ets_min_rate(SYSCTL_HANDLER_ARGS);
237 static int ice_sysctl_up2tc_map(SYSCTL_HANDLER_ARGS);
238 static int ice_sysctl_pfc_config(SYSCTL_HANDLER_ARGS);
239 static int ice_sysctl_query_port_ets(SYSCTL_HANDLER_ARGS);
240 static int ice_sysctl_dscp2tc_map(SYSCTL_HANDLER_ARGS);
241 static int ice_sysctl_pfc_mode(SYSCTL_HANDLER_ARGS);
242 static int ice_sysctl_fw_debug_dump_cluster_setting(SYSCTL_HANDLER_ARGS);
243 static int ice_sysctl_fw_debug_dump_do_dump(SYSCTL_HANDLER_ARGS);
244 static int ice_sysctl_allow_no_fec_mod_in_auto(SYSCTL_HANDLER_ARGS);
245 
246 /**
247  * ice_map_bar - Map PCIe BAR memory
248  * @dev: the PCIe device
249  * @bar: the BAR info structure
250  * @bar_num: PCIe BAR number
251  *
252  * Maps the specified PCIe BAR. Stores the mapping data in struct
253  * ice_bar_info.
254  */
255 int
256 ice_map_bar(device_t dev, struct ice_bar_info *bar, int bar_num)
257 {
258 	if (bar->res != NULL) {
259 		device_printf(dev, "PCI BAR%d already mapped\n", bar_num);
260 		return (EDOOFUS);
261 	}
262 
263 	bar->rid = PCIR_BAR(bar_num);
264 	bar->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar->rid,
265 					  RF_ACTIVE);
266 	if (!bar->res) {
267 		device_printf(dev, "PCI BAR%d mapping failed\n", bar_num);
268 		return (ENXIO);
269 	}
270 
271 	bar->tag = rman_get_bustag(bar->res);
272 	bar->handle = rman_get_bushandle(bar->res);
273 	bar->size = rman_get_size(bar->res);
274 
275 	return (0);
276 }
277 
278 /**
279  * ice_free_bar - Free PCIe BAR memory
280  * @dev: the PCIe device
281  * @bar: the BAR info structure
282  *
283  * Frees the specified PCIe BAR, releasing its resources.
284  */
285 void
286 ice_free_bar(device_t dev, struct ice_bar_info *bar)
287 {
288 	if (bar->res != NULL)
289 		bus_release_resource(dev, SYS_RES_MEMORY, bar->rid, bar->res);
290 	bar->res = NULL;
291 }
292 
293 /**
294  * ice_set_ctrlq_len - Configure ctrlq lengths for a device
295  * @hw: the device hardware structure
296  *
297  * Configures the control queues for the given device, setting up the
298  * specified lengths, prior to initializing hardware.
299  */
300 void
301 ice_set_ctrlq_len(struct ice_hw *hw)
302 {
303 	hw->adminq.num_rq_entries = ICE_AQ_LEN;
304 	hw->adminq.num_sq_entries = ICE_AQ_LEN;
305 	hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;
306 	hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
307 
308 	hw->mailboxq.num_rq_entries = ICE_MBXQ_LEN;
309 	hw->mailboxq.num_sq_entries = ICE_MBXQ_LEN;
310 	hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
311 	hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
312 
313 }
314 
/**
 * ice_get_next_vsi - Find the first unused VSI slot
 * @all_vsi: the VSI list
 * @size: the number of entries in the VSI list
 *
 * Scans @all_vsi from the start and returns the index of the first NULL
 * entry. If every entry is in use (or the list is empty), returns @size,
 * i.e. one past the last valid index.
 */
static int
ice_get_next_vsi(struct ice_vsi **all_vsi, int size)
{
	int slot = 0;

	/* Skip over slots that are already occupied */
	while (slot < size && all_vsi[slot] != NULL)
		slot++;

	/* No free slot was found: report the list size unchanged */
	if (slot >= size)
		return size;

	return slot;
}
335 
336 /**
337  * ice_setup_vsi_common - Common VSI setup for both dynamic and static VSIs
338  * @sc: the device private softc structure
339  * @vsi: the VSI to setup
340  * @type: the VSI type of the new VSI
341  * @idx: the index in the all_vsi array to use
342  * @dynamic: whether this VSI memory was dynamically allocated
343  *
344  * Perform setup for a VSI that is common to both dynamically allocated VSIs
345  * and the static PF VSI which is embedded in the softc structure.
346  */
347 static void
348 ice_setup_vsi_common(struct ice_softc *sc, struct ice_vsi *vsi,
349 		     enum ice_vsi_type type, int idx, bool dynamic)
350 {
351 	/* Store important values in VSI struct */
352 	vsi->type = type;
353 	vsi->sc = sc;
354 	vsi->idx = idx;
355 	sc->all_vsi[idx] = vsi;
356 	vsi->dynamic = dynamic;
357 
358 	/* Setup the VSI tunables now */
359 	ice_add_vsi_tunables(vsi, sc->vsi_sysctls);
360 }
361 
362 /**
363  * ice_alloc_vsi - Allocate a dynamic VSI
364  * @sc: device softc structure
365  * @type: VSI type
366  *
367  * Allocates a new dynamic VSI structure and inserts it into the VSI list.
368  */
369 struct ice_vsi *
370 ice_alloc_vsi(struct ice_softc *sc, enum ice_vsi_type type)
371 {
372 	struct ice_vsi *vsi;
373 	int idx;
374 
375 	/* Find an open index for a new VSI to be allocated. If the returned
376 	 * index is >= the num_available_vsi then it means no slot is
377 	 * available.
378 	 */
379 	idx = ice_get_next_vsi(sc->all_vsi, sc->num_available_vsi);
380 	if (idx >= sc->num_available_vsi) {
381 		device_printf(sc->dev, "No available VSI slots\n");
382 		return NULL;
383 	}
384 
385 	vsi = (struct ice_vsi *)malloc(sizeof(*vsi), M_ICE, M_WAITOK|M_ZERO);
386 	if (!vsi) {
387 		device_printf(sc->dev, "Unable to allocate VSI memory\n");
388 		return NULL;
389 	}
390 
391 	ice_setup_vsi_common(sc, vsi, type, idx, true);
392 
393 	return vsi;
394 }
395 
396 /**
397  * ice_setup_pf_vsi - Setup the PF VSI
398  * @sc: the device private softc
399  *
400  * Setup the PF VSI structure which is embedded as sc->pf_vsi in the device
401  * private softc. Unlike other VSIs, the PF VSI memory is allocated as part of
402  * the softc memory, instead of being dynamically allocated at creation.
403  */
404 void
405 ice_setup_pf_vsi(struct ice_softc *sc)
406 {
407 	ice_setup_vsi_common(sc, &sc->pf_vsi, ICE_VSI_PF, 0, false);
408 }
409 
410 /**
411  * ice_alloc_vsi_qmap
412  * @vsi: VSI structure
413  * @max_tx_queues: Number of transmit queues to identify
414  * @max_rx_queues: Number of receive queues to identify
415  *
416  * Allocates a max_[t|r]x_queues array of words for the VSI where each
417  * word contains the index of the queue it represents.  In here, all
418  * words are initialized to an index of ICE_INVALID_RES_IDX, indicating
419  * all queues for this VSI are not yet assigned an index and thus,
420  * not ready for use.
421  *
422  * Returns an error code on failure.
423  */
424 int
425 ice_alloc_vsi_qmap(struct ice_vsi *vsi, const int max_tx_queues,
426 		   const int max_rx_queues)
427 {
428 	struct ice_softc *sc = vsi->sc;
429 	int i;
430 
431 	MPASS(max_tx_queues > 0);
432 	MPASS(max_rx_queues > 0);
433 
434 	/* Allocate Tx queue mapping memory */
435 	if (!(vsi->tx_qmap =
436 	      (u16 *) malloc(sizeof(u16) * max_tx_queues, M_ICE, M_WAITOK))) {
437 		device_printf(sc->dev, "Unable to allocate Tx qmap memory\n");
438 		return (ENOMEM);
439 	}
440 
441 	/* Allocate Rx queue mapping memory */
442 	if (!(vsi->rx_qmap =
443 	      (u16 *) malloc(sizeof(u16) * max_rx_queues, M_ICE, M_WAITOK))) {
444 		device_printf(sc->dev, "Unable to allocate Rx qmap memory\n");
445 		goto free_tx_qmap;
446 	}
447 
448 	/* Mark every queue map as invalid to start with */
449 	for (i = 0; i < max_tx_queues; i++) {
450 		vsi->tx_qmap[i] = ICE_INVALID_RES_IDX;
451 	}
452 	for (i = 0; i < max_rx_queues; i++) {
453 		vsi->rx_qmap[i] = ICE_INVALID_RES_IDX;
454 	}
455 
456 	return 0;
457 
458 free_tx_qmap:
459 	free(vsi->tx_qmap, M_ICE);
460 	vsi->tx_qmap = NULL;
461 
462 	return (ENOMEM);
463 }
464 
465 /**
466  * ice_free_vsi_qmaps - Free the PF qmaps associated with a VSI
467  * @vsi: the VSI private structure
468  *
469  * Frees the PF qmaps associated with the given VSI. Generally this will be
470  * called by ice_release_vsi, but may need to be called during attach cleanup,
471  * depending on when the qmaps were allocated.
472  */
473 void
474 ice_free_vsi_qmaps(struct ice_vsi *vsi)
475 {
476 	struct ice_softc *sc = vsi->sc;
477 
478 	if (vsi->tx_qmap) {
479 		ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap,
480 					   vsi->num_tx_queues);
481 		free(vsi->tx_qmap, M_ICE);
482 		vsi->tx_qmap = NULL;
483 	}
484 
485 	if (vsi->rx_qmap) {
486 		ice_resmgr_release_map(&sc->rx_qmgr, vsi->rx_qmap,
487 					   vsi->num_rx_queues);
488 		free(vsi->rx_qmap, M_ICE);
489 		vsi->rx_qmap = NULL;
490 	}
491 }
492 
493 /**
494  * ice_set_default_vsi_ctx - Setup default VSI context parameters
495  * @ctx: the VSI context to initialize
496  *
497  * Initialize and prepare a default VSI context for configuring a new VSI.
498  */
499 static void
500 ice_set_default_vsi_ctx(struct ice_vsi_ctx *ctx)
501 {
502 	u32 table = 0;
503 
504 	memset(&ctx->info, 0, sizeof(ctx->info));
505 	/* VSI will be allocated from shared pool */
506 	ctx->alloc_from_pool = true;
507 	/* Enable source pruning by default */
508 	ctx->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE;
509 	/* Traffic from VSI can be sent to LAN */
510 	ctx->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA;
511 	/* Allow all packets untagged/tagged */
512 	ctx->info.inner_vlan_flags = ((ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL &
513 				       ICE_AQ_VSI_INNER_VLAN_TX_MODE_M) >>
514 				       ICE_AQ_VSI_INNER_VLAN_TX_MODE_S);
515 	/* Show VLAN/UP from packets in Rx descriptors */
516 	ctx->info.inner_vlan_flags |= ((ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH &
517 					ICE_AQ_VSI_INNER_VLAN_EMODE_M) >>
518 					ICE_AQ_VSI_INNER_VLAN_EMODE_S);
519 	/* Have 1:1 UP mapping for both ingress/egress tables */
520 	table |= ICE_UP_TABLE_TRANSLATE(0, 0);
521 	table |= ICE_UP_TABLE_TRANSLATE(1, 1);
522 	table |= ICE_UP_TABLE_TRANSLATE(2, 2);
523 	table |= ICE_UP_TABLE_TRANSLATE(3, 3);
524 	table |= ICE_UP_TABLE_TRANSLATE(4, 4);
525 	table |= ICE_UP_TABLE_TRANSLATE(5, 5);
526 	table |= ICE_UP_TABLE_TRANSLATE(6, 6);
527 	table |= ICE_UP_TABLE_TRANSLATE(7, 7);
528 	ctx->info.ingress_table = CPU_TO_LE32(table);
529 	ctx->info.egress_table = CPU_TO_LE32(table);
530 	/* Have 1:1 UP mapping for outer to inner UP table */
531 	ctx->info.outer_up_table = CPU_TO_LE32(table);
532 	/* No Outer tag support, so outer_vlan_flags remains zero */
533 }
534 
535 /**
536  * ice_set_rss_vsi_ctx - Setup VSI context parameters for RSS
537  * @ctx: the VSI context to configure
538  * @type: the VSI type
539  *
540  * Configures the VSI context for RSS, based on the VSI type.
541  */
542 static void
543 ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctx, enum ice_vsi_type type)
544 {
545 	u8 lut_type, hash_type;
546 
547 	switch (type) {
548 	case ICE_VSI_PF:
549 		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF;
550 		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
551 		break;
552 	case ICE_VSI_VF:
553 		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI;
554 		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
555 		break;
556 	default:
557 		/* Other VSI types do not support RSS */
558 		return;
559 	}
560 
561 	ctx->info.q_opt_rss = (((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) &
562 				 ICE_AQ_VSI_Q_OPT_RSS_LUT_M) |
563 				((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) &
564 				 ICE_AQ_VSI_Q_OPT_RSS_HASH_M));
565 }
566 
567 /**
568  * ice_setup_vsi_qmap - Setup the queue mapping for a VSI
569  * @vsi: the VSI to configure
570  * @ctx: the VSI context to configure
571  *
572  * Configures the context for the given VSI, setting up how the firmware
573  * should map the queues for this VSI.
574  */
575 static int
576 ice_setup_vsi_qmap(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx)
577 {
578 	int pow = 0;
579 	u16 qmap;
580 
581 	MPASS(vsi->rx_qmap != NULL);
582 
583 	/* TODO:
584 	 * Handle scattered queues (for VFs)
585 	 */
586 	if (vsi->qmap_type != ICE_RESMGR_ALLOC_CONTIGUOUS)
587 		return (EOPNOTSUPP);
588 
589 	ctx->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_CONTIG);
590 
591 	ctx->info.q_mapping[0] = CPU_TO_LE16(vsi->rx_qmap[0]);
592 	ctx->info.q_mapping[1] = CPU_TO_LE16(vsi->num_rx_queues);
593 
594 	/* Calculate the next power-of-2 of number of queues */
595 	if (vsi->num_rx_queues)
596 		pow = flsl(vsi->num_rx_queues - 1);
597 
598 	/* Assign all the queues to traffic class zero */
599 	qmap = (pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M;
600 	ctx->info.tc_mapping[0] = CPU_TO_LE16(qmap);
601 
602 	/* Fill out default driver TC queue info for VSI */
603 	vsi->tc_info[0].qoffset = 0;
604 	vsi->tc_info[0].qcount_rx = vsi->num_rx_queues;
605 	vsi->tc_info[0].qcount_tx = vsi->num_tx_queues;
606 	for (int i = 1; i < ICE_MAX_TRAFFIC_CLASS; i++) {
607 		vsi->tc_info[i].qoffset = 0;
608 		vsi->tc_info[i].qcount_rx = 1;
609 		vsi->tc_info[i].qcount_tx = 1;
610 	}
611 	vsi->tc_map = 0x1;
612 
613 	return 0;
614 }
615 
616 /**
617  * ice_initialize_vsi - Initialize a VSI for use
618  * @vsi: the vsi to initialize
619  *
620  * Initialize a VSI over the adminq and prepare it for operation.
621  */
622 int
623 ice_initialize_vsi(struct ice_vsi *vsi)
624 {
625 	struct ice_vsi_ctx ctx = { 0 };
626 	struct ice_hw *hw = &vsi->sc->hw;
627 	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
628 	enum ice_status status;
629 	int err;
630 
631 	/* For now, we only have code supporting PF VSIs */
632 	switch (vsi->type) {
633 	case ICE_VSI_PF:
634 		ctx.flags = ICE_AQ_VSI_TYPE_PF;
635 		break;
636 	default:
637 		return (ENODEV);
638 	}
639 
640 	ice_set_default_vsi_ctx(&ctx);
641 	ice_set_rss_vsi_ctx(&ctx, vsi->type);
642 
643 	/* XXX: VSIs of other types may need different port info? */
644 	ctx.info.sw_id = hw->port_info->sw_id;
645 
646 	/* Set some RSS parameters based on the VSI type */
647 	ice_vsi_set_rss_params(vsi);
648 
649 	/* Initialize the Rx queue mapping for this VSI */
650 	err = ice_setup_vsi_qmap(vsi, &ctx);
651 	if (err) {
652 		return err;
653 	}
654 
655 	/* (Re-)add VSI to HW VSI handle list */
656 	status = ice_add_vsi(hw, vsi->idx, &ctx, NULL);
657 	if (status != 0) {
658 		device_printf(vsi->sc->dev,
659 		    "Add VSI AQ call failed, err %s aq_err %s\n",
660 		    ice_status_str(status),
661 		    ice_aq_str(hw->adminq.sq_last_status));
662 		return (EIO);
663 	}
664 	vsi->info = ctx.info;
665 
666 	/* Initialize VSI with just 1 TC to start */
667 	max_txqs[0] = vsi->num_tx_queues;
668 
669 	status = ice_cfg_vsi_lan(hw->port_info, vsi->idx,
670 			      ICE_DFLT_TRAFFIC_CLASS, max_txqs);
671 	if (status) {
672 		device_printf(vsi->sc->dev,
673 		    "Failed VSI lan queue config, err %s aq_err %s\n",
674 		    ice_status_str(status),
675 		    ice_aq_str(hw->adminq.sq_last_status));
676 		ice_deinit_vsi(vsi);
677 		return (ENODEV);
678 	}
679 
680 	/* Reset VSI stats */
681 	ice_reset_vsi_stats(vsi);
682 
683 	return 0;
684 }
685 
686 /**
687  * ice_deinit_vsi - Tell firmware to release resources for a VSI
688  * @vsi: the VSI to release
689  *
690  * Helper function which requests the firmware to release the hardware
691  * resources associated with a given VSI.
692  */
693 void
694 ice_deinit_vsi(struct ice_vsi *vsi)
695 {
696 	struct ice_vsi_ctx ctx = { 0 };
697 	struct ice_softc *sc = vsi->sc;
698 	struct ice_hw *hw = &sc->hw;
699 	enum ice_status status;
700 
701 	/* Assert that the VSI pointer matches in the list */
702 	MPASS(vsi == sc->all_vsi[vsi->idx]);
703 
704 	ctx.info = vsi->info;
705 
706 	status = ice_rm_vsi_lan_cfg(hw->port_info, vsi->idx);
707 	if (status) {
708 		/*
709 		 * This should only fail if the VSI handle is invalid, or if
710 		 * any of the nodes have leaf nodes which are still in use.
711 		 */
712 		device_printf(sc->dev,
713 			      "Unable to remove scheduler nodes for VSI %d, err %s\n",
714 			      vsi->idx, ice_status_str(status));
715 	}
716 
717 	/* Tell firmware to release the VSI resources */
718 	status = ice_free_vsi(hw, vsi->idx, &ctx, false, NULL);
719 	if (status != 0) {
720 		device_printf(sc->dev,
721 		    "Free VSI %u AQ call failed, err %s aq_err %s\n",
722 		    vsi->idx, ice_status_str(status),
723 		    ice_aq_str(hw->adminq.sq_last_status));
724 	}
725 }
726 
727 /**
728  * ice_release_vsi - Release resources associated with a VSI
729  * @vsi: the VSI to release
730  *
731  * Release software and firmware resources associated with a VSI. Release the
732  * queue managers associated with this VSI. Also free the VSI structure memory
733  * if the VSI was allocated dynamically using ice_alloc_vsi().
734  */
735 void
736 ice_release_vsi(struct ice_vsi *vsi)
737 {
738 	struct ice_softc *sc = vsi->sc;
739 	int idx = vsi->idx;
740 
741 	/* Assert that the VSI pointer matches in the list */
742 	MPASS(vsi == sc->all_vsi[idx]);
743 
744 	/* Cleanup RSS configuration */
745 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_RSS))
746 		ice_clean_vsi_rss_cfg(vsi);
747 
748 	ice_del_vsi_sysctl_ctx(vsi);
749 
750 	/*
751 	 * If we unload the driver after a reset fails, we do not need to do
752 	 * this step.
753 	 */
754 	if (!ice_test_state(&sc->state, ICE_STATE_RESET_FAILED))
755 		ice_deinit_vsi(vsi);
756 
757 	ice_free_vsi_qmaps(vsi);
758 
759 	if (vsi->dynamic) {
760 		free(sc->all_vsi[idx], M_ICE);
761 	}
762 
763 	sc->all_vsi[idx] = NULL;
764 }
765 
766 /**
767  * ice_aq_speed_to_rate - Convert AdminQ speed enum to baudrate
768  * @pi: port info data
769  *
770  * Returns the baudrate value for the current link speed of a given port.
771  */
772 uint64_t
773 ice_aq_speed_to_rate(struct ice_port_info *pi)
774 {
775 	switch (pi->phy.link_info.link_speed) {
776 	case ICE_AQ_LINK_SPEED_100GB:
777 		return IF_Gbps(100);
778 	case ICE_AQ_LINK_SPEED_50GB:
779 		return IF_Gbps(50);
780 	case ICE_AQ_LINK_SPEED_40GB:
781 		return IF_Gbps(40);
782 	case ICE_AQ_LINK_SPEED_25GB:
783 		return IF_Gbps(25);
784 	case ICE_AQ_LINK_SPEED_10GB:
785 		return IF_Gbps(10);
786 	case ICE_AQ_LINK_SPEED_5GB:
787 		return IF_Gbps(5);
788 	case ICE_AQ_LINK_SPEED_2500MB:
789 		return IF_Mbps(2500);
790 	case ICE_AQ_LINK_SPEED_1000MB:
791 		return IF_Mbps(1000);
792 	case ICE_AQ_LINK_SPEED_100MB:
793 		return IF_Mbps(100);
794 	case ICE_AQ_LINK_SPEED_10MB:
795 		return IF_Mbps(10);
796 	case ICE_AQ_LINK_SPEED_UNKNOWN:
797 	default:
798 		/* return 0 if we don't know the link speed */
799 		return 0;
800 	}
801 }
802 
803 /**
804  * ice_aq_speed_to_str - Convert AdminQ speed enum to string representation
805  * @pi: port info data
806  *
807  * Returns the string representation of the current link speed for a given
808  * port.
809  */
810 static const char *
811 ice_aq_speed_to_str(struct ice_port_info *pi)
812 {
813 	switch (pi->phy.link_info.link_speed) {
814 	case ICE_AQ_LINK_SPEED_100GB:
815 		return "100 Gbps";
816 	case ICE_AQ_LINK_SPEED_50GB:
817 		return "50 Gbps";
818 	case ICE_AQ_LINK_SPEED_40GB:
819 		return "40 Gbps";
820 	case ICE_AQ_LINK_SPEED_25GB:
821 		return "25 Gbps";
822 	case ICE_AQ_LINK_SPEED_20GB:
823 		return "20 Gbps";
824 	case ICE_AQ_LINK_SPEED_10GB:
825 		return "10 Gbps";
826 	case ICE_AQ_LINK_SPEED_5GB:
827 		return "5 Gbps";
828 	case ICE_AQ_LINK_SPEED_2500MB:
829 		return "2.5 Gbps";
830 	case ICE_AQ_LINK_SPEED_1000MB:
831 		return "1 Gbps";
832 	case ICE_AQ_LINK_SPEED_100MB:
833 		return "100 Mbps";
834 	case ICE_AQ_LINK_SPEED_10MB:
835 		return "10 Mbps";
836 	case ICE_AQ_LINK_SPEED_UNKNOWN:
837 	default:
838 		return "Unknown speed";
839 	}
840 }
841 
842 /**
843  * ice_get_phy_type_low - Get media associated with phy_type_low
844  * @phy_type_low: the low 64bits of phy_type from the AdminQ
845  *
846  * Given the lower 64bits of the phy_type from the hardware, return the
847  * ifm_active bit associated. Return IFM_UNKNOWN when phy_type_low is unknown.
848  * Note that only one of ice_get_phy_type_low or ice_get_phy_type_high should
849  * be called. If phy_type_low is zero, call ice_phy_type_high.
850  */
851 int
852 ice_get_phy_type_low(uint64_t phy_type_low)
853 {
854 	switch (phy_type_low) {
855 	case ICE_PHY_TYPE_LOW_100BASE_TX:
856 		return IFM_100_TX;
857 	case ICE_PHY_TYPE_LOW_100M_SGMII:
858 		return IFM_100_SGMII;
859 	case ICE_PHY_TYPE_LOW_1000BASE_T:
860 		return IFM_1000_T;
861 	case ICE_PHY_TYPE_LOW_1000BASE_SX:
862 		return IFM_1000_SX;
863 	case ICE_PHY_TYPE_LOW_1000BASE_LX:
864 		return IFM_1000_LX;
865 	case ICE_PHY_TYPE_LOW_1000BASE_KX:
866 		return IFM_1000_KX;
867 	case ICE_PHY_TYPE_LOW_1G_SGMII:
868 		return IFM_1000_SGMII;
869 	case ICE_PHY_TYPE_LOW_2500BASE_T:
870 		return IFM_2500_T;
871 	case ICE_PHY_TYPE_LOW_2500BASE_X:
872 		return IFM_2500_X;
873 	case ICE_PHY_TYPE_LOW_2500BASE_KX:
874 		return IFM_2500_KX;
875 	case ICE_PHY_TYPE_LOW_5GBASE_T:
876 		return IFM_5000_T;
877 	case ICE_PHY_TYPE_LOW_5GBASE_KR:
878 		return IFM_5000_KR;
879 	case ICE_PHY_TYPE_LOW_10GBASE_T:
880 		return IFM_10G_T;
881 	case ICE_PHY_TYPE_LOW_10G_SFI_DA:
882 		return IFM_10G_TWINAX;
883 	case ICE_PHY_TYPE_LOW_10GBASE_SR:
884 		return IFM_10G_SR;
885 	case ICE_PHY_TYPE_LOW_10GBASE_LR:
886 		return IFM_10G_LR;
887 	case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1:
888 		return IFM_10G_KR;
889 	case ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC:
890 		return IFM_10G_AOC;
891 	case ICE_PHY_TYPE_LOW_10G_SFI_C2C:
892 		return IFM_10G_SFI;
893 	case ICE_PHY_TYPE_LOW_25GBASE_T:
894 		return IFM_25G_T;
895 	case ICE_PHY_TYPE_LOW_25GBASE_CR:
896 		return IFM_25G_CR;
897 	case ICE_PHY_TYPE_LOW_25GBASE_CR_S:
898 		return IFM_25G_CR_S;
899 	case ICE_PHY_TYPE_LOW_25GBASE_CR1:
900 		return IFM_25G_CR1;
901 	case ICE_PHY_TYPE_LOW_25GBASE_SR:
902 		return IFM_25G_SR;
903 	case ICE_PHY_TYPE_LOW_25GBASE_LR:
904 		return IFM_25G_LR;
905 	case ICE_PHY_TYPE_LOW_25GBASE_KR:
906 		return IFM_25G_KR;
907 	case ICE_PHY_TYPE_LOW_25GBASE_KR_S:
908 		return IFM_25G_KR_S;
909 	case ICE_PHY_TYPE_LOW_25GBASE_KR1:
910 		return IFM_25G_KR1;
911 	case ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC:
912 		return IFM_25G_AOC;
913 	case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
914 		return IFM_25G_AUI;
915 	case ICE_PHY_TYPE_LOW_40GBASE_CR4:
916 		return IFM_40G_CR4;
917 	case ICE_PHY_TYPE_LOW_40GBASE_SR4:
918 		return IFM_40G_SR4;
919 	case ICE_PHY_TYPE_LOW_40GBASE_LR4:
920 		return IFM_40G_LR4;
921 	case ICE_PHY_TYPE_LOW_40GBASE_KR4:
922 		return IFM_40G_KR4;
923 	case ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC:
924 		return IFM_40G_XLAUI_AC;
925 	case ICE_PHY_TYPE_LOW_40G_XLAUI:
926 		return IFM_40G_XLAUI;
927 	case ICE_PHY_TYPE_LOW_50GBASE_CR2:
928 		return IFM_50G_CR2;
929 	case ICE_PHY_TYPE_LOW_50GBASE_SR2:
930 		return IFM_50G_SR2;
931 	case ICE_PHY_TYPE_LOW_50GBASE_LR2:
932 		return IFM_50G_LR2;
933 	case ICE_PHY_TYPE_LOW_50GBASE_KR2:
934 		return IFM_50G_KR2;
935 	case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC:
936 		return IFM_50G_LAUI2_AC;
937 	case ICE_PHY_TYPE_LOW_50G_LAUI2:
938 		return IFM_50G_LAUI2;
939 	case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC:
940 		return IFM_50G_AUI2_AC;
941 	case ICE_PHY_TYPE_LOW_50G_AUI2:
942 		return IFM_50G_AUI2;
943 	case ICE_PHY_TYPE_LOW_50GBASE_CP:
944 		return IFM_50G_CP;
945 	case ICE_PHY_TYPE_LOW_50GBASE_SR:
946 		return IFM_50G_SR;
947 	case ICE_PHY_TYPE_LOW_50GBASE_FR:
948 		return IFM_50G_FR;
949 	case ICE_PHY_TYPE_LOW_50GBASE_LR:
950 		return IFM_50G_LR;
951 	case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4:
952 		return IFM_50G_KR_PAM4;
953 	case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC:
954 		return IFM_50G_AUI1_AC;
955 	case ICE_PHY_TYPE_LOW_50G_AUI1:
956 		return IFM_50G_AUI1;
957 	case ICE_PHY_TYPE_LOW_100GBASE_CR4:
958 		return IFM_100G_CR4;
959 	case ICE_PHY_TYPE_LOW_100GBASE_SR4:
960 		return IFM_100G_SR4;
961 	case ICE_PHY_TYPE_LOW_100GBASE_LR4:
962 		return IFM_100G_LR4;
963 	case ICE_PHY_TYPE_LOW_100GBASE_KR4:
964 		return IFM_100G_KR4;
965 	case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC:
966 		return IFM_100G_CAUI4_AC;
967 	case ICE_PHY_TYPE_LOW_100G_CAUI4:
968 		return IFM_100G_CAUI4;
969 	case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC:
970 		return IFM_100G_AUI4_AC;
971 	case ICE_PHY_TYPE_LOW_100G_AUI4:
972 		return IFM_100G_AUI4;
973 	case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4:
974 		return IFM_100G_CR_PAM4;
975 	case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4:
976 		return IFM_100G_KR_PAM4;
977 	case ICE_PHY_TYPE_LOW_100GBASE_CP2:
978 		return IFM_100G_CP2;
979 	case ICE_PHY_TYPE_LOW_100GBASE_SR2:
980 		return IFM_100G_SR2;
981 	case ICE_PHY_TYPE_LOW_100GBASE_DR:
982 		return IFM_100G_DR;
983 	default:
984 		return IFM_UNKNOWN;
985 	}
986 }
987 
988 /**
989  * ice_get_phy_type_high - Get media associated with phy_type_high
990  * @phy_type_high: the upper 64bits of phy_type from the AdminQ
991  *
992  * Given the upper 64bits of the phy_type from the hardware, return the
993  * ifm_active bit associated. Return IFM_UNKNOWN on an unknown value. Note
994  * that only one of ice_get_phy_type_low or ice_get_phy_type_high should be
995  * called. If phy_type_high is zero, call ice_get_phy_type_low.
996  */
997 int
998 ice_get_phy_type_high(uint64_t phy_type_high)
999 {
1000 	switch (phy_type_high) {
1001 	case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4:
1002 		return IFM_100G_KR2_PAM4;
1003 	case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC:
1004 		return IFM_100G_CAUI2_AC;
1005 	case ICE_PHY_TYPE_HIGH_100G_CAUI2:
1006 		return IFM_100G_CAUI2;
1007 	case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC:
1008 		return IFM_100G_AUI2_AC;
1009 	case ICE_PHY_TYPE_HIGH_100G_AUI2:
1010 		return IFM_100G_AUI2;
1011 	default:
1012 		return IFM_UNKNOWN;
1013 	}
1014 }
1015 
1016 /**
1017  * ice_phy_types_to_max_rate - Returns port's max supported baudrate
1018  * @pi: port info struct
1019  *
1020  * ice_aq_get_phy_caps() w/ ICE_AQC_REPORT_TOPO_CAP_MEDIA parameter needs
1021  * to have been called before this function for it to work.
1022  */
1023 static uint64_t
1024 ice_phy_types_to_max_rate(struct ice_port_info *pi)
1025 {
1026 	uint64_t phy_low = pi->phy.phy_type_low;
1027 	uint64_t phy_high = pi->phy.phy_type_high;
1028 	uint64_t max_rate = 0;
1029 	int bit;
1030 
1031 	/*
1032 	 * These are based on the indices used in the BIT() macros for
1033 	 * ICE_PHY_TYPE_LOW_*
1034 	 */
1035 	static const uint64_t phy_rates[] = {
1036 	    IF_Mbps(100),
1037 	    IF_Mbps(100),
1038 	    IF_Gbps(1ULL),
1039 	    IF_Gbps(1ULL),
1040 	    IF_Gbps(1ULL),
1041 	    IF_Gbps(1ULL),
1042 	    IF_Gbps(1ULL),
1043 	    IF_Mbps(2500ULL),
1044 	    IF_Mbps(2500ULL),
1045 	    IF_Mbps(2500ULL),
1046 	    IF_Gbps(5ULL),
1047 	    IF_Gbps(5ULL),
1048 	    IF_Gbps(10ULL),
1049 	    IF_Gbps(10ULL),
1050 	    IF_Gbps(10ULL),
1051 	    IF_Gbps(10ULL),
1052 	    IF_Gbps(10ULL),
1053 	    IF_Gbps(10ULL),
1054 	    IF_Gbps(10ULL),
1055 	    IF_Gbps(25ULL),
1056 	    IF_Gbps(25ULL),
1057 	    IF_Gbps(25ULL),
1058 	    IF_Gbps(25ULL),
1059 	    IF_Gbps(25ULL),
1060 	    IF_Gbps(25ULL),
1061 	    IF_Gbps(25ULL),
1062 	    IF_Gbps(25ULL),
1063 	    IF_Gbps(25ULL),
1064 	    IF_Gbps(25ULL),
1065 	    IF_Gbps(25ULL),
1066 	    IF_Gbps(40ULL),
1067 	    IF_Gbps(40ULL),
1068 	    IF_Gbps(40ULL),
1069 	    IF_Gbps(40ULL),
1070 	    IF_Gbps(40ULL),
1071 	    IF_Gbps(40ULL),
1072 	    IF_Gbps(50ULL),
1073 	    IF_Gbps(50ULL),
1074 	    IF_Gbps(50ULL),
1075 	    IF_Gbps(50ULL),
1076 	    IF_Gbps(50ULL),
1077 	    IF_Gbps(50ULL),
1078 	    IF_Gbps(50ULL),
1079 	    IF_Gbps(50ULL),
1080 	    IF_Gbps(50ULL),
1081 	    IF_Gbps(50ULL),
1082 	    IF_Gbps(50ULL),
1083 	    IF_Gbps(50ULL),
1084 	    IF_Gbps(50ULL),
1085 	    IF_Gbps(50ULL),
1086 	    IF_Gbps(50ULL),
1087 	    IF_Gbps(100ULL),
1088 	    IF_Gbps(100ULL),
1089 	    IF_Gbps(100ULL),
1090 	    IF_Gbps(100ULL),
1091 	    IF_Gbps(100ULL),
1092 	    IF_Gbps(100ULL),
1093 	    IF_Gbps(100ULL),
1094 	    IF_Gbps(100ULL),
1095 	    IF_Gbps(100ULL),
1096 	    IF_Gbps(100ULL),
1097 	    IF_Gbps(100ULL),
1098 	    IF_Gbps(100ULL),
1099 	    IF_Gbps(100ULL),
1100 	    /* These rates are for ICE_PHY_TYPE_HIGH_* */
1101 	    IF_Gbps(100ULL),
1102 	    IF_Gbps(100ULL),
1103 	    IF_Gbps(100ULL),
1104 	    IF_Gbps(100ULL),
1105 	    IF_Gbps(100ULL)
1106 	};
1107 
1108 	/* coverity[address_of] */
1109 	for_each_set_bit(bit, &phy_high, 64)
1110 		if ((bit + 64) < (int)ARRAY_SIZE(phy_rates))
1111 			max_rate = uqmax(max_rate, phy_rates[(bit + 64)]);
1112 
1113 	/* coverity[address_of] */
1114 	for_each_set_bit(bit, &phy_low, 64)
1115 		max_rate = uqmax(max_rate, phy_rates[bit]);
1116 
1117 	return (max_rate);
1118 }
1119 
1120 /* The if_media type is split over the original 5 bit media variant field,
1121  * along with extended types using up extra bits in the options section.
1122  * We want to convert this split number into a bitmap index, so we reverse the
1123  * calculation of IFM_X here.
1124  */
1125 #define IFM_IDX(x) (((x) & IFM_TMASK) | \
1126 		    (((x) & IFM_ETH_XTYPE) >> IFM_ETH_XSHIFT))
1127 
1128 /**
1129  * ice_add_media_types - Add supported media types to the media structure
1130  * @sc: ice private softc structure
1131  * @media: ifmedia structure to setup
1132  *
1133  * Looks up the supported phy types, and initializes the various media types
1134  * available.
1135  *
1136  * @pre this function must be protected from being called while another thread
1137  * is accessing the ifmedia types.
1138  */
1139 enum ice_status
1140 ice_add_media_types(struct ice_softc *sc, struct ifmedia *media)
1141 {
1142 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
1143 	struct ice_port_info *pi = sc->hw.port_info;
1144 	enum ice_status status;
1145 	uint64_t phy_low, phy_high;
1146 	int bit;
1147 
1148 	ASSERT_CFG_LOCKED(sc);
1149 
1150 	/* the maximum possible media type index is 511. We probably don't
1151 	 * need most of this space, but this ensures future compatibility when
1152 	 * additional media types are used.
1153 	 */
1154 	ice_declare_bitmap(already_added, 511);
1155 
1156 	/* Remove all previous media types */
1157 	ifmedia_removeall(media);
1158 
1159 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
1160 				     &pcaps, NULL);
1161 	if (status != ICE_SUCCESS) {
1162 		device_printf(sc->dev,
1163 		    "%s: ice_aq_get_phy_caps (ACTIVE) failed; status %s, aq_err %s\n",
1164 		    __func__, ice_status_str(status),
1165 		    ice_aq_str(sc->hw.adminq.sq_last_status));
1166 		return (status);
1167 	}
1168 	phy_low = le64toh(pcaps.phy_type_low);
1169 	phy_high = le64toh(pcaps.phy_type_high);
1170 
1171 	/* make sure the added bitmap is zero'd */
1172 	memset(already_added, 0, sizeof(already_added));
1173 
1174 	/* coverity[address_of] */
1175 	for_each_set_bit(bit, &phy_low, 64) {
1176 		uint64_t type = BIT_ULL(bit);
1177 		int ostype;
1178 
1179 		/* get the OS media type */
1180 		ostype = ice_get_phy_type_low(type);
1181 
1182 		/* don't bother adding the unknown type */
1183 		if (ostype == IFM_UNKNOWN)
1184 			continue;
1185 
1186 		/* only add each media type to the list once */
1187 		if (ice_is_bit_set(already_added, IFM_IDX(ostype)))
1188 			continue;
1189 
1190 		ifmedia_add(media, IFM_ETHER | ostype, 0, NULL);
1191 		ice_set_bit(IFM_IDX(ostype), already_added);
1192 	}
1193 
1194 	/* coverity[address_of] */
1195 	for_each_set_bit(bit, &phy_high, 64) {
1196 		uint64_t type = BIT_ULL(bit);
1197 		int ostype;
1198 
1199 		/* get the OS media type */
1200 		ostype = ice_get_phy_type_high(type);
1201 
1202 		/* don't bother adding the unknown type */
1203 		if (ostype == IFM_UNKNOWN)
1204 			continue;
1205 
1206 		/* only add each media type to the list once */
1207 		if (ice_is_bit_set(already_added, IFM_IDX(ostype)))
1208 			continue;
1209 
1210 		ifmedia_add(media, IFM_ETHER | ostype, 0, NULL);
1211 		ice_set_bit(IFM_IDX(ostype), already_added);
1212 	}
1213 
1214 	/* Use autoselect media by default */
1215 	ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
1216 	ifmedia_set(media, IFM_ETHER | IFM_AUTO);
1217 
1218 	return (ICE_SUCCESS);
1219 }
1220 
1221 /**
1222  * ice_configure_rxq_interrupts - Configure HW Rx queues for MSI-X interrupts
1223  * @vsi: the VSI to configure
1224  *
1225  * Called when setting up MSI-X interrupts to configure the Rx hardware queues.
1226  */
1227 void
1228 ice_configure_rxq_interrupts(struct ice_vsi *vsi)
1229 {
1230 	struct ice_hw *hw = &vsi->sc->hw;
1231 	int i;
1232 
1233 	for (i = 0; i < vsi->num_rx_queues; i++) {
1234 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1235 		u32 val;
1236 
1237 		val = (QINT_RQCTL_CAUSE_ENA_M |
1238 		       (ICE_RX_ITR << QINT_RQCTL_ITR_INDX_S) |
1239 		       (rxq->irqv->me << QINT_RQCTL_MSIX_INDX_S));
1240 		wr32(hw, QINT_RQCTL(vsi->rx_qmap[rxq->me]), val);
1241 	}
1242 
1243 	ice_flush(hw);
1244 }
1245 
1246 /**
1247  * ice_configure_txq_interrupts - Configure HW Tx queues for MSI-X interrupts
1248  * @vsi: the VSI to configure
1249  *
1250  * Called when setting up MSI-X interrupts to configure the Tx hardware queues.
1251  */
1252 void
1253 ice_configure_txq_interrupts(struct ice_vsi *vsi)
1254 {
1255 	struct ice_hw *hw = &vsi->sc->hw;
1256 	int i;
1257 
1258 	for (i = 0; i < vsi->num_tx_queues; i++) {
1259 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1260 		u32 val;
1261 
1262 		val = (QINT_TQCTL_CAUSE_ENA_M |
1263 		       (ICE_TX_ITR << QINT_TQCTL_ITR_INDX_S) |
1264 		       (txq->irqv->me << QINT_TQCTL_MSIX_INDX_S));
1265 		wr32(hw, QINT_TQCTL(vsi->tx_qmap[txq->me]), val);
1266 	}
1267 
1268 	ice_flush(hw);
1269 }
1270 
1271 /**
1272  * ice_flush_rxq_interrupts - Unconfigure Hw Rx queues MSI-X interrupt cause
1273  * @vsi: the VSI to configure
1274  *
1275  * Unset the CAUSE_ENA flag of the TQCTL register for each queue, then trigger
1276  * a software interrupt on that cause. This is required as part of the Rx
1277  * queue disable logic to dissociate the Rx queue from the interrupt.
1278  *
1279  * Note: this function must be called prior to disabling Rx queues with
1280  * ice_control_rx_queues, otherwise the Rx queue may not be disabled properly.
1281  */
1282 void
1283 ice_flush_rxq_interrupts(struct ice_vsi *vsi)
1284 {
1285 	struct ice_hw *hw = &vsi->sc->hw;
1286 	int i;
1287 
1288 	for (i = 0; i < vsi->num_rx_queues; i++) {
1289 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1290 		u32 reg, val;
1291 
1292 		/* Clear the CAUSE_ENA flag */
1293 		reg = vsi->rx_qmap[rxq->me];
1294 		val = rd32(hw, QINT_RQCTL(reg));
1295 		val &= ~QINT_RQCTL_CAUSE_ENA_M;
1296 		wr32(hw, QINT_RQCTL(reg), val);
1297 
1298 		ice_flush(hw);
1299 
1300 		/* Trigger a software interrupt to complete interrupt
1301 		 * dissociation.
1302 		 */
1303 		wr32(hw, GLINT_DYN_CTL(rxq->irqv->me),
1304 		     GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M);
1305 	}
1306 }
1307 
1308 /**
1309  * ice_flush_txq_interrupts - Unconfigure Hw Tx queues MSI-X interrupt cause
1310  * @vsi: the VSI to configure
1311  *
1312  * Unset the CAUSE_ENA flag of the TQCTL register for each queue, then trigger
1313  * a software interrupt on that cause. This is required as part of the Tx
1314  * queue disable logic to dissociate the Tx queue from the interrupt.
1315  *
1316  * Note: this function must be called prior to ice_vsi_disable_tx, otherwise
1317  * the Tx queue disable may not complete properly.
1318  */
1319 void
1320 ice_flush_txq_interrupts(struct ice_vsi *vsi)
1321 {
1322 	struct ice_hw *hw = &vsi->sc->hw;
1323 	int i;
1324 
1325 	for (i = 0; i < vsi->num_tx_queues; i++) {
1326 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1327 		u32 reg, val;
1328 
1329 		/* Clear the CAUSE_ENA flag */
1330 		reg = vsi->tx_qmap[txq->me];
1331 		val = rd32(hw, QINT_TQCTL(reg));
1332 		val &= ~QINT_TQCTL_CAUSE_ENA_M;
1333 		wr32(hw, QINT_TQCTL(reg), val);
1334 
1335 		ice_flush(hw);
1336 
1337 		/* Trigger a software interrupt to complete interrupt
1338 		 * dissociation.
1339 		 */
1340 		wr32(hw, GLINT_DYN_CTL(txq->irqv->me),
1341 		     GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M);
1342 	}
1343 }
1344 
1345 /**
1346  * ice_configure_rx_itr - Configure the Rx ITR settings for this VSI
1347  * @vsi: the VSI to configure
1348  *
1349  * Program the hardware ITR registers with the settings for this VSI.
1350  */
1351 void
1352 ice_configure_rx_itr(struct ice_vsi *vsi)
1353 {
1354 	struct ice_hw *hw = &vsi->sc->hw;
1355 	int i;
1356 
1357 	/* TODO: Handle per-queue/per-vector ITR? */
1358 
1359 	for (i = 0; i < vsi->num_rx_queues; i++) {
1360 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1361 
1362 		wr32(hw, GLINT_ITR(ICE_RX_ITR, rxq->irqv->me),
1363 		     ice_itr_to_reg(hw, vsi->rx_itr));
1364 	}
1365 
1366 	ice_flush(hw);
1367 }
1368 
1369 /**
1370  * ice_configure_tx_itr - Configure the Tx ITR settings for this VSI
1371  * @vsi: the VSI to configure
1372  *
1373  * Program the hardware ITR registers with the settings for this VSI.
1374  */
1375 void
1376 ice_configure_tx_itr(struct ice_vsi *vsi)
1377 {
1378 	struct ice_hw *hw = &vsi->sc->hw;
1379 	int i;
1380 
1381 	/* TODO: Handle per-queue/per-vector ITR? */
1382 
1383 	for (i = 0; i < vsi->num_tx_queues; i++) {
1384 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1385 
1386 		wr32(hw, GLINT_ITR(ICE_TX_ITR, txq->irqv->me),
1387 		     ice_itr_to_reg(hw, vsi->tx_itr));
1388 	}
1389 
1390 	ice_flush(hw);
1391 }
1392 
1393 /**
1394  * ice_setup_tx_ctx - Setup an ice_tlan_ctx structure for a queue
1395  * @txq: the Tx queue to configure
1396  * @tlan_ctx: the Tx LAN queue context structure to initialize
1397  * @pf_q: real queue number
1398  */
1399 static int
1400 ice_setup_tx_ctx(struct ice_tx_queue *txq, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
1401 {
1402 	struct ice_vsi *vsi = txq->vsi;
1403 	struct ice_softc *sc = vsi->sc;
1404 	struct ice_hw *hw = &sc->hw;
1405 
1406 	tlan_ctx->port_num = hw->port_info->lport;
1407 
1408 	/* number of descriptors in the queue */
1409 	tlan_ctx->qlen = txq->desc_count;
1410 
1411 	/* set the transmit queue base address, defined in 128 byte units */
1412 	tlan_ctx->base = txq->tx_paddr >> 7;
1413 
1414 	tlan_ctx->pf_num = hw->pf_id;
1415 
1416 	/* For now, we only have code supporting PF VSIs */
1417 	switch (vsi->type) {
1418 	case ICE_VSI_PF:
1419 		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
1420 		break;
1421 	default:
1422 		return (ENODEV);
1423 	}
1424 
1425 	tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
1426 
1427 	/* Enable TSO */
1428 	tlan_ctx->tso_ena = 1;
1429 	tlan_ctx->internal_usage_flag = 1;
1430 
1431 	tlan_ctx->tso_qnum = pf_q;
1432 
1433 	/*
1434 	 * Stick with the older legacy Tx queue interface, instead of the new
1435 	 * advanced queue interface.
1436 	 */
1437 	tlan_ctx->legacy_int = 1;
1438 
1439 	/* Descriptor WB mode */
1440 	tlan_ctx->wb_mode = 0;
1441 
1442 	return (0);
1443 }
1444 
1445 /**
1446  * ice_cfg_vsi_for_tx - Configure the hardware for Tx
1447  * @vsi: the VSI to configure
1448  *
1449  * Configure the device Tx queues through firmware AdminQ commands. After
1450  * this, Tx queues will be ready for transmit.
1451  */
1452 int
1453 ice_cfg_vsi_for_tx(struct ice_vsi *vsi)
1454 {
1455 	struct ice_aqc_add_tx_qgrp *qg;
1456 	struct ice_hw *hw = &vsi->sc->hw;
1457 	device_t dev = vsi->sc->dev;
1458 	enum ice_status status;
1459 	int i;
1460 	int err = 0;
1461 	u16 qg_size, pf_q;
1462 
1463 	qg_size = ice_struct_size(qg, txqs, 1);
1464 	qg = (struct ice_aqc_add_tx_qgrp *)malloc(qg_size, M_ICE, M_NOWAIT|M_ZERO);
1465 	if (!qg)
1466 		return (ENOMEM);
1467 
1468 	qg->num_txqs = 1;
1469 
1470 	for (i = 0; i < vsi->num_tx_queues; i++) {
1471 		struct ice_tlan_ctx tlan_ctx = { 0 };
1472 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1473 
1474 		pf_q = vsi->tx_qmap[txq->me];
1475 		qg->txqs[0].txq_id = htole16(pf_q);
1476 
1477 		err = ice_setup_tx_ctx(txq, &tlan_ctx, pf_q);
1478 		if (err)
1479 			goto free_txqg;
1480 
1481 		ice_set_ctx(hw, (u8 *)&tlan_ctx, qg->txqs[0].txq_ctx,
1482 			    ice_tlan_ctx_info);
1483 
1484 		status = ice_ena_vsi_txq(hw->port_info, vsi->idx, txq->tc,
1485 					 txq->q_handle, 1, qg, qg_size, NULL);
1486 		if (status) {
1487 			device_printf(dev,
1488 				      "Failed to set LAN Tx queue %d (TC %d, handle %d) context, err %s aq_err %s\n",
1489 				      i, txq->tc, txq->q_handle,
1490 				      ice_status_str(status),
1491 				      ice_aq_str(hw->adminq.sq_last_status));
1492 			err = ENODEV;
1493 			goto free_txqg;
1494 		}
1495 
1496 		/* Keep track of the Tx queue TEID */
1497 		if (pf_q == le16toh(qg->txqs[0].txq_id))
1498 			txq->q_teid = le32toh(qg->txqs[0].q_teid);
1499 	}
1500 
1501 free_txqg:
1502 	free(qg, M_ICE);
1503 
1504 	return (err);
1505 }
1506 
1507 /**
1508  * ice_setup_rx_ctx - Setup an Rx context structure for a receive queue
1509  * @rxq: the receive queue to program
1510  *
1511  * Setup an Rx queue context structure and program it into the hardware
1512  * registers. This is a necessary step for enabling the Rx queue.
1513  *
1514  * @pre the VSI associated with this queue must have initialized mbuf_sz
1515  */
1516 static int
1517 ice_setup_rx_ctx(struct ice_rx_queue *rxq)
1518 {
1519 	struct ice_rlan_ctx rlan_ctx = {0};
1520 	struct ice_vsi *vsi = rxq->vsi;
1521 	struct ice_softc *sc = vsi->sc;
1522 	struct ice_hw *hw = &sc->hw;
1523 	enum ice_status status;
1524 	u32 rxdid = ICE_RXDID_FLEX_NIC;
1525 	u32 regval;
1526 	u16 pf_q;
1527 
1528 	pf_q = vsi->rx_qmap[rxq->me];
1529 
1530 	/* set the receive queue base address, defined in 128 byte units */
1531 	rlan_ctx.base = rxq->rx_paddr >> 7;
1532 
1533 	rlan_ctx.qlen = rxq->desc_count;
1534 
1535 	rlan_ctx.dbuf = vsi->mbuf_sz >> ICE_RLAN_CTX_DBUF_S;
1536 
1537 	/* use 32 byte descriptors */
1538 	rlan_ctx.dsize = 1;
1539 
1540 	/* Strip the Ethernet CRC bytes before the packet is posted to the
1541 	 * host memory.
1542 	 */
1543 	rlan_ctx.crcstrip = 1;
1544 
1545 	rlan_ctx.l2tsel = 1;
1546 
1547 	/* don't do header splitting */
1548 	rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
1549 	rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
1550 	rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
1551 
1552 	/* strip VLAN from inner headers */
1553 	rlan_ctx.showiv = 1;
1554 
1555 	rlan_ctx.rxmax = min(vsi->max_frame_size,
1556 			     ICE_MAX_RX_SEGS * vsi->mbuf_sz);
1557 
1558 	rlan_ctx.lrxqthresh = 1;
1559 
1560 	if (vsi->type != ICE_VSI_VF) {
1561 		regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
1562 		regval &= ~QRXFLXP_CNTXT_RXDID_IDX_M;
1563 		regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
1564 			QRXFLXP_CNTXT_RXDID_IDX_M;
1565 
1566 		regval &= ~QRXFLXP_CNTXT_RXDID_PRIO_M;
1567 		regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
1568 			QRXFLXP_CNTXT_RXDID_PRIO_M;
1569 
1570 		wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
1571 	}
1572 
1573 	status = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
1574 	if (status) {
1575 		device_printf(sc->dev,
1576 			      "Failed to set LAN Rx queue context, err %s aq_err %s\n",
1577 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
1578 		return (EIO);
1579 	}
1580 
1581 	wr32(hw, rxq->tail, 0);
1582 
1583 	return 0;
1584 }
1585 
1586 /**
1587  * ice_cfg_vsi_for_rx - Configure the hardware for Rx
1588  * @vsi: the VSI to configure
1589  *
1590  * Prepare an Rx context descriptor and configure the device to receive
1591  * traffic.
1592  *
1593  * @pre the VSI must have initialized mbuf_sz
1594  */
1595 int
1596 ice_cfg_vsi_for_rx(struct ice_vsi *vsi)
1597 {
1598 	int i, err;
1599 
1600 	for (i = 0; i < vsi->num_rx_queues; i++) {
1601 		MPASS(vsi->mbuf_sz > 0);
1602 		err = ice_setup_rx_ctx(&vsi->rx_queues[i]);
1603 		if (err)
1604 			return err;
1605 	}
1606 
1607 	return (0);
1608 }
1609 
1610 /**
1611  * ice_is_rxq_ready - Check if an Rx queue is ready
1612  * @hw: ice hw structure
1613  * @pf_q: absolute PF queue index to check
1614  * @reg: on successful return, contains qrx_ctrl contents
1615  *
1616  * Reads the QRX_CTRL register and verifies if the queue is in a consistent
1617  * state. That is, QENA_REQ matches QENA_STAT. Used to check before making
1618  * a request to change the queue, as well as to verify the request has
1619  * finished. The queue should change status within a few microseconds, so we
1620  * use a small delay while polling the register.
1621  *
1622  * Returns an error code if the queue does not update after a few retries.
1623  */
1624 static int
1625 ice_is_rxq_ready(struct ice_hw *hw, int pf_q, u32 *reg)
1626 {
1627 	u32 qrx_ctrl, qena_req, qena_stat;
1628 	int i;
1629 
1630 	for (i = 0; i < ICE_Q_WAIT_RETRY_LIMIT; i++) {
1631 		qrx_ctrl = rd32(hw, QRX_CTRL(pf_q));
1632 		qena_req = (qrx_ctrl >> QRX_CTRL_QENA_REQ_S) & 1;
1633 		qena_stat = (qrx_ctrl >> QRX_CTRL_QENA_STAT_S) & 1;
1634 
1635 		/* if the request and status bits equal, then the queue is
1636 		 * fully disabled or enabled.
1637 		 */
1638 		if (qena_req == qena_stat) {
1639 			*reg = qrx_ctrl;
1640 			return (0);
1641 		}
1642 
1643 		/* wait a few microseconds before we check again */
1644 		DELAY(10);
1645 	}
1646 
1647 	return (ETIMEDOUT);
1648 }
1649 
1650 /**
1651  * ice_control_rx_queues - Configure hardware to start or stop the Rx queues
1652  * @vsi: VSI to enable/disable queues
1653  * @enable: true to enable queues, false to disable
1654  *
1655  * Control the Rx queues through the QRX_CTRL register, enabling or disabling
1656  * them. Wait for the appropriate time to ensure that the queues have actually
1657  * reached the expected state.
1658  */
1659 int
1660 ice_control_rx_queues(struct ice_vsi *vsi, bool enable)
1661 {
1662 	struct ice_hw *hw = &vsi->sc->hw;
1663 	device_t dev = vsi->sc->dev;
1664 	u32 qrx_ctrl = 0;
1665 	int i, err;
1666 
1667 	/* TODO: amortize waits by changing all queues up front and then
1668 	 * checking their status afterwards. This will become more necessary
1669 	 * when we have a large number of queues.
1670 	 */
1671 	for (i = 0; i < vsi->num_rx_queues; i++) {
1672 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1673 		int pf_q = vsi->rx_qmap[rxq->me];
1674 
1675 		err = ice_is_rxq_ready(hw, pf_q, &qrx_ctrl);
1676 		if (err) {
1677 			device_printf(dev,
1678 				      "Rx queue %d is not ready\n",
1679 				      pf_q);
1680 			return err;
1681 		}
1682 
1683 		/* Skip if the queue is already in correct state */
1684 		if (enable == !!(qrx_ctrl & QRX_CTRL_QENA_STAT_M))
1685 			continue;
1686 
1687 		if (enable)
1688 			qrx_ctrl |= QRX_CTRL_QENA_REQ_M;
1689 		else
1690 			qrx_ctrl &= ~QRX_CTRL_QENA_REQ_M;
1691 		wr32(hw, QRX_CTRL(pf_q), qrx_ctrl);
1692 
1693 		/* wait for the queue to finalize the request */
1694 		err = ice_is_rxq_ready(hw, pf_q, &qrx_ctrl);
1695 		if (err) {
1696 			device_printf(dev,
1697 				      "Rx queue %d %sable timeout\n",
1698 				      pf_q, (enable ? "en" : "dis"));
1699 			return err;
1700 		}
1701 
1702 		/* this should never happen */
1703 		if (enable != !!(qrx_ctrl & QRX_CTRL_QENA_STAT_M)) {
1704 			device_printf(dev,
1705 				      "Rx queue %d invalid state\n",
1706 				      pf_q);
1707 			return (EDOOFUS);
1708 		}
1709 	}
1710 
1711 	return (0);
1712 }
1713 
1714 /**
1715  * ice_add_mac_to_list - Add MAC filter to a MAC filter list
1716  * @vsi: the VSI to forward to
1717  * @list: list which contains MAC filter entries
1718  * @addr: the MAC address to be added
1719  * @action: filter action to perform on match
1720  *
1721  * Adds a MAC address filter to the list which will be forwarded to firmware
1722  * to add a series of MAC address filters.
1723  *
1724  * Returns 0 on success, and an error code on failure.
1725  *
1726  */
1727 static int
1728 ice_add_mac_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
1729 		    const u8 *addr, enum ice_sw_fwd_act_type action)
1730 {
1731 	struct ice_fltr_list_entry *entry;
1732 
1733 	entry = (__typeof(entry))malloc(sizeof(*entry), M_ICE, M_NOWAIT|M_ZERO);
1734 	if (!entry)
1735 		return (ENOMEM);
1736 
1737 	entry->fltr_info.flag = ICE_FLTR_TX;
1738 	entry->fltr_info.src_id = ICE_SRC_ID_VSI;
1739 	entry->fltr_info.lkup_type = ICE_SW_LKUP_MAC;
1740 	entry->fltr_info.fltr_act = action;
1741 	entry->fltr_info.vsi_handle = vsi->idx;
1742 	bcopy(addr, entry->fltr_info.l_data.mac.mac_addr, ETHER_ADDR_LEN);
1743 
1744 	LIST_ADD(&entry->list_entry, list);
1745 
1746 	return 0;
1747 }
1748 
1749 /**
1750  * ice_free_fltr_list - Free memory associated with a MAC address list
1751  * @list: the list to free
1752  *
1753  * Free the memory of each entry associated with the list.
1754  */
1755 static void
1756 ice_free_fltr_list(struct ice_list_head *list)
1757 {
1758 	struct ice_fltr_list_entry *e, *tmp;
1759 
1760 	LIST_FOR_EACH_ENTRY_SAFE(e, tmp, list, ice_fltr_list_entry, list_entry) {
1761 		LIST_DEL(&e->list_entry);
1762 		free(e, M_ICE);
1763 	}
1764 }
1765 
1766 /**
1767  * ice_add_vsi_mac_filter - Add a MAC address filter for a VSI
1768  * @vsi: the VSI to add the filter for
1769  * @addr: MAC address to add a filter for
1770  *
1771  * Add a MAC address filter for a given VSI. This is a wrapper around
1772  * ice_add_mac to simplify the interface. First, it only accepts a single
1773  * address, so we don't have to mess around with the list setup in other
1774  * functions. Second, it ignores the ICE_ERR_ALREADY_EXISTS error, so that
1775  * callers don't need to worry about attempting to add the same filter twice.
1776  */
1777 int
1778 ice_add_vsi_mac_filter(struct ice_vsi *vsi, const u8 *addr)
1779 {
1780 	struct ice_list_head mac_addr_list;
1781 	struct ice_hw *hw = &vsi->sc->hw;
1782 	device_t dev = vsi->sc->dev;
1783 	enum ice_status status;
1784 	int err = 0;
1785 
1786 	INIT_LIST_HEAD(&mac_addr_list);
1787 
1788 	err = ice_add_mac_to_list(vsi, &mac_addr_list, addr, ICE_FWD_TO_VSI);
1789 	if (err)
1790 		goto free_mac_list;
1791 
1792 	status = ice_add_mac(hw, &mac_addr_list);
1793 	if (status == ICE_ERR_ALREADY_EXISTS) {
1794 		; /* Don't complain if we try to add a filter that already exists */
1795 	} else if (status) {
1796 		device_printf(dev,
1797 			      "Failed to add a filter for MAC %6D, err %s aq_err %s\n",
1798 			      addr, ":",
1799 			      ice_status_str(status),
1800 			      ice_aq_str(hw->adminq.sq_last_status));
1801 		err = (EIO);
1802 	}
1803 
1804 free_mac_list:
1805 	ice_free_fltr_list(&mac_addr_list);
1806 	return err;
1807 }
1808 
1809 /**
1810  * ice_cfg_pf_default_mac_filters - Setup default unicast and broadcast addrs
1811  * @sc: device softc structure
1812  *
1813  * Program the default unicast and broadcast filters for the PF VSI.
1814  */
1815 int
1816 ice_cfg_pf_default_mac_filters(struct ice_softc *sc)
1817 {
1818 	struct ice_vsi *vsi = &sc->pf_vsi;
1819 	struct ice_hw *hw = &sc->hw;
1820 	int err;
1821 
1822 	/* Add the LAN MAC address */
1823 	err = ice_add_vsi_mac_filter(vsi, hw->port_info->mac.lan_addr);
1824 	if (err)
1825 		return err;
1826 
1827 	/* Add the broadcast address */
1828 	err = ice_add_vsi_mac_filter(vsi, broadcastaddr);
1829 	if (err)
1830 		return err;
1831 
1832 	return (0);
1833 }
1834 
1835 /**
1836  * ice_remove_vsi_mac_filter - Remove a MAC address filter for a VSI
1837  * @vsi: the VSI to add the filter for
1838  * @addr: MAC address to remove a filter for
1839  *
1840  * Remove a MAC address filter from a given VSI. This is a wrapper around
1841  * ice_remove_mac to simplify the interface. First, it only accepts a single
1842  * address, so we don't have to mess around with the list setup in other
1843  * functions. Second, it ignores the ICE_ERR_DOES_NOT_EXIST error, so that
1844  * callers don't need to worry about attempting to remove filters which
1845  * haven't yet been added.
1846  */
1847 int
1848 ice_remove_vsi_mac_filter(struct ice_vsi *vsi, const u8 *addr)
1849 {
1850 	struct ice_list_head mac_addr_list;
1851 	struct ice_hw *hw = &vsi->sc->hw;
1852 	device_t dev = vsi->sc->dev;
1853 	enum ice_status status;
1854 	int err = 0;
1855 
1856 	INIT_LIST_HEAD(&mac_addr_list);
1857 
1858 	err = ice_add_mac_to_list(vsi, &mac_addr_list, addr, ICE_FWD_TO_VSI);
1859 	if (err)
1860 		goto free_mac_list;
1861 
1862 	status = ice_remove_mac(hw, &mac_addr_list);
1863 	if (status == ICE_ERR_DOES_NOT_EXIST) {
1864 		; /* Don't complain if we try to remove a filter that doesn't exist */
1865 	} else if (status) {
1866 		device_printf(dev,
1867 			      "Failed to remove a filter for MAC %6D, err %s aq_err %s\n",
1868 			      addr, ":",
1869 			      ice_status_str(status),
1870 			      ice_aq_str(hw->adminq.sq_last_status));
1871 		err = (EIO);
1872 	}
1873 
1874 free_mac_list:
1875 	ice_free_fltr_list(&mac_addr_list);
1876 	return err;
1877 }
1878 
1879 /**
1880  * ice_rm_pf_default_mac_filters - Remove default unicast and broadcast addrs
1881  * @sc: device softc structure
1882  *
1883  * Remove the default unicast and broadcast filters from the PF VSI.
1884  */
1885 int
1886 ice_rm_pf_default_mac_filters(struct ice_softc *sc)
1887 {
1888 	struct ice_vsi *vsi = &sc->pf_vsi;
1889 	struct ice_hw *hw = &sc->hw;
1890 	int err;
1891 
1892 	/* Remove the LAN MAC address */
1893 	err = ice_remove_vsi_mac_filter(vsi, hw->port_info->mac.lan_addr);
1894 	if (err)
1895 		return err;
1896 
1897 	/* Remove the broadcast address */
1898 	err = ice_remove_vsi_mac_filter(vsi, broadcastaddr);
1899 	if (err)
1900 		return (EIO);
1901 
1902 	return (0);
1903 }
1904 
1905 /**
1906  * ice_check_ctrlq_errors - Check for and report controlq errors
1907  * @sc: device private structure
1908  * @qname: name of the controlq
1909  * @cq: the controlq to check
1910  *
1911  * Check and report controlq errors. Currently all we do is report them to the
1912  * kernel message log, but we might want to improve this in the future, such
1913  * as to keep track of statistics.
1914  */
1915 static void
1916 ice_check_ctrlq_errors(struct ice_softc *sc, const char *qname,
1917 		       struct ice_ctl_q_info *cq)
1918 {
1919 	struct ice_hw *hw = &sc->hw;
1920 	u32 val;
1921 
1922 	/* Check for error indications. Note that all the controlqs use the
1923 	 * same register layout, so we use the PF_FW_AxQLEN defines only.
1924 	 */
1925 	val = rd32(hw, cq->rq.len);
1926 	if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
1927 		   PF_FW_ARQLEN_ARQCRIT_M)) {
1928 		if (val & PF_FW_ARQLEN_ARQVFE_M)
1929 			device_printf(sc->dev,
1930 				"%s Receive Queue VF Error detected\n", qname);
1931 		if (val & PF_FW_ARQLEN_ARQOVFL_M)
1932 			device_printf(sc->dev,
1933 				"%s Receive Queue Overflow Error detected\n",
1934 				qname);
1935 		if (val & PF_FW_ARQLEN_ARQCRIT_M)
1936 			device_printf(sc->dev,
1937 				"%s Receive Queue Critical Error detected\n",
1938 				qname);
1939 		val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
1940 			 PF_FW_ARQLEN_ARQCRIT_M);
1941 		wr32(hw, cq->rq.len, val);
1942 	}
1943 
1944 	val = rd32(hw, cq->sq.len);
1945 	if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
1946 		   PF_FW_ATQLEN_ATQCRIT_M)) {
1947 		if (val & PF_FW_ATQLEN_ATQVFE_M)
1948 			device_printf(sc->dev,
1949 				"%s Send Queue VF Error detected\n", qname);
1950 		if (val & PF_FW_ATQLEN_ATQOVFL_M)
1951 			device_printf(sc->dev,
1952 				"%s Send Queue Overflow Error detected\n",
1953 				qname);
1954 		if (val & PF_FW_ATQLEN_ATQCRIT_M)
1955 			device_printf(sc->dev,
1956 				"%s Send Queue Critical Error detected\n",
1957 				qname);
1958 		val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
1959 			 PF_FW_ATQLEN_ATQCRIT_M);
1960 		wr32(hw, cq->sq.len, val);
1961 	}
1962 }
1963 
1964 /**
1965  * ice_process_link_event - Process a link event indication from firmware
1966  * @sc: device softc structure
1967  * @e: the received event data
1968  *
1969  * Gets the current link status from hardware, and may print a message if an
1970  * unqualified is detected.
1971  */
1972 static void
1973 ice_process_link_event(struct ice_softc *sc,
1974 		       struct ice_rq_event_info __invariant_only *e)
1975 {
1976 	struct ice_port_info *pi = sc->hw.port_info;
1977 	struct ice_hw *hw = &sc->hw;
1978 	device_t dev = sc->dev;
1979 	enum ice_status status;
1980 
1981 	/* Sanity check that the data length isn't too small */
1982 	MPASS(le16toh(e->desc.datalen) >= ICE_GET_LINK_STATUS_DATALEN_V1);
1983 
1984 	/*
1985 	 * Even though the adapter gets link status information inside the
1986 	 * event, it needs to send a Get Link Status AQ command in order
1987 	 * to re-enable link events.
1988 	 */
1989 	pi->phy.get_link_info = true;
1990 	ice_get_link_status(pi, &sc->link_up);
1991 
1992 	if (pi->phy.link_info.topo_media_conflict &
1993 	   (ICE_AQ_LINK_TOPO_CONFLICT | ICE_AQ_LINK_MEDIA_CONFLICT |
1994 	    ICE_AQ_LINK_TOPO_CORRUPT))
1995 		device_printf(dev,
1996 		    "Possible mis-configuration of the Ethernet port detected; please use the Intel (R) Ethernet Port Configuration Tool utility to address the issue.\n");
1997 
1998 	if ((pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) &&
1999 	    !(pi->phy.link_info.link_info & ICE_AQ_LINK_UP)) {
2000 		if (!(pi->phy.link_info.an_info & ICE_AQ_QUALIFIED_MODULE))
2001 			device_printf(dev,
2002 			    "Link is disabled on this device because an unsupported module type was detected! Refer to the Intel (R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
2003 		if (pi->phy.link_info.link_cfg_err & ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED)
2004 			device_printf(dev,
2005 			    "The module's power requirements exceed the device's power supply. Cannot start link.\n");
2006 		if (pi->phy.link_info.link_cfg_err & ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT)
2007 			device_printf(dev,
2008 			    "The installed module is incompatible with the device's NVM image. Cannot start link.\n");
2009 	}
2010 
2011 	if (!(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
2012 		if (!ice_testandset_state(&sc->state, ICE_STATE_NO_MEDIA)) {
2013 			status = ice_aq_set_link_restart_an(pi, false, NULL);
2014 			if (status != ICE_SUCCESS)
2015 				device_printf(dev,
2016 				    "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n",
2017 				    __func__, ice_status_str(status),
2018 				    ice_aq_str(hw->adminq.sq_last_status));
2019 		}
2020 	}
2021 	/* ICE_STATE_NO_MEDIA is cleared when polling task detects media */
2022 
2023 	/* Indicate that link status must be reported again */
2024 	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
2025 
2026 	/* OS link info is updated elsewhere */
2027 }
2028 
2029 /**
2030  * ice_process_ctrlq_event - Respond to a controlq event
2031  * @sc: device private structure
2032  * @qname: the name for this controlq
2033  * @event: the event to process
2034  *
2035  * Perform actions in response to various controlq event notifications.
2036  */
2037 static void
2038 ice_process_ctrlq_event(struct ice_softc *sc, const char *qname,
2039 			struct ice_rq_event_info *event)
2040 {
2041 	u16 opcode;
2042 
2043 	opcode = le16toh(event->desc.opcode);
2044 
2045 	switch (opcode) {
2046 	case ice_aqc_opc_get_link_status:
2047 		ice_process_link_event(sc, event);
2048 		break;
2049 	case ice_mbx_opc_send_msg_to_pf:
2050 		/* TODO: handle IOV event */
2051 		break;
2052 	case ice_aqc_opc_fw_logs_event:
2053 		ice_handle_fw_log_event(sc, &event->desc, event->msg_buf);
2054 		break;
2055 	case ice_aqc_opc_lldp_set_mib_change:
2056 		ice_handle_mib_change_event(sc, event);
2057 		break;
2058 	case ice_aqc_opc_event_lan_overflow:
2059 		ice_handle_lan_overflow_event(sc, event);
2060 		break;
2061 	case ice_aqc_opc_get_health_status:
2062 		ice_handle_health_status_event(sc, event);
2063 		break;
2064 	default:
2065 		device_printf(sc->dev,
2066 			      "%s Receive Queue unhandled event 0x%04x ignored\n",
2067 			      qname, opcode);
2068 	}
2069 }
2070 
2071 /**
2072  * ice_process_ctrlq - helper function to process controlq rings
2073  * @sc: device private structure
2074  * @q_type: specific control queue type
2075  * @pending: return parameter to track remaining events
2076  *
2077  * Process controlq events for a given control queue type. Returns zero on
2078  * success, and an error code on failure. If successful, pending is the number
2079  * of remaining events left in the queue.
2080  */
2081 int
2082 ice_process_ctrlq(struct ice_softc *sc, enum ice_ctl_q q_type, u16 *pending)
2083 {
2084 	struct ice_rq_event_info event = { { 0 } };
2085 	struct ice_hw *hw = &sc->hw;
2086 	struct ice_ctl_q_info *cq;
2087 	enum ice_status status;
2088 	const char *qname;
2089 	int loop = 0;
2090 
2091 	switch (q_type) {
2092 	case ICE_CTL_Q_ADMIN:
2093 		cq = &hw->adminq;
2094 		qname = "Admin";
2095 		break;
2096 	case ICE_CTL_Q_MAILBOX:
2097 		cq = &hw->mailboxq;
2098 		qname = "Mailbox";
2099 		break;
2100 	default:
2101 		device_printf(sc->dev,
2102 			      "Unknown control queue type 0x%x\n",
2103 			      q_type);
2104 		return 0;
2105 	}
2106 
2107 	ice_check_ctrlq_errors(sc, qname, cq);
2108 
2109 	/*
2110 	 * Control queue processing happens during the admin task which may be
2111 	 * holding a non-sleepable lock, so we *must* use M_NOWAIT here.
2112 	 */
2113 	event.buf_len = cq->rq_buf_size;
2114 	event.msg_buf = (u8 *)malloc(event.buf_len, M_ICE, M_ZERO | M_NOWAIT);
2115 	if (!event.msg_buf) {
2116 		device_printf(sc->dev,
2117 			      "Unable to allocate memory for %s Receive Queue event\n",
2118 			      qname);
2119 		return (ENOMEM);
2120 	}
2121 
2122 	do {
2123 		status = ice_clean_rq_elem(hw, cq, &event, pending);
2124 		if (status == ICE_ERR_AQ_NO_WORK)
2125 			break;
2126 		if (status) {
2127 			if (q_type == ICE_CTL_Q_ADMIN)
2128 				device_printf(sc->dev,
2129 					      "%s Receive Queue event error %s\n",
2130 					      qname, ice_status_str(status));
2131 			else
2132 				device_printf(sc->dev,
2133 					      "%s Receive Queue event error %s\n",
2134 					      qname, ice_status_str(status));
2135 			free(event.msg_buf, M_ICE);
2136 			return (EIO);
2137 		}
2138 		/* XXX should we separate this handler by controlq type? */
2139 		ice_process_ctrlq_event(sc, qname, &event);
2140 	} while (*pending && (++loop < ICE_CTRLQ_WORK_LIMIT));
2141 
2142 	free(event.msg_buf, M_ICE);
2143 
2144 	return 0;
2145 }
2146 
2147 /**
2148  * pkg_ver_empty - Check if a package version is empty
2149  * @pkg_ver: the package version to check
2150  * @pkg_name: the package name to check
2151  *
2152  * Checks if the package version structure is empty. We consider a package
2153  * version as empty if none of the versions are non-zero and the name string
2154  * is null as well.
2155  *
2156  * This is used to check if the package version was initialized by the driver,
2157  * as we do not expect an actual DDP package file to have a zero'd version and
2158  * name.
2159  *
2160  * @returns true if the package version is valid, or false otherwise.
2161  */
2162 static bool
2163 pkg_ver_empty(struct ice_pkg_ver *pkg_ver, u8 *pkg_name)
2164 {
2165 	return (pkg_name[0] == '\0' &&
2166 		pkg_ver->major == 0 &&
2167 		pkg_ver->minor == 0 &&
2168 		pkg_ver->update == 0 &&
2169 		pkg_ver->draft == 0);
2170 }
2171 
2172 /**
2173  * pkg_ver_compatible - Check if the package version is compatible
2174  * @pkg_ver: the package version to check
2175  *
2176  * Compares the package version number to the driver's expected major/minor
2177  * version. Returns an integer indicating whether the version is older, newer,
2178  * or compatible with the driver.
2179  *
2180  * @returns 0 if the package version is compatible, -1 if the package version
2181  * is older, and 1 if the package version is newer than the driver version.
2182  */
2183 static int
2184 pkg_ver_compatible(struct ice_pkg_ver *pkg_ver)
2185 {
2186 	if (pkg_ver->major > ICE_PKG_SUPP_VER_MAJ)
2187 		return (1); /* newer */
2188 	else if ((pkg_ver->major == ICE_PKG_SUPP_VER_MAJ) &&
2189 		 (pkg_ver->minor > ICE_PKG_SUPP_VER_MNR))
2190 		return (1); /* newer */
2191 	else if ((pkg_ver->major == ICE_PKG_SUPP_VER_MAJ) &&
2192 		 (pkg_ver->minor == ICE_PKG_SUPP_VER_MNR))
2193 		return (0); /* compatible */
2194 	else
2195 		return (-1); /* older */
2196 }
2197 
2198 /**
2199  * ice_os_pkg_version_str - Format OS package version info into a sbuf
2200  * @hw: device hw structure
2201  * @buf: string buffer to store name/version string
2202  *
2203  * Formats the name and version of the OS DDP package as found in the ice_ddp
2204  * module into a string.
2205  *
2206  * @remark This will almost always be the same as the active package, but
2207  * could be different in some cases. Use ice_active_pkg_version_str to get the
2208  * version of the active DDP package.
2209  */
2210 static void
2211 ice_os_pkg_version_str(struct ice_hw *hw, struct sbuf *buf)
2212 {
2213 	char name_buf[ICE_PKG_NAME_SIZE];
2214 
2215 	/* If the OS DDP package info is empty, use "None" */
2216 	if (pkg_ver_empty(&hw->pkg_ver, hw->pkg_name)) {
2217 		sbuf_printf(buf, "None");
2218 		return;
2219 	}
2220 
2221 	/*
2222 	 * This should already be null-terminated, but since this is a raw
2223 	 * value from an external source, strlcpy() into a new buffer to
2224 	 * make sure.
2225 	 */
2226 	bzero(name_buf, sizeof(name_buf));
2227 	strlcpy(name_buf, (char *)hw->pkg_name, ICE_PKG_NAME_SIZE);
2228 
2229 	sbuf_printf(buf, "%s version %u.%u.%u.%u",
2230 	    name_buf,
2231 	    hw->pkg_ver.major,
2232 	    hw->pkg_ver.minor,
2233 	    hw->pkg_ver.update,
2234 	    hw->pkg_ver.draft);
2235 }
2236 
2237 /**
2238  * ice_active_pkg_version_str - Format active package version info into a sbuf
2239  * @hw: device hw structure
2240  * @buf: string buffer to store name/version string
2241  *
2242  * Formats the name and version of the active DDP package info into a string
2243  * buffer for use.
2244  */
2245 static void
2246 ice_active_pkg_version_str(struct ice_hw *hw, struct sbuf *buf)
2247 {
2248 	char name_buf[ICE_PKG_NAME_SIZE];
2249 
2250 	/* If the active DDP package info is empty, use "None" */
2251 	if (pkg_ver_empty(&hw->active_pkg_ver, hw->active_pkg_name)) {
2252 		sbuf_printf(buf, "None");
2253 		return;
2254 	}
2255 
2256 	/*
2257 	 * This should already be null-terminated, but since this is a raw
2258 	 * value from an external source, strlcpy() into a new buffer to
2259 	 * make sure.
2260 	 */
2261 	bzero(name_buf, sizeof(name_buf));
2262 	strlcpy(name_buf, (char *)hw->active_pkg_name, ICE_PKG_NAME_SIZE);
2263 
2264 	sbuf_printf(buf, "%s version %u.%u.%u.%u",
2265 	    name_buf,
2266 	    hw->active_pkg_ver.major,
2267 	    hw->active_pkg_ver.minor,
2268 	    hw->active_pkg_ver.update,
2269 	    hw->active_pkg_ver.draft);
2270 
2271 	if (hw->active_track_id != 0)
2272 		sbuf_printf(buf, ", track id 0x%08x", hw->active_track_id);
2273 }
2274 
2275 /**
2276  * ice_nvm_version_str - Format the NVM version information into a sbuf
2277  * @hw: device hw structure
2278  * @buf: string buffer to store version string
2279  *
2280  * Formats the NVM information including firmware version, API version, NVM
2281  * version, the EETRACK id, and OEM specific version information into a string
2282  * buffer.
2283  */
2284 static void
2285 ice_nvm_version_str(struct ice_hw *hw, struct sbuf *buf)
2286 {
2287 	struct ice_nvm_info *nvm = &hw->flash.nvm;
2288 	struct ice_orom_info *orom = &hw->flash.orom;
2289 	struct ice_netlist_info *netlist = &hw->flash.netlist;
2290 
2291 	/* Note that the netlist versions are stored in packed Binary Coded
2292 	 * Decimal format. The use of '%x' will correctly display these as
2293 	 * decimal numbers. This works because every 4 bits will be displayed
2294 	 * as a hexadecimal digit, and the BCD format will only use the values
2295 	 * 0-9.
2296 	 */
2297 	sbuf_printf(buf,
2298 		    "fw %u.%u.%u api %u.%u nvm %x.%02x etid %08x netlist %x.%x.%x-%x.%x.%x.%04x oem %u.%u.%u",
2299 		    hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch,
2300 		    hw->api_maj_ver, hw->api_min_ver,
2301 		    nvm->major, nvm->minor, nvm->eetrack,
2302 		    netlist->major, netlist->minor,
2303 		    netlist->type >> 16, netlist->type & 0xFFFF,
2304 		    netlist->rev, netlist->cust_ver, netlist->hash,
2305 		    orom->major, orom->build, orom->patch);
2306 }
2307 
2308 /**
2309  * ice_print_nvm_version - Print the NVM info to the kernel message log
2310  * @sc: the device softc structure
2311  *
2312  * Format and print an NVM version string using ice_nvm_version_str().
2313  */
2314 void
2315 ice_print_nvm_version(struct ice_softc *sc)
2316 {
2317 	struct ice_hw *hw = &sc->hw;
2318 	device_t dev = sc->dev;
2319 	struct sbuf *sbuf;
2320 
2321 	sbuf = sbuf_new_auto();
2322 	ice_nvm_version_str(hw, sbuf);
2323 	sbuf_finish(sbuf);
2324 	device_printf(dev, "%s\n", sbuf_data(sbuf));
2325 	sbuf_delete(sbuf);
2326 }
2327 
2328 /**
2329  * ice_update_vsi_hw_stats - Update VSI-specific ethernet statistics counters
2330  * @vsi: the VSI to be updated
2331  *
2332  * Reads hardware stats and updates the ice_vsi_hw_stats tracking structure with
2333  * the updated values.
2334  */
2335 void
2336 ice_update_vsi_hw_stats(struct ice_vsi *vsi)
2337 {
2338 	struct ice_eth_stats *prev_es, *cur_es;
2339 	struct ice_hw *hw = &vsi->sc->hw;
2340 	u16 vsi_num;
2341 
2342 	if (!ice_is_vsi_valid(hw, vsi->idx))
2343 		return;
2344 
2345 	vsi_num = ice_get_hw_vsi_num(hw, vsi->idx); /* HW absolute index of a VSI */
2346 	prev_es = &vsi->hw_stats.prev;
2347 	cur_es = &vsi->hw_stats.cur;
2348 
2349 #define ICE_VSI_STAT40(name, location) \
2350 	ice_stat_update40(hw, name ## L(vsi_num), \
2351 			  vsi->hw_stats.offsets_loaded, \
2352 			  &prev_es->location, &cur_es->location)
2353 
2354 #define ICE_VSI_STAT32(name, location) \
2355 	ice_stat_update32(hw, name(vsi_num), \
2356 			  vsi->hw_stats.offsets_loaded, \
2357 			  &prev_es->location, &cur_es->location)
2358 
2359 	ICE_VSI_STAT40(GLV_GORC, rx_bytes);
2360 	ICE_VSI_STAT40(GLV_UPRC, rx_unicast);
2361 	ICE_VSI_STAT40(GLV_MPRC, rx_multicast);
2362 	ICE_VSI_STAT40(GLV_BPRC, rx_broadcast);
2363 	ICE_VSI_STAT32(GLV_RDPC, rx_discards);
2364 	ICE_VSI_STAT40(GLV_GOTC, tx_bytes);
2365 	ICE_VSI_STAT40(GLV_UPTC, tx_unicast);
2366 	ICE_VSI_STAT40(GLV_MPTC, tx_multicast);
2367 	ICE_VSI_STAT40(GLV_BPTC, tx_broadcast);
2368 	ICE_VSI_STAT32(GLV_TEPC, tx_errors);
2369 
2370 	ice_stat_update_repc(hw, vsi->idx, vsi->hw_stats.offsets_loaded,
2371 			     cur_es);
2372 
2373 #undef ICE_VSI_STAT40
2374 #undef ICE_VSI_STAT32
2375 
2376 	vsi->hw_stats.offsets_loaded = true;
2377 }
2378 
2379 /**
2380  * ice_reset_vsi_stats - Reset VSI statistics counters
2381  * @vsi: VSI structure
2382  *
2383  * Resets the software tracking counters for the VSI statistics, and indicate
2384  * that the offsets haven't been loaded. This is intended to be called
2385  * post-reset so that VSI statistics count from zero again.
2386  */
2387 void
2388 ice_reset_vsi_stats(struct ice_vsi *vsi)
2389 {
2390 	/* Reset HW stats */
2391 	memset(&vsi->hw_stats.prev, 0, sizeof(vsi->hw_stats.prev));
2392 	memset(&vsi->hw_stats.cur, 0, sizeof(vsi->hw_stats.cur));
2393 	vsi->hw_stats.offsets_loaded = false;
2394 }
2395 
2396 /**
2397  * ice_update_pf_stats - Update port stats counters
2398  * @sc: device private softc structure
2399  *
2400  * Reads hardware statistics registers and updates the software tracking
2401  * structure with new values.
2402  */
2403 void
2404 ice_update_pf_stats(struct ice_softc *sc)
2405 {
2406 	struct ice_hw_port_stats *prev_ps, *cur_ps;
2407 	struct ice_hw *hw = &sc->hw;
2408 	u8 lport;
2409 
2410 	MPASS(hw->port_info);
2411 
2412 	prev_ps = &sc->stats.prev;
2413 	cur_ps = &sc->stats.cur;
2414 	lport = hw->port_info->lport;
2415 
2416 #define ICE_PF_STAT_PFC(name, location, index) \
2417 	ice_stat_update40(hw, name(lport, index), \
2418 			  sc->stats.offsets_loaded, \
2419 			  &prev_ps->location[index], &cur_ps->location[index])
2420 
2421 #define ICE_PF_STAT40(name, location) \
2422 	ice_stat_update40(hw, name ## L(lport), \
2423 			  sc->stats.offsets_loaded, \
2424 			  &prev_ps->location, &cur_ps->location)
2425 
2426 #define ICE_PF_STAT32(name, location) \
2427 	ice_stat_update32(hw, name(lport), \
2428 			  sc->stats.offsets_loaded, \
2429 			  &prev_ps->location, &cur_ps->location)
2430 
2431 	ICE_PF_STAT40(GLPRT_GORC, eth.rx_bytes);
2432 	ICE_PF_STAT40(GLPRT_UPRC, eth.rx_unicast);
2433 	ICE_PF_STAT40(GLPRT_MPRC, eth.rx_multicast);
2434 	ICE_PF_STAT40(GLPRT_BPRC, eth.rx_broadcast);
2435 	ICE_PF_STAT40(GLPRT_GOTC, eth.tx_bytes);
2436 	ICE_PF_STAT40(GLPRT_UPTC, eth.tx_unicast);
2437 	ICE_PF_STAT40(GLPRT_MPTC, eth.tx_multicast);
2438 	ICE_PF_STAT40(GLPRT_BPTC, eth.tx_broadcast);
2439 	/* This stat register doesn't have an lport */
2440 	ice_stat_update32(hw, PRTRPB_RDPC,
2441 			  sc->stats.offsets_loaded,
2442 			  &prev_ps->eth.rx_discards, &cur_ps->eth.rx_discards);
2443 
2444 	ICE_PF_STAT32(GLPRT_TDOLD, tx_dropped_link_down);
2445 	ICE_PF_STAT40(GLPRT_PRC64, rx_size_64);
2446 	ICE_PF_STAT40(GLPRT_PRC127, rx_size_127);
2447 	ICE_PF_STAT40(GLPRT_PRC255, rx_size_255);
2448 	ICE_PF_STAT40(GLPRT_PRC511, rx_size_511);
2449 	ICE_PF_STAT40(GLPRT_PRC1023, rx_size_1023);
2450 	ICE_PF_STAT40(GLPRT_PRC1522, rx_size_1522);
2451 	ICE_PF_STAT40(GLPRT_PRC9522, rx_size_big);
2452 	ICE_PF_STAT40(GLPRT_PTC64, tx_size_64);
2453 	ICE_PF_STAT40(GLPRT_PTC127, tx_size_127);
2454 	ICE_PF_STAT40(GLPRT_PTC255, tx_size_255);
2455 	ICE_PF_STAT40(GLPRT_PTC511, tx_size_511);
2456 	ICE_PF_STAT40(GLPRT_PTC1023, tx_size_1023);
2457 	ICE_PF_STAT40(GLPRT_PTC1522, tx_size_1522);
2458 	ICE_PF_STAT40(GLPRT_PTC9522, tx_size_big);
2459 
2460 	/* Update Priority Flow Control Stats */
2461 	for (int i = 0; i <= GLPRT_PXOFFRXC_MAX_INDEX; i++) {
2462 		ICE_PF_STAT_PFC(GLPRT_PXONRXC, priority_xon_rx, i);
2463 		ICE_PF_STAT_PFC(GLPRT_PXOFFRXC, priority_xoff_rx, i);
2464 		ICE_PF_STAT_PFC(GLPRT_PXONTXC, priority_xon_tx, i);
2465 		ICE_PF_STAT_PFC(GLPRT_PXOFFTXC, priority_xoff_tx, i);
2466 		ICE_PF_STAT_PFC(GLPRT_RXON2OFFCNT, priority_xon_2_xoff, i);
2467 	}
2468 
2469 	ICE_PF_STAT32(GLPRT_LXONRXC, link_xon_rx);
2470 	ICE_PF_STAT32(GLPRT_LXOFFRXC, link_xoff_rx);
2471 	ICE_PF_STAT32(GLPRT_LXONTXC, link_xon_tx);
2472 	ICE_PF_STAT32(GLPRT_LXOFFTXC, link_xoff_tx);
2473 	ICE_PF_STAT32(GLPRT_CRCERRS, crc_errors);
2474 	ICE_PF_STAT32(GLPRT_ILLERRC, illegal_bytes);
2475 	ICE_PF_STAT32(GLPRT_MLFC, mac_local_faults);
2476 	ICE_PF_STAT32(GLPRT_MRFC, mac_remote_faults);
2477 	ICE_PF_STAT32(GLPRT_RLEC, rx_len_errors);
2478 	ICE_PF_STAT32(GLPRT_RUC, rx_undersize);
2479 	ICE_PF_STAT32(GLPRT_RFC, rx_fragments);
2480 	ICE_PF_STAT32(GLPRT_ROC, rx_oversize);
2481 	ICE_PF_STAT32(GLPRT_RJC, rx_jabber);
2482 
2483 #undef ICE_PF_STAT40
2484 #undef ICE_PF_STAT32
2485 #undef ICE_PF_STAT_PFC
2486 
2487 	sc->stats.offsets_loaded = true;
2488 }
2489 
2490 /**
2491  * ice_reset_pf_stats - Reset port stats counters
2492  * @sc: Device private softc structure
2493  *
2494  * Reset software tracking values for statistics to zero, and indicate that
2495  * offsets haven't been loaded. Intended to be called after a device reset so
2496  * that statistics count from zero again.
2497  */
2498 void
2499 ice_reset_pf_stats(struct ice_softc *sc)
2500 {
2501 	memset(&sc->stats.prev, 0, sizeof(sc->stats.prev));
2502 	memset(&sc->stats.cur, 0, sizeof(sc->stats.cur));
2503 	sc->stats.offsets_loaded = false;
2504 }
2505 
2506 /**
2507  * ice_sysctl_show_fw - sysctl callback to show firmware information
2508  * @oidp: sysctl oid structure
2509  * @arg1: pointer to private data structure
2510  * @arg2: unused
2511  * @req: sysctl request pointer
2512  *
2513  * Callback for the fw_version sysctl, to display the current firmware
2514  * information found at hardware init time.
2515  */
2516 static int
2517 ice_sysctl_show_fw(SYSCTL_HANDLER_ARGS)
2518 {
2519 	struct ice_softc *sc = (struct ice_softc *)arg1;
2520 	struct ice_hw *hw = &sc->hw;
2521 	struct sbuf *sbuf;
2522 
2523 	UNREFERENCED_PARAMETER(oidp);
2524 	UNREFERENCED_PARAMETER(arg2);
2525 
2526 	if (ice_driver_is_detaching(sc))
2527 		return (ESHUTDOWN);
2528 
2529 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
2530 	ice_nvm_version_str(hw, sbuf);
2531 	sbuf_finish(sbuf);
2532 	sbuf_delete(sbuf);
2533 
2534 	return (0);
2535 }
2536 
2537 /**
2538  * ice_sysctl_pba_number - sysctl callback to show PBA number
2539  * @oidp: sysctl oid structure
2540  * @arg1: pointer to private data structure
2541  * @arg2: unused
2542  * @req: sysctl request pointer
2543  *
2544  * Callback for the pba_number sysctl, used to read the Product Board Assembly
2545  * number for this device.
2546  */
2547 static int
2548 ice_sysctl_pba_number(SYSCTL_HANDLER_ARGS)
2549 {
2550 	struct ice_softc *sc = (struct ice_softc *)arg1;
2551 	struct ice_hw *hw = &sc->hw;
2552 	device_t dev = sc->dev;
2553 	u8 pba_string[32] = "";
2554 	enum ice_status status;
2555 
2556 	UNREFERENCED_PARAMETER(arg2);
2557 
2558 	if (ice_driver_is_detaching(sc))
2559 		return (ESHUTDOWN);
2560 
2561 	status = ice_read_pba_string(hw, pba_string, sizeof(pba_string));
2562 	if (status) {
2563 		device_printf(dev,
2564 		    "%s: failed to read PBA string from NVM; status %s, aq_err %s\n",
2565 		    __func__, ice_status_str(status),
2566 		    ice_aq_str(hw->adminq.sq_last_status));
2567 		return (EIO);
2568 	}
2569 
2570 	return sysctl_handle_string(oidp, pba_string, sizeof(pba_string), req);
2571 }
2572 
2573 /**
2574  * ice_sysctl_pkg_version - sysctl to show the active package version info
2575  * @oidp: sysctl oid structure
2576  * @arg1: pointer to private data structure
2577  * @arg2: unused
2578  * @req: sysctl request pointer
2579  *
2580  * Callback for the pkg_version sysctl, to display the active DDP package name
2581  * and version information.
2582  */
2583 static int
2584 ice_sysctl_pkg_version(SYSCTL_HANDLER_ARGS)
2585 {
2586 	struct ice_softc *sc = (struct ice_softc *)arg1;
2587 	struct ice_hw *hw = &sc->hw;
2588 	struct sbuf *sbuf;
2589 
2590 	UNREFERENCED_PARAMETER(oidp);
2591 	UNREFERENCED_PARAMETER(arg2);
2592 
2593 	if (ice_driver_is_detaching(sc))
2594 		return (ESHUTDOWN);
2595 
2596 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
2597 	ice_active_pkg_version_str(hw, sbuf);
2598 	sbuf_finish(sbuf);
2599 	sbuf_delete(sbuf);
2600 
2601 	return (0);
2602 }
2603 
2604 /**
2605  * ice_sysctl_os_pkg_version - sysctl to show the OS package version info
2606  * @oidp: sysctl oid structure
2607  * @arg1: pointer to private data structure
2608  * @arg2: unused
2609  * @req: sysctl request pointer
2610  *
2611  * Callback for the pkg_version sysctl, to display the OS DDP package name and
2612  * version info found in the ice_ddp module.
2613  */
2614 static int
2615 ice_sysctl_os_pkg_version(SYSCTL_HANDLER_ARGS)
2616 {
2617 	struct ice_softc *sc = (struct ice_softc *)arg1;
2618 	struct ice_hw *hw = &sc->hw;
2619 	struct sbuf *sbuf;
2620 
2621 	UNREFERENCED_PARAMETER(oidp);
2622 	UNREFERENCED_PARAMETER(arg2);
2623 
2624 	if (ice_driver_is_detaching(sc))
2625 		return (ESHUTDOWN);
2626 
2627 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
2628 	ice_os_pkg_version_str(hw, sbuf);
2629 	sbuf_finish(sbuf);
2630 	sbuf_delete(sbuf);
2631 
2632 	return (0);
2633 }
2634 
2635 /**
2636  * ice_sysctl_current_speed - sysctl callback to show current link speed
2637  * @oidp: sysctl oid structure
2638  * @arg1: pointer to private data structure
2639  * @arg2: unused
2640  * @req: sysctl request pointer
2641  *
2642  * Callback for the current_speed sysctl, to display the string representing
2643  * the current link speed.
2644  */
2645 static int
2646 ice_sysctl_current_speed(SYSCTL_HANDLER_ARGS)
2647 {
2648 	struct ice_softc *sc = (struct ice_softc *)arg1;
2649 	struct ice_hw *hw = &sc->hw;
2650 	struct sbuf *sbuf;
2651 
2652 	UNREFERENCED_PARAMETER(oidp);
2653 	UNREFERENCED_PARAMETER(arg2);
2654 
2655 	if (ice_driver_is_detaching(sc))
2656 		return (ESHUTDOWN);
2657 
2658 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 10, req);
2659 	sbuf_printf(sbuf, "%s", ice_aq_speed_to_str(hw->port_info));
2660 	sbuf_finish(sbuf);
2661 	sbuf_delete(sbuf);
2662 
2663 	return (0);
2664 }
2665 
/**
 * @var phy_link_speeds
 * @brief PHY link speed conversion array
 *
 * Array of link speeds to convert ICE_PHY_TYPE_LOW and ICE_PHY_TYPE_HIGH into
 * link speeds used by the link speed sysctls.
 *
 * The first 64 entries belong to the ICE_PHY_TYPE_LOW_* bits and the final 5
 * entries to the ICE_PHY_TYPE_HIGH_* bits. The length of each run of equal
 * speeds matches the number of PHY types OR'd together in the corresponding
 * ICE_PHYS_* mask defined later in this file.
 *
 * @remark these are based on the indices used in the BIT() macros for the
 * ICE_PHY_TYPE_LOW_* and ICE_PHY_TYPE_HIGH_* definitions.
 */
static const uint16_t phy_link_speeds[] = {
    /* 100 Mb: 2 entries */
    ICE_AQ_LINK_SPEED_100MB,
    ICE_AQ_LINK_SPEED_100MB,
    /* 1000 Mb: 5 entries */
    ICE_AQ_LINK_SPEED_1000MB,
    ICE_AQ_LINK_SPEED_1000MB,
    ICE_AQ_LINK_SPEED_1000MB,
    ICE_AQ_LINK_SPEED_1000MB,
    ICE_AQ_LINK_SPEED_1000MB,
    /* 2500 Mb: 3 entries */
    ICE_AQ_LINK_SPEED_2500MB,
    ICE_AQ_LINK_SPEED_2500MB,
    ICE_AQ_LINK_SPEED_2500MB,
    /* 5 Gb: 2 entries */
    ICE_AQ_LINK_SPEED_5GB,
    ICE_AQ_LINK_SPEED_5GB,
    /* 10 Gb: 7 entries */
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    /* 25 Gb: 11 entries */
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    /* 40 Gb: 6 entries */
    ICE_AQ_LINK_SPEED_40GB,
    ICE_AQ_LINK_SPEED_40GB,
    ICE_AQ_LINK_SPEED_40GB,
    ICE_AQ_LINK_SPEED_40GB,
    ICE_AQ_LINK_SPEED_40GB,
    ICE_AQ_LINK_SPEED_40GB,
    /* 50 Gb: 15 entries */
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    /* 100 Gb (ICE_PHY_TYPE_LOW_*): 13 entries */
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    /* These rates are for ICE_PHY_TYPE_HIGH_* */
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB
};
2748 
/*
 * Help text describing the advertised link speed flags: one line per flag
 * value (0x0 for Auto up to 0x8000 for Unknown), followed by a hint to use
 * "sysctl -x" when viewing the value.
 * NOTE(review): presumably attached as the description of the advertise
 * speed sysctl; the user of this macro is not visible in this part of the
 * file.
 */
#define ICE_SYSCTL_HELP_ADVERTISE_SPEED		\
"\nControl advertised link speed."		\
"\nFlags:"					\
"\n\t   0x0 - Auto"				\
"\n\t   0x1 - 10 Mb"				\
"\n\t   0x2 - 100 Mb"				\
"\n\t   0x4 - 1G"				\
"\n\t   0x8 - 2.5G"				\
"\n\t  0x10 - 5G"				\
"\n\t  0x20 - 10G"				\
"\n\t  0x40 - 20G"				\
"\n\t  0x80 - 25G"				\
"\n\t 0x100 - 40G"				\
"\n\t 0x200 - 50G"				\
"\n\t 0x400 - 100G"				\
"\n\t0x8000 - Unknown"				\
"\n\t"						\
"\nUse \"sysctl -x\" to view flags properly."
2767 
/*
 * PHY type masks grouped by link speed. Each ICE_PHYS_<speed> macro ORs
 * together every ICE_PHY_TYPE_LOW_* flag of that speed. 100G is split in two
 * because its PHY types span both bitmasks: ICE_PHYS_100GB_LOW collects the
 * ICE_PHY_TYPE_LOW_* flags and ICE_PHYS_100GB_HIGH the ICE_PHY_TYPE_HIGH_*
 * flags.
 */

/* 100 Mb PHY types */
#define ICE_PHYS_100MB			\
    (ICE_PHY_TYPE_LOW_100BASE_TX |	\
     ICE_PHY_TYPE_LOW_100M_SGMII)
/* 1000 Mb PHY types */
#define ICE_PHYS_1000MB			\
    (ICE_PHY_TYPE_LOW_1000BASE_T |	\
     ICE_PHY_TYPE_LOW_1000BASE_SX |	\
     ICE_PHY_TYPE_LOW_1000BASE_LX |	\
     ICE_PHY_TYPE_LOW_1000BASE_KX |	\
     ICE_PHY_TYPE_LOW_1G_SGMII)
/* 2500 Mb PHY types */
#define ICE_PHYS_2500MB			\
    (ICE_PHY_TYPE_LOW_2500BASE_T |	\
     ICE_PHY_TYPE_LOW_2500BASE_X |	\
     ICE_PHY_TYPE_LOW_2500BASE_KX)
/* 5 Gb PHY types */
#define ICE_PHYS_5GB			\
    (ICE_PHY_TYPE_LOW_5GBASE_T |	\
     ICE_PHY_TYPE_LOW_5GBASE_KR)
/* 10 Gb PHY types */
#define ICE_PHYS_10GB			\
    (ICE_PHY_TYPE_LOW_10GBASE_T |	\
     ICE_PHY_TYPE_LOW_10G_SFI_DA |	\
     ICE_PHY_TYPE_LOW_10GBASE_SR |	\
     ICE_PHY_TYPE_LOW_10GBASE_LR |	\
     ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 |	\
     ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC |	\
     ICE_PHY_TYPE_LOW_10G_SFI_C2C)
/* 25 Gb PHY types */
#define ICE_PHYS_25GB			\
    (ICE_PHY_TYPE_LOW_25GBASE_T |	\
     ICE_PHY_TYPE_LOW_25GBASE_CR |	\
     ICE_PHY_TYPE_LOW_25GBASE_CR_S |	\
     ICE_PHY_TYPE_LOW_25GBASE_CR1 |	\
     ICE_PHY_TYPE_LOW_25GBASE_SR |	\
     ICE_PHY_TYPE_LOW_25GBASE_LR |	\
     ICE_PHY_TYPE_LOW_25GBASE_KR |	\
     ICE_PHY_TYPE_LOW_25GBASE_KR_S |	\
     ICE_PHY_TYPE_LOW_25GBASE_KR1 |	\
     ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC |	\
     ICE_PHY_TYPE_LOW_25G_AUI_C2C)
/* 40 Gb PHY types */
#define ICE_PHYS_40GB			\
    (ICE_PHY_TYPE_LOW_40GBASE_CR4 |	\
     ICE_PHY_TYPE_LOW_40GBASE_SR4 |	\
     ICE_PHY_TYPE_LOW_40GBASE_LR4 |	\
     ICE_PHY_TYPE_LOW_40GBASE_KR4 |	\
     ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC | \
     ICE_PHY_TYPE_LOW_40G_XLAUI)
/* 50 Gb PHY types */
#define ICE_PHYS_50GB			\
    (ICE_PHY_TYPE_LOW_50GBASE_CR2 |	\
     ICE_PHY_TYPE_LOW_50GBASE_SR2 |	\
     ICE_PHY_TYPE_LOW_50GBASE_LR2 |	\
     ICE_PHY_TYPE_LOW_50GBASE_KR2 |	\
     ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC | \
     ICE_PHY_TYPE_LOW_50G_LAUI2 |	\
     ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC | \
     ICE_PHY_TYPE_LOW_50G_AUI2 |	\
     ICE_PHY_TYPE_LOW_50GBASE_CP |	\
     ICE_PHY_TYPE_LOW_50GBASE_SR |	\
     ICE_PHY_TYPE_LOW_50GBASE_FR |	\
     ICE_PHY_TYPE_LOW_50GBASE_LR |	\
     ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4 |	\
     ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC | \
     ICE_PHY_TYPE_LOW_50G_AUI1)
/* 100 Gb PHY types found in the ICE_PHY_TYPE_LOW_* bitmask */
#define ICE_PHYS_100GB_LOW		\
    (ICE_PHY_TYPE_LOW_100GBASE_CR4 |	\
     ICE_PHY_TYPE_LOW_100GBASE_SR4 |	\
     ICE_PHY_TYPE_LOW_100GBASE_LR4 |	\
     ICE_PHY_TYPE_LOW_100GBASE_KR4 |	\
     ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC | \
     ICE_PHY_TYPE_LOW_100G_CAUI4 |	\
     ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC | \
     ICE_PHY_TYPE_LOW_100G_AUI4 |	\
     ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 | \
     ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 | \
     ICE_PHY_TYPE_LOW_100GBASE_CP2 |	\
     ICE_PHY_TYPE_LOW_100GBASE_SR2 |	\
     ICE_PHY_TYPE_LOW_100GBASE_DR)
/* 100 Gb PHY types found in the ICE_PHY_TYPE_HIGH_* bitmask */
#define ICE_PHYS_100GB_HIGH		\
    (ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4 | \
     ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC | \
     ICE_PHY_TYPE_HIGH_100G_CAUI2 |	\
     ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC | \
     ICE_PHY_TYPE_HIGH_100G_AUI2)
2847 
2848 /**
2849  * ice_aq_phy_types_to_link_speeds - Convert the PHY Types to speeds
2850  * @phy_type_low: lower 64-bit PHY Type bitmask
2851  * @phy_type_high: upper 64-bit PHY Type bitmask
2852  *
2853  * Convert the PHY Type fields from Get PHY Abilities and Set PHY Config into
2854  * link speed flags. If phy_type_high has an unknown PHY type, then the return
2855  * value will include the "ICE_AQ_LINK_SPEED_UNKNOWN" flag as well.
2856  */
2857 static u16
2858 ice_aq_phy_types_to_link_speeds(u64 phy_type_low, u64 phy_type_high)
2859 {
2860 	u16 sysctl_speeds = 0;
2861 	int bit;
2862 
2863 	/* coverity[address_of] */
2864 	for_each_set_bit(bit, &phy_type_low, 64)
2865 		sysctl_speeds |= phy_link_speeds[bit];
2866 
2867 	/* coverity[address_of] */
2868 	for_each_set_bit(bit, &phy_type_high, 64) {
2869 		if ((bit + 64) < (int)ARRAY_SIZE(phy_link_speeds))
2870 			sysctl_speeds |= phy_link_speeds[bit + 64];
2871 		else
2872 			sysctl_speeds |= ICE_AQ_LINK_SPEED_UNKNOWN;
2873 	}
2874 
2875 	return (sysctl_speeds);
2876 }
2877 
2878 /**
2879  * ice_sysctl_speeds_to_aq_phy_types - Convert sysctl speed flags to AQ PHY flags
2880  * @sysctl_speeds: 16-bit sysctl speeds or AQ_LINK_SPEED flags
2881  * @phy_type_low: output parameter for lower AQ PHY flags
2882  * @phy_type_high: output parameter for higher AQ PHY flags
2883  *
2884  * Converts the given link speed flags into AQ PHY type flag sets appropriate
2885  * for use in a Set PHY Config command.
2886  */
2887 static void
2888 ice_sysctl_speeds_to_aq_phy_types(u16 sysctl_speeds, u64 *phy_type_low,
2889 				  u64 *phy_type_high)
2890 {
2891 	*phy_type_low = 0, *phy_type_high = 0;
2892 
2893 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_100MB)
2894 		*phy_type_low |= ICE_PHYS_100MB;
2895 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_1000MB)
2896 		*phy_type_low |= ICE_PHYS_1000MB;
2897 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_2500MB)
2898 		*phy_type_low |= ICE_PHYS_2500MB;
2899 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_5GB)
2900 		*phy_type_low |= ICE_PHYS_5GB;
2901 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_10GB)
2902 		*phy_type_low |= ICE_PHYS_10GB;
2903 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_25GB)
2904 		*phy_type_low |= ICE_PHYS_25GB;
2905 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_40GB)
2906 		*phy_type_low |= ICE_PHYS_40GB;
2907 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_50GB)
2908 		*phy_type_low |= ICE_PHYS_50GB;
2909 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_100GB) {
2910 		*phy_type_low |= ICE_PHYS_100GB_LOW;
2911 		*phy_type_high |= ICE_PHYS_100GB_HIGH;
2912 	}
2913 }
2914 
/**
 * @struct ice_phy_data
 * @brief PHY caps and link speeds
 *
 * In/out buffer for ice_intersect_phy_types_and_speeds(). The caller fills
 * in report_mode and user_speeds_orig; the function fills in the remaining
 * fields with the reported PHY types and their intersection with the
 * requested speeds.
 *
 * Note that ice_intersect_phy_types_and_speeds() rewrites a user_speeds_orig
 * of 0 ("Auto") to USHRT_MAX.
 */
struct ice_phy_data {
	u64 phy_low_orig;     /* PHY low quad from report */
	u64 phy_high_orig;    /* PHY high quad from report */
	u64 phy_low_intr;     /* PHY low quad intersection with user speeds */
	u64 phy_high_intr;    /* PHY high quad intersection with user speeds */
	u16 user_speeds_orig; /* Input from caller - See ICE_AQ_LINK_SPEED_* */
	u16 user_speeds_intr; /* Intersect with report speeds */
	u8 report_mode;       /* Input from caller - See ICE_AQC_REPORT_* */
};
2931 
2932 /**
2933  * ice_intersect_phy_types_and_speeds - Return intersection of link speeds
2934  * @sc: device private structure
2935  * @phy_data: device PHY data
2936  *
2937  * On read: Displays the currently supported speeds
2938  * On write: Sets the device's supported speeds
2939  * Valid input flags: see ICE_SYSCTL_HELP_ADVERTISE_SPEED
2940  */
2941 static int
2942 ice_intersect_phy_types_and_speeds(struct ice_softc *sc,
2943 				   struct ice_phy_data *phy_data)
2944 {
2945 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
2946 	const char *report_types[5] = { "w/o MEDIA",
2947 					"w/MEDIA",
2948 					"ACTIVE",
2949 					"EDOOFUS", /* Not used */
2950 					"DFLT" };
2951 	struct ice_hw *hw = &sc->hw;
2952 	struct ice_port_info *pi = hw->port_info;
2953 	enum ice_status status;
2954 	u16 report_speeds, temp_speeds;
2955 	u8 report_type;
2956 	bool apply_speed_filter = false;
2957 
2958 	switch (phy_data->report_mode) {
2959 	case ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA:
2960 	case ICE_AQC_REPORT_TOPO_CAP_MEDIA:
2961 	case ICE_AQC_REPORT_ACTIVE_CFG:
2962 	case ICE_AQC_REPORT_DFLT_CFG:
2963 		report_type = phy_data->report_mode >> 1;
2964 		break;
2965 	default:
2966 		device_printf(sc->dev,
2967 		    "%s: phy_data.report_mode \"%u\" doesn't exist\n",
2968 		    __func__, phy_data->report_mode);
2969 		return (EINVAL);
2970 	}
2971 
2972 	/* 0 is treated as "Auto"; the driver will handle selecting the
2973 	 * correct speeds. Including, in some cases, applying an override
2974 	 * if provided.
2975 	 */
2976 	if (phy_data->user_speeds_orig == 0)
2977 		phy_data->user_speeds_orig = USHRT_MAX;
2978 	else if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE))
2979 		apply_speed_filter = true;
2980 
2981 	status = ice_aq_get_phy_caps(pi, false, phy_data->report_mode, &pcaps, NULL);
2982 	if (status != ICE_SUCCESS) {
2983 		device_printf(sc->dev,
2984 		    "%s: ice_aq_get_phy_caps (%s) failed; status %s, aq_err %s\n",
2985 		    __func__, report_types[report_type],
2986 		    ice_status_str(status),
2987 		    ice_aq_str(sc->hw.adminq.sq_last_status));
2988 		return (EIO);
2989 	}
2990 
2991 	phy_data->phy_low_orig = le64toh(pcaps.phy_type_low);
2992 	phy_data->phy_high_orig = le64toh(pcaps.phy_type_high);
2993 	report_speeds = ice_aq_phy_types_to_link_speeds(phy_data->phy_low_orig,
2994 	    phy_data->phy_high_orig);
2995 	if (apply_speed_filter) {
2996 		temp_speeds = ice_apply_supported_speed_filter(report_speeds,
2997 		    pcaps.module_type[0]);
2998 		if ((phy_data->user_speeds_orig & temp_speeds) == 0) {
2999 			device_printf(sc->dev,
3000 			    "User-specified speeds (\"0x%04X\") not supported\n",
3001 			    phy_data->user_speeds_orig);
3002 			return (EINVAL);
3003 		}
3004 		report_speeds = temp_speeds;
3005 	}
3006 	ice_sysctl_speeds_to_aq_phy_types(phy_data->user_speeds_orig,
3007 	    &phy_data->phy_low_intr, &phy_data->phy_high_intr);
3008 	phy_data->user_speeds_intr = phy_data->user_speeds_orig & report_speeds;
3009 	phy_data->phy_low_intr &= phy_data->phy_low_orig;
3010 	phy_data->phy_high_intr &= phy_data->phy_high_orig;
3011 
3012 	return (0);
3013  }
3014 
3015 /**
3016  * ice_sysctl_advertise_speed - Display/change link speeds supported by port
3017  * @oidp: sysctl oid structure
3018  * @arg1: pointer to private data structure
3019  * @arg2: unused
3020  * @req: sysctl request pointer
3021  *
3022  * On read: Displays the currently supported speeds
3023  * On write: Sets the device's supported speeds
3024  * Valid input flags: see ICE_SYSCTL_HELP_ADVERTISE_SPEED
3025  */
3026 static int
3027 ice_sysctl_advertise_speed(SYSCTL_HANDLER_ARGS)
3028 {
3029 	struct ice_softc *sc = (struct ice_softc *)arg1;
3030 	struct ice_port_info *pi = sc->hw.port_info;
3031 	struct ice_phy_data phy_data = { 0 };
3032 	device_t dev = sc->dev;
3033 	u16 sysctl_speeds;
3034 	int ret;
3035 
3036 	UNREFERENCED_PARAMETER(arg2);
3037 
3038 	if (ice_driver_is_detaching(sc))
3039 		return (ESHUTDOWN);
3040 
3041 	/* Get the current speeds from the adapter's "active" configuration. */
3042 	phy_data.report_mode = ICE_AQC_REPORT_ACTIVE_CFG;
3043 	ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
3044 	if (ret) {
3045 		/* Error message already printed within function */
3046 		return (ret);
3047 	}
3048 
3049 	sysctl_speeds = phy_data.user_speeds_intr;
3050 
3051 	ret = sysctl_handle_16(oidp, &sysctl_speeds, 0, req);
3052 	if ((ret) || (req->newptr == NULL))
3053 		return (ret);
3054 
3055 	if (sysctl_speeds > 0x7FF) {
3056 		device_printf(dev,
3057 			      "%s: \"%u\" is outside of the range of acceptable values.\n",
3058 			      __func__, sysctl_speeds);
3059 		return (EINVAL);
3060 	}
3061 
3062 	pi->phy.curr_user_speed_req = sysctl_speeds;
3063 
3064 	/* Apply settings requested by user */
3065 	return ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS);
3066 }
3067 
3068 #define ICE_SYSCTL_HELP_FEC_CONFIG			\
3069 "\nDisplay or set the port's requested FEC mode."	\
3070 "\n\tauto - " ICE_FEC_STRING_AUTO			\
3071 "\n\tfc - " ICE_FEC_STRING_BASER			\
3072 "\n\trs - " ICE_FEC_STRING_RS				\
3073 "\n\tnone - " ICE_FEC_STRING_NONE			\
3074 "\nEither of the left or right strings above can be used to set the requested mode."
3075 
3076 /**
3077  * ice_sysctl_fec_config - Display/change the configured FEC mode
3078  * @oidp: sysctl oid structure
3079  * @arg1: pointer to private data structure
3080  * @arg2: unused
3081  * @req: sysctl request pointer
3082  *
3083  * On read: Displays the configured FEC mode
3084  * On write: Sets the device's FEC mode to the input string, if it's valid.
3085  * Valid input strings: see ICE_SYSCTL_HELP_FEC_CONFIG
3086  */
3087 static int
3088 ice_sysctl_fec_config(SYSCTL_HANDLER_ARGS)
3089 {
3090 	struct ice_softc *sc = (struct ice_softc *)arg1;
3091 	struct ice_port_info *pi = sc->hw.port_info;
3092 	enum ice_fec_mode new_mode;
3093 	device_t dev = sc->dev;
3094 	char req_fec[32];
3095 	int ret;
3096 
3097 	UNREFERENCED_PARAMETER(arg2);
3098 
3099 	if (ice_driver_is_detaching(sc))
3100 		return (ESHUTDOWN);
3101 
3102 	bzero(req_fec, sizeof(req_fec));
3103 	strlcpy(req_fec, ice_requested_fec_mode(pi), sizeof(req_fec));
3104 
3105 	ret = sysctl_handle_string(oidp, req_fec, sizeof(req_fec), req);
3106 	if ((ret) || (req->newptr == NULL))
3107 		return (ret);
3108 
3109 	if (strcmp(req_fec, "auto") == 0 ||
3110 	    strcmp(req_fec, ice_fec_str(ICE_FEC_AUTO)) == 0) {
3111 		if (sc->allow_no_fec_mod_in_auto)
3112 			new_mode = ICE_FEC_DIS_AUTO;
3113 		else
3114 			new_mode = ICE_FEC_AUTO;
3115 	} else if (strcmp(req_fec, "fc") == 0 ||
3116 	    strcmp(req_fec, ice_fec_str(ICE_FEC_BASER)) == 0) {
3117 		new_mode = ICE_FEC_BASER;
3118 	} else if (strcmp(req_fec, "rs") == 0 ||
3119 	    strcmp(req_fec, ice_fec_str(ICE_FEC_RS)) == 0) {
3120 		new_mode = ICE_FEC_RS;
3121 	} else if (strcmp(req_fec, "none") == 0 ||
3122 	    strcmp(req_fec, ice_fec_str(ICE_FEC_NONE)) == 0) {
3123 		new_mode = ICE_FEC_NONE;
3124 	} else {
3125 		device_printf(dev,
3126 		    "%s: \"%s\" is not a valid FEC mode\n",
3127 		    __func__, req_fec);
3128 		return (EINVAL);
3129 	}
3130 
3131 	/* Cache user FEC mode for later link ups */
3132 	pi->phy.curr_user_fec_req = new_mode;
3133 
3134 	/* Apply settings requested by user */
3135 	return ice_apply_saved_phy_cfg(sc, ICE_APPLY_FEC);
3136 }
3137 
3138 /**
3139  * ice_sysctl_negotiated_fec - Display the negotiated FEC mode on the link
3140  * @oidp: sysctl oid structure
3141  * @arg1: pointer to private data structure
3142  * @arg2: unused
3143  * @req: sysctl request pointer
3144  *
3145  * On read: Displays the negotiated FEC mode, in a string
3146  */
3147 static int
3148 ice_sysctl_negotiated_fec(SYSCTL_HANDLER_ARGS)
3149 {
3150 	struct ice_softc *sc = (struct ice_softc *)arg1;
3151 	struct ice_hw *hw = &sc->hw;
3152 	char neg_fec[32];
3153 	int ret;
3154 
3155 	UNREFERENCED_PARAMETER(arg2);
3156 
3157 	if (ice_driver_is_detaching(sc))
3158 		return (ESHUTDOWN);
3159 
3160 	/* Copy const string into a buffer to drop const qualifier */
3161 	bzero(neg_fec, sizeof(neg_fec));
3162 	strlcpy(neg_fec, ice_negotiated_fec_mode(hw->port_info), sizeof(neg_fec));
3163 
3164 	ret = sysctl_handle_string(oidp, neg_fec, 0, req);
3165 	if (req->newptr != NULL)
3166 		return (EPERM);
3167 
3168 	return (ret);
3169 }
3170 
3171 #define ICE_SYSCTL_HELP_FC_CONFIG				\
3172 "\nDisplay or set the port's advertised flow control mode.\n"	\
3173 "\t0 - " ICE_FC_STRING_NONE					\
3174 "\n\t1 - " ICE_FC_STRING_RX					\
3175 "\n\t2 - " ICE_FC_STRING_TX					\
3176 "\n\t3 - " ICE_FC_STRING_FULL					\
3177 "\nEither the numbers or the strings above can be used to set the advertised mode."
3178 
3179 /**
3180  * ice_sysctl_fc_config - Display/change the advertised flow control mode
3181  * @oidp: sysctl oid structure
3182  * @arg1: pointer to private data structure
3183  * @arg2: unused
3184  * @req: sysctl request pointer
3185  *
3186  * On read: Displays the configured flow control mode
3187  * On write: Sets the device's flow control mode to the input, if it's valid.
3188  * Valid input strings: see ICE_SYSCTL_HELP_FC_CONFIG
3189  */
3190 static int
3191 ice_sysctl_fc_config(SYSCTL_HANDLER_ARGS)
3192 {
3193 	struct ice_softc *sc = (struct ice_softc *)arg1;
3194 	struct ice_port_info *pi = sc->hw.port_info;
3195 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3196 	enum ice_fc_mode old_mode, new_mode;
3197 	struct ice_hw *hw = &sc->hw;
3198 	device_t dev = sc->dev;
3199 	enum ice_status status;
3200 	int ret, fc_num;
3201 	bool mode_set = false;
3202 	struct sbuf buf;
3203 	char *fc_str_end;
3204 	char fc_str[32];
3205 
3206 	UNREFERENCED_PARAMETER(arg2);
3207 
3208 	if (ice_driver_is_detaching(sc))
3209 		return (ESHUTDOWN);
3210 
3211 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
3212 				     &pcaps, NULL);
3213 	if (status != ICE_SUCCESS) {
3214 		device_printf(dev,
3215 		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
3216 		    __func__, ice_status_str(status),
3217 		    ice_aq_str(hw->adminq.sq_last_status));
3218 		return (EIO);
3219 	}
3220 
3221 	/* Convert HW response format to SW enum value */
3222 	if ((pcaps.caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) &&
3223 	    (pcaps.caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE))
3224 		old_mode = ICE_FC_FULL;
3225 	else if (pcaps.caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE)
3226 		old_mode = ICE_FC_TX_PAUSE;
3227 	else if (pcaps.caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)
3228 		old_mode = ICE_FC_RX_PAUSE;
3229 	else
3230 		old_mode = ICE_FC_NONE;
3231 
3232 	/* Create "old" string for output */
3233 	bzero(fc_str, sizeof(fc_str));
3234 	sbuf_new_for_sysctl(&buf, fc_str, sizeof(fc_str), req);
3235 	sbuf_printf(&buf, "%d<%s>", old_mode, ice_fc_str(old_mode));
3236 	sbuf_finish(&buf);
3237 	sbuf_delete(&buf);
3238 
3239 	ret = sysctl_handle_string(oidp, fc_str, sizeof(fc_str), req);
3240 	if ((ret) || (req->newptr == NULL))
3241 		return (ret);
3242 
3243 	/* Try to parse input as a string, first */
3244 	if (strcasecmp(ice_fc_str(ICE_FC_FULL), fc_str) == 0) {
3245 		new_mode = ICE_FC_FULL;
3246 		mode_set = true;
3247 	}
3248 	else if (strcasecmp(ice_fc_str(ICE_FC_TX_PAUSE), fc_str) == 0) {
3249 		new_mode = ICE_FC_TX_PAUSE;
3250 		mode_set = true;
3251 	}
3252 	else if (strcasecmp(ice_fc_str(ICE_FC_RX_PAUSE), fc_str) == 0) {
3253 		new_mode = ICE_FC_RX_PAUSE;
3254 		mode_set = true;
3255 	}
3256 	else if (strcasecmp(ice_fc_str(ICE_FC_NONE), fc_str) == 0) {
3257 		new_mode = ICE_FC_NONE;
3258 		mode_set = true;
3259 	}
3260 
3261 	/*
3262 	 * Then check if it's an integer, for compatibility with the method
3263 	 * used in older drivers.
3264 	 */
3265 	if (!mode_set) {
3266 		fc_num = strtol(fc_str, &fc_str_end, 0);
3267 		if (fc_str_end == fc_str)
3268 			fc_num = -1;
3269 		switch (fc_num) {
3270 		case 3:
3271 			new_mode = ICE_FC_FULL;
3272 			break;
3273 		case 2:
3274 			new_mode = ICE_FC_TX_PAUSE;
3275 			break;
3276 		case 1:
3277 			new_mode = ICE_FC_RX_PAUSE;
3278 			break;
3279 		case 0:
3280 			new_mode = ICE_FC_NONE;
3281 			break;
3282 		default:
3283 			device_printf(dev,
3284 			    "%s: \"%s\" is not a valid flow control mode\n",
3285 			    __func__, fc_str);
3286 			return (EINVAL);
3287 		}
3288 	}
3289 
3290 	/* Save flow control mode from user */
3291 	pi->phy.curr_user_fc_req = new_mode;
3292 
3293 	/* Turn off Priority Flow Control when Link Flow Control is enabled */
3294 	if ((hw->port_info->qos_cfg.is_sw_lldp) &&
3295 	    (hw->port_info->qos_cfg.local_dcbx_cfg.pfc.pfcena != 0) &&
3296 	    (new_mode != ICE_FC_NONE)) {
3297 		ret = ice_config_pfc(sc, 0x0);
3298 		if (ret)
3299 			return (ret);
3300 	}
3301 
3302 	/* Apply settings requested by user */
3303 	return ice_apply_saved_phy_cfg(sc, ICE_APPLY_FC);
3304 }
3305 
3306 /**
3307  * ice_sysctl_negotiated_fc - Display currently negotiated FC mode
3308  * @oidp: sysctl oid structure
3309  * @arg1: pointer to private data structure
3310  * @arg2: unused
3311  * @req: sysctl request pointer
3312  *
3313  * On read: Displays the currently negotiated flow control settings.
3314  *
3315  * If link is not established, this will report ICE_FC_NONE, as no flow
3316  * control is negotiated while link is down.
3317  */
3318 static int
3319 ice_sysctl_negotiated_fc(SYSCTL_HANDLER_ARGS)
3320 {
3321 	struct ice_softc *sc = (struct ice_softc *)arg1;
3322 	struct ice_port_info *pi = sc->hw.port_info;
3323 	const char *negotiated_fc;
3324 
3325 	UNREFERENCED_PARAMETER(arg2);
3326 
3327 	if (ice_driver_is_detaching(sc))
3328 		return (ESHUTDOWN);
3329 
3330 	negotiated_fc = ice_flowcontrol_mode(pi);
3331 
3332 	return sysctl_handle_string(oidp, __DECONST(char *, negotiated_fc), 0, req);
3333 }
3334 
3335 /**
3336  * __ice_sysctl_phy_type_handler - Display/change supported PHY types/speeds
3337  * @oidp: sysctl oid structure
3338  * @arg1: pointer to private data structure
3339  * @arg2: unused
3340  * @req: sysctl request pointer
3341  * @is_phy_type_high: if true, handle the high PHY type instead of the low PHY type
3342  *
3343  * Private handler for phy_type_high and phy_type_low sysctls.
3344  */
3345 static int
3346 __ice_sysctl_phy_type_handler(SYSCTL_HANDLER_ARGS, bool is_phy_type_high)
3347 {
3348 	struct ice_softc *sc = (struct ice_softc *)arg1;
3349 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3350 	struct ice_aqc_set_phy_cfg_data cfg = { 0 };
3351 	struct ice_hw *hw = &sc->hw;
3352 	device_t dev = sc->dev;
3353 	enum ice_status status;
3354 	uint64_t types;
3355 	int ret;
3356 
3357 	UNREFERENCED_PARAMETER(arg2);
3358 
3359 	if (ice_driver_is_detaching(sc))
3360 		return (ESHUTDOWN);
3361 
3362 	status = ice_aq_get_phy_caps(hw->port_info, false, ICE_AQC_REPORT_ACTIVE_CFG,
3363 				     &pcaps, NULL);
3364 	if (status != ICE_SUCCESS) {
3365 		device_printf(dev,
3366 		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
3367 		    __func__, ice_status_str(status),
3368 		    ice_aq_str(hw->adminq.sq_last_status));
3369 		return (EIO);
3370 	}
3371 
3372 	if (is_phy_type_high)
3373 		types = pcaps.phy_type_high;
3374 	else
3375 		types = pcaps.phy_type_low;
3376 
3377 	ret = sysctl_handle_64(oidp, &types, sizeof(types), req);
3378 	if ((ret) || (req->newptr == NULL))
3379 		return (ret);
3380 
3381 	ice_copy_phy_caps_to_cfg(hw->port_info, &pcaps, &cfg);
3382 
3383 	if (is_phy_type_high)
3384 		cfg.phy_type_high = types & hw->port_info->phy.phy_type_high;
3385 	else
3386 		cfg.phy_type_low = types & hw->port_info->phy.phy_type_low;
3387 	cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
3388 
3389 	status = ice_aq_set_phy_cfg(hw, hw->port_info, &cfg, NULL);
3390 	if (status != ICE_SUCCESS) {
3391 		device_printf(dev,
3392 		    "%s: ice_aq_set_phy_cfg failed; status %s, aq_err %s\n",
3393 		    __func__, ice_status_str(status),
3394 		    ice_aq_str(hw->adminq.sq_last_status));
3395 		return (EIO);
3396 	}
3397 
3398 	return (0);
3399 
3400 }
3401 
3402 /**
3403  * ice_sysctl_phy_type_low - Display/change supported lower PHY types/speeds
3404  * @oidp: sysctl oid structure
3405  * @arg1: pointer to private data structure
3406  * @arg2: unused
3407  * @req: sysctl request pointer
3408  *
3409  * On read: Displays the currently supported lower PHY types
3410  * On write: Sets the device's supported low PHY types
3411  */
3412 static int
3413 ice_sysctl_phy_type_low(SYSCTL_HANDLER_ARGS)
3414 {
3415 	return __ice_sysctl_phy_type_handler(oidp, arg1, arg2, req, false);
3416 }
3417 
3418 /**
3419  * ice_sysctl_phy_type_high - Display/change supported higher PHY types/speeds
3420  * @oidp: sysctl oid structure
3421  * @arg1: pointer to private data structure
3422  * @arg2: unused
3423  * @req: sysctl request pointer
3424  *
3425  * On read: Displays the currently supported higher PHY types
3426  * On write: Sets the device's supported high PHY types
3427  */
3428 static int
3429 ice_sysctl_phy_type_high(SYSCTL_HANDLER_ARGS)
3430 {
3431 	return __ice_sysctl_phy_type_handler(oidp, arg1, arg2, req, true);
3432 }
3433 
3434 /**
3435  * ice_sysctl_phy_caps - Display response from Get PHY abililties
3436  * @oidp: sysctl oid structure
3437  * @arg1: pointer to private data structure
3438  * @arg2: unused
3439  * @req: sysctl request pointer
3440  * @report_mode: the mode to report
3441  *
3442  * On read: Display the response from Get PHY abillities with the given report
3443  * mode.
3444  */
3445 static int
3446 ice_sysctl_phy_caps(SYSCTL_HANDLER_ARGS, u8 report_mode)
3447 {
3448 	struct ice_softc *sc = (struct ice_softc *)arg1;
3449 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3450 	struct ice_hw *hw = &sc->hw;
3451 	struct ice_port_info *pi = hw->port_info;
3452 	device_t dev = sc->dev;
3453 	enum ice_status status;
3454 	int ret;
3455 
3456 	UNREFERENCED_PARAMETER(arg2);
3457 
3458 	ret = priv_check(curthread, PRIV_DRIVER);
3459 	if (ret)
3460 		return (ret);
3461 
3462 	if (ice_driver_is_detaching(sc))
3463 		return (ESHUTDOWN);
3464 
3465 	status = ice_aq_get_phy_caps(pi, true, report_mode, &pcaps, NULL);
3466 	if (status != ICE_SUCCESS) {
3467 		device_printf(dev,
3468 		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
3469 		    __func__, ice_status_str(status),
3470 		    ice_aq_str(hw->adminq.sq_last_status));
3471 		return (EIO);
3472 	}
3473 
3474 	ret = sysctl_handle_opaque(oidp, &pcaps, sizeof(pcaps), req);
3475 	if (req->newptr != NULL)
3476 		return (EPERM);
3477 
3478 	return (ret);
3479 }
3480 
3481 /**
3482  * ice_sysctl_phy_sw_caps - Display response from Get PHY abililties
3483  * @oidp: sysctl oid structure
3484  * @arg1: pointer to private data structure
3485  * @arg2: unused
3486  * @req: sysctl request pointer
3487  *
3488  * On read: Display the response from Get PHY abillities reporting the last
3489  * software configuration.
3490  */
3491 static int
3492 ice_sysctl_phy_sw_caps(SYSCTL_HANDLER_ARGS)
3493 {
3494 	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
3495 				   ICE_AQC_REPORT_ACTIVE_CFG);
3496 }
3497 
3498 /**
3499  * ice_sysctl_phy_nvm_caps - Display response from Get PHY abililties
3500  * @oidp: sysctl oid structure
3501  * @arg1: pointer to private data structure
3502  * @arg2: unused
3503  * @req: sysctl request pointer
3504  *
3505  * On read: Display the response from Get PHY abillities reporting the NVM
3506  * configuration.
3507  */
3508 static int
3509 ice_sysctl_phy_nvm_caps(SYSCTL_HANDLER_ARGS)
3510 {
3511 	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
3512 				   ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA);
3513 }
3514 
3515 /**
3516  * ice_sysctl_phy_topo_caps - Display response from Get PHY abililties
3517  * @oidp: sysctl oid structure
3518  * @arg1: pointer to private data structure
3519  * @arg2: unused
3520  * @req: sysctl request pointer
3521  *
3522  * On read: Display the response from Get PHY abillities reporting the
3523  * topology configuration.
3524  */
3525 static int
3526 ice_sysctl_phy_topo_caps(SYSCTL_HANDLER_ARGS)
3527 {
3528 	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
3529 				   ICE_AQC_REPORT_TOPO_CAP_MEDIA);
3530 }
3531 
3532 /**
3533  * ice_sysctl_phy_link_status - Display response from Get Link Status
3534  * @oidp: sysctl oid structure
3535  * @arg1: pointer to private data structure
3536  * @arg2: unused
3537  * @req: sysctl request pointer
3538  *
3539  * On read: Display the response from firmware for the Get Link Status
3540  * request.
3541  */
3542 static int
3543 ice_sysctl_phy_link_status(SYSCTL_HANDLER_ARGS)
3544 {
3545 	struct ice_aqc_get_link_status_data link_data = { 0 };
3546 	struct ice_softc *sc = (struct ice_softc *)arg1;
3547 	struct ice_hw *hw = &sc->hw;
3548 	struct ice_port_info *pi = hw->port_info;
3549 	struct ice_aqc_get_link_status *resp;
3550 	struct ice_aq_desc desc;
3551 	device_t dev = sc->dev;
3552 	enum ice_status status;
3553 	int ret;
3554 
3555 	UNREFERENCED_PARAMETER(arg2);
3556 
3557 	/*
3558 	 * Ensure that only contexts with driver privilege are allowed to
3559 	 * access this information
3560 	 */
3561 	ret = priv_check(curthread, PRIV_DRIVER);
3562 	if (ret)
3563 		return (ret);
3564 
3565 	if (ice_driver_is_detaching(sc))
3566 		return (ESHUTDOWN);
3567 
3568 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_status);
3569 	resp = &desc.params.get_link_status;
3570 	resp->lport_num = pi->lport;
3571 
3572 	status = ice_aq_send_cmd(hw, &desc, &link_data, sizeof(link_data), NULL);
3573 	if (status != ICE_SUCCESS) {
3574 		device_printf(dev,
3575 		    "%s: ice_aq_send_cmd failed; status %s, aq_err %s\n",
3576 		    __func__, ice_status_str(status),
3577 		    ice_aq_str(hw->adminq.sq_last_status));
3578 		return (EIO);
3579 	}
3580 
3581 	ret = sysctl_handle_opaque(oidp, &link_data, sizeof(link_data), req);
3582 	if (req->newptr != NULL)
3583 		return (EPERM);
3584 
3585 	return (ret);
3586 }
3587 
3588 /**
3589  * ice_sysctl_fw_cur_lldp_persist_status - Display current FW LLDP status
3590  * @oidp: sysctl oid structure
3591  * @arg1: pointer to private softc structure
3592  * @arg2: unused
3593  * @req: sysctl request pointer
3594  *
3595  * On read: Displays current persistent LLDP status.
3596  */
3597 static int
3598 ice_sysctl_fw_cur_lldp_persist_status(SYSCTL_HANDLER_ARGS)
3599 {
3600 	struct ice_softc *sc = (struct ice_softc *)arg1;
3601 	struct ice_hw *hw = &sc->hw;
3602 	device_t dev = sc->dev;
3603 	enum ice_status status;
3604 	struct sbuf *sbuf;
3605 	u32 lldp_state;
3606 
3607 	UNREFERENCED_PARAMETER(arg2);
3608 	UNREFERENCED_PARAMETER(oidp);
3609 
3610 	if (ice_driver_is_detaching(sc))
3611 		return (ESHUTDOWN);
3612 
3613 	status = ice_get_cur_lldp_persist_status(hw, &lldp_state);
3614 	if (status) {
3615 		device_printf(dev,
3616 		    "Could not acquire current LLDP persistence status, err %s aq_err %s\n",
3617 		    ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
3618 		return (EIO);
3619 	}
3620 
3621 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
3622 	sbuf_printf(sbuf, "%s", ice_fw_lldp_status(lldp_state));
3623 	sbuf_finish(sbuf);
3624 	sbuf_delete(sbuf);
3625 
3626 	return (0);
3627 }
3628 
3629 /**
3630  * ice_sysctl_fw_dflt_lldp_persist_status - Display default FW LLDP status
3631  * @oidp: sysctl oid structure
3632  * @arg1: pointer to private softc structure
3633  * @arg2: unused
3634  * @req: sysctl request pointer
3635  *
3636  * On read: Displays default persistent LLDP status.
3637  */
3638 static int
3639 ice_sysctl_fw_dflt_lldp_persist_status(SYSCTL_HANDLER_ARGS)
3640 {
3641 	struct ice_softc *sc = (struct ice_softc *)arg1;
3642 	struct ice_hw *hw = &sc->hw;
3643 	device_t dev = sc->dev;
3644 	enum ice_status status;
3645 	struct sbuf *sbuf;
3646 	u32 lldp_state;
3647 
3648 	UNREFERENCED_PARAMETER(arg2);
3649 	UNREFERENCED_PARAMETER(oidp);
3650 
3651 	if (ice_driver_is_detaching(sc))
3652 		return (ESHUTDOWN);
3653 
3654 	status = ice_get_dflt_lldp_persist_status(hw, &lldp_state);
3655 	if (status) {
3656 		device_printf(dev,
3657 		    "Could not acquire default LLDP persistence status, err %s aq_err %s\n",
3658 		    ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
3659 		return (EIO);
3660 	}
3661 
3662 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
3663 	sbuf_printf(sbuf, "%s", ice_fw_lldp_status(lldp_state));
3664 	sbuf_finish(sbuf);
3665 	sbuf_delete(sbuf);
3666 
3667 	return (0);
3668 }
3669 
3670 /**
3671  * ice_dscp_is_mapped - Check for non-zero DSCP to TC mappings
3672  * @dcbcfg: Configuration struct to check for mappings in
3673  *
3674  * @return true if there exists a non-zero DSCP to TC mapping
3675  * inside the input DCB configuration struct.
3676  */
3677 static bool
3678 ice_dscp_is_mapped(struct ice_dcbx_cfg *dcbcfg)
3679 {
3680 	for (int i = 0; i < ICE_DSCP_NUM_VAL; i++)
3681 		if (dcbcfg->dscp_map[i] != 0)
3682 			return (true);
3683 
3684 	return (false);
3685 }
3686 
/* Help text for the FW LLDP agent sysctl: 0 disables, 1 enables */
#define ICE_SYSCTL_HELP_FW_LLDP_AGENT	\
"\nDisplay or change FW LLDP agent state:" \
"\n\t0 - disabled"			\
"\n\t1 - enabled"

/**
 * ice_sysctl_fw_lldp_agent - Display or change the FW LLDP agent status
 * @oidp: sysctl oid structure
 * @arg1: pointer to private softc structure
 * @arg2: unused
 * @req: sysctl request pointer
 *
 * On read: Displays whether the FW LLDP agent is running, derived from the
 * current persistent LLDP status (or the default one when the current value
 * is above ICE_LLDP_ADMINSTATUS_ENA_RXTX).
 * On write: Persistently enables or disables the FW LLDP agent. A write that
 * matches the state already reported is a no-op.
 *
 * @return 0 on success or when nothing needs to change, ESHUTDOWN while the
 * driver is detaching, EIO if a firmware command fails, EOPNOTSUPP when DSCP
 * QoS mappings are active, otherwise the error from sysctl_handle_bool().
 */
static int
ice_sysctl_fw_lldp_agent(SYSCTL_HANDLER_ARGS)
{
	struct ice_softc *sc = (struct ice_softc *)arg1;
	struct ice_dcbx_cfg *local_dcbx_cfg;
	struct ice_hw *hw = &sc->hw;
	device_t dev = sc->dev;
	enum ice_status status;
	int ret;
	u32 old_state;
	u8 fw_lldp_enabled;
	bool retried_start_lldp = false;

	UNREFERENCED_PARAMETER(arg2);

	if (ice_driver_is_detaching(sc))
		return (ESHUTDOWN);

	status = ice_get_cur_lldp_persist_status(hw, &old_state);
	if (status) {
		device_printf(dev,
		    "Could not acquire current LLDP persistence status, err %s aq_err %s\n",
		    ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
		return (EIO);
	}

	/* Current status is out of range: fall back to the default status */
	if (old_state > ICE_LLDP_ADMINSTATUS_ENA_RXTX) {
		status = ice_get_dflt_lldp_persist_status(hw, &old_state);
		if (status) {
			device_printf(dev,
			    "Could not acquire default LLDP persistence status, err %s aq_err %s\n",
			    ice_status_str(status),
			    ice_aq_str(hw->adminq.sq_last_status));
			return (EIO);
		}
	}
	/* Any non-zero persistent status is reported as "enabled" */
	if (old_state == 0)
		fw_lldp_enabled = false;
	else
		fw_lldp_enabled = true;

	ret = sysctl_handle_bool(oidp, &fw_lldp_enabled, 0, req);
	if ((ret) || (req->newptr == NULL))
		return (ret);

	/* Nothing to do when the requested state matches the old one */
	if (old_state == 0 && fw_lldp_enabled == false)
		return (0);

	if (old_state != 0 && fw_lldp_enabled == true)
		return (0);

	/*
	 * Block transition to FW LLDP if DSCP mode is enabled
	 *
	 * NOTE(review): this check runs before the enable/disable branch
	 * below, so it is evaluated for disable requests as well, although
	 * the message only mentions enabling - confirm this is intended.
	 */
	local_dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
	if ((local_dcbx_cfg->pfc_mode == ICE_QOS_MODE_DSCP) &&
	    ice_dscp_is_mapped(local_dcbx_cfg)) {
		device_printf(dev,
			      "Cannot enable FW-LLDP agent while DSCP QoS is active.\n");
		return (EOPNOTSUPP);
	}

	if (fw_lldp_enabled == false) {
		/* Disable: stop the FW agent and switch to software LLDP */
		status = ice_aq_stop_lldp(hw, true, true, NULL);
		/* EPERM is returned if the LLDP agent is already shutdown */
		if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EPERM) {
			device_printf(dev,
			    "%s: ice_aq_stop_lldp failed; status %s, aq_err %s\n",
			    __func__, ice_status_str(status),
			    ice_aq_str(hw->adminq.sq_last_status));
			return (EIO);
		}
		/* The result of this call is not checked */
		ice_aq_set_dcb_parameters(hw, true, NULL);
		hw->port_info->qos_cfg.is_sw_lldp = true;
		ice_add_rx_lldp_filter(sc);
	} else {
		/* Enable: drop the Rx LLDP filter, then start the FW agent */
		ice_del_rx_lldp_filter(sc);
retry_start_lldp:
		status = ice_aq_start_lldp(hw, true, NULL);
		if (status) {
			switch (hw->adminq.sq_last_status) {
			/* EEXIST is returned if the LLDP agent is already started */
			case ICE_AQ_RC_EEXIST:
				break;
			case ICE_AQ_RC_EAGAIN:
				/* Retry command after a 2 second wait */
				if (retried_start_lldp == false) {
					retried_start_lldp = true;
					pause("slldp", ICE_START_LLDP_RETRY_WAIT);
					goto retry_start_lldp;
				}
				/* Only one retry is made; a second EAGAIN is an error */
				/* Fallthrough */
			default:
				device_printf(dev,
				    "%s: ice_aq_start_lldp failed; status %s, aq_err %s\n",
				    __func__, ice_status_str(status),
				    ice_aq_str(hw->adminq.sq_last_status));
				return (EIO);
			}
		}
		ice_start_dcbx_agent(sc);
		hw->port_info->qos_cfg.is_sw_lldp = false;
	}

	/* ret is 0 here: a non-zero value was already returned above */
	return (ret);
}
3806 
3807 #define ICE_SYSCTL_HELP_ETS_MIN_RATE \
3808 "\nIn FW DCB mode (fw_lldp_agent=1), displays the current ETS bandwidth table." \
3809 "\nIn SW DCB mode, displays and allows setting the table." \
3810 "\nInput must be in the format e.g. 30,10,10,10,10,10,10,10" \
3811 "\nWhere the bandwidth total must add up to 100"
3812 
3813 /**
3814  * ice_sysctl_ets_min_rate - Report/configure ETS bandwidth
3815  * @oidp: sysctl oid structure
3816  * @arg1: pointer to private data structure
3817  * @arg2: unused
3818  * @req: sysctl request pointer
3819  *
3820  * Returns the current ETS TC bandwidth table
3821  * cached by the driver.
3822  *
3823  * In SW DCB mode this sysctl also accepts a value that will
3824  * be sent to the firmware for configuration.
3825  */
3826 static int
3827 ice_sysctl_ets_min_rate(SYSCTL_HANDLER_ARGS)
3828 {
3829 	struct ice_softc *sc = (struct ice_softc *)arg1;
3830 	struct ice_dcbx_cfg *local_dcbx_cfg;
3831 	struct ice_port_info *pi;
3832 	struct ice_hw *hw = &sc->hw;
3833 	device_t dev = sc->dev;
3834 	enum ice_status status;
3835 	struct sbuf *sbuf;
3836 	int ret;
3837 
3838 	/* Store input rates from user */
3839 	char ets_user_buf[128] = "";
3840 	u8 new_ets_table[ICE_MAX_TRAFFIC_CLASS] = {};
3841 
3842 	UNREFERENCED_PARAMETER(arg2);
3843 
3844 	if (ice_driver_is_detaching(sc))
3845 		return (ESHUTDOWN);
3846 
3847 	if (req->oldptr == NULL && req->newptr == NULL) {
3848 		ret = SYSCTL_OUT(req, 0, 128);
3849 		return (ret);
3850 	}
3851 
3852 	pi = hw->port_info;
3853 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
3854 
3855 	sbuf = sbuf_new(NULL, ets_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
3856 
3857 	/* Format ETS BW data for output */
3858 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
3859 		sbuf_printf(sbuf, "%d", local_dcbx_cfg->etscfg.tcbwtable[i]);
3860 		if (i != ICE_MAX_TRAFFIC_CLASS - 1)
3861 			sbuf_printf(sbuf, ",");
3862 	}
3863 
3864 	sbuf_finish(sbuf);
3865 	sbuf_delete(sbuf);
3866 
3867 	/* Read in the new ETS values */
3868 	ret = sysctl_handle_string(oidp, ets_user_buf, sizeof(ets_user_buf), req);
3869 	if ((ret) || (req->newptr == NULL))
3870 		return (ret);
3871 
3872 	/* Don't allow setting changes in FW DCB mode */
3873 	if (!hw->port_info->qos_cfg.is_sw_lldp)
3874 		return (EPERM);
3875 
3876 	ret = ice_ets_str_to_tbl(ets_user_buf, new_ets_table, 100);
3877 	if (ret) {
3878 		device_printf(dev, "%s: Could not parse input BW table: %s\n",
3879 		    __func__, ets_user_buf);
3880 		return (ret);
3881 	}
3882 
3883 	if (!ice_check_ets_bw(new_ets_table)) {
3884 		device_printf(dev, "%s: Bandwidth sum does not equal 100: %s\n",
3885 		    __func__, ets_user_buf);
3886 		return (EINVAL);
3887 	}
3888 
3889 	memcpy(local_dcbx_cfg->etscfg.tcbwtable, new_ets_table,
3890 	    sizeof(new_ets_table));
3891 
3892 	/* If BW > 0, then set TSA entry to 2 */
3893 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
3894 		if (new_ets_table[i] > 0)
3895 			local_dcbx_cfg->etscfg.tsatable[i] = 2;
3896 		else
3897 			local_dcbx_cfg->etscfg.tsatable[i] = 0;
3898 	}
3899 	local_dcbx_cfg->etscfg.willing = 0;
3900 	local_dcbx_cfg->etsrec = local_dcbx_cfg->etscfg;
3901 	local_dcbx_cfg->app_mode = ICE_DCBX_APPS_NON_WILLING;
3902 
3903 	status = ice_set_dcb_cfg(pi);
3904 	if (status) {
3905 		device_printf(dev,
3906 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
3907 		    __func__, ice_status_str(status),
3908 		    ice_aq_str(hw->adminq.sq_last_status));
3909 		return (EIO);
3910 	}
3911 
3912 	ice_do_dcb_reconfig(sc, false);
3913 
3914 	return (0);
3915 }
3916 
3917 #define ICE_SYSCTL_HELP_UP2TC_MAP \
3918 "\nIn FW DCB mode (fw_lldp_agent=1), displays the current ETS priority assignment table." \
3919 "\nIn SW DCB mode, displays and allows setting the table." \
3920 "\nInput must be in this format: 0,1,2,3,4,5,6,7" \
3921 "\nWhere the 1st number is the TC for UP0, 2nd number is the TC for UP1, etc"
3922 
3923 /**
3924  * ice_sysctl_up2tc_map - Report or configure UP2TC mapping
3925  * @oidp: sysctl oid structure
3926  * @arg1: pointer to private data structure
3927  * @arg2: unused
3928  * @req: sysctl request pointer
3929  *
3930  * In FW DCB mode, returns the current ETS prio table /
3931  * UP2TC mapping from the local MIB.
3932  *
3933  * In SW DCB mode this sysctl also accepts a value that will
3934  * be sent to the firmware for configuration.
3935  */
3936 static int
3937 ice_sysctl_up2tc_map(SYSCTL_HANDLER_ARGS)
3938 {
3939 	struct ice_softc *sc = (struct ice_softc *)arg1;
3940 	struct ice_dcbx_cfg *local_dcbx_cfg;
3941 	struct ice_port_info *pi;
3942 	struct ice_hw *hw = &sc->hw;
3943 	device_t dev = sc->dev;
3944 	enum ice_status status;
3945 	struct sbuf *sbuf;
3946 	int ret;
3947 
3948 	/* Store input rates from user */
3949 	char up2tc_user_buf[128] = "";
3950 	/* This array is indexed by UP, not TC */
3951 	u8 new_up2tc[ICE_MAX_TRAFFIC_CLASS] = {};
3952 
3953 	UNREFERENCED_PARAMETER(arg2);
3954 
3955 	if (ice_driver_is_detaching(sc))
3956 		return (ESHUTDOWN);
3957 
3958 	if (req->oldptr == NULL && req->newptr == NULL) {
3959 		ret = SYSCTL_OUT(req, 0, 128);
3960 		return (ret);
3961 	}
3962 
3963 	pi = hw->port_info;
3964 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
3965 
3966 	sbuf = sbuf_new(NULL, up2tc_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
3967 
3968 	/* Format ETS Priority Mapping Table for output */
3969 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
3970 		sbuf_printf(sbuf, "%d", local_dcbx_cfg->etscfg.prio_table[i]);
3971 		if (i != ICE_MAX_TRAFFIC_CLASS - 1)
3972 			sbuf_printf(sbuf, ",");
3973 	}
3974 
3975 	sbuf_finish(sbuf);
3976 	sbuf_delete(sbuf);
3977 
3978 	/* Read in the new ETS priority mapping */
3979 	ret = sysctl_handle_string(oidp, up2tc_user_buf, sizeof(up2tc_user_buf), req);
3980 	if ((ret) || (req->newptr == NULL))
3981 		return (ret);
3982 
3983 	/* Don't allow setting changes in FW DCB mode */
3984 	if (!hw->port_info->qos_cfg.is_sw_lldp)
3985 		return (EPERM);
3986 
3987 	ret = ice_ets_str_to_tbl(up2tc_user_buf, new_up2tc, 7);
3988 	if (ret) {
3989 		device_printf(dev, "%s: Could not parse input priority assignment table: %s\n",
3990 		    __func__, up2tc_user_buf);
3991 		return (ret);
3992 	}
3993 
3994 	/* Prepare updated ETS CFG/REC TLVs */
3995 	memcpy(local_dcbx_cfg->etscfg.prio_table, new_up2tc,
3996 	    sizeof(new_up2tc));
3997 	memcpy(local_dcbx_cfg->etsrec.prio_table, new_up2tc,
3998 	    sizeof(new_up2tc));
3999 
4000 	status = ice_set_dcb_cfg(pi);
4001 	if (status) {
4002 		device_printf(dev,
4003 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
4004 		    __func__, ice_status_str(status),
4005 		    ice_aq_str(hw->adminq.sq_last_status));
4006 		return (EIO);
4007 	}
4008 
4009 	ice_do_dcb_reconfig(sc, false);
4010 
4011 	return (0);
4012 }
4013 
4014 /**
4015  * ice_config_pfc - helper function to set PFC config in FW
4016  * @sc: device private structure
4017  * @new_mode: bit flags indicating PFC status for TCs
4018  *
4019  * @pre must be in SW DCB mode
4020  *
4021  * Configures the driver's local PFC TLV and sends it to the
4022  * FW for configuration, then reconfigures the driver/VSI
4023  * for DCB if needed.
4024  */
4025 static int
4026 ice_config_pfc(struct ice_softc *sc, u8 new_mode)
4027 {
4028 	struct ice_dcbx_cfg *local_dcbx_cfg;
4029 	struct ice_hw *hw = &sc->hw;
4030 	struct ice_port_info *pi;
4031 	device_t dev = sc->dev;
4032 	enum ice_status status;
4033 
4034 	pi = hw->port_info;
4035 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4036 
4037 	/* Prepare updated PFC TLV */
4038 	local_dcbx_cfg->pfc.pfcena = new_mode;
4039 	local_dcbx_cfg->pfc.pfccap = ICE_MAX_TRAFFIC_CLASS;
4040 	local_dcbx_cfg->pfc.willing = 0;
4041 	local_dcbx_cfg->pfc.mbc = 0;
4042 
4043 	/* Warn if PFC is being disabled with RoCE v2 in use */
4044 	if (new_mode == 0 && sc->rdma_entry.attached)
4045 		device_printf(dev,
4046 		    "WARNING: Recommended that Priority Flow Control is enabled when RoCEv2 is in use\n");
4047 
4048 	status = ice_set_dcb_cfg(pi);
4049 	if (status) {
4050 		device_printf(dev,
4051 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
4052 		    __func__, ice_status_str(status),
4053 		    ice_aq_str(hw->adminq.sq_last_status));
4054 		return (EIO);
4055 	}
4056 
4057 	ice_do_dcb_reconfig(sc, false);
4058 
4059 	return (0);
4060 }
4061 
4062 #define ICE_SYSCTL_HELP_PFC_CONFIG \
4063 "\nIn FW DCB mode (fw_lldp_agent=1), displays the current Priority Flow Control configuration" \
4064 "\nIn SW DCB mode, displays and allows setting the configuration" \
4065 "\nInput/Output is in this format: 0xff" \
4066 "\nWhere bit position # enables/disables PFC for that Traffic Class #"
4067 
4068 /**
4069  * ice_sysctl_pfc_config - Report or configure enabled PFC TCs
4070  * @oidp: sysctl oid structure
4071  * @arg1: pointer to private data structure
4072  * @arg2: unused
4073  * @req: sysctl request pointer
4074  *
4075  * In FW DCB mode, returns a bitmap containing the current TCs
4076  * that have PFC enabled on them.
4077  *
4078  * In SW DCB mode this sysctl also accepts a value that will
4079  * be sent to the firmware for configuration.
4080  */
4081 static int
4082 ice_sysctl_pfc_config(SYSCTL_HANDLER_ARGS)
4083 {
4084 	struct ice_softc *sc = (struct ice_softc *)arg1;
4085 	struct ice_dcbx_cfg *local_dcbx_cfg;
4086 	struct ice_port_info *pi;
4087 	struct ice_hw *hw = &sc->hw;
4088 	int ret;
4089 
4090 	/* Store input flags from user */
4091 	u8 user_pfc;
4092 
4093 	UNREFERENCED_PARAMETER(arg2);
4094 
4095 	if (ice_driver_is_detaching(sc))
4096 		return (ESHUTDOWN);
4097 
4098 	if (req->oldptr == NULL && req->newptr == NULL) {
4099 		ret = SYSCTL_OUT(req, 0, sizeof(u8));
4100 		return (ret);
4101 	}
4102 
4103 	pi = hw->port_info;
4104 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4105 
4106 	/* Format current PFC enable setting for output */
4107 	user_pfc = local_dcbx_cfg->pfc.pfcena;
4108 
4109 	/* Read in the new PFC config */
4110 	ret = sysctl_handle_8(oidp, &user_pfc, 0, req);
4111 	if ((ret) || (req->newptr == NULL))
4112 		return (ret);
4113 
4114 	/* Don't allow setting changes in FW DCB mode */
4115 	if (!hw->port_info->qos_cfg.is_sw_lldp)
4116 		return (EPERM);
4117 
4118 	/* If LFC is active and PFC is going to be turned on, turn LFC off */
4119 	if (user_pfc != 0 && pi->phy.curr_user_fc_req != ICE_FC_NONE) {
4120 		pi->phy.curr_user_fc_req = ICE_FC_NONE;
4121 		ret = ice_apply_saved_phy_cfg(sc, ICE_APPLY_FC);
4122 		if (ret)
4123 			return (ret);
4124 	}
4125 
4126 	return ice_config_pfc(sc, user_pfc);
4127 }
4128 
4129 #define ICE_SYSCTL_HELP_PFC_MODE \
4130 "\nDisplay and set the current QoS mode for the firmware" \
4131 "\n\t0: VLAN UP mode" \
4132 "\n\t1: DSCP mode"
4133 
4134 /**
4135  * ice_sysctl_pfc_mode
4136  * @oidp: sysctl oid structure
4137  * @arg1: pointer to private data structure
4138  * @arg2: unused
4139  * @req: sysctl request pointer
4140  *
4141  * Gets and sets whether the port is in DSCP or VLAN PCP-based
4142  * PFC mode. This is also used to set whether DSCP or VLAN PCP
4143  * -based settings are configured for DCB.
4144  */
4145 static int
4146 ice_sysctl_pfc_mode(SYSCTL_HANDLER_ARGS)
4147 {
4148 	struct ice_softc *sc = (struct ice_softc *)arg1;
4149 	struct ice_dcbx_cfg *local_dcbx_cfg;
4150 	struct ice_port_info *pi;
4151 	struct ice_hw *hw = &sc->hw;
4152 	device_t dev = sc->dev;
4153 	enum ice_status status;
4154 	u8 user_pfc_mode, aq_pfc_mode;
4155 	int ret;
4156 
4157 	UNREFERENCED_PARAMETER(arg2);
4158 
4159 	if (ice_driver_is_detaching(sc))
4160 		return (ESHUTDOWN);
4161 
4162 	if (req->oldptr == NULL && req->newptr == NULL) {
4163 		ret = SYSCTL_OUT(req, 0, sizeof(u8));
4164 		return (ret);
4165 	}
4166 
4167 	pi = hw->port_info;
4168 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4169 
4170 	user_pfc_mode = local_dcbx_cfg->pfc_mode;
4171 
4172 	/* Read in the new mode */
4173 	ret = sysctl_handle_8(oidp, &user_pfc_mode, 0, req);
4174 	if ((ret) || (req->newptr == NULL))
4175 		return (ret);
4176 
4177 	/* Don't allow setting changes in FW DCB mode */
4178 	if (!hw->port_info->qos_cfg.is_sw_lldp)
4179 		return (EPERM);
4180 
4181 	/* Currently, there are only two modes */
4182 	switch (user_pfc_mode) {
4183 	case 0:
4184 		aq_pfc_mode = ICE_AQC_PFC_VLAN_BASED_PFC;
4185 		break;
4186 	case 1:
4187 		aq_pfc_mode = ICE_AQC_PFC_DSCP_BASED_PFC;
4188 		break;
4189 	default:
4190 		device_printf(dev,
4191 		    "%s: Valid input range is 0-1 (input %d)\n",
4192 		    __func__, user_pfc_mode);
4193 		return (EINVAL);
4194 	}
4195 
4196 	status = ice_aq_set_pfc_mode(hw, aq_pfc_mode, NULL);
4197 	if (status == ICE_ERR_NOT_SUPPORTED) {
4198 		device_printf(dev,
4199 		    "%s: Failed to set PFC mode; DCB not supported\n",
4200 		    __func__);
4201 		return (ENODEV);
4202 	}
4203 	if (status) {
4204 		device_printf(dev,
4205 		    "%s: Failed to set PFC mode; status %s, aq_err %s\n",
4206 		    __func__, ice_status_str(status),
4207 		    ice_aq_str(hw->adminq.sq_last_status));
4208 		return (EIO);
4209 	}
4210 
4211 	/* Reset settings to default when mode is changed */
4212 	ice_set_default_local_mib_settings(sc);
4213 	/* Cache current settings and reconfigure */
4214 	local_dcbx_cfg->pfc_mode = user_pfc_mode;
4215 	ice_do_dcb_reconfig(sc, false);
4216 
4217 	return (0);
4218 }
4219 
4220 /**
4221  * ice_add_device_sysctls - add device specific dynamic sysctls
4222  * @sc: device private structure
4223  *
4224  * Add per-device dynamic sysctls which show device configuration or enable
4225  * configuring device functionality. For tunable values which can be set prior
4226  * to load, see ice_add_device_tunables.
4227  *
4228  * This function depends on the sysctl layout setup by ice_add_device_tunables,
4229  * and likely should be called near the end of the attach process.
4230  */
4231 void
4232 ice_add_device_sysctls(struct ice_softc *sc)
4233 {
4234 	struct sysctl_oid *hw_node;
4235 	device_t dev = sc->dev;
4236 
4237 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
4238 	struct sysctl_oid_list *ctx_list =
4239 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
4240 
4241 	SYSCTL_ADD_PROC(ctx, ctx_list,
4242 	    OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD,
4243 	    sc, 0, ice_sysctl_show_fw, "A", "Firmware version");
4244 
4245 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_HAS_PBA)) {
4246 		SYSCTL_ADD_PROC(ctx, ctx_list,
4247 		    OID_AUTO, "pba_number", CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4248 		    ice_sysctl_pba_number, "A", "Product Board Assembly Number");
4249 	}
4250 
4251 	SYSCTL_ADD_PROC(ctx, ctx_list,
4252 	    OID_AUTO, "ddp_version", CTLTYPE_STRING | CTLFLAG_RD,
4253 	    sc, 0, ice_sysctl_pkg_version, "A", "Active DDP package name and version");
4254 
4255 	SYSCTL_ADD_PROC(ctx, ctx_list,
4256 	    OID_AUTO, "current_speed", CTLTYPE_STRING | CTLFLAG_RD,
4257 	    sc, 0, ice_sysctl_current_speed, "A", "Current Port Link Speed");
4258 
4259 	SYSCTL_ADD_PROC(ctx, ctx_list,
4260 	    OID_AUTO, "requested_fec", CTLTYPE_STRING | CTLFLAG_RW,
4261 	    sc, 0, ice_sysctl_fec_config, "A", ICE_SYSCTL_HELP_FEC_CONFIG);
4262 
4263 	SYSCTL_ADD_PROC(ctx, ctx_list,
4264 	    OID_AUTO, "negotiated_fec", CTLTYPE_STRING | CTLFLAG_RD,
4265 	    sc, 0, ice_sysctl_negotiated_fec, "A", "Current Negotiated FEC mode");
4266 
4267 	SYSCTL_ADD_PROC(ctx, ctx_list,
4268 	    OID_AUTO, "fc", CTLTYPE_STRING | CTLFLAG_RW,
4269 	    sc, 0, ice_sysctl_fc_config, "A", ICE_SYSCTL_HELP_FC_CONFIG);
4270 
4271 	SYSCTL_ADD_PROC(ctx, ctx_list,
4272 	    OID_AUTO, "advertise_speed", CTLTYPE_U16 | CTLFLAG_RW,
4273 	    sc, 0, ice_sysctl_advertise_speed, "SU", ICE_SYSCTL_HELP_ADVERTISE_SPEED);
4274 
4275 	SYSCTL_ADD_PROC(ctx, ctx_list,
4276 	    OID_AUTO, "fw_lldp_agent", CTLTYPE_U8 | CTLFLAG_RWTUN,
4277 	    sc, 0, ice_sysctl_fw_lldp_agent, "CU", ICE_SYSCTL_HELP_FW_LLDP_AGENT);
4278 
4279 	SYSCTL_ADD_PROC(ctx, ctx_list,
4280 	    OID_AUTO, "ets_min_rate", CTLTYPE_STRING | CTLFLAG_RW,
4281 	    sc, 0, ice_sysctl_ets_min_rate, "A", ICE_SYSCTL_HELP_ETS_MIN_RATE);
4282 
4283 	SYSCTL_ADD_PROC(ctx, ctx_list,
4284 	    OID_AUTO, "up2tc_map", CTLTYPE_STRING | CTLFLAG_RW,
4285 	    sc, 0, ice_sysctl_up2tc_map, "A", ICE_SYSCTL_HELP_UP2TC_MAP);
4286 
4287 	SYSCTL_ADD_PROC(ctx, ctx_list,
4288 	    OID_AUTO, "pfc", CTLTYPE_U8 | CTLFLAG_RW,
4289 	    sc, 0, ice_sysctl_pfc_config, "CU", ICE_SYSCTL_HELP_PFC_CONFIG);
4290 
4291 	SYSCTL_ADD_PROC(ctx, ctx_list,
4292 	    OID_AUTO, "pfc_mode", CTLTYPE_U8 | CTLFLAG_RWTUN,
4293 	    sc, 0, ice_sysctl_pfc_mode, "CU", ICE_SYSCTL_HELP_PFC_MODE);
4294 
4295 	SYSCTL_ADD_PROC(ctx, ctx_list,
4296 	    OID_AUTO, "allow_no_fec_modules_in_auto",
4297 	    CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
4298 	    sc, 0, ice_sysctl_allow_no_fec_mod_in_auto, "CU",
4299 	    "Allow \"No FEC\" mode in FEC auto-negotiation");
4300 
4301 	ice_add_dscp2tc_map_sysctls(sc, ctx, ctx_list);
4302 
4303 	/* Differentiate software and hardware statistics, by keeping hw stats
4304 	 * in their own node. This isn't in ice_add_device_tunables, because
4305 	 * we won't have any CTLFLAG_TUN sysctls under this node.
4306 	 */
4307 	hw_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "hw", CTLFLAG_RD,
4308 				  NULL, "Port Hardware Statistics");
4309 
4310 	ice_add_sysctls_mac_stats(ctx, hw_node, &sc->stats.cur);
4311 
4312 	/* Add the main PF VSI stats now. Other VSIs will add their own stats
4313 	 * during creation
4314 	 */
4315 	ice_add_vsi_sysctls(&sc->pf_vsi);
4316 
4317 	/* Add sysctls related to debugging the device driver. This includes
4318 	 * sysctls which display additional internal driver state for use in
4319 	 * understanding what is happening within the driver.
4320 	 */
4321 	ice_add_debug_sysctls(sc);
4322 }
4323 
/**
 * @enum hmc_error_type
 * @brief enumeration of HMC errors
 *
 * Error type codes reported for an HMC error. The values 3, 10 and 12
 * are reserved and therefore have no enumerator.
 */
enum hmc_error_type {
	HMC_ERR_PMF_INVALID			= 0,
	HMC_ERR_VF_IDX_INVALID			= 1,
	HMC_ERR_VF_PARENT_PF_INVALID		= 2,
	/* 3 is reserved */
	HMC_ERR_INDEX_TOO_BIG			= 4,
	HMC_ERR_ADDRESS_TOO_LARGE		= 5,
	HMC_ERR_SEGMENT_DESC_INVALID		= 6,
	HMC_ERR_SEGMENT_DESC_TOO_SMALL		= 7,
	HMC_ERR_PAGE_DESC_INVALID		= 8,
	HMC_ERR_UNSUPPORTED_REQUEST_COMPLETION	= 9,
	/* 10 is reserved */
	HMC_ERR_INVALID_OBJECT_TYPE		= 11,
	/* 12 is reserved */
};
4345 
4346 /**
4347  * ice_log_hmc_error - Log an HMC error message
4348  * @hw: device hw structure
4349  * @dev: the device to pass to device_printf()
4350  *
4351  * Log a message when an HMC error interrupt is triggered.
4352  */
4353 void
4354 ice_log_hmc_error(struct ice_hw *hw, device_t dev)
4355 {
4356 	u32 info, data;
4357 	u8 index, errtype, objtype;
4358 	bool isvf;
4359 
4360 	info = rd32(hw, PFHMC_ERRORINFO);
4361 	data = rd32(hw, PFHMC_ERRORDATA);
4362 
4363 	index = (u8)(info & PFHMC_ERRORINFO_PMF_INDEX_M);
4364 	errtype = (u8)((info & PFHMC_ERRORINFO_HMC_ERROR_TYPE_M) >>
4365 		       PFHMC_ERRORINFO_HMC_ERROR_TYPE_S);
4366 	objtype = (u8)((info & PFHMC_ERRORINFO_HMC_OBJECT_TYPE_M) >>
4367 		       PFHMC_ERRORINFO_HMC_OBJECT_TYPE_S);
4368 
4369 	isvf = info & PFHMC_ERRORINFO_PMF_ISVF_M;
4370 
4371 	device_printf(dev, "%s HMC Error detected on PMF index %d:\n",
4372 		      isvf ? "VF" : "PF", index);
4373 
4374 	device_printf(dev, "error type %d, object type %d, data 0x%08x\n",
4375 		      errtype, objtype, data);
4376 
4377 	switch (errtype) {
4378 	case HMC_ERR_PMF_INVALID:
4379 		device_printf(dev, "Private Memory Function is not valid\n");
4380 		break;
4381 	case HMC_ERR_VF_IDX_INVALID:
4382 		device_printf(dev, "Invalid Private Memory Function index for PE enabled VF\n");
4383 		break;
4384 	case HMC_ERR_VF_PARENT_PF_INVALID:
4385 		device_printf(dev, "Invalid parent PF for PE enabled VF\n");
4386 		break;
4387 	case HMC_ERR_INDEX_TOO_BIG:
4388 		device_printf(dev, "Object index too big\n");
4389 		break;
4390 	case HMC_ERR_ADDRESS_TOO_LARGE:
4391 		device_printf(dev, "Address extends beyond segment descriptor limit\n");
4392 		break;
4393 	case HMC_ERR_SEGMENT_DESC_INVALID:
4394 		device_printf(dev, "Segment descriptor is invalid\n");
4395 		break;
4396 	case HMC_ERR_SEGMENT_DESC_TOO_SMALL:
4397 		device_printf(dev, "Segment descriptor is too small\n");
4398 		break;
4399 	case HMC_ERR_PAGE_DESC_INVALID:
4400 		device_printf(dev, "Page descriptor is invalid\n");
4401 		break;
4402 	case HMC_ERR_UNSUPPORTED_REQUEST_COMPLETION:
4403 		device_printf(dev, "Unsupported Request completion received from PCIe\n");
4404 		break;
4405 	case HMC_ERR_INVALID_OBJECT_TYPE:
4406 		device_printf(dev, "Invalid object type\n");
4407 		break;
4408 	default:
4409 		device_printf(dev, "Unknown HMC error\n");
4410 	}
4411 
4412 	/* Clear the error indication */
4413 	wr32(hw, PFHMC_ERRORINFO, 0);
4414 }
4415 
4416 /**
4417  * @struct ice_sysctl_info
4418  * @brief sysctl information
4419  *
4420  * Structure used to simplify the process of defining the many similar
4421  * statistics sysctls.
4422  */
4423 struct ice_sysctl_info {
4424 	u64		*stat;
4425 	const char	*name;
4426 	const char	*description;
4427 };
4428 
4429 /**
4430  * ice_add_sysctls_eth_stats - Add sysctls for ethernet statistics
4431  * @ctx: sysctl ctx to use
4432  * @parent: the parent node to add sysctls under
4433  * @stats: the ethernet stats structure to source values from
4434  *
4435  * Adds statistics sysctls for the ethernet statistics of the MAC or a VSI.
4436  * Will add them under the parent node specified.
4437  *
4438  * Note that tx_errors is only meaningful for VSIs and not the global MAC/PF
4439  * statistics, so it is not included here. Similarly, rx_discards has different
4440  * descriptions for VSIs and MAC/PF stats, so it is also not included here.
4441  */
4442 void
4443 ice_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx,
4444 			  struct sysctl_oid *parent,
4445 			  struct ice_eth_stats *stats)
4446 {
4447 	const struct ice_sysctl_info ctls[] = {
4448 		/* Rx Stats */
4449 		{ &stats->rx_bytes, "good_octets_rcvd", "Good Octets Received" },
4450 		{ &stats->rx_unicast, "ucast_pkts_rcvd", "Unicast Packets Received" },
4451 		{ &stats->rx_multicast, "mcast_pkts_rcvd", "Multicast Packets Received" },
4452 		{ &stats->rx_broadcast, "bcast_pkts_rcvd", "Broadcast Packets Received" },
4453 		/* Tx Stats */
4454 		{ &stats->tx_bytes, "good_octets_txd", "Good Octets Transmitted" },
4455 		{ &stats->tx_unicast, "ucast_pkts_txd", "Unicast Packets Transmitted" },
4456 		{ &stats->tx_multicast, "mcast_pkts_txd", "Multicast Packets Transmitted" },
4457 		{ &stats->tx_broadcast, "bcast_pkts_txd", "Broadcast Packets Transmitted" },
4458 		/* End */
4459 		{ 0, 0, 0 }
4460 	};
4461 
4462 	struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);
4463 
4464 	const struct ice_sysctl_info *entry = ctls;
4465 	while (entry->stat != 0) {
4466 		SYSCTL_ADD_U64(ctx, parent_list, OID_AUTO, entry->name,
4467 			       CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
4468 			       entry->description);
4469 		entry++;
4470 	}
4471 }
4472 
4473 /**
4474  * ice_sysctl_tx_cso_stat - Display Tx checksum offload statistic
4475  * @oidp: sysctl oid structure
4476  * @arg1: pointer to private data structure
4477  * @arg2: Tx CSO stat to read
4478  * @req: sysctl request pointer
4479  *
4480  * On read: Sums the per-queue Tx CSO stat and displays it.
4481  */
4482 static int
4483 ice_sysctl_tx_cso_stat(SYSCTL_HANDLER_ARGS)
4484 {
4485 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
4486 	enum ice_tx_cso_stat type = (enum ice_tx_cso_stat)arg2;
4487 	u64 stat = 0;
4488 	int i;
4489 
4490 	if (ice_driver_is_detaching(vsi->sc))
4491 		return (ESHUTDOWN);
4492 
4493 	/* Check that the type is valid */
4494 	if (type >= ICE_CSO_STAT_TX_COUNT)
4495 		return (EDOOFUS);
4496 
4497 	/* Sum the stat for each of the Tx queues */
4498 	for (i = 0; i < vsi->num_tx_queues; i++)
4499 		stat += vsi->tx_queues[i].stats.cso[type];
4500 
4501 	return sysctl_handle_64(oidp, NULL, stat, req);
4502 }
4503 
4504 /**
4505  * ice_sysctl_rx_cso_stat - Display Rx checksum offload statistic
4506  * @oidp: sysctl oid structure
4507  * @arg1: pointer to private data structure
4508  * @arg2: Rx CSO stat to read
4509  * @req: sysctl request pointer
4510  *
4511  * On read: Sums the per-queue Rx CSO stat and displays it.
4512  */
4513 static int
4514 ice_sysctl_rx_cso_stat(SYSCTL_HANDLER_ARGS)
4515 {
4516 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
4517 	enum ice_rx_cso_stat type = (enum ice_rx_cso_stat)arg2;
4518 	u64 stat = 0;
4519 	int i;
4520 
4521 	if (ice_driver_is_detaching(vsi->sc))
4522 		return (ESHUTDOWN);
4523 
4524 	/* Check that the type is valid */
4525 	if (type >= ICE_CSO_STAT_RX_COUNT)
4526 		return (EDOOFUS);
4527 
4528 	/* Sum the stat for each of the Rx queues */
4529 	for (i = 0; i < vsi->num_rx_queues; i++)
4530 		stat += vsi->rx_queues[i].stats.cso[type];
4531 
4532 	return sysctl_handle_64(oidp, NULL, stat, req);
4533 }
4534 
4535 /**
4536  * ice_sysctl_rx_errors_stat - Display aggregate of Rx errors
4537  * @oidp: sysctl oid structure
4538  * @arg1: pointer to private data structure
4539  * @arg2: unused
4540  * @req: sysctl request pointer
4541  *
4542  * On read: Sums current values of Rx error statistics and
4543  * displays it.
4544  */
4545 static int
4546 ice_sysctl_rx_errors_stat(SYSCTL_HANDLER_ARGS)
4547 {
4548 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
4549 	struct ice_hw_port_stats *hs = &vsi->sc->stats.cur;
4550 	u64 stat = 0;
4551 	int i, type;
4552 
4553 	UNREFERENCED_PARAMETER(arg2);
4554 
4555 	if (ice_driver_is_detaching(vsi->sc))
4556 		return (ESHUTDOWN);
4557 
4558 	stat += hs->rx_undersize;
4559 	stat += hs->rx_fragments;
4560 	stat += hs->rx_oversize;
4561 	stat += hs->rx_jabber;
4562 	stat += hs->rx_len_errors;
4563 	stat += hs->crc_errors;
4564 	stat += hs->illegal_bytes;
4565 
4566 	/* Checksum error stats */
4567 	for (i = 0; i < vsi->num_rx_queues; i++)
4568 		for (type = ICE_CSO_STAT_RX_IP4_ERR;
4569 		     type < ICE_CSO_STAT_RX_COUNT;
4570 		     type++)
4571 			stat += vsi->rx_queues[i].stats.cso[type];
4572 
4573 	return sysctl_handle_64(oidp, NULL, stat, req);
4574 }
4575 
4576 /**
4577  * @struct ice_rx_cso_stat_info
4578  * @brief sysctl information for an Rx checksum offload statistic
4579  *
4580  * Structure used to simplify the process of defining the checksum offload
4581  * statistics.
4582  */
4583 struct ice_rx_cso_stat_info {
4584 	enum ice_rx_cso_stat	type;
4585 	const char		*name;
4586 	const char		*description;
4587 };
4588 
4589 /**
4590  * @struct ice_tx_cso_stat_info
4591  * @brief sysctl information for a Tx checksum offload statistic
4592  *
4593  * Structure used to simplify the process of defining the checksum offload
4594  * statistics.
4595  */
4596 struct ice_tx_cso_stat_info {
4597 	enum ice_tx_cso_stat	type;
4598 	const char		*name;
4599 	const char		*description;
4600 };
4601 
4602 /**
4603  * ice_add_sysctls_sw_stats - Add sysctls for software statistics
4604  * @vsi: pointer to the VSI to add sysctls for
4605  * @ctx: sysctl ctx to use
4606  * @parent: the parent node to add sysctls under
4607  *
4608  * Add statistics sysctls for software tracked statistics of a VSI.
4609  *
4610  * Currently this only adds checksum offload statistics, but more counters may
4611  * be added in the future.
4612  */
4613 static void
4614 ice_add_sysctls_sw_stats(struct ice_vsi *vsi,
4615 			 struct sysctl_ctx_list *ctx,
4616 			 struct sysctl_oid *parent)
4617 {
4618 	struct sysctl_oid *cso_node;
4619 	struct sysctl_oid_list *cso_list;
4620 
4621 	/* Tx CSO Stats */
4622 	const struct ice_tx_cso_stat_info tx_ctls[] = {
4623 		{ ICE_CSO_STAT_TX_TCP, "tx_tcp", "Transmit TCP Packets marked for HW checksum" },
4624 		{ ICE_CSO_STAT_TX_UDP, "tx_udp", "Transmit UDP Packets marked for HW checksum" },
4625 		{ ICE_CSO_STAT_TX_SCTP, "tx_sctp", "Transmit SCTP Packets marked for HW checksum" },
4626 		{ ICE_CSO_STAT_TX_IP4, "tx_ip4", "Transmit IPv4 Packets marked for HW checksum" },
4627 		{ ICE_CSO_STAT_TX_IP6, "tx_ip6", "Transmit IPv6 Packets marked for HW checksum" },
4628 		{ ICE_CSO_STAT_TX_L3_ERR, "tx_l3_err", "Transmit packets that driver failed to set L3 HW CSO bits for" },
4629 		{ ICE_CSO_STAT_TX_L4_ERR, "tx_l4_err", "Transmit packets that driver failed to set L4 HW CSO bits for" },
4630 		/* End */
4631 		{ ICE_CSO_STAT_TX_COUNT, 0, 0 }
4632 	};
4633 
4634 	/* Rx CSO Stats */
4635 	const struct ice_rx_cso_stat_info rx_ctls[] = {
4636 		{ ICE_CSO_STAT_RX_IP4_ERR, "rx_ip4_err", "Received packets with invalid IPv4 checksum indicated by HW" },
4637 		{ ICE_CSO_STAT_RX_IP6_ERR, "rx_ip6_err", "Received IPv6 packets with extension headers" },
4638 		{ ICE_CSO_STAT_RX_L3_ERR, "rx_l3_err", "Received packets with an unexpected invalid L3 checksum indicated by HW" },
4639 		{ ICE_CSO_STAT_RX_TCP_ERR, "rx_tcp_err", "Received packets with invalid TCP checksum indicated by HW" },
4640 		{ ICE_CSO_STAT_RX_UDP_ERR, "rx_udp_err", "Received packets with invalid UDP checksum indicated by HW" },
4641 		{ ICE_CSO_STAT_RX_SCTP_ERR, "rx_sctp_err", "Received packets with invalid SCTP checksum indicated by HW" },
4642 		{ ICE_CSO_STAT_RX_L4_ERR, "rx_l4_err", "Received packets with an unexpected invalid L4 checksum indicated by HW" },
4643 		/* End */
4644 		{ ICE_CSO_STAT_RX_COUNT, 0, 0 }
4645 	};
4646 
4647 	struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);
4648 
4649 	/* Add a node for statistics tracked by software. */
4650 	cso_node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, "cso", CTLFLAG_RD,
4651 				  NULL, "Checksum offload Statistics");
4652 	cso_list = SYSCTL_CHILDREN(cso_node);
4653 
4654 	const struct ice_tx_cso_stat_info *tx_entry = tx_ctls;
4655 	while (tx_entry->name && tx_entry->description) {
4656 		SYSCTL_ADD_PROC(ctx, cso_list, OID_AUTO, tx_entry->name,
4657 				CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4658 				vsi, tx_entry->type, ice_sysctl_tx_cso_stat, "QU",
4659 				tx_entry->description);
4660 		tx_entry++;
4661 	}
4662 
4663 	const struct ice_rx_cso_stat_info *rx_entry = rx_ctls;
4664 	while (rx_entry->name && rx_entry->description) {
4665 		SYSCTL_ADD_PROC(ctx, cso_list, OID_AUTO, rx_entry->name,
4666 				CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4667 				vsi, rx_entry->type, ice_sysctl_rx_cso_stat, "QU",
4668 				rx_entry->description);
4669 		rx_entry++;
4670 	}
4671 }
4672 
4673 /**
4674  * ice_add_vsi_sysctls - Add sysctls for a VSI
4675  * @vsi: pointer to VSI structure
4676  *
4677  * Add various sysctls for a given VSI.
4678  */
4679 void
4680 ice_add_vsi_sysctls(struct ice_vsi *vsi)
4681 {
4682 	struct sysctl_ctx_list *ctx = &vsi->ctx;
4683 	struct sysctl_oid *hw_node, *sw_node;
4684 	struct sysctl_oid_list *vsi_list, *hw_list;
4685 
4686 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
4687 
4688 	/* Keep hw stats in their own node. */
4689 	hw_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, "hw", CTLFLAG_RD,
4690 				  NULL, "VSI Hardware Statistics");
4691 	hw_list = SYSCTL_CHILDREN(hw_node);
4692 
4693 	/* Add the ethernet statistics for this VSI */
4694 	ice_add_sysctls_eth_stats(ctx, hw_node, &vsi->hw_stats.cur);
4695 
4696 	SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "rx_discards",
4697 			CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.rx_discards,
4698 			0, "Discarded Rx Packets (see rx_errors or rx_no_desc)");
4699 
4700 	SYSCTL_ADD_PROC(ctx, hw_list, OID_AUTO, "rx_errors",
4701 			CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4702 			vsi, 0, ice_sysctl_rx_errors_stat, "QU",
4703 			"Aggregate of all Rx errors");
4704 
4705 	SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "rx_no_desc",
4706 		       CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.rx_no_desc,
4707 		       0, "Rx Packets Discarded Due To Lack Of Descriptors");
4708 
4709 	SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "tx_errors",
4710 			CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.tx_errors,
4711 			0, "Tx Packets Discarded Due To Error");
4712 
4713 	/* Add a node for statistics tracked by software. */
4714 	sw_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, "sw", CTLFLAG_RD,
4715 				  NULL, "VSI Software Statistics");
4716 
4717 	ice_add_sysctls_sw_stats(vsi, ctx, sw_node);
4718 }
4719 
4720 /**
4721  * ice_add_sysctls_mac_pfc_one_stat - Add sysctl node for a PFC statistic
4722  * @ctx: sysctl ctx to use
4723  * @parent_list: parent sysctl list to add sysctls under
4724  * @pfc_stat_location: address of statistic for sysctl to display
4725  * @node_name: Name for statistic node
4726  * @descr: Description used for nodes added in this function
4727  *
4728  * A helper function for ice_add_sysctls_mac_pfc_stats that adds a node
4729  * for a stat and leaves for each traffic class for that stat.
4730  */
4731 static void
4732 ice_add_sysctls_mac_pfc_one_stat(struct sysctl_ctx_list *ctx,
4733 				 struct sysctl_oid_list *parent_list,
4734 				 u64* pfc_stat_location,
4735 				 const char *node_name,
4736 				 const char *descr)
4737 {
4738 	struct sysctl_oid_list *node_list;
4739 	struct sysctl_oid *node;
4740 	struct sbuf *namebuf, *descbuf;
4741 
4742 	node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, node_name, CTLFLAG_RD,
4743 				   NULL, descr);
4744 	node_list = SYSCTL_CHILDREN(node);
4745 
4746 	namebuf = sbuf_new_auto();
4747 	descbuf = sbuf_new_auto();
4748 	for (int i = 0; i < ICE_MAX_DCB_TCS; i++) {
4749 		sbuf_clear(namebuf);
4750 		sbuf_clear(descbuf);
4751 
4752 		sbuf_printf(namebuf, "%d", i);
4753 		sbuf_printf(descbuf, "%s for TC %d", descr, i);
4754 
4755 		sbuf_finish(namebuf);
4756 		sbuf_finish(descbuf);
4757 
4758 		SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, sbuf_data(namebuf),
4759 			CTLFLAG_RD | CTLFLAG_STATS, &pfc_stat_location[i], 0,
4760 			sbuf_data(descbuf));
4761 	}
4762 
4763 	sbuf_delete(namebuf);
4764 	sbuf_delete(descbuf);
4765 }
4766 
4767 /**
4768  * ice_add_sysctls_mac_pfc_stats - Add sysctls for MAC PFC statistics
4769  * @ctx: the sysctl ctx to use
4770  * @parent: parent node to add the sysctls under
4771  * @stats: the hw ports stat structure to pull values from
4772  *
4773  * Add global Priority Flow Control MAC statistics sysctls. These are
4774  * structured as a node with the PFC statistic, where there are eight
4775  * nodes for each traffic class.
4776  */
4777 static void
4778 ice_add_sysctls_mac_pfc_stats(struct sysctl_ctx_list *ctx,
4779 			      struct sysctl_oid *parent,
4780 			      struct ice_hw_port_stats *stats)
4781 {
4782 	struct sysctl_oid_list *parent_list;
4783 
4784 	parent_list = SYSCTL_CHILDREN(parent);
4785 
4786 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xon_rx,
4787 	    "p_xon_recvd", "PFC XON received");
4788 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xoff_rx,
4789 	    "p_xoff_recvd", "PFC XOFF received");
4790 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xon_tx,
4791 	    "p_xon_txd", "PFC XON transmitted");
4792 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xoff_tx,
4793 	    "p_xoff_txd", "PFC XOFF transmitted");
4794 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xon_2_xoff,
4795 	    "p_xon2xoff", "PFC XON to XOFF transitions");
4796 }
4797 
4798 /**
4799  * ice_add_sysctls_mac_stats - Add sysctls for global MAC statistics
4800  * @ctx: the sysctl ctx to use
4801  * @parent: parent node to add the sysctls under
4802  * @stats: the hw ports stat structure to pull values from
4803  *
4804  * Add global MAC statistics sysctls.
4805  */
4806 void
4807 ice_add_sysctls_mac_stats(struct sysctl_ctx_list *ctx,
4808 			  struct sysctl_oid *parent,
4809 			  struct ice_hw_port_stats *stats)
4810 {
4811 	struct sysctl_oid *mac_node;
4812 	struct sysctl_oid_list *parent_list, *mac_list;
4813 
4814 	parent_list = SYSCTL_CHILDREN(parent);
4815 
4816 	mac_node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, "mac", CTLFLAG_RD,
4817 				   NULL, "Mac Hardware Statistics");
4818 	mac_list = SYSCTL_CHILDREN(mac_node);
4819 
4820 	/* Add the ethernet statistics common to VSI and MAC */
4821 	ice_add_sysctls_eth_stats(ctx, mac_node, &stats->eth);
4822 
4823 	/* Add PFC stats that add per-TC counters */
4824 	ice_add_sysctls_mac_pfc_stats(ctx, mac_node, stats);
4825 
4826 	const struct ice_sysctl_info ctls[] = {
4827 		/* Packet Reception Stats */
4828 		{&stats->rx_size_64, "rx_frames_64", "64 byte frames received"},
4829 		{&stats->rx_size_127, "rx_frames_65_127", "65-127 byte frames received"},
4830 		{&stats->rx_size_255, "rx_frames_128_255", "128-255 byte frames received"},
4831 		{&stats->rx_size_511, "rx_frames_256_511", "256-511 byte frames received"},
4832 		{&stats->rx_size_1023, "rx_frames_512_1023", "512-1023 byte frames received"},
4833 		{&stats->rx_size_1522, "rx_frames_1024_1522", "1024-1522 byte frames received"},
4834 		{&stats->rx_size_big, "rx_frames_big", "1523-9522 byte frames received"},
4835 		{&stats->rx_undersize, "rx_undersize", "Undersized packets received"},
4836 		{&stats->rx_fragments, "rx_fragmented", "Fragmented packets received"},
4837 		{&stats->rx_oversize, "rx_oversized", "Oversized packets received"},
4838 		{&stats->rx_jabber, "rx_jabber", "Received Jabber"},
4839 		{&stats->rx_len_errors, "rx_length_errors", "Receive Length Errors"},
4840 		{&stats->eth.rx_discards, "rx_discards",
4841 		    "Discarded Rx Packets by Port (shortage of storage space)"},
4842 		/* Packet Transmission Stats */
4843 		{&stats->tx_size_64, "tx_frames_64", "64 byte frames transmitted"},
4844 		{&stats->tx_size_127, "tx_frames_65_127", "65-127 byte frames transmitted"},
4845 		{&stats->tx_size_255, "tx_frames_128_255", "128-255 byte frames transmitted"},
4846 		{&stats->tx_size_511, "tx_frames_256_511", "256-511 byte frames transmitted"},
4847 		{&stats->tx_size_1023, "tx_frames_512_1023", "512-1023 byte frames transmitted"},
4848 		{&stats->tx_size_1522, "tx_frames_1024_1522", "1024-1522 byte frames transmitted"},
4849 		{&stats->tx_size_big, "tx_frames_big", "1523-9522 byte frames transmitted"},
4850 		{&stats->tx_dropped_link_down, "tx_dropped", "Tx Dropped Due To Link Down"},
4851 		/* Flow control */
4852 		{&stats->link_xon_tx, "xon_txd", "Link XON transmitted"},
4853 		{&stats->link_xon_rx, "xon_recvd", "Link XON received"},
4854 		{&stats->link_xoff_tx, "xoff_txd", "Link XOFF transmitted"},
4855 		{&stats->link_xoff_rx, "xoff_recvd", "Link XOFF received"},
4856 		/* Other */
4857 		{&stats->crc_errors, "crc_errors", "CRC Errors"},
4858 		{&stats->illegal_bytes, "illegal_bytes", "Illegal Byte Errors"},
4859 		{&stats->mac_local_faults, "local_faults", "MAC Local Faults"},
4860 		{&stats->mac_remote_faults, "remote_faults", "MAC Remote Faults"},
4861 		/* End */
4862 		{ 0, 0, 0 }
4863 	};
4864 
4865 	const struct ice_sysctl_info *entry = ctls;
4866 	while (entry->stat != 0) {
4867 		SYSCTL_ADD_U64(ctx, mac_list, OID_AUTO, entry->name,
4868 			CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
4869 			entry->description);
4870 		entry++;
4871 	}
4872 }
4873 
4874 /**
4875  * ice_configure_misc_interrupts - enable 'other' interrupt causes
4876  * @sc: pointer to device private softc
4877  *
4878  * Enable various "other" interrupt causes, and associate them to interrupt 0,
4879  * which is our administrative interrupt.
4880  */
4881 void
4882 ice_configure_misc_interrupts(struct ice_softc *sc)
4883 {
4884 	struct ice_hw *hw = &sc->hw;
4885 	u32 val;
4886 
4887 	/* Read the OICR register to clear it */
4888 	rd32(hw, PFINT_OICR);
4889 
4890 	/* Enable useful "other" interrupt causes */
4891 	val = (PFINT_OICR_ECC_ERR_M |
4892 	       PFINT_OICR_MAL_DETECT_M |
4893 	       PFINT_OICR_GRST_M |
4894 	       PFINT_OICR_PCI_EXCEPTION_M |
4895 	       PFINT_OICR_VFLR_M |
4896 	       PFINT_OICR_HMC_ERR_M |
4897 	       PFINT_OICR_PE_CRITERR_M);
4898 
4899 	wr32(hw, PFINT_OICR_ENA, val);
4900 
4901 	/* Note that since we're using MSI-X index 0, and ITR index 0, we do
4902 	 * not explicitly program them when writing to the PFINT_*_CTL
4903 	 * registers. Nevertheless, these writes are associating the
4904 	 * interrupts with the ITR 0 vector
4905 	 */
4906 
4907 	/* Associate the OICR interrupt with ITR 0, and enable it */
4908 	wr32(hw, PFINT_OICR_CTL, PFINT_OICR_CTL_CAUSE_ENA_M);
4909 
4910 	/* Associate the Mailbox interrupt with ITR 0, and enable it */
4911 	wr32(hw, PFINT_MBX_CTL, PFINT_MBX_CTL_CAUSE_ENA_M);
4912 
4913 	/* Associate the AdminQ interrupt with ITR 0, and enable it */
4914 	wr32(hw, PFINT_FW_CTL, PFINT_FW_CTL_CAUSE_ENA_M);
4915 }
4916 
4917 /**
4918  * ice_filter_is_mcast - Check if info is a multicast filter
4919  * @vsi: vsi structure addresses are targeted towards
4920  * @info: filter info
4921  *
4922  * @returns true if the provided info is a multicast filter, and false
4923  * otherwise.
4924  */
4925 static bool
4926 ice_filter_is_mcast(struct ice_vsi *vsi, struct ice_fltr_info *info)
4927 {
4928 	const u8 *addr = info->l_data.mac.mac_addr;
4929 
4930 	/*
4931 	 * Check if this info matches a multicast filter added by
4932 	 * ice_add_mac_to_list
4933 	 */
4934 	if ((info->flag == ICE_FLTR_TX) &&
4935 	    (info->src_id == ICE_SRC_ID_VSI) &&
4936 	    (info->lkup_type == ICE_SW_LKUP_MAC) &&
4937 	    (info->vsi_handle == vsi->idx) &&
4938 	    ETHER_IS_MULTICAST(addr) && !ETHER_IS_BROADCAST(addr))
4939 		return true;
4940 
4941 	return false;
4942 }
4943 
4944 /**
4945  * @struct ice_mcast_sync_data
4946  * @brief data used by ice_sync_one_mcast_filter function
4947  *
4948  * Structure used to store data needed for processing by the
4949  * ice_sync_one_mcast_filter. This structure contains a linked list of filters
4950  * to be added, an error indication, and a pointer to the device softc.
4951  */
4952 struct ice_mcast_sync_data {
4953 	struct ice_list_head add_list;
4954 	struct ice_softc *sc;
4955 	int err;
4956 };
4957 
4958 /**
4959  * ice_sync_one_mcast_filter - Check if we need to program the filter
4960  * @p: void pointer to algorithm data
4961  * @sdl: link level socket address
4962  * @count: unused count value
4963  *
4964  * Called by if_foreach_llmaddr to operate on each filter in the ifp filter
4965  * list. For the given address, search our internal list to see if we have
4966  * found the filter. If not, add it to our list of filters that need to be
4967  * programmed.
4968  *
4969  * @returns (1) if we've actually setup the filter to be added
4970  */
4971 static u_int
4972 ice_sync_one_mcast_filter(void *p, struct sockaddr_dl *sdl,
4973 			  u_int __unused count)
4974 {
4975 	struct ice_mcast_sync_data *data = (struct ice_mcast_sync_data *)p;
4976 	struct ice_softc *sc = data->sc;
4977 	struct ice_hw *hw = &sc->hw;
4978 	struct ice_switch_info *sw = hw->switch_info;
4979 	const u8 *sdl_addr = (const u8 *)LLADDR(sdl);
4980 	struct ice_fltr_mgmt_list_entry *itr;
4981 	struct ice_list_head *rules;
4982 	int err;
4983 
4984 	rules = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
4985 
4986 	/*
4987 	 * If a previous filter already indicated an error, there is no need
4988 	 * for us to finish processing the rest of the filters.
4989 	 */
4990 	if (data->err)
4991 		return (0);
4992 
4993 	/* See if this filter has already been programmed */
4994 	LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry) {
4995 		struct ice_fltr_info *info = &itr->fltr_info;
4996 		const u8 *addr = info->l_data.mac.mac_addr;
4997 
4998 		/* Only check multicast filters */
4999 		if (!ice_filter_is_mcast(&sc->pf_vsi, info))
5000 			continue;
5001 
5002 		/*
5003 		 * If this filter matches, mark the internal filter as
5004 		 * "found", and exit.
5005 		 */
5006 		if (bcmp(addr, sdl_addr, ETHER_ADDR_LEN) == 0) {
5007 			itr->marker = ICE_FLTR_FOUND;
5008 			return (1);
5009 		}
5010 	}
5011 
5012 	/*
5013 	 * If we failed to locate the filter in our internal list, we need to
5014 	 * place it into our add list.
5015 	 */
5016 	err = ice_add_mac_to_list(&sc->pf_vsi, &data->add_list, sdl_addr,
5017 				  ICE_FWD_TO_VSI);
5018 	if (err) {
5019 		device_printf(sc->dev,
5020 			      "Failed to place MAC %6D onto add list, err %s\n",
5021 			      sdl_addr, ":", ice_err_str(err));
5022 		data->err = err;
5023 
5024 		return (0);
5025 	}
5026 
5027 	return (1);
5028 }
5029 
5030 /**
5031  * ice_sync_multicast_filters - Synchronize OS and internal filter list
5032  * @sc: device private structure
5033  *
5034  * Called in response to SIOCDELMULTI to synchronize the operating system
5035  * multicast address list with the internal list of filters programmed to
5036  * firmware.
5037  *
5038  * Works in one phase to find added and deleted filters using a marker bit on
5039  * the internal list.
5040  *
5041  * First, a loop over the internal list clears the marker bit. Second, for
5042  * each filter in the ifp list is checked. If we find it in the internal list,
5043  * the marker bit is set. Otherwise, the filter is added to the add list.
5044  * Third, a loop over the internal list determines if any filters have not
5045  * been found. Each of these is added to the delete list. Finally, the add and
5046  * delete lists are programmed to firmware to update the filters.
5047  *
5048  * @returns zero on success or an integer error code on failure.
5049  */
5050 int
5051 ice_sync_multicast_filters(struct ice_softc *sc)
5052 {
5053 	struct ice_hw *hw = &sc->hw;
5054 	struct ice_switch_info *sw = hw->switch_info;
5055 	struct ice_fltr_mgmt_list_entry *itr;
5056 	struct ice_mcast_sync_data data = {};
5057 	struct ice_list_head *rules, remove_list;
5058 	enum ice_status status;
5059 	int err = 0;
5060 
5061 	INIT_LIST_HEAD(&data.add_list);
5062 	INIT_LIST_HEAD(&remove_list);
5063 	data.sc = sc;
5064 	data.err = 0;
5065 
5066 	rules = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
5067 
5068 	/* Acquire the lock for the entire duration */
5069 	ice_acquire_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5070 
5071 	/* (1) Reset the marker state for all filters */
5072 	LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry)
5073 		itr->marker = ICE_FLTR_NOT_FOUND;
5074 
5075 	/* (2) determine which filters need to be added and removed */
5076 	if_foreach_llmaddr(sc->ifp, ice_sync_one_mcast_filter, (void *)&data);
5077 	if (data.err) {
5078 		/* ice_sync_one_mcast_filter already prints an error */
5079 		err = data.err;
5080 		ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5081 		goto free_filter_lists;
5082 	}
5083 
5084 	LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry) {
5085 		struct ice_fltr_info *info = &itr->fltr_info;
5086 		const u8 *addr = info->l_data.mac.mac_addr;
5087 
5088 		/* Only check multicast filters */
5089 		if (!ice_filter_is_mcast(&sc->pf_vsi, info))
5090 			continue;
5091 
5092 		/*
5093 		 * If the filter is not marked as found, then it must no
5094 		 * longer be in the ifp address list, so we need to remove it.
5095 		 */
5096 		if (itr->marker == ICE_FLTR_NOT_FOUND) {
5097 			err = ice_add_mac_to_list(&sc->pf_vsi, &remove_list,
5098 						  addr, ICE_FWD_TO_VSI);
5099 			if (err) {
5100 				device_printf(sc->dev,
5101 					      "Failed to place MAC %6D onto remove list, err %s\n",
5102 					      addr, ":", ice_err_str(err));
5103 				ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5104 				goto free_filter_lists;
5105 			}
5106 		}
5107 	}
5108 
5109 	ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5110 
5111 	status = ice_add_mac(hw, &data.add_list);
5112 	if (status) {
5113 		device_printf(sc->dev,
5114 			      "Could not add new MAC filters, err %s aq_err %s\n",
5115 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
5116 		err = (EIO);
5117 		goto free_filter_lists;
5118 	}
5119 
5120 	status = ice_remove_mac(hw, &remove_list);
5121 	if (status) {
5122 		device_printf(sc->dev,
5123 			      "Could not remove old MAC filters, err %s aq_err %s\n",
5124 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
5125 		err = (EIO);
5126 		goto free_filter_lists;
5127 	}
5128 
5129 free_filter_lists:
5130 	ice_free_fltr_list(&data.add_list);
5131 	ice_free_fltr_list(&remove_list);
5132 
5133 	return (err);
5134 }
5135 
5136 /**
5137  * ice_add_vlan_hw_filter - Add a VLAN filter for a given VSI
5138  * @vsi: The VSI to add the filter for
5139  * @vid: VLAN to add
5140  *
5141  * Programs a HW filter so that the given VSI will receive the specified VLAN.
5142  */
5143 enum ice_status
5144 ice_add_vlan_hw_filter(struct ice_vsi *vsi, u16 vid)
5145 {
5146 	struct ice_hw *hw = &vsi->sc->hw;
5147 	struct ice_list_head vlan_list;
5148 	struct ice_fltr_list_entry vlan_entry;
5149 
5150 	INIT_LIST_HEAD(&vlan_list);
5151 	memset(&vlan_entry, 0, sizeof(vlan_entry));
5152 
5153 	vlan_entry.fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
5154 	vlan_entry.fltr_info.fltr_act = ICE_FWD_TO_VSI;
5155 	vlan_entry.fltr_info.flag = ICE_FLTR_TX;
5156 	vlan_entry.fltr_info.src_id = ICE_SRC_ID_VSI;
5157 	vlan_entry.fltr_info.vsi_handle = vsi->idx;
5158 	vlan_entry.fltr_info.l_data.vlan.vlan_id = vid;
5159 
5160 	LIST_ADD(&vlan_entry.list_entry, &vlan_list);
5161 
5162 	return ice_add_vlan(hw, &vlan_list);
5163 }
5164 
5165 /**
5166  * ice_remove_vlan_hw_filter - Remove a VLAN filter for a given VSI
5167  * @vsi: The VSI to add the filter for
5168  * @vid: VLAN to remove
5169  *
5170  * Removes a previously programmed HW filter for the specified VSI.
5171  */
5172 enum ice_status
5173 ice_remove_vlan_hw_filter(struct ice_vsi *vsi, u16 vid)
5174 {
5175 	struct ice_hw *hw = &vsi->sc->hw;
5176 	struct ice_list_head vlan_list;
5177 	struct ice_fltr_list_entry vlan_entry;
5178 
5179 	INIT_LIST_HEAD(&vlan_list);
5180 	memset(&vlan_entry, 0, sizeof(vlan_entry));
5181 
5182 	vlan_entry.fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
5183 	vlan_entry.fltr_info.fltr_act = ICE_FWD_TO_VSI;
5184 	vlan_entry.fltr_info.flag = ICE_FLTR_TX;
5185 	vlan_entry.fltr_info.src_id = ICE_SRC_ID_VSI;
5186 	vlan_entry.fltr_info.vsi_handle = vsi->idx;
5187 	vlan_entry.fltr_info.l_data.vlan.vlan_id = vid;
5188 
5189 	LIST_ADD(&vlan_entry.list_entry, &vlan_list);
5190 
5191 	return ice_remove_vlan(hw, &vlan_list);
5192 }
5193 
5194 #define ICE_SYSCTL_HELP_RX_ITR			\
5195 "\nControl Rx interrupt throttle rate."		\
5196 "\n\t0-8160 - sets interrupt rate in usecs"	\
5197 "\n\t    -1 - reset the Rx itr to default"
5198 
5199 /**
5200  * ice_sysctl_rx_itr - Display or change the Rx ITR for a VSI
5201  * @oidp: sysctl oid structure
5202  * @arg1: pointer to private data structure
5203  * @arg2: unused
5204  * @req: sysctl request pointer
5205  *
5206  * On read: Displays the current Rx ITR value
5207  * on write: Sets the Rx ITR value, reconfiguring device if it is up
5208  */
5209 static int
5210 ice_sysctl_rx_itr(SYSCTL_HANDLER_ARGS)
5211 {
5212 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
5213 	struct ice_softc *sc = vsi->sc;
5214 	int increment, ret;
5215 
5216 	UNREFERENCED_PARAMETER(arg2);
5217 
5218 	if (ice_driver_is_detaching(sc))
5219 		return (ESHUTDOWN);
5220 
5221 	ret = sysctl_handle_16(oidp, &vsi->rx_itr, 0, req);
5222 	if ((ret) || (req->newptr == NULL))
5223 		return (ret);
5224 
5225 	if (vsi->rx_itr < 0)
5226 		vsi->rx_itr = ICE_DFLT_RX_ITR;
5227 	if (vsi->rx_itr > ICE_ITR_MAX)
5228 		vsi->rx_itr = ICE_ITR_MAX;
5229 
5230 	/* Assume 2usec increment if it hasn't been loaded yet */
5231 	increment = sc->hw.itr_gran ? : 2;
5232 
5233 	/* We need to round the value to the hardware's ITR granularity */
5234 	vsi->rx_itr = (vsi->rx_itr / increment ) * increment;
5235 
5236 	/* If the driver has finished initializing, then we need to reprogram
5237 	 * the ITR registers now. Otherwise, they will be programmed during
5238 	 * driver initialization.
5239 	 */
5240 	if (ice_test_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
5241 		ice_configure_rx_itr(vsi);
5242 
5243 	return (0);
5244 }
5245 
5246 #define ICE_SYSCTL_HELP_TX_ITR			\
5247 "\nControl Tx interrupt throttle rate."		\
5248 "\n\t0-8160 - sets interrupt rate in usecs"	\
5249 "\n\t    -1 - reset the Tx itr to default"
5250 
5251 /**
5252  * ice_sysctl_tx_itr - Display or change the Tx ITR for a VSI
5253  * @oidp: sysctl oid structure
5254  * @arg1: pointer to private data structure
5255  * @arg2: unused
5256  * @req: sysctl request pointer
5257  *
5258  * On read: Displays the current Tx ITR value
5259  * on write: Sets the Tx ITR value, reconfiguring device if it is up
5260  */
5261 static int
5262 ice_sysctl_tx_itr(SYSCTL_HANDLER_ARGS)
5263 {
5264 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
5265 	struct ice_softc *sc = vsi->sc;
5266 	int increment, ret;
5267 
5268 	UNREFERENCED_PARAMETER(arg2);
5269 
5270 	if (ice_driver_is_detaching(sc))
5271 		return (ESHUTDOWN);
5272 
5273 	ret = sysctl_handle_16(oidp, &vsi->tx_itr, 0, req);
5274 	if ((ret) || (req->newptr == NULL))
5275 		return (ret);
5276 
5277 	/* Allow configuring a negative value to reset to the default */
5278 	if (vsi->tx_itr < 0)
5279 		vsi->tx_itr = ICE_DFLT_TX_ITR;
5280 	if (vsi->tx_itr > ICE_ITR_MAX)
5281 		vsi->tx_itr = ICE_ITR_MAX;
5282 
5283 	/* Assume 2usec increment if it hasn't been loaded yet */
5284 	increment = sc->hw.itr_gran ? : 2;
5285 
5286 	/* We need to round the value to the hardware's ITR granularity */
5287 	vsi->tx_itr = (vsi->tx_itr / increment ) * increment;
5288 
5289 	/* If the driver has finished initializing, then we need to reprogram
5290 	 * the ITR registers now. Otherwise, they will be programmed during
5291 	 * driver initialization.
5292 	 */
5293 	if (ice_test_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
5294 		ice_configure_tx_itr(vsi);
5295 
5296 	return (0);
5297 }
5298 
5299 /**
5300  * ice_add_vsi_tunables - Add tunables and nodes for a VSI
5301  * @vsi: pointer to VSI structure
5302  * @parent: parent node to add the tunables under
5303  *
5304  * Create a sysctl context for the VSI, so that sysctls for the VSI can be
5305  * dynamically removed upon VSI removal.
5306  *
5307  * Add various tunables and set up the basic node structure for the VSI. Must
5308  * be called *prior* to ice_add_vsi_sysctls. It should be called as soon as
5309  * possible after the VSI memory is initialized.
5310  *
5311  * VSI specific sysctls with CTLFLAG_TUN should be initialized here so that
5312  * their values can be read from loader.conf prior to their first use in the
5313  * driver.
5314  */
5315 void
5316 ice_add_vsi_tunables(struct ice_vsi *vsi, struct sysctl_oid *parent)
5317 {
5318 	struct sysctl_oid_list *vsi_list;
5319 	char vsi_name[32], vsi_desc[32];
5320 
5321 	struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);
5322 
5323 	/* Initialize the sysctl context for this VSI */
5324 	sysctl_ctx_init(&vsi->ctx);
5325 
5326 	/* Add a node to collect this VSI's statistics together */
5327 	snprintf(vsi_name, sizeof(vsi_name), "%u", vsi->idx);
5328 	snprintf(vsi_desc, sizeof(vsi_desc), "VSI %u", vsi->idx);
5329 	vsi->vsi_node = SYSCTL_ADD_NODE(&vsi->ctx, parent_list, OID_AUTO, vsi_name,
5330 					CTLFLAG_RD, NULL, vsi_desc);
5331 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
5332 
5333 	vsi->rx_itr = ICE_DFLT_TX_ITR;
5334 	SYSCTL_ADD_PROC(&vsi->ctx, vsi_list, OID_AUTO, "rx_itr",
5335 			CTLTYPE_S16 | CTLFLAG_RWTUN,
5336 			vsi, 0, ice_sysctl_rx_itr, "S",
5337 			ICE_SYSCTL_HELP_RX_ITR);
5338 
5339 	vsi->tx_itr = ICE_DFLT_TX_ITR;
5340 	SYSCTL_ADD_PROC(&vsi->ctx, vsi_list, OID_AUTO, "tx_itr",
5341 			CTLTYPE_S16 | CTLFLAG_RWTUN,
5342 			vsi, 0, ice_sysctl_tx_itr, "S",
5343 			ICE_SYSCTL_HELP_TX_ITR);
5344 }
5345 
5346 /**
5347  * ice_del_vsi_sysctl_ctx - Delete the sysctl context(s) of a VSI
5348  * @vsi: the VSI to remove contexts for
5349  *
5350  * Free the context for the VSI sysctls. This includes the main context, as
5351  * well as the per-queue sysctls.
5352  */
5353 void
5354 ice_del_vsi_sysctl_ctx(struct ice_vsi *vsi)
5355 {
5356 	device_t dev = vsi->sc->dev;
5357 	int err;
5358 
5359 	if (vsi->vsi_node) {
5360 		err = sysctl_ctx_free(&vsi->ctx);
5361 		if (err)
5362 			device_printf(dev, "failed to free VSI %d sysctl context, err %s\n",
5363 				      vsi->idx, ice_err_str(err));
5364 		vsi->vsi_node = NULL;
5365 	}
5366 }
5367 
5368 /**
5369  * ice_add_dscp2tc_map_sysctls - Add sysctl tree for DSCP to TC mapping
5370  * @sc: pointer to device private softc
5371  * @ctx: the sysctl ctx to use
5372  * @ctx_list: list of sysctl children for device (to add sysctl tree to)
5373  *
5374  * Add a sysctl tree for individual dscp2tc_map sysctls. Each child of this
5375  * node can map 8 DSCPs to TC values; there are 8 of these in turn for a total
5376  * of 64 DSCP to TC map values that the user can configure.
5377  */
5378 void
5379 ice_add_dscp2tc_map_sysctls(struct ice_softc *sc,
5380 			    struct sysctl_ctx_list *ctx,
5381 			    struct sysctl_oid_list *ctx_list)
5382 {
5383 	struct sysctl_oid_list *node_list;
5384 	struct sysctl_oid *node;
5385 	struct sbuf *namebuf, *descbuf;
5386 	int first_dscp_val, last_dscp_val;
5387 
5388 	node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "dscp2tc_map", CTLFLAG_RD,
5389 			       NULL, "Map of DSCP values to DCB TCs");
5390 	node_list = SYSCTL_CHILDREN(node);
5391 
5392 	namebuf = sbuf_new_auto();
5393 	descbuf = sbuf_new_auto();
5394 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
5395 		sbuf_clear(namebuf);
5396 		sbuf_clear(descbuf);
5397 
5398 		first_dscp_val = i * 8;
5399 		last_dscp_val = first_dscp_val + 7;
5400 
5401 		sbuf_printf(namebuf, "%d-%d", first_dscp_val, last_dscp_val);
5402 		sbuf_printf(descbuf, "Map DSCP values %d to %d to TCs",
5403 			    first_dscp_val, last_dscp_val);
5404 
5405 		sbuf_finish(namebuf);
5406 		sbuf_finish(descbuf);
5407 
5408 		SYSCTL_ADD_PROC(ctx, node_list,
5409 		    OID_AUTO, sbuf_data(namebuf), CTLTYPE_STRING | CTLFLAG_RW,
5410 		    sc, i, ice_sysctl_dscp2tc_map, "A", sbuf_data(descbuf));
5411 	}
5412 
5413 	sbuf_delete(namebuf);
5414 	sbuf_delete(descbuf);
5415 }
5416 
5417 /**
5418  * ice_add_device_tunables - Add early tunable sysctls and sysctl nodes
5419  * @sc: device private structure
5420  *
5421  * Add per-device dynamic tunable sysctls, and setup the general sysctl trees
5422  * for re-use by ice_add_device_sysctls.
5423  *
5424  * In order for the sysctl fields to be initialized before use, this function
5425  * should be called as early as possible during attach activities.
5426  *
5427  * Any non-global sysctl marked as CTLFLAG_TUN should likely be initialized
5428  * here in this function, rather than later in ice_add_device_sysctls.
5429  *
5430  * To make things easier, this function is also expected to setup the various
5431  * sysctl nodes in addition to tunables so that other sysctls which can't be
5432  * initialized early can hook into the same nodes.
5433  */
5434 void
5435 ice_add_device_tunables(struct ice_softc *sc)
5436 {
5437 	device_t dev = sc->dev;
5438 
5439 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5440 	struct sysctl_oid_list *ctx_list =
5441 		SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
5442 
5443 	sc->enable_health_events = ice_enable_health_events;
5444 
5445 	SYSCTL_ADD_BOOL(ctx, ctx_list, OID_AUTO, "enable_health_events",
5446 			CTLFLAG_RDTUN, &sc->enable_health_events, 0,
5447 			"Enable FW health event reporting for this PF");
5448 
5449 	/* Add a node to track VSI sysctls. Keep track of the node in the
5450 	 * softc so that we can hook other sysctls into it later. This
5451 	 * includes both the VSI statistics, as well as potentially dynamic
5452 	 * VSIs in the future.
5453 	 */
5454 
5455 	sc->vsi_sysctls = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "vsi",
5456 					  CTLFLAG_RD, NULL, "VSI Configuration and Statistics");
5457 
5458 	/* Add debug tunables */
5459 	ice_add_debug_tunables(sc);
5460 }
5461 
5462 /**
5463  * ice_sysctl_dump_mac_filters - Dump a list of all HW MAC Filters
5464  * @oidp: sysctl oid structure
5465  * @arg1: pointer to private data structure
5466  * @arg2: unused
5467  * @req: sysctl request pointer
5468  *
5469  * Callback for "mac_filters" sysctl to dump the programmed MAC filters.
5470  */
5471 static int
5472 ice_sysctl_dump_mac_filters(SYSCTL_HANDLER_ARGS)
5473 {
5474 	struct ice_softc *sc = (struct ice_softc *)arg1;
5475 	struct ice_hw *hw = &sc->hw;
5476 	struct ice_switch_info *sw = hw->switch_info;
5477 	struct ice_fltr_mgmt_list_entry *fm_entry;
5478 	struct ice_list_head *rule_head;
5479 	struct ice_lock *rule_lock;
5480 	struct ice_fltr_info *fi;
5481 	struct sbuf *sbuf;
5482 	int ret;
5483 
5484 	UNREFERENCED_PARAMETER(oidp);
5485 	UNREFERENCED_PARAMETER(arg2);
5486 
5487 	if (ice_driver_is_detaching(sc))
5488 		return (ESHUTDOWN);
5489 
5490 	/* Wire the old buffer so we can take a non-sleepable lock */
5491 	ret = sysctl_wire_old_buffer(req, 0);
5492 	if (ret)
5493 		return (ret);
5494 
5495 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5496 
5497 	rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock;
5498 	rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
5499 
5500 	sbuf_printf(sbuf, "MAC Filter List");
5501 
5502 	ice_acquire_lock(rule_lock);
5503 
5504 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5505 		fi = &fm_entry->fltr_info;
5506 
5507 		sbuf_printf(sbuf,
5508 			    "\nmac = %6D, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %d",
5509 			    fi->l_data.mac.mac_addr, ":", fi->vsi_handle,
5510 			    ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en,
5511 			    ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id);
5512 
5513 		/* if we have a vsi_list_info, print some information about that */
5514 		if (fm_entry->vsi_list_info) {
5515 			sbuf_printf(sbuf,
5516 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
5517 				    fm_entry->vsi_count,
5518 				    fm_entry->vsi_list_info->vsi_list_id,
5519 				    fm_entry->vsi_list_info->ref_cnt);
5520 		}
5521 	}
5522 
5523 	ice_release_lock(rule_lock);
5524 
5525 	sbuf_finish(sbuf);
5526 	sbuf_delete(sbuf);
5527 
5528 	return (0);
5529 }
5530 
5531 /**
5532  * ice_sysctl_dump_vlan_filters - Dump a list of all HW VLAN Filters
5533  * @oidp: sysctl oid structure
5534  * @arg1: pointer to private data structure
5535  * @arg2: unused
5536  * @req: sysctl request pointer
5537  *
5538  * Callback for "vlan_filters" sysctl to dump the programmed VLAN filters.
5539  */
5540 static int
5541 ice_sysctl_dump_vlan_filters(SYSCTL_HANDLER_ARGS)
5542 {
5543 	struct ice_softc *sc = (struct ice_softc *)arg1;
5544 	struct ice_hw *hw = &sc->hw;
5545 	struct ice_switch_info *sw = hw->switch_info;
5546 	struct ice_fltr_mgmt_list_entry *fm_entry;
5547 	struct ice_list_head *rule_head;
5548 	struct ice_lock *rule_lock;
5549 	struct ice_fltr_info *fi;
5550 	struct sbuf *sbuf;
5551 	int ret;
5552 
5553 	UNREFERENCED_PARAMETER(oidp);
5554 	UNREFERENCED_PARAMETER(arg2);
5555 
5556 	if (ice_driver_is_detaching(sc))
5557 		return (ESHUTDOWN);
5558 
5559 	/* Wire the old buffer so we can take a non-sleepable lock */
5560 	ret = sysctl_wire_old_buffer(req, 0);
5561 	if (ret)
5562 		return (ret);
5563 
5564 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5565 
5566 	rule_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock;
5567 	rule_head = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rules;
5568 
5569 	sbuf_printf(sbuf, "VLAN Filter List");
5570 
5571 	ice_acquire_lock(rule_lock);
5572 
5573 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5574 		fi = &fm_entry->fltr_info;
5575 
5576 		sbuf_printf(sbuf,
5577 			    "\nvlan_id = %4d, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d",
5578 			    fi->l_data.vlan.vlan_id, fi->vsi_handle,
5579 			    ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en,
5580 			    ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id);
5581 
5582 		/* if we have a vsi_list_info, print some information about that */
5583 		if (fm_entry->vsi_list_info) {
5584 			sbuf_printf(sbuf,
5585 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
5586 				    fm_entry->vsi_count,
5587 				    fm_entry->vsi_list_info->vsi_list_id,
5588 				    fm_entry->vsi_list_info->ref_cnt);
5589 		}
5590 	}
5591 
5592 	ice_release_lock(rule_lock);
5593 
5594 	sbuf_finish(sbuf);
5595 	sbuf_delete(sbuf);
5596 
5597 	return (0);
5598 }
5599 
5600 /**
5601  * ice_sysctl_dump_ethertype_filters - Dump a list of all HW Ethertype filters
5602  * @oidp: sysctl oid structure
5603  * @arg1: pointer to private data structure
5604  * @arg2: unused
5605  * @req: sysctl request pointer
5606  *
5607  * Callback for "ethertype_filters" sysctl to dump the programmed Ethertype
5608  * filters.
5609  */
5610 static int
5611 ice_sysctl_dump_ethertype_filters(SYSCTL_HANDLER_ARGS)
5612 {
5613 	struct ice_softc *sc = (struct ice_softc *)arg1;
5614 	struct ice_hw *hw = &sc->hw;
5615 	struct ice_switch_info *sw = hw->switch_info;
5616 	struct ice_fltr_mgmt_list_entry *fm_entry;
5617 	struct ice_list_head *rule_head;
5618 	struct ice_lock *rule_lock;
5619 	struct ice_fltr_info *fi;
5620 	struct sbuf *sbuf;
5621 	int ret;
5622 
5623 	UNREFERENCED_PARAMETER(oidp);
5624 	UNREFERENCED_PARAMETER(arg2);
5625 
5626 	if (ice_driver_is_detaching(sc))
5627 		return (ESHUTDOWN);
5628 
5629 	/* Wire the old buffer so we can take a non-sleepable lock */
5630 	ret = sysctl_wire_old_buffer(req, 0);
5631 	if (ret)
5632 		return (ret);
5633 
5634 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5635 
5636 	rule_lock = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE].filt_rule_lock;
5637 	rule_head = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE].filt_rules;
5638 
5639 	sbuf_printf(sbuf, "Ethertype Filter List");
5640 
5641 	ice_acquire_lock(rule_lock);
5642 
5643 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5644 		fi = &fm_entry->fltr_info;
5645 
5646 		sbuf_printf(sbuf,
5647 			    "\nethertype = 0x%04x, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d",
5648 			fi->l_data.ethertype_mac.ethertype,
5649 			fi->vsi_handle, ice_fltr_flag_str(fi->flag),
5650 			fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act),
5651 			fi->fltr_rule_id);
5652 
5653 		/* if we have a vsi_list_info, print some information about that */
5654 		if (fm_entry->vsi_list_info) {
5655 			sbuf_printf(sbuf,
5656 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
5657 				    fm_entry->vsi_count,
5658 				    fm_entry->vsi_list_info->vsi_list_id,
5659 				    fm_entry->vsi_list_info->ref_cnt);
5660 		}
5661 	}
5662 
5663 	ice_release_lock(rule_lock);
5664 
5665 	sbuf_finish(sbuf);
5666 	sbuf_delete(sbuf);
5667 
5668 	return (0);
5669 }
5670 
5671 /**
5672  * ice_sysctl_dump_ethertype_mac_filters - Dump a list of all HW Ethertype/MAC filters
5673  * @oidp: sysctl oid structure
5674  * @arg1: pointer to private data structure
5675  * @arg2: unused
5676  * @req: sysctl request pointer
5677  *
5678  * Callback for "ethertype_mac_filters" sysctl to dump the programmed
5679  * Ethertype/MAC filters.
5680  */
5681 static int
5682 ice_sysctl_dump_ethertype_mac_filters(SYSCTL_HANDLER_ARGS)
5683 {
5684 	struct ice_softc *sc = (struct ice_softc *)arg1;
5685 	struct ice_hw *hw = &sc->hw;
5686 	struct ice_switch_info *sw = hw->switch_info;
5687 	struct ice_fltr_mgmt_list_entry *fm_entry;
5688 	struct ice_list_head *rule_head;
5689 	struct ice_lock *rule_lock;
5690 	struct ice_fltr_info *fi;
5691 	struct sbuf *sbuf;
5692 	int ret;
5693 
5694 	UNREFERENCED_PARAMETER(oidp);
5695 	UNREFERENCED_PARAMETER(arg2);
5696 
5697 	if (ice_driver_is_detaching(sc))
5698 		return (ESHUTDOWN);
5699 
5700 	/* Wire the old buffer so we can take a non-sleepable lock */
5701 	ret = sysctl_wire_old_buffer(req, 0);
5702 	if (ret)
5703 		return (ret);
5704 
5705 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5706 
5707 	rule_lock = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE_MAC].filt_rule_lock;
5708 	rule_head = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE_MAC].filt_rules;
5709 
5710 	sbuf_printf(sbuf, "Ethertype/MAC Filter List");
5711 
5712 	ice_acquire_lock(rule_lock);
5713 
5714 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5715 		fi = &fm_entry->fltr_info;
5716 
5717 		sbuf_printf(sbuf,
5718 			    "\nethertype = 0x%04x, mac = %6D, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d",
5719 			    fi->l_data.ethertype_mac.ethertype,
5720 			    fi->l_data.ethertype_mac.mac_addr, ":",
5721 			    fi->vsi_handle, ice_fltr_flag_str(fi->flag),
5722 			    fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act),
5723 			    fi->fltr_rule_id);
5724 
5725 		/* if we have a vsi_list_info, print some information about that */
5726 		if (fm_entry->vsi_list_info) {
5727 			sbuf_printf(sbuf,
5728 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
5729 				    fm_entry->vsi_count,
5730 				    fm_entry->vsi_list_info->vsi_list_id,
5731 				    fm_entry->vsi_list_info->ref_cnt);
5732 		}
5733 	}
5734 
5735 	ice_release_lock(rule_lock);
5736 
5737 	sbuf_finish(sbuf);
5738 	sbuf_delete(sbuf);
5739 
5740 	return (0);
5741 }
5742 
5743 /**
5744  * ice_sysctl_dump_state_flags - Dump device driver state flags
5745  * @oidp: sysctl oid structure
5746  * @arg1: pointer to private data structure
5747  * @arg2: unused
5748  * @req: sysctl request pointer
5749  *
5750  * Callback for "state" sysctl to display currently set driver state flags.
5751  */
5752 static int
5753 ice_sysctl_dump_state_flags(SYSCTL_HANDLER_ARGS)
5754 {
5755 	struct ice_softc *sc = (struct ice_softc *)arg1;
5756 	struct sbuf *sbuf;
5757 	u32 copied_state;
5758 	unsigned int i;
5759 	bool at_least_one = false;
5760 
5761 	UNREFERENCED_PARAMETER(oidp);
5762 	UNREFERENCED_PARAMETER(arg2);
5763 
5764 	if (ice_driver_is_detaching(sc))
5765 		return (ESHUTDOWN);
5766 
5767 	/* Make a copy of the state to ensure we display coherent values */
5768 	copied_state = atomic_load_acq_32(&sc->state);
5769 
5770 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5771 
5772 	/* Add the string for each set state to the sbuf */
5773 	for (i = 0; i < 32; i++) {
5774 		if (copied_state & BIT(i)) {
5775 			const char *str = ice_state_to_str((enum ice_state)i);
5776 
5777 			at_least_one = true;
5778 
5779 			if (str)
5780 				sbuf_printf(sbuf, "\n%s", str);
5781 			else
5782 				sbuf_printf(sbuf, "\nBIT(%u)", i);
5783 		}
5784 	}
5785 
5786 	if (!at_least_one)
5787 		sbuf_printf(sbuf, "Nothing set");
5788 
5789 	sbuf_finish(sbuf);
5790 	sbuf_delete(sbuf);
5791 
5792 	return (0);
5793 }
5794 
/*
 * Help text attached to the "debug_mask" sysctl: one line per debug flag
 * that can be set in the mask.
 *
 * NOTE(review): the values listed here are presumably meant to mirror the
 * ICE_DBG_* bit definitions; confirm they still match, in particular the
 * Admin Queue, Parser and "Reserved for user" entries.
 */
#define ICE_SYSCTL_DEBUG_MASK_HELP \
	"\nSelect debug statements to print to kernel messages" \
	"\nFlags:" \
	"\n\t        0x1 - Function Tracing" \
	"\n\t        0x2 - Driver Initialization" \
	"\n\t        0x4 - Release" \
	"\n\t        0x8 - FW Logging" \
	"\n\t       0x10 - Link" \
	"\n\t       0x20 - PHY" \
	"\n\t       0x40 - Queue Context" \
	"\n\t       0x80 - NVM" \
	"\n\t      0x100 - LAN" \
	"\n\t      0x200 - Flow" \
	"\n\t      0x400 - DCB" \
	"\n\t      0x800 - Diagnostics" \
	"\n\t     0x1000 - Flow Director" \
	"\n\t     0x2000 - Switch" \
	"\n\t     0x4000 - Scheduler" \
	"\n\t     0x8000 - RDMA" \
	"\n\t    0x10000 - DDP Package" \
	"\n\t    0x20000 - Resources" \
	"\n\t    0x40000 - ACL" \
	"\n\t    0x80000 - PTP" \
	"\n\t   0x100000 - Admin Queue messages" \
	"\n\t   0x200000 - Admin Queue descriptors" \
	"\n\t   0x400000 - Admin Queue descriptor buffers" \
	"\n\t   0x800000 - Admin Queue commands" \
	"\n\t  0x1000000 - Parser" \
	"\n\t  ..." \
	"\n\t  0x8000000 - (Reserved for user)" \
	"\n\t" \
	"\nUse \"sysctl -x\" to view flags properly."
5827 
5828 /**
5829  * ice_add_debug_tunables - Add tunables helpful for debugging the device driver
5830  * @sc: device private structure
5831  *
5832  * Add sysctl tunable values related to debugging the device driver. For now,
5833  * this means a tunable to set the debug mask early during driver load.
5834  *
5835  * The debug node will be marked CTLFLAG_SKIP unless INVARIANTS is defined, so
5836  * that in normal kernel builds, these will all be hidden, but on a debug
5837  * kernel they will be more easily visible.
5838  */
5839 static void
5840 ice_add_debug_tunables(struct ice_softc *sc)
5841 {
5842 	struct sysctl_oid_list *debug_list;
5843 	device_t dev = sc->dev;
5844 
5845 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5846 	struct sysctl_oid_list *ctx_list =
5847 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
5848 
5849 	sc->debug_sysctls = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "debug",
5850 					    ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
5851 					    NULL, "Debug Sysctls");
5852 	debug_list = SYSCTL_CHILDREN(sc->debug_sysctls);
5853 
5854 	SYSCTL_ADD_U64(ctx, debug_list, OID_AUTO, "debug_mask",
5855 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RW | CTLFLAG_TUN,
5856 		       &sc->hw.debug_mask, 0,
5857 		       ICE_SYSCTL_DEBUG_MASK_HELP);
5858 
5859 	/* Load the default value from the global sysctl first */
5860 	sc->enable_tx_fc_filter = ice_enable_tx_fc_filter;
5861 
5862 	SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "enable_tx_fc_filter",
5863 			ICE_CTLFLAG_DEBUG | CTLFLAG_RDTUN,
5864 			&sc->enable_tx_fc_filter, 0,
5865 			"Drop Ethertype 0x8808 control frames originating from software on this PF");
5866 
5867 	sc->tx_balance_en = ice_tx_balance_en;
5868 	SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "tx_balance",
5869 			ICE_CTLFLAG_DEBUG | CTLFLAG_RWTUN,
5870 			&sc->tx_balance_en, 0,
5871 			"Enable 5-layer scheduler topology");
5872 
5873 	/* Load the default value from the global sysctl first */
5874 	sc->enable_tx_lldp_filter = ice_enable_tx_lldp_filter;
5875 
5876 	SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "enable_tx_lldp_filter",
5877 			ICE_CTLFLAG_DEBUG | CTLFLAG_RDTUN,
5878 			&sc->enable_tx_lldp_filter, 0,
5879 			"Drop Ethertype 0x88cc LLDP frames originating from software on this PF");
5880 
5881 	ice_add_fw_logging_tunables(sc, sc->debug_sysctls);
5882 }
5883 
/* Help text for the "request_reset" sysctl, listing the accepted strings */
#define ICE_SYSCTL_HELP_REQUEST_RESET \
	"\nRequest the driver to initiate a reset." \
	"\n\tpfr - Initiate a PF reset" \
	"\n\tcorer - Initiate a CORE reset" \
	"\n\tglobr - Initiate a GLOBAL reset"
5889 
5890 /**
5891  * @var rl_sysctl_ticks
5892  * @brief timestamp for latest reset request sysctl call
5893  *
5894  * Helps rate-limit the call to the sysctl which resets the device
5895  */
5896 int rl_sysctl_ticks = 0;
5897 
5898 /**
5899  * ice_sysctl_request_reset - Request that the driver initiate a reset
5900  * @oidp: sysctl oid structure
5901  * @arg1: pointer to private data structure
5902  * @arg2: unused
5903  * @req: sysctl request pointer
5904  *
5905  * Callback for "request_reset" sysctl to request that the driver initiate
5906  * a reset. Expects to be passed one of the following strings
5907  *
5908  * "pfr" - Initiate a PF reset
5909  * "corer" - Initiate a CORE reset
5910  * "globr" - Initiate a Global reset
5911  */
5912 static int
5913 ice_sysctl_request_reset(SYSCTL_HANDLER_ARGS)
5914 {
5915 	struct ice_softc *sc = (struct ice_softc *)arg1;
5916 	struct ice_hw *hw = &sc->hw;
5917 	enum ice_status status;
5918 	enum ice_reset_req reset_type = ICE_RESET_INVAL;
5919 	const char *reset_message;
5920 	int ret;
5921 
5922 	/* Buffer to store the requested reset string. Must contain enough
5923 	 * space to store the largest expected reset string, which currently
5924 	 * means 6 bytes of space.
5925 	 */
5926 	char reset[6] = "";
5927 
5928 	UNREFERENCED_PARAMETER(arg2);
5929 
5930 	ret = priv_check(curthread, PRIV_DRIVER);
5931 	if (ret)
5932 		return (ret);
5933 
5934 	if (ice_driver_is_detaching(sc))
5935 		return (ESHUTDOWN);
5936 
5937 	/* Read in the requested reset type. */
5938 	ret = sysctl_handle_string(oidp, reset, sizeof(reset), req);
5939 	if ((ret) || (req->newptr == NULL))
5940 		return (ret);
5941 
5942 	if (strcmp(reset, "pfr") == 0) {
5943 		reset_message = "Requesting a PF reset";
5944 		reset_type = ICE_RESET_PFR;
5945 	} else if (strcmp(reset, "corer") == 0) {
5946 		reset_message = "Initiating a CORE reset";
5947 		reset_type = ICE_RESET_CORER;
5948 	} else if (strcmp(reset, "globr") == 0) {
5949 		reset_message = "Initiating a GLOBAL reset";
5950 		reset_type = ICE_RESET_GLOBR;
5951 	} else if (strcmp(reset, "empr") == 0) {
5952 		device_printf(sc->dev, "Triggering an EMP reset via software is not currently supported\n");
5953 		return (EOPNOTSUPP);
5954 	}
5955 
5956 	if (reset_type == ICE_RESET_INVAL) {
5957 		device_printf(sc->dev, "%s is not a valid reset request\n", reset);
5958 		return (EINVAL);
5959 	}
5960 
5961 	/*
5962 	 * Rate-limit the frequency at which this function is called.
5963 	 * Assuming this is called successfully once, typically,
5964 	 * everything should be handled within the allotted time frame.
5965 	 * However, in the odd setup situations, we've also put in
5966 	 * guards for when the reset has finished, but we're in the
5967 	 * process of rebuilding. And instead of queueing an intent,
5968 	 * simply error out and let the caller retry, if so desired.
5969 	 */
5970 	if (TICKS_2_MSEC(ticks - rl_sysctl_ticks) < 500) {
5971 		device_printf(sc->dev,
5972 		    "Call frequency too high. Operation aborted.\n");
5973 		return (EBUSY);
5974 	}
5975 	rl_sysctl_ticks = ticks;
5976 
5977 	if (TICKS_2_MSEC(ticks - sc->rebuild_ticks) < 100) {
5978 		device_printf(sc->dev, "Device rebuilding. Operation aborted.\n");
5979 		return (EBUSY);
5980 	}
5981 
5982 	if (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_DEVSTATE_M) {
5983 		device_printf(sc->dev, "Device in reset. Operation aborted.\n");
5984 		return (EBUSY);
5985 	}
5986 
5987 	device_printf(sc->dev, "%s\n", reset_message);
5988 
5989 	/* Initiate the PF reset during the admin status task */
5990 	if (reset_type == ICE_RESET_PFR) {
5991 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
5992 		return (0);
5993 	}
5994 
5995 	/*
5996 	 * Other types of resets including CORE and GLOBAL resets trigger an
5997 	 * interrupt on all PFs. Initiate the reset now. Preparation and
5998 	 * rebuild logic will be handled by the admin status task.
5999 	 */
6000 	status = ice_reset(hw, reset_type);
6001 
6002 	/*
6003 	 * Resets can take a long time and we still don't want another call
6004 	 * to this function before we settle down.
6005 	 */
6006 	rl_sysctl_ticks = ticks;
6007 
6008 	if (status) {
6009 		device_printf(sc->dev, "failed to initiate device reset, err %s\n",
6010 			      ice_status_str(status));
6011 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
6012 		return (EFAULT);
6013 	}
6014 
6015 	return (0);
6016 }
6017 
/* Help text for the sysctl that selects which FW clusters get dumped */
#define ICE_SYSCTL_HELP_FW_DEBUG_DUMP_CLUSTER_SETTING \
	"\nSelect clusters to dump with \"dump\" sysctl" \
	"\nFlags:" \
	"\n\t   0x1 - Switch" \
	"\n\t   0x2 - ACL" \
	"\n\t   0x4 - Tx Scheduler" \
	"\n\t   0x8 - Profile Configuration" \
	"\n\t  0x20 - Link" \
	"\n\t  0x80 - DCB" \
	"\n\t 0x100 - L2P" \
	"\n\t" \
	"\nUse \"sysctl -x\" to view flags properly."
6030 
6031 /**
6032  * ice_sysctl_fw_debug_dump_cluster_setting - Set which clusters to dump
6033  *     from FW when FW debug dump occurs
6034  * @oidp: sysctl oid structure
6035  * @arg1: pointer to private data structure
6036  * @arg2: unused
6037  * @req: sysctl request pointer
6038  */
6039 static int
6040 ice_sysctl_fw_debug_dump_cluster_setting(SYSCTL_HANDLER_ARGS)
6041 {
6042 	struct ice_softc *sc = (struct ice_softc *)arg1;
6043 	device_t dev = sc->dev;
6044 	u16 clusters;
6045 	int ret;
6046 
6047 	UNREFERENCED_PARAMETER(arg2);
6048 
6049 	ret = priv_check(curthread, PRIV_DRIVER);
6050 	if (ret)
6051 		return (ret);
6052 
6053 	if (ice_driver_is_detaching(sc))
6054 		return (ESHUTDOWN);
6055 
6056 	clusters = sc->fw_debug_dump_cluster_mask;
6057 
6058 	ret = sysctl_handle_16(oidp, &clusters, 0, req);
6059 	if ((ret) || (req->newptr == NULL))
6060 		return (ret);
6061 
6062 	if (!clusters ||
6063 	    (clusters & ~(ICE_FW_DEBUG_DUMP_VALID_CLUSTER_MASK))) {
6064 		device_printf(dev,
6065 		    "%s: ERROR: Incorrect settings requested\n",
6066 		    __func__);
6067 		return (EINVAL);
6068 	}
6069 
6070 	sc->fw_debug_dump_cluster_mask = clusters;
6071 
6072 	return (0);
6073 }
6074 
6075 #define ICE_FW_DUMP_AQ_COUNT_LIMIT	(10000)
6076 
6077 /**
6078  * ice_fw_debug_dump_print_cluster - Print formatted cluster data from FW
6079  * @sc: the device softc
6080  * @sbuf: initialized sbuf to print data to
6081  * @cluster_id: FW cluster ID to print data from
6082  *
6083  * Reads debug data from the specified cluster id in the FW and prints it to
6084  * the input sbuf. This function issues multiple AQ commands to the FW in
6085  * order to get all of the data in the cluster.
6086  *
6087  * @remark Only intended to be used by the sysctl handler
6088  * ice_sysctl_fw_debug_dump_do_dump
6089  */
6090 static void
6091 ice_fw_debug_dump_print_cluster(struct ice_softc *sc, struct sbuf *sbuf, u16 cluster_id)
6092 {
6093 	struct ice_hw *hw = &sc->hw;
6094 	device_t dev = sc->dev;
6095 	u16 data_buf_size = ICE_AQ_MAX_BUF_LEN;
6096 	const u8 reserved_buf[8] = {};
6097 	enum ice_status status;
6098 	int counter = 0;
6099 	u8 *data_buf;
6100 
6101 	/* Other setup */
6102 	data_buf = (u8 *)malloc(data_buf_size, M_ICE, M_NOWAIT | M_ZERO);
6103 	if (!data_buf)
6104 		return;
6105 
6106 	/* Input parameters / loop variables */
6107 	u16 table_id = 0;
6108 	u32 offset = 0;
6109 
6110 	/* Output from the Get Internal Data AQ command */
6111 	u16 ret_buf_size = 0;
6112 	u16 ret_next_table = 0;
6113 	u32 ret_next_index = 0;
6114 
6115 	ice_debug(hw, ICE_DBG_DIAG, "%s: dumping cluster id %d\n", __func__,
6116 	    cluster_id);
6117 
6118 	for (;;) {
6119 		/* Do not trust the FW behavior to be completely correct */
6120 		if (counter++ >= ICE_FW_DUMP_AQ_COUNT_LIMIT) {
6121 			device_printf(dev,
6122 			    "%s: Exceeded counter limit for cluster %d\n",
6123 			    __func__, cluster_id);
6124 			break;
6125 		}
6126 
6127 		ice_debug(hw, ICE_DBG_DIAG, "---\n");
6128 		ice_debug(hw, ICE_DBG_DIAG,
6129 		    "table_id 0x%04x offset 0x%08x buf_size %d\n",
6130 		    table_id, offset, data_buf_size);
6131 
6132 		status = ice_aq_get_internal_data(hw, cluster_id, table_id,
6133 		    offset, data_buf, data_buf_size, &ret_buf_size,
6134 		    &ret_next_table, &ret_next_index, NULL);
6135 		if (status) {
6136 			device_printf(dev,
6137 			    "%s: ice_aq_get_internal_data in cluster %d: err %s aq_err %s\n",
6138 			    __func__, cluster_id, ice_status_str(status),
6139 			    ice_aq_str(hw->adminq.sq_last_status));
6140 			break;
6141 		}
6142 
6143 		ice_debug(hw, ICE_DBG_DIAG,
6144 		    "ret_table_id 0x%04x ret_offset 0x%08x ret_buf_size %d\n",
6145 		    ret_next_table, ret_next_index, ret_buf_size);
6146 
6147 		/* Print cluster id */
6148 		u32 print_cluster_id = (u32)cluster_id;
6149 		sbuf_bcat(sbuf, &print_cluster_id, sizeof(print_cluster_id));
6150 		/* Print table id */
6151 		u32 print_table_id = (u32)table_id;
6152 		sbuf_bcat(sbuf, &print_table_id, sizeof(print_table_id));
6153 		/* Print table length */
6154 		u32 print_table_length = (u32)ret_buf_size;
6155 		sbuf_bcat(sbuf, &print_table_length, sizeof(print_table_length));
6156 		/* Print current offset */
6157 		u32 print_curr_offset = offset;
6158 		sbuf_bcat(sbuf, &print_curr_offset, sizeof(print_curr_offset));
6159 		/* Print reserved bytes */
6160 		sbuf_bcat(sbuf, reserved_buf, sizeof(reserved_buf));
6161 		/* Print data */
6162 		sbuf_bcat(sbuf, data_buf, ret_buf_size);
6163 
6164 		/* Adjust loop variables */
6165 		memset(data_buf, 0, data_buf_size);
6166 		bool same_table_next = (table_id == ret_next_table);
6167 		bool last_table_next = (ret_next_table == 0xff || ret_next_table == 0xffff);
6168 		bool last_offset_next = (ret_next_index == 0xffffffff || ret_next_index == 0);
6169 
6170 		if ((!same_table_next && !last_offset_next) ||
6171 		    (same_table_next && last_table_next)) {
6172 			device_printf(dev,
6173 			    "%s: Unexpected conditions for same_table_next(%d) last_table_next(%d) last_offset_next(%d), ending cluster (%d)\n",
6174 			    __func__, same_table_next, last_table_next, last_offset_next, cluster_id);
6175 			break;
6176 		}
6177 
6178 		if (!same_table_next && !last_table_next && last_offset_next) {
6179 			/* We've hit the end of the table */
6180 			table_id = ret_next_table;
6181 			offset = 0;
6182 		}
6183 		else if (!same_table_next && last_table_next && last_offset_next) {
6184 			/* We've hit the end of the cluster */
6185 			break;
6186 		}
6187 		else if (same_table_next && !last_table_next && last_offset_next) {
6188 			if (cluster_id == 0x1 && table_id < 39)
6189 				table_id += 1;
6190 			else
6191 				break;
6192 		}
6193 		else { /* if (same_table_next && !last_table_next && !last_offset_next) */
6194 			/* More data left in the table */
6195 			offset = ret_next_index;
6196 		}
6197 	}
6198 
6199 	free(data_buf, M_ICE);
6200 }
6201 
/* Help text for the sysctl that triggers and outputs the FW debug dump */
#define ICE_SYSCTL_HELP_FW_DEBUG_DUMP_DO_DUMP \
	"\nWrite 1 to output a FW debug dump containing the clusters specified by the \"clusters\" sysctl" \
	"\nThe \"-b\" flag must be used in order to dump this data as binary data because" \
	"\nthis data is opaque and not a string."

/*
 * Sizes, in bytes, used to build the output size estimate for the FW debug
 * dump: a base amount plus a per-cluster amount for cluster mask bits
 * 0x1, 0x2 and 0x4 respectively.
 */
#define ICE_FW_DUMP_BASE_TEXT_SIZE	(1024 * 1024)
#define ICE_FW_DUMP_CLUST0_TEXT_SIZE	(2 * 1024 * 1024)
#define ICE_FW_DUMP_CLUST1_TEXT_SIZE	(128 * 1024)
#define ICE_FW_DUMP_CLUST2_TEXT_SIZE	(2 * 1024 * 1024)
6211 
6212 /**
6213  * ice_sysctl_fw_debug_dump_do_dump - Dump data from FW to sysctl output
6214  * @oidp: sysctl oid structure
6215  * @arg1: pointer to private data structure
6216  * @arg2: unused
6217  * @req: sysctl request pointer
6218  *
6219  * Sysctl handler for the debug.dump.dump sysctl. Prints out a specially-
6220  * formatted dump of some debug FW data intended to be processed by a special
6221  * Intel tool. Prints out the cluster data specified by the "clusters"
6222  * sysctl.
6223  *
6224  * @remark The actual AQ calls and printing are handled by a helper
6225  * function above.
6226  */
6227 static int
6228 ice_sysctl_fw_debug_dump_do_dump(SYSCTL_HANDLER_ARGS)
6229 {
6230 	struct ice_softc *sc = (struct ice_softc *)arg1;
6231 	device_t dev = sc->dev;
6232 	struct sbuf *sbuf;
6233 	int bit, ret;
6234 
6235 	UNREFERENCED_PARAMETER(arg2);
6236 
6237 	ret = priv_check(curthread, PRIV_DRIVER);
6238 	if (ret)
6239 		return (ret);
6240 
6241 	if (ice_driver_is_detaching(sc))
6242 		return (ESHUTDOWN);
6243 
6244 	/* If the user hasn't written "1" to this sysctl yet: */
6245 	if (!ice_test_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP)) {
6246 		/* Avoid output on the first set of reads to this sysctl in
6247 		 * order to prevent a null byte from being written to the
6248 		 * end result when called via sysctl(8).
6249 		 */
6250 		if (req->oldptr == NULL && req->newptr == NULL) {
6251 			ret = SYSCTL_OUT(req, 0, 0);
6252 			return (ret);
6253 		}
6254 
6255 		char input_buf[2] = "";
6256 		ret = sysctl_handle_string(oidp, input_buf, sizeof(input_buf), req);
6257 		if ((ret) || (req->newptr == NULL))
6258 			return (ret);
6259 
6260 		/* If we get '1', then indicate we'll do a dump in the next
6261 		 * sysctl read call.
6262 		 */
6263 		if (input_buf[0] == '1') {
6264 			ice_set_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP);
6265 			return (0);
6266 		}
6267 
6268 		return (EINVAL);
6269 	}
6270 
6271 	/* --- FW debug dump state is set --- */
6272 
6273 	if (!sc->fw_debug_dump_cluster_mask) {
6274 		device_printf(dev,
6275 		    "%s: Debug Dump failed because no cluster was specified.\n",
6276 		    __func__);
6277 		ret = EINVAL;
6278 		goto out;
6279 	}
6280 
6281 	/* Caller just wants the upper bound for size */
6282 	if (req->oldptr == NULL && req->newptr == NULL) {
6283 		size_t est_output_len = ICE_FW_DUMP_BASE_TEXT_SIZE;
6284 		if (sc->fw_debug_dump_cluster_mask & 0x1)
6285 			est_output_len += ICE_FW_DUMP_CLUST0_TEXT_SIZE;
6286 		if (sc->fw_debug_dump_cluster_mask & 0x2)
6287 			est_output_len += ICE_FW_DUMP_CLUST1_TEXT_SIZE;
6288 		if (sc->fw_debug_dump_cluster_mask & 0x4)
6289 			est_output_len += ICE_FW_DUMP_CLUST2_TEXT_SIZE;
6290 
6291 		ret = SYSCTL_OUT(req, 0, est_output_len);
6292 		return (ret);
6293 	}
6294 
6295 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6296 	sbuf_clear_flags(sbuf, SBUF_INCLUDENUL);
6297 
6298 	ice_debug(&sc->hw, ICE_DBG_DIAG, "%s: Debug Dump running...\n", __func__);
6299 
6300 	for_each_set_bit(bit, &sc->fw_debug_dump_cluster_mask,
6301 	    sizeof(sc->fw_debug_dump_cluster_mask) * 8)
6302 		ice_fw_debug_dump_print_cluster(sc, sbuf, bit);
6303 
6304 	sbuf_finish(sbuf);
6305 	sbuf_delete(sbuf);
6306 
6307 out:
6308 	ice_clear_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP);
6309 	return (ret);
6310 }
6311 
6312 /**
6313  * ice_add_debug_sysctls - Add sysctls helpful for debugging the device driver
6314  * @sc: device private structure
6315  *
6316  * Add sysctls related to debugging the device driver. Generally these should
6317  * simply be sysctls which dump internal driver state, to aid in understanding
6318  * what the driver is doing.
6319  */
6320 static void
6321 ice_add_debug_sysctls(struct ice_softc *sc)
6322 {
6323 	struct sysctl_oid *sw_node, *dump_node;
6324 	struct sysctl_oid_list *debug_list, *sw_list, *dump_list;
6325 	device_t dev = sc->dev;
6326 
6327 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
6328 
6329 	debug_list = SYSCTL_CHILDREN(sc->debug_sysctls);
6330 
6331 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "request_reset",
6332 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_WR, sc, 0,
6333 			ice_sysctl_request_reset, "A",
6334 			ICE_SYSCTL_HELP_REQUEST_RESET);
6335 
6336 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "pfr_count",
6337 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6338 		       &sc->soft_stats.pfr_count, 0,
6339 		       "# of PF resets handled");
6340 
6341 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "corer_count",
6342 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6343 		       &sc->soft_stats.corer_count, 0,
6344 		       "# of CORE resets handled");
6345 
6346 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "globr_count",
6347 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6348 		       &sc->soft_stats.globr_count, 0,
6349 		       "# of Global resets handled");
6350 
6351 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "empr_count",
6352 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6353 		       &sc->soft_stats.empr_count, 0,
6354 		       "# of EMP resets handled");
6355 
6356 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "tx_mdd_count",
6357 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6358 		       &sc->soft_stats.tx_mdd_count, 0,
6359 		       "# of Tx MDD events detected");
6360 
6361 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "rx_mdd_count",
6362 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6363 		       &sc->soft_stats.rx_mdd_count, 0,
6364 		       "# of Rx MDD events detected");
6365 
6366 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "state",
6367 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6368 			ice_sysctl_dump_state_flags, "A",
6369 			"Driver State Flags");
6370 
6371 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_type_low",
6372 			ICE_CTLFLAG_DEBUG | CTLTYPE_U64 | CTLFLAG_RW, sc, 0,
6373 			ice_sysctl_phy_type_low, "QU",
6374 			"PHY type Low from Get PHY Caps/Set PHY Cfg");
6375 
6376 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_type_high",
6377 			ICE_CTLFLAG_DEBUG | CTLTYPE_U64 | CTLFLAG_RW, sc, 0,
6378 			ice_sysctl_phy_type_high, "QU",
6379 			"PHY type High from Get PHY Caps/Set PHY Cfg");
6380 
6381 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_sw_caps",
6382 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6383 			ice_sysctl_phy_sw_caps, "",
6384 			"Get PHY Capabilities (Software configuration)");
6385 
6386 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_nvm_caps",
6387 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6388 			ice_sysctl_phy_nvm_caps, "",
6389 			"Get PHY Capabilities (NVM configuration)");
6390 
6391 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_topo_caps",
6392 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6393 			ice_sysctl_phy_topo_caps, "",
6394 			"Get PHY Capabilities (Topology configuration)");
6395 
6396 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_link_status",
6397 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6398 			ice_sysctl_phy_link_status, "",
6399 			"Get PHY Link Status");
6400 
6401 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "read_i2c_diag_data",
6402 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6403 			ice_sysctl_read_i2c_diag_data, "A",
6404 			"Dump selected diagnostic data from FW");
6405 
6406 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "fw_build",
6407 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD, &sc->hw.fw_build, 0,
6408 		       "FW Build ID");
6409 
6410 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "os_ddp_version",
6411 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6412 			ice_sysctl_os_pkg_version, "A",
6413 			"DDP package name and version found in ice_ddp");
6414 
6415 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "cur_lldp_persist_status",
6416 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6417 			ice_sysctl_fw_cur_lldp_persist_status, "A",
6418 			"Current LLDP persistent status");
6419 
6420 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "dflt_lldp_persist_status",
6421 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6422 			ice_sysctl_fw_dflt_lldp_persist_status, "A",
6423 			"Default LLDP persistent status");
6424 
6425 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "negotiated_fc",
6426 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6427 			ice_sysctl_negotiated_fc, "A",
6428 			"Current Negotiated Flow Control mode");
6429 
6430 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "local_dcbx_cfg",
6431 			CTLTYPE_STRING | CTLFLAG_RD, sc, ICE_AQ_LLDP_MIB_LOCAL,
6432 			ice_sysctl_dump_dcbx_cfg, "A",
6433 			"Dumps Local MIB information from firmware");
6434 
6435 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "remote_dcbx_cfg",
6436 			CTLTYPE_STRING | CTLFLAG_RD, sc, ICE_AQ_LLDP_MIB_REMOTE,
6437 			ice_sysctl_dump_dcbx_cfg, "A",
6438 			"Dumps Remote MIB information from firmware");
6439 
6440 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "pf_vsi_cfg", CTLTYPE_STRING | CTLFLAG_RD,
6441 			sc, 0, ice_sysctl_dump_vsi_cfg, "A",
6442 			"Dumps Selected PF VSI parameters from firmware");
6443 
6444 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "query_port_ets", CTLTYPE_STRING | CTLFLAG_RD,
6445 			sc, 0, ice_sysctl_query_port_ets, "A",
6446 			"Prints selected output from Query Port ETS AQ command");
6447 
6448 	sw_node = SYSCTL_ADD_NODE(ctx, debug_list, OID_AUTO, "switch",
6449 				  ICE_CTLFLAG_DEBUG | CTLFLAG_RD, NULL,
6450 				  "Switch Configuration");
6451 	sw_list = SYSCTL_CHILDREN(sw_node);
6452 
6453 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "mac_filters",
6454 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6455 			ice_sysctl_dump_mac_filters, "A",
6456 			"MAC Filters");
6457 
6458 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "vlan_filters",
6459 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6460 			ice_sysctl_dump_vlan_filters, "A",
6461 			"VLAN Filters");
6462 
6463 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "ethertype_filters",
6464 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6465 			ice_sysctl_dump_ethertype_filters, "A",
6466 			"Ethertype Filters");
6467 
6468 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "ethertype_mac_filters",
6469 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6470 			ice_sysctl_dump_ethertype_mac_filters, "A",
6471 			"Ethertype/MAC Filters");
6472 
6473 	dump_node = SYSCTL_ADD_NODE(ctx, debug_list, OID_AUTO, "dump",
6474 				  ICE_CTLFLAG_DEBUG | CTLFLAG_RD, NULL,
6475 				  "Internal FW Dump");
6476 	dump_list = SYSCTL_CHILDREN(dump_node);
6477 
6478 	SYSCTL_ADD_PROC(ctx, dump_list, OID_AUTO, "clusters",
6479 			ICE_CTLFLAG_DEBUG | CTLTYPE_U16 | CTLFLAG_RW, sc, 0,
6480 			ice_sysctl_fw_debug_dump_cluster_setting, "SU",
6481 			ICE_SYSCTL_HELP_FW_DEBUG_DUMP_CLUSTER_SETTING);
6482 
6483 	SYSCTL_ADD_PROC(ctx, dump_list, OID_AUTO, "dump",
6484 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
6485 			ice_sysctl_fw_debug_dump_do_dump, "",
6486 			ICE_SYSCTL_HELP_FW_DEBUG_DUMP_DO_DUMP);
6487 }
6488 
6489 /**
6490  * ice_vsi_disable_tx - Disable (unconfigure) Tx queues for a VSI
6491  * @vsi: the VSI to disable
6492  *
6493  * Disables the Tx queues associated with this VSI. Essentially the opposite
6494  * of ice_cfg_vsi_for_tx.
6495  */
6496 int
6497 ice_vsi_disable_tx(struct ice_vsi *vsi)
6498 {
6499 	struct ice_softc *sc = vsi->sc;
6500 	struct ice_hw *hw = &sc->hw;
6501 	enum ice_status status;
6502 	u32 *q_teids;
6503 	u16 *q_ids, *q_handles;
6504 	size_t q_teids_size, q_ids_size, q_handles_size;
6505 	int tc, j, buf_idx, err = 0;
6506 
6507 	if (vsi->num_tx_queues > 255)
6508 		return (ENOSYS);
6509 
6510 	q_teids_size = sizeof(*q_teids) * vsi->num_tx_queues;
6511 	q_teids = (u32 *)malloc(q_teids_size, M_ICE, M_NOWAIT|M_ZERO);
6512 	if (!q_teids)
6513 		return (ENOMEM);
6514 
6515 	q_ids_size = sizeof(*q_ids) * vsi->num_tx_queues;
6516 	q_ids = (u16 *)malloc(q_ids_size, M_ICE, M_NOWAIT|M_ZERO);
6517 	if (!q_ids) {
6518 		err = (ENOMEM);
6519 		goto free_q_teids;
6520 	}
6521 
6522 	q_handles_size = sizeof(*q_handles) * vsi->num_tx_queues;
6523 	q_handles = (u16 *)malloc(q_handles_size, M_ICE, M_NOWAIT|M_ZERO);
6524 	if (!q_handles) {
6525 		err = (ENOMEM);
6526 		goto free_q_ids;
6527 	}
6528 
6529 	ice_for_each_traffic_class(tc) {
6530 		struct ice_tc_info *tc_info = &vsi->tc_info[tc];
6531 		u16 start_idx, end_idx;
6532 
6533 		/* Skip rest of disabled TCs once the first
6534 		 * disabled TC is found */
6535 		if (!(vsi->tc_map & BIT(tc)))
6536 			break;
6537 
6538 		/* Fill out TX queue information for this TC */
6539 		start_idx = tc_info->qoffset;
6540 		end_idx = start_idx + tc_info->qcount_tx;
6541 		buf_idx = 0;
6542 		for (j = start_idx; j < end_idx; j++) {
6543 			struct ice_tx_queue *txq = &vsi->tx_queues[j];
6544 
6545 			q_ids[buf_idx] = vsi->tx_qmap[j];
6546 			q_handles[buf_idx] = txq->q_handle;
6547 			q_teids[buf_idx] = txq->q_teid;
6548 			buf_idx++;
6549 		}
6550 
6551 		status = ice_dis_vsi_txq(hw->port_info, vsi->idx, tc, buf_idx,
6552 					 q_handles, q_ids, q_teids, ICE_NO_RESET, 0, NULL);
6553 		if (status == ICE_ERR_DOES_NOT_EXIST) {
6554 			; /* Queues have already been disabled, no need to report this as an error */
6555 		} else if (status == ICE_ERR_RESET_ONGOING) {
6556 			device_printf(sc->dev,
6557 				      "Reset in progress. LAN Tx queues already disabled\n");
6558 			break;
6559 		} else if (status) {
6560 			device_printf(sc->dev,
6561 				      "Failed to disable LAN Tx queues: err %s aq_err %s\n",
6562 				      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
6563 			err = (ENODEV);
6564 			break;
6565 		}
6566 
6567 		/* Clear buffers */
6568 		memset(q_teids, 0, q_teids_size);
6569 		memset(q_ids, 0, q_ids_size);
6570 		memset(q_handles, 0, q_handles_size);
6571 	}
6572 
6573 /* free_q_handles: */
6574 	free(q_handles, M_ICE);
6575 free_q_ids:
6576 	free(q_ids, M_ICE);
6577 free_q_teids:
6578 	free(q_teids, M_ICE);
6579 
6580 	return err;
6581 }
6582 
6583 /**
6584  * ice_vsi_set_rss_params - Set the RSS parameters for the VSI
6585  * @vsi: the VSI to configure
6586  *
6587  * Sets the RSS table size and lookup table type for the VSI based on its
6588  * VSI type.
6589  */
6590 static void
6591 ice_vsi_set_rss_params(struct ice_vsi *vsi)
6592 {
6593 	struct ice_softc *sc = vsi->sc;
6594 	struct ice_hw_common_caps *cap;
6595 
6596 	cap = &sc->hw.func_caps.common_cap;
6597 
6598 	switch (vsi->type) {
6599 	case ICE_VSI_PF:
6600 		/* The PF VSI inherits RSS instance of the PF */
6601 		vsi->rss_table_size = cap->rss_table_size;
6602 		vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF;
6603 		break;
6604 	case ICE_VSI_VF:
6605 		vsi->rss_table_size = ICE_VSIQF_HLUT_ARRAY_SIZE;
6606 		vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI;
6607 		break;
6608 	default:
6609 		device_printf(sc->dev,
6610 			      "VSI %d: RSS not supported for VSI type %d\n",
6611 			      vsi->idx, vsi->type);
6612 		break;
6613 	}
6614 }
6615 
6616 /**
6617  * ice_vsi_add_txqs_ctx - Create a sysctl context and node to store txq sysctls
6618  * @vsi: The VSI to add the context for
6619  *
6620  * Creates a sysctl context for storing txq sysctls. Additionally creates
6621  * a node rooted at the given VSI's main sysctl node. This context will be
6622  * used to store per-txq sysctls which may need to be released during the
6623  * driver's lifetime.
6624  */
6625 void
6626 ice_vsi_add_txqs_ctx(struct ice_vsi *vsi)
6627 {
6628 	struct sysctl_oid_list *vsi_list;
6629 
6630 	sysctl_ctx_init(&vsi->txqs_ctx);
6631 
6632 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
6633 
6634 	vsi->txqs_node = SYSCTL_ADD_NODE(&vsi->txqs_ctx, vsi_list, OID_AUTO, "txqs",
6635 					 CTLFLAG_RD, NULL, "Tx Queues");
6636 }
6637 
6638 /**
6639  * ice_vsi_add_rxqs_ctx - Create a sysctl context and node to store rxq sysctls
6640  * @vsi: The VSI to add the context for
6641  *
6642  * Creates a sysctl context for storing rxq sysctls. Additionally creates
6643  * a node rooted at the given VSI's main sysctl node. This context will be
6644  * used to store per-rxq sysctls which may need to be released during the
6645  * driver's lifetime.
6646  */
6647 void
6648 ice_vsi_add_rxqs_ctx(struct ice_vsi *vsi)
6649 {
6650 	struct sysctl_oid_list *vsi_list;
6651 
6652 	sysctl_ctx_init(&vsi->rxqs_ctx);
6653 
6654 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
6655 
6656 	vsi->rxqs_node = SYSCTL_ADD_NODE(&vsi->rxqs_ctx, vsi_list, OID_AUTO, "rxqs",
6657 					 CTLFLAG_RD, NULL, "Rx Queues");
6658 }
6659 
6660 /**
6661  * ice_vsi_del_txqs_ctx - Delete the Tx queue sysctl context for this VSI
6662  * @vsi: The VSI to delete from
6663  *
6664  * Frees the txq sysctl context created for storing the per-queue Tx sysctls.
6665  * Must be called prior to freeing the Tx queue memory, in order to avoid
6666  * having sysctls point at stale memory.
6667  */
6668 void
6669 ice_vsi_del_txqs_ctx(struct ice_vsi *vsi)
6670 {
6671 	device_t dev = vsi->sc->dev;
6672 	int err;
6673 
6674 	if (vsi->txqs_node) {
6675 		err = sysctl_ctx_free(&vsi->txqs_ctx);
6676 		if (err)
6677 			device_printf(dev, "failed to free VSI %d txqs_ctx, err %s\n",
6678 				      vsi->idx, ice_err_str(err));
6679 		vsi->txqs_node = NULL;
6680 	}
6681 }
6682 
6683 /**
6684  * ice_vsi_del_rxqs_ctx - Delete the Rx queue sysctl context for this VSI
6685  * @vsi: The VSI to delete from
6686  *
6687  * Frees the rxq sysctl context created for storing the per-queue Rx sysctls.
6688  * Must be called prior to freeing the Rx queue memory, in order to avoid
6689  * having sysctls point at stale memory.
6690  */
6691 void
6692 ice_vsi_del_rxqs_ctx(struct ice_vsi *vsi)
6693 {
6694 	device_t dev = vsi->sc->dev;
6695 	int err;
6696 
6697 	if (vsi->rxqs_node) {
6698 		err = sysctl_ctx_free(&vsi->rxqs_ctx);
6699 		if (err)
6700 			device_printf(dev, "failed to free VSI %d rxqs_ctx, err %s\n",
6701 				      vsi->idx, ice_err_str(err));
6702 		vsi->rxqs_node = NULL;
6703 	}
6704 }
6705 
6706 /**
6707  * ice_add_txq_sysctls - Add per-queue sysctls for a Tx queue
6708  * @txq: pointer to the Tx queue
6709  *
6710 * Add per-queue sysctls for a given Tx queue. Can't be called during
6711 * ice_add_vsi_sysctls, since the queue memory has not yet been setup.
6712  */
6713 void
6714 ice_add_txq_sysctls(struct ice_tx_queue *txq)
6715 {
6716 	struct ice_vsi *vsi = txq->vsi;
6717 	struct sysctl_ctx_list *ctx = &vsi->txqs_ctx;
6718 	struct sysctl_oid_list *txqs_list, *this_txq_list;
6719 	struct sysctl_oid *txq_node;
6720 	char txq_name[32], txq_desc[32];
6721 
6722 	const struct ice_sysctl_info ctls[] = {
6723 		{ &txq->stats.tx_packets, "tx_packets", "Queue Packets Transmitted" },
6724 		{ &txq->stats.tx_bytes, "tx_bytes", "Queue Bytes Transmitted" },
6725 		{ &txq->stats.mss_too_small, "mss_too_small", "TSO sends with an MSS less than 64" },
6726 		{ 0, 0, 0 }
6727 	};
6728 
6729 	const struct ice_sysctl_info *entry = ctls;
6730 
6731 	txqs_list = SYSCTL_CHILDREN(vsi->txqs_node);
6732 
6733 	snprintf(txq_name, sizeof(txq_name), "%u", txq->me);
6734 	snprintf(txq_desc, sizeof(txq_desc), "Tx Queue %u", txq->me);
6735 	txq_node = SYSCTL_ADD_NODE(ctx, txqs_list, OID_AUTO, txq_name,
6736 				   CTLFLAG_RD, NULL, txq_desc);
6737 	this_txq_list = SYSCTL_CHILDREN(txq_node);
6738 
6739 	/* Add the Tx queue statistics */
6740 	while (entry->stat != 0) {
6741 		SYSCTL_ADD_U64(ctx, this_txq_list, OID_AUTO, entry->name,
6742 			       CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
6743 			       entry->description);
6744 		entry++;
6745 	}
6746 
6747 	SYSCTL_ADD_U8(ctx, this_txq_list, OID_AUTO, "tc",
6748 		       CTLFLAG_RD, &txq->tc, 0,
6749 		       "Traffic Class that Queue belongs to");
6750 }
6751 
6752 /**
6753  * ice_add_rxq_sysctls - Add per-queue sysctls for an Rx queue
6754  * @rxq: pointer to the Rx queue
6755  *
6756  * Add per-queue sysctls for a given Rx queue. Can't be called during
6757  * ice_add_vsi_sysctls, since the queue memory has not yet been setup.
6758  */
6759 void
6760 ice_add_rxq_sysctls(struct ice_rx_queue *rxq)
6761 {
6762 	struct ice_vsi *vsi = rxq->vsi;
6763 	struct sysctl_ctx_list *ctx = &vsi->rxqs_ctx;
6764 	struct sysctl_oid_list *rxqs_list, *this_rxq_list;
6765 	struct sysctl_oid *rxq_node;
6766 	char rxq_name[32], rxq_desc[32];
6767 
6768 	const struct ice_sysctl_info ctls[] = {
6769 		{ &rxq->stats.rx_packets, "rx_packets", "Queue Packets Received" },
6770 		{ &rxq->stats.rx_bytes, "rx_bytes", "Queue Bytes Received" },
6771 		{ &rxq->stats.desc_errs, "rx_desc_errs", "Queue Rx Descriptor Errors" },
6772 		{ 0, 0, 0 }
6773 	};
6774 
6775 	const struct ice_sysctl_info *entry = ctls;
6776 
6777 	rxqs_list = SYSCTL_CHILDREN(vsi->rxqs_node);
6778 
6779 	snprintf(rxq_name, sizeof(rxq_name), "%u", rxq->me);
6780 	snprintf(rxq_desc, sizeof(rxq_desc), "Rx Queue %u", rxq->me);
6781 	rxq_node = SYSCTL_ADD_NODE(ctx, rxqs_list, OID_AUTO, rxq_name,
6782 				   CTLFLAG_RD, NULL, rxq_desc);
6783 	this_rxq_list = SYSCTL_CHILDREN(rxq_node);
6784 
6785 	/* Add the Rx queue statistics */
6786 	while (entry->stat != 0) {
6787 		SYSCTL_ADD_U64(ctx, this_rxq_list, OID_AUTO, entry->name,
6788 			       CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
6789 			       entry->description);
6790 		entry++;
6791 	}
6792 
6793 	SYSCTL_ADD_U8(ctx, this_rxq_list, OID_AUTO, "tc",
6794 		       CTLFLAG_RD, &rxq->tc, 0,
6795 		       "Traffic Class that Queue belongs to");
6796 }
6797 
6798 /**
6799  * ice_get_default_rss_key - Obtain a default RSS key
6800  * @seed: storage for the RSS key data
6801  *
6802  * Copies a pre-generated RSS key into the seed memory. The seed pointer must
6803  * point to a block of memory that is at least 40 bytes in size.
6804  *
6805  * The key isn't randomly generated each time this function is called because
6806  * that makes the RSS key change every time we reconfigure RSS. This does mean
6807  * that we're hard coding a possibly 'well known' key. We might want to
6808  * investigate randomly generating this key once during the first call.
6809  */
6810 static void
6811 ice_get_default_rss_key(u8 *seed)
6812 {
6813 	const u8 default_seed[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE] = {
6814 		0x39, 0xed, 0xff, 0x4d, 0x43, 0x58, 0x42, 0xc3, 0x5f, 0xb8,
6815 		0xa5, 0x32, 0x95, 0x65, 0x81, 0xcd, 0x36, 0x79, 0x71, 0x97,
6816 		0xde, 0xa4, 0x41, 0x40, 0x6f, 0x27, 0xe9, 0x81, 0x13, 0xa0,
6817 		0x95, 0x93, 0x5b, 0x1e, 0x9d, 0x27, 0x9d, 0x24, 0x84, 0xb5,
6818 	};
6819 
6820 	bcopy(default_seed, seed, ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE);
6821 }
6822 
6823 /**
6824  * ice_set_rss_key - Configure a given VSI with the default RSS key
6825  * @vsi: the VSI to configure
6826  *
6827  * Program the hardware RSS key. We use rss_getkey to grab the kernel RSS key.
6828  * If the kernel RSS interface is not available, this will fall back to our
6829  * pre-generated hash seed from ice_get_default_rss_key().
6830  */
6831 static int
6832 ice_set_rss_key(struct ice_vsi *vsi)
6833 {
6834 	struct ice_aqc_get_set_rss_keys keydata = { .standard_rss_key = {0} };
6835 	struct ice_softc *sc = vsi->sc;
6836 	struct ice_hw *hw = &sc->hw;
6837 	enum ice_status status;
6838 
6839 	/*
6840 	 * If the RSS kernel interface is disabled, this will return the
6841 	 * default RSS key above.
6842 	 */
6843 	rss_getkey(keydata.standard_rss_key);
6844 
6845 	status = ice_aq_set_rss_key(hw, vsi->idx, &keydata);
6846 	if (status) {
6847 		device_printf(sc->dev,
6848 			      "ice_aq_set_rss_key status %s, error %s\n",
6849 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
6850 		return (EIO);
6851 	}
6852 
6853 	return (0);
6854 }
6855 
6856 /**
6857  * ice_set_rss_flow_flds - Program the RSS hash flows after package init
6858  * @vsi: the VSI to configure
6859  *
6860  * If the package file is initialized, the default RSS flows are reset. We
6861  * need to reprogram the expected hash configuration. We'll use
6862  * rss_gethashconfig() to determine which flows to enable. If RSS kernel
6863  * support is not enabled, this macro will fall back to suitable defaults.
6864  */
6865 static void
6866 ice_set_rss_flow_flds(struct ice_vsi *vsi)
6867 {
6868 	struct ice_softc *sc = vsi->sc;
6869 	struct ice_hw *hw = &sc->hw;
6870 	struct ice_rss_hash_cfg rss_cfg = { 0, 0, ICE_RSS_ANY_HEADERS, false };
6871 	device_t dev = sc->dev;
6872 	enum ice_status status;
6873 	u_int rss_hash_config;
6874 
6875 	rss_hash_config = rss_gethashconfig();
6876 
6877 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) {
6878 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4;
6879 		rss_cfg.hash_flds = ICE_FLOW_HASH_IPV4;
6880 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
6881 		if (status)
6882 			device_printf(dev,
6883 				      "ice_add_rss_cfg on VSI %d failed for ipv4 flow, err %s aq_err %s\n",
6884 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
6885 	}
6886 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) {
6887 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_TCP;
6888 		rss_cfg.hash_flds = ICE_HASH_TCP_IPV4;
6889 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
6890 		if (status)
6891 			device_printf(dev,
6892 				      "ice_add_rss_cfg on VSI %d failed for tcp4 flow, err %s aq_err %s\n",
6893 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
6894 	}
6895 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) {
6896 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_UDP;
6897 		rss_cfg.hash_flds = ICE_HASH_UDP_IPV4;
6898 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
6899 		if (status)
6900 			device_printf(dev,
6901 				      "ice_add_rss_cfg on VSI %d failed for udp4 flow, err %s aq_err %s\n",
6902 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
6903 	}
6904 	if (rss_hash_config & (RSS_HASHTYPE_RSS_IPV6 | RSS_HASHTYPE_RSS_IPV6_EX)) {
6905 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6;
6906 		rss_cfg.hash_flds = ICE_FLOW_HASH_IPV6;
6907 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
6908 		if (status)
6909 			device_printf(dev,
6910 				      "ice_add_rss_cfg on VSI %d failed for ipv6 flow, err %s aq_err %s\n",
6911 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
6912 	}
6913 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) {
6914 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_TCP;
6915 		rss_cfg.hash_flds = ICE_HASH_TCP_IPV6;
6916 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
6917 		if (status)
6918 			device_printf(dev,
6919 				      "ice_add_rss_cfg on VSI %d failed for tcp6 flow, err %s aq_err %s\n",
6920 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
6921 	}
6922 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) {
6923 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_UDP;
6924 		rss_cfg.hash_flds = ICE_HASH_UDP_IPV6;
6925 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
6926 		if (status)
6927 			device_printf(dev,
6928 				      "ice_add_rss_cfg on VSI %d failed for udp6 flow, err %s aq_err %s\n",
6929 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
6930 	}
6931 
6932 	/* Warn about RSS hash types which are not supported */
6933 	/* coverity[dead_error_condition] */
6934 	if (rss_hash_config & ~ICE_DEFAULT_RSS_HASH_CONFIG) {
6935 		device_printf(dev,
6936 			      "ice_add_rss_cfg on VSI %d could not configure every requested hash type\n",
6937 			      vsi->idx);
6938 	}
6939 }
6940 
6941 /**
6942  * ice_set_rss_lut - Program the RSS lookup table for a VSI
6943  * @vsi: the VSI to configure
6944  *
6945  * Programs the RSS lookup table for a given VSI. We use
6946  * rss_get_indirection_to_bucket which will use the indirection table provided
6947  * by the kernel RSS interface when available. If the kernel RSS interface is
6948  * not available, we will fall back to a simple round-robin fashion queue
6949  * assignment.
6950  */
6951 static int
6952 ice_set_rss_lut(struct ice_vsi *vsi)
6953 {
6954 	struct ice_softc *sc = vsi->sc;
6955 	struct ice_hw *hw = &sc->hw;
6956 	device_t dev = sc->dev;
6957 	struct ice_aq_get_set_rss_lut_params lut_params;
6958 	enum ice_status status;
6959 	int i, err = 0;
6960 	u8 *lut;
6961 
6962 	lut = (u8 *)malloc(vsi->rss_table_size, M_ICE, M_NOWAIT|M_ZERO);
6963 	if (!lut) {
6964 		device_printf(dev, "Failed to allocate RSS lut memory\n");
6965 		return (ENOMEM);
6966 	}
6967 
6968 	/* Populate the LUT with max no. of queues. If the RSS kernel
6969 	 * interface is disabled, this will assign the lookup table in
6970 	 * a simple round robin fashion
6971 	 */
6972 	for (i = 0; i < vsi->rss_table_size; i++) {
6973 		/* XXX: this needs to be changed if num_rx_queues ever counts
6974 		 * more than just the RSS queues */
6975 		lut[i] = rss_get_indirection_to_bucket(i) % vsi->num_rx_queues;
6976 	}
6977 
6978 	lut_params.vsi_handle = vsi->idx;
6979 	lut_params.lut_size = vsi->rss_table_size;
6980 	lut_params.lut_type = vsi->rss_lut_type;
6981 	lut_params.lut = lut;
6982 	lut_params.global_lut_id = 0;
6983 	status = ice_aq_set_rss_lut(hw, &lut_params);
6984 	if (status) {
6985 		device_printf(dev,
6986 			      "Cannot set RSS lut, err %s aq_err %s\n",
6987 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
6988 		err = (EIO);
6989 	}
6990 
6991 	free(lut, M_ICE);
6992 	return err;
6993 }
6994 
6995 /**
6996  * ice_config_rss - Configure RSS for a VSI
6997  * @vsi: the VSI to configure
6998  *
6999  * If FEATURE_RSS is enabled, configures the RSS lookup table and hash key for
7000  * a given VSI.
7001  */
7002 int
7003 ice_config_rss(struct ice_vsi *vsi)
7004 {
7005 	int err;
7006 
7007 	/* Nothing to do, if RSS is not enabled */
7008 	if (!ice_is_bit_set(vsi->sc->feat_en, ICE_FEATURE_RSS))
7009 		return 0;
7010 
7011 	err = ice_set_rss_key(vsi);
7012 	if (err)
7013 		return err;
7014 
7015 	ice_set_rss_flow_flds(vsi);
7016 
7017 	return ice_set_rss_lut(vsi);
7018 }
7019 
7020 /**
7021  * ice_log_pkg_init - Log a message about status of DDP initialization
7022  * @sc: the device softc pointer
7023  * @pkg_status: the status result of ice_copy_and_init_pkg
7024  *
7025  * Called by ice_load_pkg after an attempt to download the DDP package
7026  * contents to the device to log an appropriate message for the system
7027  * administrator about download status.
7028  *
7029  * @post ice_is_init_pkg_successful function is used to determine
7030  * whether the download was successful and DDP package is compatible
7031  * with this driver. Otherwise driver will transition to Safe Mode.
7032  */
7033 void
7034 ice_log_pkg_init(struct ice_softc *sc, enum ice_ddp_state pkg_status)
7035 {
7036 	struct ice_hw *hw = &sc->hw;
7037 	device_t dev = sc->dev;
7038 	struct sbuf *active_pkg, *os_pkg;
7039 
7040 	active_pkg = sbuf_new_auto();
7041 	ice_active_pkg_version_str(hw, active_pkg);
7042 	sbuf_finish(active_pkg);
7043 
7044 	os_pkg = sbuf_new_auto();
7045 	ice_os_pkg_version_str(hw, os_pkg);
7046 	sbuf_finish(os_pkg);
7047 
7048 	switch (pkg_status) {
7049 	case ICE_DDP_PKG_SUCCESS:
7050 		device_printf(dev,
7051 			      "The DDP package was successfully loaded: %s.\n",
7052 			      sbuf_data(active_pkg));
7053 		break;
7054 	case ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED:
7055 	case ICE_DDP_PKG_ALREADY_LOADED:
7056 		device_printf(dev,
7057 			      "DDP package already present on device: %s.\n",
7058 			      sbuf_data(active_pkg));
7059 		break;
7060 	case ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED:
7061 		device_printf(dev,
7062 			      "The driver could not load the DDP package file because a compatible DDP package is already present on the device.  The device has package %s.  The ice_ddp module has package: %s.\n",
7063 			      sbuf_data(active_pkg),
7064 			      sbuf_data(os_pkg));
7065 		break;
7066 	case ICE_DDP_PKG_FILE_VERSION_TOO_HIGH:
7067 		device_printf(dev,
7068 			      "The device has a DDP package that is higher than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7069 			      sbuf_data(active_pkg),
7070 			      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7071 		break;
7072 	case ICE_DDP_PKG_FILE_VERSION_TOO_LOW:
7073 		device_printf(dev,
7074 			      "The device has a DDP package that is lower than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7075 			      sbuf_data(active_pkg),
7076 			      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7077 		break;
7078 	case ICE_DDP_PKG_ALREADY_LOADED_NOT_SUPPORTED:
7079 		/*
7080 		 * This assumes that the active_pkg_ver will not be
7081 		 * initialized if the ice_ddp package version is not
7082 		 * supported.
7083 		 */
7084 		if (pkg_ver_empty(&hw->active_pkg_ver, hw->active_pkg_name)) {
7085 			/* The ice_ddp version is not supported */
7086 			if (pkg_ver_compatible(&hw->pkg_ver) > 0) {
7087 				device_printf(dev,
7088 					      "The DDP package in the ice_ddp module is higher than the driver supports.  The ice_ddp module has package %s.  The driver requires version %d.%d.x.x.  Please use an updated driver.  Entering Safe Mode.\n",
7089 					      sbuf_data(os_pkg),
7090 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7091 			} else if (pkg_ver_compatible(&hw->pkg_ver) < 0) {
7092 				device_printf(dev,
7093 					      "The DDP package in the ice_ddp module is lower than the driver supports.  The ice_ddp module has package %s.  The driver requires version %d.%d.x.x.  Please use an updated ice_ddp module.  Entering Safe Mode.\n",
7094 					      sbuf_data(os_pkg),
7095 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7096 			} else {
7097 				device_printf(dev,
7098 					      "An unknown error occurred when loading the DDP package.  The ice_ddp module has package %s.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7099 					      sbuf_data(os_pkg),
7100 					      sbuf_data(active_pkg),
7101 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7102 			}
7103 		} else {
7104 			if (pkg_ver_compatible(&hw->active_pkg_ver) > 0) {
7105 				device_printf(dev,
7106 					      "The device has a DDP package that is higher than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7107 					      sbuf_data(active_pkg),
7108 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7109 			} else if (pkg_ver_compatible(&hw->active_pkg_ver) < 0) {
7110 				device_printf(dev,
7111 					      "The device has a DDP package that is lower than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7112 					      sbuf_data(active_pkg),
7113 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7114 			} else {
7115 				device_printf(dev,
7116 					      "An unknown error occurred when loading the DDP package.  The ice_ddp module has package %s.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7117 					      sbuf_data(os_pkg),
7118 					      sbuf_data(active_pkg),
7119 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7120 			}
7121 		}
7122 		break;
7123 	case ICE_DDP_PKG_INVALID_FILE:
7124 		device_printf(dev,
7125 			      "The DDP package in the ice_ddp module is invalid.  Entering Safe Mode\n");
7126 		break;
7127 	case ICE_DDP_PKG_FW_MISMATCH:
7128 		device_printf(dev,
7129 			      "The firmware loaded on the device is not compatible with the DDP package.  Please update the device's NVM.  Entering safe mode.\n");
7130 		break;
7131 	case ICE_DDP_PKG_NO_SEC_MANIFEST:
7132 	case ICE_DDP_PKG_FILE_SIGNATURE_INVALID:
7133 		device_printf(dev,
7134 			      "The DDP package in the ice_ddp module cannot be loaded because its signature is not valid.  Please use a valid ice_ddp module.  Entering Safe Mode.\n");
7135 		break;
7136 	case ICE_DDP_PKG_SECURE_VERSION_NBR_TOO_LOW:
7137 		device_printf(dev,
7138 			      "The DDP package in the ice_ddp module could not be loaded because its security revision is too low.  Please use an updated ice_ddp module.  Entering Safe Mode.\n");
7139 		break;
7140 	case ICE_DDP_PKG_MANIFEST_INVALID:
7141 	case ICE_DDP_PKG_BUFFER_INVALID:
7142 		device_printf(dev,
7143 			      "An error occurred on the device while loading the DDP package.  Entering Safe Mode.\n");
7144 		break;
7145 	default:
7146 		device_printf(dev,
7147 			 "An unknown error occurred when loading the DDP package.  Entering Safe Mode.\n");
7148 		break;
7149 	}
7150 
7151 	sbuf_delete(active_pkg);
7152 	sbuf_delete(os_pkg);
7153 }
7154 
7155 /**
7156  * ice_load_pkg_file - Load the DDP package file using firmware_get
7157  * @sc: device private softc
7158  *
7159  * Use firmware_get to load the DDP package memory and then request that
7160  * firmware download the package contents and program the relevant hardware
7161  * bits.
7162  *
7163  * This function makes a copy of the DDP package memory which is tracked in
7164  * the ice_hw structure. The copy will be managed and released by
7165  * ice_deinit_hw(). This allows the firmware reference to be immediately
7166  * released using firmware_put.
7167  */
7168 enum ice_status
7169 ice_load_pkg_file(struct ice_softc *sc)
7170 {
7171 	struct ice_hw *hw = &sc->hw;
7172 	device_t dev = sc->dev;
7173 	enum ice_ddp_state state;
7174 	const struct firmware *pkg;
7175 	enum ice_status status = ICE_SUCCESS;
7176 	u8 cached_layer_count;
7177 	u8 *buf_copy;
7178 
7179 	pkg = firmware_get("ice_ddp");
7180 	if (!pkg) {
7181 		device_printf(dev,
7182 		    "The DDP package module (ice_ddp) failed to load or could not be found. Entering Safe Mode.\n");
7183 		if (cold)
7184 			device_printf(dev,
7185 			    "The DDP package module cannot be automatically loaded while booting. You may want to specify ice_ddp_load=\"YES\" in your loader.conf\n");
7186 		status = ICE_ERR_CFG;
7187 		goto err_load_pkg;
7188 	}
7189 
7190 	/* Check for topology change */
7191 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_TX_BALANCE)) {
7192 		cached_layer_count = hw->num_tx_sched_layers;
7193 		buf_copy = (u8 *)malloc(pkg->datasize, M_ICE, M_NOWAIT);
7194 		if (buf_copy == NULL)
7195 			return ICE_ERR_NO_MEMORY;
7196 		memcpy(buf_copy, pkg->data, pkg->datasize);
7197 		status = ice_cfg_tx_topo(&sc->hw, buf_copy, pkg->datasize);
7198 		free(buf_copy, M_ICE);
7199 		/* Success indicates a change was made */
7200 		if (status == ICE_SUCCESS) {
7201 			/* 9 -> 5 */
7202 			if (cached_layer_count == 9)
7203 				device_printf(dev,
7204 				    "Transmit balancing feature enabled\n");
7205 			else
7206 				device_printf(dev,
7207 				    "Transmit balancing feature disabled\n");
7208 			ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_en);
7209 			return (status);
7210 		}
7211 	}
7212 
7213 	/* Copy and download the pkg contents */
7214 	state = ice_copy_and_init_pkg(hw, (const u8 *)pkg->data, pkg->datasize);
7215 
7216 	/* Release the firmware reference */
7217 	firmware_put(pkg, FIRMWARE_UNLOAD);
7218 
7219 	/* Check the active DDP package version and log a message */
7220 	ice_log_pkg_init(sc, state);
7221 
7222 	/* Place the driver into safe mode */
7223 	if (ice_is_init_pkg_successful(state))
7224 		return (ICE_ERR_ALREADY_EXISTS);
7225 
7226 err_load_pkg:
7227 	ice_zero_bitmap(sc->feat_cap, ICE_FEATURE_COUNT);
7228 	ice_zero_bitmap(sc->feat_en, ICE_FEATURE_COUNT);
7229 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
7230 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
7231 
7232 	return (status);
7233 }
7234 
7235 /**
7236  * ice_get_ifnet_counter - Retrieve counter value for a given ifnet counter
7237  * @vsi: the vsi to retrieve the value for
7238  * @counter: the counter type to retrieve
7239  *
7240  * Returns the value for a given ifnet counter. To do so, we calculate the
7241  * value based on the matching hardware statistics.
7242  */
7243 uint64_t
7244 ice_get_ifnet_counter(struct ice_vsi *vsi, ift_counter counter)
7245 {
7246 	struct ice_hw_port_stats *hs = &vsi->sc->stats.cur;
7247 	struct ice_eth_stats *es = &vsi->hw_stats.cur;
7248 
7249 	/* For some statistics, especially those related to error flows, we do
7250 	 * not have per-VSI counters. In this case, we just report the global
7251 	 * counters.
7252 	 */
7253 
7254 	switch (counter) {
7255 	case IFCOUNTER_IPACKETS:
7256 		return (es->rx_unicast + es->rx_multicast + es->rx_broadcast);
7257 	case IFCOUNTER_IERRORS:
7258 		return (hs->crc_errors + hs->illegal_bytes +
7259 			hs->mac_local_faults + hs->mac_remote_faults +
7260 			hs->rx_len_errors + hs->rx_undersize +
7261 			hs->rx_oversize + hs->rx_fragments + hs->rx_jabber);
7262 	case IFCOUNTER_OPACKETS:
7263 		return (es->tx_unicast + es->tx_multicast + es->tx_broadcast);
7264 	case IFCOUNTER_OERRORS:
7265 		return (es->tx_errors);
7266 	case IFCOUNTER_COLLISIONS:
7267 		return (0);
7268 	case IFCOUNTER_IBYTES:
7269 		return (es->rx_bytes);
7270 	case IFCOUNTER_OBYTES:
7271 		return (es->tx_bytes);
7272 	case IFCOUNTER_IMCASTS:
7273 		return (es->rx_multicast);
7274 	case IFCOUNTER_OMCASTS:
7275 		return (es->tx_multicast);
7276 	case IFCOUNTER_IQDROPS:
7277 		return (es->rx_discards);
7278 	case IFCOUNTER_OQDROPS:
7279 		return (hs->tx_dropped_link_down);
7280 	case IFCOUNTER_NOPROTO:
7281 		return (es->rx_unknown_protocol);
7282 	default:
7283 		return if_get_counter_default(vsi->sc->ifp, counter);
7284 	}
7285 }
7286 
7287 /**
7288  * ice_save_pci_info - Save PCI configuration fields in HW struct
7289  * @hw: the ice_hw struct to save the PCI information in
7290  * @dev: the device to get the PCI information from
7291  *
7292  * This should only be called once, early in the device attach
7293  * process.
7294  */
7295 void
7296 ice_save_pci_info(struct ice_hw *hw, device_t dev)
7297 {
7298 	hw->vendor_id = pci_get_vendor(dev);
7299 	hw->device_id = pci_get_device(dev);
7300 	hw->subsystem_vendor_id = pci_get_subvendor(dev);
7301 	hw->subsystem_device_id = pci_get_subdevice(dev);
7302 	hw->revision_id = pci_get_revid(dev);
7303 	hw->bus.device = pci_get_slot(dev);
7304 	hw->bus.func = pci_get_function(dev);
7305 }
7306 
7307 /**
7308  * ice_replay_all_vsi_cfg - Replace configuration for all VSIs after reset
7309  * @sc: the device softc
7310  *
7311  * Replace the configuration for each VSI, and then cleanup replay
7312  * information. Called after a hardware reset in order to reconfigure the
7313  * active VSIs.
7314  */
7315 int
7316 ice_replay_all_vsi_cfg(struct ice_softc *sc)
7317 {
7318 	struct ice_hw *hw = &sc->hw;
7319 	enum ice_status status;
7320 	int i;
7321 
7322 	for (i = 0 ; i < sc->num_available_vsi; i++) {
7323 		struct ice_vsi *vsi = sc->all_vsi[i];
7324 
7325 		if (!vsi)
7326 			continue;
7327 
7328 		status = ice_replay_vsi(hw, vsi->idx);
7329 		if (status) {
7330 			device_printf(sc->dev, "Failed to replay VSI %d, err %s aq_err %s\n",
7331 				      vsi->idx, ice_status_str(status),
7332 				      ice_aq_str(hw->adminq.sq_last_status));
7333 			return (EIO);
7334 		}
7335 	}
7336 
7337 	/* Cleanup replay filters after successful reconfiguration */
7338 	ice_replay_post(hw);
7339 	return (0);
7340 }
7341 
7342 /**
7343  * ice_clean_vsi_rss_cfg - Cleanup RSS configuration for a given VSI
7344  * @vsi: pointer to the VSI structure
7345  *
7346  * Cleanup the advanced RSS configuration for a given VSI. This is necessary
7347  * during driver removal to ensure that all RSS resources are properly
7348  * released.
7349  *
7350  * @remark this function doesn't report an error as it is expected to be
7351  * called during driver reset and unload, and there isn't much the driver can
7352  * do if freeing RSS resources fails.
7353  */
7354 static void
7355 ice_clean_vsi_rss_cfg(struct ice_vsi *vsi)
7356 {
7357 	struct ice_softc *sc = vsi->sc;
7358 	struct ice_hw *hw = &sc->hw;
7359 	device_t dev = sc->dev;
7360 	enum ice_status status;
7361 
7362 	status = ice_rem_vsi_rss_cfg(hw, vsi->idx);
7363 	if (status)
7364 		device_printf(dev,
7365 			      "Failed to remove RSS configuration for VSI %d, err %s\n",
7366 			      vsi->idx, ice_status_str(status));
7367 
7368 	/* Remove this VSI from the RSS list */
7369 	ice_rem_vsi_rss_list(hw, vsi->idx);
7370 }
7371 
7372 /**
7373  * ice_clean_all_vsi_rss_cfg - Cleanup RSS configuration for all VSIs
7374  * @sc: the device softc pointer
7375  *
7376  * Cleanup the advanced RSS configuration for all VSIs on a given PF
7377  * interface.
7378  *
7379  * @remark This should be called while preparing for a reset, to cleanup stale
7380  * RSS configuration for all VSIs.
7381  */
7382 void
7383 ice_clean_all_vsi_rss_cfg(struct ice_softc *sc)
7384 {
7385 	int i;
7386 
7387 	/* No need to cleanup if RSS is not enabled */
7388 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_RSS))
7389 		return;
7390 
7391 	for (i = 0; i < sc->num_available_vsi; i++) {
7392 		struct ice_vsi *vsi = sc->all_vsi[i];
7393 
7394 		if (vsi)
7395 			ice_clean_vsi_rss_cfg(vsi);
7396 	}
7397 }
7398 
7399 /**
7400  * ice_requested_fec_mode - Return the requested FEC mode as a string
7401  * @pi: The port info structure
7402  *
7403  * Return a string representing the requested FEC mode.
7404  */
7405 static const char *
7406 ice_requested_fec_mode(struct ice_port_info *pi)
7407 {
7408 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
7409 	enum ice_status status;
7410 
7411 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
7412 				     &pcaps, NULL);
7413 	if (status)
7414 		/* Just report unknown if we can't get capabilities */
7415 		return "Unknown";
7416 
7417 	/* Check if RS-FEC has been requested first */
7418 	if (pcaps.link_fec_options & (ICE_AQC_PHY_FEC_25G_RS_528_REQ |
7419 				      ICE_AQC_PHY_FEC_25G_RS_544_REQ))
7420 		return ice_fec_str(ICE_FEC_RS);
7421 
7422 	/* If RS FEC has not been requested, then check BASE-R */
7423 	if (pcaps.link_fec_options & (ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ |
7424 				      ICE_AQC_PHY_FEC_25G_KR_REQ))
7425 		return ice_fec_str(ICE_FEC_BASER);
7426 
7427 	return ice_fec_str(ICE_FEC_NONE);
7428 }
7429 
7430 /**
7431  * ice_negotiated_fec_mode - Return the negotiated FEC mode as a string
7432  * @pi: The port info structure
7433  *
7434  * Return a string representing the current FEC mode.
7435  */
7436 static const char *
7437 ice_negotiated_fec_mode(struct ice_port_info *pi)
7438 {
7439 	/* First, check if RS has been requested first */
7440 	if (pi->phy.link_info.fec_info & (ICE_AQ_LINK_25G_RS_528_FEC_EN |
7441 					  ICE_AQ_LINK_25G_RS_544_FEC_EN))
7442 		return ice_fec_str(ICE_FEC_RS);
7443 
7444 	/* If RS FEC has not been requested, then check BASE-R */
7445 	if (pi->phy.link_info.fec_info & ICE_AQ_LINK_25G_KR_FEC_EN)
7446 		return ice_fec_str(ICE_FEC_BASER);
7447 
7448 	return ice_fec_str(ICE_FEC_NONE);
7449 }
7450 
7451 /**
7452  * ice_autoneg_mode - Return string indicating of autoneg completed
7453  * @pi: The port info structure
7454  *
7455  * Return "True" if autonegotiation is completed, "False" otherwise.
7456  */
7457 static const char *
7458 ice_autoneg_mode(struct ice_port_info *pi)
7459 {
7460 	if (pi->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)
7461 		return "True";
7462 	else
7463 		return "False";
7464 }
7465 
7466 /**
7467  * ice_flowcontrol_mode - Return string indicating the Flow Control mode
7468  * @pi: The port info structure
7469  *
7470  * Returns the current Flow Control mode as a string.
7471  */
7472 static const char *
7473 ice_flowcontrol_mode(struct ice_port_info *pi)
7474 {
7475 	return ice_fc_str(pi->fc.current_mode);
7476 }
7477 
7478 /**
7479  * ice_link_up_msg - Log a link up message with associated info
7480  * @sc: the device private softc
7481  *
7482  * Log a link up message with LOG_NOTICE message level. Include information
7483  * about the duplex, FEC mode, autonegotiation and flow control.
7484  */
7485 void
7486 ice_link_up_msg(struct ice_softc *sc)
7487 {
7488 	struct ice_hw *hw = &sc->hw;
7489 	struct ifnet *ifp = sc->ifp;
7490 	const char *speed, *req_fec, *neg_fec, *autoneg, *flowcontrol;
7491 
7492 	speed = ice_aq_speed_to_str(hw->port_info);
7493 	req_fec = ice_requested_fec_mode(hw->port_info);
7494 	neg_fec = ice_negotiated_fec_mode(hw->port_info);
7495 	autoneg = ice_autoneg_mode(hw->port_info);
7496 	flowcontrol = ice_flowcontrol_mode(hw->port_info);
7497 
7498 	log(LOG_NOTICE, "%s: Link is up, %s Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
7499 	    if_name(ifp), speed, req_fec, neg_fec, autoneg, flowcontrol);
7500 }
7501 
7502 /**
7503  * ice_update_laa_mac - Update MAC address if Locally Administered
7504  * @sc: the device softc
7505  *
7506  * Update the device MAC address when a Locally Administered Address is
7507  * assigned.
7508  *
7509  * This function does *not* update the MAC filter list itself. Instead, it
7510  * should be called after ice_rm_pf_default_mac_filters, so that the previous
7511  * address filter will be removed, and before ice_cfg_pf_default_mac_filters,
7512  * so that the new address filter will be assigned.
7513  */
7514 int
7515 ice_update_laa_mac(struct ice_softc *sc)
7516 {
7517 	const u8 *lladdr = (const u8 *)if_getlladdr(sc->ifp);
7518 	struct ice_hw *hw = &sc->hw;
7519 	enum ice_status status;
7520 
7521 	/* If the address is the same, then there is nothing to update */
7522 	if (!memcmp(lladdr, hw->port_info->mac.lan_addr, ETHER_ADDR_LEN))
7523 		return (0);
7524 
7525 	/* Reject Multicast addresses */
7526 	if (ETHER_IS_MULTICAST(lladdr))
7527 		return (EINVAL);
7528 
7529 	status = ice_aq_manage_mac_write(hw, lladdr, ICE_AQC_MAN_MAC_UPDATE_LAA_WOL, NULL);
7530 	if (status) {
7531 		device_printf(sc->dev, "Failed to write mac %6D to firmware, err %s aq_err %s\n",
7532 			      lladdr, ":", ice_status_str(status),
7533 			      ice_aq_str(hw->adminq.sq_last_status));
7534 		return (EFAULT);
7535 	}
7536 
7537 	/* Copy the address into place of the LAN address. */
7538 	bcopy(lladdr, hw->port_info->mac.lan_addr, ETHER_ADDR_LEN);
7539 
7540 	return (0);
7541 }
7542 
7543 /**
7544  * ice_get_and_print_bus_info - Save (PCI) bus info and print messages
7545  * @sc: device softc
7546  *
7547  * This will potentially print out a warning message if bus bandwidth
7548  * is insufficient for full-speed operation.
7549  *
7550  * This should only be called once, during the attach process, after
7551  * hw->port_info has been filled out with port link topology information
7552  * (from the Get PHY Capabilities Admin Queue command).
7553  */
7554 void
7555 ice_get_and_print_bus_info(struct ice_softc *sc)
7556 {
7557 	struct ice_hw *hw = &sc->hw;
7558 	device_t dev = sc->dev;
7559 	u16 pci_link_status;
7560 	int offset;
7561 
7562 	pci_find_cap(dev, PCIY_EXPRESS, &offset);
7563 	pci_link_status = pci_read_config(dev, offset + PCIER_LINK_STA, 2);
7564 
7565 	/* Fill out hw struct with PCIE link status info */
7566 	ice_set_pci_link_status_data(hw, pci_link_status);
7567 
7568 	/* Use info to print out bandwidth messages */
7569 	ice_print_bus_link_data(dev, hw);
7570 
7571 	if (ice_pcie_bandwidth_check(sc)) {
7572 		device_printf(dev,
7573 		    "PCI-Express bandwidth available for this device may be insufficient for optimal performance.\n");
7574 		device_printf(dev,
7575 		    "Please move the device to a different PCI-e link with more lanes and/or higher transfer rate.\n");
7576 	}
7577 }
7578 
7579 /**
7580  * ice_pcie_bus_speed_to_rate - Convert driver bus speed enum value to
7581  * a 64-bit baudrate.
7582  * @speed: enum value to convert
7583  *
7584  * This only goes up to PCIE Gen 4.
7585  */
7586 static uint64_t
7587 ice_pcie_bus_speed_to_rate(enum ice_pcie_bus_speed speed)
7588 {
7589 	/* If the PCI-E speed is Gen1 or Gen2, then report
7590 	 * only 80% of bus speed to account for encoding overhead.
7591 	 */
7592 	switch (speed) {
7593 	case ice_pcie_speed_2_5GT:
7594 		return IF_Gbps(2);
7595 	case ice_pcie_speed_5_0GT:
7596 		return IF_Gbps(4);
7597 	case ice_pcie_speed_8_0GT:
7598 		return IF_Gbps(8);
7599 	case ice_pcie_speed_16_0GT:
7600 		return IF_Gbps(16);
7601 	case ice_pcie_speed_unknown:
7602 	default:
7603 		return 0;
7604 	}
7605 }
7606 
7607 /**
7608  * ice_pcie_lnk_width_to_int - Convert driver pci-e width enum value to
7609  * a 32-bit number.
7610  * @width: enum value to convert
7611  */
7612 static int
7613 ice_pcie_lnk_width_to_int(enum ice_pcie_link_width width)
7614 {
7615 	switch (width) {
7616 	case ice_pcie_lnk_x1:
7617 		return (1);
7618 	case ice_pcie_lnk_x2:
7619 		return (2);
7620 	case ice_pcie_lnk_x4:
7621 		return (4);
7622 	case ice_pcie_lnk_x8:
7623 		return (8);
7624 	case ice_pcie_lnk_x12:
7625 		return (12);
7626 	case ice_pcie_lnk_x16:
7627 		return (16);
7628 	case ice_pcie_lnk_x32:
7629 		return (32);
7630 	case ice_pcie_lnk_width_resrv:
7631 	case ice_pcie_lnk_width_unknown:
7632 	default:
7633 		return (0);
7634 	}
7635 }
7636 
7637 /**
7638  * ice_pcie_bandwidth_check - Check if PCI-E bandwidth is sufficient for
7639  * full-speed device operation.
7640  * @sc: adapter softc
7641  *
7642  * Returns 0 if sufficient; 1 if not.
7643  */
7644 static uint8_t
7645 ice_pcie_bandwidth_check(struct ice_softc *sc)
7646 {
7647 	struct ice_hw *hw = &sc->hw;
7648 	int num_ports, pcie_width;
7649 	u64 pcie_speed, port_speed;
7650 
7651 	MPASS(hw->port_info);
7652 
7653 	num_ports = bitcount32(hw->func_caps.common_cap.valid_functions);
7654 	port_speed = ice_phy_types_to_max_rate(hw->port_info);
7655 	pcie_speed = ice_pcie_bus_speed_to_rate(hw->bus.speed);
7656 	pcie_width = ice_pcie_lnk_width_to_int(hw->bus.width);
7657 
7658 	/*
7659 	 * If 2x100, clamp ports to 1 -- 2nd port is intended for
7660 	 * failover.
7661 	 */
7662 	if (port_speed == IF_Gbps(100))
7663 		num_ports = 1;
7664 
7665 	return !!((num_ports * port_speed) > pcie_speed * pcie_width);
7666 }
7667 
7668 /**
7669  * ice_print_bus_link_data - Print PCI-E bandwidth information
7670  * @dev: device to print string for
7671  * @hw: hw struct with PCI-e link information
7672  */
7673 static void
7674 ice_print_bus_link_data(device_t dev, struct ice_hw *hw)
7675 {
7676         device_printf(dev, "PCI Express Bus: Speed %s %s\n",
7677             ((hw->bus.speed == ice_pcie_speed_16_0GT) ? "16.0GT/s" :
7678             (hw->bus.speed == ice_pcie_speed_8_0GT) ? "8.0GT/s" :
7679             (hw->bus.speed == ice_pcie_speed_5_0GT) ? "5.0GT/s" :
7680             (hw->bus.speed == ice_pcie_speed_2_5GT) ? "2.5GT/s" : "Unknown"),
7681             (hw->bus.width == ice_pcie_lnk_x32) ? "Width x32" :
7682             (hw->bus.width == ice_pcie_lnk_x16) ? "Width x16" :
7683             (hw->bus.width == ice_pcie_lnk_x12) ? "Width x12" :
7684             (hw->bus.width == ice_pcie_lnk_x8) ? "Width x8" :
7685             (hw->bus.width == ice_pcie_lnk_x4) ? "Width x4" :
7686             (hw->bus.width == ice_pcie_lnk_x2) ? "Width x2" :
7687             (hw->bus.width == ice_pcie_lnk_x1) ? "Width x1" : "Width Unknown");
7688 }
7689 
7690 /**
7691  * ice_set_pci_link_status_data - store PCI bus info
7692  * @hw: pointer to hardware structure
7693  * @link_status: the link status word from PCI config space
7694  *
7695  * Stores the PCI bus info (speed, width, type) within the ice_hw structure
7696  **/
7697 static void
7698 ice_set_pci_link_status_data(struct ice_hw *hw, u16 link_status)
7699 {
7700 	u16 reg;
7701 
7702 	hw->bus.type = ice_bus_pci_express;
7703 
7704 	reg = (link_status & PCIEM_LINK_STA_WIDTH) >> 4;
7705 
7706 	switch (reg) {
7707 	case ice_pcie_lnk_x1:
7708 	case ice_pcie_lnk_x2:
7709 	case ice_pcie_lnk_x4:
7710 	case ice_pcie_lnk_x8:
7711 	case ice_pcie_lnk_x12:
7712 	case ice_pcie_lnk_x16:
7713 	case ice_pcie_lnk_x32:
7714 		hw->bus.width = (enum ice_pcie_link_width)reg;
7715 		break;
7716 	default:
7717 		hw->bus.width = ice_pcie_lnk_width_unknown;
7718 		break;
7719 	}
7720 
7721 	reg = (link_status & PCIEM_LINK_STA_SPEED) + 0x13;
7722 
7723 	switch (reg) {
7724 	case ice_pcie_speed_2_5GT:
7725 	case ice_pcie_speed_5_0GT:
7726 	case ice_pcie_speed_8_0GT:
7727 	case ice_pcie_speed_16_0GT:
7728 		hw->bus.speed = (enum ice_pcie_bus_speed)reg;
7729 		break;
7730 	default:
7731 		hw->bus.speed = ice_pcie_speed_unknown;
7732 		break;
7733 	}
7734 }
7735 
7736 /**
7737  * ice_init_link_events - Initialize Link Status Events mask
7738  * @sc: the device softc
7739  *
7740  * Initialize the Link Status Events mask to disable notification of link
7741  * events we don't care about in software. Also request that link status
7742  * events be enabled.
7743  */
7744 int
7745 ice_init_link_events(struct ice_softc *sc)
7746 {
7747 	struct ice_hw *hw = &sc->hw;
7748 	enum ice_status status;
7749 	u16 wanted_events;
7750 
7751 	/* Set the bits for the events that we want to be notified by */
7752 	wanted_events = (ICE_AQ_LINK_EVENT_UPDOWN |
7753 			 ICE_AQ_LINK_EVENT_MEDIA_NA |
7754 			 ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL);
7755 
7756 	/* request that every event except the wanted events be masked */
7757 	status = ice_aq_set_event_mask(hw, hw->port_info->lport, ~wanted_events, NULL);
7758 	if (status) {
7759 		device_printf(sc->dev,
7760 			      "Failed to set link status event mask, err %s aq_err %s\n",
7761 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7762 		return (EIO);
7763 	}
7764 
7765 	/* Request link info with the LSE bit set to enable link status events */
7766 	status = ice_aq_get_link_info(hw->port_info, true, NULL, NULL);
7767 	if (status) {
7768 		device_printf(sc->dev,
7769 			      "Failed to enable link status events, err %s aq_err %s\n",
7770 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7771 		return (EIO);
7772 	}
7773 
7774 	return (0);
7775 }
7776 
7777 /**
7778  * ice_handle_mdd_event - Handle possibly malicious events
7779  * @sc: the device softc
7780  *
7781  * Called by the admin task if an MDD detection interrupt is triggered.
7782  * Identifies possibly malicious events coming from VFs. Also triggers for
7783  * similar incorrect behavior from the PF as well.
7784  */
7785 void
7786 ice_handle_mdd_event(struct ice_softc *sc)
7787 {
7788 	struct ice_hw *hw = &sc->hw;
7789 	bool mdd_detected = false, request_reinit = false;
7790 	device_t dev = sc->dev;
7791 	u32 reg;
7792 
7793 	if (!ice_testandclear_state(&sc->state, ICE_STATE_MDD_PENDING))
7794 		return;
7795 
7796 	reg = rd32(hw, GL_MDET_TX_TCLAN);
7797 	if (reg & GL_MDET_TX_TCLAN_VALID_M) {
7798 		u8 pf_num  = (reg & GL_MDET_TX_TCLAN_PF_NUM_M) >> GL_MDET_TX_TCLAN_PF_NUM_S;
7799 		u16 vf_num = (reg & GL_MDET_TX_TCLAN_VF_NUM_M) >> GL_MDET_TX_TCLAN_VF_NUM_S;
7800 		u8 event   = (reg & GL_MDET_TX_TCLAN_MAL_TYPE_M) >> GL_MDET_TX_TCLAN_MAL_TYPE_S;
7801 		u16 queue  = (reg & GL_MDET_TX_TCLAN_QNUM_M) >> GL_MDET_TX_TCLAN_QNUM_S;
7802 
7803 		device_printf(dev, "Malicious Driver Detection Tx Descriptor check event '%s' on Tx queue %u PF# %u VF# %u\n",
7804 			      ice_mdd_tx_tclan_str(event), queue, pf_num, vf_num);
7805 
7806 		/* Only clear this event if it matches this PF, that way other
7807 		 * PFs can read the event and determine VF and queue number.
7808 		 */
7809 		if (pf_num == hw->pf_id)
7810 			wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff);
7811 
7812 		mdd_detected = true;
7813 	}
7814 
7815 	/* Determine what triggered the MDD event */
7816 	reg = rd32(hw, GL_MDET_TX_PQM);
7817 	if (reg & GL_MDET_TX_PQM_VALID_M) {
7818 		u8 pf_num  = (reg & GL_MDET_TX_PQM_PF_NUM_M) >> GL_MDET_TX_PQM_PF_NUM_S;
7819 		u16 vf_num = (reg & GL_MDET_TX_PQM_VF_NUM_M) >> GL_MDET_TX_PQM_VF_NUM_S;
7820 		u8 event   = (reg & GL_MDET_TX_PQM_MAL_TYPE_M) >> GL_MDET_TX_PQM_MAL_TYPE_S;
7821 		u16 queue  = (reg & GL_MDET_TX_PQM_QNUM_M) >> GL_MDET_TX_PQM_QNUM_S;
7822 
7823 		device_printf(dev, "Malicious Driver Detection Tx Quanta check event '%s' on Tx queue %u PF# %u VF# %u\n",
7824 			      ice_mdd_tx_pqm_str(event), queue, pf_num, vf_num);
7825 
7826 		/* Only clear this event if it matches this PF, that way other
7827 		 * PFs can read the event and determine VF and queue number.
7828 		 */
7829 		if (pf_num == hw->pf_id)
7830 			wr32(hw, GL_MDET_TX_PQM, 0xffffffff);
7831 
7832 		mdd_detected = true;
7833 	}
7834 
7835 	reg = rd32(hw, GL_MDET_RX);
7836 	if (reg & GL_MDET_RX_VALID_M) {
7837 		u8 pf_num  = (reg & GL_MDET_RX_PF_NUM_M) >> GL_MDET_RX_PF_NUM_S;
7838 		u16 vf_num = (reg & GL_MDET_RX_VF_NUM_M) >> GL_MDET_RX_VF_NUM_S;
7839 		u8 event   = (reg & GL_MDET_RX_MAL_TYPE_M) >> GL_MDET_RX_MAL_TYPE_S;
7840 		u16 queue  = (reg & GL_MDET_RX_QNUM_M) >> GL_MDET_RX_QNUM_S;
7841 
7842 		device_printf(dev, "Malicious Driver Detection Rx event '%s' on Rx queue %u PF# %u VF# %u\n",
7843 			      ice_mdd_rx_str(event), queue, pf_num, vf_num);
7844 
7845 		/* Only clear this event if it matches this PF, that way other
7846 		 * PFs can read the event and determine VF and queue number.
7847 		 */
7848 		if (pf_num == hw->pf_id)
7849 			wr32(hw, GL_MDET_RX, 0xffffffff);
7850 
7851 		mdd_detected = true;
7852 	}
7853 
7854 	/* Now, confirm that this event actually affects this PF, by checking
7855 	 * the PF registers.
7856 	 */
7857 	if (mdd_detected) {
7858 		reg = rd32(hw, PF_MDET_TX_TCLAN);
7859 		if (reg & PF_MDET_TX_TCLAN_VALID_M) {
7860 			wr32(hw, PF_MDET_TX_TCLAN, 0xffff);
7861 			sc->soft_stats.tx_mdd_count++;
7862 			request_reinit = true;
7863 		}
7864 
7865 		reg = rd32(hw, PF_MDET_TX_PQM);
7866 		if (reg & PF_MDET_TX_PQM_VALID_M) {
7867 			wr32(hw, PF_MDET_TX_PQM, 0xffff);
7868 			sc->soft_stats.tx_mdd_count++;
7869 			request_reinit = true;
7870 		}
7871 
7872 		reg = rd32(hw, PF_MDET_RX);
7873 		if (reg & PF_MDET_RX_VALID_M) {
7874 			wr32(hw, PF_MDET_RX, 0xffff);
7875 			sc->soft_stats.rx_mdd_count++;
7876 			request_reinit = true;
7877 		}
7878 	}
7879 
7880 	/* TODO: Implement logic to detect and handle events caused by VFs. */
7881 
7882 	/* request that the upper stack re-initialize the Tx/Rx queues */
7883 	if (request_reinit)
7884 		ice_request_stack_reinit(sc);
7885 
7886 	ice_flush(hw);
7887 }
7888 
7889 /**
7890  * ice_start_dcbx_agent - Start DCBX agent in FW via AQ command
7891  * @sc: the device softc
7892  *
7893  * @pre device is DCB capable and the FW LLDP agent has started
7894  *
7895  * Checks DCBX status and starts the DCBX agent if it is not in
7896  * a valid state via an AQ command.
7897  */
7898 static void
7899 ice_start_dcbx_agent(struct ice_softc *sc)
7900 {
7901 	struct ice_hw *hw = &sc->hw;
7902 	device_t dev = sc->dev;
7903 	bool dcbx_agent_status;
7904 	enum ice_status status;
7905 
7906 	hw->port_info->qos_cfg.dcbx_status = ice_get_dcbx_status(hw);
7907 
7908 	if (hw->port_info->qos_cfg.dcbx_status != ICE_DCBX_STATUS_DONE &&
7909 	    hw->port_info->qos_cfg.dcbx_status != ICE_DCBX_STATUS_IN_PROGRESS) {
7910 		/*
7911 		 * Start DCBX agent, but not LLDP. The return value isn't
7912 		 * checked here because a more detailed dcbx agent status is
7913 		 * retrieved and checked in ice_init_dcb() and elsewhere.
7914 		 */
7915 		status = ice_aq_start_stop_dcbx(hw, true, &dcbx_agent_status, NULL);
7916 		if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EPERM)
7917 			device_printf(dev,
7918 			    "start_stop_dcbx failed, err %s aq_err %s\n",
7919 			    ice_status_str(status),
7920 			    ice_aq_str(hw->adminq.sq_last_status));
7921 	}
7922 }
7923 
7924 /**
7925  * ice_init_dcb_setup - Initialize DCB settings for HW
7926  * @sc: the device softc
7927  *
7928  * This needs to be called after the fw_lldp_agent sysctl is added, since that
7929  * can update the device's LLDP agent status if a tunable value is set.
7930  *
7931  * Get and store the initial state of DCB settings on driver load. Print out
7932  * informational messages as well.
7933  */
7934 void
7935 ice_init_dcb_setup(struct ice_softc *sc)
7936 {
7937 	struct ice_dcbx_cfg *local_dcbx_cfg;
7938 	struct ice_hw *hw = &sc->hw;
7939 	device_t dev = sc->dev;
7940 	enum ice_status status;
7941 	u8 pfcmode_ret;
7942 
7943 	/* Don't do anything if DCB isn't supported */
7944 	if (!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_DCB)) {
7945 		device_printf(dev, "%s: No DCB support\n", __func__);
7946 		return;
7947 	}
7948 
7949 	/* Starts DCBX agent if it needs starting */
7950 	ice_start_dcbx_agent(sc);
7951 
7952 	/* This sets hw->port_info->qos_cfg.is_sw_lldp */
7953 	status = ice_init_dcb(hw, true);
7954 
7955 	/* If there is an error, then FW LLDP is not in a usable state */
7956 	if (status != 0 && status != ICE_ERR_NOT_READY) {
7957 		/* Don't print an error message if the return code from the AQ
7958 		 * cmd performed in ice_init_dcb() is EPERM; that means the
7959 		 * FW LLDP engine is disabled, and that is a valid state.
7960 		 */
7961 		if (!(status == ICE_ERR_AQ_ERROR &&
7962 		      hw->adminq.sq_last_status == ICE_AQ_RC_EPERM)) {
7963 			device_printf(dev, "DCB init failed, err %s aq_err %s\n",
7964 				      ice_status_str(status),
7965 				      ice_aq_str(hw->adminq.sq_last_status));
7966 		}
7967 		hw->port_info->qos_cfg.dcbx_status = ICE_DCBX_STATUS_NOT_STARTED;
7968 	}
7969 
7970 	switch (hw->port_info->qos_cfg.dcbx_status) {
7971 	case ICE_DCBX_STATUS_DIS:
7972 		ice_debug(hw, ICE_DBG_DCB, "DCBX disabled\n");
7973 		break;
7974 	case ICE_DCBX_STATUS_NOT_STARTED:
7975 		ice_debug(hw, ICE_DBG_DCB, "DCBX not started\n");
7976 		break;
7977 	case ICE_DCBX_STATUS_MULTIPLE_PEERS:
7978 		ice_debug(hw, ICE_DBG_DCB, "DCBX detected multiple peers\n");
7979 		break;
7980 	default:
7981 		break;
7982 	}
7983 
7984 	/* LLDP disabled in FW */
7985 	if (hw->port_info->qos_cfg.is_sw_lldp) {
7986 		ice_add_rx_lldp_filter(sc);
7987 		device_printf(dev, "Firmware LLDP agent disabled\n");
7988 	}
7989 
7990 	/* Query and cache PFC mode */
7991 	status = ice_aq_query_pfc_mode(hw, &pfcmode_ret, NULL);
7992 	if (status) {
7993 		device_printf(dev, "PFC mode query failed, err %s aq_err %s\n",
7994 			      ice_status_str(status),
7995 			      ice_aq_str(hw->adminq.sq_last_status));
7996 	}
7997 	local_dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
7998 	switch (pfcmode_ret) {
7999 	case ICE_AQC_PFC_VLAN_BASED_PFC:
8000 		local_dcbx_cfg->pfc_mode = ICE_QOS_MODE_VLAN;
8001 		break;
8002 	case ICE_AQC_PFC_DSCP_BASED_PFC:
8003 		local_dcbx_cfg->pfc_mode = ICE_QOS_MODE_DSCP;
8004 		break;
8005 	default:
8006 		/* DCB is disabled, but we shouldn't get here */
8007 		break;
8008 	}
8009 
8010 	/* Set default SW MIB for init */
8011 	ice_set_default_local_mib_settings(sc);
8012 
8013 	ice_set_bit(ICE_FEATURE_DCB, sc->feat_en);
8014 }
8015 
8016 /**
8017  * ice_dcb_get_tc_map - Scans config to get bitmap of enabled TCs
8018  * @dcbcfg: DCB configuration to examine
8019  *
8020  * Scans a TC mapping table inside dcbcfg to find traffic classes
8021  * enabled and @returns a bitmask of enabled TCs
8022  */
8023 u8
8024 ice_dcb_get_tc_map(const struct ice_dcbx_cfg *dcbcfg)
8025 {
8026 	u8 tc_map = 0;
8027 	int i = 0;
8028 
8029 	switch (dcbcfg->pfc_mode) {
8030 	case ICE_QOS_MODE_VLAN:
8031 		/* XXX: "i" is actually "User Priority" here, not
8032 		 * Traffic Class, but the max for both is 8, so it works
8033 		 * out here.
8034 		 */
8035 		for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
8036 			tc_map |= BIT(dcbcfg->etscfg.prio_table[i]);
8037 		break;
8038 	case ICE_QOS_MODE_DSCP:
8039 		for (i = 0; i < ICE_DSCP_NUM_VAL; i++)
8040 			tc_map |= BIT(dcbcfg->dscp_map[i]);
8041 		break;
8042 	default:
8043 		/* Invalid Mode */
8044 		tc_map = ICE_DFLT_TRAFFIC_CLASS;
8045 		break;
8046 	}
8047 
8048 	return (tc_map);
8049 }
8050 
8051 /**
8052  * ice_dcb_get_num_tc - Get the number of TCs from DCBX config
8053  * @dcbcfg: config to retrieve number of TCs from
8054  *
8055  * @return number of contiguous TCs found in dcbcfg's ETS Configuration
8056  * Priority Assignment Table, a value from 1 to 8. If there are
8057  * non-contiguous TCs used (e.g. assigning 1 and 3 without using 2),
8058  * then returns 0.
8059  */
8060 static u8
8061 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg)
8062 {
8063 	u8 tc_map;
8064 
8065 	tc_map = ice_dcb_get_tc_map(dcbcfg);
8066 
8067 	return (ice_dcb_tc_contig(tc_map));
8068 }
8069 
8070 /**
8071  * ice_debug_print_mib_change_event - helper function to log LLDP MIB change events
8072  * @sc: the device private softc
8073  * @event: event received on a control queue
8074  *
8075  * Prints out the type and contents of an LLDP MIB change event in a DCB debug message.
8076  */
8077 static void
8078 ice_debug_print_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event)
8079 {
8080 	struct ice_aqc_lldp_get_mib *params =
8081 	    (struct ice_aqc_lldp_get_mib *)&event->desc.params.lldp_get_mib;
8082 	u8 mib_type, bridge_type, tx_status;
8083 
8084 	static const char* mib_type_strings[] = {
8085 	    "Local MIB",
8086 	    "Remote MIB",
8087 	    "Reserved",
8088 	    "Reserved"
8089 	};
8090 	static const char* bridge_type_strings[] = {
8091 	    "Nearest Bridge",
8092 	    "Non-TPMR Bridge",
8093 	    "Reserved",
8094 	    "Reserved"
8095 	};
8096 	static const char* tx_status_strings[] = {
8097 	    "Port's TX active",
8098 	    "Port's TX suspended and drained",
8099 	    "Reserved",
8100 	    "Port's TX suspended and drained; blocked TC pipe flushed"
8101 	};
8102 
8103 	mib_type = (params->type & ICE_AQ_LLDP_MIB_TYPE_M) >>
8104 	    ICE_AQ_LLDP_MIB_TYPE_S;
8105 	bridge_type = (params->type & ICE_AQ_LLDP_BRID_TYPE_M) >>
8106 	    ICE_AQ_LLDP_BRID_TYPE_S;
8107 	tx_status = (params->type & ICE_AQ_LLDP_TX_M) >>
8108 	    ICE_AQ_LLDP_TX_S;
8109 
8110 	ice_debug(&sc->hw, ICE_DBG_DCB, "LLDP MIB Change Event (%s, %s, %s)\n",
8111 	    mib_type_strings[mib_type], bridge_type_strings[bridge_type],
8112 	    tx_status_strings[tx_status]);
8113 
8114 	/* Nothing else to report */
8115 	if (!event->msg_buf)
8116 		return;
8117 
8118 	ice_debug(&sc->hw, ICE_DBG_DCB, "- %s contents:\n", mib_type_strings[mib_type]);
8119 	ice_debug_array(&sc->hw, ICE_DBG_DCB, 16, 1, event->msg_buf,
8120 			event->msg_len);
8121 }
8122 
8123 /**
8124  * ice_dcb_needs_reconfig - Returns true if driver needs to reconfigure
8125  * @sc: the device private softc
8126  * @old_cfg: Old DCBX configuration to compare against
8127  * @new_cfg: New DCBX configuration to check
8128  *
8129  * @return true if something changed in new_cfg that requires the driver
8130  * to do some reconfiguration.
8131  */
8132 static bool
8133 ice_dcb_needs_reconfig(struct ice_softc *sc, struct ice_dcbx_cfg *old_cfg,
8134     struct ice_dcbx_cfg *new_cfg)
8135 {
8136 	struct ice_hw *hw = &sc->hw;
8137 	bool needs_reconfig = false;
8138 
8139 	/* No change detected in DCBX config */
8140 	if (!memcmp(old_cfg, new_cfg, sizeof(*old_cfg))) {
8141 		ice_debug(hw, ICE_DBG_DCB,
8142 		    "No change detected in local DCBX configuration\n");
8143 		return (false);
8144 	}
8145 
8146 	/* Check if ETS config has changed */
8147 	if (memcmp(&new_cfg->etscfg, &old_cfg->etscfg,
8148 		   sizeof(new_cfg->etscfg))) {
8149 		/* If Priority Table has changed, then driver reconfig is needed */
8150 		if (memcmp(&new_cfg->etscfg.prio_table,
8151 			   &old_cfg->etscfg.prio_table,
8152 			   sizeof(new_cfg->etscfg.prio_table))) {
8153 			ice_debug(hw, ICE_DBG_DCB, "ETS UP2TC changed\n");
8154 			needs_reconfig = true;
8155 		}
8156 
8157 		/* These are just informational */
8158 		if (memcmp(&new_cfg->etscfg.tcbwtable,
8159 			   &old_cfg->etscfg.tcbwtable,
8160 			   sizeof(new_cfg->etscfg.tcbwtable))) {
8161 			ice_debug(hw, ICE_DBG_DCB, "ETS TCBW table changed\n");
8162 			needs_reconfig = true;
8163 		}
8164 
8165 		if (memcmp(&new_cfg->etscfg.tsatable,
8166 			   &old_cfg->etscfg.tsatable,
8167 			   sizeof(new_cfg->etscfg.tsatable))) {
8168 			ice_debug(hw, ICE_DBG_DCB, "ETS TSA table changed\n");
8169 			needs_reconfig = true;
8170 		}
8171 	}
8172 
8173 	/* Check if PFC config has changed */
8174 	if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) {
8175 		ice_debug(hw, ICE_DBG_DCB, "PFC config changed\n");
8176 		needs_reconfig = true;
8177 	}
8178 
8179 	/* Check if APP table has changed */
8180 	if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app)))
8181 		ice_debug(hw, ICE_DBG_DCB, "APP Table changed\n");
8182 
8183 	ice_debug(hw, ICE_DBG_DCB, "%s result: %d\n", __func__, needs_reconfig);
8184 
8185 	return (needs_reconfig);
8186 }
8187 
8188 /**
8189  * ice_stop_pf_vsi - Stop queues for PF LAN VSI
8190  * @sc: the device private softc
8191  *
8192  * Flushes interrupts and stops the queues associated with the PF LAN VSI.
8193  */
8194 static void
8195 ice_stop_pf_vsi(struct ice_softc *sc)
8196 {
8197 	/* Dissociate the Tx and Rx queues from the interrupts */
8198 	ice_flush_txq_interrupts(&sc->pf_vsi);
8199 	ice_flush_rxq_interrupts(&sc->pf_vsi);
8200 
8201 	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
8202 		return;
8203 
8204 	/* Disable the Tx and Rx queues */
8205 	ice_vsi_disable_tx(&sc->pf_vsi);
8206 	ice_control_rx_queues(&sc->pf_vsi, false);
8207 }
8208 
8209 /**
8210  * ice_vsi_setup_q_map - Setup a VSI queue map
8211  * @vsi: the VSI being configured
8212  * @ctxt: VSI context structure
8213  */
8214 static void
8215 ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
8216 {
8217 	u16 qcounts[ICE_MAX_TRAFFIC_CLASS] = {};
8218 	u16 offset = 0, qmap = 0, pow = 0;
8219 	u16 num_q_per_tc, qcount_rx, rem_queues;
8220 	int i, j, k;
8221 
8222 	if (vsi->num_tcs == 0) {
8223 		/* at least TC0 should be enabled by default */
8224 		vsi->num_tcs = 1;
8225 		vsi->tc_map = 0x1;
8226 	}
8227 
8228 	qcount_rx = vsi->num_rx_queues;
8229 	num_q_per_tc = min(qcount_rx / vsi->num_tcs, ICE_MAX_RXQS_PER_TC);
8230 
8231 	if (!num_q_per_tc)
8232 		num_q_per_tc = 1;
8233 
8234 	/* Set initial values for # of queues to use for each active TC */
8235 	ice_for_each_traffic_class(i)
8236 		if (i < vsi->num_tcs)
8237 			qcounts[i] = num_q_per_tc;
8238 
8239 	/* If any queues are unassigned, add them to TC 0 */
8240 	rem_queues = qcount_rx % vsi->num_tcs;
8241 	if (rem_queues > 0)
8242 		qcounts[0] += rem_queues;
8243 
8244 	/* TC mapping is a function of the number of Rx queues assigned to the
8245 	 * VSI for each traffic class and the offset of these queues.
8246 	 * The first 10 bits are for queue offset for TC0, next 4 bits for no:of
8247 	 * queues allocated to TC0. No:of queues is a power-of-2.
8248 	 *
8249 	 * If TC is not enabled, the queue offset is set to 0, and allocate one
8250 	 * queue, this way, traffic for the given TC will be sent to the default
8251 	 * queue.
8252 	 *
8253 	 * Setup number and offset of Rx queues for all TCs for the VSI
8254 	 */
8255 	ice_for_each_traffic_class(i) {
8256 		if (!(vsi->tc_map & BIT(i))) {
8257 			/* TC is not enabled */
8258 			vsi->tc_info[i].qoffset = 0;
8259 			vsi->tc_info[i].qcount_rx = 1;
8260 			vsi->tc_info[i].qcount_tx = 1;
8261 
8262 			ctxt->info.tc_mapping[i] = 0;
8263 			continue;
8264 		}
8265 
8266 		/* TC is enabled */
8267 		vsi->tc_info[i].qoffset = offset;
8268 		vsi->tc_info[i].qcount_rx = qcounts[i];
8269 		vsi->tc_info[i].qcount_tx = qcounts[i];
8270 
8271 		/* find the (rounded up) log-2 of queue count for current TC */
8272 		pow = fls(qcounts[i] - 1);
8273 
8274 		qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
8275 			ICE_AQ_VSI_TC_Q_OFFSET_M) |
8276 			((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
8277 			 ICE_AQ_VSI_TC_Q_NUM_M);
8278 		ctxt->info.tc_mapping[i] = CPU_TO_LE16(qmap);
8279 
8280 		/* Store traffic class and handle data in queue structures */
8281 		for (j = offset, k = 0; j < offset + qcounts[i]; j++, k++) {
8282 			vsi->tx_queues[j].q_handle = k;
8283 			vsi->tx_queues[j].tc = i;
8284 
8285 			vsi->rx_queues[j].tc = i;
8286 		}
8287 
8288 		offset += qcounts[i];
8289 	}
8290 
8291 	/* Rx queue mapping */
8292 	ctxt->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_CONTIG);
8293 	ctxt->info.q_mapping[0] = CPU_TO_LE16(vsi->rx_qmap[0]);
8294 	ctxt->info.q_mapping[1] = CPU_TO_LE16(vsi->num_rx_queues);
8295 }
8296 
8297 /**
8298  * ice_pf_vsi_cfg_tc - Configure PF VSI for a given TC map
8299  * @sc: the device private softc
8300  * @tc_map: traffic class bitmap
8301  *
8302  * @pre VSI queues are stopped
8303  *
8304  * @return 0 if configuration is successful
8305  * @return EIO if Update VSI AQ cmd fails
8306  * @return ENODEV if updating Tx Scheduler fails
8307  */
8308 static int
8309 ice_pf_vsi_cfg_tc(struct ice_softc *sc, u8 tc_map)
8310 {
8311 	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
8312 	struct ice_vsi *vsi = &sc->pf_vsi;
8313 	struct ice_hw *hw = &sc->hw;
8314 	struct ice_vsi_ctx ctx = { 0 };
8315 	device_t dev = sc->dev;
8316 	enum ice_status status;
8317 	u8 num_tcs = 0;
8318 	int i = 0;
8319 
8320 	/* Count the number of enabled Traffic Classes */
8321 	ice_for_each_traffic_class(i)
8322 		if (tc_map & BIT(i))
8323 			num_tcs++;
8324 
8325 	vsi->tc_map = tc_map;
8326 	vsi->num_tcs = num_tcs;
8327 
8328 	/* Set default parameters for context */
8329 	ctx.vf_num = 0;
8330 	ctx.info = vsi->info;
8331 
8332 	/* Setup queue map */
8333 	ice_vsi_setup_q_map(vsi, &ctx);
8334 
8335 	/* Update VSI configuration in firmware (RX queues) */
8336 	ctx.info.valid_sections = CPU_TO_LE16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
8337 	status = ice_update_vsi(hw, vsi->idx, &ctx, NULL);
8338 	if (status) {
8339 		device_printf(dev,
8340 		    "%s: Update VSI AQ call failed, err %s aq_err %s\n",
8341 		    __func__, ice_status_str(status),
8342 		    ice_aq_str(hw->adminq.sq_last_status));
8343 		return (EIO);
8344 	}
8345 	vsi->info = ctx.info;
8346 
8347 	/* Use values derived in ice_vsi_setup_q_map() */
8348 	for (i = 0; i < num_tcs; i++)
8349 		max_txqs[i] = vsi->tc_info[i].qcount_tx;
8350 
8351 	if (hw->debug_mask & ICE_DBG_DCB) {
8352 		device_printf(dev, "%s: max_txqs:", __func__);
8353 		ice_for_each_traffic_class(i)
8354 			printf(" %d", max_txqs[i]);
8355 		printf("\n");
8356 	}
8357 
8358 	/* Update LAN Tx queue info in firmware */
8359 	status = ice_cfg_vsi_lan(hw->port_info, vsi->idx, vsi->tc_map,
8360 				 max_txqs);
8361 	if (status) {
8362 		device_printf(dev,
8363 		    "%s: Failed VSI lan queue config, err %s aq_err %s\n",
8364 		    __func__, ice_status_str(status),
8365 		    ice_aq_str(hw->adminq.sq_last_status));
8366 		return (ENODEV);
8367 	}
8368 
8369 	vsi->info.valid_sections = 0;
8370 
8371 	return (0);
8372 }
8373 
8374 /**
8375  * ice_dcb_tc_contig - Count TCs if they're contiguous
8376  * @tc_map: pointer to priority table
8377  *
8378  * @return The number of traffic classes in
8379  * an 8-bit TC bitmap, or if there is a gap, then returns 0.
8380  */
8381 static u8
8382 ice_dcb_tc_contig(u8 tc_map)
8383 {
8384 	bool tc_unused = false;
8385 	u8 ret = 0;
8386 
8387 	/* Scan bitmask for contiguous TCs starting with TC0 */
8388 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
8389 		if (tc_map & BIT(i)) {
8390 			if (!tc_unused) {
8391 				ret++;
8392 			} else {
8393 				/* Non-contiguous TCs detected */
8394 				return (0);
8395 			}
8396 		} else
8397 			tc_unused = true;
8398 	}
8399 
8400 	return (ret);
8401 }
8402 
8403 /**
8404  * ice_dcb_recfg - Reconfigure VSI with new DCB settings
8405  * @sc: the device private softc
8406  *
8407  * @pre All VSIs have been disabled/stopped
8408  *
8409  * Reconfigures VSI settings based on local_dcbx_cfg.
8410  */
8411 static void
8412 ice_dcb_recfg(struct ice_softc *sc)
8413 {
8414 	struct ice_dcbx_cfg *dcbcfg =
8415 	    &sc->hw.port_info->qos_cfg.local_dcbx_cfg;
8416 	device_t dev = sc->dev;
8417 	u8 tc_map = 0;
8418 	int ret;
8419 
8420 	tc_map = ice_dcb_get_tc_map(dcbcfg);
8421 
8422 	/* If non-contiguous TCs are used, then configure
8423 	 * the default TC instead. There's no support for
8424 	 * non-contiguous TCs being used.
8425 	 */
8426 	if (ice_dcb_tc_contig(tc_map) == 0) {
8427 		tc_map = ICE_DFLT_TRAFFIC_CLASS;
8428 		ice_set_default_local_lldp_mib(sc);
8429 	}
8430 
8431 	/* Reconfigure VSI queues to add/remove traffic classes */
8432 	ret = ice_pf_vsi_cfg_tc(sc, tc_map);
8433 	if (ret)
8434 		device_printf(dev,
8435 		    "Failed to configure TCs for PF VSI, err %s\n",
8436 		    ice_err_str(ret));
8437 
8438 }
8439 
8440 /**
8441  * ice_set_default_local_mib_settings - Set Local LLDP MIB to default settings
8442  * @sc: device softc structure
8443  *
8444  * Overwrites the driver's SW local LLDP MIB with default settings. This
8445  * ensures the driver has a valid MIB when it next uses the Set Local LLDP MIB
8446  * admin queue command.
8447  */
8448 static void
8449 ice_set_default_local_mib_settings(struct ice_softc *sc)
8450 {
8451 	struct ice_dcbx_cfg *dcbcfg;
8452 	struct ice_hw *hw = &sc->hw;
8453 	struct ice_port_info *pi;
8454 	u8 maxtcs, maxtcs_ets, old_pfc_mode;
8455 
8456 	pi = hw->port_info;
8457 
8458 	dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
8459 
8460 	maxtcs = hw->func_caps.common_cap.maxtc;
8461 	/* This value is only 3 bits; 8 TCs maps to 0 */
8462 	maxtcs_ets = maxtcs & ICE_IEEE_ETS_MAXTC_M;
8463 
8464 	/* VLAN vs DSCP mode needs to be preserved */
8465 	old_pfc_mode = dcbcfg->pfc_mode;
8466 
8467 	/**
8468 	 * Setup the default settings used by the driver for the Set Local
8469 	 * LLDP MIB Admin Queue command (0x0A08). (1TC w/ 100% BW, ETS, no
8470 	 * PFC, TSA=2).
8471 	 */
8472 	memset(dcbcfg, 0, sizeof(*dcbcfg));
8473 
8474 	dcbcfg->etscfg.willing = 1;
8475 	dcbcfg->etscfg.tcbwtable[0] = 100;
8476 	dcbcfg->etscfg.maxtcs = maxtcs_ets;
8477 	dcbcfg->etscfg.tsatable[0] = 2;
8478 
8479 	dcbcfg->etsrec = dcbcfg->etscfg;
8480 	dcbcfg->etsrec.willing = 0;
8481 
8482 	dcbcfg->pfc.willing = 1;
8483 	dcbcfg->pfc.pfccap = maxtcs;
8484 
8485 	dcbcfg->pfc_mode = old_pfc_mode;
8486 }
8487 
8488 /**
8489  * ice_do_dcb_reconfig - notify RDMA and reconfigure PF LAN VSI
8490  * @sc: the device private softc
8491  * @pending_mib: FW has a pending MIB change to execute
8492  *
8493  * @pre Determined that the DCB configuration requires a change
8494  *
8495  * Reconfigures the PF LAN VSI based on updated DCB configuration
8496  * found in the hw struct's/port_info's/ local dcbx configuration.
8497  */
8498 static void
8499 ice_do_dcb_reconfig(struct ice_softc *sc, bool pending_mib)
8500 {
8501 	struct ice_aqc_port_ets_elem port_ets = { 0 };
8502 	struct ice_dcbx_cfg *local_dcbx_cfg;
8503 	struct ice_hw *hw = &sc->hw;
8504 	struct ice_port_info *pi;
8505 	device_t dev = sc->dev;
8506 	enum ice_status status;
8507 
8508 	pi = sc->hw.port_info;
8509 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
8510 
8511 	ice_rdma_notify_dcb_qos_change(sc);
8512 	/* If there's a pending MIB, tell the FW to execute the MIB change
8513 	 * now.
8514 	 */
8515 	if (pending_mib) {
8516 		status = ice_lldp_execute_pending_mib(hw);
8517 		if ((status == ICE_ERR_AQ_ERROR) &&
8518 		    (hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)) {
8519 			device_printf(dev,
8520 			    "Execute Pending LLDP MIB AQ call failed, no pending MIB\n");
8521 		} else if (status) {
8522 			device_printf(dev,
8523 			    "Execute Pending LLDP MIB AQ call failed, err %s aq_err %s\n",
8524 			    ice_status_str(status),
8525 			    ice_aq_str(hw->adminq.sq_last_status));
8526 			/* This won't break traffic, but QoS will not work as expected */
8527 		}
8528 	}
8529 
8530 	/* Set state when there's more than one TC */
8531 	if (ice_dcb_get_num_tc(local_dcbx_cfg) > 1) {
8532 		device_printf(dev, "Multiple traffic classes enabled\n");
8533 		ice_set_state(&sc->state, ICE_STATE_MULTIPLE_TCS);
8534 	} else {
8535 		device_printf(dev, "Multiple traffic classes disabled\n");
8536 		ice_clear_state(&sc->state, ICE_STATE_MULTIPLE_TCS);
8537 	}
8538 
8539 	/* Disable PF VSI since it's going to be reconfigured */
8540 	ice_stop_pf_vsi(sc);
8541 
8542 	/* Query ETS configuration and update SW Tx scheduler info */
8543 	status = ice_query_port_ets(pi, &port_ets, sizeof(port_ets), NULL);
8544 	if (status != ICE_SUCCESS) {
8545 		device_printf(dev,
8546 		    "Query Port ETS AQ call failed, err %s aq_err %s\n",
8547 		    ice_status_str(status),
8548 		    ice_aq_str(hw->adminq.sq_last_status));
8549 		/* This won't break traffic, but QoS will not work as expected */
8550 	}
8551 
8552 	/* Change PF VSI configuration */
8553 	ice_dcb_recfg(sc);
8554 
8555 	/* Send new configuration to RDMA client driver */
8556 	ice_rdma_dcb_qos_update(sc, pi);
8557 
8558 	ice_request_stack_reinit(sc);
8559 }
8560 
8561 /**
8562  * ice_handle_mib_change_event - helper function to handle LLDP MIB change events
8563  * @sc: the device private softc
8564  * @event: event received on a control queue
8565  *
8566  * Checks the updated MIB it receives and possibly reconfigures the PF LAN
8567  * VSI depending on what has changed. This will also print out some debug
8568  * information about the MIB event if ICE_DBG_DCB is enabled in the debug_mask.
8569  */
8570 static void
8571 ice_handle_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event)
8572 {
8573 	struct ice_aqc_lldp_get_mib *params =
8574 	    (struct ice_aqc_lldp_get_mib *)&event->desc.params.lldp_get_mib;
8575 	struct ice_dcbx_cfg tmp_dcbx_cfg, *local_dcbx_cfg;
8576 	struct ice_port_info *pi;
8577 	device_t dev = sc->dev;
8578 	struct ice_hw *hw = &sc->hw;
8579 	bool needs_reconfig, mib_is_pending;
8580 	enum ice_status status;
8581 	u8 mib_type, bridge_type;
8582 
8583 	ASSERT_CFG_LOCKED(sc);
8584 
8585 	ice_debug_print_mib_change_event(sc, event);
8586 
8587 	pi = sc->hw.port_info;
8588 
8589 	mib_type = (params->type & ICE_AQ_LLDP_MIB_TYPE_M) >>
8590 	    ICE_AQ_LLDP_MIB_TYPE_S;
8591 	bridge_type = (params->type & ICE_AQ_LLDP_BRID_TYPE_M) >>
8592 	    ICE_AQ_LLDP_BRID_TYPE_S;
8593 	mib_is_pending = (params->state & ICE_AQ_LLDP_MIB_CHANGE_STATE_M) >>
8594 	    ICE_AQ_LLDP_MIB_CHANGE_STATE_S;
8595 
8596 	/* Ignore if event is not for Nearest Bridge */
8597 	if (bridge_type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID)
8598 		return;
8599 
8600 	/* Check MIB Type and return if event for Remote MIB update */
8601 	if (mib_type == ICE_AQ_LLDP_MIB_REMOTE) {
8602 		/* Update the cached remote MIB and return */
8603 		status = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
8604 					 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID,
8605 					 &pi->qos_cfg.remote_dcbx_cfg);
8606 		if (status)
8607 			device_printf(dev,
8608 			    "%s: Failed to get Remote DCB config; status %s, aq_err %s\n",
8609 			    __func__, ice_status_str(status),
8610 			    ice_aq_str(hw->adminq.sq_last_status));
8611 		/* Not fatal if this fails */
8612 		return;
8613 	}
8614 
8615 	/* Save line length by aliasing the local dcbx cfg */
8616 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
8617 	/* Save off the old configuration and clear current config */
8618 	tmp_dcbx_cfg = *local_dcbx_cfg;
8619 	memset(local_dcbx_cfg, 0, sizeof(*local_dcbx_cfg));
8620 
8621 	/* Update the current local_dcbx_cfg with new data */
8622 	if (mib_is_pending) {
8623 		ice_get_dcb_cfg_from_mib_change(pi, event);
8624 	} else {
8625 		/* Get updated DCBX data from firmware */
8626 		status = ice_get_dcb_cfg(pi);
8627 		if (status) {
8628 			device_printf(dev,
8629 			    "%s: Failed to get Local DCB config; status %s, aq_err %s\n",
8630 			    __func__, ice_status_str(status),
8631 			    ice_aq_str(hw->adminq.sq_last_status));
8632 			return;
8633 		}
8634 	}
8635 
8636 	/* Check to see if DCB needs reconfiguring */
8637 	needs_reconfig = ice_dcb_needs_reconfig(sc, &tmp_dcbx_cfg,
8638 	    local_dcbx_cfg);
8639 
8640 	if (!needs_reconfig && !mib_is_pending)
8641 		return;
8642 
8643 	/* Reconfigure -- this will also notify FW that configuration is done,
8644 	 * if the FW MIB change is only pending instead of executed.
8645 	 */
8646 	ice_do_dcb_reconfig(sc, mib_is_pending);
8647 }
8648 
8649 /**
8650  * ice_send_version - Send driver version to firmware
8651  * @sc: the device private softc
8652  *
8653  * Send the driver version to the firmware. This must be called as early as
8654  * possible after ice_init_hw().
8655  */
8656 int
8657 ice_send_version(struct ice_softc *sc)
8658 {
8659 	struct ice_driver_ver driver_version = {0};
8660 	struct ice_hw *hw = &sc->hw;
8661 	device_t dev = sc->dev;
8662 	enum ice_status status;
8663 
8664 	driver_version.major_ver = ice_major_version;
8665 	driver_version.minor_ver = ice_minor_version;
8666 	driver_version.build_ver = ice_patch_version;
8667 	driver_version.subbuild_ver = ice_rc_version;
8668 
8669 	strlcpy((char *)driver_version.driver_string, ice_driver_version,
8670 		sizeof(driver_version.driver_string));
8671 
8672 	status = ice_aq_send_driver_ver(hw, &driver_version, NULL);
8673 	if (status) {
8674 		device_printf(dev, "Unable to send driver version to firmware, err %s aq_err %s\n",
8675 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
8676 		return (EIO);
8677 	}
8678 
8679 	return (0);
8680 }
8681 
8682 /**
8683  * ice_handle_lan_overflow_event - helper function to log LAN overflow events
8684  * @sc: device softc
8685  * @event: event received on a control queue
8686  *
8687  * Prints out a message when a LAN overflow event is detected on a receive
8688  * queue.
8689  */
8690 static void
8691 ice_handle_lan_overflow_event(struct ice_softc *sc, struct ice_rq_event_info *event)
8692 {
8693 	struct ice_aqc_event_lan_overflow *params =
8694 	    (struct ice_aqc_event_lan_overflow *)&event->desc.params.lan_overflow;
8695 	struct ice_hw *hw = &sc->hw;
8696 
8697 	ice_debug(hw, ICE_DBG_DCB, "LAN overflow event detected, prtdcb_ruptq=0x%08x, qtx_ctl=0x%08x\n",
8698 		  LE32_TO_CPU(params->prtdcb_ruptq),
8699 		  LE32_TO_CPU(params->qtx_ctl));
8700 }
8701 
8702 /**
8703  * ice_add_ethertype_to_list - Add an Ethertype filter to a filter list
8704  * @vsi: the VSI to target packets to
8705  * @list: the list to add the filter to
8706  * @ethertype: the Ethertype to filter on
8707  * @direction: The direction of the filter (Tx or Rx)
8708  * @action: the action to take
8709  *
8710  * Add an Ethertype filter to a filter list. Used to forward a series of
8711  * filters to the firmware for configuring the switch.
8712  *
8713  * Returns 0 on success, and an error code on failure.
8714  */
8715 static int
8716 ice_add_ethertype_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
8717 			  u16 ethertype, u16 direction,
8718 			  enum ice_sw_fwd_act_type action)
8719 {
8720 	struct ice_fltr_list_entry *entry;
8721 
8722 	MPASS((direction == ICE_FLTR_TX) || (direction == ICE_FLTR_RX));
8723 
8724 	entry = (__typeof(entry))malloc(sizeof(*entry), M_ICE, M_NOWAIT|M_ZERO);
8725 	if (!entry)
8726 		return (ENOMEM);
8727 
8728 	entry->fltr_info.flag = direction;
8729 	entry->fltr_info.src_id = ICE_SRC_ID_VSI;
8730 	entry->fltr_info.lkup_type = ICE_SW_LKUP_ETHERTYPE;
8731 	entry->fltr_info.fltr_act = action;
8732 	entry->fltr_info.vsi_handle = vsi->idx;
8733 	entry->fltr_info.l_data.ethertype_mac.ethertype = ethertype;
8734 
8735 	LIST_ADD(&entry->list_entry, list);
8736 
8737 	return 0;
8738 }
8739 
8740 #define ETHERTYPE_PAUSE_FRAMES 0x8808
8741 #define ETHERTYPE_LLDP_FRAMES 0x88cc
8742 
8743 /**
8744  * ice_cfg_pf_ethertype_filters - Configure switch to drop ethertypes
8745  * @sc: the device private softc
8746  *
8747  * Configure the switch to drop PAUSE frames and LLDP frames transmitted from
8748  * the host. This prevents malicious VFs from sending these frames and being
8749  * able to control or configure the network.
8750  */
8751 int
8752 ice_cfg_pf_ethertype_filters(struct ice_softc *sc)
8753 {
8754 	struct ice_list_head ethertype_list;
8755 	struct ice_vsi *vsi = &sc->pf_vsi;
8756 	struct ice_hw *hw = &sc->hw;
8757 	device_t dev = sc->dev;
8758 	enum ice_status status;
8759 	int err = 0;
8760 
8761 	INIT_LIST_HEAD(&ethertype_list);
8762 
8763 	/*
8764 	 * Note that the switch filters will ignore the VSI index for the drop
8765 	 * action, so we only need to program drop filters once for the main
8766 	 * VSI.
8767 	 */
8768 
8769 	/* Configure switch to drop all Tx pause frames coming from any VSI. */
8770 	if (sc->enable_tx_fc_filter) {
8771 		err = ice_add_ethertype_to_list(vsi, &ethertype_list,
8772 						ETHERTYPE_PAUSE_FRAMES,
8773 						ICE_FLTR_TX, ICE_DROP_PACKET);
8774 		if (err)
8775 			goto free_ethertype_list;
8776 	}
8777 
8778 	/* Configure switch to drop LLDP frames coming from any VSI */
8779 	if (sc->enable_tx_lldp_filter) {
8780 		err = ice_add_ethertype_to_list(vsi, &ethertype_list,
8781 						ETHERTYPE_LLDP_FRAMES,
8782 						ICE_FLTR_TX, ICE_DROP_PACKET);
8783 		if (err)
8784 			goto free_ethertype_list;
8785 	}
8786 
8787 	status = ice_add_eth_mac(hw, &ethertype_list);
8788 	if (status) {
8789 		device_printf(dev,
8790 			      "Failed to add Tx Ethertype filters, err %s aq_err %s\n",
8791 			      ice_status_str(status),
8792 			      ice_aq_str(hw->adminq.sq_last_status));
8793 		err = (EIO);
8794 	}
8795 
8796 free_ethertype_list:
8797 	ice_free_fltr_list(&ethertype_list);
8798 	return err;
8799 }
8800 
8801 /**
8802  * ice_add_rx_lldp_filter - add ethertype filter for Rx LLDP frames
8803  * @sc: the device private structure
8804  *
8805  * Add a switch ethertype filter which forwards the LLDP frames to the main PF
8806  * VSI. Called when the fw_lldp_agent is disabled, to allow the LLDP frames to
8807  * be forwarded to the stack.
8808  */
8809 static void
8810 ice_add_rx_lldp_filter(struct ice_softc *sc)
8811 {
8812 	struct ice_list_head ethertype_list;
8813 	struct ice_vsi *vsi = &sc->pf_vsi;
8814 	struct ice_hw *hw = &sc->hw;
8815 	device_t dev = sc->dev;
8816 	enum ice_status status;
8817 	int err;
8818 	u16 vsi_num;
8819 
8820 	/*
8821 	 * If FW is new enough, use a direct AQ command to perform the filter
8822 	 * addition.
8823 	 */
8824 	if (ice_fw_supports_lldp_fltr_ctrl(hw)) {
8825 		vsi_num = ice_get_hw_vsi_num(hw, vsi->idx);
8826 		status = ice_lldp_fltr_add_remove(hw, vsi_num, true);
8827 		if (status) {
8828 			device_printf(dev,
8829 			    "Failed to add Rx LLDP filter, err %s aq_err %s\n",
8830 			    ice_status_str(status),
8831 			    ice_aq_str(hw->adminq.sq_last_status));
8832 		} else
8833 			ice_set_state(&sc->state,
8834 			    ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER);
8835 		return;
8836 	}
8837 
8838 	INIT_LIST_HEAD(&ethertype_list);
8839 
8840 	/* Forward Rx LLDP frames to the stack */
8841 	err = ice_add_ethertype_to_list(vsi, &ethertype_list,
8842 					ETHERTYPE_LLDP_FRAMES,
8843 					ICE_FLTR_RX, ICE_FWD_TO_VSI);
8844 	if (err) {
8845 		device_printf(dev,
8846 			      "Failed to add Rx LLDP filter, err %s\n",
8847 			      ice_err_str(err));
8848 		goto free_ethertype_list;
8849 	}
8850 
8851 	status = ice_add_eth_mac(hw, &ethertype_list);
8852 	if (status && status != ICE_ERR_ALREADY_EXISTS) {
8853 		device_printf(dev,
8854 			      "Failed to add Rx LLDP filter, err %s aq_err %s\n",
8855 			      ice_status_str(status),
8856 			      ice_aq_str(hw->adminq.sq_last_status));
8857 	} else {
8858 		/*
8859 		 * If status == ICE_ERR_ALREADY_EXISTS, we won't treat an
8860 		 * already existing filter as an error case.
8861 		 */
8862 		ice_set_state(&sc->state, ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER);
8863 	}
8864 
8865 free_ethertype_list:
8866 	ice_free_fltr_list(&ethertype_list);
8867 }
8868 
8869 /**
8870  * ice_del_rx_lldp_filter - Remove ethertype filter for Rx LLDP frames
8871  * @sc: the device private structure
8872  *
8873  * Remove the switch filter forwarding LLDP frames to the main PF VSI, called
8874  * when the firmware LLDP agent is enabled, to stop routing LLDP frames to the
8875  * stack.
8876  */
8877 static void
8878 ice_del_rx_lldp_filter(struct ice_softc *sc)
8879 {
8880 	struct ice_list_head ethertype_list;
8881 	struct ice_vsi *vsi = &sc->pf_vsi;
8882 	struct ice_hw *hw = &sc->hw;
8883 	device_t dev = sc->dev;
8884 	enum ice_status status;
8885 	int err;
8886 	u16 vsi_num;
8887 
8888 	/*
8889 	 * Only in the scenario where the driver added the filter during
8890 	 * this session (while the driver was loaded) would we be able to
8891 	 * delete this filter.
8892 	 */
8893 	if (!ice_test_state(&sc->state, ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER))
8894 		return;
8895 
8896 	/*
8897 	 * If FW is new enough, use a direct AQ command to perform the filter
8898 	 * removal.
8899 	 */
8900 	if (ice_fw_supports_lldp_fltr_ctrl(hw)) {
8901 		vsi_num = ice_get_hw_vsi_num(hw, vsi->idx);
8902 		status = ice_lldp_fltr_add_remove(hw, vsi_num, false);
8903 		if (status) {
8904 			device_printf(dev,
8905 			    "Failed to remove Rx LLDP filter, err %s aq_err %s\n",
8906 			    ice_status_str(status),
8907 			    ice_aq_str(hw->adminq.sq_last_status));
8908 		}
8909 		return;
8910 	}
8911 
8912 	INIT_LIST_HEAD(&ethertype_list);
8913 
8914 	/* Remove filter forwarding Rx LLDP frames to the stack */
8915 	err = ice_add_ethertype_to_list(vsi, &ethertype_list,
8916 					ETHERTYPE_LLDP_FRAMES,
8917 					ICE_FLTR_RX, ICE_FWD_TO_VSI);
8918 	if (err) {
8919 		device_printf(dev,
8920 			      "Failed to remove Rx LLDP filter, err %s\n",
8921 			      ice_err_str(err));
8922 		goto free_ethertype_list;
8923 	}
8924 
8925 	status = ice_remove_eth_mac(hw, &ethertype_list);
8926 	if (status == ICE_ERR_DOES_NOT_EXIST) {
8927 		; /* Don't complain if we try to remove a filter that doesn't exist */
8928 	} else if (status) {
8929 		device_printf(dev,
8930 			      "Failed to remove Rx LLDP filter, err %s aq_err %s\n",
8931 			      ice_status_str(status),
8932 			      ice_aq_str(hw->adminq.sq_last_status));
8933 	}
8934 
8935 free_ethertype_list:
8936 	ice_free_fltr_list(&ethertype_list);
8937 }
8938 
8939 /**
8940  * ice_init_link_configuration -- Setup link in different ways depending
8941  * on whether media is available or not.
8942  * @sc: device private structure
8943  *
8944  * Called at the end of the attach process to either set default link
8945  * parameters if there is media available, or force HW link down and
8946  * set a state bit if there is no media.
8947  */
8948 void
8949 ice_init_link_configuration(struct ice_softc *sc)
8950 {
8951 	struct ice_port_info *pi = sc->hw.port_info;
8952 	struct ice_hw *hw = &sc->hw;
8953 	device_t dev = sc->dev;
8954 	enum ice_status status;
8955 
8956 	pi->phy.get_link_info = true;
8957 	status = ice_get_link_status(pi, &sc->link_up);
8958 	if (status != ICE_SUCCESS) {
8959 		device_printf(dev,
8960 		    "%s: ice_get_link_status failed; status %s, aq_err %s\n",
8961 		    __func__, ice_status_str(status),
8962 		    ice_aq_str(hw->adminq.sq_last_status));
8963 		return;
8964 	}
8965 
8966 	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
8967 		ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
8968 		/* Apply default link settings */
8969 		ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
8970 	} else {
8971 		 /* Set link down, and poll for media available in timer. This prevents the
8972 		  * driver from receiving spurious link-related events.
8973 		  */
8974 		ice_set_state(&sc->state, ICE_STATE_NO_MEDIA);
8975 		status = ice_aq_set_link_restart_an(pi, false, NULL);
8976 		if (status != ICE_SUCCESS)
8977 			device_printf(dev,
8978 			    "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n",
8979 			    __func__, ice_status_str(status),
8980 			    ice_aq_str(hw->adminq.sq_last_status));
8981 	}
8982 }
8983 
8984 /**
8985  * ice_apply_saved_phy_req_to_cfg -- Write saved user PHY settings to cfg data
8986  * @sc: device private structure
8987  * @cfg: new PHY config data to be modified
8988  *
8989  * Applies user settings for advertised speeds to the PHY type fields in the
8990  * supplied PHY config struct. It uses the data from pcaps to check if the
8991  * saved settings are invalid and uses the pcaps data instead if they are
8992  * invalid.
8993  */
8994 static int
8995 ice_apply_saved_phy_req_to_cfg(struct ice_softc *sc,
8996 			       struct ice_aqc_set_phy_cfg_data *cfg)
8997 {
8998 	struct ice_phy_data phy_data = { 0 };
8999 	struct ice_port_info *pi = sc->hw.port_info;
9000 	u64 phy_low = 0, phy_high = 0;
9001 	u16 link_speeds;
9002 	int ret;
9003 
9004 	link_speeds = pi->phy.curr_user_speed_req;
9005 
9006 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LINK_MGMT_VER_2)) {
9007 		memset(&phy_data, 0, sizeof(phy_data));
9008 		phy_data.report_mode = ICE_AQC_REPORT_DFLT_CFG;
9009 		phy_data.user_speeds_orig = link_speeds;
9010 		ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9011 		if (ret != 0) {
9012 			/* Error message already printed within function */
9013 			return (ret);
9014 		}
9015 		phy_low = phy_data.phy_low_intr;
9016 		phy_high = phy_data.phy_high_intr;
9017 
9018 		if (link_speeds == 0 || phy_data.user_speeds_intr)
9019 			goto finalize_link_speed;
9020 		if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE)) {
9021 			memset(&phy_data, 0, sizeof(phy_data));
9022 			phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA;
9023 			phy_data.user_speeds_orig = link_speeds;
9024 			ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9025 			if (ret != 0) {
9026 				/* Error message already printed within function */
9027 				return (ret);
9028 			}
9029 			phy_low = phy_data.phy_low_intr;
9030 			phy_high = phy_data.phy_high_intr;
9031 
9032 			if (!phy_data.user_speeds_intr) {
9033 				phy_low = phy_data.phy_low_orig;
9034 				phy_high = phy_data.phy_high_orig;
9035 			}
9036 			goto finalize_link_speed;
9037 		}
9038 		/* If we're here, then it means the benefits of Version 2
9039 		 * link management aren't utilized.  We fall through to
9040 		 * handling Strict Link Mode the same as Version 1 link
9041 		 * management.
9042 		 */
9043 	}
9044 
9045 	memset(&phy_data, 0, sizeof(phy_data));
9046 	if ((link_speeds == 0) &&
9047 	    (sc->ldo_tlv.phy_type_low || sc->ldo_tlv.phy_type_high))
9048 		phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA;
9049 	else
9050 		phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_MEDIA;
9051 	phy_data.user_speeds_orig = link_speeds;
9052 	ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9053 	if (ret != 0) {
9054 		/* Error message already printed within function */
9055 		return (ret);
9056 	}
9057 	phy_low = phy_data.phy_low_intr;
9058 	phy_high = phy_data.phy_high_intr;
9059 
9060 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE)) {
9061 		if (phy_low == 0 && phy_high == 0) {
9062 			device_printf(sc->dev,
9063 			    "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
9064 			return (EINVAL);
9065 		}
9066 	} else {
9067 		if (link_speeds == 0) {
9068 			if (sc->ldo_tlv.phy_type_low & phy_low ||
9069 			    sc->ldo_tlv.phy_type_high & phy_high) {
9070 				phy_low &= sc->ldo_tlv.phy_type_low;
9071 				phy_high &= sc->ldo_tlv.phy_type_high;
9072 			}
9073 		} else if (phy_low == 0 && phy_high == 0) {
9074 			memset(&phy_data, 0, sizeof(phy_data));
9075 			phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA;
9076 			phy_data.user_speeds_orig = link_speeds;
9077 			ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9078 			if (ret != 0) {
9079 				/* Error message already printed within function */
9080 				return (ret);
9081 			}
9082 			phy_low = phy_data.phy_low_intr;
9083 			phy_high = phy_data.phy_high_intr;
9084 
9085 			if (!phy_data.user_speeds_intr) {
9086 				phy_low = phy_data.phy_low_orig;
9087 				phy_high = phy_data.phy_high_orig;
9088 			}
9089 		}
9090 	}
9091 
9092 finalize_link_speed:
9093 
9094 	/* Cache new user settings for speeds */
9095 	pi->phy.curr_user_speed_req = phy_data.user_speeds_intr;
9096 	cfg->phy_type_low = htole64(phy_low);
9097 	cfg->phy_type_high = htole64(phy_high);
9098 
9099 	return (ret);
9100 }
9101 
9102 /**
9103  * ice_apply_saved_fec_req_to_cfg -- Write saved user FEC mode to cfg data
9104  * @sc: device private structure
9105  * @cfg: new PHY config data to be modified
9106  *
9107  * Applies user setting for FEC mode to PHY config struct. It uses the data
9108  * from pcaps to check if the saved settings are invalid and uses the pcaps
9109  * data instead if they are invalid.
9110  */
9111 static int
9112 ice_apply_saved_fec_req_to_cfg(struct ice_softc *sc,
9113 			       struct ice_aqc_set_phy_cfg_data *cfg)
9114 {
9115 	struct ice_port_info *pi = sc->hw.port_info;
9116 	enum ice_status status;
9117 
9118 	cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC;
9119 	status = ice_cfg_phy_fec(pi, cfg, pi->phy.curr_user_fec_req);
9120 	if (status)
9121 		return (EIO);
9122 
9123 	return (0);
9124 }
9125 
9126 /**
9127  * ice_apply_saved_fc_req_to_cfg -- Write saved user flow control mode to cfg data
9128  * @pi: port info struct
9129  * @cfg: new PHY config data to be modified
9130  *
9131  * Applies user setting for flow control mode to PHY config struct. There are
9132  * no invalid flow control mode settings; if there are, then this function
9133  * treats them like "ICE_FC_NONE".
9134  */
9135 static void
9136 ice_apply_saved_fc_req_to_cfg(struct ice_port_info *pi,
9137 			      struct ice_aqc_set_phy_cfg_data *cfg)
9138 {
9139 	cfg->caps &= ~(ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY |
9140 		       ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY);
9141 
9142 	switch (pi->phy.curr_user_fc_req) {
9143 	case ICE_FC_FULL:
9144 		cfg->caps |= ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY |
9145 			     ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY;
9146 		break;
9147 	case ICE_FC_RX_PAUSE:
9148 		cfg->caps |= ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY;
9149 		break;
9150 	case ICE_FC_TX_PAUSE:
9151 		cfg->caps |= ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY;
9152 		break;
9153 	default:
9154 		/* ICE_FC_NONE */
9155 		break;
9156 	}
9157 }
9158 
9159 /**
9160  * ice_apply_saved_phy_cfg -- Re-apply user PHY config settings
9161  * @sc: device private structure
9162  * @settings: which settings to apply
9163  *
9164  * Applies user settings for advertised speeds, FEC mode, and flow
9165  * control mode to a PHY config struct; it uses the data from pcaps
9166  * to check if the saved settings are invalid and uses the pcaps
9167  * data instead if they are invalid.
9168  *
9169  * For things like sysctls where only one setting needs to be
9170  * updated, the bitmap allows the caller to specify which setting
9171  * to update.
9172  */
9173 int
9174 ice_apply_saved_phy_cfg(struct ice_softc *sc, u8 settings)
9175 {
9176 	struct ice_aqc_set_phy_cfg_data cfg = { 0 };
9177 	struct ice_port_info *pi = sc->hw.port_info;
9178 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
9179 	struct ice_hw *hw = &sc->hw;
9180 	device_t dev = sc->dev;
9181 	u64 phy_low, phy_high;
9182 	enum ice_status status;
9183 	enum ice_fec_mode dflt_fec_mode;
9184 	u16 dflt_user_speed;
9185 
9186 	if (!settings || settings > ICE_APPLY_LS_FEC_FC) {
9187 		ice_debug(hw, ICE_DBG_LINK, "Settings out-of-bounds: %u\n",
9188 		    settings);
9189 	}
9190 
9191 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
9192 				     &pcaps, NULL);
9193 	if (status != ICE_SUCCESS) {
9194 		device_printf(dev,
9195 		    "%s: ice_aq_get_phy_caps (ACTIVE) failed; status %s, aq_err %s\n",
9196 		    __func__, ice_status_str(status),
9197 		    ice_aq_str(hw->adminq.sq_last_status));
9198 		return (EIO);
9199 	}
9200 
9201 	phy_low = le64toh(pcaps.phy_type_low);
9202 	phy_high = le64toh(pcaps.phy_type_high);
9203 
9204 	/* Save off initial config parameters */
9205 	dflt_user_speed = ice_aq_phy_types_to_link_speeds(phy_low, phy_high);
9206 	dflt_fec_mode = ice_caps_to_fec_mode(pcaps.caps, pcaps.link_fec_options);
9207 
9208 	/* Setup new PHY config */
9209 	ice_copy_phy_caps_to_cfg(pi, &pcaps, &cfg);
9210 
9211 	/* On error, restore active configuration values */
9212 	if ((settings & ICE_APPLY_LS) &&
9213 	    ice_apply_saved_phy_req_to_cfg(sc, &cfg)) {
9214 		pi->phy.curr_user_speed_req = dflt_user_speed;
9215 		cfg.phy_type_low = pcaps.phy_type_low;
9216 		cfg.phy_type_high = pcaps.phy_type_high;
9217 	}
9218 	if ((settings & ICE_APPLY_FEC) &&
9219 	    ice_apply_saved_fec_req_to_cfg(sc, &cfg)) {
9220 		pi->phy.curr_user_fec_req = dflt_fec_mode;
9221 	}
9222 	if (settings & ICE_APPLY_FC) {
9223 		/* No real error indicators for this process,
9224 		 * so we'll just have to assume it works. */
9225 		ice_apply_saved_fc_req_to_cfg(pi, &cfg);
9226 	}
9227 
9228 	/* Enable link and re-negotiate it */
9229 	cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK;
9230 
9231 	status = ice_aq_set_phy_cfg(hw, pi, &cfg, NULL);
9232 	if (status != ICE_SUCCESS) {
9233 		/* Don't indicate failure if there's no media in the port.
9234 		 * The settings have been saved and will apply when media
9235 		 * is inserted.
9236 		 */
9237 		if ((status == ICE_ERR_AQ_ERROR) &&
9238 		    (hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY)) {
9239 			device_printf(dev,
9240 			    "%s: Setting will be applied when media is inserted\n",
9241 			    __func__);
9242 			return (0);
9243 		} else {
9244 			device_printf(dev,
9245 			    "%s: ice_aq_set_phy_cfg failed; status %s, aq_err %s\n",
9246 			    __func__, ice_status_str(status),
9247 			    ice_aq_str(hw->adminq.sq_last_status));
9248 			return (EIO);
9249 		}
9250 	}
9251 
9252 	return (0);
9253 }
9254 
9255 /**
9256  * ice_print_ldo_tlv - Print out LDO TLV information
9257  * @sc: device private structure
9258  * @tlv: LDO TLV information from the adapter NVM
9259  *
9260  * Dump out the information in tlv to the kernel message buffer; intended for
9261  * debugging purposes.
9262  */
9263 static void
9264 ice_print_ldo_tlv(struct ice_softc *sc, struct ice_link_default_override_tlv *tlv)
9265 {
9266 	device_t dev = sc->dev;
9267 
9268 	device_printf(dev, "TLV: -options     0x%02x\n", tlv->options);
9269 	device_printf(dev, "     -phy_config  0x%02x\n", tlv->phy_config);
9270 	device_printf(dev, "     -fec_options 0x%02x\n", tlv->fec_options);
9271 	device_printf(dev, "     -phy_high    0x%016llx\n",
9272 	    (unsigned long long)tlv->phy_type_high);
9273 	device_printf(dev, "     -phy_low     0x%016llx\n",
9274 	    (unsigned long long)tlv->phy_type_low);
9275 }
9276 
9277 /**
9278  * ice_set_link_management_mode -- Strict or lenient link management
9279  * @sc: device private structure
9280  *
9281  * Some NVMs give the adapter the option to advertise a superset of link
9282  * configurations.  This checks to see if that option is enabled.
9283  * Further, the NVM could also provide a specific set of configurations
9284  * to try; these are cached in the driver's private structure if they
9285  * are available.
9286  */
9287 void
9288 ice_set_link_management_mode(struct ice_softc *sc)
9289 {
9290 	struct ice_port_info *pi = sc->hw.port_info;
9291 	device_t dev = sc->dev;
9292 	struct ice_link_default_override_tlv tlv = { 0 };
9293 	enum ice_status status;
9294 
9295 	/* Port must be in strict mode if FW version is below a certain
9296 	 * version. (i.e. Don't set lenient mode features)
9297 	 */
9298 	if (!(ice_fw_supports_link_override(&sc->hw)))
9299 		return;
9300 
9301 	status = ice_get_link_default_override(&tlv, pi);
9302 	if (status != ICE_SUCCESS) {
9303 		device_printf(dev,
9304 		    "%s: ice_get_link_default_override failed; status %s, aq_err %s\n",
9305 		    __func__, ice_status_str(status),
9306 		    ice_aq_str(sc->hw.adminq.sq_last_status));
9307 		return;
9308 	}
9309 
9310 	if (sc->hw.debug_mask & ICE_DBG_LINK)
9311 		ice_print_ldo_tlv(sc, &tlv);
9312 
9313 	/* Set lenient link mode */
9314 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LENIENT_LINK_MODE) &&
9315 	    (!(tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE)))
9316 		ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_en);
9317 
9318 	/* FW supports reporting a default configuration */
9319 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LINK_MGMT_VER_2) &&
9320 	    ice_fw_supports_report_dflt_cfg(&sc->hw)) {
9321 		ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_en);
9322 		/* Knowing we're at a high enough firmware revision to
9323 		 * support this link management configuration, we don't
9324 		 * need to check/support earlier versions.
9325 		 */
9326 		return;
9327 	}
9328 
9329 	/* Default overrides only work if in lenient link mode */
9330 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LINK_MGMT_VER_1) &&
9331 	    ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE) &&
9332 	    (tlv.options & ICE_LINK_OVERRIDE_EN))
9333 		ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_en);
9334 
9335 	/* Cache the LDO TLV structure in the driver, since it
9336 	 * won't change during the driver's lifetime.
9337 	 */
9338 	sc->ldo_tlv = tlv;
9339 }
9340 
9341 /**
9342  * ice_init_saved_phy_cfg -- Set cached user PHY cfg settings with NVM defaults
9343  * @sc: device private structure
9344  *
9345  * This should be called before the tunables for these link settings
9346  * (e.g. advertise_speed) are added -- so that these defaults don't overwrite
9347  * the cached values that the sysctl handlers will write.
9348  *
9349  * This also needs to be called before ice_init_link_configuration, to ensure
9350  * that there are sane values that can be written if there is media available
9351  * in the port.
9352  */
9353 void
9354 ice_init_saved_phy_cfg(struct ice_softc *sc)
9355 {
9356 	struct ice_port_info *pi = sc->hw.port_info;
9357 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
9358 	struct ice_hw *hw = &sc->hw;
9359 	device_t dev = sc->dev;
9360 	enum ice_status status;
9361 	u64 phy_low, phy_high;
9362 	u8 report_mode = ICE_AQC_REPORT_TOPO_CAP_MEDIA;
9363 
9364 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LINK_MGMT_VER_2))
9365 		report_mode = ICE_AQC_REPORT_DFLT_CFG;
9366 	status = ice_aq_get_phy_caps(pi, false, report_mode, &pcaps, NULL);
9367 	if (status != ICE_SUCCESS) {
9368 		device_printf(dev,
9369 		    "%s: ice_aq_get_phy_caps (%s) failed; status %s, aq_err %s\n",
9370 		    __func__,
9371 		    report_mode == ICE_AQC_REPORT_DFLT_CFG ? "DFLT" : "w/MEDIA",
9372 		    ice_status_str(status),
9373 		    ice_aq_str(hw->adminq.sq_last_status));
9374 		return;
9375 	}
9376 
9377 	phy_low = le64toh(pcaps.phy_type_low);
9378 	phy_high = le64toh(pcaps.phy_type_high);
9379 
9380 	/* Save off initial config parameters */
9381 	pi->phy.curr_user_speed_req =
9382 	   ice_aq_phy_types_to_link_speeds(phy_low, phy_high);
9383 	pi->phy.curr_user_fec_req = ice_caps_to_fec_mode(pcaps.caps,
9384 	    pcaps.link_fec_options);
9385 	pi->phy.curr_user_fc_req = ice_caps_to_fc_mode(pcaps.caps);
9386 }
9387 
/**
 * ice_module_init - Driver callback to handle module load
 *
 * Runs once when the module is loaded and sets up state that lives as long
 * as the driver does. At present that is only the RDMA interface, via
 * ice_rdma_init().
 *
 * @returns 0 always.
 */
static int
ice_module_init(void)
{
	int err = 0;

	ice_rdma_init();

	return (err);
}
9400 
/**
 * ice_module_exit - Driver callback to handle module exit
 *
 * Runs when the module is unloaded and undoes what ice_module_init set up,
 * which at present is only the RDMA interface (ice_rdma_exit()).
 *
 * A non-zero return value would keep the module from being unloaded, so it
 * must be reserved for cases where unloading is impossible, such as
 * outstanding memory references that cannot be revoked.
 *
 * @returns 0 always.
 */
static int
ice_module_exit(void)
{
	int err = 0;

	ice_rdma_exit();

	return (err);
}
9417 
9418 /**
9419  * ice_module_event_handler - Callback for module events
9420  * @mod: unused module_t parameter
9421  * @what: the event requested
9422  * @arg: unused event argument
9423  *
9424  * Callback used to handle module events from the stack. Used to allow the
9425  * driver to define custom behavior that should happen at module load and
9426  * unload.
9427  */
9428 int
9429 ice_module_event_handler(module_t __unused mod, int what, void __unused *arg)
9430 {
9431 	switch (what) {
9432 	case MOD_LOAD:
9433 		return ice_module_init();
9434 	case MOD_UNLOAD:
9435 		return ice_module_exit();
9436 	default:
9437 		/* TODO: do we need to handle MOD_QUIESCE and MOD_SHUTDOWN? */
9438 		return (EOPNOTSUPP);
9439 	}
9440 }
9441 
9442 /**
9443  * ice_handle_nvm_access_ioctl - Handle an NVM access ioctl request
9444  * @sc: the device private softc
9445  * @ifd: ifdrv ioctl request pointer
9446  */
9447 int
9448 ice_handle_nvm_access_ioctl(struct ice_softc *sc, struct ifdrv *ifd)
9449 {
9450 	union ice_nvm_access_data *data;
9451 	struct ice_nvm_access_cmd *cmd;
9452 	size_t ifd_len = ifd->ifd_len, malloc_len;
9453 	struct ice_hw *hw = &sc->hw;
9454 	device_t dev = sc->dev;
9455 	enum ice_status status;
9456 	u8 *nvm_buffer;
9457 	int err;
9458 
9459 	/*
9460 	 * ifioctl forwards SIOCxDRVSPEC to iflib without performing
9461 	 * a privilege check. In turn, iflib forwards the ioctl to the driver
9462 	 * without performing a privilege check. Perform one here to ensure
9463 	 * that non-privileged threads cannot access this interface.
9464 	 */
9465 	err = priv_check(curthread, PRIV_DRIVER);
9466 	if (err)
9467 		return (err);
9468 
9469 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
9470 		device_printf(dev, "%s: Driver must rebuild data structures after a reset. Operation aborted.\n",
9471 			      __func__);
9472 		return (EBUSY);
9473 	}
9474 
9475 	if (ifd_len < sizeof(struct ice_nvm_access_cmd)) {
9476 		device_printf(dev, "%s: ifdrv length is too small. Got %zu, but expected %zu\n",
9477 			      __func__, ifd_len, sizeof(struct ice_nvm_access_cmd));
9478 		return (EINVAL);
9479 	}
9480 
9481 	if (ifd->ifd_data == NULL) {
9482 		device_printf(dev, "%s: ifd data buffer not present.\n",
9483 			      __func__);
9484 		return (EINVAL);
9485 	}
9486 
9487 	/*
9488 	 * If everything works correctly, ice_handle_nvm_access should not
9489 	 * modify data past the size of the ioctl length. However, it could
9490 	 * lead to memory corruption if it did. Make sure to allocate at least
9491 	 * enough space for the command and data regardless. This
9492 	 * ensures that any access to the data union will not access invalid
9493 	 * memory.
9494 	 */
9495 	malloc_len = max(ifd_len, sizeof(*data) + sizeof(*cmd));
9496 
9497 	nvm_buffer = (u8 *)malloc(malloc_len, M_ICE, M_ZERO | M_WAITOK);
9498 	if (!nvm_buffer)
9499 		return (ENOMEM);
9500 
9501 	/* Copy the NVM access command and data in from user space */
9502 	/* coverity[tainted_data_argument] */
9503 	err = copyin(ifd->ifd_data, nvm_buffer, ifd_len);
9504 	if (err) {
9505 		device_printf(dev, "%s: Copying request from user space failed, err %s\n",
9506 			      __func__, ice_err_str(err));
9507 		goto cleanup_free_nvm_buffer;
9508 	}
9509 
9510 	/*
9511 	 * The NVM command structure is immediately followed by data which
9512 	 * varies in size based on the command.
9513 	 */
9514 	cmd = (struct ice_nvm_access_cmd *)nvm_buffer;
9515 	data = (union ice_nvm_access_data *)(nvm_buffer + sizeof(struct ice_nvm_access_cmd));
9516 
9517 	/* Handle the NVM access request */
9518 	status = ice_handle_nvm_access(hw, cmd, data);
9519 	if (status)
9520 		ice_debug(hw, ICE_DBG_NVM,
9521 			  "NVM access request failed, err %s\n",
9522 			  ice_status_str(status));
9523 
9524 	/* Copy the possibly modified contents of the handled request out */
9525 	err = copyout(nvm_buffer, ifd->ifd_data, ifd_len);
9526 	if (err) {
9527 		device_printf(dev, "%s: Copying response back to user space failed, err %s\n",
9528 			      __func__, ice_err_str(err));
9529 		goto cleanup_free_nvm_buffer;
9530 	}
9531 
9532 	/* Convert private status to an error code for proper ioctl response */
9533 	switch (status) {
9534 	case ICE_SUCCESS:
9535 		err = (0);
9536 		break;
9537 	case ICE_ERR_NO_MEMORY:
9538 		err = (ENOMEM);
9539 		break;
9540 	case ICE_ERR_OUT_OF_RANGE:
9541 		err = (ENOTTY);
9542 		break;
9543 	case ICE_ERR_PARAM:
9544 	default:
9545 		err = (EINVAL);
9546 		break;
9547 	}
9548 
9549 cleanup_free_nvm_buffer:
9550 	free(nvm_buffer, M_ICE);
9551 	return err;
9552 }
9553 
9554 /**
9555  * ice_read_sff_eeprom - Read data from SFF eeprom
9556  * @sc: device softc
9557  * @dev_addr: I2C device address (typically 0xA0 or 0xA2)
9558  * @offset: offset into the eeprom
9559  * @data: pointer to data buffer to store read data in
9560  * @length: length to read; max length is 16
9561  *
9562  * Read from the SFF eeprom in the module for this PF's port. For more details
9563  * on the contents of an SFF eeprom, refer to SFF-8724 (SFP), SFF-8636 (QSFP),
9564  * and SFF-8024 (both).
9565  */
9566 int
9567 ice_read_sff_eeprom(struct ice_softc *sc, u16 dev_addr, u16 offset, u8* data, u16 length)
9568 {
9569 	struct ice_hw *hw = &sc->hw;
9570 	int ret = 0, retries = 0;
9571 	enum ice_status status;
9572 
9573 	if (length > 16)
9574 		return (EINVAL);
9575 
9576 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
9577 		return (ENOSYS);
9578 
9579 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA))
9580 		return (ENXIO);
9581 
9582 	do {
9583 		status = ice_aq_sff_eeprom(hw, 0, dev_addr,
9584 					   offset, 0, 0, data, length,
9585 					   false, NULL);
9586 		if (!status) {
9587 			ret = 0;
9588 			break;
9589 		}
9590 		if (status == ICE_ERR_AQ_ERROR &&
9591 		    hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY) {
9592 			ret = EBUSY;
9593 			continue;
9594 		}
9595 		if (status == ICE_ERR_AQ_ERROR &&
9596 		    hw->adminq.sq_last_status == ICE_AQ_RC_EACCES) {
9597 			/* FW says I2C access isn't supported */
9598 			ret = EACCES;
9599 			break;
9600 		}
9601 		if (status == ICE_ERR_AQ_ERROR &&
9602 		    hw->adminq.sq_last_status == ICE_AQ_RC_EPERM) {
9603 			device_printf(sc->dev,
9604 				  "%s: Module pointer location specified in command does not permit the required operation.\n",
9605 				  __func__);
9606 			ret = EPERM;
9607 			break;
9608 		} else {
9609 			device_printf(sc->dev,
9610 				  "%s: Error reading I2C data: err %s aq_err %s\n",
9611 				  __func__, ice_status_str(status),
9612 				  ice_aq_str(hw->adminq.sq_last_status));
9613 			ret = EIO;
9614 			break;
9615 		}
9616 	} while (retries++ < ICE_I2C_MAX_RETRIES);
9617 
9618 	if (ret == EBUSY)
9619 		device_printf(sc->dev,
9620 			  "%s: Error reading I2C data after %d retries\n",
9621 			  __func__, ICE_I2C_MAX_RETRIES);
9622 
9623 	return (ret);
9624 }
9625 
9626 /**
9627  * ice_handle_i2c_req - Driver independent I2C request handler
9628  * @sc: device softc
9629  * @req: The I2C parameters to use
9630  *
9631  * Read from the port's I2C eeprom using the parameters from the ioctl.
9632  */
9633 int
9634 ice_handle_i2c_req(struct ice_softc *sc, struct ifi2creq *req)
9635 {
9636 	return ice_read_sff_eeprom(sc, req->dev_addr, req->offset, req->data, req->len);
9637 }
9638 
9639 /**
9640  * ice_sysctl_read_i2c_diag_data - Read some module diagnostic data via i2c
9641  * @oidp: sysctl oid structure
9642  * @arg1: pointer to private data structure
9643  * @arg2: unused
9644  * @req: sysctl request pointer
9645  *
9646  * Read 8 bytes of diagnostic data from the SFF eeprom in the (Q)SFP module
9647  * inserted into the port.
9648  *
9649  *             | SFP A2  | QSFP Lower Page
9650  * ------------|---------|----------------
9651  * Temperature | 96-97	 | 22-23
9652  * Vcc         | 98-99   | 26-27
9653  * TX power    | 102-103 | 34-35..40-41
9654  * RX power    | 104-105 | 50-51..56-57
9655  */
9656 static int
9657 ice_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS)
9658 {
9659 	struct ice_softc *sc = (struct ice_softc *)arg1;
9660 	device_t dev = sc->dev;
9661 	struct sbuf *sbuf;
9662 	int ret;
9663 	u8 data[16];
9664 
9665 	UNREFERENCED_PARAMETER(arg2);
9666 	UNREFERENCED_PARAMETER(oidp);
9667 
9668 	if (ice_driver_is_detaching(sc))
9669 		return (ESHUTDOWN);
9670 
9671 	if (req->oldptr == NULL) {
9672 		ret = SYSCTL_OUT(req, 0, 128);
9673 		return (ret);
9674 	}
9675 
9676 	ret = ice_read_sff_eeprom(sc, 0xA0, 0, data, 1);
9677 	if (ret)
9678 		return (ret);
9679 
9680 	/* 0x3 for SFP; 0xD/0x11 for QSFP+/QSFP28 */
9681 	if (data[0] == 0x3) {
9682 		/*
9683 		 * Check for:
9684 		 * - Internally calibrated data
9685 		 * - Diagnostic monitoring is implemented
9686 		 */
9687 		ice_read_sff_eeprom(sc, 0xA0, 92, data, 1);
9688 		if (!(data[0] & 0x60)) {
9689 			device_printf(dev, "Module doesn't support diagnostics: 0xA0[92] = %02X\n", data[0]);
9690 			return (ENODEV);
9691 		}
9692 
9693 		sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
9694 
9695 		ice_read_sff_eeprom(sc, 0xA2, 96, data, 4);
9696 		for (int i = 0; i < 4; i++)
9697 			sbuf_printf(sbuf, "%02X ", data[i]);
9698 
9699 		ice_read_sff_eeprom(sc, 0xA2, 102, data, 4);
9700 		for (int i = 0; i < 4; i++)
9701 			sbuf_printf(sbuf, "%02X ", data[i]);
9702 	} else if (data[0] == 0xD || data[0] == 0x11) {
9703 		/*
9704 		 * QSFP+ modules are always internally calibrated, and must indicate
9705 		 * what types of diagnostic monitoring are implemented
9706 		 */
9707 		sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
9708 
9709 		ice_read_sff_eeprom(sc, 0xA0, 22, data, 2);
9710 		for (int i = 0; i < 2; i++)
9711 			sbuf_printf(sbuf, "%02X ", data[i]);
9712 
9713 		ice_read_sff_eeprom(sc, 0xA0, 26, data, 2);
9714 		for (int i = 0; i < 2; i++)
9715 			sbuf_printf(sbuf, "%02X ", data[i]);
9716 
9717 		ice_read_sff_eeprom(sc, 0xA0, 34, data, 2);
9718 		for (int i = 0; i < 2; i++)
9719 			sbuf_printf(sbuf, "%02X ", data[i]);
9720 
9721 		ice_read_sff_eeprom(sc, 0xA0, 50, data, 2);
9722 		for (int i = 0; i < 2; i++)
9723 			sbuf_printf(sbuf, "%02X ", data[i]);
9724 	} else {
9725 		device_printf(dev, "Module is not SFP/SFP+/SFP28/QSFP+ (%02X)\n", data[0]);
9726 		return (ENODEV);
9727 	}
9728 
9729 	sbuf_finish(sbuf);
9730 	sbuf_delete(sbuf);
9731 
9732 	return (0);
9733 }
9734 
9735 /**
9736  * ice_alloc_intr_tracking - Setup interrupt tracking structures
9737  * @sc: device softc structure
9738  *
9739  * Sets up the resource manager for keeping track of interrupt allocations,
9740  * and initializes the tracking maps for the PF's interrupt allocations.
9741  *
9742  * Unlike the scheme for queues, this is done in one step since both the
9743  * manager and the maps both have the same lifetime.
9744  *
9745  * @returns 0 on success, or an error code on failure.
9746  */
9747 int
9748 ice_alloc_intr_tracking(struct ice_softc *sc)
9749 {
9750 	struct ice_hw *hw = &sc->hw;
9751 	device_t dev = sc->dev;
9752 	int err;
9753 
9754 	/* Initialize the interrupt allocation manager */
9755 	err = ice_resmgr_init_contig_only(&sc->imgr,
9756 	    hw->func_caps.common_cap.num_msix_vectors);
9757 	if (err) {
9758 		device_printf(dev, "Unable to initialize PF interrupt manager: %s\n",
9759 			      ice_err_str(err));
9760 		return (err);
9761 	}
9762 
9763 	/* Allocate PF interrupt mapping storage */
9764 	if (!(sc->pf_imap =
9765 	      (u16 *)malloc(sizeof(u16) * hw->func_caps.common_cap.num_msix_vectors,
9766 	      M_ICE, M_NOWAIT))) {
9767 		device_printf(dev, "Unable to allocate PF imap memory\n");
9768 		err = ENOMEM;
9769 		goto free_imgr;
9770 	}
9771 	if (!(sc->rdma_imap =
9772 	      (u16 *)malloc(sizeof(u16) * hw->func_caps.common_cap.num_msix_vectors,
9773 	      M_ICE, M_NOWAIT))) {
9774 		device_printf(dev, "Unable to allocate RDMA imap memory\n");
9775 		err = ENOMEM;
9776 		free(sc->pf_imap, M_ICE);
9777 		goto free_imgr;
9778 	}
9779 	for (u32 i = 0; i < hw->func_caps.common_cap.num_msix_vectors; i++) {
9780 		sc->pf_imap[i] = ICE_INVALID_RES_IDX;
9781 		sc->rdma_imap[i] = ICE_INVALID_RES_IDX;
9782 	}
9783 
9784 	return (0);
9785 
9786 free_imgr:
9787 	ice_resmgr_destroy(&sc->imgr);
9788 	return (err);
9789 }
9790 
9791 /**
9792  * ice_free_intr_tracking - Free PF interrupt tracking structures
9793  * @sc: device softc structure
9794  *
9795  * Frees the interrupt resource allocation manager and the PF's owned maps.
9796  *
9797  * VF maps are released when the owning VF's are destroyed, which should always
9798  * happen before this function is called.
9799  */
9800 void
9801 ice_free_intr_tracking(struct ice_softc *sc)
9802 {
9803 	if (sc->pf_imap) {
9804 		ice_resmgr_release_map(&sc->imgr, sc->pf_imap,
9805 				       sc->lan_vectors);
9806 		free(sc->pf_imap, M_ICE);
9807 		sc->pf_imap = NULL;
9808 	}
9809 	if (sc->rdma_imap) {
9810 		ice_resmgr_release_map(&sc->imgr, sc->rdma_imap,
9811 				       sc->lan_vectors);
9812 		free(sc->rdma_imap, M_ICE);
9813 		sc->rdma_imap = NULL;
9814 	}
9815 
9816 	ice_resmgr_destroy(&sc->imgr);
9817 }
9818 
9819 /**
9820  * ice_apply_supported_speed_filter - Mask off unsupported speeds
9821  * @report_speeds: bit-field for the desired link speeds
9822  * @mod_type: type of module/sgmii connection we have
9823  *
9824  * Given a bitmap of the desired lenient mode link speeds,
9825  * this function will mask off the speeds that are not currently
9826  * supported by the device.
9827  */
9828 static u16
9829 ice_apply_supported_speed_filter(u16 report_speeds, u8 mod_type)
9830 {
9831 	u16 speed_mask;
9832 	enum { IS_SGMII, IS_SFP, IS_QSFP } module;
9833 
9834 	/*
9835 	 * The SFF specification says 0 is unknown, so we'll
9836 	 * treat it like we're connected through SGMII for now.
9837 	 * This may need revisiting if a new type is supported
9838 	 * in the future.
9839 	 */
9840 	switch (mod_type) {
9841 	case 0:
9842 		module = IS_SGMII;
9843 		break;
9844 	case 3:
9845 		module = IS_SFP;
9846 		break;
9847 	default:
9848 		module = IS_QSFP;
9849 		break;
9850 	}
9851 
9852 	/* We won't offer anything lower than 100M for any part,
9853 	 * but we'll need to mask off other speeds based on the
9854 	 * device and module type.
9855 	 */
9856 	speed_mask = ~((u16)ICE_AQ_LINK_SPEED_100MB - 1);
9857 	if ((report_speeds & ICE_AQ_LINK_SPEED_10GB) && (module == IS_SFP))
9858 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1);
9859 	if (report_speeds & ICE_AQ_LINK_SPEED_25GB)
9860 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1);
9861 	if (report_speeds & ICE_AQ_LINK_SPEED_50GB) {
9862 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1);
9863 		if (module == IS_QSFP)
9864 			speed_mask = ~((u16)ICE_AQ_LINK_SPEED_10GB - 1);
9865 	}
9866 	if (report_speeds & ICE_AQ_LINK_SPEED_100GB)
9867 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_25GB - 1);
9868 	return (report_speeds & speed_mask);
9869 }
9870 
9871 /**
9872  * ice_init_health_events - Enable FW health event reporting
9873  * @sc: device softc
9874  *
9875  * Will try to enable firmware health event reporting, but shouldn't
9876  * cause any grief (to the caller) if this fails.
9877  */
9878 void
9879 ice_init_health_events(struct ice_softc *sc)
9880 {
9881 	enum ice_status status;
9882 	u8 health_mask;
9883 
9884 	if ((!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_HEALTH_STATUS)) ||
9885 		(!sc->enable_health_events))
9886 		return;
9887 
9888 	health_mask = ICE_AQC_HEALTH_STATUS_SET_PF_SPECIFIC_MASK |
9889 		      ICE_AQC_HEALTH_STATUS_SET_GLOBAL_MASK;
9890 
9891 	status = ice_aq_set_health_status_config(&sc->hw, health_mask, NULL);
9892 	if (status)
9893 		device_printf(sc->dev,
9894 		    "Failed to enable firmware health events, err %s aq_err %s\n",
9895 		    ice_status_str(status),
9896 		    ice_aq_str(sc->hw.adminq.sq_last_status));
9897 	else
9898 		ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_en);
9899 }
9900 
9901 /**
9902  * ice_print_health_status_string - Print message for given FW health event
9903  * @dev: the PCIe device
9904  * @elem: health status element containing status code
9905  *
9906  * A rather large list of possible health status codes and their associated
9907  * messages.
9908  */
9909 static void
9910 ice_print_health_status_string(device_t dev,
9911 			       struct ice_aqc_health_status_elem *elem)
9912 {
9913 	u16 status_code = le16toh(elem->health_status_code);
9914 
9915 	switch (status_code) {
9916 	case ICE_AQC_HEALTH_STATUS_INFO_RECOVERY:
9917 		device_printf(dev, "The device is in firmware recovery mode.\n");
9918 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
9919 		break;
9920 	case ICE_AQC_HEALTH_STATUS_ERR_FLASH_ACCESS:
9921 		device_printf(dev, "The flash chip cannot be accessed.\n");
9922 		device_printf(dev, "Possible Solution: If issue persists, call customer support.\n");
9923 		break;
9924 	case ICE_AQC_HEALTH_STATUS_ERR_NVM_AUTH:
9925 		device_printf(dev, "NVM authentication failed.\n");
9926 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
9927 		break;
9928 	case ICE_AQC_HEALTH_STATUS_ERR_OROM_AUTH:
9929 		device_printf(dev, "Option ROM authentication failed.\n");
9930 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
9931 		break;
9932 	case ICE_AQC_HEALTH_STATUS_ERR_DDP_AUTH:
9933 		device_printf(dev, "DDP package failed.\n");
9934 		device_printf(dev, "Possible Solution: Update to latest base driver and DDP package.\n");
9935 		break;
9936 	case ICE_AQC_HEALTH_STATUS_ERR_NVM_COMPAT:
9937 		device_printf(dev, "NVM image is incompatible.\n");
9938 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
9939 		break;
9940 	case ICE_AQC_HEALTH_STATUS_ERR_OROM_COMPAT:
9941 		device_printf(dev, "Option ROM is incompatible.\n");
9942 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
9943 		break;
9944 	case ICE_AQC_HEALTH_STATUS_ERR_DCB_MIB:
9945 		device_printf(dev, "Supplied MIB file is invalid. DCB reverted to default configuration.\n");
9946 		device_printf(dev, "Possible Solution: Disable FW-LLDP and check DCBx system configuration.\n");
9947 		break;
9948 	case ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_STRICT:
9949 		device_printf(dev, "An unsupported module was detected.\n");
9950 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
9951 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
9952 		break;
9953 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_TYPE:
9954 		device_printf(dev, "Module type is not supported.\n");
9955 		device_printf(dev, "Possible Solution: Change or replace the module or cable.\n");
9956 		break;
9957 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_QUAL:
9958 		device_printf(dev, "Module is not qualified.\n");
9959 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
9960 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
9961 		device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n");
9962 		break;
9963 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_COMM:
9964 		device_printf(dev, "Device cannot communicate with the module.\n");
9965 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
9966 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
9967 		device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n");
9968 		break;
9969 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_CONFLICT:
9970 		device_printf(dev, "Unresolved module conflict.\n");
9971 		device_printf(dev, "Possible Solution 1: Manually set speed/duplex or use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
9972 		device_printf(dev, "Possible Solution 2: If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.\n");
9973 		break;
9974 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_NOT_PRESENT:
9975 		device_printf(dev, "Module is not present.\n");
9976 		device_printf(dev, "Possible Solution 1: Check that the module is inserted correctly.\n");
9977 		device_printf(dev, "Possible Solution 2: If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.\n");
9978 		break;
9979 	case ICE_AQC_HEALTH_STATUS_INFO_MOD_UNDERUTILIZED:
9980 		device_printf(dev, "Underutilized module.\n");
9981 		device_printf(dev, "Possible Solution 1: Change or replace the module or cable.\n");
9982 		device_printf(dev, "Possible Solution 2: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
9983 		break;
9984 	case ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_LENIENT:
9985 		device_printf(dev, "An unsupported module was detected.\n");
9986 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
9987 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
9988 		device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n");
9989 		break;
9990 	case ICE_AQC_HEALTH_STATUS_ERR_INVALID_LINK_CFG:
9991 		device_printf(dev, "Invalid link configuration.\n");
9992 		break;
9993 	case ICE_AQC_HEALTH_STATUS_ERR_PORT_ACCESS:
9994 		device_printf(dev, "Port hardware access error.\n");
9995 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
9996 		break;
9997 	case ICE_AQC_HEALTH_STATUS_ERR_PORT_UNREACHABLE:
9998 		device_printf(dev, "A port is unreachable.\n");
9999 		device_printf(dev, "Possible Solution 1: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10000 		device_printf(dev, "Possible Solution 2: Update to the latest NVM image.\n");
10001 		break;
10002 	case ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_MOD_LIMITED:
10003 		device_printf(dev, "Port speed is limited due to module.\n");
10004 		device_printf(dev, "Possible Solution: Change the module or use Intel(R) Ethernet Port Configuration Tool to configure the port option to match the current module speed.\n");
10005 		break;
10006 	case ICE_AQC_HEALTH_STATUS_ERR_PARALLEL_FAULT:
10007 		device_printf(dev, "A parallel fault was detected.\n");
10008 		device_printf(dev, "Possible Solution: Check link partner connection and configuration.\n");
10009 		break;
10010 	case ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_PHY_LIMITED:
10011 		device_printf(dev, "Port speed is limited by PHY capabilities.\n");
10012 		device_printf(dev, "Possible Solution 1: Change the module to align to port option.\n");
10013 		device_printf(dev, "Possible Solution 2: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10014 		break;
10015 	case ICE_AQC_HEALTH_STATUS_ERR_NETLIST_TOPO:
10016 		device_printf(dev, "LOM topology netlist is corrupted.\n");
10017 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10018 		break;
10019 	case ICE_AQC_HEALTH_STATUS_ERR_NETLIST:
10020 		device_printf(dev, "Unrecoverable netlist error.\n");
10021 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10022 		break;
10023 	case ICE_AQC_HEALTH_STATUS_ERR_TOPO_CONFLICT:
10024 		device_printf(dev, "Port topology conflict.\n");
10025 		device_printf(dev, "Possible Solution 1: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10026 		device_printf(dev, "Possible Solution 2: Update to the latest NVM image.\n");
10027 		break;
10028 	case ICE_AQC_HEALTH_STATUS_ERR_LINK_HW_ACCESS:
10029 		device_printf(dev, "Unrecoverable hardware access error.\n");
10030 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10031 		break;
10032 	case ICE_AQC_HEALTH_STATUS_ERR_LINK_RUNTIME:
10033 		device_printf(dev, "Unrecoverable runtime error.\n");
10034 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10035 		break;
10036 	case ICE_AQC_HEALTH_STATUS_ERR_DNL_INIT:
10037 		device_printf(dev, "Link management engine failed to initialize.\n");
10038 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10039 		break;
10040 	default:
10041 		break;
10042 	}
10043 }
10044 
10045 /**
10046  * ice_handle_health_status_event - helper function to output health status
10047  * @sc: device softc structure
10048  * @event: event received on a control queue
10049  *
10050  * Prints out the appropriate string based on the given Health Status Event
10051  * code.
10052  */
10053 static void
10054 ice_handle_health_status_event(struct ice_softc *sc,
10055 			       struct ice_rq_event_info *event)
10056 {
10057 	struct ice_aqc_health_status_elem *health_info;
10058 	u16 status_count;
10059 	int i;
10060 
10061 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_HEALTH_STATUS))
10062 		return;
10063 
10064 	health_info = (struct ice_aqc_health_status_elem *)event->msg_buf;
10065 	status_count = le16toh(event->desc.params.get_health_status.health_status_count);
10066 
10067 	if (status_count > (event->buf_len / sizeof(*health_info))) {
10068 		device_printf(sc->dev, "Received a health status event with invalid event count\n");
10069 		return;
10070 	}
10071 
10072 	for (i = 0; i < status_count; i++) {
10073 		ice_print_health_status_string(sc->dev, health_info);
10074 		health_info++;
10075 	}
10076 }
10077 
10078 /**
10079  * ice_set_default_local_lldp_mib - Possibly apply local LLDP MIB to FW
10080  * @sc: device softc structure
10081  *
10082  * This function needs to be called after link up; it makes sure the FW has
10083  * certain PFC/DCB settings. In certain configurations this will re-apply a
10084  * default local LLDP MIB configuration; this is intended to workaround a FW
10085  * behavior where these settings seem to be cleared on link up.
10086  */
10087 void
10088 ice_set_default_local_lldp_mib(struct ice_softc *sc)
10089 {
10090 	struct ice_hw *hw = &sc->hw;
10091 	struct ice_port_info *pi;
10092 	device_t dev = sc->dev;
10093 	enum ice_status status;
10094 
10095 	/* Set Local MIB can disrupt flow control settings for
10096 	 * non-DCB-supported devices.
10097 	 */
10098 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_DCB))
10099 		return;
10100 
10101 	pi = hw->port_info;
10102 
10103 	/* Don't overwrite a custom SW configuration */
10104 	if (!pi->qos_cfg.is_sw_lldp &&
10105 	    !ice_test_state(&sc->state, ICE_STATE_MULTIPLE_TCS))
10106 		ice_set_default_local_mib_settings(sc);
10107 
10108 	status = ice_set_dcb_cfg(pi);
10109 
10110 	if (status)
10111 		device_printf(dev,
10112 		    "Error setting Local LLDP MIB: %s aq_err %s\n",
10113 		    ice_status_str(status),
10114 		    ice_aq_str(hw->adminq.sq_last_status));
10115 }
10116 
10117 /**
10118  * ice_sbuf_print_ets_cfg - Helper function to print ETS cfg
10119  * @sbuf: string buffer to print to
10120  * @name: prefix string to use
10121  * @ets: structure to pull values from
10122  *
10123  * A helper function for ice_sysctl_dump_dcbx_cfg(), this
10124  * formats the ETS rec and cfg TLVs into text.
10125  */
10126 static void
10127 ice_sbuf_print_ets_cfg(struct sbuf *sbuf, const char *name, struct ice_dcb_ets_cfg *ets)
10128 {
10129 	sbuf_printf(sbuf, "%s.willing: %u\n", name, ets->willing);
10130 	sbuf_printf(sbuf, "%s.cbs: %u\n", name, ets->cbs);
10131 	sbuf_printf(sbuf, "%s.maxtcs: %u\n", name, ets->maxtcs);
10132 
10133 	sbuf_printf(sbuf, "%s.prio_table:", name);
10134 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10135 		sbuf_printf(sbuf, " %d", ets->prio_table[i]);
10136 	sbuf_printf(sbuf, "\n");
10137 
10138 	sbuf_printf(sbuf, "%s.tcbwtable:", name);
10139 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10140 		sbuf_printf(sbuf, " %d", ets->tcbwtable[i]);
10141 	sbuf_printf(sbuf, "\n");
10142 
10143 	sbuf_printf(sbuf, "%s.tsatable:", name);
10144 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10145 		sbuf_printf(sbuf, " %d", ets->tsatable[i]);
10146 	sbuf_printf(sbuf, "\n");
10147 }
10148 
10149 /**
10150  * ice_sysctl_dump_dcbx_cfg - Print out DCBX/DCB config info
10151  * @oidp: sysctl oid structure
10152  * @arg1: pointer to private data structure
10153  * @arg2: AQ define for either Local or Remote MIB
10154  * @req: sysctl request pointer
10155  *
10156  * Prints out DCB/DCBX configuration, including the contents
10157  * of either the local or remote MIB, depending on the value
10158  * used in arg2.
10159  */
10160 static int
10161 ice_sysctl_dump_dcbx_cfg(SYSCTL_HANDLER_ARGS)
10162 {
10163 	struct ice_softc *sc = (struct ice_softc *)arg1;
10164 	struct ice_aqc_get_cee_dcb_cfg_resp cee_cfg = {};
10165 	struct ice_dcbx_cfg dcb_buf = {};
10166 	struct ice_dcbx_cfg *dcbcfg;
10167 	struct ice_hw *hw = &sc->hw;
10168 	device_t dev = sc->dev;
10169 	struct sbuf *sbuf;
10170 	enum ice_status status;
10171 	u8 maxtcs, dcbx_status, is_sw_lldp;
10172 
10173 	UNREFERENCED_PARAMETER(oidp);
10174 
10175 	if (ice_driver_is_detaching(sc))
10176 		return (ESHUTDOWN);
10177 
10178 	is_sw_lldp = hw->port_info->qos_cfg.is_sw_lldp;
10179 
10180 	/* The driver doesn't receive a Remote MIB via SW */
10181 	if (is_sw_lldp && arg2 == ICE_AQ_LLDP_MIB_REMOTE)
10182 		return (ENOENT);
10183 
10184 	dcbcfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
10185 	if (!is_sw_lldp) {
10186 		/* Collect information from the FW in FW LLDP mode */
10187 		dcbcfg = &dcb_buf;
10188 		status = ice_aq_get_dcb_cfg(hw, (u8)arg2,
10189 		    ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbcfg);
10190 		if (status && arg2 == ICE_AQ_LLDP_MIB_REMOTE &&
10191 		    hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) {
10192 			device_printf(dev,
10193 			    "Unable to query Remote MIB; port has not received one yet\n");
10194 			return (ENOENT);
10195 		}
10196 		if (status) {
10197 			device_printf(dev, "Unable to query LLDP MIB, err %s aq_err %s\n",
10198 			    ice_status_str(status),
10199 			    ice_aq_str(hw->adminq.sq_last_status));
10200 			return (EIO);
10201 		}
10202 	}
10203 
10204 	status = ice_aq_get_cee_dcb_cfg(hw, &cee_cfg, NULL);
10205 	if (status == ICE_SUCCESS)
10206 		dcbcfg->dcbx_mode = ICE_DCBX_MODE_CEE;
10207 	else if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)
10208 		dcbcfg->dcbx_mode = ICE_DCBX_MODE_IEEE;
10209 	else
10210 		device_printf(dev, "Get CEE DCB Cfg AQ cmd err %s aq_err %s\n",
10211 		    ice_status_str(status),
10212 		    ice_aq_str(hw->adminq.sq_last_status));
10213 
10214 	maxtcs = hw->func_caps.common_cap.maxtc;
10215 	dcbx_status = ice_get_dcbx_status(hw);
10216 
10217 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10218 
10219 	/* Do the actual printing */
10220 	sbuf_printf(sbuf, "\n");
10221 	sbuf_printf(sbuf, "SW LLDP mode: %d\n", is_sw_lldp);
10222 	sbuf_printf(sbuf, "Function caps maxtcs: %d\n", maxtcs);
10223 	sbuf_printf(sbuf, "dcbx_status: %d\n", dcbx_status);
10224 
10225 	sbuf_printf(sbuf, "numapps: %u\n", dcbcfg->numapps);
10226 	sbuf_printf(sbuf, "CEE TLV status: %u\n", dcbcfg->tlv_status);
10227 	sbuf_printf(sbuf, "pfc_mode: %s\n", (dcbcfg->pfc_mode == ICE_QOS_MODE_DSCP) ?
10228 	    "DSCP" : "VLAN");
10229 	sbuf_printf(sbuf, "dcbx_mode: %s\n",
10230 	    (dcbcfg->dcbx_mode == ICE_DCBX_MODE_IEEE) ? "IEEE" :
10231 	    (dcbcfg->dcbx_mode == ICE_DCBX_MODE_CEE) ? "CEE" :
10232 	    "Unknown");
10233 
10234 	ice_sbuf_print_ets_cfg(sbuf, "etscfg", &dcbcfg->etscfg);
10235 	ice_sbuf_print_ets_cfg(sbuf, "etsrec", &dcbcfg->etsrec);
10236 
10237 	sbuf_printf(sbuf, "pfc.willing: %u\n", dcbcfg->pfc.willing);
10238 	sbuf_printf(sbuf, "pfc.mbc: %u\n", dcbcfg->pfc.mbc);
10239 	sbuf_printf(sbuf, "pfc.pfccap: 0x%0x\n", dcbcfg->pfc.pfccap);
10240 	sbuf_printf(sbuf, "pfc.pfcena: 0x%0x\n", dcbcfg->pfc.pfcena);
10241 
10242 	if (arg2 == ICE_AQ_LLDP_MIB_LOCAL) {
10243 		sbuf_printf(sbuf, "dscp_map:\n");
10244 		for (int i = 0; i < 8; i++) {
10245 			for (int j = 0; j < 8; j++)
10246 				sbuf_printf(sbuf, " %d",
10247 					    dcbcfg->dscp_map[i * 8 + j]);
10248 			sbuf_printf(sbuf, "\n");
10249 		}
10250 
10251 		sbuf_printf(sbuf, "\nLocal registers:\n");
10252 		sbuf_printf(sbuf, "PRTDCB_GENC.NUMTC: %d\n",
10253 		    (rd32(hw, PRTDCB_GENC) & PRTDCB_GENC_NUMTC_M)
10254 		        >> PRTDCB_GENC_NUMTC_S);
10255 		sbuf_printf(sbuf, "PRTDCB_TUP2TC: 0x%0x\n",
10256 		    (rd32(hw, PRTDCB_TUP2TC)));
10257 		sbuf_printf(sbuf, "PRTDCB_RUP2TC: 0x%0x\n",
10258 		    (rd32(hw, PRTDCB_RUP2TC)));
10259 		sbuf_printf(sbuf, "GLDCB_TC2PFC: 0x%0x\n",
10260 		    (rd32(hw, GLDCB_TC2PFC)));
10261 	}
10262 
10263 	/* Finish */
10264 	sbuf_finish(sbuf);
10265 	sbuf_delete(sbuf);
10266 
10267 	return (0);
10268 }
10269 
10270 /**
10271  * ice_sysctl_dump_vsi_cfg - print PF LAN VSI configuration
10272  * @oidp: sysctl oid structure
10273  * @arg1: pointer to private data structure
10274  * @arg2: unused
10275  * @req: sysctl request pointer
10276  *
10277  * XXX: This could be extended to apply to arbitrary PF-owned VSIs,
10278  * but for simplicity, this only works on the PF's LAN VSI.
10279  */
10280 static int
10281 ice_sysctl_dump_vsi_cfg(SYSCTL_HANDLER_ARGS)
10282 {
10283 	struct ice_softc *sc = (struct ice_softc *)arg1;
10284 	struct ice_vsi_ctx ctx = { 0 };
10285 	struct ice_hw *hw = &sc->hw;
10286 	device_t dev = sc->dev;
10287 	struct sbuf *sbuf;
10288 	enum ice_status status;
10289 
10290 	UNREFERENCED_PARAMETER(oidp);
10291 	UNREFERENCED_PARAMETER(arg2);
10292 
10293 	if (ice_driver_is_detaching(sc))
10294 		return (ESHUTDOWN);
10295 
10296 	/* Get HW absolute index of a VSI */
10297 	ctx.vsi_num = ice_get_hw_vsi_num(hw, sc->pf_vsi.idx);
10298 
10299 	status = ice_aq_get_vsi_params(hw, &ctx, NULL);
10300 	if (status != ICE_SUCCESS) {
10301 		device_printf(dev,
10302 		    "Get VSI AQ call failed, err %s aq_err %s\n",
10303 		    ice_status_str(status),
10304 		    ice_aq_str(hw->adminq.sq_last_status));
10305 		return (EIO);
10306 	}
10307 
10308 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10309 
10310 	/* Do the actual printing */
10311 	sbuf_printf(sbuf, "\n");
10312 
10313 	sbuf_printf(sbuf, "VSI NUM: %d\n", ctx.vsi_num);
10314 	sbuf_printf(sbuf, "VF  NUM: %d\n", ctx.vf_num);
10315 	sbuf_printf(sbuf, "VSIs allocated: %d\n", ctx.vsis_allocd);
10316 	sbuf_printf(sbuf, "VSIs unallocated: %d\n", ctx.vsis_unallocated);
10317 
10318 	sbuf_printf(sbuf, "Rx Queue Map method: %d\n",
10319 	    LE16_TO_CPU(ctx.info.mapping_flags));
10320 	/* The PF VSI is always contiguous, so there's no if-statement here */
10321 	sbuf_printf(sbuf, "Rx Queue base: %d\n",
10322 	    LE16_TO_CPU(ctx.info.q_mapping[0]));
10323 	sbuf_printf(sbuf, "Rx Queue count: %d\n",
10324 	    LE16_TO_CPU(ctx.info.q_mapping[1]));
10325 
10326 	sbuf_printf(sbuf, "TC qbases  :");
10327 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10328 		sbuf_printf(sbuf, " %4d",
10329 		    ctx.info.tc_mapping[i] & ICE_AQ_VSI_TC_Q_OFFSET_M);
10330 	}
10331 	sbuf_printf(sbuf, "\n");
10332 
10333 	sbuf_printf(sbuf, "TC qcounts :");
10334 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10335 		sbuf_printf(sbuf, " %4d",
10336 		    1 << (ctx.info.tc_mapping[i] >> ICE_AQ_VSI_TC_Q_NUM_S));
10337 	}
10338 
10339 	/* Finish */
10340 	sbuf_finish(sbuf);
10341 	sbuf_delete(sbuf);
10342 
10343 	return (0);
10344 }
10345 
10346 /**
10347  * ice_ets_str_to_tbl - Parse string into ETS table
10348  * @str: input string to parse
10349  * @table: output eight values used for ETS values
10350  * @limit: max valid value to accept for ETS values
10351  *
10352  * Parses a string and converts the eight values within
10353  * into a table that can be used in setting ETS settings
10354  * in a MIB.
10355  *
10356  * @return 0 on success, EINVAL if a parsed value is
10357  * not between 0 and limit.
10358  */
10359 static int
10360 ice_ets_str_to_tbl(const char *str, u8 *table, u8 limit)
10361 {
10362 	const char *str_start = str;
10363 	char *str_end;
10364 	long token;
10365 
10366 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10367 		token = strtol(str_start, &str_end, 0);
10368 		if (token < 0 || token > limit)
10369 			return (EINVAL);
10370 
10371 		table[i] = (u8)token;
10372 		str_start = (str_end + 1);
10373 	}
10374 
10375 	return (0);
10376 }
10377 
10378 /**
10379  * ice_check_ets_bw - Check if ETS bw vals are valid
10380  * @table: eight values used for ETS bandwidth
10381  *
10382  * @return true if the sum of all 8 values in table
10383  * equals 100.
10384  */
10385 static bool
10386 ice_check_ets_bw(u8 *table)
10387 {
10388 	int sum = 0;
10389 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10390 		sum += (int)table[i];
10391 
10392 	return (sum == 100);
10393 }
10394 
10395 /**
10396  * ice_cfg_pba_num - Determine if PBA Number is retrievable
10397  * @sc: the device private softc structure
10398  *
10399  * Sets the feature flag for the existence of a PBA number
10400  * based on the success of the read command.  This does not
10401  * cache the result.
10402  */
10403 void
10404 ice_cfg_pba_num(struct ice_softc *sc)
10405 {
10406 	u8 pba_string[32] = "";
10407 
10408 	if ((ice_is_bit_set(sc->feat_cap, ICE_FEATURE_HAS_PBA)) &&
10409 	    (ice_read_pba_string(&sc->hw, pba_string, sizeof(pba_string)) == 0))
10410 		ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_en);
10411 }
10412 
10413 /**
10414  * ice_sysctl_query_port_ets - print Port ETS Config from AQ
10415  * @oidp: sysctl oid structure
10416  * @arg1: pointer to private data structure
10417  * @arg2: unused
10418  * @req: sysctl request pointer
10419  */
10420 static int
10421 ice_sysctl_query_port_ets(SYSCTL_HANDLER_ARGS)
10422 {
10423 	struct ice_softc *sc = (struct ice_softc *)arg1;
10424 	struct ice_aqc_port_ets_elem port_ets = { 0 };
10425 	struct ice_hw *hw = &sc->hw;
10426 	struct ice_port_info *pi;
10427 	device_t dev = sc->dev;
10428 	struct sbuf *sbuf;
10429 	enum ice_status status;
10430 	int i = 0;
10431 
10432 	UNREFERENCED_PARAMETER(oidp);
10433 	UNREFERENCED_PARAMETER(arg2);
10434 
10435 	if (ice_driver_is_detaching(sc))
10436 		return (ESHUTDOWN);
10437 
10438 	pi = hw->port_info;
10439 
10440 	status = ice_aq_query_port_ets(pi, &port_ets, sizeof(port_ets), NULL);
10441 	if (status != ICE_SUCCESS) {
10442 		device_printf(dev,
10443 		    "Query Port ETS AQ call failed, err %s aq_err %s\n",
10444 		    ice_status_str(status),
10445 		    ice_aq_str(hw->adminq.sq_last_status));
10446 		return (EIO);
10447 	}
10448 
10449 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10450 
10451 	/* Do the actual printing */
10452 	sbuf_printf(sbuf, "\n");
10453 
10454 	sbuf_printf(sbuf, "Valid TC map: 0x%x\n", port_ets.tc_valid_bits);
10455 
10456 	sbuf_printf(sbuf, "TC BW %%:");
10457 	ice_for_each_traffic_class(i) {
10458 		sbuf_printf(sbuf, " %3d", port_ets.tc_bw_share[i]);
10459 	}
10460 	sbuf_printf(sbuf, "\n");
10461 
10462 	sbuf_printf(sbuf, "EIR profile ID: %d\n", port_ets.port_eir_prof_id);
10463 	sbuf_printf(sbuf, "CIR profile ID: %d\n", port_ets.port_cir_prof_id);
10464 	sbuf_printf(sbuf, "TC Node prio: 0x%x\n", port_ets.tc_node_prio);
10465 
10466 	sbuf_printf(sbuf, "TC Node TEIDs:\n");
10467 	ice_for_each_traffic_class(i) {
10468 		sbuf_printf(sbuf, "%d: %d\n", i, port_ets.tc_node_teid[i]);
10469 	}
10470 
10471 	/* Finish */
10472 	sbuf_finish(sbuf);
10473 	sbuf_delete(sbuf);
10474 
10475 	return (0);
10476 }
10477 
10478 /**
10479  * ice_sysctl_dscp2tc_map - Map DSCP to hardware TCs
10480  * @oidp: sysctl oid structure
10481  * @arg1: pointer to private data structure
10482  * @arg2: which eight DSCP to UP mappings to configure (0 - 7)
10483  * @req: sysctl request pointer
10484  *
10485  * Gets or sets the current DSCP to UP table cached by the driver. Since there
10486  * are 64 possible DSCP values to configure, this sysctl only configures
10487  * chunks of 8 in that space at a time.
10488  *
10489  * This sysctl is only relevant in DSCP mode, and will only function in SW DCB
10490  * mode.
10491  */
10492 static int
10493 ice_sysctl_dscp2tc_map(SYSCTL_HANDLER_ARGS)
10494 {
10495 	struct ice_softc *sc = (struct ice_softc *)arg1;
10496 	struct ice_dcbx_cfg *local_dcbx_cfg;
10497 	struct ice_port_info *pi;
10498 	struct ice_hw *hw = &sc->hw;
10499 	device_t dev = sc->dev;
10500 	enum ice_status status;
10501 	struct sbuf *sbuf;
10502 	int ret;
10503 
10504 	/* Store input rates from user */
10505 	char dscp_user_buf[128] = "";
10506 	u8 new_dscp_table_seg[ICE_MAX_TRAFFIC_CLASS] = {};
10507 
10508 	if (ice_driver_is_detaching(sc))
10509 		return (ESHUTDOWN);
10510 
10511 	if (req->oldptr == NULL && req->newptr == NULL) {
10512 		ret = SYSCTL_OUT(req, 0, 128);
10513 		return (ret);
10514 	}
10515 
10516 	pi = hw->port_info;
10517 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
10518 
10519 	sbuf = sbuf_new(NULL, dscp_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
10520 
10521 	/* Format DSCP-to-UP data for output */
10522 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10523 		sbuf_printf(sbuf, "%d", local_dcbx_cfg->dscp_map[arg2 * 8 + i]);
10524 		if (i != ICE_MAX_TRAFFIC_CLASS - 1)
10525 			sbuf_printf(sbuf, ",");
10526 	}
10527 
10528 	sbuf_finish(sbuf);
10529 	sbuf_delete(sbuf);
10530 
10531 	/* Read in the new DSCP mapping values */
10532 	ret = sysctl_handle_string(oidp, dscp_user_buf, sizeof(dscp_user_buf), req);
10533 	if ((ret) || (req->newptr == NULL))
10534 		return (ret);
10535 
10536 	/* Don't allow setting changes in FW DCB mode */
10537 	if (!hw->port_info->qos_cfg.is_sw_lldp) {
10538 		device_printf(dev, "%s: DSCP mapping is not allowed in FW DCBX mode\n",
10539 		    __func__);
10540 		return (EINVAL);
10541 	}
10542 
10543 	/* Convert 8 values in a string to a table; this is similar to what
10544 	 * needs to be done for ETS settings, so this function can be re-used
10545 	 * for that purpose.
10546 	 */
10547 	ret = ice_ets_str_to_tbl(dscp_user_buf, new_dscp_table_seg, 8);
10548 	if (ret) {
10549 		device_printf(dev, "%s: Could not parse input DSCP2TC table: %s\n",
10550 		    __func__, dscp_user_buf);
10551 		return (ret);
10552 	}
10553 
10554 	memcpy(&local_dcbx_cfg->dscp_map[arg2 * 8], new_dscp_table_seg,
10555 	    sizeof(new_dscp_table_seg));
10556 
10557 	local_dcbx_cfg->app_mode = ICE_DCBX_APPS_NON_WILLING;
10558 
10559 	status = ice_set_dcb_cfg(pi);
10560 	if (status) {
10561 		device_printf(dev,
10562 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
10563 		    __func__, ice_status_str(status),
10564 		    ice_aq_str(hw->adminq.sq_last_status));
10565 		return (EIO);
10566 	}
10567 
10568 	ice_do_dcb_reconfig(sc, false);
10569 
10570 	return (0);
10571 }
10572 
10573 /**
10574  * ice_handle_debug_dump_ioctl - Handle a debug dump ioctl request
10575  * @sc: the device private softc
10576  * @ifd: ifdrv ioctl request pointer
10577  */
10578 int
10579 ice_handle_debug_dump_ioctl(struct ice_softc *sc, struct ifdrv *ifd)
10580 {
10581 	size_t ifd_len = ifd->ifd_len;
10582 	struct ice_hw *hw = &sc->hw;
10583 	device_t dev = sc->dev;
10584 	struct ice_debug_dump_cmd *ddc;
10585 	enum ice_status status;
10586 	int err = 0;
10587 
10588 	/* Returned arguments from the Admin Queue */
10589 	u16 ret_buf_size = 0;
10590 	u16 ret_next_table = 0;
10591 	u32 ret_next_index = 0;
10592 
10593 	/*
10594 	 * ifioctl forwards SIOCxDRVSPEC to iflib without performing
10595 	 * a privilege check. In turn, iflib forwards the ioctl to the driver
10596 	 * without performing a privilege check. Perform one here to ensure
10597 	 * that non-privileged threads cannot access this interface.
10598 	 */
10599 	err = priv_check(curthread, PRIV_DRIVER);
10600 	if (err)
10601 		return (err);
10602 
10603 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
10604 		device_printf(dev,
10605 		    "%s: Driver must rebuild data structures after a reset. Operation aborted.\n",
10606 		    __func__);
10607 		return (EBUSY);
10608 	}
10609 
10610 	if (ifd_len < sizeof(*ddc)) {
10611 		device_printf(dev,
10612 		    "%s: ifdrv length is too small. Got %zu, but expected %zu\n",
10613 		    __func__, ifd_len, sizeof(*ddc));
10614 		return (EINVAL);
10615 	}
10616 
10617 	if (ifd->ifd_data == NULL) {
10618 		device_printf(dev, "%s: ifd data buffer not present.\n",
10619 		     __func__);
10620 		return (EINVAL);
10621 	}
10622 
10623 	ddc = (struct ice_debug_dump_cmd *)malloc(ifd_len, M_ICE, M_ZERO | M_NOWAIT);
10624 	if (!ddc)
10625 		return (ENOMEM);
10626 
10627 	/* Copy the NVM access command and data in from user space */
10628 	/* coverity[tainted_data_argument] */
10629 	err = copyin(ifd->ifd_data, ddc, ifd_len);
10630 	if (err) {
10631 		device_printf(dev, "%s: Copying request from user space failed, err %s\n",
10632 			      __func__, ice_err_str(err));
10633 		goto out;
10634 	}
10635 
10636 	/* The data_size arg must be at least 1 for the AQ cmd to work */
10637 	if (ddc->data_size == 0) {
10638 		device_printf(dev,
10639 		    "%s: data_size must be greater than 0\n", __func__);
10640 		err = EINVAL;
10641 		goto out;
10642 	}
10643 	/* ...and it can't be too long */
10644 	if (ddc->data_size > (ifd_len - sizeof(*ddc))) {
10645 		device_printf(dev,
10646 		    "%s: data_size (%d) is larger than ifd_len space (%zu)?\n", __func__,
10647 		    ddc->data_size, ifd_len - sizeof(*ddc));
10648 		err = EINVAL;
10649 		goto out;
10650 	}
10651 
10652 	/* Make sure any possible data buffer space is zeroed */
10653 	memset(ddc->data, 0, ifd_len - sizeof(*ddc));
10654 
10655 	status = ice_aq_get_internal_data(hw, ddc->cluster_id, ddc->table_id, ddc->offset,
10656 	    (u8 *)ddc->data, ddc->data_size, &ret_buf_size, &ret_next_table, &ret_next_index, NULL);
10657 	ice_debug(hw, ICE_DBG_DIAG, "%s: ret_buf_size %d, ret_next_table %d, ret_next_index %d\n",
10658 	    __func__, ret_buf_size, ret_next_table, ret_next_index);
10659 	if (status) {
10660 		device_printf(dev,
10661 		    "%s: Get Internal Data AQ command failed, err %s aq_err %s\n",
10662 		    __func__,
10663 		    ice_status_str(status),
10664 		    ice_aq_str(hw->adminq.sq_last_status));
10665 		goto aq_error;
10666 	}
10667 
10668 	ddc->table_id = ret_next_table;
10669 	ddc->offset = ret_next_index;
10670 	ddc->data_size = ret_buf_size;
10671 
10672 	/* Copy the possibly modified contents of the handled request out */
10673 	err = copyout(ddc, ifd->ifd_data, ifd->ifd_len);
10674 	if (err) {
10675 		device_printf(dev, "%s: Copying response back to user space failed, err %s\n",
10676 			      __func__, ice_err_str(err));
10677 		goto out;
10678 	}
10679 
10680 aq_error:
10681 	/* Convert private status to an error code for proper ioctl response */
10682 	switch (status) {
10683 	case ICE_SUCCESS:
10684 		err = (0);
10685 		break;
10686 	case ICE_ERR_NO_MEMORY:
10687 		err = (ENOMEM);
10688 		break;
10689 	case ICE_ERR_OUT_OF_RANGE:
10690 		err = (ENOTTY);
10691 		break;
10692 	case ICE_ERR_AQ_ERROR:
10693 		err = (EIO);
10694 		break;
10695 	case ICE_ERR_PARAM:
10696 	default:
10697 		err = (EINVAL);
10698 		break;
10699 	}
10700 
10701 out:
10702 	free(ddc, M_ICE);
10703 	return (err);
10704 }
10705 
10706 /**
10707  * ice_sysctl_allow_no_fec_mod_in_auto - Change Auto FEC behavior
10708  * @oidp: sysctl oid structure
10709  * @arg1: pointer to private data structure
10710  * @arg2: unused
10711  * @req: sysctl request pointer
10712  *
10713  * Allows user to let "No FEC" mode to be used in "Auto"
10714  * FEC mode during FEC negotiation. This is only supported
10715  * on newer firmware versions.
10716  */
10717 static int
10718 ice_sysctl_allow_no_fec_mod_in_auto(SYSCTL_HANDLER_ARGS)
10719 {
10720 	struct ice_softc *sc = (struct ice_softc *)arg1;
10721 	struct ice_hw *hw = &sc->hw;
10722 	device_t dev = sc->dev;
10723 	u8 user_flag;
10724 	int ret;
10725 
10726 	UNREFERENCED_PARAMETER(arg2);
10727 
10728 	ret = priv_check(curthread, PRIV_DRIVER);
10729 	if (ret)
10730 		return (ret);
10731 
10732 	if (ice_driver_is_detaching(sc))
10733 		return (ESHUTDOWN);
10734 
10735 	user_flag = (u8)sc->allow_no_fec_mod_in_auto;
10736 
10737 	ret = sysctl_handle_bool(oidp, &user_flag, 0, req);
10738 	if ((ret) || (req->newptr == NULL))
10739 		return (ret);
10740 
10741 	if (!ice_fw_supports_fec_dis_auto(hw)) {
10742 		log(LOG_INFO,
10743 		    "%s: Enabling or disabling of auto configuration of modules that don't support FEC is unsupported by the current firmware\n",
10744 		    device_get_nameunit(dev));
10745 		return (ENODEV);
10746 	}
10747 
10748 	if (user_flag == (bool)sc->allow_no_fec_mod_in_auto)
10749 		return (0);
10750 
10751 	sc->allow_no_fec_mod_in_auto = (u8)user_flag;
10752 
10753 	if (sc->allow_no_fec_mod_in_auto)
10754 		log(LOG_INFO, "%s: Enabled auto configuration of No FEC modules\n",
10755 		    device_get_nameunit(dev));
10756 	else
10757 		log(LOG_INFO,
10758 		    "%s: Auto configuration of No FEC modules reset to NVM defaults\n",
10759 		    device_get_nameunit(dev));
10760 
10761 	return (0);
10762 }
10763 
10764