xref: /freebsd/sys/dev/ice/ice_lib.c (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2024, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /**
33  * @file ice_lib.c
34  * @brief Generic device setup and sysctl functions
35  *
36  * Library of generic device functions not specific to the networking stack.
37  *
38  * This includes hardware initialization functions, as well as handlers for
39  * many of the device sysctls used to probe driver status or tune specific
40  * behaviors.
41  */
42 
43 #include "ice_lib.h"
44 #include "ice_iflib.h"
45 #include <dev/pci/pcivar.h>
46 #include <dev/pci/pcireg.h>
47 #include <machine/resource.h>
48 #include <net/if_dl.h>
49 #include <sys/firmware.h>
50 #include <sys/priv.h>
51 #include <sys/limits.h>
52 
53 /**
54  * @var M_ICE
55  * @brief main ice driver allocation type
56  *
57  * malloc(9) allocation type used by the majority of memory allocations in the
58  * ice driver.
59  */
60 MALLOC_DEFINE(M_ICE, "ice", "Intel(R) 100Gb Network Driver lib allocations");
61 
62 /*
63  * Helper function prototypes
64  */
65 static int ice_get_next_vsi(struct ice_vsi **all_vsi, int size);
66 static void ice_set_default_vsi_ctx(struct ice_vsi_ctx *ctx);
67 static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctx, enum ice_vsi_type type);
68 static int ice_setup_vsi_qmap(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx);
69 static int ice_setup_tx_ctx(struct ice_tx_queue *txq,
70 			    struct ice_tlan_ctx *tlan_ctx, u16 pf_q);
71 static int ice_setup_rx_ctx(struct ice_rx_queue *rxq);
72 static int ice_is_rxq_ready(struct ice_hw *hw, int pf_q, u32 *reg);
73 static void ice_free_fltr_list(struct ice_list_head *list);
74 static int ice_add_mac_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
75 			       const u8 *addr, enum ice_sw_fwd_act_type action);
76 static void ice_check_ctrlq_errors(struct ice_softc *sc, const char *qname,
77 				   struct ice_ctl_q_info *cq);
78 static void ice_process_link_event(struct ice_softc *sc, struct ice_rq_event_info *e);
79 static void ice_process_ctrlq_event(struct ice_softc *sc, const char *qname,
80 				    struct ice_rq_event_info *event);
81 static void ice_nvm_version_str(struct ice_hw *hw, struct sbuf *buf);
82 static void ice_update_port_oversize(struct ice_softc *sc, u64 rx_errors);
83 static void ice_active_pkg_version_str(struct ice_hw *hw, struct sbuf *buf);
84 static void ice_os_pkg_version_str(struct ice_hw *hw, struct sbuf *buf);
85 static bool ice_filter_is_mcast(struct ice_vsi *vsi, struct ice_fltr_info *info);
86 static u_int ice_sync_one_mcast_filter(void *p, struct sockaddr_dl *sdl, u_int errors);
87 static void ice_add_debug_tunables(struct ice_softc *sc);
88 static void ice_add_debug_sysctls(struct ice_softc *sc);
89 static void ice_vsi_set_rss_params(struct ice_vsi *vsi);
90 static void ice_get_default_rss_key(u8 *seed);
91 static int  ice_set_rss_key(struct ice_vsi *vsi);
92 static int  ice_set_rss_lut(struct ice_vsi *vsi);
93 static void ice_set_rss_flow_flds(struct ice_vsi *vsi);
94 static void ice_clean_vsi_rss_cfg(struct ice_vsi *vsi);
95 static const char *ice_aq_speed_to_str(struct ice_port_info *pi);
96 static const char *ice_requested_fec_mode(struct ice_port_info *pi);
97 static const char *ice_negotiated_fec_mode(struct ice_port_info *pi);
98 static const char *ice_autoneg_mode(struct ice_port_info *pi);
99 static const char *ice_flowcontrol_mode(struct ice_port_info *pi);
100 static void ice_print_bus_link_data(device_t dev, struct ice_hw *hw);
101 static void ice_set_pci_link_status_data(struct ice_hw *hw, u16 link_status);
102 static uint8_t ice_pcie_bandwidth_check(struct ice_softc *sc);
103 static uint64_t ice_pcie_bus_speed_to_rate(enum ice_pcie_bus_speed speed);
104 static int ice_pcie_lnk_width_to_int(enum ice_pcie_link_width width);
105 static uint64_t ice_phy_types_to_max_rate(struct ice_port_info *pi);
106 static void ice_add_sysctls_sw_stats(struct ice_vsi *vsi,
107 				     struct sysctl_ctx_list *ctx,
108 				     struct sysctl_oid *parent);
109 static void
110 ice_add_sysctls_mac_pfc_one_stat(struct sysctl_ctx_list *ctx,
111 				 struct sysctl_oid_list *parent_list,
112 				 u64* pfc_stat_location,
113 				 const char *node_name,
114 				 const char *descr);
115 static void ice_add_sysctls_mac_pfc_stats(struct sysctl_ctx_list *ctx,
116 					  struct sysctl_oid *parent,
117 					  struct ice_hw_port_stats *stats);
118 static void ice_setup_vsi_common(struct ice_softc *sc, struct ice_vsi *vsi,
119 				 enum ice_vsi_type type, int idx,
120 				 bool dynamic);
121 static void ice_handle_mib_change_event(struct ice_softc *sc,
122 				 struct ice_rq_event_info *event);
123 static void
124 ice_handle_lan_overflow_event(struct ice_softc *sc,
125 			      struct ice_rq_event_info *event);
126 static int ice_add_ethertype_to_list(struct ice_vsi *vsi,
127 				     struct ice_list_head *list,
128 				     u16 ethertype, u16 direction,
129 				     enum ice_sw_fwd_act_type action);
130 static void ice_del_rx_lldp_filter(struct ice_softc *sc);
131 static u16 ice_aq_phy_types_to_link_speeds(u64 phy_type_low,
132 					   u64 phy_type_high);
133 struct ice_phy_data;
134 static int
135 ice_intersect_phy_types_and_speeds(struct ice_softc *sc,
136 				   struct ice_phy_data *phy_data);
137 static int
138 ice_apply_saved_phy_req_to_cfg(struct ice_softc *sc,
139 			       struct ice_aqc_set_phy_cfg_data *cfg);
140 static int
141 ice_apply_saved_fec_req_to_cfg(struct ice_softc *sc,
142 			       struct ice_aqc_set_phy_cfg_data *cfg);
143 static void
144 ice_apply_saved_fc_req_to_cfg(struct ice_port_info *pi,
145 			      struct ice_aqc_set_phy_cfg_data *cfg);
146 static void
147 ice_print_ldo_tlv(struct ice_softc *sc,
148 		  struct ice_link_default_override_tlv *tlv);
149 static void
150 ice_sysctl_speeds_to_aq_phy_types(u16 sysctl_speeds, u64 *phy_type_low,
151 				  u64 *phy_type_high);
152 static u16 ice_apply_supported_speed_filter(u16 report_speeds, u8 mod_type);
153 static void
154 ice_handle_health_status_event(struct ice_softc *sc,
155 			       struct ice_rq_event_info *event);
156 static void
157 ice_print_health_status_string(device_t dev,
158 			       struct ice_aqc_health_status_elem *elem);
159 static void
160 ice_debug_print_mib_change_event(struct ice_softc *sc,
161 				 struct ice_rq_event_info *event);
162 static bool ice_check_ets_bw(u8 *table);
163 static u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg);
164 static bool
165 ice_dcb_needs_reconfig(struct ice_softc *sc, struct ice_dcbx_cfg *old_cfg,
166 		       struct ice_dcbx_cfg *new_cfg);
167 static void ice_dcb_recfg(struct ice_softc *sc);
168 static u8 ice_dcb_tc_contig(u8 tc_map);
169 static int ice_ets_str_to_tbl(const char *str, u8 *table, u8 limit);
170 static int ice_pf_vsi_cfg_tc(struct ice_softc *sc, u8 tc_map);
171 static void ice_sbuf_print_ets_cfg(struct sbuf *sbuf, const char *name,
172 				   struct ice_dcb_ets_cfg *ets);
173 static void ice_stop_pf_vsi(struct ice_softc *sc);
174 static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt);
175 static int ice_config_pfc(struct ice_softc *sc, u8 new_mode);
176 void
177 ice_add_dscp2tc_map_sysctls(struct ice_softc *sc,
178 			    struct sysctl_ctx_list *ctx,
179 			    struct sysctl_oid_list *ctx_list);
180 static void ice_set_default_local_mib_settings(struct ice_softc *sc);
181 static bool ice_dscp_is_mapped(struct ice_dcbx_cfg *dcbcfg);
182 static void ice_start_dcbx_agent(struct ice_softc *sc);
183 static u16 ice_fw_debug_dump_print_cluster(struct ice_softc *sc,
184 					   struct sbuf *sbuf, u16 cluster_id);
185 static void ice_fw_debug_dump_print_clusters(struct ice_softc *sc,
186 					     struct sbuf *sbuf);
187 static void ice_remove_vsi_mirroring(struct ice_vsi *vsi);
188 static int ice_get_tx_rx_equalizations(struct ice_hw *hw, u8 serdes_num,
189 				       struct ice_serdes_equalization *ptr);
190 static int ice_fec_counter_read(struct ice_hw *hw, u32 receiver_id,
191 				u32 reg_offset, u16 *output);
192 static int ice_get_port_fec_stats(struct ice_hw *hw, u16 pcs_quad, u16 pcs_port,
193 				  struct ice_fec_stats_to_sysctl *fec_stats);
194 static bool ice_is_serdes_muxed(struct ice_hw *hw);
195 static int ice_get_maxspeed(struct ice_hw *hw, u8 lport, u8 *max_speed);
196 static int ice_update_port_topology(u8 lport,
197 				    struct ice_port_topology *port_topology,
198 				    bool is_muxed);
199 static int ice_get_port_topology(struct ice_hw *hw, u8 lport,
200 				 struct ice_port_topology *port_topology);
201 
202 static int ice_module_init(void);
203 static int ice_module_exit(void);
204 
205 /*
206  * package version comparison functions
207  */
208 static bool pkg_ver_empty(struct ice_pkg_ver *pkg_ver, u8 *pkg_name);
209 static int pkg_ver_compatible(struct ice_pkg_ver *pkg_ver);
210 
211 /*
212  * dynamic sysctl handlers
213  */
214 static int ice_sysctl_show_fw(SYSCTL_HANDLER_ARGS);
215 static int ice_sysctl_pkg_version(SYSCTL_HANDLER_ARGS);
216 static int ice_sysctl_os_pkg_version(SYSCTL_HANDLER_ARGS);
217 static int ice_sysctl_dump_mac_filters(SYSCTL_HANDLER_ARGS);
218 static int ice_sysctl_dump_vlan_filters(SYSCTL_HANDLER_ARGS);
219 static int ice_sysctl_dump_ethertype_filters(SYSCTL_HANDLER_ARGS);
220 static int ice_sysctl_dump_ethertype_mac_filters(SYSCTL_HANDLER_ARGS);
221 static int ice_sysctl_current_speed(SYSCTL_HANDLER_ARGS);
222 static int ice_sysctl_request_reset(SYSCTL_HANDLER_ARGS);
223 static int ice_sysctl_dump_state_flags(SYSCTL_HANDLER_ARGS);
224 static int ice_sysctl_fec_config(SYSCTL_HANDLER_ARGS);
225 static int ice_sysctl_fc_config(SYSCTL_HANDLER_ARGS);
226 static int ice_sysctl_negotiated_fc(SYSCTL_HANDLER_ARGS);
227 static int ice_sysctl_negotiated_fec(SYSCTL_HANDLER_ARGS);
228 static int ice_sysctl_phy_type_low(SYSCTL_HANDLER_ARGS);
229 static int ice_sysctl_phy_type_high(SYSCTL_HANDLER_ARGS);
230 static int __ice_sysctl_phy_type_handler(SYSCTL_HANDLER_ARGS,
231 					 bool is_phy_type_high);
232 static int ice_sysctl_advertise_speed(SYSCTL_HANDLER_ARGS);
233 static int ice_sysctl_rx_itr(SYSCTL_HANDLER_ARGS);
234 static int ice_sysctl_tx_itr(SYSCTL_HANDLER_ARGS);
235 static int ice_sysctl_fw_lldp_agent(SYSCTL_HANDLER_ARGS);
236 static int ice_sysctl_fw_cur_lldp_persist_status(SYSCTL_HANDLER_ARGS);
237 static int ice_sysctl_fw_dflt_lldp_persist_status(SYSCTL_HANDLER_ARGS);
238 static int ice_sysctl_phy_caps(SYSCTL_HANDLER_ARGS, u8 report_mode);
239 static int ice_sysctl_phy_sw_caps(SYSCTL_HANDLER_ARGS);
240 static int ice_sysctl_phy_nvm_caps(SYSCTL_HANDLER_ARGS);
241 static int ice_sysctl_phy_topo_caps(SYSCTL_HANDLER_ARGS);
242 static int ice_sysctl_phy_link_status(SYSCTL_HANDLER_ARGS);
243 static int ice_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS);
244 static int ice_sysctl_tx_cso_stat(SYSCTL_HANDLER_ARGS);
245 static int ice_sysctl_rx_cso_stat(SYSCTL_HANDLER_ARGS);
246 static int ice_sysctl_pba_number(SYSCTL_HANDLER_ARGS);
247 static int ice_sysctl_rx_errors_stat(SYSCTL_HANDLER_ARGS);
248 static int ice_sysctl_dump_dcbx_cfg(SYSCTL_HANDLER_ARGS);
249 static int ice_sysctl_dump_vsi_cfg(SYSCTL_HANDLER_ARGS);
250 static int ice_sysctl_dump_phy_stats(SYSCTL_HANDLER_ARGS);
251 static int ice_sysctl_ets_min_rate(SYSCTL_HANDLER_ARGS);
252 static int ice_sysctl_up2tc_map(SYSCTL_HANDLER_ARGS);
253 static int ice_sysctl_pfc_config(SYSCTL_HANDLER_ARGS);
254 static int ice_sysctl_query_port_ets(SYSCTL_HANDLER_ARGS);
255 static int ice_sysctl_dscp2tc_map(SYSCTL_HANDLER_ARGS);
256 static int ice_sysctl_pfc_mode(SYSCTL_HANDLER_ARGS);
257 static int ice_sysctl_fw_debug_dump_cluster_setting(SYSCTL_HANDLER_ARGS);
258 static int ice_sysctl_fw_debug_dump_do_dump(SYSCTL_HANDLER_ARGS);
259 static int ice_sysctl_allow_no_fec_mod_in_auto(SYSCTL_HANDLER_ARGS);
260 static int ice_sysctl_set_link_active(SYSCTL_HANDLER_ARGS);
261 static int ice_sysctl_debug_set_link(SYSCTL_HANDLER_ARGS);
262 static int ice_sysctl_temperature(SYSCTL_HANDLER_ARGS);
263 static int ice_sysctl_create_mirror_interface(SYSCTL_HANDLER_ARGS);
264 static int ice_sysctl_destroy_mirror_interface(SYSCTL_HANDLER_ARGS);
265 
266 /**
267  * ice_map_bar - Map PCIe BAR memory
268  * @dev: the PCIe device
269  * @bar: the BAR info structure
270  * @bar_num: PCIe BAR number
271  *
272  * Maps the specified PCIe BAR. Stores the mapping data in struct
273  * ice_bar_info.
274  */
275 int
276 ice_map_bar(device_t dev, struct ice_bar_info *bar, int bar_num)
277 {
278 	if (bar->res != NULL) {
279 		device_printf(dev, "PCI BAR%d already mapped\n", bar_num);
280 		return (EDOOFUS);
281 	}
282 
283 	bar->rid = PCIR_BAR(bar_num);
284 	bar->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar->rid,
285 					  RF_ACTIVE);
286 	if (!bar->res) {
287 		device_printf(dev, "PCI BAR%d mapping failed\n", bar_num);
288 		return (ENXIO);
289 	}
290 
291 	bar->tag = rman_get_bustag(bar->res);
292 	bar->handle = rman_get_bushandle(bar->res);
293 	bar->size = rman_get_size(bar->res);
294 
295 	return (0);
296 }
297 
298 /**
299  * ice_free_bar - Free PCIe BAR memory
300  * @dev: the PCIe device
301  * @bar: the BAR info structure
302  *
303  * Frees the specified PCIe BAR, releasing its resources.
304  */
305 void
306 ice_free_bar(device_t dev, struct ice_bar_info *bar)
307 {
308 	if (bar->res != NULL)
309 		bus_release_resource(dev, SYS_RES_MEMORY, bar->rid, bar->res);
310 	bar->res = NULL;
311 }
312 
313 /**
314  * ice_set_ctrlq_len - Configure ctrlq lengths for a device
315  * @hw: the device hardware structure
316  *
317  * Configures the control queues for the given device, setting up the
318  * specified lengths, prior to initializing hardware.
319  */
320 void
321 ice_set_ctrlq_len(struct ice_hw *hw)
322 {
323 	hw->adminq.num_rq_entries = ICE_AQ_LEN;
324 	hw->adminq.num_sq_entries = ICE_AQ_LEN;
325 	hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;
326 	hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
327 
328 	hw->mailboxq.num_rq_entries = ICE_MBXQ_LEN;
329 	hw->mailboxq.num_sq_entries = ICE_MBXQ_LEN;
330 	hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
331 	hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
332 
333 	hw->sbq.num_rq_entries = ICE_SBQ_LEN;
334 	hw->sbq.num_sq_entries = ICE_SBQ_LEN;
335 	hw->sbq.rq_buf_size = ICE_SBQ_MAX_BUF_LEN;
336 	hw->sbq.sq_buf_size = ICE_SBQ_MAX_BUF_LEN;
337 }
338 
/**
 * ice_get_next_vsi - Find the first unused entry in a VSI pointer table
 * @all_vsi: table of VSI pointers; a NULL entry marks a free slot
 * @size: number of entries in @all_vsi
 *
 * Scans the table from index 0 upwards.
 *
 * @returns the index of the first NULL entry, or @size (one past the last
 * valid index) when every entry is in use.
 */
static int
ice_get_next_vsi(struct ice_vsi **all_vsi, int size)
{
	int slot = 0;

	while (slot < size) {
		if (all_vsi[slot] == NULL)
			return (slot);
		slot++;
	}

	/* Table is full */
	return (size);
}
359 
360 /**
361  * ice_setup_vsi_common - Common VSI setup for both dynamic and static VSIs
362  * @sc: the device private softc structure
363  * @vsi: the VSI to setup
364  * @type: the VSI type of the new VSI
365  * @idx: the index in the all_vsi array to use
366  * @dynamic: whether this VSI memory was dynamically allocated
367  *
368  * Perform setup for a VSI that is common to both dynamically allocated VSIs
369  * and the static PF VSI which is embedded in the softc structure.
370  */
371 static void
372 ice_setup_vsi_common(struct ice_softc *sc, struct ice_vsi *vsi,
373 		     enum ice_vsi_type type, int idx, bool dynamic)
374 {
375 	/* Store important values in VSI struct */
376 	vsi->type = type;
377 	vsi->sc = sc;
378 	vsi->idx = idx;
379 	sc->all_vsi[idx] = vsi;
380 	vsi->dynamic = dynamic;
381 
382 	/* Set default mirroring rule information */
383 	vsi->rule_mir_ingress = ICE_INVAL_MIRROR_RULE_ID;
384 	vsi->rule_mir_egress = ICE_INVAL_MIRROR_RULE_ID;
385 
386 	/* Setup the VSI tunables now */
387 	ice_add_vsi_tunables(vsi, sc->vsi_sysctls);
388 }
389 
390 /**
391  * ice_alloc_vsi - Allocate a dynamic VSI
392  * @sc: device softc structure
393  * @type: VSI type
394  *
395  * Allocates a new dynamic VSI structure and inserts it into the VSI list.
396  */
397 struct ice_vsi *
398 ice_alloc_vsi(struct ice_softc *sc, enum ice_vsi_type type)
399 {
400 	struct ice_vsi *vsi;
401 	int idx;
402 
403 	/* Find an open index for a new VSI to be allocated. If the returned
404 	 * index is >= the num_available_vsi then it means no slot is
405 	 * available.
406 	 */
407 	idx = ice_get_next_vsi(sc->all_vsi, sc->num_available_vsi);
408 	if (idx >= sc->num_available_vsi) {
409 		device_printf(sc->dev, "No available VSI slots\n");
410 		return NULL;
411 	}
412 
413 	vsi = (struct ice_vsi *)malloc(sizeof(*vsi), M_ICE, M_NOWAIT | M_ZERO);
414 	if (!vsi) {
415 		device_printf(sc->dev, "Unable to allocate VSI memory\n");
416 		return NULL;
417 	}
418 
419 	ice_setup_vsi_common(sc, vsi, type, idx, true);
420 
421 	return vsi;
422 }
423 
424 /**
425  * ice_setup_pf_vsi - Setup the PF VSI
426  * @sc: the device private softc
427  *
428  * Setup the PF VSI structure which is embedded as sc->pf_vsi in the device
429  * private softc. Unlike other VSIs, the PF VSI memory is allocated as part of
430  * the softc memory, instead of being dynamically allocated at creation.
431  */
432 void
433 ice_setup_pf_vsi(struct ice_softc *sc)
434 {
435 	ice_setup_vsi_common(sc, &sc->pf_vsi, ICE_VSI_PF, 0, false);
436 }
437 
438 /**
439  * ice_alloc_vsi_qmap
440  * @vsi: VSI structure
441  * @max_tx_queues: Number of transmit queues to identify
442  * @max_rx_queues: Number of receive queues to identify
443  *
444  * Allocates a max_[t|r]x_queues array of words for the VSI where each
445  * word contains the index of the queue it represents.  In here, all
446  * words are initialized to an index of ICE_INVALID_RES_IDX, indicating
447  * all queues for this VSI are not yet assigned an index and thus,
448  * not ready for use.
449  *
450  */
451 void
452 ice_alloc_vsi_qmap(struct ice_vsi *vsi, const int max_tx_queues,
453 		   const int max_rx_queues)
454 {
455 	int i;
456 
457 	MPASS(max_tx_queues > 0);
458 	MPASS(max_rx_queues > 0);
459 
460 	/* Allocate Tx queue mapping memory */
461 	vsi->tx_qmap = malloc(sizeof(u16) * max_tx_queues, M_ICE, M_WAITOK);
462 
463 	/* Allocate Rx queue mapping memory */
464 	vsi->rx_qmap = malloc(sizeof(u16) * max_rx_queues, M_ICE, M_WAITOK);
465 
466 	/* Mark every queue map as invalid to start with */
467 	for (i = 0; i < max_tx_queues; i++) {
468 		vsi->tx_qmap[i] = ICE_INVALID_RES_IDX;
469 	}
470 	for (i = 0; i < max_rx_queues; i++) {
471 		vsi->rx_qmap[i] = ICE_INVALID_RES_IDX;
472 	}
473 }
474 
475 /**
476  * ice_free_vsi_qmaps - Free the PF qmaps associated with a VSI
477  * @vsi: the VSI private structure
478  *
479  * Frees the PF qmaps associated with the given VSI. Generally this will be
480  * called by ice_release_vsi, but may need to be called during attach cleanup,
481  * depending on when the qmaps were allocated.
482  */
483 void
484 ice_free_vsi_qmaps(struct ice_vsi *vsi)
485 {
486 	struct ice_softc *sc = vsi->sc;
487 
488 	if (vsi->tx_qmap) {
489 		ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap,
490 					   vsi->num_tx_queues);
491 		free(vsi->tx_qmap, M_ICE);
492 		vsi->tx_qmap = NULL;
493 	}
494 
495 	if (vsi->rx_qmap) {
496 		ice_resmgr_release_map(&sc->rx_qmgr, vsi->rx_qmap,
497 					   vsi->num_rx_queues);
498 		free(vsi->rx_qmap, M_ICE);
499 		vsi->rx_qmap = NULL;
500 	}
501 }
502 
503 /**
504  * ice_set_default_vsi_ctx - Setup default VSI context parameters
505  * @ctx: the VSI context to initialize
506  *
507  * Initialize and prepare a default VSI context for configuring a new VSI.
508  */
509 static void
510 ice_set_default_vsi_ctx(struct ice_vsi_ctx *ctx)
511 {
512 	u32 table = 0;
513 
514 	memset(&ctx->info, 0, sizeof(ctx->info));
515 	/* VSI will be allocated from shared pool */
516 	ctx->alloc_from_pool = true;
517 	/* Enable source pruning by default */
518 	ctx->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE;
519 	/* Traffic from VSI can be sent to LAN */
520 	ctx->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA;
521 	/* Allow all packets untagged/tagged */
522 	ctx->info.inner_vlan_flags = ((ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL &
523 				       ICE_AQ_VSI_INNER_VLAN_TX_MODE_M) >>
524 				       ICE_AQ_VSI_INNER_VLAN_TX_MODE_S);
525 	/* Show VLAN/UP from packets in Rx descriptors */
526 	ctx->info.inner_vlan_flags |= ((ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH &
527 					ICE_AQ_VSI_INNER_VLAN_EMODE_M) >>
528 					ICE_AQ_VSI_INNER_VLAN_EMODE_S);
529 	/* Have 1:1 UP mapping for both ingress/egress tables */
530 	table |= ICE_UP_TABLE_TRANSLATE(0, 0);
531 	table |= ICE_UP_TABLE_TRANSLATE(1, 1);
532 	table |= ICE_UP_TABLE_TRANSLATE(2, 2);
533 	table |= ICE_UP_TABLE_TRANSLATE(3, 3);
534 	table |= ICE_UP_TABLE_TRANSLATE(4, 4);
535 	table |= ICE_UP_TABLE_TRANSLATE(5, 5);
536 	table |= ICE_UP_TABLE_TRANSLATE(6, 6);
537 	table |= ICE_UP_TABLE_TRANSLATE(7, 7);
538 	ctx->info.ingress_table = CPU_TO_LE32(table);
539 	ctx->info.egress_table = CPU_TO_LE32(table);
540 	/* Have 1:1 UP mapping for outer to inner UP table */
541 	ctx->info.outer_up_table = CPU_TO_LE32(table);
542 	/* No Outer tag support, so outer_vlan_flags remains zero */
543 }
544 
545 /**
546  * ice_set_rss_vsi_ctx - Setup VSI context parameters for RSS
547  * @ctx: the VSI context to configure
548  * @type: the VSI type
549  *
550  * Configures the VSI context for RSS, based on the VSI type.
551  */
552 static void
553 ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctx, enum ice_vsi_type type)
554 {
555 	u8 lut_type, hash_type;
556 
557 	switch (type) {
558 	case ICE_VSI_PF:
559 		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF;
560 		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
561 		break;
562 	case ICE_VSI_VF:
563 	case ICE_VSI_VMDQ2:
564 		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI;
565 		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
566 		break;
567 	default:
568 		/* Other VSI types do not support RSS */
569 		return;
570 	}
571 
572 	ctx->info.q_opt_rss = (((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) &
573 				 ICE_AQ_VSI_Q_OPT_RSS_LUT_M) |
574 				((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) &
575 				 ICE_AQ_VSI_Q_OPT_RSS_HASH_M));
576 }
577 
578 /**
579  * ice_setup_vsi_qmap - Setup the queue mapping for a VSI
580  * @vsi: the VSI to configure
581  * @ctx: the VSI context to configure
582  *
583  * Configures the context for the given VSI, setting up how the firmware
584  * should map the queues for this VSI.
585  *
586  * @pre vsi->qmap_type is set to a valid type
587  */
588 static int
589 ice_setup_vsi_qmap(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx)
590 {
591 	int pow = 0;
592 	u16 qmap;
593 
594 	MPASS(vsi->rx_qmap != NULL);
595 
596 	switch (vsi->qmap_type) {
597 	case ICE_RESMGR_ALLOC_CONTIGUOUS:
598 		ctx->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_CONTIG);
599 
600 		ctx->info.q_mapping[0] = CPU_TO_LE16(vsi->rx_qmap[0]);
601 		ctx->info.q_mapping[1] = CPU_TO_LE16(vsi->num_rx_queues);
602 
603 		break;
604 	case ICE_RESMGR_ALLOC_SCATTERED:
605 		ctx->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_NONCONTIG);
606 
607 		for (int i = 0; i < vsi->num_rx_queues; i++)
608 			ctx->info.q_mapping[i] = CPU_TO_LE16(vsi->rx_qmap[i]);
609 		break;
610 	default:
611 		return (EOPNOTSUPP);
612 	}
613 
614 	/* Calculate the next power-of-2 of number of queues */
615 	if (vsi->num_rx_queues)
616 		pow = flsl(vsi->num_rx_queues - 1);
617 
618 	/* Assign all the queues to traffic class zero */
619 	qmap = (pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M;
620 	ctx->info.tc_mapping[0] = CPU_TO_LE16(qmap);
621 
622 	/* Fill out default driver TC queue info for VSI */
623 	vsi->tc_info[0].qoffset = 0;
624 	vsi->tc_info[0].qcount_rx = vsi->num_rx_queues;
625 	vsi->tc_info[0].qcount_tx = vsi->num_tx_queues;
626 	for (int i = 1; i < ICE_MAX_TRAFFIC_CLASS; i++) {
627 		vsi->tc_info[i].qoffset = 0;
628 		vsi->tc_info[i].qcount_rx = 1;
629 		vsi->tc_info[i].qcount_tx = 1;
630 	}
631 	vsi->tc_map = 0x1;
632 
633 	return 0;
634 }
635 
636 /**
637  * ice_setup_vsi_mirroring -- Setup a VSI for mirroring PF VSI traffic
638  * @vsi: VSI to setup
639  *
640  * @pre vsi->mirror_src_vsi is set to the SW VSI num that traffic is to be
641  * mirrored from
642  *
643  * Returns 0 on success, EINVAL on failure.
644  */
645 int
646 ice_setup_vsi_mirroring(struct ice_vsi *vsi)
647 {
648 	struct ice_mir_rule_buf rule = { };
649 	struct ice_softc *sc = vsi->sc;
650 	struct ice_hw *hw = &sc->hw;
651 	device_t dev = sc->dev;
652 	int status;
653 	u16 rule_id, dest_vsi;
654 	u16 count = 1;
655 
656 	rule.vsi_idx = ice_get_hw_vsi_num(hw, vsi->mirror_src_vsi);
657 	rule.add = true;
658 
659 	dest_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
660 	rule_id = ICE_INVAL_MIRROR_RULE_ID;
661 	status = ice_aq_add_update_mir_rule(hw, ICE_AQC_RULE_TYPE_VPORT_INGRESS,
662 					    dest_vsi, count, &rule, NULL,
663 					    &rule_id);
664 	if (status) {
665 		device_printf(dev,
666 		    "Could not add INGRESS rule for mirror vsi %d to vsi %d, err %s aq_err %s\n",
667 		    rule.vsi_idx, dest_vsi, ice_status_str(status),
668 		    ice_aq_str(hw->adminq.sq_last_status));
669 		return (EINVAL);
670 	}
671 
672 	vsi->rule_mir_ingress = rule_id;
673 
674 	rule_id = ICE_INVAL_MIRROR_RULE_ID;
675 	status = ice_aq_add_update_mir_rule(hw, ICE_AQC_RULE_TYPE_VPORT_EGRESS,
676 					    dest_vsi, count, &rule, NULL, &rule_id);
677 	if (status) {
678 		device_printf(dev,
679 		    "Could not add EGRESS rule for mirror vsi %d to vsi %d, err %s aq_err %s\n",
680 		    rule.vsi_idx, dest_vsi, ice_status_str(status),
681 		    ice_aq_str(hw->adminq.sq_last_status));
682 		return (EINVAL);
683 	}
684 
685 	vsi->rule_mir_egress = rule_id;
686 
687 	return (0);
688 }
689 
690 /**
691  * ice_remove_vsi_mirroring -- Teardown any VSI mirroring rules
692  * @vsi: VSI to remove mirror rules from
693  */
694 static void
695 ice_remove_vsi_mirroring(struct ice_vsi *vsi)
696 {
697 	struct ice_hw *hw = &vsi->sc->hw;
698 	int status = 0;
699 	bool keep_alloc = false;
700 
701 	if (vsi->rule_mir_ingress != ICE_INVAL_MIRROR_RULE_ID)
702 		status = ice_aq_delete_mir_rule(hw, vsi->rule_mir_ingress, keep_alloc, NULL);
703 
704 	if (status)
705 		device_printf(vsi->sc->dev, "Could not remove mirror VSI ingress rule, err %s aq_err %s\n",
706 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
707 
708 	status = 0;
709 
710 	if (vsi->rule_mir_egress != ICE_INVAL_MIRROR_RULE_ID)
711 		status = ice_aq_delete_mir_rule(hw, vsi->rule_mir_egress, keep_alloc, NULL);
712 
713 	if (status)
714 		device_printf(vsi->sc->dev, "Could not remove mirror VSI egress rule, err %s aq_err %s\n",
715 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
716 }
717 
718 /**
719  * ice_initialize_vsi - Initialize a VSI for use
720  * @vsi: the vsi to initialize
721  *
722  * Initialize a VSI over the adminq and prepare it for operation.
723  *
724  * @pre vsi->num_tx_queues is set
725  * @pre vsi->num_rx_queues is set
726  */
727 int
728 ice_initialize_vsi(struct ice_vsi *vsi)
729 {
730 	struct ice_vsi_ctx ctx = { 0 };
731 	struct ice_hw *hw = &vsi->sc->hw;
732 	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
733 	int status;
734 	int err;
735 
736 	/* For now, we only have code supporting PF VSIs */
737 	switch (vsi->type) {
738 	case ICE_VSI_PF:
739 		ctx.flags = ICE_AQ_VSI_TYPE_PF;
740 		break;
741 	case ICE_VSI_VMDQ2:
742 		ctx.flags = ICE_AQ_VSI_TYPE_VMDQ2;
743 		break;
744 	default:
745 		return (ENODEV);
746 	}
747 
748 	ice_set_default_vsi_ctx(&ctx);
749 	ice_set_rss_vsi_ctx(&ctx, vsi->type);
750 
751 	/* XXX: VSIs of other types may need different port info? */
752 	ctx.info.sw_id = hw->port_info->sw_id;
753 
754 	/* Set some RSS parameters based on the VSI type */
755 	ice_vsi_set_rss_params(vsi);
756 
757 	/* Initialize the Rx queue mapping for this VSI */
758 	err = ice_setup_vsi_qmap(vsi, &ctx);
759 	if (err) {
760 		return err;
761 	}
762 
763 	/* (Re-)add VSI to HW VSI handle list */
764 	status = ice_add_vsi(hw, vsi->idx, &ctx, NULL);
765 	if (status != 0) {
766 		device_printf(vsi->sc->dev,
767 		    "Add VSI AQ call failed, err %s aq_err %s\n",
768 		    ice_status_str(status),
769 		    ice_aq_str(hw->adminq.sq_last_status));
770 		return (EIO);
771 	}
772 	vsi->info = ctx.info;
773 
774 	/* Initialize VSI with just 1 TC to start */
775 	max_txqs[0] = vsi->num_tx_queues;
776 
777 	status = ice_cfg_vsi_lan(hw->port_info, vsi->idx,
778 			      ICE_DFLT_TRAFFIC_CLASS, max_txqs);
779 	if (status) {
780 		device_printf(vsi->sc->dev,
781 		    "Failed VSI lan queue config, err %s aq_err %s\n",
782 		    ice_status_str(status),
783 		    ice_aq_str(hw->adminq.sq_last_status));
784 		ice_deinit_vsi(vsi);
785 		return (ENODEV);
786 	}
787 
788 	/* Reset VSI stats */
789 	ice_reset_vsi_stats(vsi);
790 
791 	return 0;
792 }
793 
794 /**
795  * ice_deinit_vsi - Tell firmware to release resources for a VSI
796  * @vsi: the VSI to release
797  *
798  * Helper function which requests the firmware to release the hardware
799  * resources associated with a given VSI.
800  */
801 void
802 ice_deinit_vsi(struct ice_vsi *vsi)
803 {
804 	struct ice_vsi_ctx ctx = { 0 };
805 	struct ice_softc *sc = vsi->sc;
806 	struct ice_hw *hw = &sc->hw;
807 	int status;
808 
809 	/* Assert that the VSI pointer matches in the list */
810 	MPASS(vsi == sc->all_vsi[vsi->idx]);
811 
812 	ctx.info = vsi->info;
813 
814 	status = ice_rm_vsi_lan_cfg(hw->port_info, vsi->idx);
815 	if (status) {
816 		/*
817 		 * This should only fail if the VSI handle is invalid, or if
818 		 * any of the nodes have leaf nodes which are still in use.
819 		 */
820 		device_printf(sc->dev,
821 			      "Unable to remove scheduler nodes for VSI %d, err %s\n",
822 			      vsi->idx, ice_status_str(status));
823 	}
824 
825 	/* Tell firmware to release the VSI resources */
826 	status = ice_free_vsi(hw, vsi->idx, &ctx, false, NULL);
827 	if (status != 0) {
828 		device_printf(sc->dev,
829 		    "Free VSI %u AQ call failed, err %s aq_err %s\n",
830 		    vsi->idx, ice_status_str(status),
831 		    ice_aq_str(hw->adminq.sq_last_status));
832 	}
833 }
834 
835 /**
836  * ice_release_vsi - Release resources associated with a VSI
837  * @vsi: the VSI to release
838  *
839  * Release software and firmware resources associated with a VSI. Release the
840  * queue managers associated with this VSI. Also free the VSI structure memory
841  * if the VSI was allocated dynamically using ice_alloc_vsi().
842  */
843 void
844 ice_release_vsi(struct ice_vsi *vsi)
845 {
846 	struct ice_softc *sc = vsi->sc;
847 	int idx = vsi->idx;
848 
849 	/* Assert that the VSI pointer matches in the list */
850 	MPASS(vsi == sc->all_vsi[idx]);
851 
852 	/* Cleanup RSS configuration */
853 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_RSS))
854 		ice_clean_vsi_rss_cfg(vsi);
855 
856 	ice_del_vsi_sysctl_ctx(vsi);
857 
858 	/* Remove the configured mirror rule, if it exists */
859 	ice_remove_vsi_mirroring(vsi);
860 
861 	/*
862 	 * If we unload the driver after a reset fails, we do not need to do
863 	 * this step.
864 	 */
865 	if (!ice_test_state(&sc->state, ICE_STATE_RESET_FAILED))
866 		ice_deinit_vsi(vsi);
867 
868 	ice_free_vsi_qmaps(vsi);
869 
870 	if (vsi->dynamic) {
871 		free(sc->all_vsi[idx], M_ICE);
872 	}
873 
874 	sc->all_vsi[idx] = NULL;
875 }
876 
877 /**
878  * ice_aq_speed_to_rate - Convert AdminQ speed enum to baudrate
879  * @pi: port info data
880  *
881  * Returns the baudrate value for the current link speed of a given port.
882  */
883 uint64_t
884 ice_aq_speed_to_rate(struct ice_port_info *pi)
885 {
886 	switch (pi->phy.link_info.link_speed) {
887 	case ICE_AQ_LINK_SPEED_200GB:
888 		return IF_Gbps(200);
889 	case ICE_AQ_LINK_SPEED_100GB:
890 		return IF_Gbps(100);
891 	case ICE_AQ_LINK_SPEED_50GB:
892 		return IF_Gbps(50);
893 	case ICE_AQ_LINK_SPEED_40GB:
894 		return IF_Gbps(40);
895 	case ICE_AQ_LINK_SPEED_25GB:
896 		return IF_Gbps(25);
897 	case ICE_AQ_LINK_SPEED_10GB:
898 		return IF_Gbps(10);
899 	case ICE_AQ_LINK_SPEED_5GB:
900 		return IF_Gbps(5);
901 	case ICE_AQ_LINK_SPEED_2500MB:
902 		return IF_Mbps(2500);
903 	case ICE_AQ_LINK_SPEED_1000MB:
904 		return IF_Mbps(1000);
905 	case ICE_AQ_LINK_SPEED_100MB:
906 		return IF_Mbps(100);
907 	case ICE_AQ_LINK_SPEED_10MB:
908 		return IF_Mbps(10);
909 	case ICE_AQ_LINK_SPEED_UNKNOWN:
910 	default:
911 		/* return 0 if we don't know the link speed */
912 		return 0;
913 	}
914 }
915 
916 /**
917  * ice_aq_speed_to_str - Convert AdminQ speed enum to string representation
918  * @pi: port info data
919  *
920  * Returns the string representation of the current link speed for a given
921  * port.
922  */
923 static const char *
924 ice_aq_speed_to_str(struct ice_port_info *pi)
925 {
926 	switch (pi->phy.link_info.link_speed) {
927 	case ICE_AQ_LINK_SPEED_200GB:
928 		return "200 Gbps";
929 	case ICE_AQ_LINK_SPEED_100GB:
930 		return "100 Gbps";
931 	case ICE_AQ_LINK_SPEED_50GB:
932 		return "50 Gbps";
933 	case ICE_AQ_LINK_SPEED_40GB:
934 		return "40 Gbps";
935 	case ICE_AQ_LINK_SPEED_25GB:
936 		return "25 Gbps";
937 	case ICE_AQ_LINK_SPEED_20GB:
938 		return "20 Gbps";
939 	case ICE_AQ_LINK_SPEED_10GB:
940 		return "10 Gbps";
941 	case ICE_AQ_LINK_SPEED_5GB:
942 		return "5 Gbps";
943 	case ICE_AQ_LINK_SPEED_2500MB:
944 		return "2.5 Gbps";
945 	case ICE_AQ_LINK_SPEED_1000MB:
946 		return "1 Gbps";
947 	case ICE_AQ_LINK_SPEED_100MB:
948 		return "100 Mbps";
949 	case ICE_AQ_LINK_SPEED_10MB:
950 		return "10 Mbps";
951 	case ICE_AQ_LINK_SPEED_UNKNOWN:
952 	default:
953 		return "Unknown speed";
954 	}
955 }
956 
957 /**
958  * ice_get_phy_type_low - Get media associated with phy_type_low
959  * @phy_type_low: the low 64bits of phy_type from the AdminQ
960  *
961  * Given the lower 64bits of the phy_type from the hardware, return the
962  * ifm_active bit associated. Return IFM_UNKNOWN when phy_type_low is unknown.
963  * Note that only one of ice_get_phy_type_low or ice_get_phy_type_high should
964  * be called. If phy_type_low is zero, call ice_phy_type_high.
965  */
966 int
967 ice_get_phy_type_low(uint64_t phy_type_low)
968 {
969 	switch (phy_type_low) {
970 	case ICE_PHY_TYPE_LOW_100BASE_TX:
971 		return IFM_100_TX;
972 	case ICE_PHY_TYPE_LOW_100M_SGMII:
973 		return IFM_100_SGMII;
974 	case ICE_PHY_TYPE_LOW_1000BASE_T:
975 		return IFM_1000_T;
976 	case ICE_PHY_TYPE_LOW_1000BASE_SX:
977 		return IFM_1000_SX;
978 	case ICE_PHY_TYPE_LOW_1000BASE_LX:
979 		return IFM_1000_LX;
980 	case ICE_PHY_TYPE_LOW_1000BASE_KX:
981 		return IFM_1000_KX;
982 	case ICE_PHY_TYPE_LOW_1G_SGMII:
983 		return IFM_1000_SGMII;
984 	case ICE_PHY_TYPE_LOW_2500BASE_T:
985 		return IFM_2500_T;
986 	case ICE_PHY_TYPE_LOW_2500BASE_X:
987 		return IFM_2500_X;
988 	case ICE_PHY_TYPE_LOW_2500BASE_KX:
989 		return IFM_2500_KX;
990 	case ICE_PHY_TYPE_LOW_5GBASE_T:
991 		return IFM_5000_T;
992 	case ICE_PHY_TYPE_LOW_5GBASE_KR:
993 		return IFM_5000_KR;
994 	case ICE_PHY_TYPE_LOW_10GBASE_T:
995 		return IFM_10G_T;
996 	case ICE_PHY_TYPE_LOW_10G_SFI_DA:
997 		return IFM_10G_TWINAX;
998 	case ICE_PHY_TYPE_LOW_10GBASE_SR:
999 		return IFM_10G_SR;
1000 	case ICE_PHY_TYPE_LOW_10GBASE_LR:
1001 		return IFM_10G_LR;
1002 	case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1:
1003 		return IFM_10G_KR;
1004 	case ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC:
1005 		return IFM_10G_AOC;
1006 	case ICE_PHY_TYPE_LOW_10G_SFI_C2C:
1007 		return IFM_10G_SFI;
1008 	case ICE_PHY_TYPE_LOW_25GBASE_T:
1009 		return IFM_25G_T;
1010 	case ICE_PHY_TYPE_LOW_25GBASE_CR:
1011 		return IFM_25G_CR;
1012 	case ICE_PHY_TYPE_LOW_25GBASE_CR_S:
1013 		return IFM_25G_CR_S;
1014 	case ICE_PHY_TYPE_LOW_25GBASE_CR1:
1015 		return IFM_25G_CR1;
1016 	case ICE_PHY_TYPE_LOW_25GBASE_SR:
1017 		return IFM_25G_SR;
1018 	case ICE_PHY_TYPE_LOW_25GBASE_LR:
1019 		return IFM_25G_LR;
1020 	case ICE_PHY_TYPE_LOW_25GBASE_KR:
1021 		return IFM_25G_KR;
1022 	case ICE_PHY_TYPE_LOW_25GBASE_KR_S:
1023 		return IFM_25G_KR_S;
1024 	case ICE_PHY_TYPE_LOW_25GBASE_KR1:
1025 		return IFM_25G_KR1;
1026 	case ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC:
1027 		return IFM_25G_AOC;
1028 	case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
1029 		return IFM_25G_AUI;
1030 	case ICE_PHY_TYPE_LOW_40GBASE_CR4:
1031 		return IFM_40G_CR4;
1032 	case ICE_PHY_TYPE_LOW_40GBASE_SR4:
1033 		return IFM_40G_SR4;
1034 	case ICE_PHY_TYPE_LOW_40GBASE_LR4:
1035 		return IFM_40G_LR4;
1036 	case ICE_PHY_TYPE_LOW_40GBASE_KR4:
1037 		return IFM_40G_KR4;
1038 	case ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC:
1039 		return IFM_40G_XLAUI_AC;
1040 	case ICE_PHY_TYPE_LOW_40G_XLAUI:
1041 		return IFM_40G_XLAUI;
1042 	case ICE_PHY_TYPE_LOW_50GBASE_CR2:
1043 		return IFM_50G_CR2;
1044 	case ICE_PHY_TYPE_LOW_50GBASE_SR2:
1045 		return IFM_50G_SR2;
1046 	case ICE_PHY_TYPE_LOW_50GBASE_LR2:
1047 		return IFM_50G_LR2;
1048 	case ICE_PHY_TYPE_LOW_50GBASE_KR2:
1049 		return IFM_50G_KR2;
1050 	case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC:
1051 		return IFM_50G_LAUI2_AC;
1052 	case ICE_PHY_TYPE_LOW_50G_LAUI2:
1053 		return IFM_50G_LAUI2;
1054 	case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC:
1055 		return IFM_50G_AUI2_AC;
1056 	case ICE_PHY_TYPE_LOW_50G_AUI2:
1057 		return IFM_50G_AUI2;
1058 	case ICE_PHY_TYPE_LOW_50GBASE_CP:
1059 		return IFM_50G_CP;
1060 	case ICE_PHY_TYPE_LOW_50GBASE_SR:
1061 		return IFM_50G_SR;
1062 	case ICE_PHY_TYPE_LOW_50GBASE_FR:
1063 		return IFM_50G_FR;
1064 	case ICE_PHY_TYPE_LOW_50GBASE_LR:
1065 		return IFM_50G_LR;
1066 	case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4:
1067 		return IFM_50G_KR_PAM4;
1068 	case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC:
1069 		return IFM_50G_AUI1_AC;
1070 	case ICE_PHY_TYPE_LOW_50G_AUI1:
1071 		return IFM_50G_AUI1;
1072 	case ICE_PHY_TYPE_LOW_100GBASE_CR4:
1073 		return IFM_100G_CR4;
1074 	case ICE_PHY_TYPE_LOW_100GBASE_SR4:
1075 		return IFM_100G_SR4;
1076 	case ICE_PHY_TYPE_LOW_100GBASE_LR4:
1077 		return IFM_100G_LR4;
1078 	case ICE_PHY_TYPE_LOW_100GBASE_KR4:
1079 		return IFM_100G_KR4;
1080 	case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC:
1081 		return IFM_100G_CAUI4_AC;
1082 	case ICE_PHY_TYPE_LOW_100G_CAUI4:
1083 		return IFM_100G_CAUI4;
1084 	case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC:
1085 		return IFM_100G_AUI4_AC;
1086 	case ICE_PHY_TYPE_LOW_100G_AUI4:
1087 		return IFM_100G_AUI4;
1088 	case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4:
1089 		return IFM_100G_CR_PAM4;
1090 	case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4:
1091 		return IFM_100G_KR_PAM4;
1092 	case ICE_PHY_TYPE_LOW_100GBASE_CP2:
1093 		return IFM_100G_CP2;
1094 	case ICE_PHY_TYPE_LOW_100GBASE_SR2:
1095 		return IFM_100G_SR2;
1096 	case ICE_PHY_TYPE_LOW_100GBASE_DR:
1097 		return IFM_100G_DR;
1098 	default:
1099 		return IFM_UNKNOWN;
1100 	}
1101 }
1102 
1103 /**
1104  * ice_get_phy_type_high - Get media associated with phy_type_high
1105  * @phy_type_high: the upper 64bits of phy_type from the AdminQ
1106  *
1107  * Given the upper 64bits of the phy_type from the hardware, return the
1108  * ifm_active bit associated. Return IFM_UNKNOWN on an unknown value. Note
1109  * that only one of ice_get_phy_type_low or ice_get_phy_type_high should be
1110  * called. If phy_type_high is zero, call ice_get_phy_type_low.
1111  */
1112 int
1113 ice_get_phy_type_high(uint64_t phy_type_high)
1114 {
1115 	switch (phy_type_high) {
1116 	case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4:
1117 		return IFM_100G_KR2_PAM4;
1118 	case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC:
1119 		return IFM_100G_CAUI2_AC;
1120 	case ICE_PHY_TYPE_HIGH_100G_CAUI2:
1121 		return IFM_100G_CAUI2;
1122 	case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC:
1123 		return IFM_100G_AUI2_AC;
1124 	case ICE_PHY_TYPE_HIGH_100G_AUI2:
1125 		return IFM_100G_AUI2;
1126 	case ICE_PHY_TYPE_HIGH_200G_CR4_PAM4:
1127 		return IFM_200G_CR4_PAM4;
1128 	case ICE_PHY_TYPE_HIGH_200G_SR4:
1129 		return IFM_200G_SR4;
1130 	case ICE_PHY_TYPE_HIGH_200G_FR4:
1131 		return IFM_200G_FR4;
1132 	case ICE_PHY_TYPE_HIGH_200G_LR4:
1133 		return IFM_200G_LR4;
1134 	case ICE_PHY_TYPE_HIGH_200G_DR4:
1135 		return IFM_200G_DR4;
1136 	case ICE_PHY_TYPE_HIGH_200G_KR4_PAM4:
1137 		return IFM_200G_KR4_PAM4;
1138 	case ICE_PHY_TYPE_HIGH_200G_AUI4_AOC_ACC:
1139 		return IFM_200G_AUI4_AC;
1140 	case ICE_PHY_TYPE_HIGH_200G_AUI4:
1141 		return IFM_200G_AUI4;
1142 	case ICE_PHY_TYPE_HIGH_200G_AUI8_AOC_ACC:
1143 		return IFM_200G_AUI8_AC;
1144 	case ICE_PHY_TYPE_HIGH_200G_AUI8:
1145 		return IFM_200G_AUI8;
1146 	default:
1147 		return IFM_UNKNOWN;
1148 	}
1149 }
1150 
1151 /**
1152  * ice_phy_types_to_max_rate - Returns port's max supported baudrate
1153  * @pi: port info struct
1154  *
1155  * ice_aq_get_phy_caps() w/ ICE_AQC_REPORT_TOPO_CAP_MEDIA parameter needs
1156  * to have been called before this function for it to work.
1157  */
1158 static uint64_t
1159 ice_phy_types_to_max_rate(struct ice_port_info *pi)
1160 {
1161 	uint64_t phy_low = pi->phy.phy_type_low;
1162 	uint64_t phy_high = pi->phy.phy_type_high;
1163 	uint64_t max_rate = 0;
1164 	int bit;
1165 
1166 	/*
1167 	 * These are based on the indices used in the BIT() macros for
1168 	 * ICE_PHY_TYPE_LOW_*
1169 	 */
1170 	static const uint64_t phy_rates[] = {
1171 	    IF_Mbps(100),
1172 	    IF_Mbps(100),
1173 	    IF_Gbps(1ULL),
1174 	    IF_Gbps(1ULL),
1175 	    IF_Gbps(1ULL),
1176 	    IF_Gbps(1ULL),
1177 	    IF_Gbps(1ULL),
1178 	    IF_Mbps(2500ULL),
1179 	    IF_Mbps(2500ULL),
1180 	    IF_Mbps(2500ULL),
1181 	    IF_Gbps(5ULL),
1182 	    IF_Gbps(5ULL),
1183 	    IF_Gbps(10ULL),
1184 	    IF_Gbps(10ULL),
1185 	    IF_Gbps(10ULL),
1186 	    IF_Gbps(10ULL),
1187 	    IF_Gbps(10ULL),
1188 	    IF_Gbps(10ULL),
1189 	    IF_Gbps(10ULL),
1190 	    IF_Gbps(25ULL),
1191 	    IF_Gbps(25ULL),
1192 	    IF_Gbps(25ULL),
1193 	    IF_Gbps(25ULL),
1194 	    IF_Gbps(25ULL),
1195 	    IF_Gbps(25ULL),
1196 	    IF_Gbps(25ULL),
1197 	    IF_Gbps(25ULL),
1198 	    IF_Gbps(25ULL),
1199 	    IF_Gbps(25ULL),
1200 	    IF_Gbps(25ULL),
1201 	    IF_Gbps(40ULL),
1202 	    IF_Gbps(40ULL),
1203 	    IF_Gbps(40ULL),
1204 	    IF_Gbps(40ULL),
1205 	    IF_Gbps(40ULL),
1206 	    IF_Gbps(40ULL),
1207 	    IF_Gbps(50ULL),
1208 	    IF_Gbps(50ULL),
1209 	    IF_Gbps(50ULL),
1210 	    IF_Gbps(50ULL),
1211 	    IF_Gbps(50ULL),
1212 	    IF_Gbps(50ULL),
1213 	    IF_Gbps(50ULL),
1214 	    IF_Gbps(50ULL),
1215 	    IF_Gbps(50ULL),
1216 	    IF_Gbps(50ULL),
1217 	    IF_Gbps(50ULL),
1218 	    IF_Gbps(50ULL),
1219 	    IF_Gbps(50ULL),
1220 	    IF_Gbps(50ULL),
1221 	    IF_Gbps(50ULL),
1222 	    IF_Gbps(100ULL),
1223 	    IF_Gbps(100ULL),
1224 	    IF_Gbps(100ULL),
1225 	    IF_Gbps(100ULL),
1226 	    IF_Gbps(100ULL),
1227 	    IF_Gbps(100ULL),
1228 	    IF_Gbps(100ULL),
1229 	    IF_Gbps(100ULL),
1230 	    IF_Gbps(100ULL),
1231 	    IF_Gbps(100ULL),
1232 	    IF_Gbps(100ULL),
1233 	    IF_Gbps(100ULL),
1234 	    IF_Gbps(100ULL),
1235 	    /* These rates are for ICE_PHY_TYPE_HIGH_* */
1236 	    IF_Gbps(100ULL),
1237 	    IF_Gbps(100ULL),
1238 	    IF_Gbps(100ULL),
1239 	    IF_Gbps(100ULL),
1240 	    IF_Gbps(100ULL),
1241 	    IF_Gbps(200ULL),
1242 	    IF_Gbps(200ULL),
1243 	    IF_Gbps(200ULL),
1244 	    IF_Gbps(200ULL),
1245 	    IF_Gbps(200ULL),
1246 	    IF_Gbps(200ULL),
1247 	    IF_Gbps(200ULL),
1248 	    IF_Gbps(200ULL),
1249 	    IF_Gbps(200ULL),
1250 	    IF_Gbps(200ULL),
1251 	};
1252 
1253 	/* coverity[address_of] */
1254 	for_each_set_bit(bit, &phy_high, 64)
1255 		if ((bit + 64) < (int)ARRAY_SIZE(phy_rates))
1256 			max_rate = uqmax(max_rate, phy_rates[(bit + 64)]);
1257 
1258 	/* coverity[address_of] */
1259 	for_each_set_bit(bit, &phy_low, 64)
1260 		max_rate = uqmax(max_rate, phy_rates[bit]);
1261 
1262 	return (max_rate);
1263 }
1264 
/*
 * An if_media type is stored in two pieces: the original 5 bit media variant
 * field, plus extended type bits that use up extra bits in the options
 * section. IFM_IDX() folds the two pieces back into one number that can be
 * used as a bitmap index, reversing the calculation done by IFM_X.
 */
#define IFM_IDX(x) \
	(((x) & IFM_TMASK) | (((x) & IFM_ETH_XTYPE) >> IFM_ETH_XSHIFT))
1272 
1273 /**
1274  * ice_add_media_types - Add supported media types to the media structure
1275  * @sc: ice private softc structure
1276  * @media: ifmedia structure to setup
1277  *
1278  * Looks up the supported phy types, and initializes the various media types
1279  * available.
1280  *
1281  * @pre this function must be protected from being called while another thread
1282  * is accessing the ifmedia types.
1283  */
1284 int
1285 ice_add_media_types(struct ice_softc *sc, struct ifmedia *media)
1286 {
1287 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
1288 	struct ice_port_info *pi = sc->hw.port_info;
1289 	int status;
1290 	uint64_t phy_low, phy_high;
1291 	int bit;
1292 
1293 	ASSERT_CFG_LOCKED(sc);
1294 
1295 	/* the maximum possible media type index is 511. We probably don't
1296 	 * need most of this space, but this ensures future compatibility when
1297 	 * additional media types are used.
1298 	 */
1299 	ice_declare_bitmap(already_added, 511);
1300 
1301 	/* Remove all previous media types */
1302 	ifmedia_removeall(media);
1303 
1304 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
1305 				     &pcaps, NULL);
1306 	if (status) {
1307 		device_printf(sc->dev,
1308 		    "%s: ice_aq_get_phy_caps (ACTIVE) failed; status %s, aq_err %s\n",
1309 		    __func__, ice_status_str(status),
1310 		    ice_aq_str(sc->hw.adminq.sq_last_status));
1311 		return (status);
1312 	}
1313 	phy_low = le64toh(pcaps.phy_type_low);
1314 	phy_high = le64toh(pcaps.phy_type_high);
1315 
1316 	/* make sure the added bitmap is zero'd */
1317 	memset(already_added, 0, sizeof(already_added));
1318 
1319 	/* coverity[address_of] */
1320 	for_each_set_bit(bit, &phy_low, 64) {
1321 		uint64_t type = BIT_ULL(bit);
1322 		int ostype;
1323 
1324 		/* get the OS media type */
1325 		ostype = ice_get_phy_type_low(type);
1326 
1327 		/* don't bother adding the unknown type */
1328 		if (ostype == IFM_UNKNOWN)
1329 			continue;
1330 
1331 		/* only add each media type to the list once */
1332 		if (ice_is_bit_set(already_added, IFM_IDX(ostype)))
1333 			continue;
1334 
1335 		ifmedia_add(media, IFM_ETHER | ostype, 0, NULL);
1336 		ice_set_bit(IFM_IDX(ostype), already_added);
1337 	}
1338 
1339 	/* coverity[address_of] */
1340 	for_each_set_bit(bit, &phy_high, 64) {
1341 		uint64_t type = BIT_ULL(bit);
1342 		int ostype;
1343 
1344 		/* get the OS media type */
1345 		ostype = ice_get_phy_type_high(type);
1346 
1347 		/* don't bother adding the unknown type */
1348 		if (ostype == IFM_UNKNOWN)
1349 			continue;
1350 
1351 		/* only add each media type to the list once */
1352 		if (ice_is_bit_set(already_added, IFM_IDX(ostype)))
1353 			continue;
1354 
1355 		ifmedia_add(media, IFM_ETHER | ostype, 0, NULL);
1356 		ice_set_bit(IFM_IDX(ostype), already_added);
1357 	}
1358 
1359 	/* Use autoselect media by default */
1360 	ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
1361 	ifmedia_set(media, IFM_ETHER | IFM_AUTO);
1362 
1363 	return (0);
1364 }
1365 
1366 /**
1367  * ice_configure_rxq_interrupt - Configure HW Rx queue for an MSI-X interrupt
1368  * @hw: ice hw structure
1369  * @rxqid: Rx queue index in PF space
1370  * @vector: MSI-X vector index in PF/VF space
1371  * @itr_idx: ITR index to use for interrupt
1372  *
1373  * @remark ice_flush() may need to be called after this
1374  */
1375 void
1376 ice_configure_rxq_interrupt(struct ice_hw *hw, u16 rxqid, u16 vector, u8 itr_idx)
1377 {
1378 	u32 val;
1379 
1380 	MPASS(itr_idx <= ICE_ITR_NONE);
1381 
1382 	val = (QINT_RQCTL_CAUSE_ENA_M |
1383 	       (itr_idx << QINT_RQCTL_ITR_INDX_S) |
1384 	       (vector << QINT_RQCTL_MSIX_INDX_S));
1385 	wr32(hw, QINT_RQCTL(rxqid), val);
1386 }
1387 
1388 /**
1389  * ice_configure_all_rxq_interrupts - Configure HW Rx queues for MSI-X interrupts
1390  * @vsi: the VSI to configure
1391  *
1392  * Called when setting up MSI-X interrupts to configure the Rx hardware queues.
1393  */
1394 void
1395 ice_configure_all_rxq_interrupts(struct ice_vsi *vsi)
1396 {
1397 	struct ice_hw *hw = &vsi->sc->hw;
1398 	int i;
1399 
1400 	for (i = 0; i < vsi->num_rx_queues; i++) {
1401 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1402 
1403 		ice_configure_rxq_interrupt(hw, vsi->rx_qmap[rxq->me],
1404 					    rxq->irqv->me, ICE_RX_ITR);
1405 
1406 		ice_debug(hw, ICE_DBG_INIT,
1407 		    "RXQ(%d) intr enable: me %d rxqid %d vector %d\n",
1408 		    i, rxq->me, vsi->rx_qmap[rxq->me], rxq->irqv->me);
1409 	}
1410 
1411 	ice_flush(hw);
1412 }
1413 
1414 /**
1415  * ice_configure_txq_interrupt - Configure HW Tx queue for an MSI-X interrupt
1416  * @hw: ice hw structure
1417  * @txqid: Tx queue index in PF space
1418  * @vector: MSI-X vector index in PF/VF space
1419  * @itr_idx: ITR index to use for interrupt
1420  *
1421  * @remark ice_flush() may need to be called after this
1422  */
1423 void
1424 ice_configure_txq_interrupt(struct ice_hw *hw, u16 txqid, u16 vector, u8 itr_idx)
1425 {
1426 	u32 val;
1427 
1428 	MPASS(itr_idx <= ICE_ITR_NONE);
1429 
1430 	val = (QINT_TQCTL_CAUSE_ENA_M |
1431 	       (itr_idx << QINT_TQCTL_ITR_INDX_S) |
1432 	       (vector << QINT_TQCTL_MSIX_INDX_S));
1433 	wr32(hw, QINT_TQCTL(txqid), val);
1434 }
1435 
1436 /**
1437  * ice_configure_all_txq_interrupts - Configure HW Tx queues for MSI-X interrupts
1438  * @vsi: the VSI to configure
1439  *
1440  * Called when setting up MSI-X interrupts to configure the Tx hardware queues.
1441  */
1442 void
1443 ice_configure_all_txq_interrupts(struct ice_vsi *vsi)
1444 {
1445 	struct ice_hw *hw = &vsi->sc->hw;
1446 	int i;
1447 
1448 	for (i = 0; i < vsi->num_tx_queues; i++) {
1449 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1450 
1451 		ice_configure_txq_interrupt(hw, vsi->tx_qmap[txq->me],
1452 					    txq->irqv->me, ICE_TX_ITR);
1453 	}
1454 
1455 	ice_flush(hw);
1456 }
1457 
1458 /**
1459  * ice_flush_rxq_interrupts - Unconfigure Hw Rx queues MSI-X interrupt cause
1460  * @vsi: the VSI to configure
1461  *
1462  * Unset the CAUSE_ENA flag of the TQCTL register for each queue, then trigger
1463  * a software interrupt on that cause. This is required as part of the Rx
1464  * queue disable logic to dissociate the Rx queue from the interrupt.
1465  *
1466  * Note: this function must be called prior to disabling Rx queues with
1467  * ice_control_all_rx_queues, otherwise the Rx queue may not be disabled properly.
1468  */
1469 void
1470 ice_flush_rxq_interrupts(struct ice_vsi *vsi)
1471 {
1472 	struct ice_hw *hw = &vsi->sc->hw;
1473 	int i;
1474 
1475 	for (i = 0; i < vsi->num_rx_queues; i++) {
1476 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1477 		u32 reg, val;
1478 
1479 		/* Clear the CAUSE_ENA flag */
1480 		reg = vsi->rx_qmap[rxq->me];
1481 		val = rd32(hw, QINT_RQCTL(reg));
1482 		val &= ~QINT_RQCTL_CAUSE_ENA_M;
1483 		wr32(hw, QINT_RQCTL(reg), val);
1484 
1485 		ice_flush(hw);
1486 
1487 		/* Trigger a software interrupt to complete interrupt
1488 		 * dissociation.
1489 		 */
1490 		wr32(hw, GLINT_DYN_CTL(rxq->irqv->me),
1491 		     GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M);
1492 	}
1493 }
1494 
1495 /**
1496  * ice_flush_txq_interrupts - Unconfigure Hw Tx queues MSI-X interrupt cause
1497  * @vsi: the VSI to configure
1498  *
1499  * Unset the CAUSE_ENA flag of the TQCTL register for each queue, then trigger
1500  * a software interrupt on that cause. This is required as part of the Tx
1501  * queue disable logic to dissociate the Tx queue from the interrupt.
1502  *
1503  * Note: this function must be called prior to ice_vsi_disable_tx, otherwise
1504  * the Tx queue disable may not complete properly.
1505  */
1506 void
1507 ice_flush_txq_interrupts(struct ice_vsi *vsi)
1508 {
1509 	struct ice_hw *hw = &vsi->sc->hw;
1510 	int i;
1511 
1512 	for (i = 0; i < vsi->num_tx_queues; i++) {
1513 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1514 		u32 reg, val;
1515 
1516 		/* Clear the CAUSE_ENA flag */
1517 		reg = vsi->tx_qmap[txq->me];
1518 		val = rd32(hw, QINT_TQCTL(reg));
1519 		val &= ~QINT_TQCTL_CAUSE_ENA_M;
1520 		wr32(hw, QINT_TQCTL(reg), val);
1521 
1522 		ice_flush(hw);
1523 
1524 		/* Trigger a software interrupt to complete interrupt
1525 		 * dissociation.
1526 		 */
1527 		wr32(hw, GLINT_DYN_CTL(txq->irqv->me),
1528 		     GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M);
1529 	}
1530 }
1531 
1532 /**
1533  * ice_configure_rx_itr - Configure the Rx ITR settings for this VSI
1534  * @vsi: the VSI to configure
1535  *
1536  * Program the hardware ITR registers with the settings for this VSI.
1537  */
1538 void
1539 ice_configure_rx_itr(struct ice_vsi *vsi)
1540 {
1541 	struct ice_hw *hw = &vsi->sc->hw;
1542 	int i;
1543 
1544 	/* TODO: Handle per-queue/per-vector ITR? */
1545 
1546 	for (i = 0; i < vsi->num_rx_queues; i++) {
1547 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1548 
1549 		wr32(hw, GLINT_ITR(ICE_RX_ITR, rxq->irqv->me),
1550 		     ice_itr_to_reg(hw, vsi->rx_itr));
1551 	}
1552 
1553 	ice_flush(hw);
1554 }
1555 
1556 /**
1557  * ice_configure_tx_itr - Configure the Tx ITR settings for this VSI
1558  * @vsi: the VSI to configure
1559  *
1560  * Program the hardware ITR registers with the settings for this VSI.
1561  */
1562 void
1563 ice_configure_tx_itr(struct ice_vsi *vsi)
1564 {
1565 	struct ice_hw *hw = &vsi->sc->hw;
1566 	int i;
1567 
1568 	/* TODO: Handle per-queue/per-vector ITR? */
1569 
1570 	for (i = 0; i < vsi->num_tx_queues; i++) {
1571 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1572 
1573 		wr32(hw, GLINT_ITR(ICE_TX_ITR, txq->irqv->me),
1574 		     ice_itr_to_reg(hw, vsi->tx_itr));
1575 	}
1576 
1577 	ice_flush(hw);
1578 }
1579 
1580 /**
1581  * ice_setup_tx_ctx - Setup an ice_tlan_ctx structure for a queue
1582  * @txq: the Tx queue to configure
1583  * @tlan_ctx: the Tx LAN queue context structure to initialize
1584  * @pf_q: real queue number
1585  */
1586 static int
1587 ice_setup_tx_ctx(struct ice_tx_queue *txq, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
1588 {
1589 	struct ice_vsi *vsi = txq->vsi;
1590 	struct ice_softc *sc = vsi->sc;
1591 	struct ice_hw *hw = &sc->hw;
1592 
1593 	tlan_ctx->port_num = hw->port_info->lport;
1594 
1595 	/* number of descriptors in the queue */
1596 	tlan_ctx->qlen = txq->desc_count;
1597 
1598 	/* set the transmit queue base address, defined in 128 byte units */
1599 	tlan_ctx->base = txq->tx_paddr >> 7;
1600 
1601 	tlan_ctx->pf_num = hw->pf_id;
1602 
1603 	switch (vsi->type) {
1604 	case ICE_VSI_PF:
1605 		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
1606 		break;
1607 	case ICE_VSI_VMDQ2:
1608 		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ;
1609 		break;
1610 	default:
1611 		return (ENODEV);
1612 	}
1613 
1614 	tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
1615 
1616 	/* Enable TSO */
1617 	tlan_ctx->tso_ena = 1;
1618 	tlan_ctx->internal_usage_flag = 1;
1619 
1620 	tlan_ctx->tso_qnum = pf_q;
1621 
1622 	/*
1623 	 * Stick with the older legacy Tx queue interface, instead of the new
1624 	 * advanced queue interface.
1625 	 */
1626 	tlan_ctx->legacy_int = 1;
1627 
1628 	/* Descriptor WB mode */
1629 	tlan_ctx->wb_mode = 0;
1630 
1631 	return (0);
1632 }
1633 
1634 /**
1635  * ice_cfg_vsi_for_tx - Configure the hardware for Tx
1636  * @vsi: the VSI to configure
1637  *
1638  * Configure the device Tx queues through firmware AdminQ commands. After
1639  * this, Tx queues will be ready for transmit.
1640  */
1641 int
1642 ice_cfg_vsi_for_tx(struct ice_vsi *vsi)
1643 {
1644 	struct ice_aqc_add_tx_qgrp *qg;
1645 	struct ice_hw *hw = &vsi->sc->hw;
1646 	device_t dev = vsi->sc->dev;
1647 	int status;
1648 	int i;
1649 	int err = 0;
1650 	u16 qg_size, pf_q;
1651 
1652 	qg_size = ice_struct_size(qg, txqs, 1);
1653 	qg = (struct ice_aqc_add_tx_qgrp *)malloc(qg_size, M_ICE, M_NOWAIT|M_ZERO);
1654 	if (!qg)
1655 		return (ENOMEM);
1656 
1657 	qg->num_txqs = 1;
1658 
1659 	for (i = 0; i < vsi->num_tx_queues; i++) {
1660 		struct ice_tlan_ctx tlan_ctx = { 0 };
1661 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1662 
1663 		pf_q = vsi->tx_qmap[txq->me];
1664 		qg->txqs[0].txq_id = htole16(pf_q);
1665 
1666 		err = ice_setup_tx_ctx(txq, &tlan_ctx, pf_q);
1667 		if (err)
1668 			goto free_txqg;
1669 
1670 		ice_set_ctx(hw, (u8 *)&tlan_ctx, qg->txqs[0].txq_ctx,
1671 			    ice_tlan_ctx_info);
1672 
1673 		status = ice_ena_vsi_txq(hw->port_info, vsi->idx, txq->tc,
1674 					 txq->q_handle, 1, qg, qg_size, NULL);
1675 		if (status) {
1676 			device_printf(dev,
1677 				      "Failed to set LAN Tx queue %d (TC %d, handle %d) context, err %s aq_err %s\n",
1678 				      i, txq->tc, txq->q_handle,
1679 				      ice_status_str(status),
1680 				      ice_aq_str(hw->adminq.sq_last_status));
1681 			err = ENODEV;
1682 			goto free_txqg;
1683 		}
1684 
1685 		/* Keep track of the Tx queue TEID */
1686 		if (pf_q == le16toh(qg->txqs[0].txq_id))
1687 			txq->q_teid = le32toh(qg->txqs[0].q_teid);
1688 	}
1689 
1690 free_txqg:
1691 	free(qg, M_ICE);
1692 
1693 	return (err);
1694 }
1695 
1696 /**
1697  * ice_setup_rx_ctx - Setup an Rx context structure for a receive queue
1698  * @rxq: the receive queue to program
1699  *
1700  * Setup an Rx queue context structure and program it into the hardware
1701  * registers. This is a necessary step for enabling the Rx queue.
1702  *
1703  * @pre the VSI associated with this queue must have initialized mbuf_sz
1704  */
1705 static int
1706 ice_setup_rx_ctx(struct ice_rx_queue *rxq)
1707 {
1708 	struct ice_rlan_ctx rlan_ctx = {0};
1709 	struct ice_vsi *vsi = rxq->vsi;
1710 	struct ice_softc *sc = vsi->sc;
1711 	struct ice_hw *hw = &sc->hw;
1712 	int status;
1713 	u32 rxdid = ICE_RXDID_FLEX_NIC;
1714 	u32 regval;
1715 	u16 pf_q;
1716 
1717 	pf_q = vsi->rx_qmap[rxq->me];
1718 
1719 	/* set the receive queue base address, defined in 128 byte units */
1720 	rlan_ctx.base = rxq->rx_paddr >> 7;
1721 
1722 	rlan_ctx.qlen = rxq->desc_count;
1723 
1724 	rlan_ctx.dbuf = vsi->mbuf_sz >> ICE_RLAN_CTX_DBUF_S;
1725 
1726 	/* use 32 byte descriptors */
1727 	rlan_ctx.dsize = 1;
1728 
1729 	/* Strip the Ethernet CRC bytes before the packet is posted to the
1730 	 * host memory.
1731 	 */
1732 	rlan_ctx.crcstrip = 1;
1733 
1734 	rlan_ctx.l2tsel = 1;
1735 
1736 	/* don't do header splitting */
1737 	rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
1738 	rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
1739 	rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
1740 
1741 	/* strip VLAN from inner headers */
1742 	rlan_ctx.showiv = 1;
1743 
1744 	rlan_ctx.rxmax = min(vsi->max_frame_size,
1745 			     ICE_MAX_RX_SEGS * vsi->mbuf_sz);
1746 
1747 	rlan_ctx.lrxqthresh = 1;
1748 
1749 	if (vsi->type != ICE_VSI_VF) {
1750 		regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
1751 		regval &= ~QRXFLXP_CNTXT_RXDID_IDX_M;
1752 		regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
1753 			QRXFLXP_CNTXT_RXDID_IDX_M;
1754 
1755 		regval &= ~QRXFLXP_CNTXT_RXDID_PRIO_M;
1756 		regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
1757 			QRXFLXP_CNTXT_RXDID_PRIO_M;
1758 
1759 		wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
1760 	}
1761 
1762 	status = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
1763 	if (status) {
1764 		device_printf(sc->dev,
1765 			      "Failed to set LAN Rx queue context, err %s aq_err %s\n",
1766 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
1767 		return (EIO);
1768 	}
1769 
1770 	wr32(hw, rxq->tail, 0);
1771 
1772 	return 0;
1773 }
1774 
1775 /**
1776  * ice_cfg_vsi_for_rx - Configure the hardware for Rx
1777  * @vsi: the VSI to configure
1778  *
1779  * Prepare an Rx context descriptor and configure the device to receive
1780  * traffic.
1781  *
1782  * @pre the VSI must have initialized mbuf_sz
1783  */
1784 int
1785 ice_cfg_vsi_for_rx(struct ice_vsi *vsi)
1786 {
1787 	int i, err;
1788 
1789 	for (i = 0; i < vsi->num_rx_queues; i++) {
1790 		MPASS(vsi->mbuf_sz > 0);
1791 		err = ice_setup_rx_ctx(&vsi->rx_queues[i]);
1792 		if (err)
1793 			return err;
1794 	}
1795 
1796 	return (0);
1797 }
1798 
1799 /**
1800  * ice_is_rxq_ready - Check if an Rx queue is ready
1801  * @hw: ice hw structure
1802  * @pf_q: absolute PF queue index to check
1803  * @reg: on successful return, contains qrx_ctrl contents
1804  *
1805  * Reads the QRX_CTRL register and verifies if the queue is in a consistent
1806  * state. That is, QENA_REQ matches QENA_STAT. Used to check before making
1807  * a request to change the queue, as well as to verify the request has
1808  * finished. The queue should change status within a few microseconds, so we
1809  * use a small delay while polling the register.
1810  *
1811  * Returns an error code if the queue does not update after a few retries.
1812  */
1813 static int
1814 ice_is_rxq_ready(struct ice_hw *hw, int pf_q, u32 *reg)
1815 {
1816 	u32 qrx_ctrl, qena_req, qena_stat;
1817 	int i;
1818 
1819 	for (i = 0; i < ICE_Q_WAIT_RETRY_LIMIT; i++) {
1820 		qrx_ctrl = rd32(hw, QRX_CTRL(pf_q));
1821 		qena_req = (qrx_ctrl >> QRX_CTRL_QENA_REQ_S) & 1;
1822 		qena_stat = (qrx_ctrl >> QRX_CTRL_QENA_STAT_S) & 1;
1823 
1824 		/* if the request and status bits equal, then the queue is
1825 		 * fully disabled or enabled.
1826 		 */
1827 		if (qena_req == qena_stat) {
1828 			*reg = qrx_ctrl;
1829 			return (0);
1830 		}
1831 
1832 		/* wait a few microseconds before we check again */
1833 		DELAY(10);
1834 	}
1835 
1836 	return (ETIMEDOUT);
1837 }
1838 
1839 /**
1840  * ice_control_rx_queue - Configure hardware to start or stop an Rx queue
1841  * @vsi: VSI containing queue to enable/disable
1842  * @qidx: Queue index in VSI space
1843  * @enable: true to enable queue, false to disable
1844  *
1845  * Control the Rx queue through the QRX_CTRL register, enabling or disabling
1846  * it. Wait for the appropriate time to ensure that the queue has actually
1847  * reached the expected state.
1848  */
1849 int
1850 ice_control_rx_queue(struct ice_vsi *vsi, u16 qidx, bool enable)
1851 {
1852 	struct ice_hw *hw = &vsi->sc->hw;
1853 	device_t dev = vsi->sc->dev;
1854 	u32 qrx_ctrl = 0;
1855 	int err;
1856 
1857 	struct ice_rx_queue *rxq = &vsi->rx_queues[qidx];
1858 	int pf_q = vsi->rx_qmap[rxq->me];
1859 
1860 	err = ice_is_rxq_ready(hw, pf_q, &qrx_ctrl);
1861 	if (err) {
1862 		device_printf(dev,
1863 			      "Rx queue %d is not ready\n",
1864 			      pf_q);
1865 		return err;
1866 	}
1867 
1868 	/* Skip if the queue is already in correct state */
1869 	if (enable == !!(qrx_ctrl & QRX_CTRL_QENA_STAT_M))
1870 		return (0);
1871 
1872 	if (enable)
1873 		qrx_ctrl |= QRX_CTRL_QENA_REQ_M;
1874 	else
1875 		qrx_ctrl &= ~QRX_CTRL_QENA_REQ_M;
1876 	wr32(hw, QRX_CTRL(pf_q), qrx_ctrl);
1877 
1878 	/* wait for the queue to finalize the request */
1879 	err = ice_is_rxq_ready(hw, pf_q, &qrx_ctrl);
1880 	if (err) {
1881 		device_printf(dev,
1882 			      "Rx queue %d %sable timeout\n",
1883 			      pf_q, (enable ? "en" : "dis"));
1884 		return err;
1885 	}
1886 
1887 	/* this should never happen */
1888 	if (enable != !!(qrx_ctrl & QRX_CTRL_QENA_STAT_M)) {
1889 		device_printf(dev,
1890 			      "Rx queue %d invalid state\n",
1891 			      pf_q);
1892 		return (EDOOFUS);
1893 	}
1894 
1895 	return (0);
1896 }
1897 
1898 /**
1899  * ice_control_all_rx_queues - Configure hardware to start or stop the Rx queues
1900  * @vsi: VSI to enable/disable queues
1901  * @enable: true to enable queues, false to disable
1902  *
1903  * Control the Rx queues through the QRX_CTRL register, enabling or disabling
1904  * them. Wait for the appropriate time to ensure that the queues have actually
1905  * reached the expected state.
1906  */
1907 int
1908 ice_control_all_rx_queues(struct ice_vsi *vsi, bool enable)
1909 {
1910 	int i, err;
1911 
1912 	/* TODO: amortize waits by changing all queues up front and then
1913 	 * checking their status afterwards. This will become more necessary
1914 	 * when we have a large number of queues.
1915 	 */
1916 	for (i = 0; i < vsi->num_rx_queues; i++) {
1917 		err = ice_control_rx_queue(vsi, i, enable);
1918 		if (err)
1919 			break;
1920 	}
1921 
1922 	return (0);
1923 }
1924 
1925 /**
1926  * ice_add_mac_to_list - Add MAC filter to a MAC filter list
1927  * @vsi: the VSI to forward to
1928  * @list: list which contains MAC filter entries
1929  * @addr: the MAC address to be added
1930  * @action: filter action to perform on match
1931  *
1932  * Adds a MAC address filter to the list which will be forwarded to firmware
1933  * to add a series of MAC address filters.
1934  *
1935  * Returns 0 on success, and an error code on failure.
1936  *
1937  */
1938 static int
1939 ice_add_mac_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
1940 		    const u8 *addr, enum ice_sw_fwd_act_type action)
1941 {
1942 	struct ice_fltr_list_entry *entry;
1943 
1944 	entry = (__typeof(entry))malloc(sizeof(*entry), M_ICE, M_NOWAIT|M_ZERO);
1945 	if (!entry)
1946 		return (ENOMEM);
1947 
1948 	entry->fltr_info.flag = ICE_FLTR_TX;
1949 	entry->fltr_info.src_id = ICE_SRC_ID_VSI;
1950 	entry->fltr_info.lkup_type = ICE_SW_LKUP_MAC;
1951 	entry->fltr_info.fltr_act = action;
1952 	entry->fltr_info.vsi_handle = vsi->idx;
1953 	bcopy(addr, entry->fltr_info.l_data.mac.mac_addr, ETHER_ADDR_LEN);
1954 
1955 	LIST_ADD(&entry->list_entry, list);
1956 
1957 	return 0;
1958 }
1959 
1960 /**
1961  * ice_free_fltr_list - Free memory associated with a MAC address list
1962  * @list: the list to free
1963  *
1964  * Free the memory of each entry associated with the list.
1965  */
1966 static void
1967 ice_free_fltr_list(struct ice_list_head *list)
1968 {
1969 	struct ice_fltr_list_entry *e, *tmp;
1970 
1971 	LIST_FOR_EACH_ENTRY_SAFE(e, tmp, list, ice_fltr_list_entry, list_entry) {
1972 		LIST_DEL(&e->list_entry);
1973 		free(e, M_ICE);
1974 	}
1975 }
1976 
1977 /**
1978  * ice_add_vsi_mac_filter - Add a MAC address filter for a VSI
1979  * @vsi: the VSI to add the filter for
1980  * @addr: MAC address to add a filter for
1981  *
1982  * Add a MAC address filter for a given VSI. This is a wrapper around
1983  * ice_add_mac to simplify the interface. First, it only accepts a single
1984  * address, so we don't have to mess around with the list setup in other
1985  * functions. Second, it ignores the ICE_ERR_ALREADY_EXISTS error, so that
1986  * callers don't need to worry about attempting to add the same filter twice.
1987  */
1988 int
1989 ice_add_vsi_mac_filter(struct ice_vsi *vsi, const u8 *addr)
1990 {
1991 	struct ice_list_head mac_addr_list;
1992 	struct ice_hw *hw = &vsi->sc->hw;
1993 	device_t dev = vsi->sc->dev;
1994 	int status;
1995 	int err = 0;
1996 
1997 	INIT_LIST_HEAD(&mac_addr_list);
1998 
1999 	err = ice_add_mac_to_list(vsi, &mac_addr_list, addr, ICE_FWD_TO_VSI);
2000 	if (err)
2001 		goto free_mac_list;
2002 
2003 	status = ice_add_mac(hw, &mac_addr_list);
2004 	if (status == ICE_ERR_ALREADY_EXISTS) {
2005 		; /* Don't complain if we try to add a filter that already exists */
2006 	} else if (status) {
2007 		device_printf(dev,
2008 			      "Failed to add a filter for MAC %6D, err %s aq_err %s\n",
2009 			      addr, ":",
2010 			      ice_status_str(status),
2011 			      ice_aq_str(hw->adminq.sq_last_status));
2012 		err = (EIO);
2013 	}
2014 
2015 free_mac_list:
2016 	ice_free_fltr_list(&mac_addr_list);
2017 	return err;
2018 }
2019 
2020 /**
2021  * ice_cfg_pf_default_mac_filters - Setup default unicast and broadcast addrs
2022  * @sc: device softc structure
2023  *
2024  * Program the default unicast and broadcast filters for the PF VSI.
2025  */
2026 int
2027 ice_cfg_pf_default_mac_filters(struct ice_softc *sc)
2028 {
2029 	struct ice_vsi *vsi = &sc->pf_vsi;
2030 	struct ice_hw *hw = &sc->hw;
2031 	int err;
2032 
2033 	/* Add the LAN MAC address */
2034 	err = ice_add_vsi_mac_filter(vsi, hw->port_info->mac.lan_addr);
2035 	if (err)
2036 		return err;
2037 
2038 	/* Add the broadcast address */
2039 	err = ice_add_vsi_mac_filter(vsi, broadcastaddr);
2040 	if (err)
2041 		return err;
2042 
2043 	return (0);
2044 }
2045 
2046 /**
2047  * ice_remove_vsi_mac_filter - Remove a MAC address filter for a VSI
2048  * @vsi: the VSI to add the filter for
2049  * @addr: MAC address to remove a filter for
2050  *
2051  * Remove a MAC address filter from a given VSI. This is a wrapper around
2052  * ice_remove_mac to simplify the interface. First, it only accepts a single
2053  * address, so we don't have to mess around with the list setup in other
2054  * functions. Second, it ignores the ICE_ERR_DOES_NOT_EXIST error, so that
2055  * callers don't need to worry about attempting to remove filters which
2056  * haven't yet been added.
2057  */
2058 int
2059 ice_remove_vsi_mac_filter(struct ice_vsi *vsi, const u8 *addr)
2060 {
2061 	struct ice_list_head mac_addr_list;
2062 	struct ice_hw *hw = &vsi->sc->hw;
2063 	device_t dev = vsi->sc->dev;
2064 	int status;
2065 	int err = 0;
2066 
2067 	INIT_LIST_HEAD(&mac_addr_list);
2068 
2069 	err = ice_add_mac_to_list(vsi, &mac_addr_list, addr, ICE_FWD_TO_VSI);
2070 	if (err)
2071 		goto free_mac_list;
2072 
2073 	status = ice_remove_mac(hw, &mac_addr_list);
2074 	if (status == ICE_ERR_DOES_NOT_EXIST) {
2075 		; /* Don't complain if we try to remove a filter that doesn't exist */
2076 	} else if (status) {
2077 		device_printf(dev,
2078 			      "Failed to remove a filter for MAC %6D, err %s aq_err %s\n",
2079 			      addr, ":",
2080 			      ice_status_str(status),
2081 			      ice_aq_str(hw->adminq.sq_last_status));
2082 		err = (EIO);
2083 	}
2084 
2085 free_mac_list:
2086 	ice_free_fltr_list(&mac_addr_list);
2087 	return err;
2088 }
2089 
2090 /**
2091  * ice_rm_pf_default_mac_filters - Remove default unicast and broadcast addrs
2092  * @sc: device softc structure
2093  *
2094  * Remove the default unicast and broadcast filters from the PF VSI.
2095  */
2096 int
2097 ice_rm_pf_default_mac_filters(struct ice_softc *sc)
2098 {
2099 	struct ice_vsi *vsi = &sc->pf_vsi;
2100 	struct ice_hw *hw = &sc->hw;
2101 	int err;
2102 
2103 	/* Remove the LAN MAC address */
2104 	err = ice_remove_vsi_mac_filter(vsi, hw->port_info->mac.lan_addr);
2105 	if (err)
2106 		return err;
2107 
2108 	/* Remove the broadcast address */
2109 	err = ice_remove_vsi_mac_filter(vsi, broadcastaddr);
2110 	if (err)
2111 		return (EIO);
2112 
2113 	return (0);
2114 }
2115 
2116 /**
2117  * ice_check_ctrlq_errors - Check for and report controlq errors
2118  * @sc: device private structure
2119  * @qname: name of the controlq
2120  * @cq: the controlq to check
2121  *
2122  * Check and report controlq errors. Currently all we do is report them to the
2123  * kernel message log, but we might want to improve this in the future, such
2124  * as to keep track of statistics.
2125  */
2126 static void
2127 ice_check_ctrlq_errors(struct ice_softc *sc, const char *qname,
2128 		       struct ice_ctl_q_info *cq)
2129 {
2130 	struct ice_hw *hw = &sc->hw;
2131 	u32 val;
2132 
2133 	/* Check for error indications. Note that all the controlqs use the
2134 	 * same register layout, so we use the PF_FW_AxQLEN defines only.
2135 	 */
2136 	val = rd32(hw, cq->rq.len);
2137 	if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
2138 		   PF_FW_ARQLEN_ARQCRIT_M)) {
2139 		if (val & PF_FW_ARQLEN_ARQVFE_M)
2140 			device_printf(sc->dev,
2141 				"%s Receive Queue VF Error detected\n", qname);
2142 		if (val & PF_FW_ARQLEN_ARQOVFL_M)
2143 			device_printf(sc->dev,
2144 				"%s Receive Queue Overflow Error detected\n",
2145 				qname);
2146 		if (val & PF_FW_ARQLEN_ARQCRIT_M)
2147 			device_printf(sc->dev,
2148 				"%s Receive Queue Critical Error detected\n",
2149 				qname);
2150 		val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
2151 			 PF_FW_ARQLEN_ARQCRIT_M);
2152 		wr32(hw, cq->rq.len, val);
2153 	}
2154 
2155 	val = rd32(hw, cq->sq.len);
2156 	if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
2157 		   PF_FW_ATQLEN_ATQCRIT_M)) {
2158 		if (val & PF_FW_ATQLEN_ATQVFE_M)
2159 			device_printf(sc->dev,
2160 				"%s Send Queue VF Error detected\n", qname);
2161 		if (val & PF_FW_ATQLEN_ATQOVFL_M)
2162 			device_printf(sc->dev,
2163 				"%s Send Queue Overflow Error detected\n",
2164 				qname);
2165 		if (val & PF_FW_ATQLEN_ATQCRIT_M)
2166 			device_printf(sc->dev,
2167 				"%s Send Queue Critical Error detected\n",
2168 				qname);
2169 		val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
2170 			 PF_FW_ATQLEN_ATQCRIT_M);
2171 		wr32(hw, cq->sq.len, val);
2172 	}
2173 }
2174 
2175 /**
2176  * ice_process_link_event - Process a link event indication from firmware
2177  * @sc: device softc structure
2178  * @e: the received event data
2179  *
2180  * Gets the current link status from hardware, and may print a message if an
2181  * unqualified is detected.
2182  */
2183 static void
2184 ice_process_link_event(struct ice_softc *sc,
2185 		       struct ice_rq_event_info __invariant_only *e)
2186 {
2187 	struct ice_port_info *pi = sc->hw.port_info;
2188 	struct ice_hw *hw = &sc->hw;
2189 	device_t dev = sc->dev;
2190 	int status;
2191 
2192 	/* Sanity check that the data length isn't too small */
2193 	MPASS(le16toh(e->desc.datalen) >= ICE_GET_LINK_STATUS_DATALEN_V1);
2194 
2195 	/*
2196 	 * Even though the adapter gets link status information inside the
2197 	 * event, it needs to send a Get Link Status AQ command in order
2198 	 * to re-enable link events.
2199 	 */
2200 	pi->phy.get_link_info = true;
2201 	ice_get_link_status(pi, &sc->link_up);
2202 
2203 	if (pi->phy.link_info.topo_media_conflict &
2204 	   (ICE_AQ_LINK_TOPO_CONFLICT | ICE_AQ_LINK_MEDIA_CONFLICT |
2205 	    ICE_AQ_LINK_TOPO_CORRUPT))
2206 		device_printf(dev,
2207 		    "Possible mis-configuration of the Ethernet port detected; please use the Intel (R) Ethernet Port Configuration Tool utility to address the issue.\n");
2208 
2209 	if ((pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) &&
2210 	    !(pi->phy.link_info.link_info & ICE_AQ_LINK_UP)) {
2211 		if (!(pi->phy.link_info.an_info & ICE_AQ_QUALIFIED_MODULE))
2212 			device_printf(dev,
2213 			    "Link is disabled on this device because an unsupported module type was detected! Refer to the Intel (R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
2214 		if (pi->phy.link_info.link_cfg_err & ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED)
2215 			device_printf(dev,
2216 			    "The module's power requirements exceed the device's power supply. Cannot start link.\n");
2217 		if (pi->phy.link_info.link_cfg_err & ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT)
2218 			device_printf(dev,
2219 			    "The installed module is incompatible with the device's NVM image. Cannot start link.\n");
2220 	}
2221 
2222 	if (!(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
2223 		if (!ice_testandset_state(&sc->state, ICE_STATE_NO_MEDIA)) {
2224 			status = ice_aq_set_link_restart_an(pi, false, NULL);
2225 			if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EMODE)
2226 				device_printf(dev,
2227 				    "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n",
2228 				    __func__, ice_status_str(status),
2229 				    ice_aq_str(hw->adminq.sq_last_status));
2230 		}
2231 	}
2232 	/* ICE_STATE_NO_MEDIA is cleared when polling task detects media */
2233 
2234 	/* Indicate that link status must be reported again */
2235 	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
2236 
2237 	/* OS link info is updated elsewhere */
2238 }
2239 
2240 /**
2241  * ice_process_ctrlq_event - Respond to a controlq event
2242  * @sc: device private structure
2243  * @qname: the name for this controlq
2244  * @event: the event to process
2245  *
2246  * Perform actions in response to various controlq event notifications.
2247  */
2248 static void
2249 ice_process_ctrlq_event(struct ice_softc *sc, const char *qname,
2250 			struct ice_rq_event_info *event)
2251 {
2252 	u16 opcode;
2253 
2254 	opcode = le16toh(event->desc.opcode);
2255 
2256 	switch (opcode) {
2257 	case ice_aqc_opc_get_link_status:
2258 		ice_process_link_event(sc, event);
2259 		break;
2260 	case ice_aqc_opc_fw_logs_event:
2261 		ice_handle_fw_log_event(sc, &event->desc, event->msg_buf);
2262 		break;
2263 	case ice_aqc_opc_lldp_set_mib_change:
2264 		ice_handle_mib_change_event(sc, event);
2265 		break;
2266 	case ice_aqc_opc_event_lan_overflow:
2267 		ice_handle_lan_overflow_event(sc, event);
2268 		break;
2269 	case ice_aqc_opc_get_health_status:
2270 		ice_handle_health_status_event(sc, event);
2271 		break;
2272 	default:
2273 		device_printf(sc->dev,
2274 			      "%s Receive Queue unhandled event 0x%04x ignored\n",
2275 			      qname, opcode);
2276 	}
2277 }
2278 
2279 /**
2280  * ice_process_ctrlq - helper function to process controlq rings
2281  * @sc: device private structure
2282  * @q_type: specific control queue type
2283  * @pending: return parameter to track remaining events
2284  *
2285  * Process controlq events for a given control queue type. Returns zero on
2286  * success, and an error code on failure. If successful, pending is the number
2287  * of remaining events left in the queue.
2288  */
2289 int
2290 ice_process_ctrlq(struct ice_softc *sc, enum ice_ctl_q q_type, u16 *pending)
2291 {
2292 	struct ice_rq_event_info event = { { 0 } };
2293 	struct ice_hw *hw = &sc->hw;
2294 	struct ice_ctl_q_info *cq;
2295 	int status;
2296 	const char *qname;
2297 	int loop = 0;
2298 
2299 	switch (q_type) {
2300 	case ICE_CTL_Q_ADMIN:
2301 		cq = &hw->adminq;
2302 		qname = "Admin";
2303 		break;
2304 	case ICE_CTL_Q_SB:
2305 		cq = &hw->sbq;
2306 		qname = "Sideband";
2307 		break;
2308 	case ICE_CTL_Q_MAILBOX:
2309 		cq = &hw->mailboxq;
2310 		qname = "Mailbox";
2311 		break;
2312 	default:
2313 		device_printf(sc->dev,
2314 			      "Unknown control queue type 0x%x\n",
2315 			      q_type);
2316 		return 0;
2317 	}
2318 
2319 	ice_check_ctrlq_errors(sc, qname, cq);
2320 
2321 	/*
2322 	 * Control queue processing happens during the admin task which may be
2323 	 * holding a non-sleepable lock, so we *must* use M_NOWAIT here.
2324 	 */
2325 	event.buf_len = cq->rq_buf_size;
2326 	event.msg_buf = (u8 *)malloc(event.buf_len, M_ICE, M_ZERO | M_NOWAIT);
2327 	if (!event.msg_buf) {
2328 		device_printf(sc->dev,
2329 			      "Unable to allocate memory for %s Receive Queue event\n",
2330 			      qname);
2331 		return (ENOMEM);
2332 	}
2333 
2334 	do {
2335 		status = ice_clean_rq_elem(hw, cq, &event, pending);
2336 		if (status == ICE_ERR_AQ_NO_WORK)
2337 			break;
2338 		if (status) {
2339 			device_printf(sc->dev,
2340 				      "%s Receive Queue event error %s\n",
2341 				      qname, ice_status_str(status));
2342 			free(event.msg_buf, M_ICE);
2343 			return (EIO);
2344 		}
2345 		/* XXX should we separate this handler by controlq type? */
2346 		ice_process_ctrlq_event(sc, qname, &event);
2347 	} while (*pending && (++loop < ICE_CTRLQ_WORK_LIMIT));
2348 
2349 	free(event.msg_buf, M_ICE);
2350 
2351 	return 0;
2352 }
2353 
2354 /**
2355  * pkg_ver_empty - Check if a package version is empty
2356  * @pkg_ver: the package version to check
2357  * @pkg_name: the package name to check
2358  *
2359  * Checks if the package version structure is empty. We consider a package
2360  * version as empty if none of the versions are non-zero and the name string
2361  * is null as well.
2362  *
2363  * This is used to check if the package version was initialized by the driver,
2364  * as we do not expect an actual DDP package file to have a zero'd version and
2365  * name.
2366  *
2367  * @returns true if the package version is valid, or false otherwise.
2368  */
2369 static bool
2370 pkg_ver_empty(struct ice_pkg_ver *pkg_ver, u8 *pkg_name)
2371 {
2372 	return (pkg_name[0] == '\0' &&
2373 		pkg_ver->major == 0 &&
2374 		pkg_ver->minor == 0 &&
2375 		pkg_ver->update == 0 &&
2376 		pkg_ver->draft == 0);
2377 }
2378 
2379 /**
2380  * pkg_ver_compatible - Check if the package version is compatible
2381  * @pkg_ver: the package version to check
2382  *
2383  * Compares the package version number to the driver's expected major/minor
2384  * version. Returns an integer indicating whether the version is older, newer,
2385  * or compatible with the driver.
2386  *
2387  * @returns 0 if the package version is compatible, -1 if the package version
2388  * is older, and 1 if the package version is newer than the driver version.
2389  */
2390 static int
2391 pkg_ver_compatible(struct ice_pkg_ver *pkg_ver)
2392 {
2393 	if (pkg_ver->major > ICE_PKG_SUPP_VER_MAJ)
2394 		return (1); /* newer */
2395 	else if ((pkg_ver->major == ICE_PKG_SUPP_VER_MAJ) &&
2396 		 (pkg_ver->minor > ICE_PKG_SUPP_VER_MNR))
2397 		return (1); /* newer */
2398 	else if ((pkg_ver->major == ICE_PKG_SUPP_VER_MAJ) &&
2399 		 (pkg_ver->minor == ICE_PKG_SUPP_VER_MNR))
2400 		return (0); /* compatible */
2401 	else
2402 		return (-1); /* older */
2403 }
2404 
2405 /**
2406  * ice_os_pkg_version_str - Format OS package version info into a sbuf
2407  * @hw: device hw structure
2408  * @buf: string buffer to store name/version string
2409  *
2410  * Formats the name and version of the OS DDP package as found in the ice_ddp
2411  * module into a string.
2412  *
2413  * @remark This will almost always be the same as the active package, but
2414  * could be different in some cases. Use ice_active_pkg_version_str to get the
2415  * version of the active DDP package.
2416  */
2417 static void
2418 ice_os_pkg_version_str(struct ice_hw *hw, struct sbuf *buf)
2419 {
2420 	char name_buf[ICE_PKG_NAME_SIZE];
2421 
2422 	/* If the OS DDP package info is empty, use "None" */
2423 	if (pkg_ver_empty(&hw->pkg_ver, hw->pkg_name)) {
2424 		sbuf_printf(buf, "None");
2425 		return;
2426 	}
2427 
2428 	/*
2429 	 * This should already be null-terminated, but since this is a raw
2430 	 * value from an external source, strlcpy() into a new buffer to
2431 	 * make sure.
2432 	 */
2433 	bzero(name_buf, sizeof(name_buf));
2434 	strlcpy(name_buf, (char *)hw->pkg_name, ICE_PKG_NAME_SIZE);
2435 
2436 	sbuf_printf(buf, "%s version %u.%u.%u.%u",
2437 	    name_buf,
2438 	    hw->pkg_ver.major,
2439 	    hw->pkg_ver.minor,
2440 	    hw->pkg_ver.update,
2441 	    hw->pkg_ver.draft);
2442 }
2443 
2444 /**
2445  * ice_active_pkg_version_str - Format active package version info into a sbuf
2446  * @hw: device hw structure
2447  * @buf: string buffer to store name/version string
2448  *
2449  * Formats the name and version of the active DDP package info into a string
2450  * buffer for use.
2451  */
2452 static void
2453 ice_active_pkg_version_str(struct ice_hw *hw, struct sbuf *buf)
2454 {
2455 	char name_buf[ICE_PKG_NAME_SIZE];
2456 
2457 	/* If the active DDP package info is empty, use "None" */
2458 	if (pkg_ver_empty(&hw->active_pkg_ver, hw->active_pkg_name)) {
2459 		sbuf_printf(buf, "None");
2460 		return;
2461 	}
2462 
2463 	/*
2464 	 * This should already be null-terminated, but since this is a raw
2465 	 * value from an external source, strlcpy() into a new buffer to
2466 	 * make sure.
2467 	 */
2468 	bzero(name_buf, sizeof(name_buf));
2469 	strlcpy(name_buf, (char *)hw->active_pkg_name, ICE_PKG_NAME_SIZE);
2470 
2471 	sbuf_printf(buf, "%s version %u.%u.%u.%u",
2472 	    name_buf,
2473 	    hw->active_pkg_ver.major,
2474 	    hw->active_pkg_ver.minor,
2475 	    hw->active_pkg_ver.update,
2476 	    hw->active_pkg_ver.draft);
2477 
2478 	if (hw->active_track_id != 0)
2479 		sbuf_printf(buf, ", track id 0x%08x", hw->active_track_id);
2480 }
2481 
2482 /**
2483  * ice_nvm_version_str - Format the NVM version information into a sbuf
2484  * @hw: device hw structure
2485  * @buf: string buffer to store version string
2486  *
2487  * Formats the NVM information including firmware version, API version, NVM
2488  * version, the EETRACK id, and OEM specific version information into a string
2489  * buffer.
2490  */
2491 static void
2492 ice_nvm_version_str(struct ice_hw *hw, struct sbuf *buf)
2493 {
2494 	struct ice_nvm_info *nvm = &hw->flash.nvm;
2495 	struct ice_orom_info *orom = &hw->flash.orom;
2496 	struct ice_netlist_info *netlist = &hw->flash.netlist;
2497 
2498 	/* Note that the netlist versions are stored in packed Binary Coded
2499 	 * Decimal format. The use of '%x' will correctly display these as
2500 	 * decimal numbers. This works because every 4 bits will be displayed
2501 	 * as a hexadecimal digit, and the BCD format will only use the values
2502 	 * 0-9.
2503 	 */
2504 	sbuf_printf(buf,
2505 		    "fw %u.%u.%u api %u.%u nvm %x.%02x etid %08x netlist %x.%x.%x-%x.%x.%x.%04x oem %u.%u.%u",
2506 		    hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch,
2507 		    hw->api_maj_ver, hw->api_min_ver,
2508 		    nvm->major, nvm->minor, nvm->eetrack,
2509 		    netlist->major, netlist->minor,
2510 		    netlist->type >> 16, netlist->type & 0xFFFF,
2511 		    netlist->rev, netlist->cust_ver, netlist->hash,
2512 		    orom->major, orom->build, orom->patch);
2513 }
2514 
2515 /**
2516  * ice_print_nvm_version - Print the NVM info to the kernel message log
2517  * @sc: the device softc structure
2518  *
2519  * Format and print an NVM version string using ice_nvm_version_str().
2520  */
2521 void
2522 ice_print_nvm_version(struct ice_softc *sc)
2523 {
2524 	struct ice_hw *hw = &sc->hw;
2525 	device_t dev = sc->dev;
2526 	struct sbuf *sbuf;
2527 
2528 	sbuf = sbuf_new_auto();
2529 	ice_nvm_version_str(hw, sbuf);
2530 	sbuf_finish(sbuf);
2531 	device_printf(dev, "%s\n", sbuf_data(sbuf));
2532 	sbuf_delete(sbuf);
2533 }
2534 
2535 /**
2536  * ice_update_port_oversize - Update port oversize stats
2537  * @sc: device private structure
2538  * @rx_errors: VSI error drops
2539  *
2540  * Add ERROR_CNT from GLV_REPC VSI register and rx_oversize stats counter
2541  */
2542 static void
2543 ice_update_port_oversize(struct ice_softc *sc, u64 rx_errors)
2544 {
2545 	struct ice_hw_port_stats *cur_ps;
2546 	cur_ps = &sc->stats.cur;
2547 
2548 	sc->soft_stats.rx_roc_error = rx_errors + cur_ps->rx_oversize;
2549 }
2550 
2551 /**
2552  * ice_update_vsi_hw_stats - Update VSI-specific ethernet statistics counters
2553  * @vsi: the VSI to be updated
2554  *
2555  * Reads hardware stats and updates the ice_vsi_hw_stats tracking structure with
2556  * the updated values.
2557  */
2558 void
2559 ice_update_vsi_hw_stats(struct ice_vsi *vsi)
2560 {
2561 	struct ice_eth_stats *prev_es, *cur_es;
2562 	struct ice_hw *hw = &vsi->sc->hw;
2563 	u16 vsi_num;
2564 
2565 	if (!ice_is_vsi_valid(hw, vsi->idx))
2566 		return;
2567 
2568 	vsi_num = ice_get_hw_vsi_num(hw, vsi->idx); /* HW absolute index of a VSI */
2569 	prev_es = &vsi->hw_stats.prev;
2570 	cur_es = &vsi->hw_stats.cur;
2571 
2572 #define ICE_VSI_STAT40(name, location) \
2573 	ice_stat_update40(hw, name ## L(vsi_num), \
2574 			  vsi->hw_stats.offsets_loaded, \
2575 			  &prev_es->location, &cur_es->location)
2576 
2577 #define ICE_VSI_STAT32(name, location) \
2578 	ice_stat_update32(hw, name(vsi_num), \
2579 			  vsi->hw_stats.offsets_loaded, \
2580 			  &prev_es->location, &cur_es->location)
2581 
2582 	ICE_VSI_STAT40(GLV_GORC, rx_bytes);
2583 	ICE_VSI_STAT40(GLV_UPRC, rx_unicast);
2584 	ICE_VSI_STAT40(GLV_MPRC, rx_multicast);
2585 	ICE_VSI_STAT40(GLV_BPRC, rx_broadcast);
2586 	ICE_VSI_STAT32(GLV_RDPC, rx_discards);
2587 	ICE_VSI_STAT40(GLV_GOTC, tx_bytes);
2588 	ICE_VSI_STAT40(GLV_UPTC, tx_unicast);
2589 	ICE_VSI_STAT40(GLV_MPTC, tx_multicast);
2590 	ICE_VSI_STAT40(GLV_BPTC, tx_broadcast);
2591 	ICE_VSI_STAT32(GLV_TEPC, tx_errors);
2592 
2593 	ice_stat_update_repc(hw, vsi->idx, vsi->hw_stats.offsets_loaded,
2594 			     cur_es);
2595 	ice_update_port_oversize(vsi->sc, cur_es->rx_errors);
2596 #undef ICE_VSI_STAT40
2597 #undef ICE_VSI_STAT32
2598 
2599 	vsi->hw_stats.offsets_loaded = true;
2600 }
2601 
2602 /**
2603  * ice_reset_vsi_stats - Reset VSI statistics counters
2604  * @vsi: VSI structure
2605  *
2606  * Resets the software tracking counters for the VSI statistics, and indicate
2607  * that the offsets haven't been loaded. This is intended to be called
2608  * post-reset so that VSI statistics count from zero again.
2609  */
2610 void
2611 ice_reset_vsi_stats(struct ice_vsi *vsi)
2612 {
2613 	/* Reset HW stats */
2614 	memset(&vsi->hw_stats.prev, 0, sizeof(vsi->hw_stats.prev));
2615 	memset(&vsi->hw_stats.cur, 0, sizeof(vsi->hw_stats.cur));
2616 	vsi->hw_stats.offsets_loaded = false;
2617 }
2618 
2619 /**
2620  * ice_update_pf_stats - Update port stats counters
2621  * @sc: device private softc structure
2622  *
2623  * Reads hardware statistics registers and updates the software tracking
2624  * structure with new values.
2625  */
2626 void
2627 ice_update_pf_stats(struct ice_softc *sc)
2628 {
2629 	struct ice_hw_port_stats *prev_ps, *cur_ps;
2630 	struct ice_hw *hw = &sc->hw;
2631 	u8 lport;
2632 
2633 	MPASS(hw->port_info);
2634 
2635 	prev_ps = &sc->stats.prev;
2636 	cur_ps = &sc->stats.cur;
2637 	lport = hw->port_info->lport;
2638 
2639 #define ICE_PF_STAT_PFC(name, location, index) \
2640 	ice_stat_update40(hw, name(lport, index), \
2641 			  sc->stats.offsets_loaded, \
2642 			  &prev_ps->location[index], &cur_ps->location[index])
2643 
2644 #define ICE_PF_STAT40(name, location) \
2645 	ice_stat_update40(hw, name ## L(lport), \
2646 			  sc->stats.offsets_loaded, \
2647 			  &prev_ps->location, &cur_ps->location)
2648 
2649 #define ICE_PF_STAT32(name, location) \
2650 	ice_stat_update32(hw, name(lport), \
2651 			  sc->stats.offsets_loaded, \
2652 			  &prev_ps->location, &cur_ps->location)
2653 
2654 	ICE_PF_STAT40(GLPRT_GORC, eth.rx_bytes);
2655 	ICE_PF_STAT40(GLPRT_UPRC, eth.rx_unicast);
2656 	ICE_PF_STAT40(GLPRT_MPRC, eth.rx_multicast);
2657 	ICE_PF_STAT40(GLPRT_BPRC, eth.rx_broadcast);
2658 	ICE_PF_STAT40(GLPRT_GOTC, eth.tx_bytes);
2659 	ICE_PF_STAT40(GLPRT_UPTC, eth.tx_unicast);
2660 	ICE_PF_STAT40(GLPRT_MPTC, eth.tx_multicast);
2661 	ICE_PF_STAT40(GLPRT_BPTC, eth.tx_broadcast);
2662 	/* This stat register doesn't have an lport */
2663 	ice_stat_update32(hw, PRTRPB_RDPC,
2664 			  sc->stats.offsets_loaded,
2665 			  &prev_ps->eth.rx_discards, &cur_ps->eth.rx_discards);
2666 
2667 	ICE_PF_STAT32(GLPRT_TDOLD, tx_dropped_link_down);
2668 	ICE_PF_STAT40(GLPRT_PRC64, rx_size_64);
2669 	ICE_PF_STAT40(GLPRT_PRC127, rx_size_127);
2670 	ICE_PF_STAT40(GLPRT_PRC255, rx_size_255);
2671 	ICE_PF_STAT40(GLPRT_PRC511, rx_size_511);
2672 	ICE_PF_STAT40(GLPRT_PRC1023, rx_size_1023);
2673 	ICE_PF_STAT40(GLPRT_PRC1522, rx_size_1522);
2674 	ICE_PF_STAT40(GLPRT_PRC9522, rx_size_big);
2675 	ICE_PF_STAT40(GLPRT_PTC64, tx_size_64);
2676 	ICE_PF_STAT40(GLPRT_PTC127, tx_size_127);
2677 	ICE_PF_STAT40(GLPRT_PTC255, tx_size_255);
2678 	ICE_PF_STAT40(GLPRT_PTC511, tx_size_511);
2679 	ICE_PF_STAT40(GLPRT_PTC1023, tx_size_1023);
2680 	ICE_PF_STAT40(GLPRT_PTC1522, tx_size_1522);
2681 	ICE_PF_STAT40(GLPRT_PTC9522, tx_size_big);
2682 
2683 	/* Update Priority Flow Control Stats */
2684 	for (int i = 0; i <= GLPRT_PXOFFRXC_MAX_INDEX; i++) {
2685 		ICE_PF_STAT_PFC(GLPRT_PXONRXC, priority_xon_rx, i);
2686 		ICE_PF_STAT_PFC(GLPRT_PXOFFRXC, priority_xoff_rx, i);
2687 		ICE_PF_STAT_PFC(GLPRT_PXONTXC, priority_xon_tx, i);
2688 		ICE_PF_STAT_PFC(GLPRT_PXOFFTXC, priority_xoff_tx, i);
2689 		ICE_PF_STAT_PFC(GLPRT_RXON2OFFCNT, priority_xon_2_xoff, i);
2690 	}
2691 
2692 	ICE_PF_STAT32(GLPRT_LXONRXC, link_xon_rx);
2693 	ICE_PF_STAT32(GLPRT_LXOFFRXC, link_xoff_rx);
2694 	ICE_PF_STAT32(GLPRT_LXONTXC, link_xon_tx);
2695 	ICE_PF_STAT32(GLPRT_LXOFFTXC, link_xoff_tx);
2696 	ICE_PF_STAT32(GLPRT_CRCERRS, crc_errors);
2697 	ICE_PF_STAT32(GLPRT_ILLERRC, illegal_bytes);
2698 	ICE_PF_STAT32(GLPRT_MLFC, mac_local_faults);
2699 	ICE_PF_STAT32(GLPRT_MRFC, mac_remote_faults);
2700 	ICE_PF_STAT32(GLPRT_RLEC, rx_len_errors);
2701 	ICE_PF_STAT32(GLPRT_RUC, rx_undersize);
2702 	ICE_PF_STAT32(GLPRT_RFC, rx_fragments);
2703 	ICE_PF_STAT32(GLPRT_ROC, rx_oversize);
2704 	ICE_PF_STAT32(GLPRT_RJC, rx_jabber);
2705 
2706 #undef ICE_PF_STAT40
2707 #undef ICE_PF_STAT32
2708 #undef ICE_PF_STAT_PFC
2709 
2710 	sc->stats.offsets_loaded = true;
2711 }
2712 
2713 /**
2714  * ice_reset_pf_stats - Reset port stats counters
2715  * @sc: Device private softc structure
2716  *
2717  * Reset software tracking values for statistics to zero, and indicate that
2718  * offsets haven't been loaded. Intended to be called after a device reset so
2719  * that statistics count from zero again.
2720  */
2721 void
2722 ice_reset_pf_stats(struct ice_softc *sc)
2723 {
2724 	memset(&sc->stats.prev, 0, sizeof(sc->stats.prev));
2725 	memset(&sc->stats.cur, 0, sizeof(sc->stats.cur));
2726 	sc->stats.offsets_loaded = false;
2727 }
2728 
2729 /**
2730  * ice_sysctl_show_fw - sysctl callback to show firmware information
2731  * @oidp: sysctl oid structure
2732  * @arg1: pointer to private data structure
2733  * @arg2: unused
2734  * @req: sysctl request pointer
2735  *
2736  * Callback for the fw_version sysctl, to display the current firmware
2737  * information found at hardware init time.
2738  */
2739 static int
2740 ice_sysctl_show_fw(SYSCTL_HANDLER_ARGS)
2741 {
2742 	struct ice_softc *sc = (struct ice_softc *)arg1;
2743 	struct ice_hw *hw = &sc->hw;
2744 	struct sbuf *sbuf;
2745 
2746 	UNREFERENCED_PARAMETER(oidp);
2747 	UNREFERENCED_PARAMETER(arg2);
2748 
2749 	if (ice_driver_is_detaching(sc))
2750 		return (ESHUTDOWN);
2751 
2752 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
2753 	ice_nvm_version_str(hw, sbuf);
2754 	sbuf_finish(sbuf);
2755 	sbuf_delete(sbuf);
2756 
2757 	return (0);
2758 }
2759 
2760 /**
2761  * ice_sysctl_pba_number - sysctl callback to show PBA number
2762  * @oidp: sysctl oid structure
2763  * @arg1: pointer to private data structure
2764  * @arg2: unused
2765  * @req: sysctl request pointer
2766  *
2767  * Callback for the pba_number sysctl, used to read the Product Board Assembly
2768  * number for this device.
2769  */
2770 static int
2771 ice_sysctl_pba_number(SYSCTL_HANDLER_ARGS)
2772 {
2773 	struct ice_softc *sc = (struct ice_softc *)arg1;
2774 	struct ice_hw *hw = &sc->hw;
2775 	device_t dev = sc->dev;
2776 	u8 pba_string[32] = "";
2777 	int status;
2778 
2779 	UNREFERENCED_PARAMETER(arg2);
2780 
2781 	if (ice_driver_is_detaching(sc))
2782 		return (ESHUTDOWN);
2783 
2784 	status = ice_read_pba_string(hw, pba_string, sizeof(pba_string));
2785 	if (status) {
2786 		device_printf(dev,
2787 		    "%s: failed to read PBA string from NVM; status %s, aq_err %s\n",
2788 		    __func__, ice_status_str(status),
2789 		    ice_aq_str(hw->adminq.sq_last_status));
2790 		return (EIO);
2791 	}
2792 
2793 	return sysctl_handle_string(oidp, pba_string, sizeof(pba_string), req);
2794 }
2795 
2796 /**
2797  * ice_sysctl_pkg_version - sysctl to show the active package version info
2798  * @oidp: sysctl oid structure
2799  * @arg1: pointer to private data structure
2800  * @arg2: unused
2801  * @req: sysctl request pointer
2802  *
2803  * Callback for the pkg_version sysctl, to display the active DDP package name
2804  * and version information.
2805  */
2806 static int
2807 ice_sysctl_pkg_version(SYSCTL_HANDLER_ARGS)
2808 {
2809 	struct ice_softc *sc = (struct ice_softc *)arg1;
2810 	struct ice_hw *hw = &sc->hw;
2811 	struct sbuf *sbuf;
2812 
2813 	UNREFERENCED_PARAMETER(oidp);
2814 	UNREFERENCED_PARAMETER(arg2);
2815 
2816 	if (ice_driver_is_detaching(sc))
2817 		return (ESHUTDOWN);
2818 
2819 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
2820 	ice_active_pkg_version_str(hw, sbuf);
2821 	sbuf_finish(sbuf);
2822 	sbuf_delete(sbuf);
2823 
2824 	return (0);
2825 }
2826 
2827 /**
2828  * ice_sysctl_os_pkg_version - sysctl to show the OS package version info
2829  * @oidp: sysctl oid structure
2830  * @arg1: pointer to private data structure
2831  * @arg2: unused
2832  * @req: sysctl request pointer
2833  *
2834  * Callback for the pkg_version sysctl, to display the OS DDP package name and
2835  * version info found in the ice_ddp module.
2836  */
2837 static int
2838 ice_sysctl_os_pkg_version(SYSCTL_HANDLER_ARGS)
2839 {
2840 	struct ice_softc *sc = (struct ice_softc *)arg1;
2841 	struct ice_hw *hw = &sc->hw;
2842 	struct sbuf *sbuf;
2843 
2844 	UNREFERENCED_PARAMETER(oidp);
2845 	UNREFERENCED_PARAMETER(arg2);
2846 
2847 	if (ice_driver_is_detaching(sc))
2848 		return (ESHUTDOWN);
2849 
2850 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
2851 	ice_os_pkg_version_str(hw, sbuf);
2852 	sbuf_finish(sbuf);
2853 	sbuf_delete(sbuf);
2854 
2855 	return (0);
2856 }
2857 
2858 /**
2859  * ice_sysctl_current_speed - sysctl callback to show current link speed
2860  * @oidp: sysctl oid structure
2861  * @arg1: pointer to private data structure
2862  * @arg2: unused
2863  * @req: sysctl request pointer
2864  *
2865  * Callback for the current_speed sysctl, to display the string representing
2866  * the current link speed.
2867  */
2868 static int
2869 ice_sysctl_current_speed(SYSCTL_HANDLER_ARGS)
2870 {
2871 	struct ice_softc *sc = (struct ice_softc *)arg1;
2872 	struct ice_hw *hw = &sc->hw;
2873 	struct sbuf *sbuf;
2874 
2875 	UNREFERENCED_PARAMETER(oidp);
2876 	UNREFERENCED_PARAMETER(arg2);
2877 
2878 	if (ice_driver_is_detaching(sc))
2879 		return (ESHUTDOWN);
2880 
2881 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 10, req);
2882 	sbuf_printf(sbuf, "%s", ice_aq_speed_to_str(hw->port_info));
2883 	sbuf_finish(sbuf);
2884 	sbuf_delete(sbuf);
2885 
2886 	return (0);
2887 }
2888 
2889 /**
2890  * @var phy_link_speeds
2891  * @brief PHY link speed conversion array
2892  *
2893  * Array of link speeds to convert ICE_PHY_TYPE_LOW and ICE_PHY_TYPE_HIGH into
2894  * link speeds used by the link speed sysctls.
2895  *
2896  * @remark these are based on the indices used in the BIT() macros for the
2897  * ICE_PHY_TYPE_LOW_* and ICE_PHY_TYPE_HIGH_* definitions.
2898  */
2899 static const uint16_t phy_link_speeds[] = {
2900     ICE_AQ_LINK_SPEED_100MB,
2901     ICE_AQ_LINK_SPEED_100MB,
2902     ICE_AQ_LINK_SPEED_1000MB,
2903     ICE_AQ_LINK_SPEED_1000MB,
2904     ICE_AQ_LINK_SPEED_1000MB,
2905     ICE_AQ_LINK_SPEED_1000MB,
2906     ICE_AQ_LINK_SPEED_1000MB,
2907     ICE_AQ_LINK_SPEED_2500MB,
2908     ICE_AQ_LINK_SPEED_2500MB,
2909     ICE_AQ_LINK_SPEED_2500MB,
2910     ICE_AQ_LINK_SPEED_5GB,
2911     ICE_AQ_LINK_SPEED_5GB,
2912     ICE_AQ_LINK_SPEED_10GB,
2913     ICE_AQ_LINK_SPEED_10GB,
2914     ICE_AQ_LINK_SPEED_10GB,
2915     ICE_AQ_LINK_SPEED_10GB,
2916     ICE_AQ_LINK_SPEED_10GB,
2917     ICE_AQ_LINK_SPEED_10GB,
2918     ICE_AQ_LINK_SPEED_10GB,
2919     ICE_AQ_LINK_SPEED_25GB,
2920     ICE_AQ_LINK_SPEED_25GB,
2921     ICE_AQ_LINK_SPEED_25GB,
2922     ICE_AQ_LINK_SPEED_25GB,
2923     ICE_AQ_LINK_SPEED_25GB,
2924     ICE_AQ_LINK_SPEED_25GB,
2925     ICE_AQ_LINK_SPEED_25GB,
2926     ICE_AQ_LINK_SPEED_25GB,
2927     ICE_AQ_LINK_SPEED_25GB,
2928     ICE_AQ_LINK_SPEED_25GB,
2929     ICE_AQ_LINK_SPEED_25GB,
2930     ICE_AQ_LINK_SPEED_40GB,
2931     ICE_AQ_LINK_SPEED_40GB,
2932     ICE_AQ_LINK_SPEED_40GB,
2933     ICE_AQ_LINK_SPEED_40GB,
2934     ICE_AQ_LINK_SPEED_40GB,
2935     ICE_AQ_LINK_SPEED_40GB,
2936     ICE_AQ_LINK_SPEED_50GB,
2937     ICE_AQ_LINK_SPEED_50GB,
2938     ICE_AQ_LINK_SPEED_50GB,
2939     ICE_AQ_LINK_SPEED_50GB,
2940     ICE_AQ_LINK_SPEED_50GB,
2941     ICE_AQ_LINK_SPEED_50GB,
2942     ICE_AQ_LINK_SPEED_50GB,
2943     ICE_AQ_LINK_SPEED_50GB,
2944     ICE_AQ_LINK_SPEED_50GB,
2945     ICE_AQ_LINK_SPEED_50GB,
2946     ICE_AQ_LINK_SPEED_50GB,
2947     ICE_AQ_LINK_SPEED_50GB,
2948     ICE_AQ_LINK_SPEED_50GB,
2949     ICE_AQ_LINK_SPEED_50GB,
2950     ICE_AQ_LINK_SPEED_50GB,
2951     ICE_AQ_LINK_SPEED_100GB,
2952     ICE_AQ_LINK_SPEED_100GB,
2953     ICE_AQ_LINK_SPEED_100GB,
2954     ICE_AQ_LINK_SPEED_100GB,
2955     ICE_AQ_LINK_SPEED_100GB,
2956     ICE_AQ_LINK_SPEED_100GB,
2957     ICE_AQ_LINK_SPEED_100GB,
2958     ICE_AQ_LINK_SPEED_100GB,
2959     ICE_AQ_LINK_SPEED_100GB,
2960     ICE_AQ_LINK_SPEED_100GB,
2961     ICE_AQ_LINK_SPEED_100GB,
2962     ICE_AQ_LINK_SPEED_100GB,
2963     ICE_AQ_LINK_SPEED_100GB,
2964     /* These rates are for ICE_PHY_TYPE_HIGH_* */
2965     ICE_AQ_LINK_SPEED_100GB,
2966     ICE_AQ_LINK_SPEED_100GB,
2967     ICE_AQ_LINK_SPEED_100GB,
2968     ICE_AQ_LINK_SPEED_100GB,
2969     ICE_AQ_LINK_SPEED_100GB,
2970     ICE_AQ_LINK_SPEED_200GB,
2971     ICE_AQ_LINK_SPEED_200GB,
2972     ICE_AQ_LINK_SPEED_200GB,
2973     ICE_AQ_LINK_SPEED_200GB,
2974     ICE_AQ_LINK_SPEED_200GB,
2975     ICE_AQ_LINK_SPEED_200GB,
2976     ICE_AQ_LINK_SPEED_200GB,
2977     ICE_AQ_LINK_SPEED_200GB,
2978     ICE_AQ_LINK_SPEED_200GB,
2979     ICE_AQ_LINK_SPEED_200GB,
2980 };
2981 
/*
 * Help text for the advertise_speed sysctl. One line per flag value; the
 * adjacent string literals are concatenated by the compiler.
 */
#define ICE_SYSCTL_HELP_ADVERTISE_SPEED \
    "\nControl advertised link speed." \
    "\nFlags:" \
    "\n\t   0x0 - Auto" \
    "\n\t   0x1 - 10 Mb" \
    "\n\t   0x2 - 100 Mb" \
    "\n\t   0x4 - 1G" \
    "\n\t   0x8 - 2.5G" \
    "\n\t  0x10 - 5G" \
    "\n\t  0x20 - 10G" \
    "\n\t  0x40 - 20G" \
    "\n\t  0x80 - 25G" \
    "\n\t 0x100 - 40G" \
    "\n\t 0x200 - 50G" \
    "\n\t 0x400 - 100G" \
    "\n\t 0x800 - 200G" \
    "\n\t0x8000 - Unknown" \
    "\n\t" \
    "\nUse \"sysctl -x\" to view flags properly."
3001 
/*
 * PHY type groups, one per link speed. Each macro ORs together the
 * ICE_PHY_TYPE_LOW_* or ICE_PHY_TYPE_HIGH_* flags belonging to that speed.
 */

/* 100 Mb PHY types (low quad) */
#define ICE_PHYS_100MB \
	(ICE_PHY_TYPE_LOW_100BASE_TX | \
	 ICE_PHY_TYPE_LOW_100M_SGMII)

/* 1 Gb PHY types (low quad) */
#define ICE_PHYS_1000MB \
	(ICE_PHY_TYPE_LOW_1000BASE_T | \
	 ICE_PHY_TYPE_LOW_1000BASE_SX | \
	 ICE_PHY_TYPE_LOW_1000BASE_LX | \
	 ICE_PHY_TYPE_LOW_1000BASE_KX | \
	 ICE_PHY_TYPE_LOW_1G_SGMII)

/* 2.5 Gb PHY types (low quad) */
#define ICE_PHYS_2500MB \
	(ICE_PHY_TYPE_LOW_2500BASE_T | \
	 ICE_PHY_TYPE_LOW_2500BASE_X | \
	 ICE_PHY_TYPE_LOW_2500BASE_KX)

/* 5 Gb PHY types (low quad) */
#define ICE_PHYS_5GB \
	(ICE_PHY_TYPE_LOW_5GBASE_T | \
	 ICE_PHY_TYPE_LOW_5GBASE_KR)

/* 10 Gb PHY types (low quad) */
#define ICE_PHYS_10GB \
	(ICE_PHY_TYPE_LOW_10GBASE_T | \
	 ICE_PHY_TYPE_LOW_10G_SFI_DA | \
	 ICE_PHY_TYPE_LOW_10GBASE_SR | \
	 ICE_PHY_TYPE_LOW_10GBASE_LR | \
	 ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 | \
	 ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC | \
	 ICE_PHY_TYPE_LOW_10G_SFI_C2C)

/* 25 Gb PHY types (low quad) */
#define ICE_PHYS_25GB \
	(ICE_PHY_TYPE_LOW_25GBASE_T | \
	 ICE_PHY_TYPE_LOW_25GBASE_CR | \
	 ICE_PHY_TYPE_LOW_25GBASE_CR_S | \
	 ICE_PHY_TYPE_LOW_25GBASE_CR1 | \
	 ICE_PHY_TYPE_LOW_25GBASE_SR | \
	 ICE_PHY_TYPE_LOW_25GBASE_LR | \
	 ICE_PHY_TYPE_LOW_25GBASE_KR | \
	 ICE_PHY_TYPE_LOW_25GBASE_KR_S | \
	 ICE_PHY_TYPE_LOW_25GBASE_KR1 | \
	 ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC | \
	 ICE_PHY_TYPE_LOW_25G_AUI_C2C)

/* 40 Gb PHY types (low quad) */
#define ICE_PHYS_40GB \
	(ICE_PHY_TYPE_LOW_40GBASE_CR4 | \
	 ICE_PHY_TYPE_LOW_40GBASE_SR4 | \
	 ICE_PHY_TYPE_LOW_40GBASE_LR4 | \
	 ICE_PHY_TYPE_LOW_40GBASE_KR4 | \
	 ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC | \
	 ICE_PHY_TYPE_LOW_40G_XLAUI)

/* 50 Gb PHY types (low quad) */
#define ICE_PHYS_50GB \
	(ICE_PHY_TYPE_LOW_50GBASE_CR2 | \
	 ICE_PHY_TYPE_LOW_50GBASE_SR2 | \
	 ICE_PHY_TYPE_LOW_50GBASE_LR2 | \
	 ICE_PHY_TYPE_LOW_50GBASE_KR2 | \
	 ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC | \
	 ICE_PHY_TYPE_LOW_50G_LAUI2 | \
	 ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC | \
	 ICE_PHY_TYPE_LOW_50G_AUI2 | \
	 ICE_PHY_TYPE_LOW_50GBASE_CP | \
	 ICE_PHY_TYPE_LOW_50GBASE_SR | \
	 ICE_PHY_TYPE_LOW_50GBASE_FR | \
	 ICE_PHY_TYPE_LOW_50GBASE_LR | \
	 ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4 | \
	 ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC | \
	 ICE_PHY_TYPE_LOW_50G_AUI1)

/* 100 Gb PHY types that live in the low quad */
#define ICE_PHYS_100GB_LOW \
	(ICE_PHY_TYPE_LOW_100GBASE_CR4 | \
	 ICE_PHY_TYPE_LOW_100GBASE_SR4 | \
	 ICE_PHY_TYPE_LOW_100GBASE_LR4 | \
	 ICE_PHY_TYPE_LOW_100GBASE_KR4 | \
	 ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC | \
	 ICE_PHY_TYPE_LOW_100G_CAUI4 | \
	 ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC | \
	 ICE_PHY_TYPE_LOW_100G_AUI4 | \
	 ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 | \
	 ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 | \
	 ICE_PHY_TYPE_LOW_100GBASE_CP2 | \
	 ICE_PHY_TYPE_LOW_100GBASE_SR2 | \
	 ICE_PHY_TYPE_LOW_100GBASE_DR)

/* 100 Gb PHY types that live in the high quad */
#define ICE_PHYS_100GB_HIGH \
	(ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4 | \
	 ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC | \
	 ICE_PHY_TYPE_HIGH_100G_CAUI2 | \
	 ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC | \
	 ICE_PHY_TYPE_HIGH_100G_AUI2)

/* 200 Gb PHY types (high quad) */
#define ICE_PHYS_200GB \
	(ICE_PHY_TYPE_HIGH_200G_CR4_PAM4 | \
	 ICE_PHY_TYPE_HIGH_200G_SR4 | \
	 ICE_PHY_TYPE_HIGH_200G_FR4 | \
	 ICE_PHY_TYPE_HIGH_200G_LR4 | \
	 ICE_PHY_TYPE_HIGH_200G_DR4 | \
	 ICE_PHY_TYPE_HIGH_200G_KR4_PAM4 | \
	 ICE_PHY_TYPE_HIGH_200G_AUI4_AOC_ACC | \
	 ICE_PHY_TYPE_HIGH_200G_AUI4 | \
	 ICE_PHY_TYPE_HIGH_200G_AUI8_AOC_ACC | \
	 ICE_PHY_TYPE_HIGH_200G_AUI8)
3092 
3093 /**
3094  * ice_aq_phy_types_to_link_speeds - Convert the PHY Types to speeds
3095  * @phy_type_low: lower 64-bit PHY Type bitmask
3096  * @phy_type_high: upper 64-bit PHY Type bitmask
3097  *
3098  * Convert the PHY Type fields from Get PHY Abilities and Set PHY Config into
3099  * link speed flags. If phy_type_high has an unknown PHY type, then the return
3100  * value will include the "ICE_AQ_LINK_SPEED_UNKNOWN" flag as well.
3101  */
3102 static u16
3103 ice_aq_phy_types_to_link_speeds(u64 phy_type_low, u64 phy_type_high)
3104 {
3105 	u16 sysctl_speeds = 0;
3106 	int bit;
3107 
3108 	/* coverity[address_of] */
3109 	for_each_set_bit(bit, &phy_type_low, 64)
3110 		sysctl_speeds |= phy_link_speeds[bit];
3111 
3112 	/* coverity[address_of] */
3113 	for_each_set_bit(bit, &phy_type_high, 64) {
3114 		if ((bit + 64) < (int)ARRAY_SIZE(phy_link_speeds))
3115 			sysctl_speeds |= phy_link_speeds[bit + 64];
3116 		else
3117 			sysctl_speeds |= ICE_AQ_LINK_SPEED_UNKNOWN;
3118 	}
3119 
3120 	return (sysctl_speeds);
3121 }
3122 
3123 /**
3124  * ice_sysctl_speeds_to_aq_phy_types - Convert sysctl speed flags to AQ PHY flags
3125  * @sysctl_speeds: 16-bit sysctl speeds or AQ_LINK_SPEED flags
3126  * @phy_type_low: output parameter for lower AQ PHY flags
3127  * @phy_type_high: output parameter for higher AQ PHY flags
3128  *
3129  * Converts the given link speed flags into AQ PHY type flag sets appropriate
3130  * for use in a Set PHY Config command.
3131  */
3132 static void
3133 ice_sysctl_speeds_to_aq_phy_types(u16 sysctl_speeds, u64 *phy_type_low,
3134 				  u64 *phy_type_high)
3135 {
3136 	*phy_type_low = 0, *phy_type_high = 0;
3137 
3138 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_100MB)
3139 		*phy_type_low |= ICE_PHYS_100MB;
3140 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_1000MB)
3141 		*phy_type_low |= ICE_PHYS_1000MB;
3142 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_2500MB)
3143 		*phy_type_low |= ICE_PHYS_2500MB;
3144 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_5GB)
3145 		*phy_type_low |= ICE_PHYS_5GB;
3146 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_10GB)
3147 		*phy_type_low |= ICE_PHYS_10GB;
3148 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_25GB)
3149 		*phy_type_low |= ICE_PHYS_25GB;
3150 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_40GB)
3151 		*phy_type_low |= ICE_PHYS_40GB;
3152 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_50GB)
3153 		*phy_type_low |= ICE_PHYS_50GB;
3154 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_100GB) {
3155 		*phy_type_low |= ICE_PHYS_100GB_LOW;
3156 		*phy_type_high |= ICE_PHYS_100GB_HIGH;
3157 	}
3158 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_200GB)
3159 		*phy_type_high |= ICE_PHYS_200GB;
3160 }
3161 
3162 /**
3163  * @struct ice_phy_data
3164  * @brief PHY caps and link speeds
3165  *
3166  * Buffer providing report mode and user speeds;
3167  * returning intersection of PHY types and speeds.
3168  */
3169 struct ice_phy_data {
3170 	u64 phy_low_orig;     /* PHY low quad from report */
3171 	u64 phy_high_orig;    /* PHY high quad from report */
3172 	u64 phy_low_intr;     /* PHY low quad intersection with user speeds */
3173 	u64 phy_high_intr;    /* PHY high quad intersection with user speeds */
3174 	u16 user_speeds_orig; /* Input from caller - See ICE_AQ_LINK_SPEED_* */
3175 	u16 user_speeds_intr; /* Intersect with report speeds */
3176 	u8 report_mode;       /* See ICE_AQC_REPORT_* */
3177 };
3178 
3179 /**
3180  * ice_intersect_phy_types_and_speeds - Return intersection of link speeds
3181  * @sc: device private structure
3182  * @phy_data: device PHY data
3183  *
3184  * On read: Displays the currently supported speeds
3185  * On write: Sets the device's supported speeds
3186  * Valid input flags: see ICE_SYSCTL_HELP_ADVERTISE_SPEED
3187  */
3188 static int
3189 ice_intersect_phy_types_and_speeds(struct ice_softc *sc,
3190 				   struct ice_phy_data *phy_data)
3191 {
3192 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3193 	const char *report_types[5] = { "w/o MEDIA",
3194 					"w/MEDIA",
3195 					"ACTIVE",
3196 					"EDOOFUS", /* Not used */
3197 					"DFLT" };
3198 	struct ice_hw *hw = &sc->hw;
3199 	struct ice_port_info *pi = hw->port_info;
3200 	int status;
3201 	u16 report_speeds, temp_speeds;
3202 	u8 report_type;
3203 	bool apply_speed_filter = false;
3204 
3205 	switch (phy_data->report_mode) {
3206 	case ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA:
3207 	case ICE_AQC_REPORT_TOPO_CAP_MEDIA:
3208 	case ICE_AQC_REPORT_ACTIVE_CFG:
3209 	case ICE_AQC_REPORT_DFLT_CFG:
3210 		report_type = phy_data->report_mode >> 1;
3211 		break;
3212 	default:
3213 		device_printf(sc->dev,
3214 		    "%s: phy_data.report_mode \"%u\" doesn't exist\n",
3215 		    __func__, phy_data->report_mode);
3216 		return (EINVAL);
3217 	}
3218 
3219 	/* 0 is treated as "Auto"; the driver will handle selecting the
3220 	 * correct speeds. Including, in some cases, applying an override
3221 	 * if provided.
3222 	 */
3223 	if (phy_data->user_speeds_orig == 0)
3224 		phy_data->user_speeds_orig = USHRT_MAX;
3225 	else if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE))
3226 		apply_speed_filter = true;
3227 
3228 	status = ice_aq_get_phy_caps(pi, false, phy_data->report_mode, &pcaps, NULL);
3229 	if (status) {
3230 		device_printf(sc->dev,
3231 		    "%s: ice_aq_get_phy_caps (%s) failed; status %s, aq_err %s\n",
3232 		    __func__, report_types[report_type],
3233 		    ice_status_str(status),
3234 		    ice_aq_str(sc->hw.adminq.sq_last_status));
3235 		return (EIO);
3236 	}
3237 
3238 	phy_data->phy_low_orig = le64toh(pcaps.phy_type_low);
3239 	phy_data->phy_high_orig = le64toh(pcaps.phy_type_high);
3240 	report_speeds = ice_aq_phy_types_to_link_speeds(phy_data->phy_low_orig,
3241 	    phy_data->phy_high_orig);
3242 	if (apply_speed_filter) {
3243 		temp_speeds = ice_apply_supported_speed_filter(report_speeds,
3244 		    pcaps.module_type[0]);
3245 		if ((phy_data->user_speeds_orig & temp_speeds) == 0) {
3246 			device_printf(sc->dev,
3247 			    "User-specified speeds (\"0x%04X\") not supported\n",
3248 			    phy_data->user_speeds_orig);
3249 			return (EINVAL);
3250 		}
3251 		report_speeds = temp_speeds;
3252 	}
3253 	ice_sysctl_speeds_to_aq_phy_types(phy_data->user_speeds_orig,
3254 	    &phy_data->phy_low_intr, &phy_data->phy_high_intr);
3255 	phy_data->user_speeds_intr = phy_data->user_speeds_orig & report_speeds;
3256 	phy_data->phy_low_intr &= phy_data->phy_low_orig;
3257 	phy_data->phy_high_intr &= phy_data->phy_high_orig;
3258 
3259 	return (0);
3260  }
3261 
3262 /**
3263  * ice_sysctl_advertise_speed - Display/change link speeds supported by port
3264  * @oidp: sysctl oid structure
3265  * @arg1: pointer to private data structure
3266  * @arg2: unused
3267  * @req: sysctl request pointer
3268  *
3269  * On read: Displays the currently supported speeds
3270  * On write: Sets the device's supported speeds
3271  * Valid input flags: see ICE_SYSCTL_HELP_ADVERTISE_SPEED
3272  */
3273 static int
3274 ice_sysctl_advertise_speed(SYSCTL_HANDLER_ARGS)
3275 {
3276 	struct ice_softc *sc = (struct ice_softc *)arg1;
3277 	struct ice_port_info *pi = sc->hw.port_info;
3278 	struct ice_phy_data phy_data = { 0 };
3279 	device_t dev = sc->dev;
3280 	u16 sysctl_speeds;
3281 	int ret;
3282 
3283 	UNREFERENCED_PARAMETER(arg2);
3284 
3285 	if (ice_driver_is_detaching(sc))
3286 		return (ESHUTDOWN);
3287 
3288 	/* Get the current speeds from the adapter's "active" configuration. */
3289 	phy_data.report_mode = ICE_AQC_REPORT_ACTIVE_CFG;
3290 	ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
3291 	if (ret) {
3292 		/* Error message already printed within function */
3293 		return (ret);
3294 	}
3295 
3296 	sysctl_speeds = phy_data.user_speeds_intr;
3297 
3298 	ret = sysctl_handle_16(oidp, &sysctl_speeds, 0, req);
3299 	if ((ret) || (req->newptr == NULL))
3300 		return (ret);
3301 
3302 	if (sysctl_speeds > ICE_SYSCTL_SPEEDS_VALID_RANGE) {
3303 		device_printf(dev,
3304 			      "%s: \"%u\" is outside of the range of acceptable values.\n",
3305 			      __func__, sysctl_speeds);
3306 		return (EINVAL);
3307 	}
3308 
3309 	pi->phy.curr_user_speed_req = sysctl_speeds;
3310 
3311 	if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
3312 	    !sc->link_up && !(if_getflags(sc->ifp) & IFF_UP))
3313 		return 0;
3314 
3315 	/* Apply settings requested by user */
3316 	return ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS);
3317 }
3318 
/*
 * Help text for the FEC configuration sysctl. Each mode line pairs the short
 * keyword with the matching ICE_FEC_STRING_* description.
 */
#define ICE_SYSCTL_HELP_FEC_CONFIG \
    "\nDisplay or set the port's requested FEC mode." \
    "\n\tauto - " ICE_FEC_STRING_AUTO \
    "\n\tfc - " ICE_FEC_STRING_BASER \
    "\n\trs - " ICE_FEC_STRING_RS \
    "\n\tnone - " ICE_FEC_STRING_NONE \
    "\nEither of the left or right strings above can be used to set the requested mode."
3326 
3327 /**
3328  * ice_sysctl_fec_config - Display/change the configured FEC mode
3329  * @oidp: sysctl oid structure
3330  * @arg1: pointer to private data structure
3331  * @arg2: unused
3332  * @req: sysctl request pointer
3333  *
3334  * On read: Displays the configured FEC mode
3335  * On write: Sets the device's FEC mode to the input string, if it's valid.
3336  * Valid input strings: see ICE_SYSCTL_HELP_FEC_CONFIG
3337  */
3338 static int
3339 ice_sysctl_fec_config(SYSCTL_HANDLER_ARGS)
3340 {
3341 	struct ice_softc *sc = (struct ice_softc *)arg1;
3342 	struct ice_port_info *pi = sc->hw.port_info;
3343 	enum ice_fec_mode new_mode;
3344 	device_t dev = sc->dev;
3345 	char req_fec[32];
3346 	int ret;
3347 
3348 	UNREFERENCED_PARAMETER(arg2);
3349 
3350 	if (ice_driver_is_detaching(sc))
3351 		return (ESHUTDOWN);
3352 
3353 	bzero(req_fec, sizeof(req_fec));
3354 	strlcpy(req_fec, ice_requested_fec_mode(pi), sizeof(req_fec));
3355 
3356 	ret = sysctl_handle_string(oidp, req_fec, sizeof(req_fec), req);
3357 	if ((ret) || (req->newptr == NULL))
3358 		return (ret);
3359 
3360 	if (strcmp(req_fec, "auto") == 0 ||
3361 	    strcmp(req_fec, ice_fec_str(ICE_FEC_AUTO)) == 0) {
3362 		if (sc->allow_no_fec_mod_in_auto)
3363 			new_mode = ICE_FEC_DIS_AUTO;
3364 		else
3365 			new_mode = ICE_FEC_AUTO;
3366 	} else if (strcmp(req_fec, "fc") == 0 ||
3367 	    strcmp(req_fec, ice_fec_str(ICE_FEC_BASER)) == 0) {
3368 		new_mode = ICE_FEC_BASER;
3369 	} else if (strcmp(req_fec, "rs") == 0 ||
3370 	    strcmp(req_fec, ice_fec_str(ICE_FEC_RS)) == 0) {
3371 		new_mode = ICE_FEC_RS;
3372 	} else if (strcmp(req_fec, "none") == 0 ||
3373 	    strcmp(req_fec, ice_fec_str(ICE_FEC_NONE)) == 0) {
3374 		new_mode = ICE_FEC_NONE;
3375 	} else {
3376 		device_printf(dev,
3377 		    "%s: \"%s\" is not a valid FEC mode\n",
3378 		    __func__, req_fec);
3379 		return (EINVAL);
3380 	}
3381 
3382 	/* Cache user FEC mode for later link ups */
3383 	pi->phy.curr_user_fec_req = new_mode;
3384 
3385 	if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) && !sc->link_up)
3386 		return 0;
3387 
3388 	/* Apply settings requested by user */
3389 	return ice_apply_saved_phy_cfg(sc, ICE_APPLY_FEC);
3390 }
3391 
3392 /**
3393  * ice_sysctl_negotiated_fec - Display the negotiated FEC mode on the link
3394  * @oidp: sysctl oid structure
3395  * @arg1: pointer to private data structure
3396  * @arg2: unused
3397  * @req: sysctl request pointer
3398  *
3399  * On read: Displays the negotiated FEC mode, in a string
3400  */
3401 static int
3402 ice_sysctl_negotiated_fec(SYSCTL_HANDLER_ARGS)
3403 {
3404 	struct ice_softc *sc = (struct ice_softc *)arg1;
3405 	struct ice_hw *hw = &sc->hw;
3406 	char neg_fec[32];
3407 	int ret;
3408 
3409 	UNREFERENCED_PARAMETER(arg2);
3410 
3411 	if (ice_driver_is_detaching(sc))
3412 		return (ESHUTDOWN);
3413 
3414 	/* Copy const string into a buffer to drop const qualifier */
3415 	bzero(neg_fec, sizeof(neg_fec));
3416 	strlcpy(neg_fec, ice_negotiated_fec_mode(hw->port_info), sizeof(neg_fec));
3417 
3418 	ret = sysctl_handle_string(oidp, neg_fec, 0, req);
3419 	if (req->newptr != NULL)
3420 		return (EPERM);
3421 
3422 	return (ret);
3423 }
3424 
/*
 * Help text for the flow control configuration sysctl. Each mode line pairs
 * the numeric value with the matching ICE_FC_STRING_* description.
 */
#define ICE_SYSCTL_HELP_FC_CONFIG \
    "\nDisplay or set the port's advertised flow control mode.\n" \
    "\t0 - " ICE_FC_STRING_NONE \
    "\n\t1 - " ICE_FC_STRING_RX \
    "\n\t2 - " ICE_FC_STRING_TX \
    "\n\t3 - " ICE_FC_STRING_FULL \
    "\nEither the numbers or the strings above can be used to set the advertised mode."
3432 
3433 /**
3434  * ice_sysctl_fc_config - Display/change the advertised flow control mode
3435  * @oidp: sysctl oid structure
3436  * @arg1: pointer to private data structure
3437  * @arg2: unused
3438  * @req: sysctl request pointer
3439  *
3440  * On read: Displays the configured flow control mode
3441  * On write: Sets the device's flow control mode to the input, if it's valid.
3442  * Valid input strings: see ICE_SYSCTL_HELP_FC_CONFIG
3443  */
3444 static int
3445 ice_sysctl_fc_config(SYSCTL_HANDLER_ARGS)
3446 {
3447 	struct ice_softc *sc = (struct ice_softc *)arg1;
3448 	struct ice_port_info *pi = sc->hw.port_info;
3449 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3450 	enum ice_fc_mode old_mode, new_mode;
3451 	struct ice_hw *hw = &sc->hw;
3452 	device_t dev = sc->dev;
3453 	int status;
3454 	int ret, fc_num;
3455 	bool mode_set = false;
3456 	struct sbuf buf;
3457 	char *fc_str_end;
3458 	char fc_str[32];
3459 
3460 	UNREFERENCED_PARAMETER(arg2);
3461 
3462 	if (ice_driver_is_detaching(sc))
3463 		return (ESHUTDOWN);
3464 
3465 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
3466 				     &pcaps, NULL);
3467 	if (status) {
3468 		device_printf(dev,
3469 		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
3470 		    __func__, ice_status_str(status),
3471 		    ice_aq_str(hw->adminq.sq_last_status));
3472 		return (EIO);
3473 	}
3474 
3475 	/* Convert HW response format to SW enum value */
3476 	if ((pcaps.caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) &&
3477 	    (pcaps.caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE))
3478 		old_mode = ICE_FC_FULL;
3479 	else if (pcaps.caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE)
3480 		old_mode = ICE_FC_TX_PAUSE;
3481 	else if (pcaps.caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)
3482 		old_mode = ICE_FC_RX_PAUSE;
3483 	else
3484 		old_mode = ICE_FC_NONE;
3485 
3486 	/* Create "old" string for output */
3487 	bzero(fc_str, sizeof(fc_str));
3488 	sbuf_new_for_sysctl(&buf, fc_str, sizeof(fc_str), req);
3489 	sbuf_printf(&buf, "%d<%s>", old_mode, ice_fc_str(old_mode));
3490 	sbuf_finish(&buf);
3491 	sbuf_delete(&buf);
3492 
3493 	ret = sysctl_handle_string(oidp, fc_str, sizeof(fc_str), req);
3494 	if ((ret) || (req->newptr == NULL))
3495 		return (ret);
3496 
3497 	/* Try to parse input as a string, first */
3498 	if (strcasecmp(ice_fc_str(ICE_FC_FULL), fc_str) == 0) {
3499 		new_mode = ICE_FC_FULL;
3500 		mode_set = true;
3501 	}
3502 	else if (strcasecmp(ice_fc_str(ICE_FC_TX_PAUSE), fc_str) == 0) {
3503 		new_mode = ICE_FC_TX_PAUSE;
3504 		mode_set = true;
3505 	}
3506 	else if (strcasecmp(ice_fc_str(ICE_FC_RX_PAUSE), fc_str) == 0) {
3507 		new_mode = ICE_FC_RX_PAUSE;
3508 		mode_set = true;
3509 	}
3510 	else if (strcasecmp(ice_fc_str(ICE_FC_NONE), fc_str) == 0) {
3511 		new_mode = ICE_FC_NONE;
3512 		mode_set = true;
3513 	}
3514 
3515 	/*
3516 	 * Then check if it's an integer, for compatibility with the method
3517 	 * used in older drivers.
3518 	 */
3519 	if (!mode_set) {
3520 		fc_num = strtol(fc_str, &fc_str_end, 0);
3521 		if (fc_str_end == fc_str)
3522 			fc_num = -1;
3523 		switch (fc_num) {
3524 		case 3:
3525 			new_mode = ICE_FC_FULL;
3526 			break;
3527 		case 2:
3528 			new_mode = ICE_FC_TX_PAUSE;
3529 			break;
3530 		case 1:
3531 			new_mode = ICE_FC_RX_PAUSE;
3532 			break;
3533 		case 0:
3534 			new_mode = ICE_FC_NONE;
3535 			break;
3536 		default:
3537 			device_printf(dev,
3538 			    "%s: \"%s\" is not a valid flow control mode\n",
3539 			    __func__, fc_str);
3540 			return (EINVAL);
3541 		}
3542 	}
3543 
3544 	/* Save flow control mode from user */
3545 	pi->phy.curr_user_fc_req = new_mode;
3546 
3547 	/* Turn off Priority Flow Control when Link Flow Control is enabled */
3548 	if ((hw->port_info->qos_cfg.is_sw_lldp) &&
3549 	    (hw->port_info->qos_cfg.local_dcbx_cfg.pfc.pfcena != 0) &&
3550 	    (new_mode != ICE_FC_NONE)) {
3551 		ret = ice_config_pfc(sc, 0x0);
3552 		if (ret)
3553 			return (ret);
3554 	}
3555 
3556 	if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) && !sc->link_up)
3557 		return 0;
3558 
3559 	/* Apply settings requested by user */
3560 	return ice_apply_saved_phy_cfg(sc, ICE_APPLY_FC);
3561 }
3562 
3563 /**
3564  * ice_sysctl_negotiated_fc - Display currently negotiated FC mode
3565  * @oidp: sysctl oid structure
3566  * @arg1: pointer to private data structure
3567  * @arg2: unused
3568  * @req: sysctl request pointer
3569  *
3570  * On read: Displays the currently negotiated flow control settings.
3571  *
3572  * If link is not established, this will report ICE_FC_NONE, as no flow
3573  * control is negotiated while link is down.
3574  */
3575 static int
3576 ice_sysctl_negotiated_fc(SYSCTL_HANDLER_ARGS)
3577 {
3578 	struct ice_softc *sc = (struct ice_softc *)arg1;
3579 	struct ice_port_info *pi = sc->hw.port_info;
3580 	const char *negotiated_fc;
3581 
3582 	UNREFERENCED_PARAMETER(arg2);
3583 
3584 	if (ice_driver_is_detaching(sc))
3585 		return (ESHUTDOWN);
3586 
3587 	negotiated_fc = ice_flowcontrol_mode(pi);
3588 
3589 	return sysctl_handle_string(oidp, __DECONST(char *, negotiated_fc), 0, req);
3590 }
3591 
3592 /**
3593  * __ice_sysctl_phy_type_handler - Display/change supported PHY types/speeds
3594  * @oidp: sysctl oid structure
3595  * @arg1: pointer to private data structure
3596  * @arg2: unused
3597  * @req: sysctl request pointer
3598  * @is_phy_type_high: if true, handle the high PHY type instead of the low PHY type
3599  *
3600  * Private handler for phy_type_high and phy_type_low sysctls.
3601  */
3602 static int
3603 __ice_sysctl_phy_type_handler(SYSCTL_HANDLER_ARGS, bool is_phy_type_high)
3604 {
3605 	struct ice_softc *sc = (struct ice_softc *)arg1;
3606 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3607 	struct ice_aqc_set_phy_cfg_data cfg = { 0 };
3608 	struct ice_hw *hw = &sc->hw;
3609 	device_t dev = sc->dev;
3610 	int status;
3611 	uint64_t types;
3612 	int ret;
3613 
3614 	UNREFERENCED_PARAMETER(arg2);
3615 
3616 	if (ice_driver_is_detaching(sc))
3617 		return (ESHUTDOWN);
3618 
3619 	status = ice_aq_get_phy_caps(hw->port_info, false, ICE_AQC_REPORT_ACTIVE_CFG,
3620 				     &pcaps, NULL);
3621 	if (status) {
3622 		device_printf(dev,
3623 		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
3624 		    __func__, ice_status_str(status),
3625 		    ice_aq_str(hw->adminq.sq_last_status));
3626 		return (EIO);
3627 	}
3628 
3629 	if (is_phy_type_high)
3630 		types = pcaps.phy_type_high;
3631 	else
3632 		types = pcaps.phy_type_low;
3633 
3634 	ret = sysctl_handle_64(oidp, &types, sizeof(types), req);
3635 	if ((ret) || (req->newptr == NULL))
3636 		return (ret);
3637 
3638 	ice_copy_phy_caps_to_cfg(hw->port_info, &pcaps, &cfg);
3639 
3640 	if (is_phy_type_high)
3641 		cfg.phy_type_high = types & hw->port_info->phy.phy_type_high;
3642 	else
3643 		cfg.phy_type_low = types & hw->port_info->phy.phy_type_low;
3644 	cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
3645 
3646 	status = ice_aq_set_phy_cfg(hw, hw->port_info, &cfg, NULL);
3647 	if (status) {
3648 		device_printf(dev,
3649 		    "%s: ice_aq_set_phy_cfg failed; status %s, aq_err %s\n",
3650 		    __func__, ice_status_str(status),
3651 		    ice_aq_str(hw->adminq.sq_last_status));
3652 		return (EIO);
3653 	}
3654 
3655 	return (0);
3656 
3657 }
3658 
3659 /**
3660  * ice_sysctl_phy_type_low - Display/change supported lower PHY types/speeds
3661  * @oidp: sysctl oid structure
3662  * @arg1: pointer to private data structure
3663  * @arg2: unused
3664  * @req: sysctl request pointer
3665  *
3666  * On read: Displays the currently supported lower PHY types
3667  * On write: Sets the device's supported low PHY types
3668  */
3669 static int
3670 ice_sysctl_phy_type_low(SYSCTL_HANDLER_ARGS)
3671 {
3672 	return __ice_sysctl_phy_type_handler(oidp, arg1, arg2, req, false);
3673 }
3674 
3675 /**
3676  * ice_sysctl_phy_type_high - Display/change supported higher PHY types/speeds
3677  * @oidp: sysctl oid structure
3678  * @arg1: pointer to private data structure
3679  * @arg2: unused
3680  * @req: sysctl request pointer
3681  *
3682  * On read: Displays the currently supported higher PHY types
3683  * On write: Sets the device's supported high PHY types
3684  */
3685 static int
3686 ice_sysctl_phy_type_high(SYSCTL_HANDLER_ARGS)
3687 {
3688 	return __ice_sysctl_phy_type_handler(oidp, arg1, arg2, req, true);
3689 }
3690 
3691 /**
3692  * ice_sysctl_phy_caps - Display response from Get PHY abililties
3693  * @oidp: sysctl oid structure
3694  * @arg1: pointer to private data structure
3695  * @arg2: unused
3696  * @req: sysctl request pointer
3697  * @report_mode: the mode to report
3698  *
3699  * On read: Display the response from Get PHY abillities with the given report
3700  * mode.
3701  */
3702 static int
3703 ice_sysctl_phy_caps(SYSCTL_HANDLER_ARGS, u8 report_mode)
3704 {
3705 	struct ice_softc *sc = (struct ice_softc *)arg1;
3706 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3707 	struct ice_hw *hw = &sc->hw;
3708 	struct ice_port_info *pi = hw->port_info;
3709 	device_t dev = sc->dev;
3710 	int status;
3711 	int ret;
3712 
3713 	UNREFERENCED_PARAMETER(arg2);
3714 
3715 	ret = priv_check(curthread, PRIV_DRIVER);
3716 	if (ret)
3717 		return (ret);
3718 
3719 	if (ice_driver_is_detaching(sc))
3720 		return (ESHUTDOWN);
3721 
3722 	status = ice_aq_get_phy_caps(pi, true, report_mode, &pcaps, NULL);
3723 	if (status) {
3724 		device_printf(dev,
3725 		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
3726 		    __func__, ice_status_str(status),
3727 		    ice_aq_str(hw->adminq.sq_last_status));
3728 		return (EIO);
3729 	}
3730 
3731 	ret = sysctl_handle_opaque(oidp, &pcaps, sizeof(pcaps), req);
3732 	if (req->newptr != NULL)
3733 		return (EPERM);
3734 
3735 	return (ret);
3736 }
3737 
3738 /**
3739  * ice_sysctl_phy_sw_caps - Display response from Get PHY abililties
3740  * @oidp: sysctl oid structure
3741  * @arg1: pointer to private data structure
3742  * @arg2: unused
3743  * @req: sysctl request pointer
3744  *
3745  * On read: Display the response from Get PHY abillities reporting the last
3746  * software configuration.
3747  */
3748 static int
3749 ice_sysctl_phy_sw_caps(SYSCTL_HANDLER_ARGS)
3750 {
3751 	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
3752 				   ICE_AQC_REPORT_ACTIVE_CFG);
3753 }
3754 
3755 /**
3756  * ice_sysctl_phy_nvm_caps - Display response from Get PHY abililties
3757  * @oidp: sysctl oid structure
3758  * @arg1: pointer to private data structure
3759  * @arg2: unused
3760  * @req: sysctl request pointer
3761  *
3762  * On read: Display the response from Get PHY abillities reporting the NVM
3763  * configuration.
3764  */
3765 static int
3766 ice_sysctl_phy_nvm_caps(SYSCTL_HANDLER_ARGS)
3767 {
3768 	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
3769 				   ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA);
3770 }
3771 
3772 /**
3773  * ice_sysctl_phy_topo_caps - Display response from Get PHY abililties
3774  * @oidp: sysctl oid structure
3775  * @arg1: pointer to private data structure
3776  * @arg2: unused
3777  * @req: sysctl request pointer
3778  *
3779  * On read: Display the response from Get PHY abillities reporting the
3780  * topology configuration.
3781  */
3782 static int
3783 ice_sysctl_phy_topo_caps(SYSCTL_HANDLER_ARGS)
3784 {
3785 	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
3786 				   ICE_AQC_REPORT_TOPO_CAP_MEDIA);
3787 }
3788 
3789 /**
3790  * ice_sysctl_phy_link_status - Display response from Get Link Status
3791  * @oidp: sysctl oid structure
3792  * @arg1: pointer to private data structure
3793  * @arg2: unused
3794  * @req: sysctl request pointer
3795  *
3796  * On read: Display the response from firmware for the Get Link Status
3797  * request.
3798  */
3799 static int
3800 ice_sysctl_phy_link_status(SYSCTL_HANDLER_ARGS)
3801 {
3802 	struct ice_aqc_get_link_status_data link_data = { 0 };
3803 	struct ice_softc *sc = (struct ice_softc *)arg1;
3804 	struct ice_hw *hw = &sc->hw;
3805 	struct ice_port_info *pi = hw->port_info;
3806 	struct ice_aqc_get_link_status *resp;
3807 	struct ice_aq_desc desc;
3808 	device_t dev = sc->dev;
3809 	int status;
3810 	int ret;
3811 
3812 	UNREFERENCED_PARAMETER(arg2);
3813 
3814 	/*
3815 	 * Ensure that only contexts with driver privilege are allowed to
3816 	 * access this information
3817 	 */
3818 	ret = priv_check(curthread, PRIV_DRIVER);
3819 	if (ret)
3820 		return (ret);
3821 
3822 	if (ice_driver_is_detaching(sc))
3823 		return (ESHUTDOWN);
3824 
3825 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_status);
3826 	resp = &desc.params.get_link_status;
3827 	resp->lport_num = pi->lport;
3828 
3829 	status = ice_aq_send_cmd(hw, &desc, &link_data, sizeof(link_data), NULL);
3830 	if (status) {
3831 		device_printf(dev,
3832 		    "%s: ice_aq_send_cmd failed; status %s, aq_err %s\n",
3833 		    __func__, ice_status_str(status),
3834 		    ice_aq_str(hw->adminq.sq_last_status));
3835 		return (EIO);
3836 	}
3837 
3838 	ret = sysctl_handle_opaque(oidp, &link_data, sizeof(link_data), req);
3839 	if (req->newptr != NULL)
3840 		return (EPERM);
3841 
3842 	return (ret);
3843 }
3844 
3845 /**
3846  * ice_sysctl_fw_cur_lldp_persist_status - Display current FW LLDP status
3847  * @oidp: sysctl oid structure
3848  * @arg1: pointer to private softc structure
3849  * @arg2: unused
3850  * @req: sysctl request pointer
3851  *
3852  * On read: Displays current persistent LLDP status.
3853  */
3854 static int
3855 ice_sysctl_fw_cur_lldp_persist_status(SYSCTL_HANDLER_ARGS)
3856 {
3857 	struct ice_softc *sc = (struct ice_softc *)arg1;
3858 	struct ice_hw *hw = &sc->hw;
3859 	device_t dev = sc->dev;
3860 	int status;
3861 	struct sbuf *sbuf;
3862 	u32 lldp_state;
3863 
3864 	UNREFERENCED_PARAMETER(arg2);
3865 	UNREFERENCED_PARAMETER(oidp);
3866 
3867 	if (ice_driver_is_detaching(sc))
3868 		return (ESHUTDOWN);
3869 
3870 	status = ice_get_cur_lldp_persist_status(hw, &lldp_state);
3871 	if (status) {
3872 		device_printf(dev,
3873 		    "Could not acquire current LLDP persistence status, err %s aq_err %s\n",
3874 		    ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
3875 		return (EIO);
3876 	}
3877 
3878 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
3879 	sbuf_printf(sbuf, "%s", ice_fw_lldp_status(lldp_state));
3880 	sbuf_finish(sbuf);
3881 	sbuf_delete(sbuf);
3882 
3883 	return (0);
3884 }
3885 
3886 /**
3887  * ice_sysctl_fw_dflt_lldp_persist_status - Display default FW LLDP status
3888  * @oidp: sysctl oid structure
3889  * @arg1: pointer to private softc structure
3890  * @arg2: unused
3891  * @req: sysctl request pointer
3892  *
3893  * On read: Displays default persistent LLDP status.
3894  */
3895 static int
3896 ice_sysctl_fw_dflt_lldp_persist_status(SYSCTL_HANDLER_ARGS)
3897 {
3898 	struct ice_softc *sc = (struct ice_softc *)arg1;
3899 	struct ice_hw *hw = &sc->hw;
3900 	device_t dev = sc->dev;
3901 	int status;
3902 	struct sbuf *sbuf;
3903 	u32 lldp_state;
3904 
3905 	UNREFERENCED_PARAMETER(arg2);
3906 	UNREFERENCED_PARAMETER(oidp);
3907 
3908 	if (ice_driver_is_detaching(sc))
3909 		return (ESHUTDOWN);
3910 
3911 	status = ice_get_dflt_lldp_persist_status(hw, &lldp_state);
3912 	if (status) {
3913 		device_printf(dev,
3914 		    "Could not acquire default LLDP persistence status, err %s aq_err %s\n",
3915 		    ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
3916 		return (EIO);
3917 	}
3918 
3919 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
3920 	sbuf_printf(sbuf, "%s", ice_fw_lldp_status(lldp_state));
3921 	sbuf_finish(sbuf);
3922 	sbuf_delete(sbuf);
3923 
3924 	return (0);
3925 }
3926 
3927 /**
3928  * ice_dscp_is_mapped - Check for non-zero DSCP to TC mappings
3929  * @dcbcfg: Configuration struct to check for mappings in
3930  *
3931  * @return true if there exists a non-zero DSCP to TC mapping
3932  * inside the input DCB configuration struct.
3933  */
3934 static bool
3935 ice_dscp_is_mapped(struct ice_dcbx_cfg *dcbcfg)
3936 {
3937 	for (int i = 0; i < ICE_DSCP_NUM_VAL; i++)
3938 		if (dcbcfg->dscp_map[i] != 0)
3939 			return (true);
3940 
3941 	return (false);
3942 }
3943 
3944 #define ICE_SYSCTL_HELP_FW_LLDP_AGENT	\
3945 "\nDisplay or change FW LLDP agent state:" \
3946 "\n\t0 - disabled"			\
3947 "\n\t1 - enabled"
3948 
3949 /**
3950  * ice_sysctl_fw_lldp_agent - Display or change the FW LLDP agent status
3951  * @oidp: sysctl oid structure
3952  * @arg1: pointer to private softc structure
3953  * @arg2: unused
3954  * @req: sysctl request pointer
3955  *
3956  * On read: Displays whether the FW LLDP agent is running
3957  * On write: Persistently enables or disables the FW LLDP agent
3958  */
3959 static int
3960 ice_sysctl_fw_lldp_agent(SYSCTL_HANDLER_ARGS)
3961 {
3962 	struct ice_softc *sc = (struct ice_softc *)arg1;
3963 	struct ice_dcbx_cfg *local_dcbx_cfg;
3964 	struct ice_hw *hw = &sc->hw;
3965 	device_t dev = sc->dev;
3966 	int status;
3967 	int ret;
3968 	u32 old_state;
3969 	u8 fw_lldp_enabled;
3970 	bool retried_start_lldp = false;
3971 
3972 	UNREFERENCED_PARAMETER(arg2);
3973 
3974 	if (ice_driver_is_detaching(sc))
3975 		return (ESHUTDOWN);
3976 
3977 	status = ice_get_cur_lldp_persist_status(hw, &old_state);
3978 	if (status) {
3979 		device_printf(dev,
3980 		    "Could not acquire current LLDP persistence status, err %s aq_err %s\n",
3981 		    ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
3982 		return (EIO);
3983 	}
3984 
3985 	if (old_state > ICE_LLDP_ADMINSTATUS_ENA_RXTX) {
3986 		status = ice_get_dflt_lldp_persist_status(hw, &old_state);
3987 		if (status) {
3988 			device_printf(dev,
3989 			    "Could not acquire default LLDP persistence status, err %s aq_err %s\n",
3990 			    ice_status_str(status),
3991 			    ice_aq_str(hw->adminq.sq_last_status));
3992 			return (EIO);
3993 		}
3994 	}
3995 	if (old_state == 0)
3996 		fw_lldp_enabled = false;
3997 	else
3998 		fw_lldp_enabled = true;
3999 
4000 	ret = sysctl_handle_bool(oidp, &fw_lldp_enabled, 0, req);
4001 	if ((ret) || (req->newptr == NULL))
4002 		return (ret);
4003 
4004 	if (old_state == 0 && fw_lldp_enabled == false)
4005 		return (0);
4006 
4007 	if (old_state != 0 && fw_lldp_enabled == true)
4008 		return (0);
4009 
4010 	/* Block transition to FW LLDP if DSCP mode is enabled */
4011 	local_dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
4012 	if ((local_dcbx_cfg->pfc_mode == ICE_QOS_MODE_DSCP) ||
4013 	    ice_dscp_is_mapped(local_dcbx_cfg)) {
4014 		device_printf(dev,
4015 			      "Cannot enable FW-LLDP agent while DSCP QoS is active.\n");
4016 		return (EOPNOTSUPP);
4017 	}
4018 
4019 	if (fw_lldp_enabled == false) {
4020 		status = ice_aq_stop_lldp(hw, true, true, NULL);
4021 		/* EPERM is returned if the LLDP agent is already shutdown */
4022 		if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EPERM) {
4023 			device_printf(dev,
4024 			    "%s: ice_aq_stop_lldp failed; status %s, aq_err %s\n",
4025 			    __func__, ice_status_str(status),
4026 			    ice_aq_str(hw->adminq.sq_last_status));
4027 			return (EIO);
4028 		}
4029 		ice_aq_set_dcb_parameters(hw, true, NULL);
4030 		hw->port_info->qos_cfg.is_sw_lldp = true;
4031 		ice_add_rx_lldp_filter(sc);
4032 	} else {
4033 		ice_del_rx_lldp_filter(sc);
4034 retry_start_lldp:
4035 		status = ice_aq_start_lldp(hw, true, NULL);
4036 		if (status) {
4037 			switch (hw->adminq.sq_last_status) {
4038 			/* EEXIST is returned if the LLDP agent is already started */
4039 			case ICE_AQ_RC_EEXIST:
4040 				break;
4041 			case ICE_AQ_RC_EAGAIN:
4042 				/* Retry command after a 2 second wait */
4043 				if (retried_start_lldp == false) {
4044 					retried_start_lldp = true;
4045 					pause("slldp", ICE_START_LLDP_RETRY_WAIT);
4046 					goto retry_start_lldp;
4047 				}
4048 				/* Fallthrough */
4049 			default:
4050 				device_printf(dev,
4051 				    "%s: ice_aq_start_lldp failed; status %s, aq_err %s\n",
4052 				    __func__, ice_status_str(status),
4053 				    ice_aq_str(hw->adminq.sq_last_status));
4054 				return (EIO);
4055 			}
4056 		}
4057 		ice_start_dcbx_agent(sc);
4058 
4059 		/* Init DCB needs to be done during enabling LLDP to properly
4060 		 * propagate the configuration.
4061 		 */
4062 		status = ice_init_dcb(hw, true);
4063 		if (status) {
4064 			device_printf(dev,
4065 			    "%s: ice_init_dcb failed; status %s, aq_err %s\n",
4066 			    __func__, ice_status_str(status),
4067 			    ice_aq_str(hw->adminq.sq_last_status));
4068 			hw->port_info->qos_cfg.dcbx_status = ICE_DCBX_STATUS_NOT_STARTED;
4069 		}
4070 	}
4071 
4072 	return (ret);
4073 }
4074 
4075 #define ICE_SYSCTL_HELP_ETS_MIN_RATE \
4076 "\nIn FW DCB mode (fw_lldp_agent=1), displays the current ETS bandwidth table." \
4077 "\nIn SW DCB mode, displays and allows setting the table." \
4078 "\nInput must be in the format e.g. 30,10,10,10,10,10,10,10" \
4079 "\nWhere the bandwidth total must add up to 100"
4080 
4081 /**
4082  * ice_sysctl_ets_min_rate - Report/configure ETS bandwidth
4083  * @oidp: sysctl oid structure
4084  * @arg1: pointer to private data structure
4085  * @arg2: unused
4086  * @req: sysctl request pointer
4087  *
4088  * Returns the current ETS TC bandwidth table
4089  * cached by the driver.
4090  *
4091  * In SW DCB mode this sysctl also accepts a value that will
4092  * be sent to the firmware for configuration.
4093  */
4094 static int
4095 ice_sysctl_ets_min_rate(SYSCTL_HANDLER_ARGS)
4096 {
4097 	struct ice_softc *sc = (struct ice_softc *)arg1;
4098 	struct ice_dcbx_cfg *local_dcbx_cfg;
4099 	struct ice_port_info *pi;
4100 	struct ice_hw *hw = &sc->hw;
4101 	device_t dev = sc->dev;
4102 	int status;
4103 	struct sbuf *sbuf;
4104 	int ret;
4105 
4106 	/* Store input rates from user */
4107 	char ets_user_buf[128] = "";
4108 	u8 new_ets_table[ICE_MAX_TRAFFIC_CLASS] = {};
4109 
4110 	UNREFERENCED_PARAMETER(arg2);
4111 
4112 	if (ice_driver_is_detaching(sc))
4113 		return (ESHUTDOWN);
4114 
4115 	if (req->oldptr == NULL && req->newptr == NULL) {
4116 		ret = SYSCTL_OUT(req, 0, 128);
4117 		return (ret);
4118 	}
4119 
4120 	pi = hw->port_info;
4121 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4122 
4123 	sbuf = sbuf_new(NULL, ets_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
4124 
4125 	/* Format ETS BW data for output */
4126 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
4127 		sbuf_printf(sbuf, "%d", local_dcbx_cfg->etscfg.tcbwtable[i]);
4128 		if (i != ICE_MAX_TRAFFIC_CLASS - 1)
4129 			sbuf_printf(sbuf, ",");
4130 	}
4131 
4132 	sbuf_finish(sbuf);
4133 	sbuf_delete(sbuf);
4134 
4135 	/* Read in the new ETS values */
4136 	ret = sysctl_handle_string(oidp, ets_user_buf, sizeof(ets_user_buf), req);
4137 	if ((ret) || (req->newptr == NULL))
4138 		return (ret);
4139 
4140 	/* Don't allow setting changes in FW DCB mode */
4141 	if (!hw->port_info->qos_cfg.is_sw_lldp)
4142 		return (EPERM);
4143 
4144 	ret = ice_ets_str_to_tbl(ets_user_buf, new_ets_table, 100);
4145 	if (ret) {
4146 		device_printf(dev, "%s: Could not parse input BW table: %s\n",
4147 		    __func__, ets_user_buf);
4148 		return (ret);
4149 	}
4150 
4151 	if (!ice_check_ets_bw(new_ets_table)) {
4152 		device_printf(dev, "%s: Bandwidth sum does not equal 100: %s\n",
4153 		    __func__, ets_user_buf);
4154 		return (EINVAL);
4155 	}
4156 
4157 	memcpy(local_dcbx_cfg->etscfg.tcbwtable, new_ets_table,
4158 	    sizeof(new_ets_table));
4159 
4160 	/* If BW > 0, then set TSA entry to 2 */
4161 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
4162 		if (new_ets_table[i] > 0)
4163 			local_dcbx_cfg->etscfg.tsatable[i] = 2;
4164 		else
4165 			local_dcbx_cfg->etscfg.tsatable[i] = 0;
4166 	}
4167 	local_dcbx_cfg->etscfg.willing = 0;
4168 	local_dcbx_cfg->etsrec = local_dcbx_cfg->etscfg;
4169 	local_dcbx_cfg->app_mode = ICE_DCBX_APPS_NON_WILLING;
4170 
4171 	status = ice_set_dcb_cfg(pi);
4172 	if (status) {
4173 		device_printf(dev,
4174 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
4175 		    __func__, ice_status_str(status),
4176 		    ice_aq_str(hw->adminq.sq_last_status));
4177 		return (EIO);
4178 	}
4179 
4180 	ice_do_dcb_reconfig(sc, false);
4181 
4182 	return (0);
4183 }
4184 
4185 #define ICE_SYSCTL_HELP_UP2TC_MAP \
4186 "\nIn FW DCB mode (fw_lldp_agent=1), displays the current ETS priority assignment table." \
4187 "\nIn SW DCB mode, displays and allows setting the table." \
4188 "\nInput must be in this format: 0,1,2,3,4,5,6,7" \
4189 "\nWhere the 1st number is the TC for UP0, 2nd number is the TC for UP1, etc"
4190 
4191 /**
4192  * ice_sysctl_up2tc_map - Report or configure UP2TC mapping
4193  * @oidp: sysctl oid structure
4194  * @arg1: pointer to private data structure
4195  * @arg2: unused
4196  * @req: sysctl request pointer
4197  *
4198  * In FW DCB mode, returns the current ETS prio table /
4199  * UP2TC mapping from the local MIB.
4200  *
4201  * In SW DCB mode this sysctl also accepts a value that will
4202  * be sent to the firmware for configuration.
4203  */
4204 static int
4205 ice_sysctl_up2tc_map(SYSCTL_HANDLER_ARGS)
4206 {
4207 	struct ice_softc *sc = (struct ice_softc *)arg1;
4208 	struct ice_dcbx_cfg *local_dcbx_cfg;
4209 	struct ice_port_info *pi;
4210 	struct ice_hw *hw = &sc->hw;
4211 	device_t dev = sc->dev;
4212 	int status;
4213 	struct sbuf *sbuf;
4214 	int ret;
4215 
4216 	/* Store input rates from user */
4217 	char up2tc_user_buf[128] = "";
4218 	/* This array is indexed by UP, not TC */
4219 	u8 new_up2tc[ICE_MAX_TRAFFIC_CLASS] = {};
4220 
4221 	UNREFERENCED_PARAMETER(arg2);
4222 
4223 	if (ice_driver_is_detaching(sc))
4224 		return (ESHUTDOWN);
4225 
4226 	if (req->oldptr == NULL && req->newptr == NULL) {
4227 		ret = SYSCTL_OUT(req, 0, 128);
4228 		return (ret);
4229 	}
4230 
4231 	pi = hw->port_info;
4232 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4233 
4234 	sbuf = sbuf_new(NULL, up2tc_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
4235 
4236 	/* Format ETS Priority Mapping Table for output */
4237 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
4238 		sbuf_printf(sbuf, "%d", local_dcbx_cfg->etscfg.prio_table[i]);
4239 		if (i != ICE_MAX_TRAFFIC_CLASS - 1)
4240 			sbuf_printf(sbuf, ",");
4241 	}
4242 
4243 	sbuf_finish(sbuf);
4244 	sbuf_delete(sbuf);
4245 
4246 	/* Read in the new ETS priority mapping */
4247 	ret = sysctl_handle_string(oidp, up2tc_user_buf, sizeof(up2tc_user_buf), req);
4248 	if ((ret) || (req->newptr == NULL))
4249 		return (ret);
4250 
4251 	/* Don't allow setting changes in FW DCB mode */
4252 	if (!hw->port_info->qos_cfg.is_sw_lldp)
4253 		return (EPERM);
4254 
4255 	ret = ice_ets_str_to_tbl(up2tc_user_buf, new_up2tc,
4256 	    ICE_MAX_TRAFFIC_CLASS - 1);
4257 	if (ret) {
4258 		device_printf(dev, "%s: Could not parse input priority assignment table: %s\n",
4259 		    __func__, up2tc_user_buf);
4260 		return (ret);
4261 	}
4262 
4263 	/* Prepare updated ETS CFG/REC TLVs */
4264 	memcpy(local_dcbx_cfg->etscfg.prio_table, new_up2tc,
4265 	    sizeof(new_up2tc));
4266 	memcpy(local_dcbx_cfg->etsrec.prio_table, new_up2tc,
4267 	    sizeof(new_up2tc));
4268 
4269 	status = ice_set_dcb_cfg(pi);
4270 	if (status) {
4271 		device_printf(dev,
4272 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
4273 		    __func__, ice_status_str(status),
4274 		    ice_aq_str(hw->adminq.sq_last_status));
4275 		return (EIO);
4276 	}
4277 
4278 	ice_do_dcb_reconfig(sc, false);
4279 
4280 	return (0);
4281 }
4282 
4283 /**
4284  * ice_config_pfc - helper function to set PFC config in FW
4285  * @sc: device private structure
4286  * @new_mode: bit flags indicating PFC status for TCs
4287  *
4288  * @pre must be in SW DCB mode
4289  *
4290  * Configures the driver's local PFC TLV and sends it to the
4291  * FW for configuration, then reconfigures the driver/VSI
4292  * for DCB if needed.
4293  */
4294 static int
4295 ice_config_pfc(struct ice_softc *sc, u8 new_mode)
4296 {
4297 	struct ice_dcbx_cfg *local_dcbx_cfg;
4298 	struct ice_hw *hw = &sc->hw;
4299 	struct ice_port_info *pi;
4300 	device_t dev = sc->dev;
4301 	int status;
4302 
4303 	pi = hw->port_info;
4304 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4305 
4306 	/* Prepare updated PFC TLV */
4307 	local_dcbx_cfg->pfc.pfcena = new_mode;
4308 	local_dcbx_cfg->pfc.pfccap = ICE_MAX_TRAFFIC_CLASS;
4309 	local_dcbx_cfg->pfc.willing = 0;
4310 	local_dcbx_cfg->pfc.mbc = 0;
4311 
4312 	/* Warn if PFC is being disabled with RoCE v2 in use */
4313 	if (new_mode == 0 && sc->rdma_entry.attached)
4314 		device_printf(dev,
4315 		    "WARNING: Recommended that Priority Flow Control is enabled when RoCEv2 is in use\n");
4316 
4317 	status = ice_set_dcb_cfg(pi);
4318 	if (status) {
4319 		device_printf(dev,
4320 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
4321 		    __func__, ice_status_str(status),
4322 		    ice_aq_str(hw->adminq.sq_last_status));
4323 		return (EIO);
4324 	}
4325 
4326 	ice_do_dcb_reconfig(sc, false);
4327 
4328 	return (0);
4329 }
4330 
4331 #define ICE_SYSCTL_HELP_PFC_CONFIG \
4332 "\nIn FW DCB mode (fw_lldp_agent=1), displays the current Priority Flow Control configuration" \
4333 "\nIn SW DCB mode, displays and allows setting the configuration" \
4334 "\nInput/Output is in this format: 0xff" \
4335 "\nWhere bit position # enables/disables PFC for that Traffic Class #"
4336 
4337 /**
4338  * ice_sysctl_pfc_config - Report or configure enabled PFC TCs
4339  * @oidp: sysctl oid structure
4340  * @arg1: pointer to private data structure
4341  * @arg2: unused
4342  * @req: sysctl request pointer
4343  *
4344  * In FW DCB mode, returns a bitmap containing the current TCs
4345  * that have PFC enabled on them.
4346  *
4347  * In SW DCB mode this sysctl also accepts a value that will
4348  * be sent to the firmware for configuration.
4349  */
4350 static int
4351 ice_sysctl_pfc_config(SYSCTL_HANDLER_ARGS)
4352 {
4353 	struct ice_softc *sc = (struct ice_softc *)arg1;
4354 	struct ice_dcbx_cfg *local_dcbx_cfg;
4355 	struct ice_port_info *pi;
4356 	struct ice_hw *hw = &sc->hw;
4357 	int ret;
4358 
4359 	/* Store input flags from user */
4360 	u8 user_pfc;
4361 
4362 	UNREFERENCED_PARAMETER(arg2);
4363 
4364 	if (ice_driver_is_detaching(sc))
4365 		return (ESHUTDOWN);
4366 
4367 	if (req->oldptr == NULL && req->newptr == NULL) {
4368 		ret = SYSCTL_OUT(req, 0, sizeof(u8));
4369 		return (ret);
4370 	}
4371 
4372 	pi = hw->port_info;
4373 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4374 
4375 	/* Format current PFC enable setting for output */
4376 	user_pfc = local_dcbx_cfg->pfc.pfcena;
4377 
4378 	/* Read in the new PFC config */
4379 	ret = sysctl_handle_8(oidp, &user_pfc, 0, req);
4380 	if ((ret) || (req->newptr == NULL))
4381 		return (ret);
4382 
4383 	/* Don't allow setting changes in FW DCB mode */
4384 	if (!hw->port_info->qos_cfg.is_sw_lldp)
4385 		return (EPERM);
4386 
4387 	/* If LFC is active and PFC is going to be turned on, turn LFC off */
4388 	if (user_pfc != 0 && pi->phy.curr_user_fc_req != ICE_FC_NONE) {
4389 		pi->phy.curr_user_fc_req = ICE_FC_NONE;
4390 		if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) ||
4391 			 sc->link_up) {
4392 			ret = ice_apply_saved_phy_cfg(sc, ICE_APPLY_FC);
4393 			if (ret)
4394 				return (ret);
4395 		}
4396 	}
4397 
4398 	return ice_config_pfc(sc, user_pfc);
4399 }
4400 
4401 #define ICE_SYSCTL_HELP_PFC_MODE \
4402 "\nDisplay and set the current QoS mode for the firmware" \
4403 "\n\t0: VLAN UP mode" \
4404 "\n\t1: DSCP mode"
4405 
4406 /**
4407  * ice_sysctl_pfc_mode
4408  * @oidp: sysctl oid structure
4409  * @arg1: pointer to private data structure
4410  * @arg2: unused
4411  * @req: sysctl request pointer
4412  *
4413  * Gets and sets whether the port is in DSCP or VLAN PCP-based
4414  * PFC mode. This is also used to set whether DSCP or VLAN PCP
4415  * -based settings are configured for DCB.
4416  */
4417 static int
4418 ice_sysctl_pfc_mode(SYSCTL_HANDLER_ARGS)
4419 {
4420 	struct ice_softc *sc = (struct ice_softc *)arg1;
4421 	struct ice_dcbx_cfg *local_dcbx_cfg;
4422 	struct ice_port_info *pi;
4423 	struct ice_hw *hw = &sc->hw;
4424 	device_t dev = sc->dev;
4425 	int status;
4426 	u8 user_pfc_mode, aq_pfc_mode;
4427 	int ret;
4428 
4429 	UNREFERENCED_PARAMETER(arg2);
4430 
4431 	if (ice_driver_is_detaching(sc))
4432 		return (ESHUTDOWN);
4433 
4434 	if (req->oldptr == NULL && req->newptr == NULL) {
4435 		ret = SYSCTL_OUT(req, 0, sizeof(u8));
4436 		return (ret);
4437 	}
4438 
4439 	pi = hw->port_info;
4440 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4441 
4442 	user_pfc_mode = local_dcbx_cfg->pfc_mode;
4443 
4444 	/* Read in the new mode */
4445 	ret = sysctl_handle_8(oidp, &user_pfc_mode, 0, req);
4446 	if ((ret) || (req->newptr == NULL))
4447 		return (ret);
4448 
4449 	/* Don't allow setting changes in FW DCB mode */
4450 	if (!hw->port_info->qos_cfg.is_sw_lldp)
4451 		return (EPERM);
4452 
4453 	/* Currently, there are only two modes */
4454 	switch (user_pfc_mode) {
4455 	case 0:
4456 		aq_pfc_mode = ICE_AQC_PFC_VLAN_BASED_PFC;
4457 		break;
4458 	case 1:
4459 		aq_pfc_mode = ICE_AQC_PFC_DSCP_BASED_PFC;
4460 		break;
4461 	default:
4462 		device_printf(dev,
4463 		    "%s: Valid input range is 0-1 (input %d)\n",
4464 		    __func__, user_pfc_mode);
4465 		return (EINVAL);
4466 	}
4467 
4468 	status = ice_aq_set_pfc_mode(hw, aq_pfc_mode, NULL);
4469 	if (status == ICE_ERR_NOT_SUPPORTED) {
4470 		device_printf(dev,
4471 		    "%s: Failed to set PFC mode; DCB not supported\n",
4472 		    __func__);
4473 		return (ENODEV);
4474 	}
4475 	if (status) {
4476 		device_printf(dev,
4477 		    "%s: Failed to set PFC mode; status %s, aq_err %s\n",
4478 		    __func__, ice_status_str(status),
4479 		    ice_aq_str(hw->adminq.sq_last_status));
4480 		return (EIO);
4481 	}
4482 
4483 	/* Reset settings to default when mode is changed */
4484 	ice_set_default_local_mib_settings(sc);
4485 	/* Cache current settings and reconfigure */
4486 	local_dcbx_cfg->pfc_mode = user_pfc_mode;
4487 	ice_do_dcb_reconfig(sc, false);
4488 
4489 	return (0);
4490 }
4491 
4492 #define ICE_SYSCTL_HELP_SET_LINK_ACTIVE \
4493 "\nKeep link active after setting interface down:" \
4494 "\n\t0 - disable" \
4495 "\n\t1 - enable"
4496 
4497 /**
4498  * ice_sysctl_set_link_active
4499  * @oidp: sysctl oid structure
4500  * @arg1: pointer to private data structure
4501  * @arg2: unused
4502  * @req: sysctl request pointer
4503  *
4504  * Set the link_active_on_if_down sysctl flag.
4505  */
4506 static int
4507 ice_sysctl_set_link_active(SYSCTL_HANDLER_ARGS)
4508 {
4509 	struct ice_softc *sc = (struct ice_softc *)arg1;
4510 	bool mode;
4511 	int ret;
4512 
4513 	UNREFERENCED_PARAMETER(arg2);
4514 
4515 	mode = ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN);
4516 
4517 	ret = sysctl_handle_bool(oidp, &mode, 0, req);
4518 	if ((ret) || (req->newptr == NULL))
4519 		return (ret);
4520 
4521 	if (mode)
4522 		ice_set_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN);
4523 	else
4524 		ice_clear_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN);
4525 
4526 	return (0);
4527 }
4528 
4529 /**
4530  * ice_sysctl_debug_set_link
4531  * @oidp: sysctl oid structure
4532  * @arg1: pointer to private data structure
4533  * @arg2: unused
4534  * @req: sysctl request pointer
4535  *
4536  * Set link up/down in debug session.
4537  */
4538 static int
4539 ice_sysctl_debug_set_link(SYSCTL_HANDLER_ARGS)
4540 {
4541 	struct ice_softc *sc = (struct ice_softc *)arg1;
4542 	bool mode;
4543 	int ret;
4544 
4545 	UNREFERENCED_PARAMETER(arg2);
4546 
4547 	ret = sysctl_handle_bool(oidp, &mode, 0, req);
4548 	if ((ret) || (req->newptr == NULL))
4549 		return (ret);
4550 
4551 	ice_set_link(sc, mode != 0);
4552 
4553 	return (0);
4554 }
4555 
4556 /**
4557  * ice_add_device_sysctls - add device specific dynamic sysctls
4558  * @sc: device private structure
4559  *
4560  * Add per-device dynamic sysctls which show device configuration or enable
4561  * configuring device functionality. For tunable values which can be set prior
4562  * to load, see ice_add_device_tunables.
4563  *
4564  * This function depends on the sysctl layout setup by ice_add_device_tunables,
4565  * and likely should be called near the end of the attach process.
4566  */
4567 void
4568 ice_add_device_sysctls(struct ice_softc *sc)
4569 {
4570 	struct sysctl_oid *hw_node;
4571 	device_t dev = sc->dev;
4572 
4573 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
4574 	struct sysctl_oid_list *ctx_list =
4575 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
4576 
4577 	SYSCTL_ADD_PROC(ctx, ctx_list,
4578 	    OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD,
4579 	    sc, 0, ice_sysctl_show_fw, "A", "Firmware version");
4580 
4581 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_HAS_PBA)) {
4582 		SYSCTL_ADD_PROC(ctx, ctx_list,
4583 		    OID_AUTO, "pba_number", CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4584 		    ice_sysctl_pba_number, "A", "Product Board Assembly Number");
4585 	}
4586 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_TEMP_SENSOR)) {
4587 		SYSCTL_ADD_PROC(ctx, ctx_list,
4588 		    OID_AUTO, "temp", CTLTYPE_S8 | CTLFLAG_RD,
4589 		    sc, 0, ice_sysctl_temperature, "CU",
4590 		    "Device temperature in degrees Celcius (C)");
4591 	}
4592 
4593 	SYSCTL_ADD_PROC(ctx, ctx_list,
4594 	    OID_AUTO, "ddp_version", CTLTYPE_STRING | CTLFLAG_RD,
4595 	    sc, 0, ice_sysctl_pkg_version, "A", "Active DDP package name and version");
4596 
4597 	SYSCTL_ADD_PROC(ctx, ctx_list,
4598 	    OID_AUTO, "current_speed", CTLTYPE_STRING | CTLFLAG_RD,
4599 	    sc, 0, ice_sysctl_current_speed, "A", "Current Port Link Speed");
4600 
4601 	SYSCTL_ADD_PROC(ctx, ctx_list,
4602 	    OID_AUTO, "requested_fec", CTLTYPE_STRING | CTLFLAG_RW,
4603 	    sc, 0, ice_sysctl_fec_config, "A", ICE_SYSCTL_HELP_FEC_CONFIG);
4604 
4605 	SYSCTL_ADD_PROC(ctx, ctx_list,
4606 	    OID_AUTO, "negotiated_fec", CTLTYPE_STRING | CTLFLAG_RD,
4607 	    sc, 0, ice_sysctl_negotiated_fec, "A", "Current Negotiated FEC mode");
4608 
4609 	SYSCTL_ADD_PROC(ctx, ctx_list,
4610 	    OID_AUTO, "fc", CTLTYPE_STRING | CTLFLAG_RW,
4611 	    sc, 0, ice_sysctl_fc_config, "A", ICE_SYSCTL_HELP_FC_CONFIG);
4612 
4613 	SYSCTL_ADD_PROC(ctx, ctx_list,
4614 	    OID_AUTO, "advertise_speed", CTLTYPE_U16 | CTLFLAG_RW,
4615 	    sc, 0, ice_sysctl_advertise_speed, "SU", ICE_SYSCTL_HELP_ADVERTISE_SPEED);
4616 
4617 	SYSCTL_ADD_PROC(ctx, ctx_list,
4618 	    OID_AUTO, "fw_lldp_agent", CTLTYPE_U8 | CTLFLAG_RWTUN,
4619 	    sc, 0, ice_sysctl_fw_lldp_agent, "CU", ICE_SYSCTL_HELP_FW_LLDP_AGENT);
4620 
4621 	SYSCTL_ADD_PROC(ctx, ctx_list,
4622 	    OID_AUTO, "ets_min_rate", CTLTYPE_STRING | CTLFLAG_RW,
4623 	    sc, 0, ice_sysctl_ets_min_rate, "A", ICE_SYSCTL_HELP_ETS_MIN_RATE);
4624 
4625 	SYSCTL_ADD_PROC(ctx, ctx_list,
4626 	    OID_AUTO, "up2tc_map", CTLTYPE_STRING | CTLFLAG_RW,
4627 	    sc, 0, ice_sysctl_up2tc_map, "A", ICE_SYSCTL_HELP_UP2TC_MAP);
4628 
4629 	SYSCTL_ADD_PROC(ctx, ctx_list,
4630 	    OID_AUTO, "pfc", CTLTYPE_U8 | CTLFLAG_RW,
4631 	    sc, 0, ice_sysctl_pfc_config, "CU", ICE_SYSCTL_HELP_PFC_CONFIG);
4632 
4633 	SYSCTL_ADD_PROC(ctx, ctx_list,
4634 	    OID_AUTO, "pfc_mode", CTLTYPE_U8 | CTLFLAG_RWTUN,
4635 	    sc, 0, ice_sysctl_pfc_mode, "CU", ICE_SYSCTL_HELP_PFC_MODE);
4636 
4637 	SYSCTL_ADD_PROC(ctx, ctx_list,
4638 	    OID_AUTO, "allow_no_fec_modules_in_auto",
4639 	    CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
4640 	    sc, 0, ice_sysctl_allow_no_fec_mod_in_auto, "CU",
4641 	    "Allow \"No FEC\" mode in FEC auto-negotiation");
4642 
4643 	SYSCTL_ADD_PROC(ctx, ctx_list,
4644 	    OID_AUTO, "link_active_on_if_down", CTLTYPE_U8 | CTLFLAG_RWTUN,
4645 	    sc, 0, ice_sysctl_set_link_active, "CU", ICE_SYSCTL_HELP_SET_LINK_ACTIVE);
4646 
4647 	SYSCTL_ADD_PROC(ctx, ctx_list,
4648 	    OID_AUTO, "create_mirror_interface", CTLTYPE_STRING | CTLFLAG_RW,
4649 	    sc, 0, ice_sysctl_create_mirror_interface, "A", "");
4650 
4651 	SYSCTL_ADD_PROC(ctx, ctx_list,
4652 	    OID_AUTO, "destroy_mirror_interface", CTLTYPE_STRING | CTLFLAG_RW,
4653 	    sc, 0, ice_sysctl_destroy_mirror_interface, "A", "");
4654 
4655 	ice_add_dscp2tc_map_sysctls(sc, ctx, ctx_list);
4656 
4657 	/* Differentiate software and hardware statistics, by keeping hw stats
4658 	 * in their own node. This isn't in ice_add_device_tunables, because
4659 	 * we won't have any CTLFLAG_TUN sysctls under this node.
4660 	 */
4661 	hw_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "hw", CTLFLAG_RD,
4662 				  NULL, "Port Hardware Statistics");
4663 
4664 	ice_add_sysctls_mac_stats(ctx, hw_node, sc);
4665 
4666 	/* Add the main PF VSI stats now. Other VSIs will add their own stats
4667 	 * during creation
4668 	 */
4669 	ice_add_vsi_sysctls(&sc->pf_vsi);
4670 
4671 	/* Add sysctls related to debugging the device driver. This includes
4672 	 * sysctls which display additional internal driver state for use in
4673 	 * understanding what is happening within the driver.
4674 	 */
4675 	ice_add_debug_sysctls(sc);
4676 }
4677 
/**
 * @enum hmc_error_type
 * @brief enumeration of HMC errors
 *
 * Numeric error type codes which ice_log_hmc_error() switches on when
 * reporting an HMC error. Values 3, 10 and 12 are reserved and have no
 * enumerator.
 */
enum hmc_error_type {
	/* Private Memory Function is not valid */
	HMC_ERR_PMF_INVALID                    = 0,
	/* Invalid Private Memory Function index for PE enabled VF */
	HMC_ERR_VF_IDX_INVALID                 = 1,
	/* Invalid parent PF for PE enabled VF */
	HMC_ERR_VF_PARENT_PF_INVALID           = 2,
	/* 3 is reserved */
	/* Object index too big */
	HMC_ERR_INDEX_TOO_BIG                  = 4,
	/* Address extends beyond segment descriptor limit */
	HMC_ERR_ADDRESS_TOO_LARGE              = 5,
	/* Segment descriptor is invalid */
	HMC_ERR_SEGMENT_DESC_INVALID           = 6,
	/* Segment descriptor is too small */
	HMC_ERR_SEGMENT_DESC_TOO_SMALL         = 7,
	/* Page descriptor is invalid */
	HMC_ERR_PAGE_DESC_INVALID              = 8,
	/* Unsupported request completion */
	HMC_ERR_UNSUPPORTED_REQUEST_COMPLETION = 9,
	/* 10 is reserved */
	/* Invalid object type */
	HMC_ERR_INVALID_OBJECT_TYPE            = 11,
	/* 12 is reserved */
};
4699 
4700 /**
4701  * ice_log_hmc_error - Log an HMC error message
4702  * @hw: device hw structure
4703  * @dev: the device to pass to device_printf()
4704  *
4705  * Log a message when an HMC error interrupt is triggered.
4706  */
4707 void
4708 ice_log_hmc_error(struct ice_hw *hw, device_t dev)
4709 {
4710 	u32 info, data;
4711 	u8 index, errtype, objtype;
4712 	bool isvf;
4713 
4714 	info = rd32(hw, PFHMC_ERRORINFO);
4715 	data = rd32(hw, PFHMC_ERRORDATA);
4716 
4717 	index = (u8)(info & PFHMC_ERRORINFO_PMF_INDEX_M);
4718 	errtype = (u8)((info & PFHMC_ERRORINFO_HMC_ERROR_TYPE_M) >>
4719 		       PFHMC_ERRORINFO_HMC_ERROR_TYPE_S);
4720 	objtype = (u8)((info & PFHMC_ERRORINFO_HMC_OBJECT_TYPE_M) >>
4721 		       PFHMC_ERRORINFO_HMC_OBJECT_TYPE_S);
4722 
4723 	isvf = info & PFHMC_ERRORINFO_PMF_ISVF_M;
4724 
4725 	device_printf(dev, "%s HMC Error detected on PMF index %d:\n",
4726 		      isvf ? "VF" : "PF", index);
4727 
4728 	device_printf(dev, "error type %d, object type %d, data 0x%08x\n",
4729 		      errtype, objtype, data);
4730 
4731 	switch (errtype) {
4732 	case HMC_ERR_PMF_INVALID:
4733 		device_printf(dev, "Private Memory Function is not valid\n");
4734 		break;
4735 	case HMC_ERR_VF_IDX_INVALID:
4736 		device_printf(dev, "Invalid Private Memory Function index for PE enabled VF\n");
4737 		break;
4738 	case HMC_ERR_VF_PARENT_PF_INVALID:
4739 		device_printf(dev, "Invalid parent PF for PE enabled VF\n");
4740 		break;
4741 	case HMC_ERR_INDEX_TOO_BIG:
4742 		device_printf(dev, "Object index too big\n");
4743 		break;
4744 	case HMC_ERR_ADDRESS_TOO_LARGE:
4745 		device_printf(dev, "Address extends beyond segment descriptor limit\n");
4746 		break;
4747 	case HMC_ERR_SEGMENT_DESC_INVALID:
4748 		device_printf(dev, "Segment descriptor is invalid\n");
4749 		break;
4750 	case HMC_ERR_SEGMENT_DESC_TOO_SMALL:
4751 		device_printf(dev, "Segment descriptor is too small\n");
4752 		break;
4753 	case HMC_ERR_PAGE_DESC_INVALID:
4754 		device_printf(dev, "Page descriptor is invalid\n");
4755 		break;
4756 	case HMC_ERR_UNSUPPORTED_REQUEST_COMPLETION:
4757 		device_printf(dev, "Unsupported Request completion received from PCIe\n");
4758 		break;
4759 	case HMC_ERR_INVALID_OBJECT_TYPE:
4760 		device_printf(dev, "Invalid object type\n");
4761 		break;
4762 	default:
4763 		device_printf(dev, "Unknown HMC error\n");
4764 	}
4765 
4766 	/* Clear the error indication */
4767 	wr32(hw, PFHMC_ERRORINFO, 0);
4768 }
4769 
/**
 * @struct ice_sysctl_info
 * @brief sysctl information
 *
 * Structure used to simplify the process of defining the many similar
 * statistics sysctls.
 *
 * Tables of this structure in this file are filled with positional
 * initializers in the order { stat, name, description } and are terminated by
 * an all-zero entry; the loops walking them stop at the first entry whose
 * stat pointer is zero.
 */
struct ice_sysctl_info {
	/* Address of the 64-bit counter handed to SYSCTL_ADD_U64 */
	u64		*stat;
	/* Name of the sysctl leaf */
	const char	*name;
	/* Description string of the sysctl leaf */
	const char	*description;
};
4782 
4783 /**
4784  * ice_add_sysctls_eth_stats - Add sysctls for ethernet statistics
4785  * @ctx: sysctl ctx to use
4786  * @parent: the parent node to add sysctls under
4787  * @stats: the ethernet stats structure to source values from
4788  *
4789  * Adds statistics sysctls for the ethernet statistics of the MAC or a VSI.
4790  * Will add them under the parent node specified.
4791  *
4792  * Note that tx_errors is only meaningful for VSIs and not the global MAC/PF
4793  * statistics, so it is not included here. Similarly, rx_discards has different
4794  * descriptions for VSIs and MAC/PF stats, so it is also not included here.
4795  */
4796 void
4797 ice_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx,
4798 			  struct sysctl_oid *parent,
4799 			  struct ice_eth_stats *stats)
4800 {
4801 	const struct ice_sysctl_info ctls[] = {
4802 		/* Rx Stats */
4803 		{ &stats->rx_bytes, "good_octets_rcvd", "Good Octets Received" },
4804 		{ &stats->rx_unicast, "ucast_pkts_rcvd", "Unicast Packets Received" },
4805 		{ &stats->rx_multicast, "mcast_pkts_rcvd", "Multicast Packets Received" },
4806 		{ &stats->rx_broadcast, "bcast_pkts_rcvd", "Broadcast Packets Received" },
4807 		/* Tx Stats */
4808 		{ &stats->tx_bytes, "good_octets_txd", "Good Octets Transmitted" },
4809 		{ &stats->tx_unicast, "ucast_pkts_txd", "Unicast Packets Transmitted" },
4810 		{ &stats->tx_multicast, "mcast_pkts_txd", "Multicast Packets Transmitted" },
4811 		{ &stats->tx_broadcast, "bcast_pkts_txd", "Broadcast Packets Transmitted" },
4812 		/* End */
4813 		{ 0, 0, 0 }
4814 	};
4815 
4816 	struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);
4817 
4818 	const struct ice_sysctl_info *entry = ctls;
4819 	while (entry->stat != 0) {
4820 		SYSCTL_ADD_U64(ctx, parent_list, OID_AUTO, entry->name,
4821 			       CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
4822 			       entry->description);
4823 		entry++;
4824 	}
4825 }
4826 
4827 /**
4828  * ice_sysctl_tx_cso_stat - Display Tx checksum offload statistic
4829  * @oidp: sysctl oid structure
4830  * @arg1: pointer to private data structure
4831  * @arg2: Tx CSO stat to read
4832  * @req: sysctl request pointer
4833  *
4834  * On read: Sums the per-queue Tx CSO stat and displays it.
4835  */
4836 static int
4837 ice_sysctl_tx_cso_stat(SYSCTL_HANDLER_ARGS)
4838 {
4839 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
4840 	enum ice_tx_cso_stat type = (enum ice_tx_cso_stat)arg2;
4841 	u64 stat = 0;
4842 	int i;
4843 
4844 	if (ice_driver_is_detaching(vsi->sc))
4845 		return (ESHUTDOWN);
4846 
4847 	/* Check that the type is valid */
4848 	if (type >= ICE_CSO_STAT_TX_COUNT)
4849 		return (EDOOFUS);
4850 
4851 	/* Sum the stat for each of the Tx queues */
4852 	for (i = 0; i < vsi->num_tx_queues; i++)
4853 		stat += vsi->tx_queues[i].stats.cso[type];
4854 
4855 	return sysctl_handle_64(oidp, NULL, stat, req);
4856 }
4857 
4858 /**
4859  * ice_sysctl_rx_cso_stat - Display Rx checksum offload statistic
4860  * @oidp: sysctl oid structure
4861  * @arg1: pointer to private data structure
4862  * @arg2: Rx CSO stat to read
4863  * @req: sysctl request pointer
4864  *
4865  * On read: Sums the per-queue Rx CSO stat and displays it.
4866  */
4867 static int
4868 ice_sysctl_rx_cso_stat(SYSCTL_HANDLER_ARGS)
4869 {
4870 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
4871 	enum ice_rx_cso_stat type = (enum ice_rx_cso_stat)arg2;
4872 	u64 stat = 0;
4873 	int i;
4874 
4875 	if (ice_driver_is_detaching(vsi->sc))
4876 		return (ESHUTDOWN);
4877 
4878 	/* Check that the type is valid */
4879 	if (type >= ICE_CSO_STAT_RX_COUNT)
4880 		return (EDOOFUS);
4881 
4882 	/* Sum the stat for each of the Rx queues */
4883 	for (i = 0; i < vsi->num_rx_queues; i++)
4884 		stat += vsi->rx_queues[i].stats.cso[type];
4885 
4886 	return sysctl_handle_64(oidp, NULL, stat, req);
4887 }
4888 
4889 /**
4890  * ice_sysctl_rx_errors_stat - Display aggregate of Rx errors
4891  * @oidp: sysctl oid structure
4892  * @arg1: pointer to private data structure
4893  * @arg2: unused
4894  * @req: sysctl request pointer
4895  *
4896  * On read: Sums current values of Rx error statistics and
4897  * displays it.
4898  */
4899 static int
4900 ice_sysctl_rx_errors_stat(SYSCTL_HANDLER_ARGS)
4901 {
4902 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
4903 	struct ice_hw_port_stats *hs = &vsi->sc->stats.cur;
4904 	u64 stat = 0;
4905 	int i, type;
4906 
4907 	UNREFERENCED_PARAMETER(arg2);
4908 
4909 	if (ice_driver_is_detaching(vsi->sc))
4910 		return (ESHUTDOWN);
4911 
4912 	stat += hs->rx_undersize;
4913 	stat += hs->rx_fragments;
4914 	stat += hs->rx_oversize;
4915 	stat += hs->rx_jabber;
4916 	stat += hs->crc_errors;
4917 	stat += hs->illegal_bytes;
4918 
4919 	/* Checksum error stats */
4920 	for (i = 0; i < vsi->num_rx_queues; i++)
4921 		for (type = ICE_CSO_STAT_RX_IP4_ERR;
4922 		     type < ICE_CSO_STAT_RX_COUNT;
4923 		     type++)
4924 			stat += vsi->rx_queues[i].stats.cso[type];
4925 
4926 	return sysctl_handle_64(oidp, NULL, stat, req);
4927 }
4928 
/**
 * @struct ice_rx_cso_stat_info
 * @brief sysctl information for an Rx checksum offload statistic
 *
 * Structure used to simplify the process of defining the checksum offload
 * statistics.
 *
 * ice_add_sysctls_sw_stats() fills a table of these with positional
 * initializers in the order { type, name, description } and stops walking it
 * at the first entry whose name or description is NULL.
 */
struct ice_rx_cso_stat_info {
	/* Which Rx CSO counter the sysctl reports; passed as arg2 to the handler */
	enum ice_rx_cso_stat	type;
	/* Name of the sysctl leaf */
	const char		*name;
	/* Description string of the sysctl leaf */
	const char		*description;
};
4941 
/**
 * @struct ice_tx_cso_stat_info
 * @brief sysctl information for a Tx checksum offload statistic
 *
 * Structure used to simplify the process of defining the checksum offload
 * statistics.
 *
 * ice_add_sysctls_sw_stats() fills a table of these with positional
 * initializers in the order { type, name, description } and stops walking it
 * at the first entry whose name or description is NULL.
 */
struct ice_tx_cso_stat_info {
	/* Which Tx CSO counter the sysctl reports; passed as arg2 to the handler */
	enum ice_tx_cso_stat	type;
	/* Name of the sysctl leaf */
	const char		*name;
	/* Description string of the sysctl leaf */
	const char		*description;
};
4954 
4955 /**
4956  * ice_add_sysctls_sw_stats - Add sysctls for software statistics
4957  * @vsi: pointer to the VSI to add sysctls for
4958  * @ctx: sysctl ctx to use
4959  * @parent: the parent node to add sysctls under
4960  *
4961  * Add statistics sysctls for software tracked statistics of a VSI.
4962  *
4963  * Currently this only adds checksum offload statistics, but more counters may
4964  * be added in the future.
4965  */
4966 static void
4967 ice_add_sysctls_sw_stats(struct ice_vsi *vsi,
4968 			 struct sysctl_ctx_list *ctx,
4969 			 struct sysctl_oid *parent)
4970 {
4971 	struct sysctl_oid *cso_node;
4972 	struct sysctl_oid_list *cso_list;
4973 
4974 	/* Tx CSO Stats */
4975 	const struct ice_tx_cso_stat_info tx_ctls[] = {
4976 		{ ICE_CSO_STAT_TX_TCP, "tx_tcp", "Transmit TCP Packets marked for HW checksum" },
4977 		{ ICE_CSO_STAT_TX_UDP, "tx_udp", "Transmit UDP Packets marked for HW checksum" },
4978 		{ ICE_CSO_STAT_TX_SCTP, "tx_sctp", "Transmit SCTP Packets marked for HW checksum" },
4979 		{ ICE_CSO_STAT_TX_IP4, "tx_ip4", "Transmit IPv4 Packets marked for HW checksum" },
4980 		{ ICE_CSO_STAT_TX_IP6, "tx_ip6", "Transmit IPv6 Packets marked for HW checksum" },
4981 		{ ICE_CSO_STAT_TX_L3_ERR, "tx_l3_err", "Transmit packets that driver failed to set L3 HW CSO bits for" },
4982 		{ ICE_CSO_STAT_TX_L4_ERR, "tx_l4_err", "Transmit packets that driver failed to set L4 HW CSO bits for" },
4983 		/* End */
4984 		{ ICE_CSO_STAT_TX_COUNT, 0, 0 }
4985 	};
4986 
4987 	/* Rx CSO Stats */
4988 	const struct ice_rx_cso_stat_info rx_ctls[] = {
4989 		{ ICE_CSO_STAT_RX_IP4_ERR, "rx_ip4_err", "Received packets with invalid IPv4 checksum indicated by HW" },
4990 		{ ICE_CSO_STAT_RX_IP6_ERR, "rx_ip6_err", "Received IPv6 packets with extension headers" },
4991 		{ ICE_CSO_STAT_RX_L3_ERR, "rx_l3_err", "Received packets with an unexpected invalid L3 checksum indicated by HW" },
4992 		{ ICE_CSO_STAT_RX_TCP_ERR, "rx_tcp_err", "Received packets with invalid TCP checksum indicated by HW" },
4993 		{ ICE_CSO_STAT_RX_UDP_ERR, "rx_udp_err", "Received packets with invalid UDP checksum indicated by HW" },
4994 		{ ICE_CSO_STAT_RX_SCTP_ERR, "rx_sctp_err", "Received packets with invalid SCTP checksum indicated by HW" },
4995 		{ ICE_CSO_STAT_RX_L4_ERR, "rx_l4_err", "Received packets with an unexpected invalid L4 checksum indicated by HW" },
4996 		/* End */
4997 		{ ICE_CSO_STAT_RX_COUNT, 0, 0 }
4998 	};
4999 
5000 	struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);
5001 
5002 	/* Add a node for statistics tracked by software. */
5003 	cso_node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, "cso", CTLFLAG_RD,
5004 				  NULL, "Checksum offload Statistics");
5005 	cso_list = SYSCTL_CHILDREN(cso_node);
5006 
5007 	const struct ice_tx_cso_stat_info *tx_entry = tx_ctls;
5008 	while (tx_entry->name && tx_entry->description) {
5009 		SYSCTL_ADD_PROC(ctx, cso_list, OID_AUTO, tx_entry->name,
5010 				CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
5011 				vsi, tx_entry->type, ice_sysctl_tx_cso_stat, "QU",
5012 				tx_entry->description);
5013 		tx_entry++;
5014 	}
5015 
5016 	const struct ice_rx_cso_stat_info *rx_entry = rx_ctls;
5017 	while (rx_entry->name && rx_entry->description) {
5018 		SYSCTL_ADD_PROC(ctx, cso_list, OID_AUTO, rx_entry->name,
5019 				CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
5020 				vsi, rx_entry->type, ice_sysctl_rx_cso_stat, "QU",
5021 				rx_entry->description);
5022 		rx_entry++;
5023 	}
5024 }
5025 
5026 /**
5027  * ice_add_vsi_sysctls - Add sysctls for a VSI
5028  * @vsi: pointer to VSI structure
5029  *
5030  * Add various sysctls for a given VSI.
5031  */
5032 void
5033 ice_add_vsi_sysctls(struct ice_vsi *vsi)
5034 {
5035 	struct sysctl_ctx_list *ctx = &vsi->ctx;
5036 	struct sysctl_oid *hw_node, *sw_node;
5037 	struct sysctl_oid_list *vsi_list, *hw_list;
5038 
5039 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
5040 
5041 	/* Keep hw stats in their own node. */
5042 	hw_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, "hw", CTLFLAG_RD,
5043 				  NULL, "VSI Hardware Statistics");
5044 	hw_list = SYSCTL_CHILDREN(hw_node);
5045 
5046 	/* Add the ethernet statistics for this VSI */
5047 	ice_add_sysctls_eth_stats(ctx, hw_node, &vsi->hw_stats.cur);
5048 
5049 	SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "rx_discards",
5050 			CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.rx_discards,
5051 			0, "Discarded Rx Packets (see rx_errors or rx_no_desc)");
5052 
5053 	SYSCTL_ADD_PROC(ctx, hw_list, OID_AUTO, "rx_errors",
5054 			CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
5055 			vsi, 0, ice_sysctl_rx_errors_stat, "QU",
5056 			"Aggregate of all Rx errors");
5057 
5058 	SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "rx_no_desc",
5059 		       CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.rx_no_desc,
5060 		       0, "Rx Packets Discarded Due To Lack Of Descriptors");
5061 
5062 	SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "tx_errors",
5063 			CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.tx_errors,
5064 			0, "Tx Packets Discarded Due To Error");
5065 
5066 	/* Add a node for statistics tracked by software. */
5067 	sw_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, "sw", CTLFLAG_RD,
5068 				  NULL, "VSI Software Statistics");
5069 
5070 	ice_add_sysctls_sw_stats(vsi, ctx, sw_node);
5071 }
5072 
5073 /**
5074  * ice_add_sysctls_mac_pfc_one_stat - Add sysctl node for a PFC statistic
5075  * @ctx: sysctl ctx to use
5076  * @parent_list: parent sysctl list to add sysctls under
5077  * @pfc_stat_location: address of statistic for sysctl to display
5078  * @node_name: Name for statistic node
5079  * @descr: Description used for nodes added in this function
5080  *
5081  * A helper function for ice_add_sysctls_mac_pfc_stats that adds a node
5082  * for a stat and leaves for each traffic class for that stat.
5083  */
5084 static void
5085 ice_add_sysctls_mac_pfc_one_stat(struct sysctl_ctx_list *ctx,
5086 				 struct sysctl_oid_list *parent_list,
5087 				 u64* pfc_stat_location,
5088 				 const char *node_name,
5089 				 const char *descr)
5090 {
5091 	struct sysctl_oid_list *node_list;
5092 	struct sysctl_oid *node;
5093 	struct sbuf *namebuf, *descbuf;
5094 
5095 	node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, node_name, CTLFLAG_RD,
5096 				   NULL, descr);
5097 	node_list = SYSCTL_CHILDREN(node);
5098 
5099 	namebuf = sbuf_new_auto();
5100 	descbuf = sbuf_new_auto();
5101 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
5102 		sbuf_clear(namebuf);
5103 		sbuf_clear(descbuf);
5104 
5105 		sbuf_printf(namebuf, "%d", i);
5106 		sbuf_printf(descbuf, "%s for TC %d", descr, i);
5107 
5108 		sbuf_finish(namebuf);
5109 		sbuf_finish(descbuf);
5110 
5111 		SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, sbuf_data(namebuf),
5112 			CTLFLAG_RD | CTLFLAG_STATS, &pfc_stat_location[i], 0,
5113 			sbuf_data(descbuf));
5114 	}
5115 
5116 	sbuf_delete(namebuf);
5117 	sbuf_delete(descbuf);
5118 }
5119 
5120 /**
5121  * ice_add_sysctls_mac_pfc_stats - Add sysctls for MAC PFC statistics
5122  * @ctx: the sysctl ctx to use
5123  * @parent: parent node to add the sysctls under
5124  * @stats: the hw ports stat structure to pull values from
5125  *
5126  * Add global Priority Flow Control MAC statistics sysctls. These are
5127  * structured as a node with the PFC statistic, where there are eight
5128  * nodes for each traffic class.
5129  */
5130 static void
5131 ice_add_sysctls_mac_pfc_stats(struct sysctl_ctx_list *ctx,
5132 			      struct sysctl_oid *parent,
5133 			      struct ice_hw_port_stats *stats)
5134 {
5135 	struct sysctl_oid_list *parent_list;
5136 
5137 	parent_list = SYSCTL_CHILDREN(parent);
5138 
5139 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xon_rx,
5140 	    "p_xon_recvd", "PFC XON received");
5141 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xoff_rx,
5142 	    "p_xoff_recvd", "PFC XOFF received");
5143 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xon_tx,
5144 	    "p_xon_txd", "PFC XON transmitted");
5145 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xoff_tx,
5146 	    "p_xoff_txd", "PFC XOFF transmitted");
5147 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xon_2_xoff,
5148 	    "p_xon2xoff", "PFC XON to XOFF transitions");
5149 }
5150 
5151 /**
5152  * ice_add_sysctls_mac_stats - Add sysctls for global MAC statistics
5153  * @ctx: the sysctl ctx to use
5154  * @parent: parent node to add the sysctls under
5155  * @sc: device private structure
5156  *
5157  * Add global MAC statistics sysctls.
5158  */
5159 void
5160 ice_add_sysctls_mac_stats(struct sysctl_ctx_list *ctx,
5161 			  struct sysctl_oid *parent,
5162 			  struct ice_softc *sc)
5163 {
5164 	struct sysctl_oid *mac_node;
5165 	struct sysctl_oid_list *parent_list, *mac_list;
5166 	struct ice_hw_port_stats *stats = &sc->stats.cur;
5167 
5168 	parent_list = SYSCTL_CHILDREN(parent);
5169 
5170 	mac_node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, "mac", CTLFLAG_RD,
5171 				   NULL, "Mac Hardware Statistics");
5172 	mac_list = SYSCTL_CHILDREN(mac_node);
5173 
5174 	/* Add the ethernet statistics common to VSI and MAC */
5175 	ice_add_sysctls_eth_stats(ctx, mac_node, &stats->eth);
5176 
5177 	/* Add PFC stats that add per-TC counters */
5178 	ice_add_sysctls_mac_pfc_stats(ctx, mac_node, stats);
5179 
5180 	const struct ice_sysctl_info ctls[] = {
5181 		/* Packet Reception Stats */
5182 		{&stats->rx_size_64, "rx_frames_64", "64 byte frames received"},
5183 		{&stats->rx_size_127, "rx_frames_65_127", "65-127 byte frames received"},
5184 		{&stats->rx_size_255, "rx_frames_128_255", "128-255 byte frames received"},
5185 		{&stats->rx_size_511, "rx_frames_256_511", "256-511 byte frames received"},
5186 		{&stats->rx_size_1023, "rx_frames_512_1023", "512-1023 byte frames received"},
5187 		{&stats->rx_size_1522, "rx_frames_1024_1522", "1024-1522 byte frames received"},
5188 		{&stats->rx_size_big, "rx_frames_big", "1523-9522 byte frames received"},
5189 		{&stats->rx_undersize, "rx_undersize", "Undersized packets received"},
5190 		{&stats->rx_fragments, "rx_fragmented", "Fragmented packets received"},
5191 		{&stats->rx_jabber, "rx_jabber", "Received Jabber"},
5192 		{&stats->eth.rx_discards, "rx_discards",
5193 		    "Discarded Rx Packets by Port (shortage of storage space)"},
5194 		/* Packet Transmission Stats */
5195 		{&stats->tx_size_64, "tx_frames_64", "64 byte frames transmitted"},
5196 		{&stats->tx_size_127, "tx_frames_65_127", "65-127 byte frames transmitted"},
5197 		{&stats->tx_size_255, "tx_frames_128_255", "128-255 byte frames transmitted"},
5198 		{&stats->tx_size_511, "tx_frames_256_511", "256-511 byte frames transmitted"},
5199 		{&stats->tx_size_1023, "tx_frames_512_1023", "512-1023 byte frames transmitted"},
5200 		{&stats->tx_size_1522, "tx_frames_1024_1522", "1024-1522 byte frames transmitted"},
5201 		{&stats->tx_size_big, "tx_frames_big", "1523-9522 byte frames transmitted"},
5202 		{&stats->tx_dropped_link_down, "tx_dropped", "Tx Dropped Due To Link Down"},
5203 		/* Flow control */
5204 		{&stats->link_xon_tx, "xon_txd", "Link XON transmitted"},
5205 		{&stats->link_xon_rx, "xon_recvd", "Link XON received"},
5206 		{&stats->link_xoff_tx, "xoff_txd", "Link XOFF transmitted"},
5207 		{&stats->link_xoff_rx, "xoff_recvd", "Link XOFF received"},
5208 		/* Other */
5209 		{&stats->crc_errors, "crc_errors", "CRC Errors"},
5210 		{&stats->illegal_bytes, "illegal_bytes", "Illegal Byte Errors"},
5211 		{&stats->mac_local_faults, "local_faults", "MAC Local Faults"},
5212 		{&stats->mac_remote_faults, "remote_faults", "MAC Remote Faults"},
5213 		/* End */
5214 		{ 0, 0, 0 }
5215 	};
5216 
5217 	const struct ice_sysctl_info *entry = ctls;
5218 	while (entry->stat != 0) {
5219 		SYSCTL_ADD_U64(ctx, mac_list, OID_AUTO, entry->name,
5220 			CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
5221 			entry->description);
5222 		entry++;
5223 	}
5224 	/* Port oversize packet stats */
5225 	SYSCTL_ADD_U64(ctx, mac_list, OID_AUTO, "rx_oversized",
5226 		       CTLFLAG_RD | CTLFLAG_STATS, &sc->soft_stats.rx_roc_error,
5227 		       0, "Oversized packets received");
5228 
5229 }
5230 
5231 /**
5232  * ice_configure_misc_interrupts - enable 'other' interrupt causes
5233  * @sc: pointer to device private softc
5234  *
5235  * Enable various "other" interrupt causes, and associate them to interrupt 0,
5236  * which is our administrative interrupt.
5237  */
5238 void
5239 ice_configure_misc_interrupts(struct ice_softc *sc)
5240 {
5241 	struct ice_hw *hw = &sc->hw;
5242 	u32 val;
5243 
5244 	/* Read the OICR register to clear it */
5245 	rd32(hw, PFINT_OICR);
5246 
5247 	/* Enable useful "other" interrupt causes */
5248 	val = (PFINT_OICR_ECC_ERR_M |
5249 	       PFINT_OICR_MAL_DETECT_M |
5250 	       PFINT_OICR_GRST_M |
5251 	       PFINT_OICR_PCI_EXCEPTION_M |
5252 	       PFINT_OICR_VFLR_M |
5253 	       PFINT_OICR_HMC_ERR_M |
5254 	       PFINT_OICR_PE_CRITERR_M);
5255 
5256 	wr32(hw, PFINT_OICR_ENA, val);
5257 
5258 	/* Note that since we're using MSI-X index 0, and ITR index 0, we do
5259 	 * not explicitly program them when writing to the PFINT_*_CTL
5260 	 * registers. Nevertheless, these writes are associating the
5261 	 * interrupts with the ITR 0 vector
5262 	 */
5263 
5264 	/* Associate the OICR interrupt with ITR 0, and enable it */
5265 	wr32(hw, PFINT_OICR_CTL, PFINT_OICR_CTL_CAUSE_ENA_M);
5266 
5267 	/* Associate the Mailbox interrupt with ITR 0, and enable it */
5268 	wr32(hw, PFINT_MBX_CTL, PFINT_MBX_CTL_CAUSE_ENA_M);
5269 
5270 	/* Associate the SB Queue interrupt with ITR 0, and enable it */
5271 	wr32(hw, PFINT_SB_CTL, PFINT_SB_CTL_CAUSE_ENA_M);
5272 
5273 	/* Associate the AdminQ interrupt with ITR 0, and enable it */
5274 	wr32(hw, PFINT_FW_CTL, PFINT_FW_CTL_CAUSE_ENA_M);
5275 }
5276 
5277 /**
5278  * ice_filter_is_mcast - Check if info is a multicast filter
5279  * @vsi: vsi structure addresses are targeted towards
5280  * @info: filter info
5281  *
5282  * @returns true if the provided info is a multicast filter, and false
5283  * otherwise.
5284  */
5285 static bool
5286 ice_filter_is_mcast(struct ice_vsi *vsi, struct ice_fltr_info *info)
5287 {
5288 	const u8 *addr = info->l_data.mac.mac_addr;
5289 
5290 	/*
5291 	 * Check if this info matches a multicast filter added by
5292 	 * ice_add_mac_to_list
5293 	 */
5294 	if ((info->flag == ICE_FLTR_TX) &&
5295 	    (info->src_id == ICE_SRC_ID_VSI) &&
5296 	    (info->lkup_type == ICE_SW_LKUP_MAC) &&
5297 	    (info->vsi_handle == vsi->idx) &&
5298 	    ETHER_IS_MULTICAST(addr) && !ETHER_IS_BROADCAST(addr))
5299 		return true;
5300 
5301 	return false;
5302 }
5303 
/**
 * @struct ice_mcast_sync_data
 * @brief data used by ice_sync_one_mcast_filter function
 *
 * Structure used to store data needed for processing by the
 * ice_sync_one_mcast_filter. This structure contains a linked list of filters
 * to be added, an error indication, and a pointer to the device softc.
 */
struct ice_mcast_sync_data {
	/* Addresses which have no matching programmed filter yet */
	struct ice_list_head add_list;
	/* Device private structure the filters belong to */
	struct ice_softc *sc;
	/*
	 * First error reported while building add_list, or zero. Once it is
	 * set, ice_sync_one_mcast_filter returns immediately for the
	 * remaining addresses.
	 */
	int err;
};
5317 
5318 /**
5319  * ice_sync_one_mcast_filter - Check if we need to program the filter
5320  * @p: void pointer to algorithm data
5321  * @sdl: link level socket address
5322  * @count: unused count value
5323  *
5324  * Called by if_foreach_llmaddr to operate on each filter in the ifp filter
5325  * list. For the given address, search our internal list to see if we have
5326  * found the filter. If not, add it to our list of filters that need to be
5327  * programmed.
5328  *
5329  * @returns (1) if we've actually setup the filter to be added
5330  */
5331 static u_int
5332 ice_sync_one_mcast_filter(void *p, struct sockaddr_dl *sdl,
5333 			  u_int __unused count)
5334 {
5335 	struct ice_mcast_sync_data *data = (struct ice_mcast_sync_data *)p;
5336 	struct ice_softc *sc = data->sc;
5337 	struct ice_hw *hw = &sc->hw;
5338 	struct ice_switch_info *sw = hw->switch_info;
5339 	const u8 *sdl_addr = (const u8 *)LLADDR(sdl);
5340 	struct ice_fltr_mgmt_list_entry *itr;
5341 	struct ice_list_head *rules;
5342 	int err;
5343 
5344 	rules = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
5345 
5346 	/*
5347 	 * If a previous filter already indicated an error, there is no need
5348 	 * for us to finish processing the rest of the filters.
5349 	 */
5350 	if (data->err)
5351 		return (0);
5352 
5353 	/* See if this filter has already been programmed */
5354 	LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry) {
5355 		struct ice_fltr_info *info = &itr->fltr_info;
5356 		const u8 *addr = info->l_data.mac.mac_addr;
5357 
5358 		/* Only check multicast filters */
5359 		if (!ice_filter_is_mcast(&sc->pf_vsi, info))
5360 			continue;
5361 
5362 		/*
5363 		 * If this filter matches, mark the internal filter as
5364 		 * "found", and exit.
5365 		 */
5366 		if (bcmp(addr, sdl_addr, ETHER_ADDR_LEN) == 0) {
5367 			itr->marker = ICE_FLTR_FOUND;
5368 			return (1);
5369 		}
5370 	}
5371 
5372 	/*
5373 	 * If we failed to locate the filter in our internal list, we need to
5374 	 * place it into our add list.
5375 	 */
5376 	err = ice_add_mac_to_list(&sc->pf_vsi, &data->add_list, sdl_addr,
5377 				  ICE_FWD_TO_VSI);
5378 	if (err) {
5379 		device_printf(sc->dev,
5380 			      "Failed to place MAC %6D onto add list, err %s\n",
5381 			      sdl_addr, ":", ice_err_str(err));
5382 		data->err = err;
5383 
5384 		return (0);
5385 	}
5386 
5387 	return (1);
5388 }
5389 
5390 /**
5391  * ice_sync_multicast_filters - Synchronize OS and internal filter list
5392  * @sc: device private structure
5393  *
5394  * Called in response to SIOCDELMULTI to synchronize the operating system
5395  * multicast address list with the internal list of filters programmed to
5396  * firmware.
5397  *
5398  * Works in one phase to find added and deleted filters using a marker bit on
5399  * the internal list.
5400  *
5401  * First, a loop over the internal list clears the marker bit. Second, for
5402  * each filter in the ifp list is checked. If we find it in the internal list,
5403  * the marker bit is set. Otherwise, the filter is added to the add list.
5404  * Third, a loop over the internal list determines if any filters have not
5405  * been found. Each of these is added to the delete list. Finally, the add and
5406  * delete lists are programmed to firmware to update the filters.
5407  *
5408  * @returns zero on success or an integer error code on failure.
5409  */
5410 int
5411 ice_sync_multicast_filters(struct ice_softc *sc)
5412 {
5413 	struct ice_hw *hw = &sc->hw;
5414 	struct ice_switch_info *sw = hw->switch_info;
5415 	struct ice_fltr_mgmt_list_entry *itr;
5416 	struct ice_mcast_sync_data data = {};
5417 	struct ice_list_head *rules, remove_list;
5418 	int status;
5419 	int err = 0;
5420 
5421 	INIT_LIST_HEAD(&data.add_list);
5422 	INIT_LIST_HEAD(&remove_list);
5423 	data.sc = sc;
5424 	data.err = 0;
5425 
5426 	rules = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
5427 
5428 	/* Acquire the lock for the entire duration */
5429 	ice_acquire_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5430 
5431 	/* (1) Reset the marker state for all filters */
5432 	LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry)
5433 		itr->marker = ICE_FLTR_NOT_FOUND;
5434 
5435 	/* (2) determine which filters need to be added and removed */
5436 	if_foreach_llmaddr(sc->ifp, ice_sync_one_mcast_filter, (void *)&data);
5437 	if (data.err) {
5438 		/* ice_sync_one_mcast_filter already prints an error */
5439 		err = data.err;
5440 		ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5441 		goto free_filter_lists;
5442 	}
5443 
5444 	LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry) {
5445 		struct ice_fltr_info *info = &itr->fltr_info;
5446 		const u8 *addr = info->l_data.mac.mac_addr;
5447 
5448 		/* Only check multicast filters */
5449 		if (!ice_filter_is_mcast(&sc->pf_vsi, info))
5450 			continue;
5451 
5452 		/*
5453 		 * If the filter is not marked as found, then it must no
5454 		 * longer be in the ifp address list, so we need to remove it.
5455 		 */
5456 		if (itr->marker == ICE_FLTR_NOT_FOUND) {
5457 			err = ice_add_mac_to_list(&sc->pf_vsi, &remove_list,
5458 						  addr, ICE_FWD_TO_VSI);
5459 			if (err) {
5460 				device_printf(sc->dev,
5461 					      "Failed to place MAC %6D onto remove list, err %s\n",
5462 					      addr, ":", ice_err_str(err));
5463 				ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5464 				goto free_filter_lists;
5465 			}
5466 		}
5467 	}
5468 
5469 	ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5470 
5471 	status = ice_add_mac(hw, &data.add_list);
5472 	if (status) {
5473 		device_printf(sc->dev,
5474 			      "Could not add new MAC filters, err %s aq_err %s\n",
5475 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
5476 		err = (EIO);
5477 		goto free_filter_lists;
5478 	}
5479 
5480 	status = ice_remove_mac(hw, &remove_list);
5481 	if (status) {
5482 		device_printf(sc->dev,
5483 			      "Could not remove old MAC filters, err %s aq_err %s\n",
5484 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
5485 		err = (EIO);
5486 		goto free_filter_lists;
5487 	}
5488 
5489 free_filter_lists:
5490 	ice_free_fltr_list(&data.add_list);
5491 	ice_free_fltr_list(&remove_list);
5492 
5493 	return (err);
5494 }
5495 
5496 /**
5497  * ice_add_vlan_hw_filters - Add multiple VLAN filters for a given VSI
5498  * @vsi: The VSI to add the filter for
5499  * @vid: array of VLAN ids to add
5500  * @length: length of vid array
5501  *
5502  * Programs HW filters so that the given VSI will receive the specified VLANs.
5503  */
5504 int
5505 ice_add_vlan_hw_filters(struct ice_vsi *vsi, u16 *vid, u16 length)
5506 {
5507 	struct ice_hw *hw = &vsi->sc->hw;
5508 	struct ice_list_head vlan_list;
5509 	struct ice_fltr_list_entry *vlan_entries;
5510 	int status;
5511 
5512 	MPASS(length > 0);
5513 
5514 	INIT_LIST_HEAD(&vlan_list);
5515 
5516 	vlan_entries = (struct ice_fltr_list_entry *)
5517 	    malloc(sizeof(*vlan_entries) * length, M_ICE, M_NOWAIT | M_ZERO);
5518 	if (!vlan_entries)
5519 		return (ICE_ERR_NO_MEMORY);
5520 
5521 	for (u16 i = 0; i < length; i++) {
5522 		vlan_entries[i].fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
5523 		vlan_entries[i].fltr_info.fltr_act = ICE_FWD_TO_VSI;
5524 		vlan_entries[i].fltr_info.flag = ICE_FLTR_TX;
5525 		vlan_entries[i].fltr_info.src_id = ICE_SRC_ID_VSI;
5526 		vlan_entries[i].fltr_info.vsi_handle = vsi->idx;
5527 		vlan_entries[i].fltr_info.l_data.vlan.vlan_id = vid[i];
5528 
5529 		LIST_ADD(&vlan_entries[i].list_entry, &vlan_list);
5530 	}
5531 
5532 	status = ice_add_vlan(hw, &vlan_list);
5533 	if (!status)
5534 		goto done;
5535 
5536 	device_printf(vsi->sc->dev, "Failed to add VLAN filters:\n");
5537 	for (u16 i = 0; i < length; i++) {
5538 		device_printf(vsi->sc->dev,
5539 		    "- vlan %d, status %d\n",
5540 		    vlan_entries[i].fltr_info.l_data.vlan.vlan_id,
5541 		    vlan_entries[i].status);
5542 	}
5543 done:
5544 	free(vlan_entries, M_ICE);
5545 	return (status);
5546 }
5547 
5548 /**
5549  * ice_add_vlan_hw_filter - Add a VLAN filter for a given VSI
5550  * @vsi: The VSI to add the filter for
5551  * @vid: VLAN to add
5552  *
5553  * Programs a HW filter so that the given VSI will receive the specified VLAN.
5554  */
5555 int
5556 ice_add_vlan_hw_filter(struct ice_vsi *vsi, u16 vid)
5557 {
5558 	return ice_add_vlan_hw_filters(vsi, &vid, 1);
5559 }
5560 
5561 /**
5562  * ice_remove_vlan_hw_filters - Remove multiple VLAN filters for a given VSI
5563  * @vsi: The VSI to remove the filters from
5564  * @vid: array of VLAN ids to remove
5565  * @length: length of vid array
5566  *
5567  * Removes previously programmed HW filters for the specified VSI.
5568  */
5569 int
5570 ice_remove_vlan_hw_filters(struct ice_vsi *vsi, u16 *vid, u16 length)
5571 {
5572 	struct ice_hw *hw = &vsi->sc->hw;
5573 	struct ice_list_head vlan_list;
5574 	struct ice_fltr_list_entry *vlan_entries;
5575 	int status;
5576 
5577 	MPASS(length > 0);
5578 
5579 	INIT_LIST_HEAD(&vlan_list);
5580 
5581 	vlan_entries = (struct ice_fltr_list_entry *)
5582 	    malloc(sizeof(*vlan_entries) * length, M_ICE, M_NOWAIT | M_ZERO);
5583 	if (!vlan_entries)
5584 		return (ICE_ERR_NO_MEMORY);
5585 
5586 	for (u16 i = 0; i < length; i++) {
5587 		vlan_entries[i].fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
5588 		vlan_entries[i].fltr_info.fltr_act = ICE_FWD_TO_VSI;
5589 		vlan_entries[i].fltr_info.flag = ICE_FLTR_TX;
5590 		vlan_entries[i].fltr_info.src_id = ICE_SRC_ID_VSI;
5591 		vlan_entries[i].fltr_info.vsi_handle = vsi->idx;
5592 		vlan_entries[i].fltr_info.l_data.vlan.vlan_id = vid[i];
5593 
5594 		LIST_ADD(&vlan_entries[i].list_entry, &vlan_list);
5595 	}
5596 
5597 	status = ice_remove_vlan(hw, &vlan_list);
5598 	if (!status)
5599 		goto done;
5600 
5601 	device_printf(vsi->sc->dev, "Failed to remove VLAN filters:\n");
5602 	for (u16 i = 0; i < length; i++) {
5603 		device_printf(vsi->sc->dev,
5604 		    "- vlan %d, status %d\n",
5605 		    vlan_entries[i].fltr_info.l_data.vlan.vlan_id,
5606 		    vlan_entries[i].status);
5607 	}
5608 done:
5609 	free(vlan_entries, M_ICE);
5610 	return (status);
5611 }
5612 
5613 /**
5614  * ice_remove_vlan_hw_filter - Remove a VLAN filter for a given VSI
5615  * @vsi: The VSI to remove the filter from
5616  * @vid: VLAN to remove
5617  *
5618  * Removes a previously programmed HW filter for the specified VSI.
5619  */
5620 int
5621 ice_remove_vlan_hw_filter(struct ice_vsi *vsi, u16 vid)
5622 {
5623 	return ice_remove_vlan_hw_filters(vsi, &vid, 1);
5624 }
5625 
5626 #define ICE_SYSCTL_HELP_RX_ITR			\
5627 "\nControl Rx interrupt throttle rate."		\
5628 "\n\t0-8160 - sets interrupt rate in usecs"	\
5629 "\n\t    -1 - reset the Rx itr to default"
5630 
5631 /**
5632  * ice_sysctl_rx_itr - Display or change the Rx ITR for a VSI
5633  * @oidp: sysctl oid structure
5634  * @arg1: pointer to private data structure
5635  * @arg2: unused
5636  * @req: sysctl request pointer
5637  *
5638  * On read: Displays the current Rx ITR value
5639  * on write: Sets the Rx ITR value, reconfiguring device if it is up
5640  */
5641 static int
5642 ice_sysctl_rx_itr(SYSCTL_HANDLER_ARGS)
5643 {
5644 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
5645 	struct ice_softc *sc = vsi->sc;
5646 	int increment, ret;
5647 
5648 	UNREFERENCED_PARAMETER(arg2);
5649 
5650 	if (ice_driver_is_detaching(sc))
5651 		return (ESHUTDOWN);
5652 
5653 	ret = sysctl_handle_16(oidp, &vsi->rx_itr, 0, req);
5654 	if ((ret) || (req->newptr == NULL))
5655 		return (ret);
5656 
5657 	if (vsi->rx_itr < 0)
5658 		vsi->rx_itr = ICE_DFLT_RX_ITR;
5659 	if (vsi->rx_itr > ICE_ITR_MAX)
5660 		vsi->rx_itr = ICE_ITR_MAX;
5661 
5662 	/* Assume 2usec increment if it hasn't been loaded yet */
5663 	increment = sc->hw.itr_gran ? : 2;
5664 
5665 	/* We need to round the value to the hardware's ITR granularity */
5666 	vsi->rx_itr = (vsi->rx_itr / increment ) * increment;
5667 
5668 	/* If the driver has finished initializing, then we need to reprogram
5669 	 * the ITR registers now. Otherwise, they will be programmed during
5670 	 * driver initialization.
5671 	 */
5672 	if (ice_test_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
5673 		ice_configure_rx_itr(vsi);
5674 
5675 	return (0);
5676 }
5677 
5678 #define ICE_SYSCTL_HELP_TX_ITR			\
5679 "\nControl Tx interrupt throttle rate."		\
5680 "\n\t0-8160 - sets interrupt rate in usecs"	\
5681 "\n\t    -1 - reset the Tx itr to default"
5682 
5683 /**
5684  * ice_sysctl_tx_itr - Display or change the Tx ITR for a VSI
5685  * @oidp: sysctl oid structure
5686  * @arg1: pointer to private data structure
5687  * @arg2: unused
5688  * @req: sysctl request pointer
5689  *
5690  * On read: Displays the current Tx ITR value
5691  * on write: Sets the Tx ITR value, reconfiguring device if it is up
5692  */
5693 static int
5694 ice_sysctl_tx_itr(SYSCTL_HANDLER_ARGS)
5695 {
5696 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
5697 	struct ice_softc *sc = vsi->sc;
5698 	int increment, ret;
5699 
5700 	UNREFERENCED_PARAMETER(arg2);
5701 
5702 	if (ice_driver_is_detaching(sc))
5703 		return (ESHUTDOWN);
5704 
5705 	ret = sysctl_handle_16(oidp, &vsi->tx_itr, 0, req);
5706 	if ((ret) || (req->newptr == NULL))
5707 		return (ret);
5708 
5709 	/* Allow configuring a negative value to reset to the default */
5710 	if (vsi->tx_itr < 0)
5711 		vsi->tx_itr = ICE_DFLT_TX_ITR;
5712 	if (vsi->tx_itr > ICE_ITR_MAX)
5713 		vsi->tx_itr = ICE_ITR_MAX;
5714 
5715 	/* Assume 2usec increment if it hasn't been loaded yet */
5716 	increment = sc->hw.itr_gran ? : 2;
5717 
5718 	/* We need to round the value to the hardware's ITR granularity */
5719 	vsi->tx_itr = (vsi->tx_itr / increment ) * increment;
5720 
5721 	/* If the driver has finished initializing, then we need to reprogram
5722 	 * the ITR registers now. Otherwise, they will be programmed during
5723 	 * driver initialization.
5724 	 */
5725 	if (ice_test_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
5726 		ice_configure_tx_itr(vsi);
5727 
5728 	return (0);
5729 }
5730 
5731 /**
5732  * ice_add_vsi_tunables - Add tunables and nodes for a VSI
5733  * @vsi: pointer to VSI structure
5734  * @parent: parent node to add the tunables under
5735  *
5736  * Create a sysctl context for the VSI, so that sysctls for the VSI can be
5737  * dynamically removed upon VSI removal.
5738  *
5739  * Add various tunables and set up the basic node structure for the VSI. Must
5740  * be called *prior* to ice_add_vsi_sysctls. It should be called as soon as
5741  * possible after the VSI memory is initialized.
5742  *
5743  * VSI specific sysctls with CTLFLAG_TUN should be initialized here so that
5744  * their values can be read from loader.conf prior to their first use in the
5745  * driver.
5746  */
5747 void
5748 ice_add_vsi_tunables(struct ice_vsi *vsi, struct sysctl_oid *parent)
5749 {
5750 	struct sysctl_oid_list *vsi_list;
5751 	char vsi_name[32], vsi_desc[32];
5752 
5753 	struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);
5754 
5755 	/* Initialize the sysctl context for this VSI */
5756 	sysctl_ctx_init(&vsi->ctx);
5757 
5758 	/* Add a node to collect this VSI's statistics together */
5759 	snprintf(vsi_name, sizeof(vsi_name), "%u", vsi->idx);
5760 	snprintf(vsi_desc, sizeof(vsi_desc), "VSI %u", vsi->idx);
5761 	vsi->vsi_node = SYSCTL_ADD_NODE(&vsi->ctx, parent_list, OID_AUTO, vsi_name,
5762 					CTLFLAG_RD, NULL, vsi_desc);
5763 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
5764 
5765 	vsi->rx_itr = ICE_DFLT_TX_ITR;
5766 	SYSCTL_ADD_PROC(&vsi->ctx, vsi_list, OID_AUTO, "rx_itr",
5767 			CTLTYPE_S16 | CTLFLAG_RWTUN,
5768 			vsi, 0, ice_sysctl_rx_itr, "S",
5769 			ICE_SYSCTL_HELP_RX_ITR);
5770 
5771 	vsi->tx_itr = ICE_DFLT_TX_ITR;
5772 	SYSCTL_ADD_PROC(&vsi->ctx, vsi_list, OID_AUTO, "tx_itr",
5773 			CTLTYPE_S16 | CTLFLAG_RWTUN,
5774 			vsi, 0, ice_sysctl_tx_itr, "S",
5775 			ICE_SYSCTL_HELP_TX_ITR);
5776 }
5777 
5778 /**
5779  * ice_del_vsi_sysctl_ctx - Delete the sysctl context(s) of a VSI
5780  * @vsi: the VSI to remove contexts for
5781  *
5782  * Free the context for the VSI sysctls. This includes the main context, as
5783  * well as the per-queue sysctls.
5784  */
5785 void
5786 ice_del_vsi_sysctl_ctx(struct ice_vsi *vsi)
5787 {
5788 	device_t dev = vsi->sc->dev;
5789 	int err;
5790 
5791 	if (vsi->vsi_node) {
5792 		err = sysctl_ctx_free(&vsi->ctx);
5793 		if (err)
5794 			device_printf(dev, "failed to free VSI %d sysctl context, err %s\n",
5795 				      vsi->idx, ice_err_str(err));
5796 		vsi->vsi_node = NULL;
5797 	}
5798 }
5799 
5800 /**
5801  * ice_add_dscp2tc_map_sysctls - Add sysctl tree for DSCP to TC mapping
5802  * @sc: pointer to device private softc
5803  * @ctx: the sysctl ctx to use
5804  * @ctx_list: list of sysctl children for device (to add sysctl tree to)
5805  *
5806  * Add a sysctl tree for individual dscp2tc_map sysctls. Each child of this
5807  * node can map 8 DSCPs to TC values; there are 8 of these in turn for a total
5808  * of 64 DSCP to TC map values that the user can configure.
5809  */
5810 void
5811 ice_add_dscp2tc_map_sysctls(struct ice_softc *sc,
5812 			    struct sysctl_ctx_list *ctx,
5813 			    struct sysctl_oid_list *ctx_list)
5814 {
5815 	struct sysctl_oid_list *node_list;
5816 	struct sysctl_oid *node;
5817 	struct sbuf *namebuf, *descbuf;
5818 	int first_dscp_val, last_dscp_val;
5819 
5820 	node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "dscp2tc_map", CTLFLAG_RD,
5821 			       NULL, "Map of DSCP values to DCB TCs");
5822 	node_list = SYSCTL_CHILDREN(node);
5823 
5824 	namebuf = sbuf_new_auto();
5825 	descbuf = sbuf_new_auto();
5826 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
5827 		sbuf_clear(namebuf);
5828 		sbuf_clear(descbuf);
5829 
5830 		first_dscp_val = i * 8;
5831 		last_dscp_val = first_dscp_val + 7;
5832 
5833 		sbuf_printf(namebuf, "%d-%d", first_dscp_val, last_dscp_val);
5834 		sbuf_printf(descbuf, "Map DSCP values %d to %d to TCs",
5835 			    first_dscp_val, last_dscp_val);
5836 
5837 		sbuf_finish(namebuf);
5838 		sbuf_finish(descbuf);
5839 
5840 		SYSCTL_ADD_PROC(ctx, node_list,
5841 		    OID_AUTO, sbuf_data(namebuf), CTLTYPE_STRING | CTLFLAG_RW,
5842 		    sc, i, ice_sysctl_dscp2tc_map, "A", sbuf_data(descbuf));
5843 	}
5844 
5845 	sbuf_delete(namebuf);
5846 	sbuf_delete(descbuf);
5847 }
5848 
5849 /**
5850  * ice_add_device_tunables - Add early tunable sysctls and sysctl nodes
5851  * @sc: device private structure
5852  *
5853  * Add per-device dynamic tunable sysctls, and setup the general sysctl trees
5854  * for re-use by ice_add_device_sysctls.
5855  *
5856  * In order for the sysctl fields to be initialized before use, this function
5857  * should be called as early as possible during attach activities.
5858  *
5859  * Any non-global sysctl marked as CTLFLAG_TUN should likely be initialized
5860  * here in this function, rather than later in ice_add_device_sysctls.
5861  *
5862  * To make things easier, this function is also expected to setup the various
5863  * sysctl nodes in addition to tunables so that other sysctls which can't be
5864  * initialized early can hook into the same nodes.
5865  */
5866 void
5867 ice_add_device_tunables(struct ice_softc *sc)
5868 {
5869 	device_t dev = sc->dev;
5870 
5871 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5872 	struct sysctl_oid_list *ctx_list =
5873 		SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
5874 
5875 	sc->enable_health_events = ice_enable_health_events;
5876 
5877 	SYSCTL_ADD_BOOL(ctx, ctx_list, OID_AUTO, "enable_health_events",
5878 			CTLFLAG_RDTUN, &sc->enable_health_events, 0,
5879 			"Enable FW health event reporting for this PF");
5880 
5881 	/* Add a node to track VSI sysctls. Keep track of the node in the
5882 	 * softc so that we can hook other sysctls into it later. This
5883 	 * includes both the VSI statistics, as well as potentially dynamic
5884 	 * VSIs in the future.
5885 	 */
5886 
5887 	sc->vsi_sysctls = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "vsi",
5888 					  CTLFLAG_RD, NULL, "VSI Configuration and Statistics");
5889 
5890 	/* Add debug tunables */
5891 	ice_add_debug_tunables(sc);
5892 }
5893 
5894 /**
5895  * ice_sysctl_dump_mac_filters - Dump a list of all HW MAC Filters
5896  * @oidp: sysctl oid structure
5897  * @arg1: pointer to private data structure
5898  * @arg2: unused
5899  * @req: sysctl request pointer
5900  *
5901  * Callback for "mac_filters" sysctl to dump the programmed MAC filters.
5902  */
5903 static int
5904 ice_sysctl_dump_mac_filters(SYSCTL_HANDLER_ARGS)
5905 {
5906 	struct ice_softc *sc = (struct ice_softc *)arg1;
5907 	struct ice_hw *hw = &sc->hw;
5908 	struct ice_switch_info *sw = hw->switch_info;
5909 	struct ice_fltr_mgmt_list_entry *fm_entry;
5910 	struct ice_list_head *rule_head;
5911 	struct ice_lock *rule_lock;
5912 	struct ice_fltr_info *fi;
5913 	struct sbuf *sbuf;
5914 	int ret;
5915 
5916 	UNREFERENCED_PARAMETER(oidp);
5917 	UNREFERENCED_PARAMETER(arg2);
5918 
5919 	if (ice_driver_is_detaching(sc))
5920 		return (ESHUTDOWN);
5921 
5922 	/* Wire the old buffer so we can take a non-sleepable lock */
5923 	ret = sysctl_wire_old_buffer(req, 0);
5924 	if (ret)
5925 		return (ret);
5926 
5927 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5928 
5929 	rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock;
5930 	rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
5931 
5932 	sbuf_printf(sbuf, "MAC Filter List");
5933 
5934 	ice_acquire_lock(rule_lock);
5935 
5936 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5937 		fi = &fm_entry->fltr_info;
5938 
5939 		sbuf_printf(sbuf,
5940 			    "\nmac = %6D, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %d",
5941 			    fi->l_data.mac.mac_addr, ":", fi->vsi_handle,
5942 			    ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en,
5943 			    ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id);
5944 
5945 		/* if we have a vsi_list_info, print some information about that */
5946 		if (fm_entry->vsi_list_info) {
5947 			sbuf_printf(sbuf,
5948 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
5949 				    fm_entry->vsi_count,
5950 				    fm_entry->vsi_list_info->vsi_list_id,
5951 				    fm_entry->vsi_list_info->ref_cnt);
5952 		}
5953 	}
5954 
5955 	ice_release_lock(rule_lock);
5956 
5957 	sbuf_finish(sbuf);
5958 	sbuf_delete(sbuf);
5959 
5960 	return (0);
5961 }
5962 
5963 /**
5964  * ice_sysctl_dump_vlan_filters - Dump a list of all HW VLAN Filters
5965  * @oidp: sysctl oid structure
5966  * @arg1: pointer to private data structure
5967  * @arg2: unused
5968  * @req: sysctl request pointer
5969  *
5970  * Callback for "vlan_filters" sysctl to dump the programmed VLAN filters.
5971  */
5972 static int
5973 ice_sysctl_dump_vlan_filters(SYSCTL_HANDLER_ARGS)
5974 {
5975 	struct ice_softc *sc = (struct ice_softc *)arg1;
5976 	struct ice_hw *hw = &sc->hw;
5977 	struct ice_switch_info *sw = hw->switch_info;
5978 	struct ice_fltr_mgmt_list_entry *fm_entry;
5979 	struct ice_list_head *rule_head;
5980 	struct ice_lock *rule_lock;
5981 	struct ice_fltr_info *fi;
5982 	struct sbuf *sbuf;
5983 	int ret;
5984 
5985 	UNREFERENCED_PARAMETER(oidp);
5986 	UNREFERENCED_PARAMETER(arg2);
5987 
5988 	if (ice_driver_is_detaching(sc))
5989 		return (ESHUTDOWN);
5990 
5991 	/* Wire the old buffer so we can take a non-sleepable lock */
5992 	ret = sysctl_wire_old_buffer(req, 0);
5993 	if (ret)
5994 		return (ret);
5995 
5996 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5997 
5998 	rule_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock;
5999 	rule_head = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rules;
6000 
6001 	sbuf_printf(sbuf, "VLAN Filter List");
6002 
6003 	ice_acquire_lock(rule_lock);
6004 
6005 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
6006 		fi = &fm_entry->fltr_info;
6007 
6008 		sbuf_printf(sbuf,
6009 			    "\nvlan_id = %4d, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d",
6010 			    fi->l_data.vlan.vlan_id, fi->vsi_handle,
6011 			    ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en,
6012 			    ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id);
6013 
6014 		/* if we have a vsi_list_info, print some information about that */
6015 		if (fm_entry->vsi_list_info) {
6016 			sbuf_printf(sbuf,
6017 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
6018 				    fm_entry->vsi_count,
6019 				    fm_entry->vsi_list_info->vsi_list_id,
6020 				    fm_entry->vsi_list_info->ref_cnt);
6021 		}
6022 	}
6023 
6024 	ice_release_lock(rule_lock);
6025 
6026 	sbuf_finish(sbuf);
6027 	sbuf_delete(sbuf);
6028 
6029 	return (0);
6030 }
6031 
6032 /**
6033  * ice_sysctl_dump_ethertype_filters - Dump a list of all HW Ethertype filters
6034  * @oidp: sysctl oid structure
6035  * @arg1: pointer to private data structure
6036  * @arg2: unused
6037  * @req: sysctl request pointer
6038  *
6039  * Callback for "ethertype_filters" sysctl to dump the programmed Ethertype
6040  * filters.
6041  */
6042 static int
6043 ice_sysctl_dump_ethertype_filters(SYSCTL_HANDLER_ARGS)
6044 {
6045 	struct ice_softc *sc = (struct ice_softc *)arg1;
6046 	struct ice_hw *hw = &sc->hw;
6047 	struct ice_switch_info *sw = hw->switch_info;
6048 	struct ice_fltr_mgmt_list_entry *fm_entry;
6049 	struct ice_list_head *rule_head;
6050 	struct ice_lock *rule_lock;
6051 	struct ice_fltr_info *fi;
6052 	struct sbuf *sbuf;
6053 	int ret;
6054 
6055 	UNREFERENCED_PARAMETER(oidp);
6056 	UNREFERENCED_PARAMETER(arg2);
6057 
6058 	if (ice_driver_is_detaching(sc))
6059 		return (ESHUTDOWN);
6060 
6061 	/* Wire the old buffer so we can take a non-sleepable lock */
6062 	ret = sysctl_wire_old_buffer(req, 0);
6063 	if (ret)
6064 		return (ret);
6065 
6066 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6067 
6068 	rule_lock = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE].filt_rule_lock;
6069 	rule_head = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE].filt_rules;
6070 
6071 	sbuf_printf(sbuf, "Ethertype Filter List");
6072 
6073 	ice_acquire_lock(rule_lock);
6074 
6075 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
6076 		fi = &fm_entry->fltr_info;
6077 
6078 		sbuf_printf(sbuf,
6079 			    "\nethertype = 0x%04x, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d",
6080 			fi->l_data.ethertype_mac.ethertype,
6081 			fi->vsi_handle, ice_fltr_flag_str(fi->flag),
6082 			fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act),
6083 			fi->fltr_rule_id);
6084 
6085 		/* if we have a vsi_list_info, print some information about that */
6086 		if (fm_entry->vsi_list_info) {
6087 			sbuf_printf(sbuf,
6088 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
6089 				    fm_entry->vsi_count,
6090 				    fm_entry->vsi_list_info->vsi_list_id,
6091 				    fm_entry->vsi_list_info->ref_cnt);
6092 		}
6093 	}
6094 
6095 	ice_release_lock(rule_lock);
6096 
6097 	sbuf_finish(sbuf);
6098 	sbuf_delete(sbuf);
6099 
6100 	return (0);
6101 }
6102 
6103 /**
6104  * ice_sysctl_dump_ethertype_mac_filters - Dump a list of all HW Ethertype/MAC filters
6105  * @oidp: sysctl oid structure
6106  * @arg1: pointer to private data structure
6107  * @arg2: unused
6108  * @req: sysctl request pointer
6109  *
6110  * Callback for "ethertype_mac_filters" sysctl to dump the programmed
6111  * Ethertype/MAC filters.
6112  */
6113 static int
6114 ice_sysctl_dump_ethertype_mac_filters(SYSCTL_HANDLER_ARGS)
6115 {
6116 	struct ice_softc *sc = (struct ice_softc *)arg1;
6117 	struct ice_hw *hw = &sc->hw;
6118 	struct ice_switch_info *sw = hw->switch_info;
6119 	struct ice_fltr_mgmt_list_entry *fm_entry;
6120 	struct ice_list_head *rule_head;
6121 	struct ice_lock *rule_lock;
6122 	struct ice_fltr_info *fi;
6123 	struct sbuf *sbuf;
6124 	int ret;
6125 
6126 	UNREFERENCED_PARAMETER(oidp);
6127 	UNREFERENCED_PARAMETER(arg2);
6128 
6129 	if (ice_driver_is_detaching(sc))
6130 		return (ESHUTDOWN);
6131 
6132 	/* Wire the old buffer so we can take a non-sleepable lock */
6133 	ret = sysctl_wire_old_buffer(req, 0);
6134 	if (ret)
6135 		return (ret);
6136 
6137 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6138 
6139 	rule_lock = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE_MAC].filt_rule_lock;
6140 	rule_head = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE_MAC].filt_rules;
6141 
6142 	sbuf_printf(sbuf, "Ethertype/MAC Filter List");
6143 
6144 	ice_acquire_lock(rule_lock);
6145 
6146 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
6147 		fi = &fm_entry->fltr_info;
6148 
6149 		sbuf_printf(sbuf,
6150 			    "\nethertype = 0x%04x, mac = %6D, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d",
6151 			    fi->l_data.ethertype_mac.ethertype,
6152 			    fi->l_data.ethertype_mac.mac_addr, ":",
6153 			    fi->vsi_handle, ice_fltr_flag_str(fi->flag),
6154 			    fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act),
6155 			    fi->fltr_rule_id);
6156 
6157 		/* if we have a vsi_list_info, print some information about that */
6158 		if (fm_entry->vsi_list_info) {
6159 			sbuf_printf(sbuf,
6160 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
6161 				    fm_entry->vsi_count,
6162 				    fm_entry->vsi_list_info->vsi_list_id,
6163 				    fm_entry->vsi_list_info->ref_cnt);
6164 		}
6165 	}
6166 
6167 	ice_release_lock(rule_lock);
6168 
6169 	sbuf_finish(sbuf);
6170 	sbuf_delete(sbuf);
6171 
6172 	return (0);
6173 }
6174 
6175 /**
6176  * ice_sysctl_dump_state_flags - Dump device driver state flags
6177  * @oidp: sysctl oid structure
6178  * @arg1: pointer to private data structure
6179  * @arg2: unused
6180  * @req: sysctl request pointer
6181  *
6182  * Callback for "state" sysctl to display currently set driver state flags.
6183  */
6184 static int
6185 ice_sysctl_dump_state_flags(SYSCTL_HANDLER_ARGS)
6186 {
6187 	struct ice_softc *sc = (struct ice_softc *)arg1;
6188 	struct sbuf *sbuf;
6189 	u32 copied_state;
6190 	unsigned int i;
6191 	bool at_least_one = false;
6192 
6193 	UNREFERENCED_PARAMETER(oidp);
6194 	UNREFERENCED_PARAMETER(arg2);
6195 
6196 	if (ice_driver_is_detaching(sc))
6197 		return (ESHUTDOWN);
6198 
6199 	/* Make a copy of the state to ensure we display coherent values */
6200 	copied_state = atomic_load_acq_32(&sc->state);
6201 
6202 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6203 
6204 	/* Add the string for each set state to the sbuf */
6205 	for (i = 0; i < 32; i++) {
6206 		if (copied_state & BIT(i)) {
6207 			const char *str = ice_state_to_str((enum ice_state)i);
6208 
6209 			at_least_one = true;
6210 
6211 			if (str)
6212 				sbuf_printf(sbuf, "\n%s", str);
6213 			else
6214 				sbuf_printf(sbuf, "\nBIT(%u)", i);
6215 		}
6216 	}
6217 
6218 	if (!at_least_one)
6219 		sbuf_printf(sbuf, "Nothing set");
6220 
6221 	sbuf_finish(sbuf);
6222 	sbuf_delete(sbuf);
6223 
6224 	return (0);
6225 }
6226 
6227 #define ICE_SYSCTL_DEBUG_MASK_HELP \
6228 "\nSelect debug statements to print to kernel message log"	\
6229 "\nFlags:"							\
6230 "\n\t         0x1 - Function Tracing"				\
6231 "\n\t         0x2 - Driver Initialization"			\
6232 "\n\t         0x4 - Release"					\
6233 "\n\t         0x8 - FW Logging"					\
6234 "\n\t        0x10 - Link"					\
6235 "\n\t        0x20 - PHY"					\
6236 "\n\t        0x40 - Queue Context"				\
6237 "\n\t        0x80 - NVM"					\
6238 "\n\t       0x100 - LAN"					\
6239 "\n\t       0x200 - Flow"					\
6240 "\n\t       0x400 - DCB"					\
6241 "\n\t       0x800 - Diagnostics"				\
6242 "\n\t      0x1000 - Flow Director"				\
6243 "\n\t      0x2000 - Switch"					\
6244 "\n\t      0x4000 - Scheduler"					\
6245 "\n\t      0x8000 - RDMA"					\
6246 "\n\t     0x10000 - DDP Package"				\
6247 "\n\t     0x20000 - Resources"					\
6248 "\n\t     0x40000 - ACL"					\
6249 "\n\t     0x80000 - PTP"					\
6250 "\n\t   ..."							\
6251 "\n\t   0x1000000 - Admin Queue messages"			\
6252 "\n\t   0x2000000 - Admin Queue descriptors"			\
6253 "\n\t   0x4000000 - Admin Queue descriptor buffers"		\
6254 "\n\t   0x8000000 - Admin Queue commands"			\
6255 "\n\t  0x10000000 - Parser"					\
6256 "\n\t   ..."							\
6257 "\n\t  0x80000000 - (Reserved for user)"			\
6258 "\n\t"								\
6259 "\nUse \"sysctl -x\" to view flags properly."
6260 
6261 /**
6262  * ice_add_debug_tunables - Add tunables helpful for debugging the device driver
6263  * @sc: device private structure
6264  *
6265  * Add sysctl tunable values related to debugging the device driver. For now,
6266  * this means a tunable to set the debug mask early during driver load.
6267  *
6268  * The debug node will be marked CTLFLAG_SKIP unless INVARIANTS is defined, so
6269  * that in normal kernel builds, these will all be hidden, but on a debug
6270  * kernel they will be more easily visible.
6271  */
6272 static void
6273 ice_add_debug_tunables(struct ice_softc *sc)
6274 {
6275 	struct sysctl_oid_list *debug_list;
6276 	device_t dev = sc->dev;
6277 
6278 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
6279 	struct sysctl_oid_list *ctx_list =
6280 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
6281 
6282 	sc->debug_sysctls = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "debug",
6283 					    ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6284 					    NULL, "Debug Sysctls");
6285 	debug_list = SYSCTL_CHILDREN(sc->debug_sysctls);
6286 
6287 	SYSCTL_ADD_U64(ctx, debug_list, OID_AUTO, "debug_mask",
6288 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RWTUN,
6289 		       &sc->hw.debug_mask, 0,
6290 		       ICE_SYSCTL_DEBUG_MASK_HELP);
6291 
6292 	/* Load the default value from the global sysctl first */
6293 	sc->enable_tx_fc_filter = ice_enable_tx_fc_filter;
6294 
6295 	SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "enable_tx_fc_filter",
6296 			ICE_CTLFLAG_DEBUG | CTLFLAG_RDTUN,
6297 			&sc->enable_tx_fc_filter, 0,
6298 			"Drop Ethertype 0x8808 control frames originating from software on this PF");
6299 
6300 	sc->tx_balance_en = ice_tx_balance_en;
6301 	SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "tx_balance",
6302 			ICE_CTLFLAG_DEBUG | CTLFLAG_RWTUN,
6303 			&sc->tx_balance_en, 0,
6304 			"Enable 5-layer scheduler topology");
6305 
6306 	/* Load the default value from the global sysctl first */
6307 	sc->enable_tx_lldp_filter = ice_enable_tx_lldp_filter;
6308 
6309 	SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "enable_tx_lldp_filter",
6310 			ICE_CTLFLAG_DEBUG | CTLFLAG_RDTUN,
6311 			&sc->enable_tx_lldp_filter, 0,
6312 			"Drop Ethertype 0x88cc LLDP frames originating from software on this PF");
6313 
6314 	ice_add_fw_logging_tunables(sc, sc->debug_sysctls);
6315 }
6316 
6317 #define ICE_SYSCTL_HELP_REQUEST_RESET		\
6318 "\nRequest the driver to initiate a reset."	\
6319 "\n\tpfr - Initiate a PF reset"			\
6320 "\n\tcorer - Initiate a CORE reset"		\
6321 "\n\tglobr - Initiate a GLOBAL reset"
6322 
6323 /**
6324  * @var rl_sysctl_ticks
6325  * @brief timestamp for latest reset request sysctl call
6326  *
6327  * Helps rate-limit the call to the sysctl which resets the device
6328  */
6329 int rl_sysctl_ticks = 0;
6330 
6331 /**
6332  * ice_sysctl_request_reset - Request that the driver initiate a reset
6333  * @oidp: sysctl oid structure
6334  * @arg1: pointer to private data structure
6335  * @arg2: unused
6336  * @req: sysctl request pointer
6337  *
6338  * Callback for "request_reset" sysctl to request that the driver initiate
6339  * a reset. Expects to be passed one of the following strings
6340  *
6341  * "pfr" - Initiate a PF reset
6342  * "corer" - Initiate a CORE reset
6343  * "globr" - Initiate a Global reset
6344  */
6345 static int
6346 ice_sysctl_request_reset(SYSCTL_HANDLER_ARGS)
6347 {
6348 	struct ice_softc *sc = (struct ice_softc *)arg1;
6349 	struct ice_hw *hw = &sc->hw;
6350 	int status;
6351 	enum ice_reset_req reset_type = ICE_RESET_INVAL;
6352 	const char *reset_message;
6353 	int ret;
6354 
6355 	/* Buffer to store the requested reset string. Must contain enough
6356 	 * space to store the largest expected reset string, which currently
6357 	 * means 6 bytes of space.
6358 	 */
6359 	char reset[6] = "";
6360 
6361 	UNREFERENCED_PARAMETER(arg2);
6362 
6363 	ret = priv_check(curthread, PRIV_DRIVER);
6364 	if (ret)
6365 		return (ret);
6366 
6367 	if (ice_driver_is_detaching(sc))
6368 		return (ESHUTDOWN);
6369 
6370 	/* Read in the requested reset type. */
6371 	ret = sysctl_handle_string(oidp, reset, sizeof(reset), req);
6372 	if ((ret) || (req->newptr == NULL))
6373 		return (ret);
6374 
6375 	if (strcmp(reset, "pfr") == 0) {
6376 		reset_message = "Requesting a PF reset";
6377 		reset_type = ICE_RESET_PFR;
6378 	} else if (strcmp(reset, "corer") == 0) {
6379 		reset_message = "Initiating a CORE reset";
6380 		reset_type = ICE_RESET_CORER;
6381 	} else if (strcmp(reset, "globr") == 0) {
6382 		reset_message = "Initiating a GLOBAL reset";
6383 		reset_type = ICE_RESET_GLOBR;
6384 	} else if (strcmp(reset, "empr") == 0) {
6385 		device_printf(sc->dev, "Triggering an EMP reset via software is not currently supported\n");
6386 		return (EOPNOTSUPP);
6387 	}
6388 
6389 	if (reset_type == ICE_RESET_INVAL) {
6390 		device_printf(sc->dev, "%s is not a valid reset request\n", reset);
6391 		return (EINVAL);
6392 	}
6393 
6394 	/*
6395 	 * Rate-limit the frequency at which this function is called.
6396 	 * Assuming this is called successfully once, typically,
6397 	 * everything should be handled within the allotted time frame.
6398 	 * However, in the odd setup situations, we've also put in
6399 	 * guards for when the reset has finished, but we're in the
6400 	 * process of rebuilding. And instead of queueing an intent,
6401 	 * simply error out and let the caller retry, if so desired.
6402 	 */
6403 	if (TICKS_2_MSEC(ticks - rl_sysctl_ticks) < 500) {
6404 		device_printf(sc->dev,
6405 		    "Call frequency too high. Operation aborted.\n");
6406 		return (EBUSY);
6407 	}
6408 	rl_sysctl_ticks = ticks;
6409 
6410 	if (TICKS_2_MSEC(ticks - sc->rebuild_ticks) < 100) {
6411 		device_printf(sc->dev, "Device rebuilding. Operation aborted.\n");
6412 		return (EBUSY);
6413 	}
6414 
6415 	if (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_DEVSTATE_M) {
6416 		device_printf(sc->dev, "Device in reset. Operation aborted.\n");
6417 		return (EBUSY);
6418 	}
6419 
6420 	device_printf(sc->dev, "%s\n", reset_message);
6421 
6422 	/* Initiate the PF reset during the admin status task */
6423 	if (reset_type == ICE_RESET_PFR) {
6424 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
6425 		return (0);
6426 	}
6427 
6428 	/*
6429 	 * Other types of resets including CORE and GLOBAL resets trigger an
6430 	 * interrupt on all PFs. Initiate the reset now. Preparation and
6431 	 * rebuild logic will be handled by the admin status task.
6432 	 */
6433 	status = ice_reset(hw, reset_type);
6434 
6435 	/*
6436 	 * Resets can take a long time and we still don't want another call
6437 	 * to this function before we settle down.
6438 	 */
6439 	rl_sysctl_ticks = ticks;
6440 
6441 	if (status) {
6442 		device_printf(sc->dev, "failed to initiate device reset, err %s\n",
6443 			      ice_status_str(status));
6444 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
6445 		return (EFAULT);
6446 	}
6447 
6448 	return (0);
6449 }
6450 
6451 #define ICE_AQC_DBG_DUMP_CLUSTER_ID_INVALID	(0xFFFFFF)
6452 #define ICE_SYSCTL_HELP_FW_DEBUG_DUMP_CLUSTER_SETTING		\
6453 "\nSelect clusters to dump with \"dump\" sysctl"		\
6454 "\nFlags:"							\
6455 "\n\t        0 - All clusters (default)"			\
6456 "\n\t      0x1 - Switch"					\
6457 "\n\t      0x2 - ACL"						\
6458 "\n\t      0x4 - Tx Scheduler"					\
6459 "\n\t      0x8 - Profile Configuration"				\
6460 "\n\t     0x20 - Link"						\
6461 "\n\t     0x80 - DCB"						\
6462 "\n\t    0x100 - L2P"						\
6463 "\n\t 0x400000 - Manageability Transactions (excluding E830)"	\
6464 "\n"								\
6465 "\nUse \"sysctl -x\" to view flags properly."
6466 
6467 /**
6468  * ice_sysctl_fw_debug_dump_cluster_setting - Set which clusters to dump
6469  *     from FW when FW debug dump occurs
6470  * @oidp: sysctl oid structure
6471  * @arg1: pointer to private data structure
6472  * @arg2: unused
6473  * @req: sysctl request pointer
6474  */
6475 static int
6476 ice_sysctl_fw_debug_dump_cluster_setting(SYSCTL_HANDLER_ARGS)
6477 {
6478 	struct ice_softc *sc = (struct ice_softc *)arg1;
6479 	device_t dev = sc->dev;
6480 	u32 clusters;
6481 	int ret;
6482 
6483 	UNREFERENCED_PARAMETER(arg2);
6484 
6485 	ret = priv_check(curthread, PRIV_DRIVER);
6486 	if (ret)
6487 		return (ret);
6488 
6489 	if (ice_driver_is_detaching(sc))
6490 		return (ESHUTDOWN);
6491 
6492 	clusters = sc->fw_debug_dump_cluster_mask;
6493 
6494 	ret = sysctl_handle_32(oidp, &clusters, 0, req);
6495 	if ((ret) || (req->newptr == NULL))
6496 		return (ret);
6497 
6498 	u32 valid_cluster_mask;
6499 	if (ice_is_e830(&sc->hw))
6500 		valid_cluster_mask = ICE_FW_DEBUG_DUMP_VALID_CLUSTER_MASK_E830;
6501 	else
6502 		valid_cluster_mask = ICE_FW_DEBUG_DUMP_VALID_CLUSTER_MASK_E810;
6503 
6504 	if (clusters & ~(valid_cluster_mask)) {
6505 		device_printf(dev,
6506 		    "%s: ERROR: Incorrect settings requested\n",
6507 		    __func__);
6508 		sc->fw_debug_dump_cluster_mask = ICE_AQC_DBG_DUMP_CLUSTER_ID_INVALID;
6509 		return (EINVAL);
6510 	}
6511 
6512 	sc->fw_debug_dump_cluster_mask = clusters;
6513 
6514 	return (0);
6515 }
6516 
6517 #define ICE_FW_DUMP_AQ_COUNT_LIMIT	(10000)
6518 
6519 /**
6520  * ice_fw_debug_dump_print_cluster - Print formatted cluster data from FW
6521  * @sc: the device softc
6522  * @sbuf: initialized sbuf to print data to
6523  * @cluster_id: FW cluster ID to print data from
6524  *
6525  * Reads debug data from the specified cluster id in the FW and prints it to
6526  * the input sbuf. This function issues multiple AQ commands to the FW in
6527  * order to get all of the data in the cluster.
6528  *
6529  * @remark Only intended to be used by the sysctl handler
6530  * ice_sysctl_fw_debug_dump_do_dump
6531  */
6532 static u16
6533 ice_fw_debug_dump_print_cluster(struct ice_softc *sc, struct sbuf *sbuf, u16 cluster_id)
6534 {
6535 	struct ice_hw *hw = &sc->hw;
6536 	device_t dev = sc->dev;
6537 	u16 data_buf_size = ICE_AQ_MAX_BUF_LEN;
6538 	const u8 reserved_buf[8] = {};
6539 	int status;
6540 	int counter = 0;
6541 	u8 *data_buf;
6542 
6543 	/* Input parameters / loop variables */
6544 	u16 table_id = 0;
6545 	u32 offset = 0;
6546 
6547 	/* Output from the Get Internal Data AQ command */
6548 	u16 ret_buf_size = 0;
6549 	u16 ret_next_cluster = 0;
6550 	u16 ret_next_table = 0;
6551 	u32 ret_next_index = 0;
6552 
6553 	/* Other setup */
6554 	data_buf = (u8 *)malloc(data_buf_size, M_ICE, M_NOWAIT | M_ZERO);
6555 	if (!data_buf)
6556 		return ret_next_cluster;
6557 
6558 	ice_debug(hw, ICE_DBG_DIAG, "%s: dumping cluster id %d\n", __func__,
6559 	    cluster_id);
6560 
6561 	for (;;) {
6562 		/* Do not trust the FW behavior to be completely correct */
6563 		if (counter++ >= ICE_FW_DUMP_AQ_COUNT_LIMIT) {
6564 			device_printf(dev,
6565 			    "%s: Exceeded counter limit for cluster %d\n",
6566 			    __func__, cluster_id);
6567 			break;
6568 		}
6569 
6570 		ice_debug(hw, ICE_DBG_DIAG, "---\n");
6571 		ice_debug(hw, ICE_DBG_DIAG,
6572 		    "table_id 0x%04x offset 0x%08x buf_size %d\n",
6573 		    table_id, offset, data_buf_size);
6574 
6575 		status = ice_aq_get_internal_data(hw, cluster_id, table_id,
6576 		    offset, data_buf, data_buf_size, &ret_buf_size,
6577 		    &ret_next_cluster, &ret_next_table, &ret_next_index, NULL);
6578 		if (status) {
6579 			device_printf(dev,
6580 			    "%s: ice_aq_get_internal_data in cluster %d: err %s aq_err %s\n",
6581 			    __func__, cluster_id, ice_status_str(status),
6582 			    ice_aq_str(hw->adminq.sq_last_status));
6583 			break;
6584 		}
6585 
6586 		ice_debug(hw, ICE_DBG_DIAG,
6587 		    "ret_table_id 0x%04x ret_offset 0x%08x ret_buf_size %d\n",
6588 		    ret_next_table, ret_next_index, ret_buf_size);
6589 
6590 		/* Print cluster id */
6591 		u32 print_cluster_id = (u32)cluster_id;
6592 		sbuf_bcat(sbuf, &print_cluster_id, sizeof(print_cluster_id));
6593 		/* Print table id */
6594 		u32 print_table_id = (u32)table_id;
6595 		sbuf_bcat(sbuf, &print_table_id, sizeof(print_table_id));
6596 		/* Print table length */
6597 		u32 print_table_length = (u32)ret_buf_size;
6598 		sbuf_bcat(sbuf, &print_table_length, sizeof(print_table_length));
6599 		/* Print current offset */
6600 		u32 print_curr_offset = offset;
6601 		sbuf_bcat(sbuf, &print_curr_offset, sizeof(print_curr_offset));
6602 		/* Print reserved bytes */
6603 		sbuf_bcat(sbuf, reserved_buf, sizeof(reserved_buf));
6604 		/* Print data */
6605 		sbuf_bcat(sbuf, data_buf, ret_buf_size);
6606 
6607 		/* Adjust loop variables */
6608 		memset(data_buf, 0, data_buf_size);
6609 		bool same_table_next = (table_id == ret_next_table);
6610 		bool last_table_next;
6611 		if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_NEXT_CLUSTER_ID))
6612 			last_table_next =
6613 			    (ret_next_table == 0xffff);
6614 		else
6615 			last_table_next =
6616 			    (ret_next_table == 0xff || ret_next_table == 0xffff);
6617 		bool last_offset_next = (ret_next_index == 0xffffffff || ret_next_index == 0);
6618 
6619 		if ((!same_table_next && !last_offset_next) ||
6620 		    (same_table_next && last_table_next)) {
6621 			device_printf(dev,
6622 			    "%s: Unexpected conditions for same_table_next(%d) last_table_next(%d) last_offset_next(%d), ending cluster (%d)\n",
6623 			    __func__, same_table_next, last_table_next, last_offset_next, cluster_id);
6624 			break;
6625 		}
6626 
6627 		if (!same_table_next && !last_table_next && last_offset_next) {
6628 			/* We've hit the end of the table */
6629 			table_id = ret_next_table;
6630 			offset = 0;
6631 		}
6632 		else if (!same_table_next && last_table_next && last_offset_next) {
6633 			/* We've hit the end of the cluster */
6634 			break;
6635 		}
6636 		else if (same_table_next && !last_table_next && last_offset_next) {
6637 			if (cluster_id == 0x1 && table_id < 39)
6638 				table_id += 1;
6639 			else
6640 				break;
6641 		}
6642 		else { /* if (same_table_next && !last_table_next && !last_offset_next) */
6643 			/* More data left in the table */
6644 			offset = ret_next_index;
6645 		}
6646 	}
6647 
6648 	free(data_buf, M_ICE);
6649 	return ret_next_cluster;
6650 }
6651 
6652 /**
6653  * ice_fw_debug_dump_print_clusters - Print data from FW clusters to sbuf
6654  * @sc: the device softc
6655  * @sbuf: initialized sbuf to print data to
6656  *
6657  * Handles dumping all of the clusters to dump to the indicated sbuf. The
6658  * clusters do dump are determined by the value in the
6659  * fw_debug_dump_cluster_mask field in the sc argument.
6660  *
6661  * @remark Only intended to be used by the sysctl handler
6662  * ice_sysctl_fw_debug_dump_do_dump
6663  */
6664 static void
6665 ice_fw_debug_dump_print_clusters(struct ice_softc *sc, struct sbuf *sbuf)
6666 {
6667 	u16 next_cluster_id, max_cluster_id, start_cluster_id;
6668 	u32 cluster_mask = sc->fw_debug_dump_cluster_mask;
6669 	struct ice_hw *hw = &sc->hw;
6670 	int bit;
6671 
6672 	ice_debug(hw, ICE_DBG_DIAG, "%s: Debug Dump running...\n", __func__);
6673 
6674 	if (ice_is_e830(hw)) {
6675 		max_cluster_id = ICE_AQC_DBG_DUMP_CLUSTER_ID_QUEUE_MNG_E830;
6676 		start_cluster_id = ICE_AQC_DBG_DUMP_CLUSTER_ID_SW_E830;
6677 	} else {
6678 		max_cluster_id = ICE_AQC_DBG_DUMP_CLUSTER_ID_QUEUE_MNG_E810;
6679 		start_cluster_id = ICE_AQC_DBG_DUMP_CLUSTER_ID_SW_E810;
6680 	}
6681 
6682 	if (cluster_mask != 0) {
6683 		for_each_set_bit(bit, &cluster_mask,
6684 		    sizeof(cluster_mask) * BITS_PER_BYTE) {
6685 			ice_fw_debug_dump_print_cluster(sc, sbuf,
6686 			    bit + start_cluster_id);
6687 		}
6688 	} else {
6689 		next_cluster_id = start_cluster_id;
6690 
6691 		/* We don't support QUEUE_MNG and FULL_CSR_SPACE */
6692 		do {
6693 			next_cluster_id =
6694 			    ice_fw_debug_dump_print_cluster(sc, sbuf, next_cluster_id);
6695 		} while ((next_cluster_id != 0) &&
6696 			 (next_cluster_id < max_cluster_id));
6697 	}
6698 
6699 }
6700 
6701 #define ICE_SYSCTL_HELP_FW_DEBUG_DUMP_DO_DUMP \
6702 "\nWrite 1 to output a FW debug dump containing the clusters specified by the" \
6703 "\n\"clusters\" sysctl."						\
6704 "\n"									\
6705 "\nThe \"-b\" flag must be used in order to dump this data as binary data because" \
6706 "\nthis data is opaque and not a string."
6707 
6708 #define ICE_FW_DUMP_BASE_TEXT_SIZE	(1024 * 1024)
6709 #define ICE_FW_DUMP_ALL_TEXT_SIZE	(10 * 1024 * 1024)
6710 #define ICE_FW_DUMP_CLUST0_TEXT_SIZE	(2 * 1024 * 1024)
6711 #define ICE_FW_DUMP_CLUST1_TEXT_SIZE	(128 * 1024)
6712 #define ICE_FW_DUMP_CLUST2_TEXT_SIZE	(2 * 1024 * 1024)
6713 
6714 /**
6715  * ice_sysctl_fw_debug_dump_do_dump - Dump data from FW to sysctl output
6716  * @oidp: sysctl oid structure
6717  * @arg1: pointer to private data structure
6718  * @arg2: unused
6719  * @req: sysctl request pointer
6720  *
6721  * Sysctl handler for the debug.dump.dump sysctl. Prints out a specially-
6722  * formatted dump of some debug FW data intended to be processed by a special
6723  * Intel tool. Prints out the cluster data specified by the "clusters"
6724  * sysctl.
6725  *
6726  * @remark The actual AQ calls and printing are handled by a helper
6727  * function above.
6728  */
6729 static int
6730 ice_sysctl_fw_debug_dump_do_dump(SYSCTL_HANDLER_ARGS)
6731 {
6732 	struct ice_softc *sc = (struct ice_softc *)arg1;
6733 	device_t dev = sc->dev;
6734 	struct sbuf *sbuf;
6735 	int ret;
6736 
6737 	UNREFERENCED_PARAMETER(arg2);
6738 
6739 	ret = priv_check(curthread, PRIV_DRIVER);
6740 	if (ret)
6741 		return (ret);
6742 
6743 	if (ice_driver_is_detaching(sc))
6744 		return (ESHUTDOWN);
6745 
6746 	/* If the user hasn't written "1" to this sysctl yet: */
6747 	if (!ice_test_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP)) {
6748 		/* Avoid output on the first set of reads to this sysctl in
6749 		 * order to prevent a null byte from being written to the
6750 		 * end result when called via sysctl(8).
6751 		 */
6752 		if (req->oldptr == NULL && req->newptr == NULL) {
6753 			ret = SYSCTL_OUT(req, 0, 0);
6754 			return (ret);
6755 		}
6756 
6757 		char input_buf[2] = "";
6758 		ret = sysctl_handle_string(oidp, input_buf, sizeof(input_buf), req);
6759 		if ((ret) || (req->newptr == NULL))
6760 			return (ret);
6761 
6762 		/* If we get '1', then indicate we'll do a dump in the next
6763 		 * sysctl read call.
6764 		 */
6765 		if (input_buf[0] == '1') {
6766 			if (sc->fw_debug_dump_cluster_mask == ICE_AQC_DBG_DUMP_CLUSTER_ID_INVALID) {
6767 				device_printf(dev,
6768 				    "%s: Debug Dump failed because an invalid cluster was specified.\n",
6769 				    __func__);
6770 				return (EINVAL);
6771 			}
6772 
6773 			ice_set_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP);
6774 			return (0);
6775 		}
6776 
6777 		return (EINVAL);
6778 	}
6779 
6780 	/* --- FW debug dump state is set --- */
6781 
6782 
6783 	/* Caller just wants the upper bound for size */
6784 	if (req->oldptr == NULL && req->newptr == NULL) {
6785 		size_t est_output_len = ICE_FW_DUMP_BASE_TEXT_SIZE;
6786 		if (sc->fw_debug_dump_cluster_mask == 0)
6787 			est_output_len += ICE_FW_DUMP_ALL_TEXT_SIZE;
6788 		else {
6789 			if (sc->fw_debug_dump_cluster_mask & 0x1)
6790 				est_output_len += ICE_FW_DUMP_CLUST0_TEXT_SIZE;
6791 			if (sc->fw_debug_dump_cluster_mask & 0x2)
6792 				est_output_len += ICE_FW_DUMP_CLUST1_TEXT_SIZE;
6793 			if (sc->fw_debug_dump_cluster_mask & 0x4)
6794 				est_output_len += ICE_FW_DUMP_CLUST2_TEXT_SIZE;
6795 		}
6796 
6797 		ret = SYSCTL_OUT(req, 0, est_output_len);
6798 		return (ret);
6799 	}
6800 
6801 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6802 	sbuf_clear_flags(sbuf, SBUF_INCLUDENUL);
6803 
6804 	ice_fw_debug_dump_print_clusters(sc, sbuf);
6805 
6806 	sbuf_finish(sbuf);
6807 	sbuf_delete(sbuf);
6808 
6809 	ice_clear_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP);
6810 	return (ret);
6811 }
6812 
6813 /**
6814  * ice_add_debug_sysctls - Add sysctls helpful for debugging the device driver
6815  * @sc: device private structure
6816  *
6817  * Add sysctls related to debugging the device driver. Generally these should
6818  * simply be sysctls which dump internal driver state, to aid in understanding
6819  * what the driver is doing.
6820  */
6821 static void
6822 ice_add_debug_sysctls(struct ice_softc *sc)
6823 {
6824 	struct sysctl_oid *sw_node, *dump_node;
6825 	struct sysctl_oid_list *debug_list, *sw_list, *dump_list;
6826 	device_t dev = sc->dev;
6827 
6828 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
6829 
6830 	debug_list = SYSCTL_CHILDREN(sc->debug_sysctls);
6831 
6832 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "request_reset",
6833 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_WR, sc, 0,
6834 			ice_sysctl_request_reset, "A",
6835 			ICE_SYSCTL_HELP_REQUEST_RESET);
6836 
6837 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "pfr_count",
6838 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6839 		       &sc->soft_stats.pfr_count, 0,
6840 		       "# of PF resets handled");
6841 
6842 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "corer_count",
6843 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6844 		       &sc->soft_stats.corer_count, 0,
6845 		       "# of CORE resets handled");
6846 
6847 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "globr_count",
6848 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6849 		       &sc->soft_stats.globr_count, 0,
6850 		       "# of Global resets handled");
6851 
6852 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "empr_count",
6853 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6854 		       &sc->soft_stats.empr_count, 0,
6855 		       "# of EMP resets handled");
6856 
6857 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "tx_mdd_count",
6858 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6859 		       &sc->soft_stats.tx_mdd_count, 0,
6860 		       "# of Tx MDD events detected");
6861 
6862 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "rx_mdd_count",
6863 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6864 		       &sc->soft_stats.rx_mdd_count, 0,
6865 		       "# of Rx MDD events detected");
6866 
6867 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "state",
6868 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6869 			ice_sysctl_dump_state_flags, "A",
6870 			"Driver State Flags");
6871 
6872 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "set_link",
6873 			ICE_CTLFLAG_DEBUG | CTLTYPE_U8 | CTLFLAG_RW, sc, 0,
6874 			ice_sysctl_debug_set_link, "CU", "Set link");
6875 
6876 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_type_low",
6877 			ICE_CTLFLAG_DEBUG | CTLTYPE_U64 | CTLFLAG_RW, sc, 0,
6878 			ice_sysctl_phy_type_low, "QU",
6879 			"PHY type Low from Get PHY Caps/Set PHY Cfg");
6880 
6881 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_type_high",
6882 			ICE_CTLFLAG_DEBUG | CTLTYPE_U64 | CTLFLAG_RW, sc, 0,
6883 			ice_sysctl_phy_type_high, "QU",
6884 			"PHY type High from Get PHY Caps/Set PHY Cfg");
6885 
6886 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_sw_caps",
6887 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6888 			ice_sysctl_phy_sw_caps, "",
6889 			"Get PHY Capabilities (Software configuration)");
6890 
6891 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_nvm_caps",
6892 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6893 			ice_sysctl_phy_nvm_caps, "",
6894 			"Get PHY Capabilities (NVM configuration)");
6895 
6896 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_topo_caps",
6897 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6898 			ice_sysctl_phy_topo_caps, "",
6899 			"Get PHY Capabilities (Topology configuration)");
6900 
6901 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_link_status",
6902 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6903 			ice_sysctl_phy_link_status, "",
6904 			"Get PHY Link Status");
6905 
6906 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "read_i2c_diag_data",
6907 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6908 			ice_sysctl_read_i2c_diag_data, "A",
6909 			"Dump selected diagnostic data from FW");
6910 
6911 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "fw_build",
6912 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD, &sc->hw.fw_build, 0,
6913 		       "FW Build ID");
6914 
6915 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "os_ddp_version",
6916 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6917 			ice_sysctl_os_pkg_version, "A",
6918 			"DDP package name and version found in ice_ddp");
6919 
6920 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "cur_lldp_persist_status",
6921 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6922 			ice_sysctl_fw_cur_lldp_persist_status, "A",
6923 			"Current LLDP persistent status");
6924 
6925 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "dflt_lldp_persist_status",
6926 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6927 			ice_sysctl_fw_dflt_lldp_persist_status, "A",
6928 			"Default LLDP persistent status");
6929 
6930 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "negotiated_fc",
6931 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6932 			ice_sysctl_negotiated_fc, "A",
6933 			"Current Negotiated Flow Control mode");
6934 
6935 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_PHY_STATISTICS)) {
6936 		SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_statistics",
6937 				CTLTYPE_STRING | CTLFLAG_RD,
6938 				sc, 0, ice_sysctl_dump_phy_stats, "A",
6939 				"Dumps PHY statistics from firmware");
6940 	}
6941 
6942 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "local_dcbx_cfg",
6943 			CTLTYPE_STRING | CTLFLAG_RD, sc, ICE_AQ_LLDP_MIB_LOCAL,
6944 			ice_sysctl_dump_dcbx_cfg, "A",
6945 			"Dumps Local MIB information from firmware");
6946 
6947 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "remote_dcbx_cfg",
6948 			CTLTYPE_STRING | CTLFLAG_RD, sc, ICE_AQ_LLDP_MIB_REMOTE,
6949 			ice_sysctl_dump_dcbx_cfg, "A",
6950 			"Dumps Remote MIB information from firmware");
6951 
6952 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "pf_vsi_cfg", CTLTYPE_STRING | CTLFLAG_RD,
6953 			sc, 0, ice_sysctl_dump_vsi_cfg, "A",
6954 			"Dumps Selected PF VSI parameters from firmware");
6955 
6956 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "query_port_ets", CTLTYPE_STRING | CTLFLAG_RD,
6957 			sc, 0, ice_sysctl_query_port_ets, "A",
6958 			"Prints selected output from Query Port ETS AQ command");
6959 
6960 	SYSCTL_ADD_U64(ctx, debug_list, OID_AUTO, "rx_length_errors",
6961 		       CTLFLAG_RD | CTLFLAG_STATS, &sc->stats.cur.rx_len_errors, 0,
6962 		       "Receive Length Errors (SNAP packets)");
6963 
6964 	sw_node = SYSCTL_ADD_NODE(ctx, debug_list, OID_AUTO, "switch",
6965 				  ICE_CTLFLAG_DEBUG | CTLFLAG_RD, NULL,
6966 				  "Switch Configuration");
6967 	sw_list = SYSCTL_CHILDREN(sw_node);
6968 
6969 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "mac_filters",
6970 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6971 			ice_sysctl_dump_mac_filters, "A",
6972 			"MAC Filters");
6973 
6974 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "vlan_filters",
6975 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6976 			ice_sysctl_dump_vlan_filters, "A",
6977 			"VLAN Filters");
6978 
6979 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "ethertype_filters",
6980 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6981 			ice_sysctl_dump_ethertype_filters, "A",
6982 			"Ethertype Filters");
6983 
6984 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "ethertype_mac_filters",
6985 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6986 			ice_sysctl_dump_ethertype_mac_filters, "A",
6987 			"Ethertype/MAC Filters");
6988 
6989 	dump_node = SYSCTL_ADD_NODE(ctx, debug_list, OID_AUTO, "dump",
6990 				  ICE_CTLFLAG_DEBUG | CTLFLAG_RD, NULL,
6991 				  "Internal FW Dump");
6992 	dump_list = SYSCTL_CHILDREN(dump_node);
6993 
6994 	SYSCTL_ADD_PROC(ctx, dump_list, OID_AUTO, "clusters",
6995 			ICE_CTLFLAG_DEBUG | CTLTYPE_U32 | CTLFLAG_RW, sc, 0,
6996 			ice_sysctl_fw_debug_dump_cluster_setting, "SU",
6997 			ICE_SYSCTL_HELP_FW_DEBUG_DUMP_CLUSTER_SETTING);
6998 
6999 	SYSCTL_ADD_PROC(ctx, dump_list, OID_AUTO, "dump",
7000 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
7001 			ice_sysctl_fw_debug_dump_do_dump, "",
7002 			ICE_SYSCTL_HELP_FW_DEBUG_DUMP_DO_DUMP);
7003 }
7004 
7005 /**
7006  * ice_vsi_disable_tx - Disable (unconfigure) Tx queues for a VSI
7007  * @vsi: the VSI to disable
7008  *
7009  * Disables the Tx queues associated with this VSI. Essentially the opposite
7010  * of ice_cfg_vsi_for_tx.
7011  */
7012 int
7013 ice_vsi_disable_tx(struct ice_vsi *vsi)
7014 {
7015 	struct ice_softc *sc = vsi->sc;
7016 	struct ice_hw *hw = &sc->hw;
7017 	int status;
7018 	u32 *q_teids;
7019 	u16 *q_ids, *q_handles;
7020 	size_t q_teids_size, q_ids_size, q_handles_size;
7021 	int tc, j, buf_idx, err = 0;
7022 
7023 	if (vsi->num_tx_queues > 255)
7024 		return (ENOSYS);
7025 
7026 	q_teids_size = sizeof(*q_teids) * vsi->num_tx_queues;
7027 	q_teids = (u32 *)malloc(q_teids_size, M_ICE, M_NOWAIT|M_ZERO);
7028 	if (!q_teids)
7029 		return (ENOMEM);
7030 
7031 	q_ids_size = sizeof(*q_ids) * vsi->num_tx_queues;
7032 	q_ids = (u16 *)malloc(q_ids_size, M_ICE, M_NOWAIT|M_ZERO);
7033 	if (!q_ids) {
7034 		err = (ENOMEM);
7035 		goto free_q_teids;
7036 	}
7037 
7038 	q_handles_size = sizeof(*q_handles) * vsi->num_tx_queues;
7039 	q_handles = (u16 *)malloc(q_handles_size, M_ICE, M_NOWAIT|M_ZERO);
7040 	if (!q_handles) {
7041 		err = (ENOMEM);
7042 		goto free_q_ids;
7043 	}
7044 
7045 	ice_for_each_traffic_class(tc) {
7046 		struct ice_tc_info *tc_info = &vsi->tc_info[tc];
7047 		u16 start_idx, end_idx;
7048 
7049 		/* Skip rest of disabled TCs once the first
7050 		 * disabled TC is found */
7051 		if (!(vsi->tc_map & BIT(tc)))
7052 			break;
7053 
7054 		/* Fill out TX queue information for this TC */
7055 		start_idx = tc_info->qoffset;
7056 		end_idx = start_idx + tc_info->qcount_tx;
7057 		buf_idx = 0;
7058 		for (j = start_idx; j < end_idx; j++) {
7059 			struct ice_tx_queue *txq = &vsi->tx_queues[j];
7060 
7061 			q_ids[buf_idx] = vsi->tx_qmap[j];
7062 			q_handles[buf_idx] = txq->q_handle;
7063 			q_teids[buf_idx] = txq->q_teid;
7064 			buf_idx++;
7065 		}
7066 
7067 		status = ice_dis_vsi_txq(hw->port_info, vsi->idx, tc, buf_idx,
7068 					 q_handles, q_ids, q_teids, ICE_NO_RESET, 0, NULL);
7069 		if (status == ICE_ERR_DOES_NOT_EXIST) {
7070 			; /* Queues have already been disabled, no need to report this as an error */
7071 		} else if (status == ICE_ERR_RESET_ONGOING) {
7072 			device_printf(sc->dev,
7073 				      "Reset in progress. LAN Tx queues already disabled\n");
7074 			break;
7075 		} else if (status) {
7076 			device_printf(sc->dev,
7077 				      "Failed to disable LAN Tx queues: err %s aq_err %s\n",
7078 				      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7079 			err = (ENODEV);
7080 			break;
7081 		}
7082 
7083 		/* Clear buffers */
7084 		memset(q_teids, 0, q_teids_size);
7085 		memset(q_ids, 0, q_ids_size);
7086 		memset(q_handles, 0, q_handles_size);
7087 	}
7088 
7089 /* free_q_handles: */
7090 	free(q_handles, M_ICE);
7091 free_q_ids:
7092 	free(q_ids, M_ICE);
7093 free_q_teids:
7094 	free(q_teids, M_ICE);
7095 
7096 	return err;
7097 }
7098 
7099 /**
7100  * ice_vsi_set_rss_params - Set the RSS parameters for the VSI
7101  * @vsi: the VSI to configure
7102  *
7103  * Sets the RSS table size and lookup table type for the VSI based on its
7104  * VSI type.
7105  */
7106 static void
7107 ice_vsi_set_rss_params(struct ice_vsi *vsi)
7108 {
7109 	struct ice_softc *sc = vsi->sc;
7110 	struct ice_hw_common_caps *cap;
7111 
7112 	cap = &sc->hw.func_caps.common_cap;
7113 
7114 	switch (vsi->type) {
7115 	case ICE_VSI_PF:
7116 		/* The PF VSI inherits RSS instance of the PF */
7117 		vsi->rss_table_size = cap->rss_table_size;
7118 		vsi->rss_lut_type = ICE_LUT_PF;
7119 		break;
7120 	case ICE_VSI_VF:
7121 	case ICE_VSI_VMDQ2:
7122 		vsi->rss_table_size = ICE_VSIQF_HLUT_ARRAY_SIZE;
7123 		vsi->rss_lut_type = ICE_LUT_VSI;
7124 		break;
7125 	default:
7126 		device_printf(sc->dev,
7127 			      "VSI %d: RSS not supported for VSI type %d\n",
7128 			      vsi->idx, vsi->type);
7129 		break;
7130 	}
7131 }
7132 
7133 /**
7134  * ice_vsi_add_txqs_ctx - Create a sysctl context and node to store txq sysctls
7135  * @vsi: The VSI to add the context for
7136  *
7137  * Creates a sysctl context for storing txq sysctls. Additionally creates
7138  * a node rooted at the given VSI's main sysctl node. This context will be
7139  * used to store per-txq sysctls which may need to be released during the
7140  * driver's lifetime.
7141  */
7142 void
7143 ice_vsi_add_txqs_ctx(struct ice_vsi *vsi)
7144 {
7145 	struct sysctl_oid_list *vsi_list;
7146 
7147 	sysctl_ctx_init(&vsi->txqs_ctx);
7148 
7149 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
7150 
7151 	vsi->txqs_node = SYSCTL_ADD_NODE(&vsi->txqs_ctx, vsi_list, OID_AUTO, "txqs",
7152 					 CTLFLAG_RD, NULL, "Tx Queues");
7153 }
7154 
7155 /**
7156  * ice_vsi_add_rxqs_ctx - Create a sysctl context and node to store rxq sysctls
7157  * @vsi: The VSI to add the context for
7158  *
7159  * Creates a sysctl context for storing rxq sysctls. Additionally creates
7160  * a node rooted at the given VSI's main sysctl node. This context will be
7161  * used to store per-rxq sysctls which may need to be released during the
7162  * driver's lifetime.
7163  */
7164 void
7165 ice_vsi_add_rxqs_ctx(struct ice_vsi *vsi)
7166 {
7167 	struct sysctl_oid_list *vsi_list;
7168 
7169 	sysctl_ctx_init(&vsi->rxqs_ctx);
7170 
7171 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
7172 
7173 	vsi->rxqs_node = SYSCTL_ADD_NODE(&vsi->rxqs_ctx, vsi_list, OID_AUTO, "rxqs",
7174 					 CTLFLAG_RD, NULL, "Rx Queues");
7175 }
7176 
7177 /**
7178  * ice_vsi_del_txqs_ctx - Delete the Tx queue sysctl context for this VSI
7179  * @vsi: The VSI to delete from
7180  *
7181  * Frees the txq sysctl context created for storing the per-queue Tx sysctls.
7182  * Must be called prior to freeing the Tx queue memory, in order to avoid
7183  * having sysctls point at stale memory.
7184  */
7185 void
7186 ice_vsi_del_txqs_ctx(struct ice_vsi *vsi)
7187 {
7188 	device_t dev = vsi->sc->dev;
7189 	int err;
7190 
7191 	if (vsi->txqs_node) {
7192 		err = sysctl_ctx_free(&vsi->txqs_ctx);
7193 		if (err)
7194 			device_printf(dev, "failed to free VSI %d txqs_ctx, err %s\n",
7195 				      vsi->idx, ice_err_str(err));
7196 		vsi->txqs_node = NULL;
7197 	}
7198 }
7199 
7200 /**
7201  * ice_vsi_del_rxqs_ctx - Delete the Rx queue sysctl context for this VSI
7202  * @vsi: The VSI to delete from
7203  *
7204  * Frees the rxq sysctl context created for storing the per-queue Rx sysctls.
7205  * Must be called prior to freeing the Rx queue memory, in order to avoid
7206  * having sysctls point at stale memory.
7207  */
7208 void
7209 ice_vsi_del_rxqs_ctx(struct ice_vsi *vsi)
7210 {
7211 	device_t dev = vsi->sc->dev;
7212 	int err;
7213 
7214 	if (vsi->rxqs_node) {
7215 		err = sysctl_ctx_free(&vsi->rxqs_ctx);
7216 		if (err)
7217 			device_printf(dev, "failed to free VSI %d rxqs_ctx, err %s\n",
7218 				      vsi->idx, ice_err_str(err));
7219 		vsi->rxqs_node = NULL;
7220 	}
7221 }
7222 
7223 /**
7224  * ice_add_txq_sysctls - Add per-queue sysctls for a Tx queue
7225  * @txq: pointer to the Tx queue
7226  *
7227 * Add per-queue sysctls for a given Tx queue. Can't be called during
7228 * ice_add_vsi_sysctls, since the queue memory has not yet been setup.
7229  */
7230 void
7231 ice_add_txq_sysctls(struct ice_tx_queue *txq)
7232 {
7233 	struct ice_vsi *vsi = txq->vsi;
7234 	struct sysctl_ctx_list *ctx = &vsi->txqs_ctx;
7235 	struct sysctl_oid_list *txqs_list, *this_txq_list;
7236 	struct sysctl_oid *txq_node;
7237 	char txq_name[32], txq_desc[32];
7238 
7239 	const struct ice_sysctl_info ctls[] = {
7240 		{ &txq->stats.tx_packets, "tx_packets", "Queue Packets Transmitted" },
7241 		{ &txq->stats.tx_bytes, "tx_bytes", "Queue Bytes Transmitted" },
7242 		{ &txq->stats.mss_too_small, "mss_too_small", "TSO sends with an MSS less than 64" },
7243 		{ &txq->stats.tso, "tso", "TSO packets" },
7244 		{ 0, 0, 0 }
7245 	};
7246 
7247 	const struct ice_sysctl_info *entry = ctls;
7248 
7249 	txqs_list = SYSCTL_CHILDREN(vsi->txqs_node);
7250 
7251 	snprintf(txq_name, sizeof(txq_name), "%u", txq->me);
7252 	snprintf(txq_desc, sizeof(txq_desc), "Tx Queue %u", txq->me);
7253 	txq_node = SYSCTL_ADD_NODE(ctx, txqs_list, OID_AUTO, txq_name,
7254 				   CTLFLAG_RD, NULL, txq_desc);
7255 	this_txq_list = SYSCTL_CHILDREN(txq_node);
7256 
7257 	/* Add the Tx queue statistics */
7258 	while (entry->stat != 0) {
7259 		SYSCTL_ADD_U64(ctx, this_txq_list, OID_AUTO, entry->name,
7260 			       CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
7261 			       entry->description);
7262 		entry++;
7263 	}
7264 
7265 	SYSCTL_ADD_U8(ctx, this_txq_list, OID_AUTO, "tc",
7266 		       CTLFLAG_RD, &txq->tc, 0,
7267 		       "Traffic Class that Queue belongs to");
7268 }
7269 
7270 /**
7271  * ice_add_rxq_sysctls - Add per-queue sysctls for an Rx queue
7272  * @rxq: pointer to the Rx queue
7273  *
7274  * Add per-queue sysctls for a given Rx queue. Can't be called during
7275  * ice_add_vsi_sysctls, since the queue memory has not yet been setup.
7276  */
7277 void
7278 ice_add_rxq_sysctls(struct ice_rx_queue *rxq)
7279 {
7280 	struct ice_vsi *vsi = rxq->vsi;
7281 	struct sysctl_ctx_list *ctx = &vsi->rxqs_ctx;
7282 	struct sysctl_oid_list *rxqs_list, *this_rxq_list;
7283 	struct sysctl_oid *rxq_node;
7284 	char rxq_name[32], rxq_desc[32];
7285 
7286 	const struct ice_sysctl_info ctls[] = {
7287 		{ &rxq->stats.rx_packets, "rx_packets", "Queue Packets Received" },
7288 		{ &rxq->stats.rx_bytes, "rx_bytes", "Queue Bytes Received" },
7289 		{ &rxq->stats.desc_errs, "rx_desc_errs", "Queue Rx Descriptor Errors" },
7290 		{ 0, 0, 0 }
7291 	};
7292 
7293 	const struct ice_sysctl_info *entry = ctls;
7294 
7295 	rxqs_list = SYSCTL_CHILDREN(vsi->rxqs_node);
7296 
7297 	snprintf(rxq_name, sizeof(rxq_name), "%u", rxq->me);
7298 	snprintf(rxq_desc, sizeof(rxq_desc), "Rx Queue %u", rxq->me);
7299 	rxq_node = SYSCTL_ADD_NODE(ctx, rxqs_list, OID_AUTO, rxq_name,
7300 				   CTLFLAG_RD, NULL, rxq_desc);
7301 	this_rxq_list = SYSCTL_CHILDREN(rxq_node);
7302 
7303 	/* Add the Rx queue statistics */
7304 	while (entry->stat != 0) {
7305 		SYSCTL_ADD_U64(ctx, this_rxq_list, OID_AUTO, entry->name,
7306 			       CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
7307 			       entry->description);
7308 		entry++;
7309 	}
7310 
7311 	SYSCTL_ADD_U8(ctx, this_rxq_list, OID_AUTO, "tc",
7312 		       CTLFLAG_RD, &rxq->tc, 0,
7313 		       "Traffic Class that Queue belongs to");
7314 }
7315 
7316 /**
7317  * ice_get_default_rss_key - Obtain a default RSS key
7318  * @seed: storage for the RSS key data
7319  *
7320  * Copies a pre-generated RSS key into the seed memory. The seed pointer must
7321  * point to a block of memory that is at least 40 bytes in size.
7322  *
7323  * The key isn't randomly generated each time this function is called because
7324  * that makes the RSS key change every time we reconfigure RSS. This does mean
7325  * that we're hard coding a possibly 'well known' key. We might want to
7326  * investigate randomly generating this key once during the first call.
7327  */
7328 static void
7329 ice_get_default_rss_key(u8 *seed)
7330 {
7331 	const u8 default_seed[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE] = {
7332 		0x39, 0xed, 0xff, 0x4d, 0x43, 0x58, 0x42, 0xc3, 0x5f, 0xb8,
7333 		0xa5, 0x32, 0x95, 0x65, 0x81, 0xcd, 0x36, 0x79, 0x71, 0x97,
7334 		0xde, 0xa4, 0x41, 0x40, 0x6f, 0x27, 0xe9, 0x81, 0x13, 0xa0,
7335 		0x95, 0x93, 0x5b, 0x1e, 0x9d, 0x27, 0x9d, 0x24, 0x84, 0xb5,
7336 	};
7337 
7338 	bcopy(default_seed, seed, ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE);
7339 }
7340 
7341 /**
7342  * ice_set_rss_key - Configure a given VSI with the default RSS key
7343  * @vsi: the VSI to configure
7344  *
7345  * Program the hardware RSS key. We use rss_getkey to grab the kernel RSS key.
7346  * If the kernel RSS interface is not available, this will fall back to our
7347  * pre-generated hash seed from ice_get_default_rss_key().
7348  */
7349 static int
7350 ice_set_rss_key(struct ice_vsi *vsi)
7351 {
7352 	struct ice_aqc_get_set_rss_keys keydata = { .standard_rss_key = {0} };
7353 	struct ice_softc *sc = vsi->sc;
7354 	struct ice_hw *hw = &sc->hw;
7355 	int status;
7356 
7357 	/*
7358 	 * If the RSS kernel interface is disabled, this will return the
7359 	 * default RSS key above.
7360 	 */
7361 	rss_getkey(keydata.standard_rss_key);
7362 
7363 	status = ice_aq_set_rss_key(hw, vsi->idx, &keydata);
7364 	if (status) {
7365 		device_printf(sc->dev,
7366 			      "ice_aq_set_rss_key status %s, error %s\n",
7367 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7368 		return (EIO);
7369 	}
7370 
7371 	return (0);
7372 }
7373 
7374 /**
7375  * ice_set_rss_flow_flds - Program the RSS hash flows after package init
7376  * @vsi: the VSI to configure
7377  *
7378  * If the package file is initialized, the default RSS flows are reset. We
7379  * need to reprogram the expected hash configuration. We'll use
7380  * rss_gethashconfig() to determine which flows to enable. If RSS kernel
7381  * support is not enabled, this macro will fall back to suitable defaults.
7382  */
7383 static void
7384 ice_set_rss_flow_flds(struct ice_vsi *vsi)
7385 {
7386 	struct ice_softc *sc = vsi->sc;
7387 	struct ice_hw *hw = &sc->hw;
7388 	struct ice_rss_hash_cfg rss_cfg = { 0, 0, ICE_RSS_ANY_HEADERS, false };
7389 	device_t dev = sc->dev;
7390 	int status;
7391 	u_int rss_hash_config;
7392 
7393 	rss_hash_config = rss_gethashconfig();
7394 
7395 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) {
7396 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4;
7397 		rss_cfg.hash_flds = ICE_FLOW_HASH_IPV4;
7398 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7399 		if (status)
7400 			device_printf(dev,
7401 				      "ice_add_rss_cfg on VSI %d failed for ipv4 flow, err %s aq_err %s\n",
7402 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7403 	}
7404 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) {
7405 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_TCP;
7406 		rss_cfg.hash_flds = ICE_HASH_TCP_IPV4;
7407 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7408 		if (status)
7409 			device_printf(dev,
7410 				      "ice_add_rss_cfg on VSI %d failed for tcp4 flow, err %s aq_err %s\n",
7411 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7412 	}
7413 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) {
7414 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_UDP;
7415 		rss_cfg.hash_flds = ICE_HASH_UDP_IPV4;
7416 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7417 		if (status)
7418 			device_printf(dev,
7419 				      "ice_add_rss_cfg on VSI %d failed for udp4 flow, err %s aq_err %s\n",
7420 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7421 	}
7422 	if (rss_hash_config & (RSS_HASHTYPE_RSS_IPV6 | RSS_HASHTYPE_RSS_IPV6_EX)) {
7423 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6;
7424 		rss_cfg.hash_flds = ICE_FLOW_HASH_IPV6;
7425 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7426 		if (status)
7427 			device_printf(dev,
7428 				      "ice_add_rss_cfg on VSI %d failed for ipv6 flow, err %s aq_err %s\n",
7429 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7430 	}
7431 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) {
7432 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_TCP;
7433 		rss_cfg.hash_flds = ICE_HASH_TCP_IPV6;
7434 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7435 		if (status)
7436 			device_printf(dev,
7437 				      "ice_add_rss_cfg on VSI %d failed for tcp6 flow, err %s aq_err %s\n",
7438 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7439 	}
7440 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) {
7441 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_UDP;
7442 		rss_cfg.hash_flds = ICE_HASH_UDP_IPV6;
7443 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7444 		if (status)
7445 			device_printf(dev,
7446 				      "ice_add_rss_cfg on VSI %d failed for udp6 flow, err %s aq_err %s\n",
7447 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7448 	}
7449 
7450 	/* Warn about RSS hash types which are not supported */
7451 	/* coverity[dead_error_condition] */
7452 	if (rss_hash_config & ~ICE_DEFAULT_RSS_HASH_CONFIG) {
7453 		device_printf(dev,
7454 			      "ice_add_rss_cfg on VSI %d could not configure every requested hash type\n",
7455 			      vsi->idx);
7456 	}
7457 }
7458 
7459 /**
7460  * ice_set_rss_lut - Program the RSS lookup table for a VSI
7461  * @vsi: the VSI to configure
7462  *
7463  * Programs the RSS lookup table for a given VSI. We use
7464  * rss_get_indirection_to_bucket which will use the indirection table provided
7465  * by the kernel RSS interface when available. If the kernel RSS interface is
7466  * not available, we will fall back to a simple round-robin fashion queue
7467  * assignment.
7468  */
7469 static int
7470 ice_set_rss_lut(struct ice_vsi *vsi)
7471 {
7472 	struct ice_softc *sc = vsi->sc;
7473 	struct ice_hw *hw = &sc->hw;
7474 	device_t dev = sc->dev;
7475 	struct ice_aq_get_set_rss_lut_params lut_params;
7476 	int status;
7477 	int i, err = 0;
7478 	u8 *lut;
7479 
7480 	lut = (u8 *)malloc(vsi->rss_table_size, M_ICE, M_NOWAIT|M_ZERO);
7481 	if (!lut) {
7482 		device_printf(dev, "Failed to allocate RSS lut memory\n");
7483 		return (ENOMEM);
7484 	}
7485 
7486 	/* Populate the LUT with max no. of queues. If the RSS kernel
7487 	 * interface is disabled, this will assign the lookup table in
7488 	 * a simple round robin fashion
7489 	 */
7490 	for (i = 0; i < vsi->rss_table_size; i++) {
7491 		/* XXX: this needs to be changed if num_rx_queues ever counts
7492 		 * more than just the RSS queues */
7493 		lut[i] = rss_get_indirection_to_bucket(i) % vsi->num_rx_queues;
7494 	}
7495 
7496 	lut_params.vsi_handle = vsi->idx;
7497 	lut_params.lut_size = vsi->rss_table_size;
7498 	lut_params.lut_type = vsi->rss_lut_type;
7499 	lut_params.lut = lut;
7500 	lut_params.global_lut_id = 0;
7501 	status = ice_aq_set_rss_lut(hw, &lut_params);
7502 	if (status) {
7503 		device_printf(dev,
7504 			      "Cannot set RSS lut, err %s aq_err %s\n",
7505 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7506 		err = (EIO);
7507 	}
7508 
7509 	free(lut, M_ICE);
7510 	return err;
7511 }
7512 
7513 /**
7514  * ice_config_rss - Configure RSS for a VSI
7515  * @vsi: the VSI to configure
7516  *
7517  * If FEATURE_RSS is enabled, configures the RSS lookup table and hash key for
7518  * a given VSI.
7519  */
7520 int
7521 ice_config_rss(struct ice_vsi *vsi)
7522 {
7523 	int err;
7524 
7525 	/* Nothing to do, if RSS is not enabled */
7526 	if (!ice_is_bit_set(vsi->sc->feat_en, ICE_FEATURE_RSS))
7527 		return 0;
7528 
7529 	err = ice_set_rss_key(vsi);
7530 	if (err)
7531 		return err;
7532 
7533 	ice_set_rss_flow_flds(vsi);
7534 
7535 	return ice_set_rss_lut(vsi);
7536 }
7537 
/**
 * ice_log_pkg_init - Log a message about status of DDP initialization
 * @sc: the device softc pointer
 * @pkg_status: the status result of ice_copy_and_init_pkg
 *
 * Called by ice_load_pkg after an attempt to download the DDP package
 * contents to the device to log an appropriate message for the system
 * administrator about download status.
 *
 * Two temporary version strings are built for the messages: one from
 * ice_active_pkg_version_str() (reported as the package the device has) and
 * one from ice_os_pkg_version_str() (reported as the package in the ice_ddp
 * module). Both are released before returning. This function only logs; it
 * does not change any driver state.
 *
 * @post ice_is_init_pkg_successful function is used to determine
 * whether the download was successful and DDP package is compatible
 * with this driver. Otherwise driver will transition to Safe Mode.
 */
void
ice_log_pkg_init(struct ice_softc *sc, enum ice_ddp_state pkg_status)
{
	struct ice_hw *hw = &sc->hw;
	device_t dev = sc->dev;
	struct sbuf *active_pkg, *os_pkg;

	/* Version string printed as "The device has package ..." */
	active_pkg = sbuf_new_auto();
	ice_active_pkg_version_str(hw, active_pkg);
	sbuf_finish(active_pkg);

	/* Version string printed as "The ice_ddp module has package ..." */
	os_pkg = sbuf_new_auto();
	ice_os_pkg_version_str(hw, os_pkg);
	sbuf_finish(os_pkg);

	switch (pkg_status) {
	/* States that report a usable package */
	case ICE_DDP_PKG_SUCCESS:
		device_printf(dev,
			      "The DDP package was successfully loaded: %s.\n",
			      sbuf_data(active_pkg));
		break;
	case ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED:
	case ICE_DDP_PKG_ALREADY_LOADED:
		device_printf(dev,
			      "DDP package already present on device: %s.\n",
			      sbuf_data(active_pkg));
		break;
	case ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED:
		device_printf(dev,
			      "The driver could not load the DDP package file because a compatible DDP package is already present on the device.  The device has package %s.  The ice_ddp module has package: %s.\n",
			      sbuf_data(active_pkg),
			      sbuf_data(os_pkg));
		break;
	/* Version mismatches; every message from here on announces Safe Mode */
	case ICE_DDP_PKG_FILE_VERSION_TOO_HIGH:
		device_printf(dev,
			      "The device has a DDP package that is higher than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
			      sbuf_data(active_pkg),
			      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
		break;
	case ICE_DDP_PKG_FILE_VERSION_TOO_LOW:
		device_printf(dev,
			      "The device has a DDP package that is lower than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
			      sbuf_data(active_pkg),
			      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
		break;
	case ICE_DDP_PKG_ALREADY_LOADED_NOT_SUPPORTED:
		/*
		 * This assumes that the active_pkg_ver will not be
		 * initialized if the ice_ddp package version is not
		 * supported.
		 *
		 * An empty active version therefore blames the ice_ddp
		 * module (hw->pkg_ver); a non-empty one blames the package
		 * already on the device (hw->active_pkg_ver). In both
		 * branches the sign of pkg_ver_compatible() selects the
		 * "higher" (> 0) or "lower" (< 0) wording, and 0 falls
		 * through to the generic unknown-error message.
		 */
		if (pkg_ver_empty(&hw->active_pkg_ver, hw->active_pkg_name)) {
			/* The ice_ddp version is not supported */
			if (pkg_ver_compatible(&hw->pkg_ver) > 0) {
				device_printf(dev,
					      "The DDP package in the ice_ddp module is higher than the driver supports.  The ice_ddp module has package %s.  The driver requires version %d.%d.x.x.  Please use an updated driver.  Entering Safe Mode.\n",
					      sbuf_data(os_pkg),
					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
			} else if (pkg_ver_compatible(&hw->pkg_ver) < 0) {
				device_printf(dev,
					      "The DDP package in the ice_ddp module is lower than the driver supports.  The ice_ddp module has package %s.  The driver requires version %d.%d.x.x.  Please use an updated ice_ddp module.  Entering Safe Mode.\n",
					      sbuf_data(os_pkg),
					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
			} else {
				device_printf(dev,
					      "An unknown error occurred when loading the DDP package.  The ice_ddp module has package %s.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
					      sbuf_data(os_pkg),
					      sbuf_data(active_pkg),
					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
			}
		} else {
			/* The package already on the device is not supported */
			if (pkg_ver_compatible(&hw->active_pkg_ver) > 0) {
				device_printf(dev,
					      "The device has a DDP package that is higher than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
					      sbuf_data(active_pkg),
					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
			} else if (pkg_ver_compatible(&hw->active_pkg_ver) < 0) {
				device_printf(dev,
					      "The device has a DDP package that is lower than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
					      sbuf_data(active_pkg),
					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
			} else {
				device_printf(dev,
					      "An unknown error occurred when loading the DDP package.  The ice_ddp module has package %s.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
					      sbuf_data(os_pkg),
					      sbuf_data(active_pkg),
					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
			}
		}
		break;
	/* Problems with the package file, its signature, or the firmware */
	case ICE_DDP_PKG_INVALID_FILE:
		device_printf(dev,
			      "The DDP package in the ice_ddp module is invalid.  Entering Safe Mode\n");
		break;
	case ICE_DDP_PKG_FW_MISMATCH:
		device_printf(dev,
			      "The firmware loaded on the device is not compatible with the DDP package.  Please update the device's NVM.  Entering safe mode.\n");
		break;
	case ICE_DDP_PKG_NO_SEC_MANIFEST:
	case ICE_DDP_PKG_FILE_SIGNATURE_INVALID:
		device_printf(dev,
			      "The DDP package in the ice_ddp module cannot be loaded because its signature is not valid.  Please use a valid ice_ddp module.  Entering Safe Mode.\n");
		break;
	case ICE_DDP_PKG_SECURE_VERSION_NBR_TOO_LOW:
		device_printf(dev,
			      "The DDP package in the ice_ddp module could not be loaded because its security revision is too low.  Please use an updated ice_ddp module.  Entering Safe Mode.\n");
		break;
	case ICE_DDP_PKG_MANIFEST_INVALID:
	case ICE_DDP_PKG_BUFFER_INVALID:
		device_printf(dev,
			      "An error occurred on the device while loading the DDP package.  Entering Safe Mode.\n");
		break;
	/* Any state without a dedicated message */
	default:
		device_printf(dev,
			 "An unknown error occurred when loading the DDP package.  Entering Safe Mode.\n");
		break;
	}

	/* Release both temporary version strings */
	sbuf_delete(active_pkg);
	sbuf_delete(os_pkg);
}
7672 
7673 /**
7674  * ice_load_pkg_file - Load the DDP package file using firmware_get
7675  * @sc: device private softc
7676  *
7677  * Use firmware_get to load the DDP package memory and then request that
7678  * firmware download the package contents and program the relevant hardware
7679  * bits.
7680  *
7681  * This function makes a copy of the DDP package memory which is tracked in
7682  * the ice_hw structure. The copy will be managed and released by
7683  * ice_deinit_hw(). This allows the firmware reference to be immediately
7684  * released using firmware_put.
7685  */
7686 int
7687 ice_load_pkg_file(struct ice_softc *sc)
7688 {
7689 	struct ice_hw *hw = &sc->hw;
7690 	device_t dev = sc->dev;
7691 	enum ice_ddp_state state;
7692 	const struct firmware *pkg;
7693 	int status = 0;
7694 	u8 cached_layer_count;
7695 	u8 *buf_copy;
7696 
7697 	pkg = firmware_get("ice_ddp");
7698 	if (!pkg) {
7699 		device_printf(dev,
7700 		    "The DDP package module (ice_ddp) failed to load or could not be found. Entering Safe Mode.\n");
7701 		if (cold)
7702 			device_printf(dev,
7703 			    "The DDP package module cannot be automatically loaded while booting. You may want to specify ice_ddp_load=\"YES\" in your loader.conf\n");
7704 		status = ICE_ERR_CFG;
7705 		goto err_load_pkg;
7706 	}
7707 
7708 	/* Check for topology change */
7709 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_TX_BALANCE)) {
7710 		cached_layer_count = hw->num_tx_sched_layers;
7711 		buf_copy = (u8 *)malloc(pkg->datasize, M_ICE, M_NOWAIT);
7712 		if (buf_copy == NULL)
7713 			return ICE_ERR_NO_MEMORY;
7714 		memcpy(buf_copy, pkg->data, pkg->datasize);
7715 		status = ice_cfg_tx_topo(&sc->hw, buf_copy, pkg->datasize);
7716 		free(buf_copy, M_ICE);
7717 		/* Success indicates a change was made */
7718 		if (!status) {
7719 			/* 9 -> 5 */
7720 			if (cached_layer_count == 9)
7721 				device_printf(dev,
7722 				    "Transmit balancing feature enabled\n");
7723 			else
7724 				device_printf(dev,
7725 				    "Transmit balancing feature disabled\n");
7726 			ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_en);
7727 			return (status);
7728 		} else if (status == ICE_ERR_CFG) {
7729 			/* Status is ICE_ERR_CFG when DDP does not support transmit balancing */
7730 			device_printf(dev,
7731 			    "DDP package does not support transmit balancing feature - please update to the latest DDP package and try again\n");
7732 		} else if (status == ICE_ERR_ALREADY_EXISTS) {
7733 			/* Requested config already loaded */
7734 		} else if (status == ICE_ERR_AQ_ERROR) {
7735 			device_printf(dev,
7736 			    "Error configuring transmit balancing: %s\n",
7737 			    ice_status_str(status));
7738 		}
7739 	}
7740 
7741 	/* Copy and download the pkg contents */
7742 	state = ice_copy_and_init_pkg(hw, (const u8 *)pkg->data, pkg->datasize);
7743 
7744 	/* Release the firmware reference */
7745 	firmware_put(pkg, FIRMWARE_UNLOAD);
7746 
7747 	/* Check the active DDP package version and log a message */
7748 	ice_log_pkg_init(sc, state);
7749 
7750 	/* Place the driver into safe mode */
7751 	if (ice_is_init_pkg_successful(state))
7752 		return (ICE_ERR_ALREADY_EXISTS);
7753 
7754 err_load_pkg:
7755 	ice_zero_bitmap(sc->feat_cap, ICE_FEATURE_COUNT);
7756 	ice_zero_bitmap(sc->feat_en, ICE_FEATURE_COUNT);
7757 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
7758 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
7759 
7760 	return (status);
7761 }
7762 
7763 /**
7764  * ice_get_ifnet_counter - Retrieve counter value for a given ifnet counter
7765  * @vsi: the vsi to retrieve the value for
7766  * @counter: the counter type to retrieve
7767  *
7768  * Returns the value for a given ifnet counter. To do so, we calculate the
7769  * value based on the matching hardware statistics.
7770  */
7771 uint64_t
7772 ice_get_ifnet_counter(struct ice_vsi *vsi, ift_counter counter)
7773 {
7774 	struct ice_hw_port_stats *hs = &vsi->sc->stats.cur;
7775 	struct ice_eth_stats *es = &vsi->hw_stats.cur;
7776 
7777 	/* For some statistics, especially those related to error flows, we do
7778 	 * not have per-VSI counters. In this case, we just report the global
7779 	 * counters.
7780 	 */
7781 
7782 	switch (counter) {
7783 	case IFCOUNTER_IPACKETS:
7784 		return (es->rx_unicast + es->rx_multicast + es->rx_broadcast);
7785 	case IFCOUNTER_IERRORS:
7786 		return (hs->crc_errors + hs->illegal_bytes +
7787 			hs->mac_local_faults + hs->mac_remote_faults +
7788 			hs->rx_undersize + hs->rx_oversize + hs->rx_fragments +
7789 			hs->rx_jabber);
7790 	case IFCOUNTER_OPACKETS:
7791 		return (es->tx_unicast + es->tx_multicast + es->tx_broadcast);
7792 	case IFCOUNTER_OERRORS:
7793 		return (es->tx_errors);
7794 	case IFCOUNTER_COLLISIONS:
7795 		return (0);
7796 	case IFCOUNTER_IBYTES:
7797 		return (es->rx_bytes);
7798 	case IFCOUNTER_OBYTES:
7799 		return (es->tx_bytes);
7800 	case IFCOUNTER_IMCASTS:
7801 		return (es->rx_multicast);
7802 	case IFCOUNTER_OMCASTS:
7803 		return (es->tx_multicast);
7804 	case IFCOUNTER_IQDROPS:
7805 		return (es->rx_discards);
7806 	case IFCOUNTER_OQDROPS:
7807 		return (hs->tx_dropped_link_down);
7808 	case IFCOUNTER_NOPROTO:
7809 		return (es->rx_unknown_protocol);
7810 	default:
7811 		return if_get_counter_default(vsi->sc->ifp, counter);
7812 	}
7813 }
7814 
7815 /**
7816  * ice_save_pci_info - Save PCI configuration fields in HW struct
7817  * @hw: the ice_hw struct to save the PCI information in
7818  * @dev: the device to get the PCI information from
7819  *
7820  * This should only be called once, early in the device attach
7821  * process.
7822  */
7823 void
7824 ice_save_pci_info(struct ice_hw *hw, device_t dev)
7825 {
7826 	hw->vendor_id = pci_get_vendor(dev);
7827 	hw->device_id = pci_get_device(dev);
7828 	hw->subsystem_vendor_id = pci_get_subvendor(dev);
7829 	hw->subsystem_device_id = pci_get_subdevice(dev);
7830 	hw->revision_id = pci_get_revid(dev);
7831 	hw->bus.device = pci_get_slot(dev);
7832 	hw->bus.func = pci_get_function(dev);
7833 }
7834 
7835 /**
7836  * ice_replay_all_vsi_cfg - Replace configuration for all VSIs after reset
7837  * @sc: the device softc
7838  *
7839  * Replace the configuration for each VSI, and then cleanup replay
7840  * information. Called after a hardware reset in order to reconfigure the
7841  * active VSIs.
7842  */
7843 int
7844 ice_replay_all_vsi_cfg(struct ice_softc *sc)
7845 {
7846 	struct ice_hw *hw = &sc->hw;
7847 	int status;
7848 	int i;
7849 
7850 	for (i = 0 ; i < sc->num_available_vsi; i++) {
7851 		struct ice_vsi *vsi = sc->all_vsi[i];
7852 
7853 		if (!vsi)
7854 			continue;
7855 
7856 		status = ice_replay_vsi(hw, vsi->idx);
7857 		if (status) {
7858 			device_printf(sc->dev, "Failed to replay VSI %d, err %s aq_err %s\n",
7859 				      vsi->idx, ice_status_str(status),
7860 				      ice_aq_str(hw->adminq.sq_last_status));
7861 			return (EIO);
7862 		}
7863 	}
7864 
7865 	/* Cleanup replay filters after successful reconfiguration */
7866 	ice_replay_post(hw);
7867 	return (0);
7868 }
7869 
7870 /**
7871  * ice_clean_vsi_rss_cfg - Cleanup RSS configuration for a given VSI
7872  * @vsi: pointer to the VSI structure
7873  *
7874  * Cleanup the advanced RSS configuration for a given VSI. This is necessary
7875  * during driver removal to ensure that all RSS resources are properly
7876  * released.
7877  *
7878  * @remark this function doesn't report an error as it is expected to be
7879  * called during driver reset and unload, and there isn't much the driver can
7880  * do if freeing RSS resources fails.
7881  */
7882 static void
7883 ice_clean_vsi_rss_cfg(struct ice_vsi *vsi)
7884 {
7885 	struct ice_softc *sc = vsi->sc;
7886 	struct ice_hw *hw = &sc->hw;
7887 	device_t dev = sc->dev;
7888 	int status;
7889 
7890 	status = ice_rem_vsi_rss_cfg(hw, vsi->idx);
7891 	if (status)
7892 		device_printf(dev,
7893 			      "Failed to remove RSS configuration for VSI %d, err %s\n",
7894 			      vsi->idx, ice_status_str(status));
7895 
7896 	/* Remove this VSI from the RSS list */
7897 	ice_rem_vsi_rss_list(hw, vsi->idx);
7898 }
7899 
7900 /**
7901  * ice_clean_all_vsi_rss_cfg - Cleanup RSS configuration for all VSIs
7902  * @sc: the device softc pointer
7903  *
7904  * Cleanup the advanced RSS configuration for all VSIs on a given PF
7905  * interface.
7906  *
7907  * @remark This should be called while preparing for a reset, to cleanup stale
7908  * RSS configuration for all VSIs.
7909  */
7910 void
7911 ice_clean_all_vsi_rss_cfg(struct ice_softc *sc)
7912 {
7913 	int i;
7914 
7915 	/* No need to cleanup if RSS is not enabled */
7916 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_RSS))
7917 		return;
7918 
7919 	for (i = 0; i < sc->num_available_vsi; i++) {
7920 		struct ice_vsi *vsi = sc->all_vsi[i];
7921 
7922 		if (vsi)
7923 			ice_clean_vsi_rss_cfg(vsi);
7924 	}
7925 }
7926 
7927 /**
7928  * ice_requested_fec_mode - Return the requested FEC mode as a string
7929  * @pi: The port info structure
7930  *
7931  * Return a string representing the requested FEC mode.
7932  */
7933 static const char *
7934 ice_requested_fec_mode(struct ice_port_info *pi)
7935 {
7936 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
7937 	int status;
7938 
7939 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
7940 				     &pcaps, NULL);
7941 	if (status)
7942 		/* Just report unknown if we can't get capabilities */
7943 		return "Unknown";
7944 
7945 	/* Check if RS-FEC has been requested first */
7946 	if (pcaps.link_fec_options & (ICE_AQC_PHY_FEC_25G_RS_528_REQ |
7947 				      ICE_AQC_PHY_FEC_25G_RS_544_REQ))
7948 		return ice_fec_str(ICE_FEC_RS);
7949 
7950 	/* If RS FEC has not been requested, then check BASE-R */
7951 	if (pcaps.link_fec_options & (ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ |
7952 				      ICE_AQC_PHY_FEC_25G_KR_REQ))
7953 		return ice_fec_str(ICE_FEC_BASER);
7954 
7955 	return ice_fec_str(ICE_FEC_NONE);
7956 }
7957 
7958 /**
7959  * ice_negotiated_fec_mode - Return the negotiated FEC mode as a string
7960  * @pi: The port info structure
7961  *
7962  * Return a string representing the current FEC mode.
7963  */
7964 static const char *
7965 ice_negotiated_fec_mode(struct ice_port_info *pi)
7966 {
7967 	/* First, check if RS has been requested first */
7968 	if (pi->phy.link_info.fec_info & (ICE_AQ_LINK_25G_RS_528_FEC_EN |
7969 					  ICE_AQ_LINK_25G_RS_544_FEC_EN))
7970 		return ice_fec_str(ICE_FEC_RS);
7971 
7972 	/* If RS FEC has not been requested, then check BASE-R */
7973 	if (pi->phy.link_info.fec_info & ICE_AQ_LINK_25G_KR_FEC_EN)
7974 		return ice_fec_str(ICE_FEC_BASER);
7975 
7976 	return ice_fec_str(ICE_FEC_NONE);
7977 }
7978 
7979 /**
7980  * ice_autoneg_mode - Return string indicating of autoneg completed
7981  * @pi: The port info structure
7982  *
7983  * Return "True" if autonegotiation is completed, "False" otherwise.
7984  */
7985 static const char *
7986 ice_autoneg_mode(struct ice_port_info *pi)
7987 {
7988 	if (pi->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)
7989 		return "True";
7990 	else
7991 		return "False";
7992 }
7993 
7994 /**
7995  * ice_flowcontrol_mode - Return string indicating the Flow Control mode
7996  * @pi: The port info structure
7997  *
7998  * Returns the current Flow Control mode as a string.
7999  */
8000 static const char *
8001 ice_flowcontrol_mode(struct ice_port_info *pi)
8002 {
8003 	return ice_fc_str(pi->fc.current_mode);
8004 }
8005 
8006 /**
8007  * ice_link_up_msg - Log a link up message with associated info
8008  * @sc: the device private softc
8009  *
8010  * Log a link up message with LOG_NOTICE message level. Include information
8011  * about the duplex, FEC mode, autonegotiation and flow control.
8012  */
8013 void
8014 ice_link_up_msg(struct ice_softc *sc)
8015 {
8016 	struct ice_hw *hw = &sc->hw;
8017 	struct ifnet *ifp = sc->ifp;
8018 	const char *speed, *req_fec, *neg_fec, *autoneg, *flowcontrol;
8019 
8020 	speed = ice_aq_speed_to_str(hw->port_info);
8021 	req_fec = ice_requested_fec_mode(hw->port_info);
8022 	neg_fec = ice_negotiated_fec_mode(hw->port_info);
8023 	autoneg = ice_autoneg_mode(hw->port_info);
8024 	flowcontrol = ice_flowcontrol_mode(hw->port_info);
8025 
8026 	log(LOG_NOTICE, "%s: Link is up, %s Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
8027 	    if_name(ifp), speed, req_fec, neg_fec, autoneg, flowcontrol);
8028 }
8029 
8030 /**
8031  * ice_update_laa_mac - Update MAC address if Locally Administered
8032  * @sc: the device softc
8033  *
8034  * Update the device MAC address when a Locally Administered Address is
8035  * assigned.
8036  *
8037  * This function does *not* update the MAC filter list itself. Instead, it
8038  * should be called after ice_rm_pf_default_mac_filters, so that the previous
8039  * address filter will be removed, and before ice_cfg_pf_default_mac_filters,
8040  * so that the new address filter will be assigned.
8041  */
8042 int
8043 ice_update_laa_mac(struct ice_softc *sc)
8044 {
8045 	const u8 *lladdr = (const u8 *)if_getlladdr(sc->ifp);
8046 	struct ice_hw *hw = &sc->hw;
8047 	int status;
8048 
8049 	/* If the address is the same, then there is nothing to update */
8050 	if (!memcmp(lladdr, hw->port_info->mac.lan_addr, ETHER_ADDR_LEN))
8051 		return (0);
8052 
8053 	/* Reject Multicast addresses */
8054 	if (ETHER_IS_MULTICAST(lladdr))
8055 		return (EINVAL);
8056 
8057 	status = ice_aq_manage_mac_write(hw, lladdr, ICE_AQC_MAN_MAC_UPDATE_LAA_WOL, NULL);
8058 	if (status) {
8059 		device_printf(sc->dev, "Failed to write mac %6D to firmware, err %s aq_err %s\n",
8060 			      lladdr, ":", ice_status_str(status),
8061 			      ice_aq_str(hw->adminq.sq_last_status));
8062 		return (EFAULT);
8063 	}
8064 
8065 	/* Copy the address into place of the LAN address. */
8066 	bcopy(lladdr, hw->port_info->mac.lan_addr, ETHER_ADDR_LEN);
8067 
8068 	return (0);
8069 }
8070 
8071 /**
8072  * ice_get_and_print_bus_info - Save (PCI) bus info and print messages
8073  * @sc: device softc
8074  *
8075  * This will potentially print out a warning message if bus bandwidth
8076  * is insufficient for full-speed operation. This will not print out anything
8077  * for E82x devices since those are in SoCs, do not report valid PCIe info,
8078  * and cannot be moved to a different slot.
8079  *
8080  * This should only be called once, during the attach process, after
8081  * hw->port_info has been filled out with port link topology information
8082  * (from the Get PHY Capabilities Admin Queue command).
8083  */
8084 void
8085 ice_get_and_print_bus_info(struct ice_softc *sc)
8086 {
8087 	struct ice_hw *hw = &sc->hw;
8088 	device_t dev = sc->dev;
8089 	u16 pci_link_status;
8090 	int offset;
8091 
8092 	if (!ice_is_e810(hw) && !ice_is_e830(hw))
8093 		return;
8094 
8095 	pci_find_cap(dev, PCIY_EXPRESS, &offset);
8096 	pci_link_status = pci_read_config(dev, offset + PCIER_LINK_STA, 2);
8097 
8098 	/* Fill out hw struct with PCIE link status info */
8099 	ice_set_pci_link_status_data(hw, pci_link_status);
8100 
8101 	/* Use info to print out bandwidth messages */
8102 	ice_print_bus_link_data(dev, hw);
8103 
8104 	if (ice_pcie_bandwidth_check(sc)) {
8105 		device_printf(dev,
8106 		    "PCI-Express bandwidth available for this device may be insufficient for optimal performance.\n");
8107 		device_printf(dev,
8108 		    "Please move the device to a different PCI-e link with more lanes and/or higher transfer rate.\n");
8109 	}
8110 }
8111 
8112 /**
8113  * ice_pcie_bus_speed_to_rate - Convert driver bus speed enum value to
8114  * a 64-bit baudrate.
8115  * @speed: enum value to convert
8116  *
8117  * This only goes up to PCIE Gen 5.
8118  */
8119 static uint64_t
8120 ice_pcie_bus_speed_to_rate(enum ice_pcie_bus_speed speed)
8121 {
8122 	/* If the PCI-E speed is Gen1 or Gen2, then report
8123 	 * only 80% of bus speed to account for encoding overhead.
8124 	 */
8125 	switch (speed) {
8126 	case ice_pcie_speed_2_5GT:
8127 		return IF_Gbps(2);
8128 	case ice_pcie_speed_5_0GT:
8129 		return IF_Gbps(4);
8130 	case ice_pcie_speed_8_0GT:
8131 		return IF_Gbps(8);
8132 	case ice_pcie_speed_16_0GT:
8133 		return IF_Gbps(16);
8134 	case ice_pcie_speed_32_0GT:
8135 		return IF_Gbps(32);
8136 	case ice_pcie_speed_unknown:
8137 	default:
8138 		return 0;
8139 	}
8140 }
8141 
8142 /**
8143  * ice_pcie_lnk_width_to_int - Convert driver pci-e width enum value to
8144  * a 32-bit number.
8145  * @width: enum value to convert
8146  */
8147 static int
8148 ice_pcie_lnk_width_to_int(enum ice_pcie_link_width width)
8149 {
8150 	switch (width) {
8151 	case ice_pcie_lnk_x1:
8152 		return (1);
8153 	case ice_pcie_lnk_x2:
8154 		return (2);
8155 	case ice_pcie_lnk_x4:
8156 		return (4);
8157 	case ice_pcie_lnk_x8:
8158 		return (8);
8159 	case ice_pcie_lnk_x12:
8160 		return (12);
8161 	case ice_pcie_lnk_x16:
8162 		return (16);
8163 	case ice_pcie_lnk_x32:
8164 		return (32);
8165 	case ice_pcie_lnk_width_resrv:
8166 	case ice_pcie_lnk_width_unknown:
8167 	default:
8168 		return (0);
8169 	}
8170 }
8171 
8172 /**
8173  * ice_pcie_bandwidth_check - Check if PCI-E bandwidth is sufficient for
8174  * full-speed device operation.
8175  * @sc: adapter softc
8176  *
8177  * Returns 0 if sufficient; 1 if not.
8178  */
8179 static uint8_t
8180 ice_pcie_bandwidth_check(struct ice_softc *sc)
8181 {
8182 	struct ice_hw *hw = &sc->hw;
8183 	int num_ports, pcie_width;
8184 	u64 pcie_speed, port_speed;
8185 
8186 	MPASS(hw->port_info);
8187 
8188 	num_ports = bitcount32(hw->func_caps.common_cap.valid_functions);
8189 	port_speed = ice_phy_types_to_max_rate(hw->port_info);
8190 	pcie_speed = ice_pcie_bus_speed_to_rate(hw->bus.speed);
8191 	pcie_width = ice_pcie_lnk_width_to_int(hw->bus.width);
8192 
8193 	/*
8194 	 * If 2x100 on E810 or 2x200 on E830, clamp ports to 1 -- 2nd port is
8195 	 * intended for failover.
8196 	 */
8197 	if ((port_speed >= IF_Gbps(100)) &&
8198 	    ((port_speed == IF_Gbps(100) && ice_is_e810(hw)) ||
8199 	     (port_speed == IF_Gbps(200) && ice_is_e830(hw))))
8200 		num_ports = 1;
8201 
8202 	return !!((num_ports * port_speed) > pcie_speed * pcie_width);
8203 }
8204 
8205 /**
8206  * ice_print_bus_link_data - Print PCI-E bandwidth information
8207  * @dev: device to print string for
8208  * @hw: hw struct with PCI-e link information
8209  */
8210 static void
8211 ice_print_bus_link_data(device_t dev, struct ice_hw *hw)
8212 {
8213         device_printf(dev, "PCI Express Bus: Speed %s Width %s\n",
8214             ((hw->bus.speed == ice_pcie_speed_32_0GT) ? "32.0GT/s" :
8215             (hw->bus.speed == ice_pcie_speed_16_0GT) ? "16.0GT/s" :
8216             (hw->bus.speed == ice_pcie_speed_8_0GT) ? "8.0GT/s" :
8217             (hw->bus.speed == ice_pcie_speed_5_0GT) ? "5.0GT/s" :
8218             (hw->bus.speed == ice_pcie_speed_2_5GT) ? "2.5GT/s" : "Unknown"),
8219             (hw->bus.width == ice_pcie_lnk_x32) ? "x32" :
8220             (hw->bus.width == ice_pcie_lnk_x16) ? "x16" :
8221             (hw->bus.width == ice_pcie_lnk_x12) ? "x12" :
8222             (hw->bus.width == ice_pcie_lnk_x8) ? "x8" :
8223             (hw->bus.width == ice_pcie_lnk_x4) ? "x4" :
8224             (hw->bus.width == ice_pcie_lnk_x2) ? "x2" :
8225             (hw->bus.width == ice_pcie_lnk_x1) ? "x1" : "Unknown");
8226 }
8227 
8228 /**
8229  * ice_set_pci_link_status_data - store PCI bus info
8230  * @hw: pointer to hardware structure
8231  * @link_status: the link status word from PCI config space
8232  *
8233  * Stores the PCI bus info (speed, width, type) within the ice_hw structure
8234  **/
8235 static void
8236 ice_set_pci_link_status_data(struct ice_hw *hw, u16 link_status)
8237 {
8238 	u16 reg;
8239 
8240 	hw->bus.type = ice_bus_pci_express;
8241 
8242 	reg = (link_status & PCIEM_LINK_STA_WIDTH) >> 4;
8243 
8244 	switch (reg) {
8245 	case ice_pcie_lnk_x1:
8246 	case ice_pcie_lnk_x2:
8247 	case ice_pcie_lnk_x4:
8248 	case ice_pcie_lnk_x8:
8249 	case ice_pcie_lnk_x12:
8250 	case ice_pcie_lnk_x16:
8251 	case ice_pcie_lnk_x32:
8252 		hw->bus.width = (enum ice_pcie_link_width)reg;
8253 		break;
8254 	default:
8255 		hw->bus.width = ice_pcie_lnk_width_unknown;
8256 		break;
8257 	}
8258 
8259 	reg = (link_status & PCIEM_LINK_STA_SPEED) + 0x13;
8260 
8261 	switch (reg) {
8262 	case ice_pcie_speed_2_5GT:
8263 	case ice_pcie_speed_5_0GT:
8264 	case ice_pcie_speed_8_0GT:
8265 	case ice_pcie_speed_16_0GT:
8266 	case ice_pcie_speed_32_0GT:
8267 		hw->bus.speed = (enum ice_pcie_bus_speed)reg;
8268 		break;
8269 	default:
8270 		hw->bus.speed = ice_pcie_speed_unknown;
8271 		break;
8272 	}
8273 }
8274 
8275 /**
8276  * ice_init_link_events - Initialize Link Status Events mask
8277  * @sc: the device softc
8278  *
8279  * Initialize the Link Status Events mask to disable notification of link
8280  * events we don't care about in software. Also request that link status
8281  * events be enabled.
8282  */
8283 int
8284 ice_init_link_events(struct ice_softc *sc)
8285 {
8286 	struct ice_hw *hw = &sc->hw;
8287 	int status;
8288 	u16 wanted_events;
8289 
8290 	/* Set the bits for the events that we want to be notified by */
8291 	wanted_events = (ICE_AQ_LINK_EVENT_UPDOWN |
8292 			 ICE_AQ_LINK_EVENT_MEDIA_NA |
8293 			 ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL);
8294 
8295 	/* request that every event except the wanted events be masked */
8296 	status = ice_aq_set_event_mask(hw, hw->port_info->lport, ~wanted_events, NULL);
8297 	if (status) {
8298 		device_printf(sc->dev,
8299 			      "Failed to set link status event mask, err %s aq_err %s\n",
8300 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
8301 		return (EIO);
8302 	}
8303 
8304 	/* Request link info with the LSE bit set to enable link status events */
8305 	status = ice_aq_get_link_info(hw->port_info, true, NULL, NULL);
8306 	if (status) {
8307 		device_printf(sc->dev,
8308 			      "Failed to enable link status events, err %s aq_err %s\n",
8309 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
8310 		return (EIO);
8311 	}
8312 
8313 	return (0);
8314 }
8315 
8316 #ifndef GL_MDET_TX_TCLAN
8317 /* Temporarily use this redefinition until the definition is fixed */
8318 #define GL_MDET_TX_TCLAN	E800_GL_MDET_TX_TCLAN
8319 #define PF_MDET_TX_TCLAN	E800_PF_MDET_TX_TCLAN
8320 #endif /* !defined(GL_MDET_TX_TCLAN) */
8321 /**
8322  * ice_handle_mdd_event - Handle possibly malicious events
8323  * @sc: the device softc
8324  *
8325  * Called by the admin task if an MDD detection interrupt is triggered.
8326  * Identifies possibly malicious events coming from VFs. Also triggers for
8327  * similar incorrect behavior from the PF as well.
8328  */
8329 void
8330 ice_handle_mdd_event(struct ice_softc *sc)
8331 {
8332 	struct ice_hw *hw = &sc->hw;
8333 	bool mdd_detected = false, request_reinit = false;
8334 	device_t dev = sc->dev;
8335 	u32 reg;
8336 
8337 	if (!ice_testandclear_state(&sc->state, ICE_STATE_MDD_PENDING))
8338 		return;
8339 
8340 	reg = rd32(hw, GL_MDET_TX_TCLAN);
8341 	if (reg & GL_MDET_TX_TCLAN_VALID_M) {
8342 		u8 pf_num  = (reg & GL_MDET_TX_TCLAN_PF_NUM_M) >> GL_MDET_TX_TCLAN_PF_NUM_S;
8343 		u16 vf_num = (reg & GL_MDET_TX_TCLAN_VF_NUM_M) >> GL_MDET_TX_TCLAN_VF_NUM_S;
8344 		u8 event   = (reg & GL_MDET_TX_TCLAN_MAL_TYPE_M) >> GL_MDET_TX_TCLAN_MAL_TYPE_S;
8345 		u16 queue  = (reg & GL_MDET_TX_TCLAN_QNUM_M) >> GL_MDET_TX_TCLAN_QNUM_S;
8346 
8347 		device_printf(dev, "Malicious Driver Detection Tx Descriptor check event '%s' on Tx queue %u PF# %u VF# %u\n",
8348 			      ice_mdd_tx_tclan_str(event), queue, pf_num, vf_num);
8349 
8350 		/* Only clear this event if it matches this PF, that way other
8351 		 * PFs can read the event and determine VF and queue number.
8352 		 */
8353 		if (pf_num == hw->pf_id)
8354 			wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff);
8355 
8356 		mdd_detected = true;
8357 	}
8358 
8359 	/* Determine what triggered the MDD event */
8360 	reg = rd32(hw, GL_MDET_TX_PQM);
8361 	if (reg & GL_MDET_TX_PQM_VALID_M) {
8362 		u8 pf_num  = (reg & GL_MDET_TX_PQM_PF_NUM_M) >> GL_MDET_TX_PQM_PF_NUM_S;
8363 		u16 vf_num = (reg & GL_MDET_TX_PQM_VF_NUM_M) >> GL_MDET_TX_PQM_VF_NUM_S;
8364 		u8 event   = (reg & GL_MDET_TX_PQM_MAL_TYPE_M) >> GL_MDET_TX_PQM_MAL_TYPE_S;
8365 		u16 queue  = (reg & GL_MDET_TX_PQM_QNUM_M) >> GL_MDET_TX_PQM_QNUM_S;
8366 
8367 		device_printf(dev, "Malicious Driver Detection Tx Quanta check event '%s' on Tx queue %u PF# %u VF# %u\n",
8368 			      ice_mdd_tx_pqm_str(event), queue, pf_num, vf_num);
8369 
8370 		/* Only clear this event if it matches this PF, that way other
8371 		 * PFs can read the event and determine VF and queue number.
8372 		 */
8373 		if (pf_num == hw->pf_id)
8374 			wr32(hw, GL_MDET_TX_PQM, 0xffffffff);
8375 
8376 		mdd_detected = true;
8377 	}
8378 
8379 	reg = rd32(hw, GL_MDET_RX);
8380 	if (reg & GL_MDET_RX_VALID_M) {
8381 		u8 pf_num  = (reg & GL_MDET_RX_PF_NUM_M) >> GL_MDET_RX_PF_NUM_S;
8382 		u16 vf_num = (reg & GL_MDET_RX_VF_NUM_M) >> GL_MDET_RX_VF_NUM_S;
8383 		u8 event   = (reg & GL_MDET_RX_MAL_TYPE_M) >> GL_MDET_RX_MAL_TYPE_S;
8384 		u16 queue  = (reg & GL_MDET_RX_QNUM_M) >> GL_MDET_RX_QNUM_S;
8385 
8386 		device_printf(dev, "Malicious Driver Detection Rx event '%s' on Rx queue %u PF# %u VF# %u\n",
8387 			      ice_mdd_rx_str(event), queue, pf_num, vf_num);
8388 
8389 		/* Only clear this event if it matches this PF, that way other
8390 		 * PFs can read the event and determine VF and queue number.
8391 		 */
8392 		if (pf_num == hw->pf_id)
8393 			wr32(hw, GL_MDET_RX, 0xffffffff);
8394 
8395 		mdd_detected = true;
8396 	}
8397 
8398 	/* Now, confirm that this event actually affects this PF, by checking
8399 	 * the PF registers.
8400 	 */
8401 	if (mdd_detected) {
8402 		reg = rd32(hw, PF_MDET_TX_TCLAN);
8403 		if (reg & PF_MDET_TX_TCLAN_VALID_M) {
8404 			wr32(hw, PF_MDET_TX_TCLAN, 0xffff);
8405 			sc->soft_stats.tx_mdd_count++;
8406 			request_reinit = true;
8407 		}
8408 
8409 		reg = rd32(hw, PF_MDET_TX_PQM);
8410 		if (reg & PF_MDET_TX_PQM_VALID_M) {
8411 			wr32(hw, PF_MDET_TX_PQM, 0xffff);
8412 			sc->soft_stats.tx_mdd_count++;
8413 			request_reinit = true;
8414 		}
8415 
8416 		reg = rd32(hw, PF_MDET_RX);
8417 		if (reg & PF_MDET_RX_VALID_M) {
8418 			wr32(hw, PF_MDET_RX, 0xffff);
8419 			sc->soft_stats.rx_mdd_count++;
8420 			request_reinit = true;
8421 		}
8422 	}
8423 
8424 	/* TODO: Implement logic to detect and handle events caused by VFs. */
8425 
8426 	/* request that the upper stack re-initialize the Tx/Rx queues */
8427 	if (request_reinit)
8428 		ice_request_stack_reinit(sc);
8429 
8430 	ice_flush(hw);
8431 }
8432 
8433 /**
8434  * ice_start_dcbx_agent - Start DCBX agent in FW via AQ command
8435  * @sc: the device softc
8436  *
8437  * @pre device is DCB capable and the FW LLDP agent has started
8438  *
8439  * Checks DCBX status and starts the DCBX agent if it is not in
8440  * a valid state via an AQ command.
8441  */
8442 static void
8443 ice_start_dcbx_agent(struct ice_softc *sc)
8444 {
8445 	struct ice_hw *hw = &sc->hw;
8446 	device_t dev = sc->dev;
8447 	bool dcbx_agent_status;
8448 	int status;
8449 
8450 	hw->port_info->qos_cfg.dcbx_status = ice_get_dcbx_status(hw);
8451 
8452 	if (hw->port_info->qos_cfg.dcbx_status != ICE_DCBX_STATUS_DONE &&
8453 	    hw->port_info->qos_cfg.dcbx_status != ICE_DCBX_STATUS_IN_PROGRESS) {
8454 		/*
8455 		 * Start DCBX agent, but not LLDP. The return value isn't
8456 		 * checked here because a more detailed dcbx agent status is
8457 		 * retrieved and checked in ice_init_dcb() and elsewhere.
8458 		 */
8459 		status = ice_aq_start_stop_dcbx(hw, true, &dcbx_agent_status, NULL);
8460 		if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EPERM)
8461 			device_printf(dev,
8462 			    "start_stop_dcbx failed, err %s aq_err %s\n",
8463 			    ice_status_str(status),
8464 			    ice_aq_str(hw->adminq.sq_last_status));
8465 	}
8466 }
8467 
8468 /**
8469  * ice_init_dcb_setup - Initialize DCB settings for HW
8470  * @sc: the device softc
8471  *
8472  * This needs to be called after the fw_lldp_agent sysctl is added, since that
8473  * can update the device's LLDP agent status if a tunable value is set.
8474  *
8475  * Get and store the initial state of DCB settings on driver load. Print out
8476  * informational messages as well.
8477  */
8478 void
8479 ice_init_dcb_setup(struct ice_softc *sc)
8480 {
8481 	struct ice_dcbx_cfg *local_dcbx_cfg;
8482 	struct ice_hw *hw = &sc->hw;
8483 	device_t dev = sc->dev;
8484 	int status;
8485 	u8 pfcmode_ret;
8486 
8487 	/* Don't do anything if DCB isn't supported */
8488 	if (!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_DCB)) {
8489 		device_printf(dev, "%s: No DCB support\n", __func__);
8490 		return;
8491 	}
8492 
8493 	/* Starts DCBX agent if it needs starting */
8494 	ice_start_dcbx_agent(sc);
8495 
8496 	/* This sets hw->port_info->qos_cfg.is_sw_lldp */
8497 	status = ice_init_dcb(hw, true);
8498 
8499 	/* If there is an error, then FW LLDP is not in a usable state */
8500 	if (status != 0 && status != ICE_ERR_NOT_READY) {
8501 		/* Don't print an error message if the return code from the AQ
8502 		 * cmd performed in ice_init_dcb() is EPERM; that means the
8503 		 * FW LLDP engine is disabled, and that is a valid state.
8504 		 */
8505 		if (!(status == ICE_ERR_AQ_ERROR &&
8506 		      hw->adminq.sq_last_status == ICE_AQ_RC_EPERM)) {
8507 			device_printf(dev, "DCB init failed, err %s aq_err %s\n",
8508 				      ice_status_str(status),
8509 				      ice_aq_str(hw->adminq.sq_last_status));
8510 		}
8511 		hw->port_info->qos_cfg.dcbx_status = ICE_DCBX_STATUS_NOT_STARTED;
8512 	}
8513 
8514 	switch (hw->port_info->qos_cfg.dcbx_status) {
8515 	case ICE_DCBX_STATUS_DIS:
8516 		ice_debug(hw, ICE_DBG_DCB, "DCBX disabled\n");
8517 		break;
8518 	case ICE_DCBX_STATUS_NOT_STARTED:
8519 		ice_debug(hw, ICE_DBG_DCB, "DCBX not started\n");
8520 		break;
8521 	case ICE_DCBX_STATUS_MULTIPLE_PEERS:
8522 		ice_debug(hw, ICE_DBG_DCB, "DCBX detected multiple peers\n");
8523 		break;
8524 	default:
8525 		break;
8526 	}
8527 
8528 	/* LLDP disabled in FW */
8529 	if (hw->port_info->qos_cfg.is_sw_lldp) {
8530 		ice_add_rx_lldp_filter(sc);
8531 		device_printf(dev, "Firmware LLDP agent disabled\n");
8532 	}
8533 
8534 	/* Query and cache PFC mode */
8535 	status = ice_aq_query_pfc_mode(hw, &pfcmode_ret, NULL);
8536 	if (status) {
8537 		device_printf(dev, "PFC mode query failed, err %s aq_err %s\n",
8538 			      ice_status_str(status),
8539 			      ice_aq_str(hw->adminq.sq_last_status));
8540 	}
8541 	local_dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
8542 	switch (pfcmode_ret) {
8543 	case ICE_AQC_PFC_VLAN_BASED_PFC:
8544 		local_dcbx_cfg->pfc_mode = ICE_QOS_MODE_VLAN;
8545 		break;
8546 	case ICE_AQC_PFC_DSCP_BASED_PFC:
8547 		local_dcbx_cfg->pfc_mode = ICE_QOS_MODE_DSCP;
8548 		break;
8549 	default:
8550 		/* DCB is disabled, but we shouldn't get here */
8551 		break;
8552 	}
8553 
8554 	/* Set default SW MIB for init */
8555 	ice_set_default_local_mib_settings(sc);
8556 
8557 	ice_set_bit(ICE_FEATURE_DCB, sc->feat_en);
8558 }
8559 
8560 /**
8561  * ice_dcb_get_tc_map - Scans config to get bitmap of enabled TCs
8562  * @dcbcfg: DCB configuration to examine
8563  *
8564  * Scans a TC mapping table inside dcbcfg to find traffic classes
8565  * enabled and @returns a bitmask of enabled TCs
8566  */
8567 u8
8568 ice_dcb_get_tc_map(const struct ice_dcbx_cfg *dcbcfg)
8569 {
8570 	u8 tc_map = 0;
8571 	int i = 0;
8572 
8573 	switch (dcbcfg->pfc_mode) {
8574 	case ICE_QOS_MODE_VLAN:
8575 		/* XXX: "i" is actually "User Priority" here, not
8576 		 * Traffic Class, but the max for both is 8, so it works
8577 		 * out here.
8578 		 */
8579 		for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
8580 			tc_map |= BIT(dcbcfg->etscfg.prio_table[i]);
8581 		break;
8582 	case ICE_QOS_MODE_DSCP:
8583 		for (i = 0; i < ICE_DSCP_NUM_VAL; i++)
8584 			tc_map |= BIT(dcbcfg->dscp_map[i]);
8585 		break;
8586 	default:
8587 		/* Invalid Mode */
8588 		tc_map = ICE_DFLT_TRAFFIC_CLASS;
8589 		break;
8590 	}
8591 
8592 	return (tc_map);
8593 }
8594 
8595 /**
8596  * ice_dcb_get_num_tc - Get the number of TCs from DCBX config
8597  * @dcbcfg: config to retrieve number of TCs from
8598  *
8599  * @return number of contiguous TCs found in dcbcfg's ETS Configuration
8600  * Priority Assignment Table, a value from 1 to 8. If there are
8601  * non-contiguous TCs used (e.g. assigning 1 and 3 without using 2),
8602  * then returns 0.
8603  */
8604 static u8
8605 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg)
8606 {
8607 	u8 tc_map;
8608 
8609 	tc_map = ice_dcb_get_tc_map(dcbcfg);
8610 
8611 	return (ice_dcb_tc_contig(tc_map));
8612 }
8613 
8614 /**
8615  * ice_debug_print_mib_change_event - helper function to log LLDP MIB change events
8616  * @sc: the device private softc
8617  * @event: event received on a control queue
8618  *
8619  * Prints out the type and contents of an LLDP MIB change event in a DCB debug message.
8620  */
8621 static void
8622 ice_debug_print_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event)
8623 {
8624 	struct ice_aqc_lldp_get_mib *params =
8625 	    (struct ice_aqc_lldp_get_mib *)&event->desc.params.lldp_get_mib;
8626 	u8 mib_type, bridge_type, tx_status;
8627 
8628 	static const char* mib_type_strings[] = {
8629 	    "Local MIB",
8630 	    "Remote MIB",
8631 	    "Reserved",
8632 	    "Reserved"
8633 	};
8634 	static const char* bridge_type_strings[] = {
8635 	    "Nearest Bridge",
8636 	    "Non-TPMR Bridge",
8637 	    "Reserved",
8638 	    "Reserved"
8639 	};
8640 	static const char* tx_status_strings[] = {
8641 	    "Port's TX active",
8642 	    "Port's TX suspended and drained",
8643 	    "Reserved",
8644 	    "Port's TX suspended and drained; blocked TC pipe flushed"
8645 	};
8646 
8647 	mib_type = (params->type & ICE_AQ_LLDP_MIB_TYPE_M) >>
8648 	    ICE_AQ_LLDP_MIB_TYPE_S;
8649 	bridge_type = (params->type & ICE_AQ_LLDP_BRID_TYPE_M) >>
8650 	    ICE_AQ_LLDP_BRID_TYPE_S;
8651 	tx_status = (params->type & ICE_AQ_LLDP_TX_M) >>
8652 	    ICE_AQ_LLDP_TX_S;
8653 
8654 	ice_debug(&sc->hw, ICE_DBG_DCB, "LLDP MIB Change Event (%s, %s, %s)\n",
8655 	    mib_type_strings[mib_type], bridge_type_strings[bridge_type],
8656 	    tx_status_strings[tx_status]);
8657 
8658 	/* Nothing else to report */
8659 	if (!event->msg_buf)
8660 		return;
8661 
8662 	ice_debug(&sc->hw, ICE_DBG_DCB, "- %s contents:\n", mib_type_strings[mib_type]);
8663 	ice_debug_array(&sc->hw, ICE_DBG_DCB, 16, 1, event->msg_buf,
8664 			event->msg_len);
8665 }
8666 
8667 /**
8668  * ice_dcb_needs_reconfig - Returns true if driver needs to reconfigure
8669  * @sc: the device private softc
8670  * @old_cfg: Old DCBX configuration to compare against
8671  * @new_cfg: New DCBX configuration to check
8672  *
8673  * @return true if something changed in new_cfg that requires the driver
8674  * to do some reconfiguration.
8675  */
8676 static bool
8677 ice_dcb_needs_reconfig(struct ice_softc *sc, struct ice_dcbx_cfg *old_cfg,
8678     struct ice_dcbx_cfg *new_cfg)
8679 {
8680 	struct ice_hw *hw = &sc->hw;
8681 	bool needs_reconfig = false;
8682 
8683 	/* No change detected in DCBX config */
8684 	if (!memcmp(old_cfg, new_cfg, sizeof(*old_cfg))) {
8685 		ice_debug(hw, ICE_DBG_DCB,
8686 		    "No change detected in local DCBX configuration\n");
8687 		return (false);
8688 	}
8689 
8690 	/* Check if ETS config has changed */
8691 	if (memcmp(&new_cfg->etscfg, &old_cfg->etscfg,
8692 		   sizeof(new_cfg->etscfg))) {
8693 		/* If Priority Table has changed, then driver reconfig is needed */
8694 		if (memcmp(&new_cfg->etscfg.prio_table,
8695 			   &old_cfg->etscfg.prio_table,
8696 			   sizeof(new_cfg->etscfg.prio_table))) {
8697 			ice_debug(hw, ICE_DBG_DCB, "ETS UP2TC changed\n");
8698 			needs_reconfig = true;
8699 		}
8700 
8701 		/* These are just informational */
8702 		if (memcmp(&new_cfg->etscfg.tcbwtable,
8703 			   &old_cfg->etscfg.tcbwtable,
8704 			   sizeof(new_cfg->etscfg.tcbwtable))) {
8705 			ice_debug(hw, ICE_DBG_DCB, "ETS TCBW table changed\n");
8706 			needs_reconfig = true;
8707 		}
8708 
8709 		if (memcmp(&new_cfg->etscfg.tsatable,
8710 			   &old_cfg->etscfg.tsatable,
8711 			   sizeof(new_cfg->etscfg.tsatable))) {
8712 			ice_debug(hw, ICE_DBG_DCB, "ETS TSA table changed\n");
8713 			needs_reconfig = true;
8714 		}
8715 	}
8716 
8717 	/* Check if PFC config has changed */
8718 	if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) {
8719 		ice_debug(hw, ICE_DBG_DCB, "PFC config changed\n");
8720 		needs_reconfig = true;
8721 	}
8722 
8723 	/* Check if APP table has changed */
8724 	if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app)))
8725 		ice_debug(hw, ICE_DBG_DCB, "APP Table changed\n");
8726 
8727 	ice_debug(hw, ICE_DBG_DCB, "%s result: %d\n", __func__, needs_reconfig);
8728 
8729 	return (needs_reconfig);
8730 }
8731 
8732 /**
8733  * ice_stop_pf_vsi - Stop queues for PF LAN VSI
8734  * @sc: the device private softc
8735  *
8736  * Flushes interrupts and stops the queues associated with the PF LAN VSI.
8737  */
8738 static void
8739 ice_stop_pf_vsi(struct ice_softc *sc)
8740 {
8741 	/* Dissociate the Tx and Rx queues from the interrupts */
8742 	ice_flush_txq_interrupts(&sc->pf_vsi);
8743 	ice_flush_rxq_interrupts(&sc->pf_vsi);
8744 
8745 	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
8746 		return;
8747 
8748 	/* Disable the Tx and Rx queues */
8749 	ice_vsi_disable_tx(&sc->pf_vsi);
8750 	ice_control_all_rx_queues(&sc->pf_vsi, false);
8751 }
8752 
8753 /**
8754  * ice_vsi_setup_q_map - Setup a VSI queue map
8755  * @vsi: the VSI being configured
8756  * @ctxt: VSI context structure
8757  */
8758 static void
8759 ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
8760 {
8761 	u16 qcounts[ICE_MAX_TRAFFIC_CLASS] = {};
8762 	u16 offset = 0, qmap = 0, pow = 0;
8763 	u16 num_q_per_tc, qcount_rx, rem_queues;
8764 	int i, j, k;
8765 
8766 	if (vsi->num_tcs == 0) {
8767 		/* at least TC0 should be enabled by default */
8768 		vsi->num_tcs = 1;
8769 		vsi->tc_map = 0x1;
8770 	}
8771 
8772 	qcount_rx = vsi->num_rx_queues;
8773 	num_q_per_tc = min(qcount_rx / vsi->num_tcs, ICE_MAX_RXQS_PER_TC);
8774 
8775 	if (!num_q_per_tc)
8776 		num_q_per_tc = 1;
8777 
8778 	/* Set initial values for # of queues to use for each active TC */
8779 	ice_for_each_traffic_class(i)
8780 		if (i < vsi->num_tcs)
8781 			qcounts[i] = num_q_per_tc;
8782 
8783 	/* If any queues are unassigned, add them to TC 0 */
8784 	rem_queues = qcount_rx % vsi->num_tcs;
8785 	if (rem_queues > 0)
8786 		qcounts[0] += rem_queues;
8787 
8788 	/* TC mapping is a function of the number of Rx queues assigned to the
8789 	 * VSI for each traffic class and the offset of these queues.
8790 	 * The first 10 bits are for queue offset for TC0, next 4 bits for no:of
8791 	 * queues allocated to TC0. No:of queues is a power-of-2.
8792 	 *
8793 	 * If TC is not enabled, the queue offset is set to 0, and allocate one
8794 	 * queue, this way, traffic for the given TC will be sent to the default
8795 	 * queue.
8796 	 *
8797 	 * Setup number and offset of Rx queues for all TCs for the VSI
8798 	 */
8799 	ice_for_each_traffic_class(i) {
8800 		if (!(vsi->tc_map & BIT(i))) {
8801 			/* TC is not enabled */
8802 			vsi->tc_info[i].qoffset = 0;
8803 			vsi->tc_info[i].qcount_rx = 1;
8804 			vsi->tc_info[i].qcount_tx = 1;
8805 
8806 			ctxt->info.tc_mapping[i] = 0;
8807 			continue;
8808 		}
8809 
8810 		/* TC is enabled */
8811 		vsi->tc_info[i].qoffset = offset;
8812 		vsi->tc_info[i].qcount_rx = qcounts[i];
8813 		vsi->tc_info[i].qcount_tx = qcounts[i];
8814 
8815 		/* find the (rounded up) log-2 of queue count for current TC */
8816 		pow = fls(qcounts[i] - 1);
8817 
8818 		qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
8819 			ICE_AQ_VSI_TC_Q_OFFSET_M) |
8820 			((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
8821 			 ICE_AQ_VSI_TC_Q_NUM_M);
8822 		ctxt->info.tc_mapping[i] = CPU_TO_LE16(qmap);
8823 
8824 		/* Store traffic class and handle data in queue structures */
8825 		for (j = offset, k = 0; j < offset + qcounts[i]; j++, k++) {
8826 			vsi->tx_queues[j].q_handle = k;
8827 			vsi->tx_queues[j].tc = i;
8828 
8829 			vsi->rx_queues[j].tc = i;
8830 		}
8831 
8832 		offset += qcounts[i];
8833 	}
8834 
8835 	/* Rx queue mapping */
8836 	ctxt->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_CONTIG);
8837 	ctxt->info.q_mapping[0] = CPU_TO_LE16(vsi->rx_qmap[0]);
8838 	ctxt->info.q_mapping[1] = CPU_TO_LE16(vsi->num_rx_queues);
8839 }
8840 
8841 /**
8842  * ice_pf_vsi_cfg_tc - Configure PF VSI for a given TC map
8843  * @sc: the device private softc
8844  * @tc_map: traffic class bitmap
8845  *
8846  * @pre VSI queues are stopped
8847  *
8848  * @return 0 if configuration is successful
8849  * @return EIO if Update VSI AQ cmd fails
8850  * @return ENODEV if updating Tx Scheduler fails
8851  */
8852 static int
8853 ice_pf_vsi_cfg_tc(struct ice_softc *sc, u8 tc_map)
8854 {
8855 	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
8856 	struct ice_vsi *vsi = &sc->pf_vsi;
8857 	struct ice_hw *hw = &sc->hw;
8858 	struct ice_vsi_ctx ctx = { 0 };
8859 	device_t dev = sc->dev;
8860 	int status;
8861 	u8 num_tcs = 0;
8862 	int i = 0;
8863 
8864 	/* Count the number of enabled Traffic Classes */
8865 	ice_for_each_traffic_class(i)
8866 		if (tc_map & BIT(i))
8867 			num_tcs++;
8868 
8869 	vsi->tc_map = tc_map;
8870 	vsi->num_tcs = num_tcs;
8871 
8872 	/* Set default parameters for context */
8873 	ctx.vf_num = 0;
8874 	ctx.info = vsi->info;
8875 
8876 	/* Setup queue map */
8877 	ice_vsi_setup_q_map(vsi, &ctx);
8878 
8879 	/* Update VSI configuration in firmware (RX queues) */
8880 	ctx.info.valid_sections = CPU_TO_LE16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
8881 	status = ice_update_vsi(hw, vsi->idx, &ctx, NULL);
8882 	if (status) {
8883 		device_printf(dev,
8884 		    "%s: Update VSI AQ call failed, err %s aq_err %s\n",
8885 		    __func__, ice_status_str(status),
8886 		    ice_aq_str(hw->adminq.sq_last_status));
8887 		return (EIO);
8888 	}
8889 	vsi->info = ctx.info;
8890 
8891 	/* Use values derived in ice_vsi_setup_q_map() */
8892 	for (i = 0; i < num_tcs; i++)
8893 		max_txqs[i] = vsi->tc_info[i].qcount_tx;
8894 
8895 	if (hw->debug_mask & ICE_DBG_DCB) {
8896 		device_printf(dev, "%s: max_txqs:", __func__);
8897 		ice_for_each_traffic_class(i)
8898 			printf(" %d", max_txqs[i]);
8899 		printf("\n");
8900 	}
8901 
8902 	/* Update LAN Tx queue info in firmware */
8903 	status = ice_cfg_vsi_lan(hw->port_info, vsi->idx, vsi->tc_map,
8904 				 max_txqs);
8905 	if (status) {
8906 		device_printf(dev,
8907 		    "%s: Failed VSI lan queue config, err %s aq_err %s\n",
8908 		    __func__, ice_status_str(status),
8909 		    ice_aq_str(hw->adminq.sq_last_status));
8910 		return (ENODEV);
8911 	}
8912 
8913 	vsi->info.valid_sections = 0;
8914 
8915 	return (0);
8916 }
8917 
8918 /**
8919  * ice_dcb_tc_contig - Count TCs if they're contiguous
8920  * @tc_map: pointer to priority table
8921  *
8922  * @return The number of traffic classes in
8923  * an 8-bit TC bitmap, or if there is a gap, then returns 0.
8924  */
8925 static u8
8926 ice_dcb_tc_contig(u8 tc_map)
8927 {
8928 	bool tc_unused = false;
8929 	u8 ret = 0;
8930 
8931 	/* Scan bitmask for contiguous TCs starting with TC0 */
8932 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
8933 		if (tc_map & BIT(i)) {
8934 			if (!tc_unused) {
8935 				ret++;
8936 			} else {
8937 				/* Non-contiguous TCs detected */
8938 				return (0);
8939 			}
8940 		} else
8941 			tc_unused = true;
8942 	}
8943 
8944 	return (ret);
8945 }
8946 
8947 /**
8948  * ice_dcb_recfg - Reconfigure VSI with new DCB settings
8949  * @sc: the device private softc
8950  *
8951  * @pre All VSIs have been disabled/stopped
8952  *
8953  * Reconfigures VSI settings based on local_dcbx_cfg.
8954  */
8955 static void
8956 ice_dcb_recfg(struct ice_softc *sc)
8957 {
8958 	struct ice_dcbx_cfg *dcbcfg =
8959 	    &sc->hw.port_info->qos_cfg.local_dcbx_cfg;
8960 	device_t dev = sc->dev;
8961 	u8 tc_map = 0;
8962 	int ret;
8963 
8964 	tc_map = ice_dcb_get_tc_map(dcbcfg);
8965 
8966 	/* If non-contiguous TCs are used, then configure
8967 	 * the default TC instead. There's no support for
8968 	 * non-contiguous TCs being used.
8969 	 */
8970 	if (ice_dcb_tc_contig(tc_map) == 0) {
8971 		tc_map = ICE_DFLT_TRAFFIC_CLASS;
8972 		ice_set_default_local_lldp_mib(sc);
8973 	}
8974 
8975 	/* Reconfigure VSI queues to add/remove traffic classes */
8976 	ret = ice_pf_vsi_cfg_tc(sc, tc_map);
8977 	if (ret)
8978 		device_printf(dev,
8979 		    "Failed to configure TCs for PF VSI, err %s\n",
8980 		    ice_err_str(ret));
8981 
8982 }
8983 
8984 /**
8985  * ice_set_default_local_mib_settings - Set Local LLDP MIB to default settings
8986  * @sc: device softc structure
8987  *
8988  * Overwrites the driver's SW local LLDP MIB with default settings. This
8989  * ensures the driver has a valid MIB when it next uses the Set Local LLDP MIB
8990  * admin queue command.
8991  */
8992 static void
8993 ice_set_default_local_mib_settings(struct ice_softc *sc)
8994 {
8995 	struct ice_dcbx_cfg *dcbcfg;
8996 	struct ice_hw *hw = &sc->hw;
8997 	struct ice_port_info *pi;
8998 	u8 maxtcs, maxtcs_ets, old_pfc_mode;
8999 
9000 	pi = hw->port_info;
9001 
9002 	dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
9003 
9004 	maxtcs = hw->func_caps.common_cap.maxtc;
9005 	/* This value is only 3 bits; 8 TCs maps to 0 */
9006 	maxtcs_ets = maxtcs & ICE_IEEE_ETS_MAXTC_M;
9007 
9008 	/* VLAN vs DSCP mode needs to be preserved */
9009 	old_pfc_mode = dcbcfg->pfc_mode;
9010 
9011 	/**
9012 	 * Setup the default settings used by the driver for the Set Local
9013 	 * LLDP MIB Admin Queue command (0x0A08). (1TC w/ 100% BW, ETS, no
9014 	 * PFC, TSA=2).
9015 	 */
9016 	memset(dcbcfg, 0, sizeof(*dcbcfg));
9017 
9018 	dcbcfg->etscfg.willing = 1;
9019 	dcbcfg->etscfg.tcbwtable[0] = 100;
9020 	dcbcfg->etscfg.maxtcs = maxtcs_ets;
9021 	dcbcfg->etscfg.tsatable[0] = 2;
9022 
9023 	dcbcfg->etsrec = dcbcfg->etscfg;
9024 	dcbcfg->etsrec.willing = 0;
9025 
9026 	dcbcfg->pfc.willing = 1;
9027 	dcbcfg->pfc.pfccap = maxtcs;
9028 
9029 	dcbcfg->pfc_mode = old_pfc_mode;
9030 }
9031 
9032 /**
9033  * ice_do_dcb_reconfig - notify RDMA and reconfigure PF LAN VSI
9034  * @sc: the device private softc
9035  * @pending_mib: FW has a pending MIB change to execute
9036  *
9037  * @pre Determined that the DCB configuration requires a change
9038  *
9039  * Reconfigures the PF LAN VSI based on updated DCB configuration
9040  * found in the hw struct's/port_info's/ local dcbx configuration.
9041  */
9042 void
9043 ice_do_dcb_reconfig(struct ice_softc *sc, bool pending_mib)
9044 {
9045 	struct ice_aqc_port_ets_elem port_ets = { 0 };
9046 	struct ice_dcbx_cfg *local_dcbx_cfg;
9047 	struct ice_hw *hw = &sc->hw;
9048 	struct ice_port_info *pi;
9049 	device_t dev = sc->dev;
9050 	int status;
9051 
9052 	pi = sc->hw.port_info;
9053 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
9054 
9055 	ice_rdma_notify_dcb_qos_change(sc);
9056 	/* If there's a pending MIB, tell the FW to execute the MIB change
9057 	 * now.
9058 	 */
9059 	if (pending_mib) {
9060 		status = ice_lldp_execute_pending_mib(hw);
9061 		if ((status == ICE_ERR_AQ_ERROR) &&
9062 		    (hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)) {
9063 			device_printf(dev,
9064 			    "Execute Pending LLDP MIB AQ call failed, no pending MIB\n");
9065 		} else if (status) {
9066 			device_printf(dev,
9067 			    "Execute Pending LLDP MIB AQ call failed, err %s aq_err %s\n",
9068 			    ice_status_str(status),
9069 			    ice_aq_str(hw->adminq.sq_last_status));
9070 			/* This won't break traffic, but QoS will not work as expected */
9071 		}
9072 	}
9073 
9074 	/* Set state when there's more than one TC */
9075 	if (ice_dcb_get_num_tc(local_dcbx_cfg) > 1) {
9076 		device_printf(dev, "Multiple traffic classes enabled\n");
9077 		ice_set_state(&sc->state, ICE_STATE_MULTIPLE_TCS);
9078 	} else {
9079 		device_printf(dev, "Multiple traffic classes disabled\n");
9080 		ice_clear_state(&sc->state, ICE_STATE_MULTIPLE_TCS);
9081 	}
9082 
9083 	/* Disable PF VSI since it's going to be reconfigured */
9084 	ice_stop_pf_vsi(sc);
9085 
9086 	/* Query ETS configuration and update SW Tx scheduler info */
9087 	status = ice_query_port_ets(pi, &port_ets, sizeof(port_ets), NULL);
9088 	if (status) {
9089 		device_printf(dev,
9090 		    "Query Port ETS AQ call failed, err %s aq_err %s\n",
9091 		    ice_status_str(status),
9092 		    ice_aq_str(hw->adminq.sq_last_status));
9093 		/* This won't break traffic, but QoS will not work as expected */
9094 	}
9095 
9096 	/* Change PF VSI configuration */
9097 	ice_dcb_recfg(sc);
9098 
9099 	/* Send new configuration to RDMA client driver */
9100 	ice_rdma_dcb_qos_update(sc, pi);
9101 
9102 	ice_request_stack_reinit(sc);
9103 }
9104 
9105 /**
9106  * ice_handle_mib_change_event - helper function to handle LLDP MIB change events
9107  * @sc: the device private softc
9108  * @event: event received on a control queue
9109  *
9110  * Checks the updated MIB it receives and possibly reconfigures the PF LAN
9111  * VSI depending on what has changed. This will also print out some debug
9112  * information about the MIB event if ICE_DBG_DCB is enabled in the debug_mask.
9113  */
9114 static void
9115 ice_handle_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event)
9116 {
9117 	struct ice_aqc_lldp_get_mib *params =
9118 	    (struct ice_aqc_lldp_get_mib *)&event->desc.params.lldp_get_mib;
9119 	struct ice_dcbx_cfg tmp_dcbx_cfg, *local_dcbx_cfg;
9120 	struct ice_port_info *pi;
9121 	device_t dev = sc->dev;
9122 	struct ice_hw *hw = &sc->hw;
9123 	bool needs_reconfig, mib_is_pending;
9124 	int status;
9125 	u8 mib_type, bridge_type;
9126 
9127 	ASSERT_CFG_LOCKED(sc);
9128 
9129 	ice_debug_print_mib_change_event(sc, event);
9130 
9131 	pi = sc->hw.port_info;
9132 
9133 	mib_type = (params->type & ICE_AQ_LLDP_MIB_TYPE_M) >>
9134 	    ICE_AQ_LLDP_MIB_TYPE_S;
9135 	bridge_type = (params->type & ICE_AQ_LLDP_BRID_TYPE_M) >>
9136 	    ICE_AQ_LLDP_BRID_TYPE_S;
9137 	mib_is_pending = (params->state & ICE_AQ_LLDP_MIB_CHANGE_STATE_M) >>
9138 	    ICE_AQ_LLDP_MIB_CHANGE_STATE_S;
9139 
9140 	/* Ignore if event is not for Nearest Bridge */
9141 	if (bridge_type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID)
9142 		return;
9143 
9144 	/* Check MIB Type and return if event for Remote MIB update */
9145 	if (mib_type == ICE_AQ_LLDP_MIB_REMOTE) {
9146 		/* Update the cached remote MIB and return */
9147 		status = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
9148 					 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID,
9149 					 &pi->qos_cfg.remote_dcbx_cfg);
9150 		if (status)
9151 			device_printf(dev,
9152 			    "%s: Failed to get Remote DCB config; status %s, aq_err %s\n",
9153 			    __func__, ice_status_str(status),
9154 			    ice_aq_str(hw->adminq.sq_last_status));
9155 		/* Not fatal if this fails */
9156 		return;
9157 	}
9158 
9159 	/* Save line length by aliasing the local dcbx cfg */
9160 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
9161 	/* Save off the old configuration and clear current config */
9162 	tmp_dcbx_cfg = *local_dcbx_cfg;
9163 	memset(local_dcbx_cfg, 0, sizeof(*local_dcbx_cfg));
9164 
9165 	/* Update the current local_dcbx_cfg with new data */
9166 	if (mib_is_pending) {
9167 		ice_get_dcb_cfg_from_mib_change(pi, event);
9168 	} else {
9169 		/* Get updated DCBX data from firmware */
9170 		status = ice_get_dcb_cfg(pi);
9171 		if (status) {
9172 			device_printf(dev,
9173 			    "%s: Failed to get Local DCB config; status %s, aq_err %s\n",
9174 			    __func__, ice_status_str(status),
9175 			    ice_aq_str(hw->adminq.sq_last_status));
9176 			return;
9177 		}
9178 	}
9179 
9180 	/* Check to see if DCB needs reconfiguring */
9181 	needs_reconfig = ice_dcb_needs_reconfig(sc, &tmp_dcbx_cfg,
9182 	    local_dcbx_cfg);
9183 
9184 	if (!needs_reconfig && !mib_is_pending)
9185 		return;
9186 
9187 	/* Reconfigure -- this will also notify FW that configuration is done,
9188 	 * if the FW MIB change is only pending instead of executed.
9189 	 */
9190 	ice_do_dcb_reconfig(sc, mib_is_pending);
9191 }
9192 
9193 /**
9194  * ice_send_version - Send driver version to firmware
9195  * @sc: the device private softc
9196  *
9197  * Send the driver version to the firmware. This must be called as early as
9198  * possible after ice_init_hw().
9199  */
9200 int
9201 ice_send_version(struct ice_softc *sc)
9202 {
9203 	struct ice_driver_ver driver_version = {0};
9204 	struct ice_hw *hw = &sc->hw;
9205 	device_t dev = sc->dev;
9206 	int status;
9207 
9208 	driver_version.major_ver = ice_major_version;
9209 	driver_version.minor_ver = ice_minor_version;
9210 	driver_version.build_ver = ice_patch_version;
9211 	driver_version.subbuild_ver = ice_rc_version;
9212 
9213 	strlcpy((char *)driver_version.driver_string, ice_driver_version,
9214 		sizeof(driver_version.driver_string));
9215 
9216 	status = ice_aq_send_driver_ver(hw, &driver_version, NULL);
9217 	if (status) {
9218 		device_printf(dev, "Unable to send driver version to firmware, err %s aq_err %s\n",
9219 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
9220 		return (EIO);
9221 	}
9222 
9223 	return (0);
9224 }
9225 
9226 /**
9227  * ice_handle_lan_overflow_event - helper function to log LAN overflow events
9228  * @sc: device softc
9229  * @event: event received on a control queue
9230  *
9231  * Prints out a message when a LAN overflow event is detected on a receive
9232  * queue.
9233  */
9234 static void
9235 ice_handle_lan_overflow_event(struct ice_softc *sc, struct ice_rq_event_info *event)
9236 {
9237 	struct ice_aqc_event_lan_overflow *params =
9238 	    (struct ice_aqc_event_lan_overflow *)&event->desc.params.lan_overflow;
9239 	struct ice_hw *hw = &sc->hw;
9240 
9241 	ice_debug(hw, ICE_DBG_DCB, "LAN overflow event detected, prtdcb_ruptq=0x%08x, qtx_ctl=0x%08x\n",
9242 		  LE32_TO_CPU(params->prtdcb_ruptq),
9243 		  LE32_TO_CPU(params->qtx_ctl));
9244 }
9245 
9246 /**
9247  * ice_add_ethertype_to_list - Add an Ethertype filter to a filter list
9248  * @vsi: the VSI to target packets to
9249  * @list: the list to add the filter to
9250  * @ethertype: the Ethertype to filter on
9251  * @direction: The direction of the filter (Tx or Rx)
9252  * @action: the action to take
9253  *
9254  * Add an Ethertype filter to a filter list. Used to forward a series of
9255  * filters to the firmware for configuring the switch.
9256  *
9257  * Returns 0 on success, and an error code on failure.
9258  */
9259 static int
9260 ice_add_ethertype_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
9261 			  u16 ethertype, u16 direction,
9262 			  enum ice_sw_fwd_act_type action)
9263 {
9264 	struct ice_fltr_list_entry *entry;
9265 
9266 	MPASS((direction == ICE_FLTR_TX) || (direction == ICE_FLTR_RX));
9267 
9268 	entry = (__typeof(entry))malloc(sizeof(*entry), M_ICE, M_NOWAIT|M_ZERO);
9269 	if (!entry)
9270 		return (ENOMEM);
9271 
9272 	entry->fltr_info.flag = direction;
9273 	entry->fltr_info.src_id = ICE_SRC_ID_VSI;
9274 	entry->fltr_info.lkup_type = ICE_SW_LKUP_ETHERTYPE;
9275 	entry->fltr_info.fltr_act = action;
9276 	entry->fltr_info.vsi_handle = vsi->idx;
9277 	entry->fltr_info.l_data.ethertype_mac.ethertype = ethertype;
9278 
9279 	LIST_ADD(&entry->list_entry, list);
9280 
9281 	return 0;
9282 }
9283 
9284 #define ETHERTYPE_PAUSE_FRAMES 0x8808
9285 #define ETHERTYPE_LLDP_FRAMES 0x88cc
9286 
9287 /**
9288  * ice_cfg_pf_ethertype_filters - Configure switch to drop ethertypes
9289  * @sc: the device private softc
9290  *
9291  * Configure the switch to drop PAUSE frames and LLDP frames transmitted from
9292  * the host. This prevents malicious VFs from sending these frames and being
9293  * able to control or configure the network.
9294  */
9295 int
9296 ice_cfg_pf_ethertype_filters(struct ice_softc *sc)
9297 {
9298 	struct ice_list_head ethertype_list;
9299 	struct ice_vsi *vsi = &sc->pf_vsi;
9300 	struct ice_hw *hw = &sc->hw;
9301 	device_t dev = sc->dev;
9302 	int status;
9303 	int err = 0;
9304 
9305 	INIT_LIST_HEAD(&ethertype_list);
9306 
9307 	/*
9308 	 * Note that the switch filters will ignore the VSI index for the drop
9309 	 * action, so we only need to program drop filters once for the main
9310 	 * VSI.
9311 	 */
9312 
9313 	/* Configure switch to drop all Tx pause frames coming from any VSI. */
9314 	if (sc->enable_tx_fc_filter) {
9315 		err = ice_add_ethertype_to_list(vsi, &ethertype_list,
9316 						ETHERTYPE_PAUSE_FRAMES,
9317 						ICE_FLTR_TX, ICE_DROP_PACKET);
9318 		if (err)
9319 			goto free_ethertype_list;
9320 	}
9321 
9322 	/* Configure switch to drop LLDP frames coming from any VSI */
9323 	if (sc->enable_tx_lldp_filter) {
9324 		err = ice_add_ethertype_to_list(vsi, &ethertype_list,
9325 						ETHERTYPE_LLDP_FRAMES,
9326 						ICE_FLTR_TX, ICE_DROP_PACKET);
9327 		if (err)
9328 			goto free_ethertype_list;
9329 	}
9330 
9331 	status = ice_add_eth_mac(hw, &ethertype_list);
9332 	if (status) {
9333 		device_printf(dev,
9334 			      "Failed to add Tx Ethertype filters, err %s aq_err %s\n",
9335 			      ice_status_str(status),
9336 			      ice_aq_str(hw->adminq.sq_last_status));
9337 		err = (EIO);
9338 	}
9339 
9340 free_ethertype_list:
9341 	ice_free_fltr_list(&ethertype_list);
9342 	return err;
9343 }
9344 
9345 /**
9346  * ice_add_rx_lldp_filter - add ethertype filter for Rx LLDP frames
9347  * @sc: the device private structure
9348  *
9349  * Add a switch ethertype filter which forwards the LLDP frames to the main PF
9350  * VSI. Called when the fw_lldp_agent is disabled, to allow the LLDP frames to
9351  * be forwarded to the stack.
9352  */
9353 void
9354 ice_add_rx_lldp_filter(struct ice_softc *sc)
9355 {
9356 	struct ice_list_head ethertype_list;
9357 	struct ice_vsi *vsi = &sc->pf_vsi;
9358 	struct ice_hw *hw = &sc->hw;
9359 	device_t dev = sc->dev;
9360 	int status;
9361 	int err;
9362 	u16 vsi_num;
9363 
9364 	/*
9365 	 * If FW is new enough, use a direct AQ command to perform the filter
9366 	 * addition.
9367 	 */
9368 	if (ice_fw_supports_lldp_fltr_ctrl(hw)) {
9369 		vsi_num = ice_get_hw_vsi_num(hw, vsi->idx);
9370 		status = ice_lldp_fltr_add_remove(hw, vsi_num, true);
9371 		if (status) {
9372 			device_printf(dev,
9373 			    "Failed to add Rx LLDP filter, err %s aq_err %s\n",
9374 			    ice_status_str(status),
9375 			    ice_aq_str(hw->adminq.sq_last_status));
9376 		} else
9377 			ice_set_state(&sc->state,
9378 			    ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER);
9379 		return;
9380 	}
9381 
9382 	INIT_LIST_HEAD(&ethertype_list);
9383 
9384 	/* Forward Rx LLDP frames to the stack */
9385 	err = ice_add_ethertype_to_list(vsi, &ethertype_list,
9386 					ETHERTYPE_LLDP_FRAMES,
9387 					ICE_FLTR_RX, ICE_FWD_TO_VSI);
9388 	if (err) {
9389 		device_printf(dev,
9390 			      "Failed to add Rx LLDP filter, err %s\n",
9391 			      ice_err_str(err));
9392 		goto free_ethertype_list;
9393 	}
9394 
9395 	status = ice_add_eth_mac(hw, &ethertype_list);
9396 	if (status && status != ICE_ERR_ALREADY_EXISTS) {
9397 		device_printf(dev,
9398 			      "Failed to add Rx LLDP filter, err %s aq_err %s\n",
9399 			      ice_status_str(status),
9400 			      ice_aq_str(hw->adminq.sq_last_status));
9401 	} else {
9402 		/*
9403 		 * If status == ICE_ERR_ALREADY_EXISTS, we won't treat an
9404 		 * already existing filter as an error case.
9405 		 */
9406 		ice_set_state(&sc->state, ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER);
9407 	}
9408 
9409 free_ethertype_list:
9410 	ice_free_fltr_list(&ethertype_list);
9411 }
9412 
9413 /**
9414  * ice_del_rx_lldp_filter - Remove ethertype filter for Rx LLDP frames
9415  * @sc: the device private structure
9416  *
9417  * Remove the switch filter forwarding LLDP frames to the main PF VSI, called
9418  * when the firmware LLDP agent is enabled, to stop routing LLDP frames to the
9419  * stack.
9420  */
9421 static void
9422 ice_del_rx_lldp_filter(struct ice_softc *sc)
9423 {
9424 	struct ice_list_head ethertype_list;
9425 	struct ice_vsi *vsi = &sc->pf_vsi;
9426 	struct ice_hw *hw = &sc->hw;
9427 	device_t dev = sc->dev;
9428 	int status;
9429 	int err;
9430 	u16 vsi_num;
9431 
9432 	/*
9433 	 * Only in the scenario where the driver added the filter during
9434 	 * this session (while the driver was loaded) would we be able to
9435 	 * delete this filter.
9436 	 */
9437 	if (!ice_test_state(&sc->state, ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER))
9438 		return;
9439 
9440 	/*
9441 	 * If FW is new enough, use a direct AQ command to perform the filter
9442 	 * removal.
9443 	 */
9444 	if (ice_fw_supports_lldp_fltr_ctrl(hw)) {
9445 		vsi_num = ice_get_hw_vsi_num(hw, vsi->idx);
9446 		status = ice_lldp_fltr_add_remove(hw, vsi_num, false);
9447 		if (status) {
9448 			device_printf(dev,
9449 			    "Failed to remove Rx LLDP filter, err %s aq_err %s\n",
9450 			    ice_status_str(status),
9451 			    ice_aq_str(hw->adminq.sq_last_status));
9452 		}
9453 		return;
9454 	}
9455 
9456 	INIT_LIST_HEAD(&ethertype_list);
9457 
9458 	/* Remove filter forwarding Rx LLDP frames to the stack */
9459 	err = ice_add_ethertype_to_list(vsi, &ethertype_list,
9460 					ETHERTYPE_LLDP_FRAMES,
9461 					ICE_FLTR_RX, ICE_FWD_TO_VSI);
9462 	if (err) {
9463 		device_printf(dev,
9464 			      "Failed to remove Rx LLDP filter, err %s\n",
9465 			      ice_err_str(err));
9466 		goto free_ethertype_list;
9467 	}
9468 
9469 	status = ice_remove_eth_mac(hw, &ethertype_list);
9470 	if (status == ICE_ERR_DOES_NOT_EXIST) {
9471 		; /* Don't complain if we try to remove a filter that doesn't exist */
9472 	} else if (status) {
9473 		device_printf(dev,
9474 			      "Failed to remove Rx LLDP filter, err %s aq_err %s\n",
9475 			      ice_status_str(status),
9476 			      ice_aq_str(hw->adminq.sq_last_status));
9477 	}
9478 
9479 free_ethertype_list:
9480 	ice_free_fltr_list(&ethertype_list);
9481 }
9482 
9483 /**
9484  * ice_init_link_configuration -- Setup link in different ways depending
9485  * on whether media is available or not.
9486  * @sc: device private structure
9487  *
9488  * Called at the end of the attach process to either set default link
9489  * parameters if there is media available, or force HW link down and
9490  * set a state bit if there is no media.
9491  */
9492 void
9493 ice_init_link_configuration(struct ice_softc *sc)
9494 {
9495 	struct ice_port_info *pi = sc->hw.port_info;
9496 	struct ice_hw *hw = &sc->hw;
9497 	device_t dev = sc->dev;
9498 	int status, retry_count = 0;
9499 
9500 retry:
9501 	pi->phy.get_link_info = true;
9502 	status = ice_get_link_status(pi, &sc->link_up);
9503 
9504 	if (status) {
9505 		if (hw->adminq.sq_last_status == ICE_AQ_RC_EAGAIN) {
9506 			retry_count++;
9507 			ice_debug(hw, ICE_DBG_LINK,
9508 			    "%s: ice_get_link_status failed with EAGAIN, attempt %d\n",
9509 			    __func__, retry_count);
9510 			if (retry_count < ICE_LINK_AQ_MAX_RETRIES) {
9511 				ice_msec_pause(ICE_LINK_RETRY_DELAY);
9512 				goto retry;
9513 			}
9514 		} else {
9515 			device_printf(dev,
9516 			    "%s: ice_get_link_status failed; status %s, aq_err %s\n",
9517 			    __func__, ice_status_str(status),
9518 			    ice_aq_str(hw->adminq.sq_last_status));
9519 		}
9520 		return;
9521 	}
9522 
9523 	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
9524 		ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
9525 		/* Apply default link settings */
9526 		if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN)) {
9527 			ice_set_link(sc, false);
9528 			ice_set_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
9529 		} else
9530 			ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
9531 	} else {
9532 		 /* Set link down, and poll for media available in timer. This prevents the
9533 		  * driver from receiving spurious link-related events.
9534 		  */
9535 		ice_set_state(&sc->state, ICE_STATE_NO_MEDIA);
9536 		status = ice_aq_set_link_restart_an(pi, false, NULL);
9537 		if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EMODE)
9538 			device_printf(dev,
9539 			    "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n",
9540 			    __func__, ice_status_str(status),
9541 			    ice_aq_str(hw->adminq.sq_last_status));
9542 	}
9543 }
9544 
9545 /**
9546  * ice_apply_saved_phy_req_to_cfg -- Write saved user PHY settings to cfg data
9547  * @sc: device private structure
9548  * @cfg: new PHY config data to be modified
9549  *
9550  * Applies user settings for advertised speeds to the PHY type fields in the
9551  * supplied PHY config struct. It uses the data from pcaps to check if the
9552  * saved settings are invalid and uses the pcaps data instead if they are
9553  * invalid.
9554  */
9555 static int
9556 ice_apply_saved_phy_req_to_cfg(struct ice_softc *sc,
9557 			       struct ice_aqc_set_phy_cfg_data *cfg)
9558 {
9559 	struct ice_phy_data phy_data = { 0 };
9560 	struct ice_port_info *pi = sc->hw.port_info;
9561 	u64 phy_low = 0, phy_high = 0;
9562 	u16 link_speeds;
9563 	int ret;
9564 
9565 	link_speeds = pi->phy.curr_user_speed_req;
9566 
9567 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LINK_MGMT_VER_2)) {
9568 		memset(&phy_data, 0, sizeof(phy_data));
9569 		phy_data.report_mode = ICE_AQC_REPORT_DFLT_CFG;
9570 		phy_data.user_speeds_orig = link_speeds;
9571 		ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9572 		if (ret != 0) {
9573 			/* Error message already printed within function */
9574 			return (ret);
9575 		}
9576 		phy_low = phy_data.phy_low_intr;
9577 		phy_high = phy_data.phy_high_intr;
9578 
9579 		if (link_speeds == 0 || phy_data.user_speeds_intr)
9580 			goto finalize_link_speed;
9581 		if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE)) {
9582 			memset(&phy_data, 0, sizeof(phy_data));
9583 			phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA;
9584 			phy_data.user_speeds_orig = link_speeds;
9585 			ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9586 			if (ret != 0) {
9587 				/* Error message already printed within function */
9588 				return (ret);
9589 			}
9590 			phy_low = phy_data.phy_low_intr;
9591 			phy_high = phy_data.phy_high_intr;
9592 
9593 			if (!phy_data.user_speeds_intr) {
9594 				phy_low = phy_data.phy_low_orig;
9595 				phy_high = phy_data.phy_high_orig;
9596 			}
9597 			goto finalize_link_speed;
9598 		}
9599 		/* If we're here, then it means the benefits of Version 2
9600 		 * link management aren't utilized.  We fall through to
9601 		 * handling Strict Link Mode the same as Version 1 link
9602 		 * management.
9603 		 */
9604 	}
9605 
9606 	memset(&phy_data, 0, sizeof(phy_data));
9607 	if ((link_speeds == 0) &&
9608 	    (sc->ldo_tlv.phy_type_low || sc->ldo_tlv.phy_type_high))
9609 		phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA;
9610 	else
9611 		phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_MEDIA;
9612 	phy_data.user_speeds_orig = link_speeds;
9613 	ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9614 	if (ret != 0) {
9615 		/* Error message already printed within function */
9616 		return (ret);
9617 	}
9618 	phy_low = phy_data.phy_low_intr;
9619 	phy_high = phy_data.phy_high_intr;
9620 
9621 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE)) {
9622 		if (phy_low == 0 && phy_high == 0) {
9623 			device_printf(sc->dev,
9624 			    "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
9625 			return (EINVAL);
9626 		}
9627 	} else {
9628 		if (link_speeds == 0) {
9629 			if (sc->ldo_tlv.phy_type_low & phy_low ||
9630 			    sc->ldo_tlv.phy_type_high & phy_high) {
9631 				phy_low &= sc->ldo_tlv.phy_type_low;
9632 				phy_high &= sc->ldo_tlv.phy_type_high;
9633 			}
9634 		} else if (phy_low == 0 && phy_high == 0) {
9635 			memset(&phy_data, 0, sizeof(phy_data));
9636 			phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA;
9637 			phy_data.user_speeds_orig = link_speeds;
9638 			ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9639 			if (ret != 0) {
9640 				/* Error message already printed within function */
9641 				return (ret);
9642 			}
9643 			phy_low = phy_data.phy_low_intr;
9644 			phy_high = phy_data.phy_high_intr;
9645 
9646 			if (!phy_data.user_speeds_intr) {
9647 				phy_low = phy_data.phy_low_orig;
9648 				phy_high = phy_data.phy_high_orig;
9649 			}
9650 		}
9651 	}
9652 
9653 finalize_link_speed:
9654 
9655 	/* Cache new user settings for speeds */
9656 	pi->phy.curr_user_speed_req = phy_data.user_speeds_intr;
9657 	cfg->phy_type_low = htole64(phy_low);
9658 	cfg->phy_type_high = htole64(phy_high);
9659 
9660 	return (ret);
9661 }
9662 
9663 /**
9664  * ice_apply_saved_fec_req_to_cfg -- Write saved user FEC mode to cfg data
9665  * @sc: device private structure
9666  * @cfg: new PHY config data to be modified
9667  *
9668  * Applies user setting for FEC mode to PHY config struct. It uses the data
9669  * from pcaps to check if the saved settings are invalid and uses the pcaps
9670  * data instead if they are invalid.
9671  */
9672 static int
9673 ice_apply_saved_fec_req_to_cfg(struct ice_softc *sc,
9674 			       struct ice_aqc_set_phy_cfg_data *cfg)
9675 {
9676 	struct ice_port_info *pi = sc->hw.port_info;
9677 	int status;
9678 
9679 	cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC;
9680 	status = ice_cfg_phy_fec(pi, cfg, pi->phy.curr_user_fec_req);
9681 	if (status)
9682 		return (EIO);
9683 
9684 	return (0);
9685 }
9686 
9687 /**
9688  * ice_apply_saved_fc_req_to_cfg -- Write saved user flow control mode to cfg data
9689  * @pi: port info struct
9690  * @cfg: new PHY config data to be modified
9691  *
9692  * Applies user setting for flow control mode to PHY config struct. There are
9693  * no invalid flow control mode settings; if there are, then this function
9694  * treats them like "ICE_FC_NONE".
9695  */
9696 static void
9697 ice_apply_saved_fc_req_to_cfg(struct ice_port_info *pi,
9698 			      struct ice_aqc_set_phy_cfg_data *cfg)
9699 {
9700 	cfg->caps &= ~(ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY |
9701 		       ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY);
9702 
9703 	switch (pi->phy.curr_user_fc_req) {
9704 	case ICE_FC_FULL:
9705 		cfg->caps |= ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY |
9706 			     ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY;
9707 		break;
9708 	case ICE_FC_RX_PAUSE:
9709 		cfg->caps |= ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY;
9710 		break;
9711 	case ICE_FC_TX_PAUSE:
9712 		cfg->caps |= ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY;
9713 		break;
9714 	default:
9715 		/* ICE_FC_NONE */
9716 		break;
9717 	}
9718 }
9719 
9720 /**
9721  * ice_apply_saved_phy_cfg -- Re-apply user PHY config settings
9722  * @sc: device private structure
9723  * @settings: which settings to apply
9724  *
9725  * Applies user settings for advertised speeds, FEC mode, and flow
9726  * control mode to a PHY config struct; it uses the data from pcaps
9727  * to check if the saved settings are invalid and uses the pcaps
9728  * data instead if they are invalid.
9729  *
9730  * For things like sysctls where only one setting needs to be
9731  * updated, the bitmap allows the caller to specify which setting
9732  * to update.
9733  */
9734 int
9735 ice_apply_saved_phy_cfg(struct ice_softc *sc, u8 settings)
9736 {
9737 	struct ice_aqc_set_phy_cfg_data cfg = { 0 };
9738 	struct ice_port_info *pi = sc->hw.port_info;
9739 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
9740 	struct ice_hw *hw = &sc->hw;
9741 	device_t dev = sc->dev;
9742 	u64 phy_low, phy_high;
9743 	int status;
9744 	enum ice_fec_mode dflt_fec_mode;
9745 	u16 dflt_user_speed;
9746 
9747 	if (!settings || settings > ICE_APPLY_LS_FEC_FC) {
9748 		ice_debug(hw, ICE_DBG_LINK, "Settings out-of-bounds: %u\n",
9749 		    settings);
9750 	}
9751 
9752 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
9753 				     &pcaps, NULL);
9754 	if (status) {
9755 		device_printf(dev,
9756 		    "%s: ice_aq_get_phy_caps (ACTIVE) failed; status %s, aq_err %s\n",
9757 		    __func__, ice_status_str(status),
9758 		    ice_aq_str(hw->adminq.sq_last_status));
9759 		return (EIO);
9760 	}
9761 
9762 	phy_low = le64toh(pcaps.phy_type_low);
9763 	phy_high = le64toh(pcaps.phy_type_high);
9764 
9765 	/* Save off initial config parameters */
9766 	dflt_user_speed = ice_aq_phy_types_to_link_speeds(phy_low, phy_high);
9767 	dflt_fec_mode = ice_caps_to_fec_mode(pcaps.caps, pcaps.link_fec_options);
9768 
9769 	/* Setup new PHY config */
9770 	ice_copy_phy_caps_to_cfg(pi, &pcaps, &cfg);
9771 
9772 	/* On error, restore active configuration values */
9773 	if ((settings & ICE_APPLY_LS) &&
9774 	    ice_apply_saved_phy_req_to_cfg(sc, &cfg)) {
9775 		pi->phy.curr_user_speed_req = dflt_user_speed;
9776 		cfg.phy_type_low = pcaps.phy_type_low;
9777 		cfg.phy_type_high = pcaps.phy_type_high;
9778 	}
9779 	if ((settings & ICE_APPLY_FEC) &&
9780 	    ice_apply_saved_fec_req_to_cfg(sc, &cfg)) {
9781 		pi->phy.curr_user_fec_req = dflt_fec_mode;
9782 	}
9783 	if (settings & ICE_APPLY_FC) {
9784 		/* No real error indicators for this process,
9785 		 * so we'll just have to assume it works. */
9786 		ice_apply_saved_fc_req_to_cfg(pi, &cfg);
9787 	}
9788 
9789 	/* Enable link and re-negotiate it */
9790 	cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK;
9791 
9792 	status = ice_aq_set_phy_cfg(hw, pi, &cfg, NULL);
9793 	if (status) {
9794 		/* Don't indicate failure if there's no media in the port.
9795 		 * The settings have been saved and will apply when media
9796 		 * is inserted.
9797 		 */
9798 		if ((status == ICE_ERR_AQ_ERROR) &&
9799 		    (hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY)) {
9800 			device_printf(dev,
9801 			    "%s: Setting will be applied when media is inserted\n",
9802 			    __func__);
9803 			return (0);
9804 		} else {
9805 			device_printf(dev,
9806 			    "%s: ice_aq_set_phy_cfg failed; status %s, aq_err %s\n",
9807 			    __func__, ice_status_str(status),
9808 			    ice_aq_str(hw->adminq.sq_last_status));
9809 			return (EIO);
9810 		}
9811 	}
9812 
9813 	return (0);
9814 }
9815 
9816 /**
9817  * ice_print_ldo_tlv - Print out LDO TLV information
9818  * @sc: device private structure
9819  * @tlv: LDO TLV information from the adapter NVM
9820  *
9821  * Dump out the information in tlv to the kernel message buffer; intended for
9822  * debugging purposes.
9823  */
9824 static void
9825 ice_print_ldo_tlv(struct ice_softc *sc, struct ice_link_default_override_tlv *tlv)
9826 {
9827 	device_t dev = sc->dev;
9828 
9829 	device_printf(dev, "TLV: -options     0x%02x\n", tlv->options);
9830 	device_printf(dev, "     -phy_config  0x%02x\n", tlv->phy_config);
9831 	device_printf(dev, "     -fec_options 0x%02x\n", tlv->fec_options);
9832 	device_printf(dev, "     -phy_high    0x%016llx\n",
9833 	    (unsigned long long)tlv->phy_type_high);
9834 	device_printf(dev, "     -phy_low     0x%016llx\n",
9835 	    (unsigned long long)tlv->phy_type_low);
9836 }
9837 
9838 /**
9839  * ice_set_link_management_mode -- Strict or lenient link management
9840  * @sc: device private structure
9841  *
9842  * Some NVMs give the adapter the option to advertise a superset of link
9843  * configurations.  This checks to see if that option is enabled.
9844  * Further, the NVM could also provide a specific set of configurations
9845  * to try; these are cached in the driver's private structure if they
9846  * are available.
9847  */
9848 void
9849 ice_set_link_management_mode(struct ice_softc *sc)
9850 {
9851 	struct ice_port_info *pi = sc->hw.port_info;
9852 	device_t dev = sc->dev;
9853 	struct ice_link_default_override_tlv tlv = { 0 };
9854 	int status;
9855 
9856 	/* Port must be in strict mode if FW version is below a certain
9857 	 * version. (i.e. Don't set lenient mode features)
9858 	 */
9859 	if (!(ice_fw_supports_link_override(&sc->hw)))
9860 		return;
9861 
9862 	status = ice_get_link_default_override(&tlv, pi);
9863 	if (status) {
9864 		device_printf(dev,
9865 		    "%s: ice_get_link_default_override failed; status %s, aq_err %s\n",
9866 		    __func__, ice_status_str(status),
9867 		    ice_aq_str(sc->hw.adminq.sq_last_status));
9868 		return;
9869 	}
9870 
9871 	if (sc->hw.debug_mask & ICE_DBG_LINK)
9872 		ice_print_ldo_tlv(sc, &tlv);
9873 
9874 	/* Set lenient link mode */
9875 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LENIENT_LINK_MODE) &&
9876 	    (!(tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE)))
9877 		ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_en);
9878 
9879 	/* FW supports reporting a default configuration */
9880 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LINK_MGMT_VER_2) &&
9881 	    ice_fw_supports_report_dflt_cfg(&sc->hw)) {
9882 		ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_en);
9883 		/* Knowing we're at a high enough firmware revision to
9884 		 * support this link management configuration, we don't
9885 		 * need to check/support earlier versions.
9886 		 */
9887 		return;
9888 	}
9889 
9890 	/* Default overrides only work if in lenient link mode */
9891 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LINK_MGMT_VER_1) &&
9892 	    ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE) &&
9893 	    (tlv.options & ICE_LINK_OVERRIDE_EN))
9894 		ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_en);
9895 
9896 	/* Cache the LDO TLV structure in the driver, since it
9897 	 * won't change during the driver's lifetime.
9898 	 */
9899 	sc->ldo_tlv = tlv;
9900 }
9901 
9902 /**
9903  * ice_set_link -- Set up/down link on phy
9904  * @sc: device private structure
9905  * @enabled: link status to set up
9906  *
9907  * This should be called when change of link status is needed.
9908  */
9909 void
9910 ice_set_link(struct ice_softc *sc, bool enabled)
9911 {
9912 	struct ice_hw *hw = &sc->hw;
9913 	device_t dev = sc->dev;
9914 	int status;
9915 
9916 	if (ice_driver_is_detaching(sc))
9917 		return;
9918 
9919 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA))
9920 		return;
9921 
9922 	if (enabled)
9923 		ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
9924 	else {
9925 		status = ice_aq_set_link_restart_an(hw->port_info, false, NULL);
9926 		if (status) {
9927 			if (hw->adminq.sq_last_status == ICE_AQ_RC_EMODE)
9928 				device_printf(dev,
9929 				    "%s: Link control not enabled in current device mode\n",
9930 				    __func__);
9931 			else
9932 				device_printf(dev,
9933 				    "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n",
9934 				    __func__, ice_status_str(status),
9935 				    ice_aq_str(hw->adminq.sq_last_status));
9936 		} else
9937 			sc->link_up = false;
9938 	}
9939 }
9940 
9941 /**
9942  * ice_init_saved_phy_cfg -- Set cached user PHY cfg settings with NVM defaults
9943  * @sc: device private structure
9944  *
9945  * This should be called before the tunables for these link settings
9946  * (e.g. advertise_speed) are added -- so that these defaults don't overwrite
9947  * the cached values that the sysctl handlers will write.
9948  *
9949  * This also needs to be called before ice_init_link_configuration, to ensure
9950  * that there are sane values that can be written if there is media available
9951  * in the port.
9952  */
9953 void
9954 ice_init_saved_phy_cfg(struct ice_softc *sc)
9955 {
9956 	struct ice_port_info *pi = sc->hw.port_info;
9957 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
9958 	struct ice_hw *hw = &sc->hw;
9959 	device_t dev = sc->dev;
9960 	int status;
9961 	u64 phy_low, phy_high;
9962 	u8 report_mode = ICE_AQC_REPORT_TOPO_CAP_MEDIA;
9963 
9964 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LINK_MGMT_VER_2))
9965 		report_mode = ICE_AQC_REPORT_DFLT_CFG;
9966 	status = ice_aq_get_phy_caps(pi, false, report_mode, &pcaps, NULL);
9967 	if (status) {
9968 		device_printf(dev,
9969 		    "%s: ice_aq_get_phy_caps (%s) failed; status %s, aq_err %s\n",
9970 		    __func__,
9971 		    report_mode == ICE_AQC_REPORT_DFLT_CFG ? "DFLT" : "w/MEDIA",
9972 		    ice_status_str(status),
9973 		    ice_aq_str(hw->adminq.sq_last_status));
9974 		return;
9975 	}
9976 
9977 	phy_low = le64toh(pcaps.phy_type_low);
9978 	phy_high = le64toh(pcaps.phy_type_high);
9979 
9980 	/* Save off initial config parameters */
9981 	pi->phy.curr_user_speed_req =
9982 	   ice_aq_phy_types_to_link_speeds(phy_low, phy_high);
9983 	pi->phy.curr_user_fec_req = ice_caps_to_fec_mode(pcaps.caps,
9984 	    pcaps.link_fec_options);
9985 	pi->phy.curr_user_fc_req = ice_caps_to_fc_mode(pcaps.caps);
9986 }
9987 
/**
 * ice_module_init - Module load callback
 *
 * Runs once when the module is loaded and sets up data structures that live
 * for the life of the device driver; currently this only calls
 * ice_rdma_init().
 *
 * Always returns 0.
 */
static int
ice_module_init(void)
{
	/* RDMA support is the only module-wide state to set up */
	ice_rdma_init();

	return (0);
}
10000 
/**
 * ice_module_exit - Module unload callback
 *
 * Releases whatever ice_module_init set up; currently this only calls
 * ice_rdma_exit().
 *
 * If this function returns non-zero, the module will not be unloaded. It
 * should only return such a value if the module cannot be unloaded at all,
 * such as due to outstanding memory references that cannot be revoked.
 * The current implementation always returns 0.
 */
static int
ice_module_exit(void)
{
	/* Counterpart of the ice_rdma_init() call made at load time */
	ice_rdma_exit();

	return (0);
}
10017 
10018 /**
10019  * ice_module_event_handler - Callback for module events
10020  * @mod: unused module_t parameter
10021  * @what: the event requested
10022  * @arg: unused event argument
10023  *
10024  * Callback used to handle module events from the stack. Used to allow the
10025  * driver to define custom behavior that should happen at module load and
10026  * unload.
10027  */
10028 int
10029 ice_module_event_handler(module_t __unused mod, int what, void __unused *arg)
10030 {
10031 	switch (what) {
10032 	case MOD_LOAD:
10033 		return ice_module_init();
10034 	case MOD_UNLOAD:
10035 		return ice_module_exit();
10036 	default:
10037 		/* TODO: do we need to handle MOD_QUIESCE and MOD_SHUTDOWN? */
10038 		return (EOPNOTSUPP);
10039 	}
10040 }
10041 
10042 /**
10043  * ice_handle_nvm_access_ioctl - Handle an NVM access ioctl request
10044  * @sc: the device private softc
10045  * @ifd: ifdrv ioctl request pointer
10046  */
10047 int
10048 ice_handle_nvm_access_ioctl(struct ice_softc *sc, struct ifdrv *ifd)
10049 {
10050 	union ice_nvm_access_data *data;
10051 	struct ice_nvm_access_cmd *cmd;
10052 	size_t ifd_len = ifd->ifd_len, malloc_len;
10053 	struct ice_hw *hw = &sc->hw;
10054 	device_t dev = sc->dev;
10055 	int status;
10056 	u8 *nvm_buffer;
10057 	int err;
10058 
10059 	/*
10060 	 * ifioctl forwards SIOCxDRVSPEC to iflib without performing
10061 	 * a privilege check. In turn, iflib forwards the ioctl to the driver
10062 	 * without performing a privilege check. Perform one here to ensure
10063 	 * that non-privileged threads cannot access this interface.
10064 	 */
10065 	err = priv_check(curthread, PRIV_DRIVER);
10066 	if (err)
10067 		return (err);
10068 
10069 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
10070 		device_printf(dev, "%s: Driver must rebuild data structures after a reset. Operation aborted.\n",
10071 			      __func__);
10072 		return (EBUSY);
10073 	}
10074 
10075 	if (ifd_len < sizeof(struct ice_nvm_access_cmd)) {
10076 		device_printf(dev, "%s: ifdrv length is too small. Got %zu, but expected %zu\n",
10077 			      __func__, ifd_len, sizeof(struct ice_nvm_access_cmd));
10078 		return (EINVAL);
10079 	}
10080 
10081 	if (ifd->ifd_data == NULL) {
10082 		device_printf(dev, "%s: ifd data buffer not present.\n",
10083 			      __func__);
10084 		return (EINVAL);
10085 	}
10086 
10087 	/*
10088 	 * If everything works correctly, ice_handle_nvm_access should not
10089 	 * modify data past the size of the ioctl length. However, it could
10090 	 * lead to memory corruption if it did. Make sure to allocate at least
10091 	 * enough space for the command and data regardless. This
10092 	 * ensures that any access to the data union will not access invalid
10093 	 * memory.
10094 	 */
10095 	malloc_len = max(ifd_len, sizeof(*data) + sizeof(*cmd));
10096 
10097 	nvm_buffer = (u8 *)malloc(malloc_len, M_ICE, M_ZERO | M_WAITOK);
10098 	if (!nvm_buffer)
10099 		return (ENOMEM);
10100 
10101 	/* Copy the NVM access command and data in from user space */
10102 	/* coverity[tainted_data_argument] */
10103 	err = copyin(ifd->ifd_data, nvm_buffer, ifd_len);
10104 	if (err) {
10105 		device_printf(dev, "%s: Copying request from user space failed, err %s\n",
10106 			      __func__, ice_err_str(err));
10107 		goto cleanup_free_nvm_buffer;
10108 	}
10109 
10110 	/*
10111 	 * The NVM command structure is immediately followed by data which
10112 	 * varies in size based on the command.
10113 	 */
10114 	cmd = (struct ice_nvm_access_cmd *)nvm_buffer;
10115 	data = (union ice_nvm_access_data *)(nvm_buffer + sizeof(struct ice_nvm_access_cmd));
10116 
10117 	/* Handle the NVM access request */
10118 	status = ice_handle_nvm_access(hw, cmd, data);
10119 	if (status)
10120 		ice_debug(hw, ICE_DBG_NVM,
10121 			  "NVM access request failed, err %s\n",
10122 			  ice_status_str(status));
10123 
10124 	/* Copy the possibly modified contents of the handled request out */
10125 	err = copyout(nvm_buffer, ifd->ifd_data, ifd_len);
10126 	if (err) {
10127 		device_printf(dev, "%s: Copying response back to user space failed, err %s\n",
10128 			      __func__, ice_err_str(err));
10129 		goto cleanup_free_nvm_buffer;
10130 	}
10131 
10132 	/* Convert private status to an error code for proper ioctl response */
10133 	switch (status) {
10134 	case 0:
10135 		err = (0);
10136 		break;
10137 	case ICE_ERR_NO_MEMORY:
10138 		err = (ENOMEM);
10139 		break;
10140 	case ICE_ERR_OUT_OF_RANGE:
10141 		err = (ENOTTY);
10142 		break;
10143 	case ICE_ERR_PARAM:
10144 	default:
10145 		err = (EINVAL);
10146 		break;
10147 	}
10148 
10149 cleanup_free_nvm_buffer:
10150 	free(nvm_buffer, M_ICE);
10151 	return err;
10152 }
10153 
10154 /**
10155  * ice_read_sff_eeprom - Read data from SFF eeprom
10156  * @sc: device softc
10157  * @dev_addr: I2C device address (typically 0xA0 or 0xA2)
10158  * @offset: offset into the eeprom
10159  * @data: pointer to data buffer to store read data in
10160  * @length: length to read; max length is 16
10161  *
10162  * Read from the SFF eeprom in the module for this PF's port. For more details
10163  * on the contents of an SFF eeprom, refer to SFF-8724 (SFP), SFF-8636 (QSFP),
10164  * and SFF-8024 (both).
10165  */
10166 int
10167 ice_read_sff_eeprom(struct ice_softc *sc, u16 dev_addr, u16 offset, u8* data, u16 length)
10168 {
10169 	struct ice_hw *hw = &sc->hw;
10170 	int ret = 0, retries = 0;
10171 	int status;
10172 
10173 	if (length > 16)
10174 		return (EINVAL);
10175 
10176 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
10177 		return (ENOSYS);
10178 
10179 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA))
10180 		return (ENXIO);
10181 
10182 	do {
10183 		status = ice_aq_sff_eeprom(hw, 0, dev_addr,
10184 					   offset, 0, 0, data, length,
10185 					   false, NULL);
10186 		if (!status) {
10187 			ret = 0;
10188 			break;
10189 		}
10190 		if (status == ICE_ERR_AQ_ERROR &&
10191 		    hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY) {
10192 			ret = EBUSY;
10193 			continue;
10194 		}
10195 		if (status == ICE_ERR_AQ_ERROR &&
10196 		    hw->adminq.sq_last_status == ICE_AQ_RC_EACCES) {
10197 			/* FW says I2C access isn't supported */
10198 			ret = EACCES;
10199 			break;
10200 		}
10201 		if (status == ICE_ERR_AQ_ERROR &&
10202 		    hw->adminq.sq_last_status == ICE_AQ_RC_EPERM) {
10203 			device_printf(sc->dev,
10204 				  "%s: Module pointer location specified in command does not permit the required operation.\n",
10205 				  __func__);
10206 			ret = EPERM;
10207 			break;
10208 		} else {
10209 			device_printf(sc->dev,
10210 				  "%s: Error reading I2C data: err %s aq_err %s\n",
10211 				  __func__, ice_status_str(status),
10212 				  ice_aq_str(hw->adminq.sq_last_status));
10213 			ret = EIO;
10214 			break;
10215 		}
10216 	} while (retries++ < ICE_I2C_MAX_RETRIES);
10217 
10218 	if (ret == EBUSY)
10219 		device_printf(sc->dev,
10220 			  "%s: Error reading I2C data after %d retries\n",
10221 			  __func__, ICE_I2C_MAX_RETRIES);
10222 
10223 	return (ret);
10224 }
10225 
10226 /**
10227  * ice_handle_i2c_req - Driver independent I2C request handler
10228  * @sc: device softc
10229  * @req: The I2C parameters to use
10230  *
10231  * Read from the port's I2C eeprom using the parameters from the ioctl.
10232  */
10233 int
10234 ice_handle_i2c_req(struct ice_softc *sc, struct ifi2creq *req)
10235 {
10236 	return ice_read_sff_eeprom(sc, req->dev_addr, req->offset, req->data, req->len);
10237 }
10238 
10239 /**
10240  * ice_sysctl_read_i2c_diag_data - Read some module diagnostic data via i2c
10241  * @oidp: sysctl oid structure
10242  * @arg1: pointer to private data structure
10243  * @arg2: unused
10244  * @req: sysctl request pointer
10245  *
10246  * Read 8 bytes of diagnostic data from the SFF eeprom in the (Q)SFP module
10247  * inserted into the port.
10248  *
10249  *             | SFP A2  | QSFP Lower Page
10250  * ------------|---------|----------------
10251  * Temperature | 96-97	 | 22-23
10252  * Vcc         | 98-99   | 26-27
10253  * TX power    | 102-103 | 34-35..40-41
10254  * RX power    | 104-105 | 50-51..56-57
10255  */
10256 static int
10257 ice_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS)
10258 {
10259 	struct ice_softc *sc = (struct ice_softc *)arg1;
10260 	device_t dev = sc->dev;
10261 	struct sbuf *sbuf;
10262 	int ret;
10263 	u8 data[16];
10264 
10265 	UNREFERENCED_PARAMETER(arg2);
10266 	UNREFERENCED_PARAMETER(oidp);
10267 
10268 	if (ice_driver_is_detaching(sc))
10269 		return (ESHUTDOWN);
10270 
10271 	if (req->oldptr == NULL) {
10272 		ret = SYSCTL_OUT(req, 0, 128);
10273 		return (ret);
10274 	}
10275 
10276 	ret = ice_read_sff_eeprom(sc, 0xA0, 0, data, 1);
10277 	if (ret)
10278 		return (ret);
10279 
10280 	/* 0x3 for SFP; 0xD/0x11 for QSFP+/QSFP28 */
10281 	if (data[0] == 0x3) {
10282 		/*
10283 		 * Check for:
10284 		 * - Internally calibrated data
10285 		 * - Diagnostic monitoring is implemented
10286 		 */
10287 		ice_read_sff_eeprom(sc, 0xA0, 92, data, 1);
10288 		if (!(data[0] & 0x60)) {
10289 			device_printf(dev, "Module doesn't support diagnostics: 0xA0[92] = %02X\n", data[0]);
10290 			return (ENODEV);
10291 		}
10292 
10293 		sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10294 
10295 		ice_read_sff_eeprom(sc, 0xA2, 96, data, 4);
10296 		for (int i = 0; i < 4; i++)
10297 			sbuf_printf(sbuf, "%02X ", data[i]);
10298 
10299 		ice_read_sff_eeprom(sc, 0xA2, 102, data, 4);
10300 		for (int i = 0; i < 4; i++)
10301 			sbuf_printf(sbuf, "%02X ", data[i]);
10302 	} else if (data[0] == 0xD || data[0] == 0x11) {
10303 		/*
10304 		 * QSFP+ modules are always internally calibrated, and must indicate
10305 		 * what types of diagnostic monitoring are implemented
10306 		 */
10307 		sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10308 
10309 		ice_read_sff_eeprom(sc, 0xA0, 22, data, 2);
10310 		for (int i = 0; i < 2; i++)
10311 			sbuf_printf(sbuf, "%02X ", data[i]);
10312 
10313 		ice_read_sff_eeprom(sc, 0xA0, 26, data, 2);
10314 		for (int i = 0; i < 2; i++)
10315 			sbuf_printf(sbuf, "%02X ", data[i]);
10316 
10317 		ice_read_sff_eeprom(sc, 0xA0, 34, data, 2);
10318 		for (int i = 0; i < 2; i++)
10319 			sbuf_printf(sbuf, "%02X ", data[i]);
10320 
10321 		ice_read_sff_eeprom(sc, 0xA0, 50, data, 2);
10322 		for (int i = 0; i < 2; i++)
10323 			sbuf_printf(sbuf, "%02X ", data[i]);
10324 	} else {
10325 		device_printf(dev, "Module is not SFP/SFP+/SFP28/QSFP+ (%02X)\n", data[0]);
10326 		return (ENODEV);
10327 	}
10328 
10329 	sbuf_finish(sbuf);
10330 	sbuf_delete(sbuf);
10331 
10332 	return (0);
10333 }
10334 
10335 /**
10336  * ice_alloc_intr_tracking - Setup interrupt tracking structures
10337  * @sc: device softc structure
10338  *
10339  * Sets up the resource manager for keeping track of interrupt allocations,
10340  * and initializes the tracking maps for the PF's interrupt allocations.
10341  *
10342  * Unlike the scheme for queues, this is done in one step since both the
10343  * manager and the maps both have the same lifetime.
10344  *
10345  * @returns 0 on success, or an error code on failure.
10346  */
10347 int
10348 ice_alloc_intr_tracking(struct ice_softc *sc)
10349 {
10350 	struct ice_hw *hw = &sc->hw;
10351 	device_t dev = sc->dev;
10352 	int err;
10353 
10354 	if (hw->func_caps.common_cap.num_msix_vectors > ICE_MAX_MSIX_VECTORS) {
10355 		device_printf(dev, "%s: Invalid num_msix_vectors value (%u) received from FW.\n",
10356 			__func__,
10357 			hw->func_caps.common_cap.num_msix_vectors);
10358 		return (EINVAL);
10359 	}
10360 
10361 	/* Initialize the interrupt allocation manager */
10362 	err = ice_resmgr_init_contig_only(&sc->dev_imgr,
10363 	    hw->func_caps.common_cap.num_msix_vectors);
10364 	if (err) {
10365 		device_printf(dev, "Unable to initialize PF interrupt manager: %s\n",
10366 			      ice_err_str(err));
10367 		return (err);
10368 	}
10369 
10370 	/* Allocate PF interrupt mapping storage */
10371 	if (!(sc->pf_imap =
10372 	      (u16 *)malloc(sizeof(u16) * hw->func_caps.common_cap.num_msix_vectors,
10373 	      M_ICE, M_NOWAIT))) {
10374 		device_printf(dev, "Unable to allocate PF imap memory\n");
10375 		err = ENOMEM;
10376 		goto free_imgr;
10377 	}
10378 	if (!(sc->rdma_imap =
10379 	      (u16 *)malloc(sizeof(u16) * hw->func_caps.common_cap.num_msix_vectors,
10380 	      M_ICE, M_NOWAIT))) {
10381 		device_printf(dev, "Unable to allocate RDMA imap memory\n");
10382 		err = ENOMEM;
10383 		free(sc->pf_imap, M_ICE);
10384 		goto free_imgr;
10385 	}
10386 	for (u32 i = 0; i < hw->func_caps.common_cap.num_msix_vectors; i++) {
10387 		sc->pf_imap[i] = ICE_INVALID_RES_IDX;
10388 		sc->rdma_imap[i] = ICE_INVALID_RES_IDX;
10389 	}
10390 
10391 	return (0);
10392 
10393 free_imgr:
10394 	ice_resmgr_destroy(&sc->dev_imgr);
10395 	return (err);
10396 }
10397 
10398 /**
10399  * ice_free_intr_tracking - Free PF interrupt tracking structures
10400  * @sc: device softc structure
10401  *
10402  * Frees the interrupt resource allocation manager and the PF's owned maps.
10403  *
10404  * VF maps are released when the owning VF's are destroyed, which should always
10405  * happen before this function is called.
10406  */
10407 void
10408 ice_free_intr_tracking(struct ice_softc *sc)
10409 {
10410 	if (sc->pf_imap) {
10411 		ice_resmgr_release_map(&sc->dev_imgr, sc->pf_imap,
10412 				       sc->lan_vectors);
10413 		free(sc->pf_imap, M_ICE);
10414 		sc->pf_imap = NULL;
10415 	}
10416 	if (sc->rdma_imap) {
10417 		ice_resmgr_release_map(&sc->dev_imgr, sc->rdma_imap,
10418 				       sc->lan_vectors);
10419 		free(sc->rdma_imap, M_ICE);
10420 		sc->rdma_imap = NULL;
10421 	}
10422 
10423 	ice_resmgr_destroy(&sc->dev_imgr);
10424 
10425 	ice_resmgr_destroy(&sc->os_imgr);
10426 }
10427 
10428 /**
10429  * ice_apply_supported_speed_filter - Mask off unsupported speeds
10430  * @report_speeds: bit-field for the desired link speeds
10431  * @mod_type: type of module/sgmii connection we have
10432  *
10433  * Given a bitmap of the desired lenient mode link speeds,
10434  * this function will mask off the speeds that are not currently
10435  * supported by the device.
10436  */
10437 static u16
10438 ice_apply_supported_speed_filter(u16 report_speeds, u8 mod_type)
10439 {
10440 	u16 speed_mask;
10441 	enum { IS_SGMII, IS_SFP, IS_QSFP } module;
10442 
10443 	/*
10444 	 * The SFF specification says 0 is unknown, so we'll
10445 	 * treat it like we're connected through SGMII for now.
10446 	 * This may need revisiting if a new type is supported
10447 	 * in the future.
10448 	 */
10449 	switch (mod_type) {
10450 	case 0:
10451 		module = IS_SGMII;
10452 		break;
10453 	case 3:
10454 		module = IS_SFP;
10455 		break;
10456 	default:
10457 		module = IS_QSFP;
10458 		break;
10459 	}
10460 
10461 	/* We won't offer anything lower than 100M for any part,
10462 	 * but we'll need to mask off other speeds based on the
10463 	 * device and module type.
10464 	 */
10465 	speed_mask = ~((u16)ICE_AQ_LINK_SPEED_100MB - 1);
10466 	if ((report_speeds & ICE_AQ_LINK_SPEED_10GB) && (module == IS_SFP))
10467 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1);
10468 	if (report_speeds & ICE_AQ_LINK_SPEED_25GB)
10469 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1);
10470 	if (report_speeds & ICE_AQ_LINK_SPEED_50GB) {
10471 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1);
10472 		if (module == IS_QSFP)
10473 			speed_mask = ~((u16)ICE_AQ_LINK_SPEED_10GB - 1);
10474 	}
10475 	if ((report_speeds & ICE_AQ_LINK_SPEED_100GB) ||
10476 	    (report_speeds & ICE_AQ_LINK_SPEED_200GB))
10477 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_25GB - 1);
10478 	return (report_speeds & speed_mask);
10479 }
10480 
10481 /**
10482  * ice_init_health_events - Enable FW health event reporting
10483  * @sc: device softc
10484  *
10485  * Will try to enable firmware health event reporting, but shouldn't
10486  * cause any grief (to the caller) if this fails.
10487  */
10488 void
10489 ice_init_health_events(struct ice_softc *sc)
10490 {
10491 	int status;
10492 	u8 health_mask;
10493 
10494 	if ((!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_HEALTH_STATUS)) ||
10495 	    (!sc->enable_health_events))
10496 		return;
10497 
10498 	health_mask = ICE_AQC_HEALTH_STATUS_SET_PF_SPECIFIC_MASK |
10499 		      ICE_AQC_HEALTH_STATUS_SET_GLOBAL_MASK;
10500 
10501 	status = ice_aq_set_health_status_config(&sc->hw, health_mask, NULL);
10502 	if (status)
10503 		device_printf(sc->dev,
10504 		    "Failed to enable firmware health events, err %s aq_err %s\n",
10505 		    ice_status_str(status),
10506 		    ice_aq_str(sc->hw.adminq.sq_last_status));
10507 	else
10508 		ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_en);
10509 }
10510 
10511 /**
10512  * ice_print_health_status_string - Print message for given FW health event
10513  * @dev: the PCIe device
10514  * @elem: health status element containing status code
10515  *
10516  * A rather large list of possible health status codes and their associated
10517  * messages.
10518  */
10519 static void
10520 ice_print_health_status_string(device_t dev,
10521 			       struct ice_aqc_health_status_elem *elem)
10522 {
10523 	u16 status_code = le16toh(elem->health_status_code);
10524 
10525 	switch (status_code) {
10526 	case ICE_AQC_HEALTH_STATUS_INFO_RECOVERY:
10527 		device_printf(dev, "The device is in firmware recovery mode.\n");
10528 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10529 		break;
10530 	case ICE_AQC_HEALTH_STATUS_ERR_FLASH_ACCESS:
10531 		device_printf(dev, "The flash chip cannot be accessed.\n");
10532 		device_printf(dev, "Possible Solution: If issue persists, call customer support.\n");
10533 		break;
10534 	case ICE_AQC_HEALTH_STATUS_ERR_NVM_AUTH:
10535 		device_printf(dev, "NVM authentication failed.\n");
10536 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10537 		break;
10538 	case ICE_AQC_HEALTH_STATUS_ERR_OROM_AUTH:
10539 		device_printf(dev, "Option ROM authentication failed.\n");
10540 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10541 		break;
10542 	case ICE_AQC_HEALTH_STATUS_ERR_DDP_AUTH:
10543 		device_printf(dev, "DDP package failed.\n");
10544 		device_printf(dev, "Possible Solution: Update to latest base driver and DDP package.\n");
10545 		break;
10546 	case ICE_AQC_HEALTH_STATUS_ERR_NVM_COMPAT:
10547 		device_printf(dev, "NVM image is incompatible.\n");
10548 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10549 		break;
10550 	case ICE_AQC_HEALTH_STATUS_ERR_OROM_COMPAT:
10551 		device_printf(dev, "Option ROM is incompatible.\n");
10552 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10553 		break;
10554 	case ICE_AQC_HEALTH_STATUS_ERR_DCB_MIB:
10555 		device_printf(dev, "Supplied MIB file is invalid. DCB reverted to default configuration.\n");
10556 		device_printf(dev, "Possible Solution: Disable FW-LLDP and check DCBx system configuration.\n");
10557 		break;
10558 	case ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_STRICT:
10559 		device_printf(dev, "An unsupported module was detected.\n");
10560 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10561 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10562 		break;
10563 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_TYPE:
10564 		device_printf(dev, "Module type is not supported.\n");
10565 		device_printf(dev, "Possible Solution: Change or replace the module or cable.\n");
10566 		break;
10567 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_QUAL:
10568 		device_printf(dev, "Module is not qualified.\n");
10569 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10570 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10571 		device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n");
10572 		break;
10573 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_COMM:
10574 		device_printf(dev, "Device cannot communicate with the module.\n");
10575 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10576 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10577 		device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n");
10578 		break;
10579 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_CONFLICT:
10580 		device_printf(dev, "Unresolved module conflict.\n");
10581 		device_printf(dev, "Possible Solution 1: Manually set speed/duplex or use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10582 		device_printf(dev, "Possible Solution 2: If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.\n");
10583 		break;
10584 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_NOT_PRESENT:
10585 		device_printf(dev, "Module is not present.\n");
10586 		device_printf(dev, "Possible Solution 1: Check that the module is inserted correctly.\n");
10587 		device_printf(dev, "Possible Solution 2: If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.\n");
10588 		break;
10589 	case ICE_AQC_HEALTH_STATUS_INFO_MOD_UNDERUTILIZED:
10590 		device_printf(dev, "Underutilized module.\n");
10591 		device_printf(dev, "Possible Solution 1: Change or replace the module or cable.\n");
10592 		device_printf(dev, "Possible Solution 2: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10593 		break;
10594 	case ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_LENIENT:
10595 		device_printf(dev, "An unsupported module was detected.\n");
10596 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10597 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10598 		device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n");
10599 		break;
10600 	case ICE_AQC_HEALTH_STATUS_ERR_INVALID_LINK_CFG:
10601 		device_printf(dev, "Invalid link configuration.\n");
10602 		break;
10603 	case ICE_AQC_HEALTH_STATUS_ERR_PORT_ACCESS:
10604 		device_printf(dev, "Port hardware access error.\n");
10605 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10606 		break;
10607 	case ICE_AQC_HEALTH_STATUS_ERR_PORT_UNREACHABLE:
10608 		device_printf(dev, "A port is unreachable.\n");
10609 		device_printf(dev, "Possible Solution 1: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10610 		device_printf(dev, "Possible Solution 2: Update to the latest NVM image.\n");
10611 		break;
10612 	case ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_MOD_LIMITED:
10613 		device_printf(dev, "Port speed is limited due to module.\n");
10614 		device_printf(dev, "Possible Solution: Change the module or use Intel(R) Ethernet Port Configuration Tool to configure the port option to match the current module speed.\n");
10615 		break;
10616 	case ICE_AQC_HEALTH_STATUS_ERR_PARALLEL_FAULT:
10617 		device_printf(dev, "All configured link modes were attempted but failed to establish link.\n");
10618 		device_printf(dev, "The device will restart the process to establish link.\n");
10619 		device_printf(dev, "Possible Solution: Check link partner connection and configuration.\n");
10620 		break;
10621 	case ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_PHY_LIMITED:
10622 		device_printf(dev, "Port speed is limited by PHY capabilities.\n");
10623 		device_printf(dev, "Possible Solution 1: Change the module to align to port option.\n");
10624 		device_printf(dev, "Possible Solution 2: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10625 		break;
10626 	case ICE_AQC_HEALTH_STATUS_ERR_NETLIST_TOPO:
10627 		device_printf(dev, "LOM topology netlist is corrupted.\n");
10628 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10629 		break;
10630 	case ICE_AQC_HEALTH_STATUS_ERR_NETLIST:
10631 		device_printf(dev, "Unrecoverable netlist error.\n");
10632 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10633 		break;
10634 	case ICE_AQC_HEALTH_STATUS_ERR_TOPO_CONFLICT:
10635 		device_printf(dev, "Port topology conflict.\n");
10636 		device_printf(dev, "Possible Solution 1: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10637 		device_printf(dev, "Possible Solution 2: Update to the latest NVM image.\n");
10638 		break;
10639 	case ICE_AQC_HEALTH_STATUS_ERR_LINK_HW_ACCESS:
10640 		device_printf(dev, "Unrecoverable hardware access error.\n");
10641 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10642 		break;
10643 	case ICE_AQC_HEALTH_STATUS_ERR_LINK_RUNTIME:
10644 		device_printf(dev, "Unrecoverable runtime error.\n");
10645 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10646 		break;
10647 	case ICE_AQC_HEALTH_STATUS_ERR_DNL_INIT:
10648 		device_printf(dev, "Link management engine failed to initialize.\n");
10649 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10650 		break;
10651 	default:
10652 		break;
10653 	}
10654 }
10655 
10656 /**
10657  * ice_handle_health_status_event - helper function to output health status
10658  * @sc: device softc structure
10659  * @event: event received on a control queue
10660  *
10661  * Prints out the appropriate string based on the given Health Status Event
10662  * code.
10663  */
10664 static void
10665 ice_handle_health_status_event(struct ice_softc *sc,
10666 			       struct ice_rq_event_info *event)
10667 {
10668 	struct ice_aqc_health_status_elem *health_info;
10669 	u16 status_count;
10670 	int i;
10671 
10672 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_HEALTH_STATUS))
10673 		return;
10674 
10675 	health_info = (struct ice_aqc_health_status_elem *)event->msg_buf;
10676 	status_count = le16toh(event->desc.params.get_health_status.health_status_count);
10677 
10678 	if (status_count > (event->buf_len / sizeof(*health_info))) {
10679 		device_printf(sc->dev, "Received a health status event with invalid event count\n");
10680 		return;
10681 	}
10682 
10683 	for (i = 0; i < status_count; i++) {
10684 		ice_print_health_status_string(sc->dev, health_info);
10685 		health_info++;
10686 	}
10687 }
10688 
10689 /**
10690  * ice_set_default_local_lldp_mib - Possibly apply local LLDP MIB to FW
10691  * @sc: device softc structure
10692  *
10693  * This function needs to be called after link up; it makes sure the FW has
10694  * certain PFC/DCB settings. In certain configurations this will re-apply a
10695  * default local LLDP MIB configuration; this is intended to workaround a FW
10696  * behavior where these settings seem to be cleared on link up.
10697  */
10698 void
10699 ice_set_default_local_lldp_mib(struct ice_softc *sc)
10700 {
10701 	struct ice_hw *hw = &sc->hw;
10702 	struct ice_port_info *pi;
10703 	device_t dev = sc->dev;
10704 	int status;
10705 
10706 	/* Set Local MIB can disrupt flow control settings for
10707 	 * non-DCB-supported devices.
10708 	 */
10709 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_DCB))
10710 		return;
10711 
10712 	pi = hw->port_info;
10713 
10714 	/* Don't overwrite a custom SW configuration */
10715 	if (!pi->qos_cfg.is_sw_lldp &&
10716 	    !ice_test_state(&sc->state, ICE_STATE_MULTIPLE_TCS))
10717 		ice_set_default_local_mib_settings(sc);
10718 
10719 	status = ice_set_dcb_cfg(pi);
10720 
10721 	if (status)
10722 		device_printf(dev,
10723 		    "Error setting Local LLDP MIB: %s aq_err %s\n",
10724 		    ice_status_str(status),
10725 		    ice_aq_str(hw->adminq.sq_last_status));
10726 }
10727 
10728 /**
10729  * ice_sbuf_print_ets_cfg - Helper function to print ETS cfg
10730  * @sbuf: string buffer to print to
10731  * @name: prefix string to use
10732  * @ets: structure to pull values from
10733  *
10734  * A helper function for ice_sysctl_dump_dcbx_cfg(), this
10735  * formats the ETS rec and cfg TLVs into text.
10736  */
10737 static void
10738 ice_sbuf_print_ets_cfg(struct sbuf *sbuf, const char *name, struct ice_dcb_ets_cfg *ets)
10739 {
10740 	sbuf_printf(sbuf, "%s.willing: %u\n", name, ets->willing);
10741 	sbuf_printf(sbuf, "%s.cbs: %u\n", name, ets->cbs);
10742 	sbuf_printf(sbuf, "%s.maxtcs: %u\n", name, ets->maxtcs);
10743 
10744 	sbuf_printf(sbuf, "%s.prio_table:", name);
10745 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10746 		sbuf_printf(sbuf, " %d", ets->prio_table[i]);
10747 	sbuf_printf(sbuf, "\n");
10748 
10749 	sbuf_printf(sbuf, "%s.tcbwtable:", name);
10750 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10751 		sbuf_printf(sbuf, " %d", ets->tcbwtable[i]);
10752 	sbuf_printf(sbuf, "\n");
10753 
10754 	sbuf_printf(sbuf, "%s.tsatable:", name);
10755 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10756 		sbuf_printf(sbuf, " %d", ets->tsatable[i]);
10757 	sbuf_printf(sbuf, "\n");
10758 }
10759 
10760 /**
10761  * ice_sysctl_dump_dcbx_cfg - Print out DCBX/DCB config info
10762  * @oidp: sysctl oid structure
10763  * @arg1: pointer to private data structure
10764  * @arg2: AQ define for either Local or Remote MIB
10765  * @req: sysctl request pointer
10766  *
10767  * Prints out DCB/DCBX configuration, including the contents
10768  * of either the local or remote MIB, depending on the value
10769  * used in arg2.
10770  */
10771 static int
10772 ice_sysctl_dump_dcbx_cfg(SYSCTL_HANDLER_ARGS)
10773 {
10774 	struct ice_softc *sc = (struct ice_softc *)arg1;
10775 	struct ice_aqc_get_cee_dcb_cfg_resp cee_cfg = {};
10776 	struct ice_dcbx_cfg dcb_buf = {};
10777 	struct ice_dcbx_cfg *dcbcfg;
10778 	struct ice_hw *hw = &sc->hw;
10779 	device_t dev = sc->dev;
10780 	struct sbuf *sbuf;
10781 	int status;
10782 	u8 maxtcs, dcbx_status, is_sw_lldp;
10783 
10784 	UNREFERENCED_PARAMETER(oidp);
10785 
10786 	if (ice_driver_is_detaching(sc))
10787 		return (ESHUTDOWN);
10788 
10789 	is_sw_lldp = hw->port_info->qos_cfg.is_sw_lldp;
10790 
10791 	/* The driver doesn't receive a Remote MIB via SW */
10792 	if (is_sw_lldp && arg2 == ICE_AQ_LLDP_MIB_REMOTE)
10793 		return (ENOENT);
10794 
10795 	dcbcfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
10796 	if (!is_sw_lldp) {
10797 		/* Collect information from the FW in FW LLDP mode */
10798 		dcbcfg = &dcb_buf;
10799 		status = ice_aq_get_dcb_cfg(hw, (u8)arg2,
10800 		    ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbcfg);
10801 		if (status && arg2 == ICE_AQ_LLDP_MIB_REMOTE &&
10802 		    hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) {
10803 			device_printf(dev,
10804 			    "Unable to query Remote MIB; port has not received one yet\n");
10805 			return (ENOENT);
10806 		}
10807 		if (status) {
10808 			device_printf(dev, "Unable to query LLDP MIB, err %s aq_err %s\n",
10809 			    ice_status_str(status),
10810 			    ice_aq_str(hw->adminq.sq_last_status));
10811 			return (EIO);
10812 		}
10813 	}
10814 
10815 	status = ice_aq_get_cee_dcb_cfg(hw, &cee_cfg, NULL);
10816 	if (!status)
10817 		dcbcfg->dcbx_mode = ICE_DCBX_MODE_CEE;
10818 	else if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)
10819 		dcbcfg->dcbx_mode = ICE_DCBX_MODE_IEEE;
10820 	else
10821 		device_printf(dev, "Get CEE DCB Cfg AQ cmd err %s aq_err %s\n",
10822 		    ice_status_str(status),
10823 		    ice_aq_str(hw->adminq.sq_last_status));
10824 
10825 	maxtcs = hw->func_caps.common_cap.maxtc;
10826 	dcbx_status = ice_get_dcbx_status(hw);
10827 
10828 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10829 
10830 	/* Do the actual printing */
10831 	sbuf_printf(sbuf, "\n");
10832 	sbuf_printf(sbuf, "SW LLDP mode: %d\n", is_sw_lldp);
10833 	sbuf_printf(sbuf, "Function caps maxtcs: %d\n", maxtcs);
10834 	sbuf_printf(sbuf, "dcbx_status: %d\n", dcbx_status);
10835 
10836 	sbuf_printf(sbuf, "numapps: %u\n", dcbcfg->numapps);
10837 	sbuf_printf(sbuf, "CEE TLV status: %u\n", dcbcfg->tlv_status);
10838 	sbuf_printf(sbuf, "pfc_mode: %s\n", (dcbcfg->pfc_mode == ICE_QOS_MODE_DSCP) ?
10839 	    "DSCP" : "VLAN");
10840 	sbuf_printf(sbuf, "dcbx_mode: %s\n",
10841 	    (dcbcfg->dcbx_mode == ICE_DCBX_MODE_IEEE) ? "IEEE" :
10842 	    (dcbcfg->dcbx_mode == ICE_DCBX_MODE_CEE) ? "CEE" :
10843 	    "Unknown");
10844 
10845 	ice_sbuf_print_ets_cfg(sbuf, "etscfg", &dcbcfg->etscfg);
10846 	ice_sbuf_print_ets_cfg(sbuf, "etsrec", &dcbcfg->etsrec);
10847 
10848 	sbuf_printf(sbuf, "pfc.willing: %u\n", dcbcfg->pfc.willing);
10849 	sbuf_printf(sbuf, "pfc.mbc: %u\n", dcbcfg->pfc.mbc);
10850 	sbuf_printf(sbuf, "pfc.pfccap: 0x%0x\n", dcbcfg->pfc.pfccap);
10851 	sbuf_printf(sbuf, "pfc.pfcena: 0x%0x\n", dcbcfg->pfc.pfcena);
10852 
10853 	if (arg2 == ICE_AQ_LLDP_MIB_LOCAL) {
10854 		sbuf_printf(sbuf, "dscp_map:\n");
10855 		for (int i = 0; i < 8; i++) {
10856 			for (int j = 0; j < 8; j++)
10857 				sbuf_printf(sbuf, " %d",
10858 					    dcbcfg->dscp_map[i * 8 + j]);
10859 			sbuf_printf(sbuf, "\n");
10860 		}
10861 
10862 		sbuf_printf(sbuf, "\nLocal registers:\n");
10863 		sbuf_printf(sbuf, "PRTDCB_GENC.NUMTC: %d\n",
10864 		    (rd32(hw, PRTDCB_GENC) & PRTDCB_GENC_NUMTC_M)
10865 		        >> PRTDCB_GENC_NUMTC_S);
10866 		sbuf_printf(sbuf, "PRTDCB_TUP2TC: 0x%0x\n",
10867 		    (rd32(hw, PRTDCB_TUP2TC)));
10868 		sbuf_printf(sbuf, "PRTDCB_RUP2TC: 0x%0x\n",
10869 		    (rd32(hw, PRTDCB_RUP2TC)));
10870 		sbuf_printf(sbuf, "GLDCB_TC2PFC: 0x%0x\n",
10871 		    (rd32(hw, GLDCB_TC2PFC)));
10872 	}
10873 
10874 	/* Finish */
10875 	sbuf_finish(sbuf);
10876 	sbuf_delete(sbuf);
10877 
10878 	return (0);
10879 }
10880 
10881 /**
10882  * ice_sysctl_dump_vsi_cfg - print PF LAN VSI configuration
10883  * @oidp: sysctl oid structure
10884  * @arg1: pointer to private data structure
10885  * @arg2: unused
10886  * @req: sysctl request pointer
10887  *
10888  * XXX: This could be extended to apply to arbitrary PF-owned VSIs,
10889  * but for simplicity, this only works on the PF's LAN VSI.
10890  */
10891 static int
10892 ice_sysctl_dump_vsi_cfg(SYSCTL_HANDLER_ARGS)
10893 {
10894 	struct ice_softc *sc = (struct ice_softc *)arg1;
10895 	struct ice_vsi_ctx ctx = { 0 };
10896 	struct ice_hw *hw = &sc->hw;
10897 	device_t dev = sc->dev;
10898 	struct sbuf *sbuf;
10899 	int status;
10900 
10901 	UNREFERENCED_PARAMETER(oidp);
10902 	UNREFERENCED_PARAMETER(arg2);
10903 
10904 	if (ice_driver_is_detaching(sc))
10905 		return (ESHUTDOWN);
10906 
10907 	/* Get HW absolute index of a VSI */
10908 	ctx.vsi_num = ice_get_hw_vsi_num(hw, sc->pf_vsi.idx);
10909 
10910 	status = ice_aq_get_vsi_params(hw, &ctx, NULL);
10911 	if (status) {
10912 		device_printf(dev,
10913 		    "Get VSI AQ call failed, err %s aq_err %s\n",
10914 		    ice_status_str(status),
10915 		    ice_aq_str(hw->adminq.sq_last_status));
10916 		return (EIO);
10917 	}
10918 
10919 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10920 
10921 	/* Do the actual printing */
10922 	sbuf_printf(sbuf, "\n");
10923 
10924 	sbuf_printf(sbuf, "VSI NUM: %d\n", ctx.vsi_num);
10925 	sbuf_printf(sbuf, "VF  NUM: %d\n", ctx.vf_num);
10926 	sbuf_printf(sbuf, "VSIs allocated: %d\n", ctx.vsis_allocd);
10927 	sbuf_printf(sbuf, "VSIs unallocated: %d\n", ctx.vsis_unallocated);
10928 
10929 	sbuf_printf(sbuf, "Rx Queue Map method: %d\n",
10930 	    LE16_TO_CPU(ctx.info.mapping_flags));
10931 	/* The PF VSI is always contiguous, so there's no if-statement here */
10932 	sbuf_printf(sbuf, "Rx Queue base: %d\n",
10933 	    LE16_TO_CPU(ctx.info.q_mapping[0]));
10934 	sbuf_printf(sbuf, "Rx Queue count: %d\n",
10935 	    LE16_TO_CPU(ctx.info.q_mapping[1]));
10936 
10937 	sbuf_printf(sbuf, "TC qbases  :");
10938 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10939 		sbuf_printf(sbuf, " %4d",
10940 		    ctx.info.tc_mapping[i] & ICE_AQ_VSI_TC_Q_OFFSET_M);
10941 	}
10942 	sbuf_printf(sbuf, "\n");
10943 
10944 	sbuf_printf(sbuf, "TC qcounts :");
10945 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10946 		sbuf_printf(sbuf, " %4d",
10947 		    1 << (ctx.info.tc_mapping[i] >> ICE_AQ_VSI_TC_Q_NUM_S));
10948 	}
10949 
10950 	/* Finish */
10951 	sbuf_finish(sbuf);
10952 	sbuf_delete(sbuf);
10953 
10954 	return (0);
10955 }
10956 
10957 /**
10958  * ice_get_tx_rx_equalizations -- read serdes tx rx equalization params
10959  * @hw: pointer to the HW struct
10960  * @serdes_num: represents the serdes number
10961  * @ptr: structure to read all serdes parameter for given serdes
10962  *
10963  * returns all serdes equalization parameter supported per serdes number
10964  */
10965 static int
10966 ice_get_tx_rx_equalizations(struct ice_hw *hw, u8 serdes_num,
10967 			    struct ice_serdes_equalization *ptr)
10968 {
10969 	int err = 0;
10970 
10971 	if (!ptr)
10972 		return (EOPNOTSUPP);
10973 
10974 #define ICE_GET_PHY_EQUALIZATION(equ, dir, value) \
10975 	ice_aq_get_phy_equalization(hw, equ, dir, serdes_num, &(ptr->value))
10976 
10977 	err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_RX_EQU_PRE1,
10978 			ICE_AQC_OP_CODE_RX_EQU, rx_equalization_pre1);
10979 	if (err)
10980 		return err;
10981 
10982 	err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_RX_EQU_PRE2,
10983 			ICE_AQC_OP_CODE_RX_EQU, rx_equalization_pre2);
10984 	if (err)
10985 		return err;
10986 
10987 	err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_RX_EQU_POST1,
10988 			ICE_AQC_OP_CODE_RX_EQU, rx_equalization_post1);
10989 	if (err)
10990 		return err;
10991 
10992 	err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_RX_EQU_BFLF,
10993 			ICE_AQC_OP_CODE_RX_EQU, rx_equalization_bflf);
10994 	if (err)
10995 		return err;
10996 
10997 	err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_RX_EQU_BFHF,
10998 			ICE_AQC_OP_CODE_RX_EQU, rx_equalization_bfhf);
10999 	if (err)
11000 		return err;
11001 
11002 	err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_RX_EQU_DRATE,
11003 			ICE_AQC_OP_CODE_RX_EQU, rx_equalization_drate);
11004 	if (err)
11005 		return err;
11006 
11007 	err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_TX_EQU_PRE1,
11008 			ICE_AQC_OP_CODE_TX_EQU, tx_equalization_pre1);
11009 	if (err)
11010 		return err;
11011 
11012 	err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_TX_EQU_PRE2,
11013 			ICE_AQC_OP_CODE_TX_EQU, tx_equalization_pre2);
11014 	if (err)
11015 		return err;
11016 
11017 	err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_TX_EQU_PRE3,
11018 			ICE_AQC_OP_CODE_TX_EQU, tx_equalization_pre3);
11019 	if (err)
11020 		return err;
11021 
11022 	err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_TX_EQU_ATTEN,
11023 			ICE_AQC_OP_CODE_TX_EQU, tx_equalization_atten);
11024 	if (err)
11025 		return err;
11026 
11027 	err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_TX_EQU_POST1,
11028 			ICE_AQC_OP_CODE_TX_EQU, tx_equalization_post1);
11029 	if (err)
11030 		return err;
11031 
11032 	return (0);
11033 }
11034 
11035 /**
11036  * ice_fec_counter_read - reads FEC stats from PHY
11037  * @hw: pointer to the HW struct
11038  * @receiver_id: pcsquad at registerlevel
11039  * @reg_offset: register for the current request
11040  * @output: pointer to the caller-supplied buffer to return requested fec stats
11041  *
11042  * Returns fec stats from phy
11043  */
11044 static int
11045 ice_fec_counter_read(struct ice_hw *hw, u32 receiver_id, u32 reg_offset,
11046 			    u16 *output)
11047 {
11048 	u16 flag = (ICE_AQ_FLAG_RD | ICE_AQ_FLAG_BUF | ICE_AQ_FLAG_SI);
11049 	struct ice_sbq_msg_input msg = {};
11050 	int err = 0;
11051 
11052 	memset(&msg, 0, sizeof(msg));
11053 	msg.msg_addr_low = ICE_LO_WORD(reg_offset);
11054 	msg.msg_addr_high = ICE_LO_DWORD(receiver_id);
11055 	msg.opcode = ice_sbq_msg_rd;
11056 	msg.dest_dev = rmn_0;
11057 
11058 	err = ice_sbq_rw_reg(hw, &msg, flag);
11059 	if (err) {
11060 		return err;
11061 	}
11062 	*output = ICE_LO_WORD(msg.data);
11063 	return (0);
11064 }
11065 
11066 /**
11067  * ice_get_port_fec_stats - returns fec correctable, uncorrectable stats per pcsquad, pcsport
11068  * @hw: pointer to the HW struct
11069  * @pcs_quad: pcsquad for input port
11070  * @pcs_port: pcsport for input port
11071  * @fec_stats: buffer to hold fec statistics for given port
11072  *
11073  * Returns fec stats
11074  */
11075 static int
11076 ice_get_port_fec_stats(struct ice_hw *hw, u16 pcs_quad, u16 pcs_port,
11077 		       struct ice_fec_stats_to_sysctl *fec_stats)
11078 {
11079 	u32 uncorr_low_reg = 0, uncorr_high_reg = 0;
11080 	u16 uncorr_low_val = 0, uncorr_high_val = 0;
11081 	u32 corr_low_reg = 0, corr_high_reg = 0;
11082 	u16 corr_low_val = 0, corr_high_val = 0;
11083 	u32 receiver_id = 0;
11084 	int err;
11085 
11086 	switch (pcs_port) {
11087 	case 0:
11088 		corr_low_reg = ICE_RS_FEC_CORR_LOW_REG_PORT0;
11089 		corr_high_reg = ICE_RS_FEC_CORR_HIGH_REG_PORT0;
11090 		uncorr_low_reg = ICE_RS_FEC_UNCORR_LOW_REG_PORT0;
11091 		uncorr_high_reg = ICE_RS_FEC_UNCORR_HIGH_REG_PORT0;
11092 		break;
11093 	case 1:
11094 		corr_low_reg = ICE_RS_FEC_CORR_LOW_REG_PORT1;
11095 		corr_high_reg = ICE_RS_FEC_CORR_HIGH_REG_PORT1;
11096 		uncorr_low_reg = ICE_RS_FEC_UNCORR_LOW_REG_PORT1;
11097 		uncorr_high_reg = ICE_RS_FEC_UNCORR_HIGH_REG_PORT1;
11098 		break;
11099 	case 2:
11100 		corr_low_reg = ICE_RS_FEC_CORR_LOW_REG_PORT2;
11101 		corr_high_reg = ICE_RS_FEC_CORR_HIGH_REG_PORT2;
11102 		uncorr_low_reg = ICE_RS_FEC_UNCORR_LOW_REG_PORT2;
11103 		uncorr_high_reg = ICE_RS_FEC_UNCORR_HIGH_REG_PORT2;
11104 		break;
11105 	case 3:
11106 		corr_low_reg = ICE_RS_FEC_CORR_LOW_REG_PORT3;
11107 		corr_high_reg = ICE_RS_FEC_CORR_HIGH_REG_PORT3;
11108 		uncorr_low_reg = ICE_RS_FEC_UNCORR_LOW_REG_PORT3;
11109 		uncorr_high_reg = ICE_RS_FEC_UNCORR_HIGH_REG_PORT3;
11110 		break;
11111 	default:
11112 		return (EINVAL);
11113 	}
11114 	if (pcs_quad == 0)
11115 		receiver_id = ICE_RS_FEC_RECEIVER_ID_PCS0; /* MTIP PCS Quad 0 -FEC */
11116 	else if (pcs_quad == 1)
11117 		receiver_id = ICE_RS_FEC_RECEIVER_ID_PCS1; /* MTIP PCS Quad 1 -FEC */
11118 	else
11119 		return (EINVAL);
11120 
11121 	err = ice_fec_counter_read(hw, receiver_id, corr_low_reg,
11122 			&corr_low_val);
11123 	if (err)
11124 		return err;
11125 
11126 	err = ice_fec_counter_read(hw, receiver_id, corr_high_reg,
11127 			&corr_high_val);
11128 	if (err)
11129 		return err;
11130 
11131 	err = ice_fec_counter_read(hw, receiver_id, uncorr_low_reg,
11132 			&uncorr_low_val);
11133 	if (err)
11134 		return err;
11135 
11136 	err = ice_fec_counter_read(hw, receiver_id, uncorr_high_reg,
11137 			&uncorr_high_val);
11138 	if (err)
11139 		return err;
11140 
11141 	fec_stats->fec_corr_cnt_low =  corr_low_val;
11142 	fec_stats->fec_corr_cnt_high =  corr_high_val;
11143 	fec_stats->fec_uncorr_cnt_low =  uncorr_low_val;
11144 	fec_stats->fec_uncorr_cnt_high =  uncorr_high_val;
11145 
11146 	return (0);
11147 }
11148 
11149 /**
11150  * ice_is_serdes_muxed - returns whether serdes is muxed in hardware
11151  * @hw: pointer to the HW struct
11152  *
11153  * Returns True : when serdes is muxed
11154  *         False: when serdes is not muxed
11155  */
11156 static bool
11157 ice_is_serdes_muxed(struct ice_hw *hw)
11158 {
11159 	return (rd32(hw, 0xB81E0) & 0x4);
11160 }
11161 
11162 /**
11163  * ice_get_maxspeed - Get the max speed for given lport
11164  * @hw: pointer to the HW struct
11165  * @lport: logical port for which max speed is requested
11166  * @max_speed: return max speed for input lport
11167  */
11168 static int
11169 ice_get_maxspeed(struct ice_hw *hw, u8 lport, u8 *max_speed)
11170 {
11171 	struct ice_aqc_get_port_options_elem options[ICE_AQC_PORT_OPT_MAX] = {};
11172 	u8 option_count = ICE_AQC_PORT_OPT_MAX;
11173 	bool active_valid, pending_valid;
11174 	u8 active_idx, pending_idx;
11175 	int status;
11176 
11177 	status = ice_aq_get_port_options(hw, options, &option_count,
11178 			lport, true, &active_idx, &active_valid,
11179 			&pending_idx, &pending_valid);
11180 
11181 	if (status || active_idx >= ICE_AQC_PORT_OPT_MAX) {
11182 		ice_debug(hw, ICE_DBG_PHY, "Port split read err: %d\n", status);
11183 		return (EIO);
11184 	}
11185 
11186 	if (active_valid) {
11187 		ice_debug(hw, ICE_DBG_PHY, "Active idx: %d\n", active_idx);
11188 	} else {
11189 		ice_debug(hw, ICE_DBG_PHY, "No valid Active option\n");
11190 		return (EINVAL);
11191 	}
11192 	*max_speed = options[active_idx].max_lane_speed;
11193 
11194 	return (0);
11195 }
11196 
11197 /**
11198  * ice_update_port_topology - update port topology
11199  * @lport: logical port for which physical info requested
11200  * @port_topology: buffer to hold port topology
11201  * @is_muxed: serdes is muxed in hardware
11202  */
11203 static int
11204 ice_update_port_topology(u8 lport, struct ice_port_topology *port_topology,
11205 		bool is_muxed)
11206 {
11207 	switch (lport) {
11208 	case 0:
11209 		port_topology->pcs_quad_select = 0;
11210 		port_topology->pcs_port = 0;
11211 		port_topology->primary_serdes_lane = 0;
11212 		break;
11213 	case 1:
11214 		port_topology->pcs_quad_select = 1;
11215 		port_topology->pcs_port = 0;
11216 		if (is_muxed == true)
11217 			port_topology->primary_serdes_lane = 2;
11218 		else
11219 			port_topology->primary_serdes_lane = 4;
11220 		break;
11221 	case 2:
11222 		port_topology->pcs_quad_select = 0;
11223 		port_topology->pcs_port = 1;
11224 		port_topology->primary_serdes_lane = 1;
11225 		break;
11226 	case 3:
11227 		port_topology->pcs_quad_select = 1;
11228 		port_topology->pcs_port = 1;
11229 		if (is_muxed == true)
11230 			port_topology->primary_serdes_lane = 3;
11231 		else
11232 			port_topology->primary_serdes_lane = 5;
11233 		break;
11234 	case 4:
11235 		port_topology->pcs_quad_select = 0;
11236 		port_topology->pcs_port = 2;
11237 		port_topology->primary_serdes_lane = 2;
11238 		break;
11239 	case 5:
11240 		port_topology->pcs_quad_select = 1;
11241 		port_topology->pcs_port = 2;
11242 		port_topology->primary_serdes_lane = 6;
11243 		break;
11244 	case 6:
11245 		port_topology->pcs_quad_select = 0;
11246 		port_topology->pcs_port = 3;
11247 		port_topology->primary_serdes_lane = 3;
11248 		break;
11249 	case 7:
11250 		port_topology->pcs_quad_select = 1;
11251 		port_topology->pcs_port = 3;
11252 		port_topology->primary_serdes_lane = 7;
11253 		break;
11254 	default:
11255 		return (EINVAL);
11256 	}
11257 	return 0;
11258 }
11259 
11260 /**
11261  * ice_get_port_topology - returns physical topology
11262  * @hw: pointer to the HW struct
11263  * @lport: logical port for which physical info requested
11264  * @port_topology: buffer to hold port topology
11265  *
11266  * Returns the physical component associated with the Port like pcsquad, pcsport, serdesnumber
11267  */
11268 static int
11269 ice_get_port_topology(struct ice_hw *hw, u8 lport,
11270 		      struct ice_port_topology *port_topology)
11271 {
11272 	struct ice_aqc_get_link_topo cmd;
11273 	bool is_muxed = false;
11274 	u8 cage_type = 0;
11275 	u16 node_handle;
11276 	u8 ctx = 0;
11277 	int err;
11278 
11279 	if (!hw || !port_topology)
11280 		return (EINVAL);
11281 
11282 	if (hw->device_id >= ICE_DEV_ID_E810_XXV_BACKPLANE) {
11283 		port_topology->serdes_lane_count = 1;
11284 		if (lport == 0) {
11285 			port_topology->pcs_quad_select = 0;
11286 			port_topology->pcs_port = 0;
11287 			port_topology->primary_serdes_lane = 0;
11288 		} else if (lport == 1) {
11289 			port_topology->pcs_quad_select = 1;
11290 			port_topology->pcs_port = 0;
11291 			port_topology->primary_serdes_lane = 1;
11292 		} else {
11293 			return (EINVAL);
11294 		}
11295 		return (0);
11296 	}
11297 
11298 	memset(&cmd, 0, sizeof(cmd));
11299 	ctx = ICE_AQC_LINK_TOPO_NODE_TYPE_CAGE << ICE_AQC_LINK_TOPO_NODE_TYPE_S;
11300 	ctx |= ICE_AQC_LINK_TOPO_NODE_CTX_PORT << ICE_AQC_LINK_TOPO_NODE_CTX_S;
11301 	cmd.addr.topo_params.node_type_ctx = ctx;
11302 	cmd.addr.topo_params.index = 0;
11303 	cmd.addr.topo_params.lport_num = 0;
11304 	cmd.addr.topo_params.lport_num_valid = 0;
11305 
11306 	err = ice_aq_get_netlist_node(hw, &cmd, &cage_type, &node_handle);
11307 	if (err)
11308 		return (EINVAL);
11309 
11310 	is_muxed = ice_is_serdes_muxed(hw);
11311 
11312 	err = ice_update_port_topology(lport, port_topology, is_muxed);
11313 	if (err)
11314 		return err;
11315 
11316 	if (cage_type == 0x11 ||  /* SFP */
11317 	   cage_type == 0x12) {   /* SFP28 */
11318 		port_topology->serdes_lane_count = 1;
11319 	} else if (cage_type == 0x13 ||  /* QSFP */
11320 		  cage_type == 0x14) {   /* QSFP28 */
11321 		u8 max_speed = 0;
11322 
11323 		err = ice_get_maxspeed(hw, port_topology->primary_serdes_lane,
11324 		    &max_speed);
11325 		if (err)
11326 			return err;
11327 
11328 		if (max_speed == ICE_AQC_PORT_OPT_MAX_LANE_M)
11329 			device_printf(ice_hw_to_dev(hw),
11330 			    "%s: WARNING: reported max_lane_speed is N/A\n",
11331 			    __func__);
11332 
11333 		if (max_speed == ICE_AQC_PORT_OPT_MAX_LANE_100G)
11334 			port_topology->serdes_lane_count = 4;
11335 		else if (max_speed == ICE_AQC_PORT_OPT_MAX_LANE_50G)
11336 			port_topology->serdes_lane_count = 2;
11337 		else
11338 			port_topology->serdes_lane_count = 1;
11339 	} else
11340 		return (EINVAL);
11341 
11342 	ice_debug(hw, ICE_DBG_PHY, "%s: Port Topology (lport %d):\n",
11343 	    __func__, lport);
11344 	ice_debug(hw, ICE_DBG_PHY, "serdes lane count %d\n",
11345 	    port_topology->serdes_lane_count);
11346 	ice_debug(hw, ICE_DBG_PHY, "pcs quad select %d\n",
11347 	    port_topology->pcs_quad_select);
11348 	ice_debug(hw, ICE_DBG_PHY, "pcs port %d\n",
11349 	    port_topology->pcs_port);
11350 	ice_debug(hw, ICE_DBG_PHY, "primary serdes lane %d\n",
11351 	    port_topology->primary_serdes_lane);
11352 
11353 	return (0);
11354 }
11355 
11356 /**
11357  * ice_sysctl_dump_phy_stats - print PHY stats
11358  * @oidp: sysctl oid structure
11359  * @arg1: pointer to private data structure
11360  * @arg2: unused
11361  * @req: sysctl request pointer
11362  */
11363 static int
11364 ice_sysctl_dump_phy_stats(SYSCTL_HANDLER_ARGS)
11365 {
11366 	struct ice_regdump_to_sysctl ice_prv_regs_buf = {};
11367 	struct ice_softc *sc = (struct ice_softc *)arg1;
11368 	struct ice_port_topology port_topology;
11369 	struct ice_hw *hw = &sc->hw;
11370 	struct ice_port_info *pi;
11371 	device_t dev = sc->dev;
11372 	u8 serdes_num = 0;
11373 	unsigned int i;
11374 	int err = 0;
11375 	struct sbuf *sbuf;
11376 
11377 	pi = hw->port_info;
11378 
11379 	if (!pi) {
11380 		device_printf(dev, "Port info structure is null\n");
11381 		return (EINVAL);
11382 	}
11383 
11384 	UNREFERENCED_PARAMETER(oidp);
11385 	UNREFERENCED_PARAMETER(arg2);
11386 	UNREFERENCED_PARAMETER(req);
11387 
11388 	if (ice_driver_is_detaching(sc))
11389 		return (ESHUTDOWN);
11390 
11391 	if (ice_get_port_topology(hw, pi->lport, &port_topology) != 0) {
11392 		device_printf(dev,
11393 			      "Extended register dump failed for Lport %d\n",
11394 			      pi->lport);
11395 		return (EIO);
11396 	}
11397 
11398 	if (port_topology.serdes_lane_count > ICE_MAX_SERDES_LANE_COUNT) {
11399 		device_printf(dev,
11400 			"Extended register dump failed: Lport %d Serdes count %d\n",
11401 			pi->lport,
11402 			port_topology.serdes_lane_count);
11403 		return (EINVAL);
11404 	}
11405 
11406 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
11407 	/* Get serdes equalization parameter for available serdes */
11408 	for (i = 0; i < port_topology.serdes_lane_count; i++) {
11409 		serdes_num = port_topology.primary_serdes_lane + i;
11410 		err = ice_get_tx_rx_equalizations(hw, serdes_num,
11411 				&(ice_prv_regs_buf.equalization[i]));
11412 		if (err) {
11413 			device_printf(dev,
11414 			    "Serdes equalization get failed Lport %d Serdes %d Err %d\n",
11415 			    pi->lport,serdes_num, err);
11416 			sbuf_finish(sbuf);
11417 			sbuf_delete(sbuf);
11418 			return (EIO);
11419 		}
11420 		sbuf_printf(sbuf, "\nSerdes lane: %d\n", i);
11421 		sbuf_printf(sbuf, "RX PRE1 = %d\n",
11422 			ice_prv_regs_buf.equalization[i].rx_equalization_pre1);
11423 		sbuf_printf(sbuf, "RX PRE2 = %d\n",
11424 			(s16)ice_prv_regs_buf.equalization[i].rx_equalization_pre2);
11425 		sbuf_printf(sbuf, "RX POST1 = %d\n",
11426 			ice_prv_regs_buf.equalization[i].rx_equalization_post1);
11427 		sbuf_printf(sbuf, "RX BFLF = %d\n",
11428 			ice_prv_regs_buf.equalization[i].rx_equalization_bflf);
11429 		sbuf_printf(sbuf, "RX BFHF = %d\n",
11430 			ice_prv_regs_buf.equalization[i].rx_equalization_bfhf);
11431 		sbuf_printf(sbuf, "RX DRATE = %d\n",
11432 			(s16)ice_prv_regs_buf.equalization[i].rx_equalization_drate);
11433 		sbuf_printf(sbuf, "TX PRE1 = %d\n",
11434 			ice_prv_regs_buf.equalization[i].tx_equalization_pre1);
11435 		sbuf_printf(sbuf, "TX PRE2 = %d\n",
11436 			ice_prv_regs_buf.equalization[i].tx_equalization_pre2);
11437 		sbuf_printf(sbuf, "TX PRE3 = %d\n",
11438 			ice_prv_regs_buf.equalization[i].tx_equalization_pre3);
11439 		sbuf_printf(sbuf, "TX POST1 = %d\n",
11440 			ice_prv_regs_buf.equalization[i].tx_equalization_post1);
11441 		sbuf_printf(sbuf, "TX ATTEN = %d\n",
11442 			ice_prv_regs_buf.equalization[i].tx_equalization_atten);
11443 	}
11444 
11445 	/* Get fec  correctable , uncorrectable counter */
11446 	err = ice_get_port_fec_stats(hw, port_topology.pcs_quad_select,
11447 			             port_topology.pcs_port,
11448 				     &(ice_prv_regs_buf.stats));
11449 	if (err) {
11450 		device_printf(dev, "failed to get FEC stats Lport %d Err %d\n",
11451 				pi->lport, err);
11452 		sbuf_finish(sbuf);
11453 		sbuf_delete(sbuf);
11454 		return (EIO);
11455 	}
11456 
11457 	sbuf_printf(sbuf, "\nRS FEC Corrected codeword count = %d\n",
11458 			((u32)ice_prv_regs_buf.stats.fec_corr_cnt_high << 16) |
11459 			    ice_prv_regs_buf.stats.fec_corr_cnt_low);
11460 	sbuf_printf(sbuf, "RS FEC Uncorrected codeword count = %d\n",
11461 			((u32)ice_prv_regs_buf.stats.fec_uncorr_cnt_high << 16) |
11462 			    ice_prv_regs_buf.stats.fec_uncorr_cnt_low);
11463 
11464 	/* Finish */
11465 	sbuf_finish(sbuf);
11466 	sbuf_delete(sbuf);
11467 
11468 	return (0);
11469 }
11470 
11471 /**
11472  * ice_ets_str_to_tbl - Parse string into ETS table
11473  * @str: input string to parse
11474  * @table: output eight values used for ETS values
11475  * @limit: max valid value to accept for ETS values
11476  *
11477  * Parses a string and converts the eight values within
11478  * into a table that can be used in setting ETS settings
11479  * in a MIB.
11480  *
11481  * @return 0 on success, EINVAL if a parsed value is
11482  * not between 0 and limit.
11483  */
11484 static int
11485 ice_ets_str_to_tbl(const char *str, u8 *table, u8 limit)
11486 {
11487 	const char *str_start = str;
11488 	char *str_end;
11489 	long token;
11490 
11491 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
11492 		token = strtol(str_start, &str_end, 0);
11493 		if (token < 0 || token > limit)
11494 			return (EINVAL);
11495 
11496 		table[i] = (u8)token;
11497 		str_start = (str_end + 1);
11498 	}
11499 
11500 	return (0);
11501 }
11502 
11503 /**
11504  * ice_check_ets_bw - Check if ETS bw vals are valid
11505  * @table: eight values used for ETS bandwidth
11506  *
11507  * @return true if the sum of all 8 values in table
11508  * equals 100.
11509  */
11510 static bool
11511 ice_check_ets_bw(u8 *table)
11512 {
11513 	int sum = 0;
11514 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
11515 		sum += (int)table[i];
11516 
11517 	return (sum == 100);
11518 }
11519 
11520 /**
11521  * ice_cfg_pba_num - Determine if PBA Number is retrievable
11522  * @sc: the device private softc structure
11523  *
11524  * Sets the feature flag for the existence of a PBA number
11525  * based on the success of the read command.  This does not
11526  * cache the result.
11527  */
11528 void
11529 ice_cfg_pba_num(struct ice_softc *sc)
11530 {
11531 	u8 pba_string[32] = "";
11532 
11533 	if ((ice_is_bit_set(sc->feat_cap, ICE_FEATURE_HAS_PBA)) &&
11534 	    (ice_read_pba_string(&sc->hw, pba_string, sizeof(pba_string)) == 0))
11535 		ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_en);
11536 }
11537 
11538 /**
11539  * ice_sysctl_query_port_ets - print Port ETS Config from AQ
11540  * @oidp: sysctl oid structure
11541  * @arg1: pointer to private data structure
11542  * @arg2: unused
11543  * @req: sysctl request pointer
11544  */
11545 static int
11546 ice_sysctl_query_port_ets(SYSCTL_HANDLER_ARGS)
11547 {
11548 	struct ice_softc *sc = (struct ice_softc *)arg1;
11549 	struct ice_aqc_port_ets_elem port_ets = { 0 };
11550 	struct ice_hw *hw = &sc->hw;
11551 	struct ice_port_info *pi;
11552 	device_t dev = sc->dev;
11553 	struct sbuf *sbuf;
11554 	int status;
11555 	int i = 0;
11556 
11557 	UNREFERENCED_PARAMETER(oidp);
11558 	UNREFERENCED_PARAMETER(arg2);
11559 
11560 	if (ice_driver_is_detaching(sc))
11561 		return (ESHUTDOWN);
11562 
11563 	pi = hw->port_info;
11564 
11565 	status = ice_aq_query_port_ets(pi, &port_ets, sizeof(port_ets), NULL);
11566 	if (status) {
11567 		device_printf(dev,
11568 		    "Query Port ETS AQ call failed, err %s aq_err %s\n",
11569 		    ice_status_str(status),
11570 		    ice_aq_str(hw->adminq.sq_last_status));
11571 		return (EIO);
11572 	}
11573 
11574 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
11575 
11576 	/* Do the actual printing */
11577 	sbuf_printf(sbuf, "\n");
11578 
11579 	sbuf_printf(sbuf, "Valid TC map: 0x%x\n", port_ets.tc_valid_bits);
11580 
11581 	sbuf_printf(sbuf, "TC BW %%:");
11582 	ice_for_each_traffic_class(i) {
11583 		sbuf_printf(sbuf, " %3d", port_ets.tc_bw_share[i]);
11584 	}
11585 	sbuf_printf(sbuf, "\n");
11586 
11587 	sbuf_printf(sbuf, "EIR profile ID: %d\n", port_ets.port_eir_prof_id);
11588 	sbuf_printf(sbuf, "CIR profile ID: %d\n", port_ets.port_cir_prof_id);
11589 	sbuf_printf(sbuf, "TC Node prio: 0x%x\n", port_ets.tc_node_prio);
11590 
11591 	sbuf_printf(sbuf, "TC Node TEIDs:\n");
11592 	ice_for_each_traffic_class(i) {
11593 		sbuf_printf(sbuf, "%d: %d\n", i, port_ets.tc_node_teid[i]);
11594 	}
11595 
11596 	/* Finish */
11597 	sbuf_finish(sbuf);
11598 	sbuf_delete(sbuf);
11599 
11600 	return (0);
11601 }
11602 
11603 /**
11604  * ice_sysctl_dscp2tc_map - Map DSCP to hardware TCs
11605  * @oidp: sysctl oid structure
11606  * @arg1: pointer to private data structure
11607  * @arg2: which eight DSCP to UP mappings to configure (0 - 7)
11608  * @req: sysctl request pointer
11609  *
11610  * Gets or sets the current DSCP to UP table cached by the driver. Since there
11611  * are 64 possible DSCP values to configure, this sysctl only configures
11612  * chunks of 8 in that space at a time.
11613  *
11614  * This sysctl is only relevant in DSCP mode, and will only function in SW DCB
11615  * mode.
11616  */
11617 static int
11618 ice_sysctl_dscp2tc_map(SYSCTL_HANDLER_ARGS)
11619 {
11620 	struct ice_softc *sc = (struct ice_softc *)arg1;
11621 	struct ice_dcbx_cfg *local_dcbx_cfg;
11622 	struct ice_port_info *pi;
11623 	struct ice_hw *hw = &sc->hw;
11624 	device_t dev = sc->dev;
11625 	int status;
11626 	struct sbuf *sbuf;
11627 	int ret;
11628 
11629 	/* Store input rates from user */
11630 	char dscp_user_buf[128] = "";
11631 	u8 new_dscp_table_seg[ICE_MAX_TRAFFIC_CLASS] = {};
11632 
11633 	if (ice_driver_is_detaching(sc))
11634 		return (ESHUTDOWN);
11635 
11636 	if (req->oldptr == NULL && req->newptr == NULL) {
11637 		ret = SYSCTL_OUT(req, 0, 128);
11638 		return (ret);
11639 	}
11640 
11641 	pi = hw->port_info;
11642 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
11643 
11644 	sbuf = sbuf_new(NULL, dscp_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
11645 
11646 	/* Format DSCP-to-UP data for output */
11647 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
11648 		sbuf_printf(sbuf, "%d", local_dcbx_cfg->dscp_map[arg2 * 8 + i]);
11649 		if (i != ICE_MAX_TRAFFIC_CLASS - 1)
11650 			sbuf_printf(sbuf, ",");
11651 	}
11652 
11653 	sbuf_finish(sbuf);
11654 	sbuf_delete(sbuf);
11655 
11656 	/* Read in the new DSCP mapping values */
11657 	ret = sysctl_handle_string(oidp, dscp_user_buf, sizeof(dscp_user_buf), req);
11658 	if ((ret) || (req->newptr == NULL))
11659 		return (ret);
11660 
11661 	/* Don't allow setting changes in FW DCB mode */
11662 	if (!hw->port_info->qos_cfg.is_sw_lldp) {
11663 		device_printf(dev, "%s: DSCP mapping is not allowed in FW DCBX mode\n",
11664 		    __func__);
11665 		return (EINVAL);
11666 	}
11667 
11668 	/* Convert 8 values in a string to a table; this is similar to what
11669 	 * needs to be done for ETS settings, so this function can be re-used
11670 	 * for that purpose.
11671 	 */
11672 	ret = ice_ets_str_to_tbl(dscp_user_buf, new_dscp_table_seg,
11673 	    ICE_MAX_TRAFFIC_CLASS - 1);
11674 	if (ret) {
11675 		device_printf(dev, "%s: Could not parse input DSCP2TC table: %s\n",
11676 		    __func__, dscp_user_buf);
11677 		return (ret);
11678 	}
11679 
11680 	memcpy(&local_dcbx_cfg->dscp_map[arg2 * 8], new_dscp_table_seg,
11681 	    sizeof(new_dscp_table_seg));
11682 
11683 	local_dcbx_cfg->app_mode = ICE_DCBX_APPS_NON_WILLING;
11684 
11685 	status = ice_set_dcb_cfg(pi);
11686 	if (status) {
11687 		device_printf(dev,
11688 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
11689 		    __func__, ice_status_str(status),
11690 		    ice_aq_str(hw->adminq.sq_last_status));
11691 		return (EIO);
11692 	}
11693 
11694 	ice_do_dcb_reconfig(sc, false);
11695 
11696 	return (0);
11697 }
11698 
11699 /**
11700  * ice_handle_debug_dump_ioctl - Handle a debug dump ioctl request
11701  * @sc: the device private softc
11702  * @ifd: ifdrv ioctl request pointer
11703  */
11704 int
11705 ice_handle_debug_dump_ioctl(struct ice_softc *sc, struct ifdrv *ifd)
11706 {
11707 	size_t ifd_len = ifd->ifd_len;
11708 	struct ice_hw *hw = &sc->hw;
11709 	device_t dev = sc->dev;
11710 	struct ice_debug_dump_cmd *ddc;
11711 	int status;
11712 	int err = 0;
11713 
11714 	/* Returned arguments from the Admin Queue */
11715 	u16 ret_buf_size = 0;
11716 	u16 ret_next_cluster = 0;
11717 	u16 ret_next_table = 0;
11718 	u32 ret_next_index = 0;
11719 
11720 	/*
11721 	 * ifioctl forwards SIOCxDRVSPEC to iflib without performing
11722 	 * a privilege check. In turn, iflib forwards the ioctl to the driver
11723 	 * without performing a privilege check. Perform one here to ensure
11724 	 * that non-privileged threads cannot access this interface.
11725 	 */
11726 	err = priv_check(curthread, PRIV_DRIVER);
11727 	if (err)
11728 		return (err);
11729 
11730 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
11731 		device_printf(dev,
11732 		    "%s: Driver must rebuild data structures after a reset. Operation aborted.\n",
11733 		    __func__);
11734 		return (EBUSY);
11735 	}
11736 
11737 	if (ifd_len < sizeof(*ddc)) {
11738 		device_printf(dev,
11739 		    "%s: ifdrv length is too small. Got %zu, but expected %zu\n",
11740 		    __func__, ifd_len, sizeof(*ddc));
11741 		return (EINVAL);
11742 	}
11743 
11744 	if (ifd->ifd_data == NULL) {
11745 		device_printf(dev, "%s: ifd data buffer not present.\n",
11746 		     __func__);
11747 		return (EINVAL);
11748 	}
11749 
11750 	ddc = (struct ice_debug_dump_cmd *)malloc(ifd_len, M_ICE, M_ZERO | M_NOWAIT);
11751 	if (!ddc)
11752 		return (ENOMEM);
11753 
11754 	/* Copy the NVM access command and data in from user space */
11755 	/* coverity[tainted_data_argument] */
11756 	err = copyin(ifd->ifd_data, ddc, ifd_len);
11757 	if (err) {
11758 		device_printf(dev, "%s: Copying request from user space failed, err %s\n",
11759 			      __func__, ice_err_str(err));
11760 		goto out;
11761 	}
11762 
11763 	/* The data_size arg must be at least 1 for the AQ cmd to work */
11764 	if (ddc->data_size == 0) {
11765 		device_printf(dev,
11766 		    "%s: data_size must be greater than 0\n", __func__);
11767 		err = EINVAL;
11768 		goto out;
11769 	}
11770 	/* ...and it can't be too long */
11771 	if (ddc->data_size > (ifd_len - sizeof(*ddc))) {
11772 		device_printf(dev,
11773 		    "%s: data_size (%d) is larger than ifd_len space (%zu)?\n", __func__,
11774 		    ddc->data_size, ifd_len - sizeof(*ddc));
11775 		err = EINVAL;
11776 		goto out;
11777 	}
11778 
11779 	/* Make sure any possible data buffer space is zeroed */
11780 	memset(ddc->data, 0, ifd_len - sizeof(*ddc));
11781 
11782 	status = ice_aq_get_internal_data(hw, ddc->cluster_id, ddc->table_id, ddc->offset,
11783 	    (u8 *)ddc->data, ddc->data_size, &ret_buf_size,
11784 	    &ret_next_cluster, &ret_next_table, &ret_next_index, NULL);
11785 	ice_debug(hw, ICE_DBG_DIAG, "%s: ret_buf_size %d, ret_next_table %d, ret_next_index %d\n",
11786 	    __func__, ret_buf_size, ret_next_table, ret_next_index);
11787 	if (status) {
11788 		device_printf(dev,
11789 		    "%s: Get Internal Data AQ command failed, err %s aq_err %s\n",
11790 		    __func__,
11791 		    ice_status_str(status),
11792 		    ice_aq_str(hw->adminq.sq_last_status));
11793 		goto aq_error;
11794 	}
11795 
11796 	ddc->table_id = ret_next_table;
11797 	ddc->offset = ret_next_index;
11798 	ddc->data_size = ret_buf_size;
11799 	ddc->cluster_id = ret_next_cluster;
11800 
11801 	/* Copy the possibly modified contents of the handled request out */
11802 	err = copyout(ddc, ifd->ifd_data, ifd->ifd_len);
11803 	if (err) {
11804 		device_printf(dev, "%s: Copying response back to user space failed, err %s\n",
11805 			      __func__, ice_err_str(err));
11806 		goto out;
11807 	}
11808 
11809 aq_error:
11810 	/* Convert private status to an error code for proper ioctl response */
11811 	switch (status) {
11812 	case 0:
11813 		err = (0);
11814 		break;
11815 	case ICE_ERR_NO_MEMORY:
11816 		err = (ENOMEM);
11817 		break;
11818 	case ICE_ERR_OUT_OF_RANGE:
11819 		err = (ENOTTY);
11820 		break;
11821 	case ICE_ERR_AQ_ERROR:
11822 		err = (EIO);
11823 		break;
11824 	case ICE_ERR_PARAM:
11825 	default:
11826 		err = (EINVAL);
11827 		break;
11828 	}
11829 
11830 out:
11831 	free(ddc, M_ICE);
11832 	return (err);
11833 }
11834 
11835 /**
11836  * ice_sysctl_allow_no_fec_mod_in_auto - Change Auto FEC behavior
11837  * @oidp: sysctl oid structure
11838  * @arg1: pointer to private data structure
11839  * @arg2: unused
11840  * @req: sysctl request pointer
11841  *
11842  * Allows user to let "No FEC" mode to be used in "Auto"
11843  * FEC mode during FEC negotiation. This is only supported
11844  * on newer firmware versions.
11845  */
11846 static int
11847 ice_sysctl_allow_no_fec_mod_in_auto(SYSCTL_HANDLER_ARGS)
11848 {
11849 	struct ice_softc *sc = (struct ice_softc *)arg1;
11850 	struct ice_hw *hw = &sc->hw;
11851 	device_t dev = sc->dev;
11852 	u8 user_flag;
11853 	int ret;
11854 
11855 	UNREFERENCED_PARAMETER(arg2);
11856 
11857 	ret = priv_check(curthread, PRIV_DRIVER);
11858 	if (ret)
11859 		return (ret);
11860 
11861 	if (ice_driver_is_detaching(sc))
11862 		return (ESHUTDOWN);
11863 
11864 	user_flag = (u8)sc->allow_no_fec_mod_in_auto;
11865 
11866 	ret = sysctl_handle_bool(oidp, &user_flag, 0, req);
11867 	if ((ret) || (req->newptr == NULL))
11868 		return (ret);
11869 
11870 	if (!ice_fw_supports_fec_dis_auto(hw)) {
11871 		log(LOG_INFO,
11872 		    "%s: Enabling or disabling of auto configuration of modules that don't support FEC is unsupported by the current firmware\n",
11873 		    device_get_nameunit(dev));
11874 		return (ENODEV);
11875 	}
11876 
11877 	if (user_flag == (bool)sc->allow_no_fec_mod_in_auto)
11878 		return (0);
11879 
11880 	sc->allow_no_fec_mod_in_auto = (u8)user_flag;
11881 
11882 	if (sc->allow_no_fec_mod_in_auto)
11883 		log(LOG_INFO, "%s: Enabled auto configuration of No FEC modules\n",
11884 		    device_get_nameunit(dev));
11885 	else
11886 		log(LOG_INFO,
11887 		    "%s: Auto configuration of No FEC modules reset to NVM defaults\n",
11888 		    device_get_nameunit(dev));
11889 
11890 	return (0);
11891 }
11892 
11893 /**
11894  * ice_sysctl_temperature - Retrieve NIC temp via AQ command
11895  * @oidp: sysctl oid structure
11896  * @arg1: pointer to private data structure
11897  * @arg2: unused
11898  * @req: sysctl request pointer
11899  *
11900  * If ICE_DBG_DIAG is set in the debug.debug_mask sysctl, then this will print
11901  * temperature threshold information in the kernel message log, too.
11902  */
11903 static int
11904 ice_sysctl_temperature(SYSCTL_HANDLER_ARGS)
11905 {
11906 	struct ice_aqc_get_sensor_reading_resp resp;
11907 	struct ice_softc *sc = (struct ice_softc *)arg1;
11908 	struct ice_hw *hw = &sc->hw;
11909 	device_t dev = sc->dev;
11910 	int status;
11911 
11912 	UNREFERENCED_PARAMETER(oidp);
11913 	UNREFERENCED_PARAMETER(arg2);
11914 
11915 	if (ice_driver_is_detaching(sc))
11916 		return (ESHUTDOWN);
11917 
11918 	status = ice_aq_get_sensor_reading(hw, ICE_AQC_INT_TEMP_SENSOR,
11919 	    ICE_AQC_INT_TEMP_FORMAT, &resp, NULL);
11920 	if (status) {
11921 		device_printf(dev,
11922 		    "Get Sensor Reading AQ call failed, err %s aq_err %s\n",
11923 		    ice_status_str(status),
11924 		    ice_aq_str(hw->adminq.sq_last_status));
11925 		return (EIO);
11926 	}
11927 
11928 	ice_debug(hw, ICE_DBG_DIAG, "%s: Warning Temp Threshold: %d\n", __func__,
11929 	    resp.data.s0f0.temp_warning_threshold);
11930 	ice_debug(hw, ICE_DBG_DIAG, "%s: Critical Temp Threshold: %d\n", __func__,
11931 	    resp.data.s0f0.temp_critical_threshold);
11932 	ice_debug(hw, ICE_DBG_DIAG, "%s: Fatal Temp Threshold: %d\n", __func__,
11933 	    resp.data.s0f0.temp_fatal_threshold);
11934 
11935 	return sysctl_handle_8(oidp, &resp.data.s0f0.temp, 0, req);
11936 }
11937 
11938 /**
11939  * ice_sysctl_create_mirror_interface - Create a new ifnet that monitors
11940  *     traffic from the main PF VSI
11941  */
11942 static int
11943 ice_sysctl_create_mirror_interface(SYSCTL_HANDLER_ARGS)
11944 {
11945 	struct ice_softc *sc = (struct ice_softc *)arg1;
11946 	device_t dev = sc->dev;
11947 	int ret;
11948 
11949 	UNREFERENCED_PARAMETER(arg2);
11950 
11951 	ret = priv_check(curthread, PRIV_DRIVER);
11952 	if (ret)
11953 		return (ret);
11954 
11955 	if (ice_driver_is_detaching(sc))
11956 		return (ESHUTDOWN);
11957 
11958 	/* If the user hasn't written "1" to this sysctl yet: */
11959 	if (!ice_test_state(&sc->state, ICE_STATE_DO_CREATE_MIRR_INTFC)) {
11960 		/* Avoid output on the first set of reads to this sysctl in
11961 		 * order to prevent a null byte from being written to the
11962 		 * end result when called via sysctl(8).
11963 		 */
11964 		if (req->oldptr == NULL && req->newptr == NULL) {
11965 			ret = SYSCTL_OUT(req, 0, 0);
11966 			return (ret);
11967 		}
11968 
11969 		char input_buf[2] = "";
11970 		ret = sysctl_handle_string(oidp, input_buf, sizeof(input_buf), req);
11971 		if ((ret) || (req->newptr == NULL))
11972 			return (ret);
11973 
11974 		/* If we get '1', then indicate we'll create the interface in
11975 		 * the next sysctl read call.
11976 		 */
11977 		if (input_buf[0] == '1') {
11978 			if (sc->mirr_if) {
11979 				device_printf(dev,
11980 				    "Mirror interface %s already exists!\n",
11981 				    if_name(sc->mirr_if->ifp));
11982 				return (EEXIST);
11983 			}
11984 			ice_set_state(&sc->state, ICE_STATE_DO_CREATE_MIRR_INTFC);
11985 			return (0);
11986 		}
11987 
11988 		return (EINVAL);
11989 	}
11990 
11991 	/* --- "Do Create Mirror Interface" is set --- */
11992 
11993 	/* Caller just wants the upper bound for size */
11994 	if (req->oldptr == NULL && req->newptr == NULL) {
11995 		ret = SYSCTL_OUT(req, 0, 128);
11996 		return (ret);
11997 	}
11998 
11999 	device_printf(dev, "Creating new mirroring interface...\n");
12000 
12001 	ret = ice_create_mirror_interface(sc);
12002 	if (ret)
12003 		return (ret);
12004 
12005 	ice_clear_state(&sc->state, ICE_STATE_DO_CREATE_MIRR_INTFC);
12006 
12007 	ret = sysctl_handle_string(oidp, __DECONST(char *, "Interface attached"), 0, req);
12008 	return (ret);
12009 }
12010 
12011 /**
12012  * ice_sysctl_destroy_mirror_interface - Destroy network interface that monitors
12013  *     traffic from the main PF VSI
12014  */
12015 static int
12016 ice_sysctl_destroy_mirror_interface(SYSCTL_HANDLER_ARGS)
12017 {
12018 	struct ice_softc *sc = (struct ice_softc *)arg1;
12019 	device_t dev = sc->dev;
12020 	int ret;
12021 
12022 	UNREFERENCED_PARAMETER(arg2);
12023 
12024 	ret = priv_check(curthread, PRIV_DRIVER);
12025 	if (ret)
12026 		return (ret);
12027 
12028 	if (ice_driver_is_detaching(sc))
12029 		return (ESHUTDOWN);
12030 
12031 	/* If the user hasn't written "1" to this sysctl yet: */
12032 	if (!ice_test_state(&sc->state, ICE_STATE_DO_DESTROY_MIRR_INTFC)) {
12033 		/* Avoid output on the first set of reads to this sysctl in
12034 		 * order to prevent a null byte from being written to the
12035 		 * end result when called via sysctl(8).
12036 		 */
12037 		if (req->oldptr == NULL && req->newptr == NULL) {
12038 			ret = SYSCTL_OUT(req, 0, 0);
12039 			return (ret);
12040 		}
12041 
12042 		char input_buf[2] = "";
12043 		ret = sysctl_handle_string(oidp, input_buf, sizeof(input_buf), req);
12044 		if ((ret) || (req->newptr == NULL))
12045 			return (ret);
12046 
12047 		/* If we get '1', then indicate we'll create the interface in
12048 		 * the next sysctl read call.
12049 		 */
12050 		if (input_buf[0] == '1') {
12051 			if (!sc->mirr_if) {
12052 				device_printf(dev,
12053 				    "No mirror interface exists!\n");
12054 				return (EINVAL);
12055 			}
12056 			ice_set_state(&sc->state, ICE_STATE_DO_DESTROY_MIRR_INTFC);
12057 			return (0);
12058 		}
12059 
12060 		return (EINVAL);
12061 	}
12062 
12063 	/* --- "Do Destroy Mirror Interface" is set --- */
12064 
12065 	/* Caller just wants the upper bound for size */
12066 	if (req->oldptr == NULL && req->newptr == NULL) {
12067 		ret = SYSCTL_OUT(req, 0, 128);
12068 		return (ret);
12069 	}
12070 
12071 	device_printf(dev, "Destroying mirroring interface...\n");
12072 
12073 	ice_destroy_mirror_interface(sc);
12074 
12075 	ice_clear_state(&sc->state, ICE_STATE_DO_DESTROY_MIRR_INTFC);
12076 
12077 	ret = sysctl_handle_string(oidp, __DECONST(char *, "Interface destroyed"), 0, req);
12078 	return (ret);
12079 }
12080