xref: /freebsd/sys/dev/ice/ice_lib.c (revision 95eb4b873b6a8b527c5bd78d7191975dfca38998)
1 /* SPDX-License-Identifier: BSD-3-Clause */
2 /*  Copyright (c) 2024, Intel Corporation
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *
8  *   1. Redistributions of source code must retain the above copyright notice,
9  *      this list of conditions and the following disclaimer.
10  *
11  *   2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *
15  *   3. Neither the name of the Intel Corporation nor the names of its
16  *      contributors may be used to endorse or promote products derived from
17  *      this software without specific prior written permission.
18  *
19  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *  POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /**
33  * @file ice_lib.c
34  * @brief Generic device setup and sysctl functions
35  *
36  * Library of generic device functions not specific to the networking stack.
37  *
38  * This includes hardware initialization functions, as well as handlers for
39  * many of the device sysctls used to probe driver status or tune specific
40  * behaviors.
41  */
42 
43 #include "ice_lib.h"
44 #include "ice_iflib.h"
45 #include <dev/pci/pcivar.h>
46 #include <dev/pci/pcireg.h>
47 #include <machine/resource.h>
48 #include <net/if_dl.h>
49 #include <sys/firmware.h>
50 #include <sys/priv.h>
51 #include <sys/limits.h>
52 
/**
 * @var M_ICE
 * @brief main ice driver allocation type
 *
 * malloc(9) allocation type used by the majority of memory allocations in the
 * ice driver. Allocations and frees in this file pass M_ICE as the type
 * argument to malloc() and free(). The second argument is the short type name
 * and the third is the longer human-readable description.
 */
MALLOC_DEFINE(M_ICE, "ice", "Intel(R) 100Gb Network Driver lib allocations");
61 
/*
 * Helper function prototypes
 *
 * Forward declarations for helpers so that they may be referenced before
 * their definitions appear in this file.
 */
static int ice_get_next_vsi(struct ice_vsi **all_vsi, int size);
static void ice_set_default_vsi_ctx(struct ice_vsi_ctx *ctx);
static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctx, enum ice_vsi_type type);
static int ice_setup_vsi_qmap(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx);
static int ice_setup_tx_ctx(struct ice_tx_queue *txq,
			    struct ice_tlan_ctx *tlan_ctx, u16 pf_q);
static int ice_setup_rx_ctx(struct ice_rx_queue *rxq);
static int ice_is_rxq_ready(struct ice_hw *hw, int pf_q, u32 *reg);
static void ice_free_fltr_list(struct ice_list_head *list);
static int ice_add_mac_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
			       const u8 *addr, enum ice_sw_fwd_act_type action);
static void ice_check_ctrlq_errors(struct ice_softc *sc, const char *qname,
				   struct ice_ctl_q_info *cq);
static void ice_process_link_event(struct ice_softc *sc, struct ice_rq_event_info *e);
static void ice_process_ctrlq_event(struct ice_softc *sc, const char *qname,
				    struct ice_rq_event_info *event);
static void ice_nvm_version_str(struct ice_hw *hw, struct sbuf *buf);
static void ice_active_pkg_version_str(struct ice_hw *hw, struct sbuf *buf);
static void ice_os_pkg_version_str(struct ice_hw *hw, struct sbuf *buf);
static bool ice_filter_is_mcast(struct ice_vsi *vsi, struct ice_fltr_info *info);
static u_int ice_sync_one_mcast_filter(void *p, struct sockaddr_dl *sdl, u_int errors);
static void ice_add_debug_tunables(struct ice_softc *sc);
static void ice_add_debug_sysctls(struct ice_softc *sc);
static void ice_vsi_set_rss_params(struct ice_vsi *vsi);
static void ice_get_default_rss_key(u8 *seed);
static int  ice_set_rss_key(struct ice_vsi *vsi);
static int  ice_set_rss_lut(struct ice_vsi *vsi);
static void ice_set_rss_flow_flds(struct ice_vsi *vsi);
static void ice_clean_vsi_rss_cfg(struct ice_vsi *vsi);
static const char *ice_aq_speed_to_str(struct ice_port_info *pi);
static const char *ice_requested_fec_mode(struct ice_port_info *pi);
static const char *ice_negotiated_fec_mode(struct ice_port_info *pi);
static const char *ice_autoneg_mode(struct ice_port_info *pi);
static const char *ice_flowcontrol_mode(struct ice_port_info *pi);
static void ice_print_bus_link_data(device_t dev, struct ice_hw *hw);
static void ice_set_pci_link_status_data(struct ice_hw *hw, u16 link_status);
static uint8_t ice_pcie_bandwidth_check(struct ice_softc *sc);
static uint64_t ice_pcie_bus_speed_to_rate(enum ice_pcie_bus_speed speed);
static int ice_pcie_lnk_width_to_int(enum ice_pcie_link_width width);
static uint64_t ice_phy_types_to_max_rate(struct ice_port_info *pi);
static void ice_add_sysctls_sw_stats(struct ice_vsi *vsi,
				     struct sysctl_ctx_list *ctx,
				     struct sysctl_oid *parent);
static void
ice_add_sysctls_mac_pfc_one_stat(struct sysctl_ctx_list *ctx,
				 struct sysctl_oid_list *parent_list,
				 u64* pfc_stat_location,
				 const char *node_name,
				 const char *descr);
static void ice_add_sysctls_mac_pfc_stats(struct sysctl_ctx_list *ctx,
					  struct sysctl_oid *parent,
					  struct ice_hw_port_stats *stats);
static void ice_setup_vsi_common(struct ice_softc *sc, struct ice_vsi *vsi,
				 enum ice_vsi_type type, int idx,
				 bool dynamic);
static void ice_handle_mib_change_event(struct ice_softc *sc,
				 struct ice_rq_event_info *event);
static void
ice_handle_lan_overflow_event(struct ice_softc *sc,
			      struct ice_rq_event_info *event);
static int ice_add_ethertype_to_list(struct ice_vsi *vsi,
				     struct ice_list_head *list,
				     u16 ethertype, u16 direction,
				     enum ice_sw_fwd_act_type action);
static void ice_del_rx_lldp_filter(struct ice_softc *sc);
static u16 ice_aq_phy_types_to_link_speeds(u64 phy_type_low,
					   u64 phy_type_high);
/*
 * Incomplete-type declaration: lets the prototype below take a pointer to
 * struct ice_phy_data before the structure's full definition is seen.
 */
struct ice_phy_data;
static int
ice_intersect_phy_types_and_speeds(struct ice_softc *sc,
				   struct ice_phy_data *phy_data);
static int
ice_apply_saved_phy_req_to_cfg(struct ice_softc *sc,
			       struct ice_aqc_set_phy_cfg_data *cfg);
static int
ice_apply_saved_fec_req_to_cfg(struct ice_softc *sc,
			       struct ice_aqc_set_phy_cfg_data *cfg);
static void
ice_apply_saved_fc_req_to_cfg(struct ice_port_info *pi,
			      struct ice_aqc_set_phy_cfg_data *cfg);
static void
ice_print_ldo_tlv(struct ice_softc *sc,
		  struct ice_link_default_override_tlv *tlv);
static void
ice_sysctl_speeds_to_aq_phy_types(u16 sysctl_speeds, u64 *phy_type_low,
				  u64 *phy_type_high);
static u16 ice_apply_supported_speed_filter(u16 report_speeds, u8 mod_type);
static void
ice_handle_health_status_event(struct ice_softc *sc,
			       struct ice_rq_event_info *event);
static void
ice_print_health_status_string(device_t dev,
			       struct ice_aqc_health_status_elem *elem);
static void
ice_debug_print_mib_change_event(struct ice_softc *sc,
				 struct ice_rq_event_info *event);
static bool ice_check_ets_bw(u8 *table);
static u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg);
static bool
ice_dcb_needs_reconfig(struct ice_softc *sc, struct ice_dcbx_cfg *old_cfg,
		       struct ice_dcbx_cfg *new_cfg);
static void ice_dcb_recfg(struct ice_softc *sc);
static u8 ice_dcb_tc_contig(u8 tc_map);
static int ice_ets_str_to_tbl(const char *str, u8 *table, u8 limit);
static int ice_pf_vsi_cfg_tc(struct ice_softc *sc, u8 tc_map);
static void ice_sbuf_print_ets_cfg(struct sbuf *sbuf, const char *name,
				   struct ice_dcb_ets_cfg *ets);
static void ice_stop_pf_vsi(struct ice_softc *sc);
static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt);
static int ice_config_pfc(struct ice_softc *sc, u8 new_mode);
/*
 * NOTE(review): this is the only prototype in this list declared without
 * "static" -- confirm whether external linkage is intended.
 */
void
ice_add_dscp2tc_map_sysctls(struct ice_softc *sc,
			    struct sysctl_ctx_list *ctx,
			    struct sysctl_oid_list *ctx_list);
static void ice_set_default_local_mib_settings(struct ice_softc *sc);
static bool ice_dscp_is_mapped(struct ice_dcbx_cfg *dcbcfg);
static void ice_start_dcbx_agent(struct ice_softc *sc);
static u16 ice_fw_debug_dump_print_cluster(struct ice_softc *sc,
					   struct sbuf *sbuf, u16 cluster_id);
static void ice_remove_vsi_mirroring(struct ice_vsi *vsi);

/* Module load/unload hooks */
static int ice_module_init(void);
static int ice_module_exit(void);

/*
 * package version comparison functions
 */
static bool pkg_ver_empty(struct ice_pkg_ver *pkg_ver, u8 *pkg_name);
static int pkg_ver_compatible(struct ice_pkg_ver *pkg_ver);

/*
 * dynamic sysctl handlers
 *
 * Most of these use the standard SYSCTL_HANDLER_ARGS signature. The few that
 * take an additional trailing argument (__ice_sysctl_phy_type_handler and
 * ice_sysctl_phy_caps) do not match that signature on their own; presumably
 * they are shared workers invoked by the thin handlers declared next to them.
 */
static int ice_sysctl_show_fw(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_pkg_version(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_os_pkg_version(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_dump_mac_filters(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_dump_vlan_filters(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_dump_ethertype_filters(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_dump_ethertype_mac_filters(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_current_speed(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_request_reset(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_dump_state_flags(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_fec_config(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_fc_config(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_negotiated_fc(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_negotiated_fec(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_phy_type_low(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_phy_type_high(SYSCTL_HANDLER_ARGS);
static int __ice_sysctl_phy_type_handler(SYSCTL_HANDLER_ARGS,
					 bool is_phy_type_high);
static int ice_sysctl_advertise_speed(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_rx_itr(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_tx_itr(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_fw_lldp_agent(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_fw_cur_lldp_persist_status(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_fw_dflt_lldp_persist_status(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_phy_caps(SYSCTL_HANDLER_ARGS, u8 report_mode);
static int ice_sysctl_phy_sw_caps(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_phy_nvm_caps(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_phy_topo_caps(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_phy_link_status(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_tx_cso_stat(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_rx_cso_stat(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_pba_number(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_rx_errors_stat(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_dump_dcbx_cfg(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_dump_vsi_cfg(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_ets_min_rate(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_up2tc_map(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_pfc_config(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_query_port_ets(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_dscp2tc_map(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_pfc_mode(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_fw_debug_dump_cluster_setting(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_fw_debug_dump_do_dump(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_allow_no_fec_mod_in_auto(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_set_link_active(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_debug_set_link(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_temperature(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_create_mirror_interface(SYSCTL_HANDLER_ARGS);
static int ice_sysctl_destroy_mirror_interface(SYSCTL_HANDLER_ARGS);
248 
249 /**
250  * ice_map_bar - Map PCIe BAR memory
251  * @dev: the PCIe device
252  * @bar: the BAR info structure
253  * @bar_num: PCIe BAR number
254  *
255  * Maps the specified PCIe BAR. Stores the mapping data in struct
256  * ice_bar_info.
257  */
258 int
259 ice_map_bar(device_t dev, struct ice_bar_info *bar, int bar_num)
260 {
261 	if (bar->res != NULL) {
262 		device_printf(dev, "PCI BAR%d already mapped\n", bar_num);
263 		return (EDOOFUS);
264 	}
265 
266 	bar->rid = PCIR_BAR(bar_num);
267 	bar->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar->rid,
268 					  RF_ACTIVE);
269 	if (!bar->res) {
270 		device_printf(dev, "PCI BAR%d mapping failed\n", bar_num);
271 		return (ENXIO);
272 	}
273 
274 	bar->tag = rman_get_bustag(bar->res);
275 	bar->handle = rman_get_bushandle(bar->res);
276 	bar->size = rman_get_size(bar->res);
277 
278 	return (0);
279 }
280 
281 /**
282  * ice_free_bar - Free PCIe BAR memory
283  * @dev: the PCIe device
284  * @bar: the BAR info structure
285  *
286  * Frees the specified PCIe BAR, releasing its resources.
287  */
288 void
289 ice_free_bar(device_t dev, struct ice_bar_info *bar)
290 {
291 	if (bar->res != NULL)
292 		bus_release_resource(dev, SYS_RES_MEMORY, bar->rid, bar->res);
293 	bar->res = NULL;
294 }
295 
296 /**
297  * ice_set_ctrlq_len - Configure ctrlq lengths for a device
298  * @hw: the device hardware structure
299  *
300  * Configures the control queues for the given device, setting up the
301  * specified lengths, prior to initializing hardware.
302  */
303 void
304 ice_set_ctrlq_len(struct ice_hw *hw)
305 {
306 	hw->adminq.num_rq_entries = ICE_AQ_LEN;
307 	hw->adminq.num_sq_entries = ICE_AQ_LEN;
308 	hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;
309 	hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
310 
311 	hw->mailboxq.num_rq_entries = ICE_MBXQ_LEN;
312 	hw->mailboxq.num_sq_entries = ICE_MBXQ_LEN;
313 	hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
314 	hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
315 
316 }
317 
/**
 * ice_get_next_vsi - Get the next available VSI slot
 * @all_vsi: the VSI list
 * @size: the size of the VSI list
 *
 * Scans @all_vsi from the start and returns the index of the first NULL
 * entry. If every one of the @size entries is occupied (or @size is not
 * positive), @size itself is returned.
 */
static int
ice_get_next_vsi(struct ice_vsi **all_vsi, int size)
{
	int slot = 0;

	/* Skip over occupied entries */
	while (slot < size && all_vsi[slot] != NULL)
		slot++;

	/* Report @size when the scan ran off the end of the list */
	return (slot < size) ? slot : size;
}
338 
339 /**
340  * ice_setup_vsi_common - Common VSI setup for both dynamic and static VSIs
341  * @sc: the device private softc structure
342  * @vsi: the VSI to setup
343  * @type: the VSI type of the new VSI
344  * @idx: the index in the all_vsi array to use
345  * @dynamic: whether this VSI memory was dynamically allocated
346  *
347  * Perform setup for a VSI that is common to both dynamically allocated VSIs
348  * and the static PF VSI which is embedded in the softc structure.
349  */
350 static void
351 ice_setup_vsi_common(struct ice_softc *sc, struct ice_vsi *vsi,
352 		     enum ice_vsi_type type, int idx, bool dynamic)
353 {
354 	/* Store important values in VSI struct */
355 	vsi->type = type;
356 	vsi->sc = sc;
357 	vsi->idx = idx;
358 	sc->all_vsi[idx] = vsi;
359 	vsi->dynamic = dynamic;
360 
361 	/* Set default mirroring rule information */
362 	vsi->rule_mir_ingress = ICE_INVAL_MIRROR_RULE_ID;
363 	vsi->rule_mir_egress = ICE_INVAL_MIRROR_RULE_ID;
364 
365 	/* Setup the VSI tunables now */
366 	ice_add_vsi_tunables(vsi, sc->vsi_sysctls);
367 }
368 
369 /**
370  * ice_alloc_vsi - Allocate a dynamic VSI
371  * @sc: device softc structure
372  * @type: VSI type
373  *
374  * Allocates a new dynamic VSI structure and inserts it into the VSI list.
375  */
376 struct ice_vsi *
377 ice_alloc_vsi(struct ice_softc *sc, enum ice_vsi_type type)
378 {
379 	struct ice_vsi *vsi;
380 	int idx;
381 
382 	/* Find an open index for a new VSI to be allocated. If the returned
383 	 * index is >= the num_available_vsi then it means no slot is
384 	 * available.
385 	 */
386 	idx = ice_get_next_vsi(sc->all_vsi, sc->num_available_vsi);
387 	if (idx >= sc->num_available_vsi) {
388 		device_printf(sc->dev, "No available VSI slots\n");
389 		return NULL;
390 	}
391 
392 	vsi = (struct ice_vsi *)malloc(sizeof(*vsi), M_ICE, M_NOWAIT | M_ZERO);
393 	if (!vsi) {
394 		device_printf(sc->dev, "Unable to allocate VSI memory\n");
395 		return NULL;
396 	}
397 
398 	ice_setup_vsi_common(sc, vsi, type, idx, true);
399 
400 	return vsi;
401 }
402 
403 /**
404  * ice_setup_pf_vsi - Setup the PF VSI
405  * @sc: the device private softc
406  *
407  * Setup the PF VSI structure which is embedded as sc->pf_vsi in the device
408  * private softc. Unlike other VSIs, the PF VSI memory is allocated as part of
409  * the softc memory, instead of being dynamically allocated at creation.
410  */
411 void
412 ice_setup_pf_vsi(struct ice_softc *sc)
413 {
414 	ice_setup_vsi_common(sc, &sc->pf_vsi, ICE_VSI_PF, 0, false);
415 }
416 
417 /**
418  * ice_alloc_vsi_qmap
419  * @vsi: VSI structure
420  * @max_tx_queues: Number of transmit queues to identify
421  * @max_rx_queues: Number of receive queues to identify
422  *
423  * Allocates a max_[t|r]x_queues array of words for the VSI where each
424  * word contains the index of the queue it represents.  In here, all
425  * words are initialized to an index of ICE_INVALID_RES_IDX, indicating
426  * all queues for this VSI are not yet assigned an index and thus,
427  * not ready for use.
428  *
429  * Returns an error code on failure.
430  */
431 int
432 ice_alloc_vsi_qmap(struct ice_vsi *vsi, const int max_tx_queues,
433 		   const int max_rx_queues)
434 {
435 	struct ice_softc *sc = vsi->sc;
436 	int i;
437 
438 	MPASS(max_tx_queues > 0);
439 	MPASS(max_rx_queues > 0);
440 
441 	/* Allocate Tx queue mapping memory */
442 	if (!(vsi->tx_qmap =
443 	      (u16 *) malloc(sizeof(u16) * max_tx_queues, M_ICE, M_WAITOK))) {
444 		device_printf(sc->dev, "Unable to allocate Tx qmap memory\n");
445 		return (ENOMEM);
446 	}
447 
448 	/* Allocate Rx queue mapping memory */
449 	if (!(vsi->rx_qmap =
450 	      (u16 *) malloc(sizeof(u16) * max_rx_queues, M_ICE, M_WAITOK))) {
451 		device_printf(sc->dev, "Unable to allocate Rx qmap memory\n");
452 		goto free_tx_qmap;
453 	}
454 
455 	/* Mark every queue map as invalid to start with */
456 	for (i = 0; i < max_tx_queues; i++) {
457 		vsi->tx_qmap[i] = ICE_INVALID_RES_IDX;
458 	}
459 	for (i = 0; i < max_rx_queues; i++) {
460 		vsi->rx_qmap[i] = ICE_INVALID_RES_IDX;
461 	}
462 
463 	return 0;
464 
465 free_tx_qmap:
466 	free(vsi->tx_qmap, M_ICE);
467 	vsi->tx_qmap = NULL;
468 
469 	return (ENOMEM);
470 }
471 
472 /**
473  * ice_free_vsi_qmaps - Free the PF qmaps associated with a VSI
474  * @vsi: the VSI private structure
475  *
476  * Frees the PF qmaps associated with the given VSI. Generally this will be
477  * called by ice_release_vsi, but may need to be called during attach cleanup,
478  * depending on when the qmaps were allocated.
479  */
480 void
481 ice_free_vsi_qmaps(struct ice_vsi *vsi)
482 {
483 	struct ice_softc *sc = vsi->sc;
484 
485 	if (vsi->tx_qmap) {
486 		ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap,
487 					   vsi->num_tx_queues);
488 		free(vsi->tx_qmap, M_ICE);
489 		vsi->tx_qmap = NULL;
490 	}
491 
492 	if (vsi->rx_qmap) {
493 		ice_resmgr_release_map(&sc->rx_qmgr, vsi->rx_qmap,
494 					   vsi->num_rx_queues);
495 		free(vsi->rx_qmap, M_ICE);
496 		vsi->rx_qmap = NULL;
497 	}
498 }
499 
500 /**
501  * ice_set_default_vsi_ctx - Setup default VSI context parameters
502  * @ctx: the VSI context to initialize
503  *
504  * Initialize and prepare a default VSI context for configuring a new VSI.
505  */
506 static void
507 ice_set_default_vsi_ctx(struct ice_vsi_ctx *ctx)
508 {
509 	u32 table = 0;
510 
511 	memset(&ctx->info, 0, sizeof(ctx->info));
512 	/* VSI will be allocated from shared pool */
513 	ctx->alloc_from_pool = true;
514 	/* Enable source pruning by default */
515 	ctx->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE;
516 	/* Traffic from VSI can be sent to LAN */
517 	ctx->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA;
518 	/* Allow all packets untagged/tagged */
519 	ctx->info.inner_vlan_flags = ((ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL &
520 				       ICE_AQ_VSI_INNER_VLAN_TX_MODE_M) >>
521 				       ICE_AQ_VSI_INNER_VLAN_TX_MODE_S);
522 	/* Show VLAN/UP from packets in Rx descriptors */
523 	ctx->info.inner_vlan_flags |= ((ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH &
524 					ICE_AQ_VSI_INNER_VLAN_EMODE_M) >>
525 					ICE_AQ_VSI_INNER_VLAN_EMODE_S);
526 	/* Have 1:1 UP mapping for both ingress/egress tables */
527 	table |= ICE_UP_TABLE_TRANSLATE(0, 0);
528 	table |= ICE_UP_TABLE_TRANSLATE(1, 1);
529 	table |= ICE_UP_TABLE_TRANSLATE(2, 2);
530 	table |= ICE_UP_TABLE_TRANSLATE(3, 3);
531 	table |= ICE_UP_TABLE_TRANSLATE(4, 4);
532 	table |= ICE_UP_TABLE_TRANSLATE(5, 5);
533 	table |= ICE_UP_TABLE_TRANSLATE(6, 6);
534 	table |= ICE_UP_TABLE_TRANSLATE(7, 7);
535 	ctx->info.ingress_table = CPU_TO_LE32(table);
536 	ctx->info.egress_table = CPU_TO_LE32(table);
537 	/* Have 1:1 UP mapping for outer to inner UP table */
538 	ctx->info.outer_up_table = CPU_TO_LE32(table);
539 	/* No Outer tag support, so outer_vlan_flags remains zero */
540 }
541 
542 /**
543  * ice_set_rss_vsi_ctx - Setup VSI context parameters for RSS
544  * @ctx: the VSI context to configure
545  * @type: the VSI type
546  *
547  * Configures the VSI context for RSS, based on the VSI type.
548  */
549 static void
550 ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctx, enum ice_vsi_type type)
551 {
552 	u8 lut_type, hash_type;
553 
554 	switch (type) {
555 	case ICE_VSI_PF:
556 		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF;
557 		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
558 		break;
559 	case ICE_VSI_VF:
560 	case ICE_VSI_VMDQ2:
561 		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI;
562 		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
563 		break;
564 	default:
565 		/* Other VSI types do not support RSS */
566 		return;
567 	}
568 
569 	ctx->info.q_opt_rss = (((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) &
570 				 ICE_AQ_VSI_Q_OPT_RSS_LUT_M) |
571 				((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) &
572 				 ICE_AQ_VSI_Q_OPT_RSS_HASH_M));
573 }
574 
575 /**
576  * ice_setup_vsi_qmap - Setup the queue mapping for a VSI
577  * @vsi: the VSI to configure
578  * @ctx: the VSI context to configure
579  *
580  * Configures the context for the given VSI, setting up how the firmware
581  * should map the queues for this VSI.
582  *
583  * @pre vsi->qmap_type is set to a valid type
584  */
585 static int
586 ice_setup_vsi_qmap(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx)
587 {
588 	int pow = 0;
589 	u16 qmap;
590 
591 	MPASS(vsi->rx_qmap != NULL);
592 
593 	switch (vsi->qmap_type) {
594 	case ICE_RESMGR_ALLOC_CONTIGUOUS:
595 		ctx->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_CONTIG);
596 
597 		ctx->info.q_mapping[0] = CPU_TO_LE16(vsi->rx_qmap[0]);
598 		ctx->info.q_mapping[1] = CPU_TO_LE16(vsi->num_rx_queues);
599 
600 		break;
601 	case ICE_RESMGR_ALLOC_SCATTERED:
602 		ctx->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_NONCONTIG);
603 
604 		for (int i = 0; i < vsi->num_rx_queues; i++)
605 			ctx->info.q_mapping[i] = CPU_TO_LE16(vsi->rx_qmap[i]);
606 		break;
607 	default:
608 		return (EOPNOTSUPP);
609 	}
610 
611 	/* Calculate the next power-of-2 of number of queues */
612 	if (vsi->num_rx_queues)
613 		pow = flsl(vsi->num_rx_queues - 1);
614 
615 	/* Assign all the queues to traffic class zero */
616 	qmap = (pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M;
617 	ctx->info.tc_mapping[0] = CPU_TO_LE16(qmap);
618 
619 	/* Fill out default driver TC queue info for VSI */
620 	vsi->tc_info[0].qoffset = 0;
621 	vsi->tc_info[0].qcount_rx = vsi->num_rx_queues;
622 	vsi->tc_info[0].qcount_tx = vsi->num_tx_queues;
623 	for (int i = 1; i < ICE_MAX_TRAFFIC_CLASS; i++) {
624 		vsi->tc_info[i].qoffset = 0;
625 		vsi->tc_info[i].qcount_rx = 1;
626 		vsi->tc_info[i].qcount_tx = 1;
627 	}
628 	vsi->tc_map = 0x1;
629 
630 	return 0;
631 }
632 
633 /**
634  * ice_setup_vsi_mirroring -- Setup a VSI for mirroring PF VSI traffic
635  * @vsi: VSI to setup
636  *
637  * @pre vsi->mirror_src_vsi is set to the SW VSI num that traffic is to be
638  * mirrored from
639  *
640  * Returns 0 on success, EINVAL on failure.
641  */
642 int
643 ice_setup_vsi_mirroring(struct ice_vsi *vsi)
644 {
645 	struct ice_mir_rule_buf rule = { };
646 	struct ice_softc *sc = vsi->sc;
647 	struct ice_hw *hw = &sc->hw;
648 	device_t dev = sc->dev;
649 	enum ice_status status;
650 	u16 rule_id, dest_vsi;
651 	u16 count = 1;
652 
653 	rule.vsi_idx = ice_get_hw_vsi_num(hw, vsi->mirror_src_vsi);
654 	rule.add = true;
655 
656 	dest_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
657 	rule_id = ICE_INVAL_MIRROR_RULE_ID;
658 	status = ice_aq_add_update_mir_rule(hw, ICE_AQC_RULE_TYPE_VPORT_INGRESS,
659 					    dest_vsi, count, &rule, NULL,
660 					    &rule_id);
661 	if (status) {
662 		device_printf(dev,
663 		    "Could not add INGRESS rule for mirror vsi %d to vsi %d, err %s aq_err %s\n",
664 		    rule.vsi_idx, dest_vsi, ice_status_str(status),
665 		    ice_aq_str(hw->adminq.sq_last_status));
666 		return (EINVAL);
667 	}
668 
669 	vsi->rule_mir_ingress = rule_id;
670 
671 	rule_id = ICE_INVAL_MIRROR_RULE_ID;
672 	status = ice_aq_add_update_mir_rule(hw, ICE_AQC_RULE_TYPE_VPORT_EGRESS,
673 					    dest_vsi, count, &rule, NULL, &rule_id);
674 	if (status) {
675 		device_printf(dev,
676 		    "Could not add EGRESS rule for mirror vsi %d to vsi %d, err %s aq_err %s\n",
677 		    rule.vsi_idx, dest_vsi, ice_status_str(status),
678 		    ice_aq_str(hw->adminq.sq_last_status));
679 		return (EINVAL);
680 	}
681 
682 	vsi->rule_mir_egress = rule_id;
683 
684 	return (0);
685 }
686 
687 /**
688  * ice_remove_vsi_mirroring -- Teardown any VSI mirroring rules
689  * @vsi: VSI to remove mirror rules from
690  */
691 static void
692 ice_remove_vsi_mirroring(struct ice_vsi *vsi)
693 {
694 	struct ice_hw *hw = &vsi->sc->hw;
695 	enum ice_status status = ICE_SUCCESS;
696 	bool keep_alloc = false;
697 
698 	if (vsi->rule_mir_ingress != ICE_INVAL_MIRROR_RULE_ID)
699 		status = ice_aq_delete_mir_rule(hw, vsi->rule_mir_ingress, keep_alloc, NULL);
700 
701 	if (status)
702 		device_printf(vsi->sc->dev, "Could not remove mirror VSI ingress rule, err %s aq_err %s\n",
703 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
704 
705 	status = ICE_SUCCESS;
706 
707 	if (vsi->rule_mir_egress != ICE_INVAL_MIRROR_RULE_ID)
708 		status = ice_aq_delete_mir_rule(hw, vsi->rule_mir_egress, keep_alloc, NULL);
709 
710 	if (status)
711 		device_printf(vsi->sc->dev, "Could not remove mirror VSI egress rule, err %s aq_err %s\n",
712 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
713 }
714 
715 /**
716  * ice_initialize_vsi - Initialize a VSI for use
717  * @vsi: the vsi to initialize
718  *
719  * Initialize a VSI over the adminq and prepare it for operation.
720  *
721  * @pre vsi->num_tx_queues is set
722  * @pre vsi->num_rx_queues is set
723  */
724 int
725 ice_initialize_vsi(struct ice_vsi *vsi)
726 {
727 	struct ice_vsi_ctx ctx = { 0 };
728 	struct ice_hw *hw = &vsi->sc->hw;
729 	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
730 	enum ice_status status;
731 	int err;
732 
733 	/* For now, we only have code supporting PF VSIs */
734 	switch (vsi->type) {
735 	case ICE_VSI_PF:
736 		ctx.flags = ICE_AQ_VSI_TYPE_PF;
737 		break;
738 	case ICE_VSI_VMDQ2:
739 		ctx.flags = ICE_AQ_VSI_TYPE_VMDQ2;
740 		break;
741 	default:
742 		return (ENODEV);
743 	}
744 
745 	ice_set_default_vsi_ctx(&ctx);
746 	ice_set_rss_vsi_ctx(&ctx, vsi->type);
747 
748 	/* XXX: VSIs of other types may need different port info? */
749 	ctx.info.sw_id = hw->port_info->sw_id;
750 
751 	/* Set some RSS parameters based on the VSI type */
752 	ice_vsi_set_rss_params(vsi);
753 
754 	/* Initialize the Rx queue mapping for this VSI */
755 	err = ice_setup_vsi_qmap(vsi, &ctx);
756 	if (err) {
757 		return err;
758 	}
759 
760 	/* (Re-)add VSI to HW VSI handle list */
761 	status = ice_add_vsi(hw, vsi->idx, &ctx, NULL);
762 	if (status != 0) {
763 		device_printf(vsi->sc->dev,
764 		    "Add VSI AQ call failed, err %s aq_err %s\n",
765 		    ice_status_str(status),
766 		    ice_aq_str(hw->adminq.sq_last_status));
767 		return (EIO);
768 	}
769 	vsi->info = ctx.info;
770 
771 	/* Initialize VSI with just 1 TC to start */
772 	max_txqs[0] = vsi->num_tx_queues;
773 
774 	status = ice_cfg_vsi_lan(hw->port_info, vsi->idx,
775 			      ICE_DFLT_TRAFFIC_CLASS, max_txqs);
776 	if (status) {
777 		device_printf(vsi->sc->dev,
778 		    "Failed VSI lan queue config, err %s aq_err %s\n",
779 		    ice_status_str(status),
780 		    ice_aq_str(hw->adminq.sq_last_status));
781 		ice_deinit_vsi(vsi);
782 		return (ENODEV);
783 	}
784 
785 	/* Reset VSI stats */
786 	ice_reset_vsi_stats(vsi);
787 
788 	return 0;
789 }
790 
791 /**
792  * ice_deinit_vsi - Tell firmware to release resources for a VSI
793  * @vsi: the VSI to release
794  *
795  * Helper function which requests the firmware to release the hardware
796  * resources associated with a given VSI.
797  */
798 void
799 ice_deinit_vsi(struct ice_vsi *vsi)
800 {
801 	struct ice_vsi_ctx ctx = { 0 };
802 	struct ice_softc *sc = vsi->sc;
803 	struct ice_hw *hw = &sc->hw;
804 	enum ice_status status;
805 
806 	/* Assert that the VSI pointer matches in the list */
807 	MPASS(vsi == sc->all_vsi[vsi->idx]);
808 
809 	ctx.info = vsi->info;
810 
811 	status = ice_rm_vsi_lan_cfg(hw->port_info, vsi->idx);
812 	if (status) {
813 		/*
814 		 * This should only fail if the VSI handle is invalid, or if
815 		 * any of the nodes have leaf nodes which are still in use.
816 		 */
817 		device_printf(sc->dev,
818 			      "Unable to remove scheduler nodes for VSI %d, err %s\n",
819 			      vsi->idx, ice_status_str(status));
820 	}
821 
822 	/* Tell firmware to release the VSI resources */
823 	status = ice_free_vsi(hw, vsi->idx, &ctx, false, NULL);
824 	if (status != 0) {
825 		device_printf(sc->dev,
826 		    "Free VSI %u AQ call failed, err %s aq_err %s\n",
827 		    vsi->idx, ice_status_str(status),
828 		    ice_aq_str(hw->adminq.sq_last_status));
829 	}
830 }
831 
832 /**
833  * ice_release_vsi - Release resources associated with a VSI
834  * @vsi: the VSI to release
835  *
836  * Release software and firmware resources associated with a VSI. Release the
837  * queue managers associated with this VSI. Also free the VSI structure memory
838  * if the VSI was allocated dynamically using ice_alloc_vsi().
839  */
840 void
841 ice_release_vsi(struct ice_vsi *vsi)
842 {
843 	struct ice_softc *sc = vsi->sc;
844 	int idx = vsi->idx;
845 
846 	/* Assert that the VSI pointer matches in the list */
847 	MPASS(vsi == sc->all_vsi[idx]);
848 
849 	/* Cleanup RSS configuration */
850 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_RSS))
851 		ice_clean_vsi_rss_cfg(vsi);
852 
853 	ice_del_vsi_sysctl_ctx(vsi);
854 
855 	/* Remove the configured mirror rule, if it exists */
856 	ice_remove_vsi_mirroring(vsi);
857 
858 	/*
859 	 * If we unload the driver after a reset fails, we do not need to do
860 	 * this step.
861 	 */
862 	if (!ice_test_state(&sc->state, ICE_STATE_RESET_FAILED))
863 		ice_deinit_vsi(vsi);
864 
865 	ice_free_vsi_qmaps(vsi);
866 
867 	if (vsi->dynamic) {
868 		free(sc->all_vsi[idx], M_ICE);
869 	}
870 
871 	sc->all_vsi[idx] = NULL;
872 }
873 
874 /**
875  * ice_aq_speed_to_rate - Convert AdminQ speed enum to baudrate
876  * @pi: port info data
877  *
878  * Returns the baudrate value for the current link speed of a given port.
879  */
880 uint64_t
881 ice_aq_speed_to_rate(struct ice_port_info *pi)
882 {
883 	switch (pi->phy.link_info.link_speed) {
884 	case ICE_AQ_LINK_SPEED_100GB:
885 		return IF_Gbps(100);
886 	case ICE_AQ_LINK_SPEED_50GB:
887 		return IF_Gbps(50);
888 	case ICE_AQ_LINK_SPEED_40GB:
889 		return IF_Gbps(40);
890 	case ICE_AQ_LINK_SPEED_25GB:
891 		return IF_Gbps(25);
892 	case ICE_AQ_LINK_SPEED_10GB:
893 		return IF_Gbps(10);
894 	case ICE_AQ_LINK_SPEED_5GB:
895 		return IF_Gbps(5);
896 	case ICE_AQ_LINK_SPEED_2500MB:
897 		return IF_Mbps(2500);
898 	case ICE_AQ_LINK_SPEED_1000MB:
899 		return IF_Mbps(1000);
900 	case ICE_AQ_LINK_SPEED_100MB:
901 		return IF_Mbps(100);
902 	case ICE_AQ_LINK_SPEED_10MB:
903 		return IF_Mbps(10);
904 	case ICE_AQ_LINK_SPEED_UNKNOWN:
905 	default:
906 		/* return 0 if we don't know the link speed */
907 		return 0;
908 	}
909 }
910 
911 /**
912  * ice_aq_speed_to_str - Convert AdminQ speed enum to string representation
913  * @pi: port info data
914  *
915  * Returns the string representation of the current link speed for a given
916  * port.
917  */
918 static const char *
919 ice_aq_speed_to_str(struct ice_port_info *pi)
920 {
921 	switch (pi->phy.link_info.link_speed) {
922 	case ICE_AQ_LINK_SPEED_100GB:
923 		return "100 Gbps";
924 	case ICE_AQ_LINK_SPEED_50GB:
925 		return "50 Gbps";
926 	case ICE_AQ_LINK_SPEED_40GB:
927 		return "40 Gbps";
928 	case ICE_AQ_LINK_SPEED_25GB:
929 		return "25 Gbps";
930 	case ICE_AQ_LINK_SPEED_20GB:
931 		return "20 Gbps";
932 	case ICE_AQ_LINK_SPEED_10GB:
933 		return "10 Gbps";
934 	case ICE_AQ_LINK_SPEED_5GB:
935 		return "5 Gbps";
936 	case ICE_AQ_LINK_SPEED_2500MB:
937 		return "2.5 Gbps";
938 	case ICE_AQ_LINK_SPEED_1000MB:
939 		return "1 Gbps";
940 	case ICE_AQ_LINK_SPEED_100MB:
941 		return "100 Mbps";
942 	case ICE_AQ_LINK_SPEED_10MB:
943 		return "10 Mbps";
944 	case ICE_AQ_LINK_SPEED_UNKNOWN:
945 	default:
946 		return "Unknown speed";
947 	}
948 }
949 
950 /**
951  * ice_get_phy_type_low - Get media associated with phy_type_low
952  * @phy_type_low: the low 64bits of phy_type from the AdminQ
953  *
954  * Given the lower 64bits of the phy_type from the hardware, return the
955  * ifm_active bit associated. Return IFM_UNKNOWN when phy_type_low is unknown.
956  * Note that only one of ice_get_phy_type_low or ice_get_phy_type_high should
957  * be called. If phy_type_low is zero, call ice_phy_type_high.
958  */
959 int
960 ice_get_phy_type_low(uint64_t phy_type_low)
961 {
962 	switch (phy_type_low) {
963 	case ICE_PHY_TYPE_LOW_100BASE_TX:
964 		return IFM_100_TX;
965 	case ICE_PHY_TYPE_LOW_100M_SGMII:
966 		return IFM_100_SGMII;
967 	case ICE_PHY_TYPE_LOW_1000BASE_T:
968 		return IFM_1000_T;
969 	case ICE_PHY_TYPE_LOW_1000BASE_SX:
970 		return IFM_1000_SX;
971 	case ICE_PHY_TYPE_LOW_1000BASE_LX:
972 		return IFM_1000_LX;
973 	case ICE_PHY_TYPE_LOW_1000BASE_KX:
974 		return IFM_1000_KX;
975 	case ICE_PHY_TYPE_LOW_1G_SGMII:
976 		return IFM_1000_SGMII;
977 	case ICE_PHY_TYPE_LOW_2500BASE_T:
978 		return IFM_2500_T;
979 	case ICE_PHY_TYPE_LOW_2500BASE_X:
980 		return IFM_2500_X;
981 	case ICE_PHY_TYPE_LOW_2500BASE_KX:
982 		return IFM_2500_KX;
983 	case ICE_PHY_TYPE_LOW_5GBASE_T:
984 		return IFM_5000_T;
985 	case ICE_PHY_TYPE_LOW_5GBASE_KR:
986 		return IFM_5000_KR;
987 	case ICE_PHY_TYPE_LOW_10GBASE_T:
988 		return IFM_10G_T;
989 	case ICE_PHY_TYPE_LOW_10G_SFI_DA:
990 		return IFM_10G_TWINAX;
991 	case ICE_PHY_TYPE_LOW_10GBASE_SR:
992 		return IFM_10G_SR;
993 	case ICE_PHY_TYPE_LOW_10GBASE_LR:
994 		return IFM_10G_LR;
995 	case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1:
996 		return IFM_10G_KR;
997 	case ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC:
998 		return IFM_10G_AOC;
999 	case ICE_PHY_TYPE_LOW_10G_SFI_C2C:
1000 		return IFM_10G_SFI;
1001 	case ICE_PHY_TYPE_LOW_25GBASE_T:
1002 		return IFM_25G_T;
1003 	case ICE_PHY_TYPE_LOW_25GBASE_CR:
1004 		return IFM_25G_CR;
1005 	case ICE_PHY_TYPE_LOW_25GBASE_CR_S:
1006 		return IFM_25G_CR_S;
1007 	case ICE_PHY_TYPE_LOW_25GBASE_CR1:
1008 		return IFM_25G_CR1;
1009 	case ICE_PHY_TYPE_LOW_25GBASE_SR:
1010 		return IFM_25G_SR;
1011 	case ICE_PHY_TYPE_LOW_25GBASE_LR:
1012 		return IFM_25G_LR;
1013 	case ICE_PHY_TYPE_LOW_25GBASE_KR:
1014 		return IFM_25G_KR;
1015 	case ICE_PHY_TYPE_LOW_25GBASE_KR_S:
1016 		return IFM_25G_KR_S;
1017 	case ICE_PHY_TYPE_LOW_25GBASE_KR1:
1018 		return IFM_25G_KR1;
1019 	case ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC:
1020 		return IFM_25G_AOC;
1021 	case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
1022 		return IFM_25G_AUI;
1023 	case ICE_PHY_TYPE_LOW_40GBASE_CR4:
1024 		return IFM_40G_CR4;
1025 	case ICE_PHY_TYPE_LOW_40GBASE_SR4:
1026 		return IFM_40G_SR4;
1027 	case ICE_PHY_TYPE_LOW_40GBASE_LR4:
1028 		return IFM_40G_LR4;
1029 	case ICE_PHY_TYPE_LOW_40GBASE_KR4:
1030 		return IFM_40G_KR4;
1031 	case ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC:
1032 		return IFM_40G_XLAUI_AC;
1033 	case ICE_PHY_TYPE_LOW_40G_XLAUI:
1034 		return IFM_40G_XLAUI;
1035 	case ICE_PHY_TYPE_LOW_50GBASE_CR2:
1036 		return IFM_50G_CR2;
1037 	case ICE_PHY_TYPE_LOW_50GBASE_SR2:
1038 		return IFM_50G_SR2;
1039 	case ICE_PHY_TYPE_LOW_50GBASE_LR2:
1040 		return IFM_50G_LR2;
1041 	case ICE_PHY_TYPE_LOW_50GBASE_KR2:
1042 		return IFM_50G_KR2;
1043 	case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC:
1044 		return IFM_50G_LAUI2_AC;
1045 	case ICE_PHY_TYPE_LOW_50G_LAUI2:
1046 		return IFM_50G_LAUI2;
1047 	case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC:
1048 		return IFM_50G_AUI2_AC;
1049 	case ICE_PHY_TYPE_LOW_50G_AUI2:
1050 		return IFM_50G_AUI2;
1051 	case ICE_PHY_TYPE_LOW_50GBASE_CP:
1052 		return IFM_50G_CP;
1053 	case ICE_PHY_TYPE_LOW_50GBASE_SR:
1054 		return IFM_50G_SR;
1055 	case ICE_PHY_TYPE_LOW_50GBASE_FR:
1056 		return IFM_50G_FR;
1057 	case ICE_PHY_TYPE_LOW_50GBASE_LR:
1058 		return IFM_50G_LR;
1059 	case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4:
1060 		return IFM_50G_KR_PAM4;
1061 	case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC:
1062 		return IFM_50G_AUI1_AC;
1063 	case ICE_PHY_TYPE_LOW_50G_AUI1:
1064 		return IFM_50G_AUI1;
1065 	case ICE_PHY_TYPE_LOW_100GBASE_CR4:
1066 		return IFM_100G_CR4;
1067 	case ICE_PHY_TYPE_LOW_100GBASE_SR4:
1068 		return IFM_100G_SR4;
1069 	case ICE_PHY_TYPE_LOW_100GBASE_LR4:
1070 		return IFM_100G_LR4;
1071 	case ICE_PHY_TYPE_LOW_100GBASE_KR4:
1072 		return IFM_100G_KR4;
1073 	case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC:
1074 		return IFM_100G_CAUI4_AC;
1075 	case ICE_PHY_TYPE_LOW_100G_CAUI4:
1076 		return IFM_100G_CAUI4;
1077 	case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC:
1078 		return IFM_100G_AUI4_AC;
1079 	case ICE_PHY_TYPE_LOW_100G_AUI4:
1080 		return IFM_100G_AUI4;
1081 	case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4:
1082 		return IFM_100G_CR_PAM4;
1083 	case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4:
1084 		return IFM_100G_KR_PAM4;
1085 	case ICE_PHY_TYPE_LOW_100GBASE_CP2:
1086 		return IFM_100G_CP2;
1087 	case ICE_PHY_TYPE_LOW_100GBASE_SR2:
1088 		return IFM_100G_SR2;
1089 	case ICE_PHY_TYPE_LOW_100GBASE_DR:
1090 		return IFM_100G_DR;
1091 	default:
1092 		return IFM_UNKNOWN;
1093 	}
1094 }
1095 
1096 /**
1097  * ice_get_phy_type_high - Get media associated with phy_type_high
1098  * @phy_type_high: the upper 64bits of phy_type from the AdminQ
1099  *
1100  * Given the upper 64bits of the phy_type from the hardware, return the
1101  * ifm_active bit associated. Return IFM_UNKNOWN on an unknown value. Note
1102  * that only one of ice_get_phy_type_low or ice_get_phy_type_high should be
1103  * called. If phy_type_high is zero, call ice_get_phy_type_low.
1104  */
1105 int
1106 ice_get_phy_type_high(uint64_t phy_type_high)
1107 {
1108 	switch (phy_type_high) {
1109 	case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4:
1110 		return IFM_100G_KR2_PAM4;
1111 	case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC:
1112 		return IFM_100G_CAUI2_AC;
1113 	case ICE_PHY_TYPE_HIGH_100G_CAUI2:
1114 		return IFM_100G_CAUI2;
1115 	case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC:
1116 		return IFM_100G_AUI2_AC;
1117 	case ICE_PHY_TYPE_HIGH_100G_AUI2:
1118 		return IFM_100G_AUI2;
1119 	default:
1120 		return IFM_UNKNOWN;
1121 	}
1122 }
1123 
1124 /**
1125  * ice_phy_types_to_max_rate - Returns port's max supported baudrate
1126  * @pi: port info struct
1127  *
1128  * ice_aq_get_phy_caps() w/ ICE_AQC_REPORT_TOPO_CAP_MEDIA parameter needs
1129  * to have been called before this function for it to work.
1130  */
1131 static uint64_t
1132 ice_phy_types_to_max_rate(struct ice_port_info *pi)
1133 {
1134 	uint64_t phy_low = pi->phy.phy_type_low;
1135 	uint64_t phy_high = pi->phy.phy_type_high;
1136 	uint64_t max_rate = 0;
1137 	int bit;
1138 
1139 	/*
1140 	 * These are based on the indices used in the BIT() macros for
1141 	 * ICE_PHY_TYPE_LOW_*
1142 	 */
1143 	static const uint64_t phy_rates[] = {
1144 	    IF_Mbps(100),
1145 	    IF_Mbps(100),
1146 	    IF_Gbps(1ULL),
1147 	    IF_Gbps(1ULL),
1148 	    IF_Gbps(1ULL),
1149 	    IF_Gbps(1ULL),
1150 	    IF_Gbps(1ULL),
1151 	    IF_Mbps(2500ULL),
1152 	    IF_Mbps(2500ULL),
1153 	    IF_Mbps(2500ULL),
1154 	    IF_Gbps(5ULL),
1155 	    IF_Gbps(5ULL),
1156 	    IF_Gbps(10ULL),
1157 	    IF_Gbps(10ULL),
1158 	    IF_Gbps(10ULL),
1159 	    IF_Gbps(10ULL),
1160 	    IF_Gbps(10ULL),
1161 	    IF_Gbps(10ULL),
1162 	    IF_Gbps(10ULL),
1163 	    IF_Gbps(25ULL),
1164 	    IF_Gbps(25ULL),
1165 	    IF_Gbps(25ULL),
1166 	    IF_Gbps(25ULL),
1167 	    IF_Gbps(25ULL),
1168 	    IF_Gbps(25ULL),
1169 	    IF_Gbps(25ULL),
1170 	    IF_Gbps(25ULL),
1171 	    IF_Gbps(25ULL),
1172 	    IF_Gbps(25ULL),
1173 	    IF_Gbps(25ULL),
1174 	    IF_Gbps(40ULL),
1175 	    IF_Gbps(40ULL),
1176 	    IF_Gbps(40ULL),
1177 	    IF_Gbps(40ULL),
1178 	    IF_Gbps(40ULL),
1179 	    IF_Gbps(40ULL),
1180 	    IF_Gbps(50ULL),
1181 	    IF_Gbps(50ULL),
1182 	    IF_Gbps(50ULL),
1183 	    IF_Gbps(50ULL),
1184 	    IF_Gbps(50ULL),
1185 	    IF_Gbps(50ULL),
1186 	    IF_Gbps(50ULL),
1187 	    IF_Gbps(50ULL),
1188 	    IF_Gbps(50ULL),
1189 	    IF_Gbps(50ULL),
1190 	    IF_Gbps(50ULL),
1191 	    IF_Gbps(50ULL),
1192 	    IF_Gbps(50ULL),
1193 	    IF_Gbps(50ULL),
1194 	    IF_Gbps(50ULL),
1195 	    IF_Gbps(100ULL),
1196 	    IF_Gbps(100ULL),
1197 	    IF_Gbps(100ULL),
1198 	    IF_Gbps(100ULL),
1199 	    IF_Gbps(100ULL),
1200 	    IF_Gbps(100ULL),
1201 	    IF_Gbps(100ULL),
1202 	    IF_Gbps(100ULL),
1203 	    IF_Gbps(100ULL),
1204 	    IF_Gbps(100ULL),
1205 	    IF_Gbps(100ULL),
1206 	    IF_Gbps(100ULL),
1207 	    IF_Gbps(100ULL),
1208 	    /* These rates are for ICE_PHY_TYPE_HIGH_* */
1209 	    IF_Gbps(100ULL),
1210 	    IF_Gbps(100ULL),
1211 	    IF_Gbps(100ULL),
1212 	    IF_Gbps(100ULL),
1213 	    IF_Gbps(100ULL)
1214 	};
1215 
1216 	/* coverity[address_of] */
1217 	for_each_set_bit(bit, &phy_high, 64)
1218 		if ((bit + 64) < (int)ARRAY_SIZE(phy_rates))
1219 			max_rate = uqmax(max_rate, phy_rates[(bit + 64)]);
1220 
1221 	/* coverity[address_of] */
1222 	for_each_set_bit(bit, &phy_low, 64)
1223 		max_rate = uqmax(max_rate, phy_rates[bit]);
1224 
1225 	return (max_rate);
1226 }
1227 
/*
 * The if_media type is split over the original 5 bit media variant field,
 * along with extended types using up extra bits in the options section.
 * We want to convert this split number into a bitmap index, so we reverse the
 * calculation of IFM_X here: the bits selected by IFM_TMASK are kept as-is,
 * and the bits selected by IFM_ETH_XTYPE are shifted down by IFM_ETH_XSHIFT
 * and ORed in.
 *
 * Note that the argument is evaluated twice.
 */
#define IFM_IDX(x) (((x) & IFM_TMASK) | \
		    (((x) & IFM_ETH_XTYPE) >> IFM_ETH_XSHIFT))
1235 
1236 /**
1237  * ice_add_media_types - Add supported media types to the media structure
1238  * @sc: ice private softc structure
1239  * @media: ifmedia structure to setup
1240  *
1241  * Looks up the supported phy types, and initializes the various media types
1242  * available.
1243  *
1244  * @pre this function must be protected from being called while another thread
1245  * is accessing the ifmedia types.
1246  */
1247 enum ice_status
1248 ice_add_media_types(struct ice_softc *sc, struct ifmedia *media)
1249 {
1250 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
1251 	struct ice_port_info *pi = sc->hw.port_info;
1252 	enum ice_status status;
1253 	uint64_t phy_low, phy_high;
1254 	int bit;
1255 
1256 	ASSERT_CFG_LOCKED(sc);
1257 
1258 	/* the maximum possible media type index is 511. We probably don't
1259 	 * need most of this space, but this ensures future compatibility when
1260 	 * additional media types are used.
1261 	 */
1262 	ice_declare_bitmap(already_added, 511);
1263 
1264 	/* Remove all previous media types */
1265 	ifmedia_removeall(media);
1266 
1267 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
1268 				     &pcaps, NULL);
1269 	if (status != ICE_SUCCESS) {
1270 		device_printf(sc->dev,
1271 		    "%s: ice_aq_get_phy_caps (ACTIVE) failed; status %s, aq_err %s\n",
1272 		    __func__, ice_status_str(status),
1273 		    ice_aq_str(sc->hw.adminq.sq_last_status));
1274 		return (status);
1275 	}
1276 	phy_low = le64toh(pcaps.phy_type_low);
1277 	phy_high = le64toh(pcaps.phy_type_high);
1278 
1279 	/* make sure the added bitmap is zero'd */
1280 	memset(already_added, 0, sizeof(already_added));
1281 
1282 	/* coverity[address_of] */
1283 	for_each_set_bit(bit, &phy_low, 64) {
1284 		uint64_t type = BIT_ULL(bit);
1285 		int ostype;
1286 
1287 		/* get the OS media type */
1288 		ostype = ice_get_phy_type_low(type);
1289 
1290 		/* don't bother adding the unknown type */
1291 		if (ostype == IFM_UNKNOWN)
1292 			continue;
1293 
1294 		/* only add each media type to the list once */
1295 		if (ice_is_bit_set(already_added, IFM_IDX(ostype)))
1296 			continue;
1297 
1298 		ifmedia_add(media, IFM_ETHER | ostype, 0, NULL);
1299 		ice_set_bit(IFM_IDX(ostype), already_added);
1300 	}
1301 
1302 	/* coverity[address_of] */
1303 	for_each_set_bit(bit, &phy_high, 64) {
1304 		uint64_t type = BIT_ULL(bit);
1305 		int ostype;
1306 
1307 		/* get the OS media type */
1308 		ostype = ice_get_phy_type_high(type);
1309 
1310 		/* don't bother adding the unknown type */
1311 		if (ostype == IFM_UNKNOWN)
1312 			continue;
1313 
1314 		/* only add each media type to the list once */
1315 		if (ice_is_bit_set(already_added, IFM_IDX(ostype)))
1316 			continue;
1317 
1318 		ifmedia_add(media, IFM_ETHER | ostype, 0, NULL);
1319 		ice_set_bit(IFM_IDX(ostype), already_added);
1320 	}
1321 
1322 	/* Use autoselect media by default */
1323 	ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
1324 	ifmedia_set(media, IFM_ETHER | IFM_AUTO);
1325 
1326 	return (ICE_SUCCESS);
1327 }
1328 
1329 /**
1330  * ice_configure_rxq_interrupt - Configure HW Rx queue for an MSI-X interrupt
1331  * @hw: ice hw structure
1332  * @rxqid: Rx queue index in PF space
1333  * @vector: MSI-X vector index in PF/VF space
1334  * @itr_idx: ITR index to use for interrupt
1335  *
1336  * @remark ice_flush() may need to be called after this
1337  */
1338 void
1339 ice_configure_rxq_interrupt(struct ice_hw *hw, u16 rxqid, u16 vector, u8 itr_idx)
1340 {
1341 	u32 val;
1342 
1343 	MPASS(itr_idx <= ICE_ITR_NONE);
1344 
1345 	val = (QINT_RQCTL_CAUSE_ENA_M |
1346 	       (itr_idx << QINT_RQCTL_ITR_INDX_S) |
1347 	       (vector << QINT_RQCTL_MSIX_INDX_S));
1348 	wr32(hw, QINT_RQCTL(rxqid), val);
1349 }
1350 
1351 /**
1352  * ice_configure_all_rxq_interrupts - Configure HW Rx queues for MSI-X interrupts
1353  * @vsi: the VSI to configure
1354  *
1355  * Called when setting up MSI-X interrupts to configure the Rx hardware queues.
1356  */
1357 void
1358 ice_configure_all_rxq_interrupts(struct ice_vsi *vsi)
1359 {
1360 	struct ice_hw *hw = &vsi->sc->hw;
1361 	int i;
1362 
1363 	for (i = 0; i < vsi->num_rx_queues; i++) {
1364 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1365 
1366 		ice_configure_rxq_interrupt(hw, vsi->rx_qmap[rxq->me],
1367 					    rxq->irqv->me, ICE_RX_ITR);
1368 
1369 		ice_debug(hw, ICE_DBG_INIT,
1370 		    "RXQ(%d) intr enable: me %d rxqid %d vector %d\n",
1371 		    i, rxq->me, vsi->rx_qmap[rxq->me], rxq->irqv->me);
1372 	}
1373 
1374 	ice_flush(hw);
1375 }
1376 
1377 /**
1378  * ice_configure_txq_interrupt - Configure HW Tx queue for an MSI-X interrupt
1379  * @hw: ice hw structure
1380  * @txqid: Tx queue index in PF space
1381  * @vector: MSI-X vector index in PF/VF space
1382  * @itr_idx: ITR index to use for interrupt
1383  *
1384  * @remark ice_flush() may need to be called after this
1385  */
1386 void
1387 ice_configure_txq_interrupt(struct ice_hw *hw, u16 txqid, u16 vector, u8 itr_idx)
1388 {
1389 	u32 val;
1390 
1391 	MPASS(itr_idx <= ICE_ITR_NONE);
1392 
1393 	val = (QINT_TQCTL_CAUSE_ENA_M |
1394 	       (itr_idx << QINT_TQCTL_ITR_INDX_S) |
1395 	       (vector << QINT_TQCTL_MSIX_INDX_S));
1396 	wr32(hw, QINT_TQCTL(txqid), val);
1397 }
1398 
1399 /**
1400  * ice_configure_all_txq_interrupts - Configure HW Tx queues for MSI-X interrupts
1401  * @vsi: the VSI to configure
1402  *
1403  * Called when setting up MSI-X interrupts to configure the Tx hardware queues.
1404  */
1405 void
1406 ice_configure_all_txq_interrupts(struct ice_vsi *vsi)
1407 {
1408 	struct ice_hw *hw = &vsi->sc->hw;
1409 	int i;
1410 
1411 	for (i = 0; i < vsi->num_tx_queues; i++) {
1412 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1413 
1414 		ice_configure_txq_interrupt(hw, vsi->tx_qmap[txq->me],
1415 					    txq->irqv->me, ICE_TX_ITR);
1416 	}
1417 
1418 	ice_flush(hw);
1419 }
1420 
1421 /**
1422  * ice_flush_rxq_interrupts - Unconfigure Hw Rx queues MSI-X interrupt cause
1423  * @vsi: the VSI to configure
1424  *
1425  * Unset the CAUSE_ENA flag of the TQCTL register for each queue, then trigger
1426  * a software interrupt on that cause. This is required as part of the Rx
1427  * queue disable logic to dissociate the Rx queue from the interrupt.
1428  *
1429  * Note: this function must be called prior to disabling Rx queues with
1430  * ice_control_all_rx_queues, otherwise the Rx queue may not be disabled properly.
1431  */
1432 void
1433 ice_flush_rxq_interrupts(struct ice_vsi *vsi)
1434 {
1435 	struct ice_hw *hw = &vsi->sc->hw;
1436 	int i;
1437 
1438 	for (i = 0; i < vsi->num_rx_queues; i++) {
1439 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1440 		u32 reg, val;
1441 
1442 		/* Clear the CAUSE_ENA flag */
1443 		reg = vsi->rx_qmap[rxq->me];
1444 		val = rd32(hw, QINT_RQCTL(reg));
1445 		val &= ~QINT_RQCTL_CAUSE_ENA_M;
1446 		wr32(hw, QINT_RQCTL(reg), val);
1447 
1448 		ice_flush(hw);
1449 
1450 		/* Trigger a software interrupt to complete interrupt
1451 		 * dissociation.
1452 		 */
1453 		wr32(hw, GLINT_DYN_CTL(rxq->irqv->me),
1454 		     GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M);
1455 	}
1456 }
1457 
1458 /**
1459  * ice_flush_txq_interrupts - Unconfigure Hw Tx queues MSI-X interrupt cause
1460  * @vsi: the VSI to configure
1461  *
1462  * Unset the CAUSE_ENA flag of the TQCTL register for each queue, then trigger
1463  * a software interrupt on that cause. This is required as part of the Tx
1464  * queue disable logic to dissociate the Tx queue from the interrupt.
1465  *
1466  * Note: this function must be called prior to ice_vsi_disable_tx, otherwise
1467  * the Tx queue disable may not complete properly.
1468  */
1469 void
1470 ice_flush_txq_interrupts(struct ice_vsi *vsi)
1471 {
1472 	struct ice_hw *hw = &vsi->sc->hw;
1473 	int i;
1474 
1475 	for (i = 0; i < vsi->num_tx_queues; i++) {
1476 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1477 		u32 reg, val;
1478 
1479 		/* Clear the CAUSE_ENA flag */
1480 		reg = vsi->tx_qmap[txq->me];
1481 		val = rd32(hw, QINT_TQCTL(reg));
1482 		val &= ~QINT_TQCTL_CAUSE_ENA_M;
1483 		wr32(hw, QINT_TQCTL(reg), val);
1484 
1485 		ice_flush(hw);
1486 
1487 		/* Trigger a software interrupt to complete interrupt
1488 		 * dissociation.
1489 		 */
1490 		wr32(hw, GLINT_DYN_CTL(txq->irqv->me),
1491 		     GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M);
1492 	}
1493 }
1494 
1495 /**
1496  * ice_configure_rx_itr - Configure the Rx ITR settings for this VSI
1497  * @vsi: the VSI to configure
1498  *
1499  * Program the hardware ITR registers with the settings for this VSI.
1500  */
1501 void
1502 ice_configure_rx_itr(struct ice_vsi *vsi)
1503 {
1504 	struct ice_hw *hw = &vsi->sc->hw;
1505 	int i;
1506 
1507 	/* TODO: Handle per-queue/per-vector ITR? */
1508 
1509 	for (i = 0; i < vsi->num_rx_queues; i++) {
1510 		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1511 
1512 		wr32(hw, GLINT_ITR(ICE_RX_ITR, rxq->irqv->me),
1513 		     ice_itr_to_reg(hw, vsi->rx_itr));
1514 	}
1515 
1516 	ice_flush(hw);
1517 }
1518 
1519 /**
1520  * ice_configure_tx_itr - Configure the Tx ITR settings for this VSI
1521  * @vsi: the VSI to configure
1522  *
1523  * Program the hardware ITR registers with the settings for this VSI.
1524  */
1525 void
1526 ice_configure_tx_itr(struct ice_vsi *vsi)
1527 {
1528 	struct ice_hw *hw = &vsi->sc->hw;
1529 	int i;
1530 
1531 	/* TODO: Handle per-queue/per-vector ITR? */
1532 
1533 	for (i = 0; i < vsi->num_tx_queues; i++) {
1534 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1535 
1536 		wr32(hw, GLINT_ITR(ICE_TX_ITR, txq->irqv->me),
1537 		     ice_itr_to_reg(hw, vsi->tx_itr));
1538 	}
1539 
1540 	ice_flush(hw);
1541 }
1542 
1543 /**
1544  * ice_setup_tx_ctx - Setup an ice_tlan_ctx structure for a queue
1545  * @txq: the Tx queue to configure
1546  * @tlan_ctx: the Tx LAN queue context structure to initialize
1547  * @pf_q: real queue number
1548  */
1549 static int
1550 ice_setup_tx_ctx(struct ice_tx_queue *txq, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
1551 {
1552 	struct ice_vsi *vsi = txq->vsi;
1553 	struct ice_softc *sc = vsi->sc;
1554 	struct ice_hw *hw = &sc->hw;
1555 
1556 	tlan_ctx->port_num = hw->port_info->lport;
1557 
1558 	/* number of descriptors in the queue */
1559 	tlan_ctx->qlen = txq->desc_count;
1560 
1561 	/* set the transmit queue base address, defined in 128 byte units */
1562 	tlan_ctx->base = txq->tx_paddr >> 7;
1563 
1564 	tlan_ctx->pf_num = hw->pf_id;
1565 
1566 	switch (vsi->type) {
1567 	case ICE_VSI_PF:
1568 		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
1569 		break;
1570 	case ICE_VSI_VMDQ2:
1571 		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ;
1572 		break;
1573 	default:
1574 		return (ENODEV);
1575 	}
1576 
1577 	tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
1578 
1579 	/* Enable TSO */
1580 	tlan_ctx->tso_ena = 1;
1581 	tlan_ctx->internal_usage_flag = 1;
1582 
1583 	tlan_ctx->tso_qnum = pf_q;
1584 
1585 	/*
1586 	 * Stick with the older legacy Tx queue interface, instead of the new
1587 	 * advanced queue interface.
1588 	 */
1589 	tlan_ctx->legacy_int = 1;
1590 
1591 	/* Descriptor WB mode */
1592 	tlan_ctx->wb_mode = 0;
1593 
1594 	return (0);
1595 }
1596 
1597 /**
1598  * ice_cfg_vsi_for_tx - Configure the hardware for Tx
1599  * @vsi: the VSI to configure
1600  *
1601  * Configure the device Tx queues through firmware AdminQ commands. After
1602  * this, Tx queues will be ready for transmit.
1603  */
1604 int
1605 ice_cfg_vsi_for_tx(struct ice_vsi *vsi)
1606 {
1607 	struct ice_aqc_add_tx_qgrp *qg;
1608 	struct ice_hw *hw = &vsi->sc->hw;
1609 	device_t dev = vsi->sc->dev;
1610 	enum ice_status status;
1611 	int i;
1612 	int err = 0;
1613 	u16 qg_size, pf_q;
1614 
1615 	qg_size = ice_struct_size(qg, txqs, 1);
1616 	qg = (struct ice_aqc_add_tx_qgrp *)malloc(qg_size, M_ICE, M_NOWAIT|M_ZERO);
1617 	if (!qg)
1618 		return (ENOMEM);
1619 
1620 	qg->num_txqs = 1;
1621 
1622 	for (i = 0; i < vsi->num_tx_queues; i++) {
1623 		struct ice_tlan_ctx tlan_ctx = { 0 };
1624 		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1625 
1626 		pf_q = vsi->tx_qmap[txq->me];
1627 		qg->txqs[0].txq_id = htole16(pf_q);
1628 
1629 		err = ice_setup_tx_ctx(txq, &tlan_ctx, pf_q);
1630 		if (err)
1631 			goto free_txqg;
1632 
1633 		ice_set_ctx(hw, (u8 *)&tlan_ctx, qg->txqs[0].txq_ctx,
1634 			    ice_tlan_ctx_info);
1635 
1636 		status = ice_ena_vsi_txq(hw->port_info, vsi->idx, txq->tc,
1637 					 txq->q_handle, 1, qg, qg_size, NULL);
1638 		if (status) {
1639 			device_printf(dev,
1640 				      "Failed to set LAN Tx queue %d (TC %d, handle %d) context, err %s aq_err %s\n",
1641 				      i, txq->tc, txq->q_handle,
1642 				      ice_status_str(status),
1643 				      ice_aq_str(hw->adminq.sq_last_status));
1644 			err = ENODEV;
1645 			goto free_txqg;
1646 		}
1647 
1648 		/* Keep track of the Tx queue TEID */
1649 		if (pf_q == le16toh(qg->txqs[0].txq_id))
1650 			txq->q_teid = le32toh(qg->txqs[0].q_teid);
1651 	}
1652 
1653 free_txqg:
1654 	free(qg, M_ICE);
1655 
1656 	return (err);
1657 }
1658 
1659 /**
1660  * ice_setup_rx_ctx - Setup an Rx context structure for a receive queue
1661  * @rxq: the receive queue to program
1662  *
1663  * Setup an Rx queue context structure and program it into the hardware
1664  * registers. This is a necessary step for enabling the Rx queue.
1665  *
1666  * @pre the VSI associated with this queue must have initialized mbuf_sz
1667  */
1668 static int
1669 ice_setup_rx_ctx(struct ice_rx_queue *rxq)
1670 {
1671 	struct ice_rlan_ctx rlan_ctx = {0};
1672 	struct ice_vsi *vsi = rxq->vsi;
1673 	struct ice_softc *sc = vsi->sc;
1674 	struct ice_hw *hw = &sc->hw;
1675 	enum ice_status status;
1676 	u32 rxdid = ICE_RXDID_FLEX_NIC;
1677 	u32 regval;
1678 	u16 pf_q;
1679 
1680 	pf_q = vsi->rx_qmap[rxq->me];
1681 
1682 	/* set the receive queue base address, defined in 128 byte units */
1683 	rlan_ctx.base = rxq->rx_paddr >> 7;
1684 
1685 	rlan_ctx.qlen = rxq->desc_count;
1686 
1687 	rlan_ctx.dbuf = vsi->mbuf_sz >> ICE_RLAN_CTX_DBUF_S;
1688 
1689 	/* use 32 byte descriptors */
1690 	rlan_ctx.dsize = 1;
1691 
1692 	/* Strip the Ethernet CRC bytes before the packet is posted to the
1693 	 * host memory.
1694 	 */
1695 	rlan_ctx.crcstrip = 1;
1696 
1697 	rlan_ctx.l2tsel = 1;
1698 
1699 	/* don't do header splitting */
1700 	rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
1701 	rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
1702 	rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
1703 
1704 	/* strip VLAN from inner headers */
1705 	rlan_ctx.showiv = 1;
1706 
1707 	rlan_ctx.rxmax = min(vsi->max_frame_size,
1708 			     ICE_MAX_RX_SEGS * vsi->mbuf_sz);
1709 
1710 	rlan_ctx.lrxqthresh = 1;
1711 
1712 	if (vsi->type != ICE_VSI_VF) {
1713 		regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
1714 		regval &= ~QRXFLXP_CNTXT_RXDID_IDX_M;
1715 		regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
1716 			QRXFLXP_CNTXT_RXDID_IDX_M;
1717 
1718 		regval &= ~QRXFLXP_CNTXT_RXDID_PRIO_M;
1719 		regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
1720 			QRXFLXP_CNTXT_RXDID_PRIO_M;
1721 
1722 		wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
1723 	}
1724 
1725 	status = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
1726 	if (status) {
1727 		device_printf(sc->dev,
1728 			      "Failed to set LAN Rx queue context, err %s aq_err %s\n",
1729 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
1730 		return (EIO);
1731 	}
1732 
1733 	wr32(hw, rxq->tail, 0);
1734 
1735 	return 0;
1736 }
1737 
1738 /**
1739  * ice_cfg_vsi_for_rx - Configure the hardware for Rx
1740  * @vsi: the VSI to configure
1741  *
1742  * Prepare an Rx context descriptor and configure the device to receive
1743  * traffic.
1744  *
1745  * @pre the VSI must have initialized mbuf_sz
1746  */
1747 int
1748 ice_cfg_vsi_for_rx(struct ice_vsi *vsi)
1749 {
1750 	int i, err;
1751 
1752 	for (i = 0; i < vsi->num_rx_queues; i++) {
1753 		MPASS(vsi->mbuf_sz > 0);
1754 		err = ice_setup_rx_ctx(&vsi->rx_queues[i]);
1755 		if (err)
1756 			return err;
1757 	}
1758 
1759 	return (0);
1760 }
1761 
1762 /**
1763  * ice_is_rxq_ready - Check if an Rx queue is ready
1764  * @hw: ice hw structure
1765  * @pf_q: absolute PF queue index to check
1766  * @reg: on successful return, contains qrx_ctrl contents
1767  *
1768  * Reads the QRX_CTRL register and verifies if the queue is in a consistent
1769  * state. That is, QENA_REQ matches QENA_STAT. Used to check before making
1770  * a request to change the queue, as well as to verify the request has
1771  * finished. The queue should change status within a few microseconds, so we
1772  * use a small delay while polling the register.
1773  *
1774  * Returns an error code if the queue does not update after a few retries.
1775  */
1776 static int
1777 ice_is_rxq_ready(struct ice_hw *hw, int pf_q, u32 *reg)
1778 {
1779 	u32 qrx_ctrl, qena_req, qena_stat;
1780 	int i;
1781 
1782 	for (i = 0; i < ICE_Q_WAIT_RETRY_LIMIT; i++) {
1783 		qrx_ctrl = rd32(hw, QRX_CTRL(pf_q));
1784 		qena_req = (qrx_ctrl >> QRX_CTRL_QENA_REQ_S) & 1;
1785 		qena_stat = (qrx_ctrl >> QRX_CTRL_QENA_STAT_S) & 1;
1786 
1787 		/* if the request and status bits equal, then the queue is
1788 		 * fully disabled or enabled.
1789 		 */
1790 		if (qena_req == qena_stat) {
1791 			*reg = qrx_ctrl;
1792 			return (0);
1793 		}
1794 
1795 		/* wait a few microseconds before we check again */
1796 		DELAY(10);
1797 	}
1798 
1799 	return (ETIMEDOUT);
1800 }
1801 
1802 /**
1803  * ice_control_rx_queue - Configure hardware to start or stop an Rx queue
1804  * @vsi: VSI containing queue to enable/disable
1805  * @qidx: Queue index in VSI space
1806  * @enable: true to enable queue, false to disable
1807  *
1808  * Control the Rx queue through the QRX_CTRL register, enabling or disabling
1809  * it. Wait for the appropriate time to ensure that the queue has actually
1810  * reached the expected state.
1811  */
1812 int
1813 ice_control_rx_queue(struct ice_vsi *vsi, u16 qidx, bool enable)
1814 {
1815 	struct ice_hw *hw = &vsi->sc->hw;
1816 	device_t dev = vsi->sc->dev;
1817 	u32 qrx_ctrl = 0;
1818 	int err;
1819 
1820 	struct ice_rx_queue *rxq = &vsi->rx_queues[qidx];
1821 	int pf_q = vsi->rx_qmap[rxq->me];
1822 
1823 	err = ice_is_rxq_ready(hw, pf_q, &qrx_ctrl);
1824 	if (err) {
1825 		device_printf(dev,
1826 			      "Rx queue %d is not ready\n",
1827 			      pf_q);
1828 		return err;
1829 	}
1830 
1831 	/* Skip if the queue is already in correct state */
1832 	if (enable == !!(qrx_ctrl & QRX_CTRL_QENA_STAT_M))
1833 		return (0);
1834 
1835 	if (enable)
1836 		qrx_ctrl |= QRX_CTRL_QENA_REQ_M;
1837 	else
1838 		qrx_ctrl &= ~QRX_CTRL_QENA_REQ_M;
1839 	wr32(hw, QRX_CTRL(pf_q), qrx_ctrl);
1840 
1841 	/* wait for the queue to finalize the request */
1842 	err = ice_is_rxq_ready(hw, pf_q, &qrx_ctrl);
1843 	if (err) {
1844 		device_printf(dev,
1845 			      "Rx queue %d %sable timeout\n",
1846 			      pf_q, (enable ? "en" : "dis"));
1847 		return err;
1848 	}
1849 
1850 	/* this should never happen */
1851 	if (enable != !!(qrx_ctrl & QRX_CTRL_QENA_STAT_M)) {
1852 		device_printf(dev,
1853 			      "Rx queue %d invalid state\n",
1854 			      pf_q);
1855 		return (EDOOFUS);
1856 	}
1857 
1858 	return (0);
1859 }
1860 
1861 /**
1862  * ice_control_all_rx_queues - Configure hardware to start or stop the Rx queues
1863  * @vsi: VSI to enable/disable queues
1864  * @enable: true to enable queues, false to disable
1865  *
1866  * Control the Rx queues through the QRX_CTRL register, enabling or disabling
1867  * them. Wait for the appropriate time to ensure that the queues have actually
1868  * reached the expected state.
1869  */
1870 int
1871 ice_control_all_rx_queues(struct ice_vsi *vsi, bool enable)
1872 {
1873 	int i, err;
1874 
1875 	/* TODO: amortize waits by changing all queues up front and then
1876 	 * checking their status afterwards. This will become more necessary
1877 	 * when we have a large number of queues.
1878 	 */
1879 	for (i = 0; i < vsi->num_rx_queues; i++) {
1880 		err = ice_control_rx_queue(vsi, i, enable);
1881 		if (err)
1882 			break;
1883 	}
1884 
1885 	return (0);
1886 }
1887 
1888 /**
1889  * ice_add_mac_to_list - Add MAC filter to a MAC filter list
1890  * @vsi: the VSI to forward to
1891  * @list: list which contains MAC filter entries
1892  * @addr: the MAC address to be added
1893  * @action: filter action to perform on match
1894  *
1895  * Adds a MAC address filter to the list which will be forwarded to firmware
1896  * to add a series of MAC address filters.
1897  *
1898  * Returns 0 on success, and an error code on failure.
1899  *
1900  */
1901 static int
1902 ice_add_mac_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
1903 		    const u8 *addr, enum ice_sw_fwd_act_type action)
1904 {
1905 	struct ice_fltr_list_entry *entry;
1906 
1907 	entry = (__typeof(entry))malloc(sizeof(*entry), M_ICE, M_NOWAIT|M_ZERO);
1908 	if (!entry)
1909 		return (ENOMEM);
1910 
1911 	entry->fltr_info.flag = ICE_FLTR_TX;
1912 	entry->fltr_info.src_id = ICE_SRC_ID_VSI;
1913 	entry->fltr_info.lkup_type = ICE_SW_LKUP_MAC;
1914 	entry->fltr_info.fltr_act = action;
1915 	entry->fltr_info.vsi_handle = vsi->idx;
1916 	bcopy(addr, entry->fltr_info.l_data.mac.mac_addr, ETHER_ADDR_LEN);
1917 
1918 	LIST_ADD(&entry->list_entry, list);
1919 
1920 	return 0;
1921 }
1922 
1923 /**
1924  * ice_free_fltr_list - Free memory associated with a MAC address list
1925  * @list: the list to free
1926  *
1927  * Free the memory of each entry associated with the list.
1928  */
1929 static void
1930 ice_free_fltr_list(struct ice_list_head *list)
1931 {
1932 	struct ice_fltr_list_entry *e, *tmp;
1933 
1934 	LIST_FOR_EACH_ENTRY_SAFE(e, tmp, list, ice_fltr_list_entry, list_entry) {
1935 		LIST_DEL(&e->list_entry);
1936 		free(e, M_ICE);
1937 	}
1938 }
1939 
1940 /**
1941  * ice_add_vsi_mac_filter - Add a MAC address filter for a VSI
1942  * @vsi: the VSI to add the filter for
1943  * @addr: MAC address to add a filter for
1944  *
1945  * Add a MAC address filter for a given VSI. This is a wrapper around
1946  * ice_add_mac to simplify the interface. First, it only accepts a single
1947  * address, so we don't have to mess around with the list setup in other
1948  * functions. Second, it ignores the ICE_ERR_ALREADY_EXISTS error, so that
1949  * callers don't need to worry about attempting to add the same filter twice.
1950  */
1951 int
1952 ice_add_vsi_mac_filter(struct ice_vsi *vsi, const u8 *addr)
1953 {
1954 	struct ice_list_head mac_addr_list;
1955 	struct ice_hw *hw = &vsi->sc->hw;
1956 	device_t dev = vsi->sc->dev;
1957 	enum ice_status status;
1958 	int err = 0;
1959 
1960 	INIT_LIST_HEAD(&mac_addr_list);
1961 
1962 	err = ice_add_mac_to_list(vsi, &mac_addr_list, addr, ICE_FWD_TO_VSI);
1963 	if (err)
1964 		goto free_mac_list;
1965 
1966 	status = ice_add_mac(hw, &mac_addr_list);
1967 	if (status == ICE_ERR_ALREADY_EXISTS) {
1968 		; /* Don't complain if we try to add a filter that already exists */
1969 	} else if (status) {
1970 		device_printf(dev,
1971 			      "Failed to add a filter for MAC %6D, err %s aq_err %s\n",
1972 			      addr, ":",
1973 			      ice_status_str(status),
1974 			      ice_aq_str(hw->adminq.sq_last_status));
1975 		err = (EIO);
1976 	}
1977 
1978 free_mac_list:
1979 	ice_free_fltr_list(&mac_addr_list);
1980 	return err;
1981 }
1982 
1983 /**
1984  * ice_cfg_pf_default_mac_filters - Setup default unicast and broadcast addrs
1985  * @sc: device softc structure
1986  *
1987  * Program the default unicast and broadcast filters for the PF VSI.
1988  */
1989 int
1990 ice_cfg_pf_default_mac_filters(struct ice_softc *sc)
1991 {
1992 	struct ice_vsi *vsi = &sc->pf_vsi;
1993 	struct ice_hw *hw = &sc->hw;
1994 	int err;
1995 
1996 	/* Add the LAN MAC address */
1997 	err = ice_add_vsi_mac_filter(vsi, hw->port_info->mac.lan_addr);
1998 	if (err)
1999 		return err;
2000 
2001 	/* Add the broadcast address */
2002 	err = ice_add_vsi_mac_filter(vsi, broadcastaddr);
2003 	if (err)
2004 		return err;
2005 
2006 	return (0);
2007 }
2008 
2009 /**
2010  * ice_remove_vsi_mac_filter - Remove a MAC address filter for a VSI
2011  * @vsi: the VSI to add the filter for
2012  * @addr: MAC address to remove a filter for
2013  *
2014  * Remove a MAC address filter from a given VSI. This is a wrapper around
2015  * ice_remove_mac to simplify the interface. First, it only accepts a single
2016  * address, so we don't have to mess around with the list setup in other
2017  * functions. Second, it ignores the ICE_ERR_DOES_NOT_EXIST error, so that
2018  * callers don't need to worry about attempting to remove filters which
2019  * haven't yet been added.
2020  */
2021 int
2022 ice_remove_vsi_mac_filter(struct ice_vsi *vsi, const u8 *addr)
2023 {
2024 	struct ice_list_head mac_addr_list;
2025 	struct ice_hw *hw = &vsi->sc->hw;
2026 	device_t dev = vsi->sc->dev;
2027 	enum ice_status status;
2028 	int err = 0;
2029 
2030 	INIT_LIST_HEAD(&mac_addr_list);
2031 
2032 	err = ice_add_mac_to_list(vsi, &mac_addr_list, addr, ICE_FWD_TO_VSI);
2033 	if (err)
2034 		goto free_mac_list;
2035 
2036 	status = ice_remove_mac(hw, &mac_addr_list);
2037 	if (status == ICE_ERR_DOES_NOT_EXIST) {
2038 		; /* Don't complain if we try to remove a filter that doesn't exist */
2039 	} else if (status) {
2040 		device_printf(dev,
2041 			      "Failed to remove a filter for MAC %6D, err %s aq_err %s\n",
2042 			      addr, ":",
2043 			      ice_status_str(status),
2044 			      ice_aq_str(hw->adminq.sq_last_status));
2045 		err = (EIO);
2046 	}
2047 
2048 free_mac_list:
2049 	ice_free_fltr_list(&mac_addr_list);
2050 	return err;
2051 }
2052 
2053 /**
2054  * ice_rm_pf_default_mac_filters - Remove default unicast and broadcast addrs
2055  * @sc: device softc structure
2056  *
2057  * Remove the default unicast and broadcast filters from the PF VSI.
2058  */
2059 int
2060 ice_rm_pf_default_mac_filters(struct ice_softc *sc)
2061 {
2062 	struct ice_vsi *vsi = &sc->pf_vsi;
2063 	struct ice_hw *hw = &sc->hw;
2064 	int err;
2065 
2066 	/* Remove the LAN MAC address */
2067 	err = ice_remove_vsi_mac_filter(vsi, hw->port_info->mac.lan_addr);
2068 	if (err)
2069 		return err;
2070 
2071 	/* Remove the broadcast address */
2072 	err = ice_remove_vsi_mac_filter(vsi, broadcastaddr);
2073 	if (err)
2074 		return (EIO);
2075 
2076 	return (0);
2077 }
2078 
2079 /**
2080  * ice_check_ctrlq_errors - Check for and report controlq errors
2081  * @sc: device private structure
2082  * @qname: name of the controlq
2083  * @cq: the controlq to check
2084  *
2085  * Check and report controlq errors. Currently all we do is report them to the
2086  * kernel message log, but we might want to improve this in the future, such
2087  * as to keep track of statistics.
2088  */
2089 static void
2090 ice_check_ctrlq_errors(struct ice_softc *sc, const char *qname,
2091 		       struct ice_ctl_q_info *cq)
2092 {
2093 	struct ice_hw *hw = &sc->hw;
2094 	u32 val;
2095 
2096 	/* Check for error indications. Note that all the controlqs use the
2097 	 * same register layout, so we use the PF_FW_AxQLEN defines only.
2098 	 */
2099 	val = rd32(hw, cq->rq.len);
2100 	if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
2101 		   PF_FW_ARQLEN_ARQCRIT_M)) {
2102 		if (val & PF_FW_ARQLEN_ARQVFE_M)
2103 			device_printf(sc->dev,
2104 				"%s Receive Queue VF Error detected\n", qname);
2105 		if (val & PF_FW_ARQLEN_ARQOVFL_M)
2106 			device_printf(sc->dev,
2107 				"%s Receive Queue Overflow Error detected\n",
2108 				qname);
2109 		if (val & PF_FW_ARQLEN_ARQCRIT_M)
2110 			device_printf(sc->dev,
2111 				"%s Receive Queue Critical Error detected\n",
2112 				qname);
2113 		val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
2114 			 PF_FW_ARQLEN_ARQCRIT_M);
2115 		wr32(hw, cq->rq.len, val);
2116 	}
2117 
2118 	val = rd32(hw, cq->sq.len);
2119 	if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
2120 		   PF_FW_ATQLEN_ATQCRIT_M)) {
2121 		if (val & PF_FW_ATQLEN_ATQVFE_M)
2122 			device_printf(sc->dev,
2123 				"%s Send Queue VF Error detected\n", qname);
2124 		if (val & PF_FW_ATQLEN_ATQOVFL_M)
2125 			device_printf(sc->dev,
2126 				"%s Send Queue Overflow Error detected\n",
2127 				qname);
2128 		if (val & PF_FW_ATQLEN_ATQCRIT_M)
2129 			device_printf(sc->dev,
2130 				"%s Send Queue Critical Error detected\n",
2131 				qname);
2132 		val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
2133 			 PF_FW_ATQLEN_ATQCRIT_M);
2134 		wr32(hw, cq->sq.len, val);
2135 	}
2136 }
2137 
2138 /**
2139  * ice_process_link_event - Process a link event indication from firmware
2140  * @sc: device softc structure
2141  * @e: the received event data
2142  *
2143  * Gets the current link status from hardware, and may print a message if an
2144  * unqualified is detected.
2145  */
2146 static void
2147 ice_process_link_event(struct ice_softc *sc,
2148 		       struct ice_rq_event_info __invariant_only *e)
2149 {
2150 	struct ice_port_info *pi = sc->hw.port_info;
2151 	struct ice_hw *hw = &sc->hw;
2152 	device_t dev = sc->dev;
2153 	enum ice_status status;
2154 
2155 	/* Sanity check that the data length isn't too small */
2156 	MPASS(le16toh(e->desc.datalen) >= ICE_GET_LINK_STATUS_DATALEN_V1);
2157 
2158 	/*
2159 	 * Even though the adapter gets link status information inside the
2160 	 * event, it needs to send a Get Link Status AQ command in order
2161 	 * to re-enable link events.
2162 	 */
2163 	pi->phy.get_link_info = true;
2164 	ice_get_link_status(pi, &sc->link_up);
2165 
2166 	if (pi->phy.link_info.topo_media_conflict &
2167 	   (ICE_AQ_LINK_TOPO_CONFLICT | ICE_AQ_LINK_MEDIA_CONFLICT |
2168 	    ICE_AQ_LINK_TOPO_CORRUPT))
2169 		device_printf(dev,
2170 		    "Possible mis-configuration of the Ethernet port detected; please use the Intel (R) Ethernet Port Configuration Tool utility to address the issue.\n");
2171 
2172 	if ((pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) &&
2173 	    !(pi->phy.link_info.link_info & ICE_AQ_LINK_UP)) {
2174 		if (!(pi->phy.link_info.an_info & ICE_AQ_QUALIFIED_MODULE))
2175 			device_printf(dev,
2176 			    "Link is disabled on this device because an unsupported module type was detected! Refer to the Intel (R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
2177 		if (pi->phy.link_info.link_cfg_err & ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED)
2178 			device_printf(dev,
2179 			    "The module's power requirements exceed the device's power supply. Cannot start link.\n");
2180 		if (pi->phy.link_info.link_cfg_err & ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT)
2181 			device_printf(dev,
2182 			    "The installed module is incompatible with the device's NVM image. Cannot start link.\n");
2183 	}
2184 
2185 	if (!(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
2186 		if (!ice_testandset_state(&sc->state, ICE_STATE_NO_MEDIA)) {
2187 			status = ice_aq_set_link_restart_an(pi, false, NULL);
2188 			if (status != ICE_SUCCESS && hw->adminq.sq_last_status != ICE_AQ_RC_EMODE)
2189 				device_printf(dev,
2190 				    "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n",
2191 				    __func__, ice_status_str(status),
2192 				    ice_aq_str(hw->adminq.sq_last_status));
2193 		}
2194 	}
2195 	/* ICE_STATE_NO_MEDIA is cleared when polling task detects media */
2196 
2197 	/* Indicate that link status must be reported again */
2198 	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
2199 
2200 	/* OS link info is updated elsewhere */
2201 }
2202 
2203 /**
2204  * ice_process_ctrlq_event - Respond to a controlq event
2205  * @sc: device private structure
2206  * @qname: the name for this controlq
2207  * @event: the event to process
2208  *
2209  * Perform actions in response to various controlq event notifications.
2210  */
2211 static void
2212 ice_process_ctrlq_event(struct ice_softc *sc, const char *qname,
2213 			struct ice_rq_event_info *event)
2214 {
2215 	u16 opcode;
2216 
2217 	opcode = le16toh(event->desc.opcode);
2218 
2219 	switch (opcode) {
2220 	case ice_aqc_opc_get_link_status:
2221 		ice_process_link_event(sc, event);
2222 		break;
2223 	case ice_aqc_opc_fw_logs_event:
2224 		ice_handle_fw_log_event(sc, &event->desc, event->msg_buf);
2225 		break;
2226 	case ice_aqc_opc_lldp_set_mib_change:
2227 		ice_handle_mib_change_event(sc, event);
2228 		break;
2229 	case ice_aqc_opc_event_lan_overflow:
2230 		ice_handle_lan_overflow_event(sc, event);
2231 		break;
2232 	case ice_aqc_opc_get_health_status:
2233 		ice_handle_health_status_event(sc, event);
2234 		break;
2235 	default:
2236 		device_printf(sc->dev,
2237 			      "%s Receive Queue unhandled event 0x%04x ignored\n",
2238 			      qname, opcode);
2239 	}
2240 }
2241 
2242 /**
2243  * ice_process_ctrlq - helper function to process controlq rings
2244  * @sc: device private structure
2245  * @q_type: specific control queue type
2246  * @pending: return parameter to track remaining events
2247  *
2248  * Process controlq events for a given control queue type. Returns zero on
2249  * success, and an error code on failure. If successful, pending is the number
2250  * of remaining events left in the queue.
2251  */
2252 int
2253 ice_process_ctrlq(struct ice_softc *sc, enum ice_ctl_q q_type, u16 *pending)
2254 {
2255 	struct ice_rq_event_info event = { { 0 } };
2256 	struct ice_hw *hw = &sc->hw;
2257 	struct ice_ctl_q_info *cq;
2258 	enum ice_status status;
2259 	const char *qname;
2260 	int loop = 0;
2261 
2262 	switch (q_type) {
2263 	case ICE_CTL_Q_ADMIN:
2264 		cq = &hw->adminq;
2265 		qname = "Admin";
2266 		break;
2267 	case ICE_CTL_Q_MAILBOX:
2268 		cq = &hw->mailboxq;
2269 		qname = "Mailbox";
2270 		break;
2271 	default:
2272 		device_printf(sc->dev,
2273 			      "Unknown control queue type 0x%x\n",
2274 			      q_type);
2275 		return 0;
2276 	}
2277 
2278 	ice_check_ctrlq_errors(sc, qname, cq);
2279 
2280 	/*
2281 	 * Control queue processing happens during the admin task which may be
2282 	 * holding a non-sleepable lock, so we *must* use M_NOWAIT here.
2283 	 */
2284 	event.buf_len = cq->rq_buf_size;
2285 	event.msg_buf = (u8 *)malloc(event.buf_len, M_ICE, M_ZERO | M_NOWAIT);
2286 	if (!event.msg_buf) {
2287 		device_printf(sc->dev,
2288 			      "Unable to allocate memory for %s Receive Queue event\n",
2289 			      qname);
2290 		return (ENOMEM);
2291 	}
2292 
2293 	do {
2294 		status = ice_clean_rq_elem(hw, cq, &event, pending);
2295 		if (status == ICE_ERR_AQ_NO_WORK)
2296 			break;
2297 		if (status) {
2298 			if (q_type == ICE_CTL_Q_ADMIN)
2299 				device_printf(sc->dev,
2300 					      "%s Receive Queue event error %s\n",
2301 					      qname, ice_status_str(status));
2302 			else
2303 				device_printf(sc->dev,
2304 					      "%s Receive Queue event error %s\n",
2305 					      qname, ice_status_str(status));
2306 			free(event.msg_buf, M_ICE);
2307 			return (EIO);
2308 		}
2309 		/* XXX should we separate this handler by controlq type? */
2310 		ice_process_ctrlq_event(sc, qname, &event);
2311 	} while (*pending && (++loop < ICE_CTRLQ_WORK_LIMIT));
2312 
2313 	free(event.msg_buf, M_ICE);
2314 
2315 	return 0;
2316 }
2317 
2318 /**
2319  * pkg_ver_empty - Check if a package version is empty
2320  * @pkg_ver: the package version to check
2321  * @pkg_name: the package name to check
2322  *
2323  * Checks if the package version structure is empty. We consider a package
2324  * version as empty if none of the versions are non-zero and the name string
2325  * is null as well.
2326  *
2327  * This is used to check if the package version was initialized by the driver,
2328  * as we do not expect an actual DDP package file to have a zero'd version and
2329  * name.
2330  *
2331  * @returns true if the package version is valid, or false otherwise.
2332  */
2333 static bool
2334 pkg_ver_empty(struct ice_pkg_ver *pkg_ver, u8 *pkg_name)
2335 {
2336 	return (pkg_name[0] == '\0' &&
2337 		pkg_ver->major == 0 &&
2338 		pkg_ver->minor == 0 &&
2339 		pkg_ver->update == 0 &&
2340 		pkg_ver->draft == 0);
2341 }
2342 
2343 /**
2344  * pkg_ver_compatible - Check if the package version is compatible
2345  * @pkg_ver: the package version to check
2346  *
2347  * Compares the package version number to the driver's expected major/minor
2348  * version. Returns an integer indicating whether the version is older, newer,
2349  * or compatible with the driver.
2350  *
2351  * @returns 0 if the package version is compatible, -1 if the package version
2352  * is older, and 1 if the package version is newer than the driver version.
2353  */
2354 static int
2355 pkg_ver_compatible(struct ice_pkg_ver *pkg_ver)
2356 {
2357 	if (pkg_ver->major > ICE_PKG_SUPP_VER_MAJ)
2358 		return (1); /* newer */
2359 	else if ((pkg_ver->major == ICE_PKG_SUPP_VER_MAJ) &&
2360 		 (pkg_ver->minor > ICE_PKG_SUPP_VER_MNR))
2361 		return (1); /* newer */
2362 	else if ((pkg_ver->major == ICE_PKG_SUPP_VER_MAJ) &&
2363 		 (pkg_ver->minor == ICE_PKG_SUPP_VER_MNR))
2364 		return (0); /* compatible */
2365 	else
2366 		return (-1); /* older */
2367 }
2368 
2369 /**
2370  * ice_os_pkg_version_str - Format OS package version info into a sbuf
2371  * @hw: device hw structure
2372  * @buf: string buffer to store name/version string
2373  *
2374  * Formats the name and version of the OS DDP package as found in the ice_ddp
2375  * module into a string.
2376  *
2377  * @remark This will almost always be the same as the active package, but
2378  * could be different in some cases. Use ice_active_pkg_version_str to get the
2379  * version of the active DDP package.
2380  */
2381 static void
2382 ice_os_pkg_version_str(struct ice_hw *hw, struct sbuf *buf)
2383 {
2384 	char name_buf[ICE_PKG_NAME_SIZE];
2385 
2386 	/* If the OS DDP package info is empty, use "None" */
2387 	if (pkg_ver_empty(&hw->pkg_ver, hw->pkg_name)) {
2388 		sbuf_printf(buf, "None");
2389 		return;
2390 	}
2391 
2392 	/*
2393 	 * This should already be null-terminated, but since this is a raw
2394 	 * value from an external source, strlcpy() into a new buffer to
2395 	 * make sure.
2396 	 */
2397 	bzero(name_buf, sizeof(name_buf));
2398 	strlcpy(name_buf, (char *)hw->pkg_name, ICE_PKG_NAME_SIZE);
2399 
2400 	sbuf_printf(buf, "%s version %u.%u.%u.%u",
2401 	    name_buf,
2402 	    hw->pkg_ver.major,
2403 	    hw->pkg_ver.minor,
2404 	    hw->pkg_ver.update,
2405 	    hw->pkg_ver.draft);
2406 }
2407 
2408 /**
2409  * ice_active_pkg_version_str - Format active package version info into a sbuf
2410  * @hw: device hw structure
2411  * @buf: string buffer to store name/version string
2412  *
2413  * Formats the name and version of the active DDP package info into a string
2414  * buffer for use.
2415  */
2416 static void
2417 ice_active_pkg_version_str(struct ice_hw *hw, struct sbuf *buf)
2418 {
2419 	char name_buf[ICE_PKG_NAME_SIZE];
2420 
2421 	/* If the active DDP package info is empty, use "None" */
2422 	if (pkg_ver_empty(&hw->active_pkg_ver, hw->active_pkg_name)) {
2423 		sbuf_printf(buf, "None");
2424 		return;
2425 	}
2426 
2427 	/*
2428 	 * This should already be null-terminated, but since this is a raw
2429 	 * value from an external source, strlcpy() into a new buffer to
2430 	 * make sure.
2431 	 */
2432 	bzero(name_buf, sizeof(name_buf));
2433 	strlcpy(name_buf, (char *)hw->active_pkg_name, ICE_PKG_NAME_SIZE);
2434 
2435 	sbuf_printf(buf, "%s version %u.%u.%u.%u",
2436 	    name_buf,
2437 	    hw->active_pkg_ver.major,
2438 	    hw->active_pkg_ver.minor,
2439 	    hw->active_pkg_ver.update,
2440 	    hw->active_pkg_ver.draft);
2441 
2442 	if (hw->active_track_id != 0)
2443 		sbuf_printf(buf, ", track id 0x%08x", hw->active_track_id);
2444 }
2445 
2446 /**
2447  * ice_nvm_version_str - Format the NVM version information into a sbuf
2448  * @hw: device hw structure
2449  * @buf: string buffer to store version string
2450  *
2451  * Formats the NVM information including firmware version, API version, NVM
2452  * version, the EETRACK id, and OEM specific version information into a string
2453  * buffer.
2454  */
2455 static void
2456 ice_nvm_version_str(struct ice_hw *hw, struct sbuf *buf)
2457 {
2458 	struct ice_nvm_info *nvm = &hw->flash.nvm;
2459 	struct ice_orom_info *orom = &hw->flash.orom;
2460 	struct ice_netlist_info *netlist = &hw->flash.netlist;
2461 
2462 	/* Note that the netlist versions are stored in packed Binary Coded
2463 	 * Decimal format. The use of '%x' will correctly display these as
2464 	 * decimal numbers. This works because every 4 bits will be displayed
2465 	 * as a hexadecimal digit, and the BCD format will only use the values
2466 	 * 0-9.
2467 	 */
2468 	sbuf_printf(buf,
2469 		    "fw %u.%u.%u api %u.%u nvm %x.%02x etid %08x netlist %x.%x.%x-%x.%x.%x.%04x oem %u.%u.%u",
2470 		    hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch,
2471 		    hw->api_maj_ver, hw->api_min_ver,
2472 		    nvm->major, nvm->minor, nvm->eetrack,
2473 		    netlist->major, netlist->minor,
2474 		    netlist->type >> 16, netlist->type & 0xFFFF,
2475 		    netlist->rev, netlist->cust_ver, netlist->hash,
2476 		    orom->major, orom->build, orom->patch);
2477 }
2478 
2479 /**
2480  * ice_print_nvm_version - Print the NVM info to the kernel message log
2481  * @sc: the device softc structure
2482  *
2483  * Format and print an NVM version string using ice_nvm_version_str().
2484  */
2485 void
2486 ice_print_nvm_version(struct ice_softc *sc)
2487 {
2488 	struct ice_hw *hw = &sc->hw;
2489 	device_t dev = sc->dev;
2490 	struct sbuf *sbuf;
2491 
2492 	sbuf = sbuf_new_auto();
2493 	ice_nvm_version_str(hw, sbuf);
2494 	sbuf_finish(sbuf);
2495 	device_printf(dev, "%s\n", sbuf_data(sbuf));
2496 	sbuf_delete(sbuf);
2497 }
2498 
2499 /**
2500  * ice_update_vsi_hw_stats - Update VSI-specific ethernet statistics counters
2501  * @vsi: the VSI to be updated
2502  *
2503  * Reads hardware stats and updates the ice_vsi_hw_stats tracking structure with
2504  * the updated values.
2505  */
2506 void
2507 ice_update_vsi_hw_stats(struct ice_vsi *vsi)
2508 {
2509 	struct ice_eth_stats *prev_es, *cur_es;
2510 	struct ice_hw *hw = &vsi->sc->hw;
2511 	u16 vsi_num;
2512 
2513 	if (!ice_is_vsi_valid(hw, vsi->idx))
2514 		return;
2515 
2516 	vsi_num = ice_get_hw_vsi_num(hw, vsi->idx); /* HW absolute index of a VSI */
2517 	prev_es = &vsi->hw_stats.prev;
2518 	cur_es = &vsi->hw_stats.cur;
2519 
2520 #define ICE_VSI_STAT40(name, location) \
2521 	ice_stat_update40(hw, name ## L(vsi_num), \
2522 			  vsi->hw_stats.offsets_loaded, \
2523 			  &prev_es->location, &cur_es->location)
2524 
2525 #define ICE_VSI_STAT32(name, location) \
2526 	ice_stat_update32(hw, name(vsi_num), \
2527 			  vsi->hw_stats.offsets_loaded, \
2528 			  &prev_es->location, &cur_es->location)
2529 
2530 	ICE_VSI_STAT40(GLV_GORC, rx_bytes);
2531 	ICE_VSI_STAT40(GLV_UPRC, rx_unicast);
2532 	ICE_VSI_STAT40(GLV_MPRC, rx_multicast);
2533 	ICE_VSI_STAT40(GLV_BPRC, rx_broadcast);
2534 	ICE_VSI_STAT32(GLV_RDPC, rx_discards);
2535 	ICE_VSI_STAT40(GLV_GOTC, tx_bytes);
2536 	ICE_VSI_STAT40(GLV_UPTC, tx_unicast);
2537 	ICE_VSI_STAT40(GLV_MPTC, tx_multicast);
2538 	ICE_VSI_STAT40(GLV_BPTC, tx_broadcast);
2539 	ICE_VSI_STAT32(GLV_TEPC, tx_errors);
2540 
2541 	ice_stat_update_repc(hw, vsi->idx, vsi->hw_stats.offsets_loaded,
2542 			     cur_es);
2543 
2544 #undef ICE_VSI_STAT40
2545 #undef ICE_VSI_STAT32
2546 
2547 	vsi->hw_stats.offsets_loaded = true;
2548 }
2549 
2550 /**
2551  * ice_reset_vsi_stats - Reset VSI statistics counters
2552  * @vsi: VSI structure
2553  *
2554  * Resets the software tracking counters for the VSI statistics, and indicate
2555  * that the offsets haven't been loaded. This is intended to be called
2556  * post-reset so that VSI statistics count from zero again.
2557  */
2558 void
2559 ice_reset_vsi_stats(struct ice_vsi *vsi)
2560 {
2561 	/* Reset HW stats */
2562 	memset(&vsi->hw_stats.prev, 0, sizeof(vsi->hw_stats.prev));
2563 	memset(&vsi->hw_stats.cur, 0, sizeof(vsi->hw_stats.cur));
2564 	vsi->hw_stats.offsets_loaded = false;
2565 }
2566 
2567 /**
2568  * ice_update_pf_stats - Update port stats counters
2569  * @sc: device private softc structure
2570  *
2571  * Reads hardware statistics registers and updates the software tracking
2572  * structure with new values.
2573  */
2574 void
2575 ice_update_pf_stats(struct ice_softc *sc)
2576 {
2577 	struct ice_hw_port_stats *prev_ps, *cur_ps;
2578 	struct ice_hw *hw = &sc->hw;
2579 	u8 lport;
2580 
2581 	MPASS(hw->port_info);
2582 
2583 	prev_ps = &sc->stats.prev;
2584 	cur_ps = &sc->stats.cur;
2585 	lport = hw->port_info->lport;
2586 
2587 #define ICE_PF_STAT_PFC(name, location, index) \
2588 	ice_stat_update40(hw, name(lport, index), \
2589 			  sc->stats.offsets_loaded, \
2590 			  &prev_ps->location[index], &cur_ps->location[index])
2591 
2592 #define ICE_PF_STAT40(name, location) \
2593 	ice_stat_update40(hw, name ## L(lport), \
2594 			  sc->stats.offsets_loaded, \
2595 			  &prev_ps->location, &cur_ps->location)
2596 
2597 #define ICE_PF_STAT32(name, location) \
2598 	ice_stat_update32(hw, name(lport), \
2599 			  sc->stats.offsets_loaded, \
2600 			  &prev_ps->location, &cur_ps->location)
2601 
2602 	ICE_PF_STAT40(GLPRT_GORC, eth.rx_bytes);
2603 	ICE_PF_STAT40(GLPRT_UPRC, eth.rx_unicast);
2604 	ICE_PF_STAT40(GLPRT_MPRC, eth.rx_multicast);
2605 	ICE_PF_STAT40(GLPRT_BPRC, eth.rx_broadcast);
2606 	ICE_PF_STAT40(GLPRT_GOTC, eth.tx_bytes);
2607 	ICE_PF_STAT40(GLPRT_UPTC, eth.tx_unicast);
2608 	ICE_PF_STAT40(GLPRT_MPTC, eth.tx_multicast);
2609 	ICE_PF_STAT40(GLPRT_BPTC, eth.tx_broadcast);
2610 	/* This stat register doesn't have an lport */
2611 	ice_stat_update32(hw, PRTRPB_RDPC,
2612 			  sc->stats.offsets_loaded,
2613 			  &prev_ps->eth.rx_discards, &cur_ps->eth.rx_discards);
2614 
2615 	ICE_PF_STAT32(GLPRT_TDOLD, tx_dropped_link_down);
2616 	ICE_PF_STAT40(GLPRT_PRC64, rx_size_64);
2617 	ICE_PF_STAT40(GLPRT_PRC127, rx_size_127);
2618 	ICE_PF_STAT40(GLPRT_PRC255, rx_size_255);
2619 	ICE_PF_STAT40(GLPRT_PRC511, rx_size_511);
2620 	ICE_PF_STAT40(GLPRT_PRC1023, rx_size_1023);
2621 	ICE_PF_STAT40(GLPRT_PRC1522, rx_size_1522);
2622 	ICE_PF_STAT40(GLPRT_PRC9522, rx_size_big);
2623 	ICE_PF_STAT40(GLPRT_PTC64, tx_size_64);
2624 	ICE_PF_STAT40(GLPRT_PTC127, tx_size_127);
2625 	ICE_PF_STAT40(GLPRT_PTC255, tx_size_255);
2626 	ICE_PF_STAT40(GLPRT_PTC511, tx_size_511);
2627 	ICE_PF_STAT40(GLPRT_PTC1023, tx_size_1023);
2628 	ICE_PF_STAT40(GLPRT_PTC1522, tx_size_1522);
2629 	ICE_PF_STAT40(GLPRT_PTC9522, tx_size_big);
2630 
2631 	/* Update Priority Flow Control Stats */
2632 	for (int i = 0; i <= GLPRT_PXOFFRXC_MAX_INDEX; i++) {
2633 		ICE_PF_STAT_PFC(GLPRT_PXONRXC, priority_xon_rx, i);
2634 		ICE_PF_STAT_PFC(GLPRT_PXOFFRXC, priority_xoff_rx, i);
2635 		ICE_PF_STAT_PFC(GLPRT_PXONTXC, priority_xon_tx, i);
2636 		ICE_PF_STAT_PFC(GLPRT_PXOFFTXC, priority_xoff_tx, i);
2637 		ICE_PF_STAT_PFC(GLPRT_RXON2OFFCNT, priority_xon_2_xoff, i);
2638 	}
2639 
2640 	ICE_PF_STAT32(GLPRT_LXONRXC, link_xon_rx);
2641 	ICE_PF_STAT32(GLPRT_LXOFFRXC, link_xoff_rx);
2642 	ICE_PF_STAT32(GLPRT_LXONTXC, link_xon_tx);
2643 	ICE_PF_STAT32(GLPRT_LXOFFTXC, link_xoff_tx);
2644 	ICE_PF_STAT32(GLPRT_CRCERRS, crc_errors);
2645 	ICE_PF_STAT32(GLPRT_ILLERRC, illegal_bytes);
2646 	ICE_PF_STAT32(GLPRT_MLFC, mac_local_faults);
2647 	ICE_PF_STAT32(GLPRT_MRFC, mac_remote_faults);
2648 	ICE_PF_STAT32(GLPRT_RLEC, rx_len_errors);
2649 	ICE_PF_STAT32(GLPRT_RUC, rx_undersize);
2650 	ICE_PF_STAT32(GLPRT_RFC, rx_fragments);
2651 	ICE_PF_STAT32(GLPRT_ROC, rx_oversize);
2652 	ICE_PF_STAT32(GLPRT_RJC, rx_jabber);
2653 
2654 #undef ICE_PF_STAT40
2655 #undef ICE_PF_STAT32
2656 #undef ICE_PF_STAT_PFC
2657 
2658 	sc->stats.offsets_loaded = true;
2659 }
2660 
2661 /**
2662  * ice_reset_pf_stats - Reset port stats counters
2663  * @sc: Device private softc structure
2664  *
2665  * Reset software tracking values for statistics to zero, and indicate that
2666  * offsets haven't been loaded. Intended to be called after a device reset so
2667  * that statistics count from zero again.
2668  */
2669 void
2670 ice_reset_pf_stats(struct ice_softc *sc)
2671 {
2672 	memset(&sc->stats.prev, 0, sizeof(sc->stats.prev));
2673 	memset(&sc->stats.cur, 0, sizeof(sc->stats.cur));
2674 	sc->stats.offsets_loaded = false;
2675 }
2676 
2677 /**
2678  * ice_sysctl_show_fw - sysctl callback to show firmware information
2679  * @oidp: sysctl oid structure
2680  * @arg1: pointer to private data structure
2681  * @arg2: unused
2682  * @req: sysctl request pointer
2683  *
2684  * Callback for the fw_version sysctl, to display the current firmware
2685  * information found at hardware init time.
2686  */
2687 static int
2688 ice_sysctl_show_fw(SYSCTL_HANDLER_ARGS)
2689 {
2690 	struct ice_softc *sc = (struct ice_softc *)arg1;
2691 	struct ice_hw *hw = &sc->hw;
2692 	struct sbuf *sbuf;
2693 
2694 	UNREFERENCED_PARAMETER(oidp);
2695 	UNREFERENCED_PARAMETER(arg2);
2696 
2697 	if (ice_driver_is_detaching(sc))
2698 		return (ESHUTDOWN);
2699 
2700 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
2701 	ice_nvm_version_str(hw, sbuf);
2702 	sbuf_finish(sbuf);
2703 	sbuf_delete(sbuf);
2704 
2705 	return (0);
2706 }
2707 
2708 /**
2709  * ice_sysctl_pba_number - sysctl callback to show PBA number
2710  * @oidp: sysctl oid structure
2711  * @arg1: pointer to private data structure
2712  * @arg2: unused
2713  * @req: sysctl request pointer
2714  *
2715  * Callback for the pba_number sysctl, used to read the Product Board Assembly
2716  * number for this device.
2717  */
2718 static int
2719 ice_sysctl_pba_number(SYSCTL_HANDLER_ARGS)
2720 {
2721 	struct ice_softc *sc = (struct ice_softc *)arg1;
2722 	struct ice_hw *hw = &sc->hw;
2723 	device_t dev = sc->dev;
2724 	u8 pba_string[32] = "";
2725 	enum ice_status status;
2726 
2727 	UNREFERENCED_PARAMETER(arg2);
2728 
2729 	if (ice_driver_is_detaching(sc))
2730 		return (ESHUTDOWN);
2731 
2732 	status = ice_read_pba_string(hw, pba_string, sizeof(pba_string));
2733 	if (status) {
2734 		device_printf(dev,
2735 		    "%s: failed to read PBA string from NVM; status %s, aq_err %s\n",
2736 		    __func__, ice_status_str(status),
2737 		    ice_aq_str(hw->adminq.sq_last_status));
2738 		return (EIO);
2739 	}
2740 
2741 	return sysctl_handle_string(oidp, pba_string, sizeof(pba_string), req);
2742 }
2743 
2744 /**
2745  * ice_sysctl_pkg_version - sysctl to show the active package version info
2746  * @oidp: sysctl oid structure
2747  * @arg1: pointer to private data structure
2748  * @arg2: unused
2749  * @req: sysctl request pointer
2750  *
2751  * Callback for the pkg_version sysctl, to display the active DDP package name
2752  * and version information.
2753  */
2754 static int
2755 ice_sysctl_pkg_version(SYSCTL_HANDLER_ARGS)
2756 {
2757 	struct ice_softc *sc = (struct ice_softc *)arg1;
2758 	struct ice_hw *hw = &sc->hw;
2759 	struct sbuf *sbuf;
2760 
2761 	UNREFERENCED_PARAMETER(oidp);
2762 	UNREFERENCED_PARAMETER(arg2);
2763 
2764 	if (ice_driver_is_detaching(sc))
2765 		return (ESHUTDOWN);
2766 
2767 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
2768 	ice_active_pkg_version_str(hw, sbuf);
2769 	sbuf_finish(sbuf);
2770 	sbuf_delete(sbuf);
2771 
2772 	return (0);
2773 }
2774 
2775 /**
2776  * ice_sysctl_os_pkg_version - sysctl to show the OS package version info
2777  * @oidp: sysctl oid structure
2778  * @arg1: pointer to private data structure
2779  * @arg2: unused
2780  * @req: sysctl request pointer
2781  *
2782  * Callback for the pkg_version sysctl, to display the OS DDP package name and
2783  * version info found in the ice_ddp module.
2784  */
2785 static int
2786 ice_sysctl_os_pkg_version(SYSCTL_HANDLER_ARGS)
2787 {
2788 	struct ice_softc *sc = (struct ice_softc *)arg1;
2789 	struct ice_hw *hw = &sc->hw;
2790 	struct sbuf *sbuf;
2791 
2792 	UNREFERENCED_PARAMETER(oidp);
2793 	UNREFERENCED_PARAMETER(arg2);
2794 
2795 	if (ice_driver_is_detaching(sc))
2796 		return (ESHUTDOWN);
2797 
2798 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
2799 	ice_os_pkg_version_str(hw, sbuf);
2800 	sbuf_finish(sbuf);
2801 	sbuf_delete(sbuf);
2802 
2803 	return (0);
2804 }
2805 
2806 /**
2807  * ice_sysctl_current_speed - sysctl callback to show current link speed
2808  * @oidp: sysctl oid structure
2809  * @arg1: pointer to private data structure
2810  * @arg2: unused
2811  * @req: sysctl request pointer
2812  *
2813  * Callback for the current_speed sysctl, to display the string representing
2814  * the current link speed.
2815  */
2816 static int
2817 ice_sysctl_current_speed(SYSCTL_HANDLER_ARGS)
2818 {
2819 	struct ice_softc *sc = (struct ice_softc *)arg1;
2820 	struct ice_hw *hw = &sc->hw;
2821 	struct sbuf *sbuf;
2822 
2823 	UNREFERENCED_PARAMETER(oidp);
2824 	UNREFERENCED_PARAMETER(arg2);
2825 
2826 	if (ice_driver_is_detaching(sc))
2827 		return (ESHUTDOWN);
2828 
2829 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 10, req);
2830 	sbuf_printf(sbuf, "%s", ice_aq_speed_to_str(hw->port_info));
2831 	sbuf_finish(sbuf);
2832 	sbuf_delete(sbuf);
2833 
2834 	return (0);
2835 }
2836 
/**
 * @var phy_link_speeds
 * @brief PHY link speed conversion array
 *
 * Array of link speeds to convert ICE_PHY_TYPE_LOW and ICE_PHY_TYPE_HIGH into
 * link speeds used by the link speed sysctls.
 *
 * The first 64 entries (indices 0 - 63) are looked up by the bit position of
 * a flag in the low PHY type quad. The remaining entries are looked up by the
 * bit position of a flag in the high PHY type quad plus 64.
 *
 * @remark these are based on the indices used in the BIT() macros for the
 * ICE_PHY_TYPE_LOW_* and ICE_PHY_TYPE_HIGH_* definitions.
 */
static const uint16_t phy_link_speeds[] = {
    /* Indices 0 - 1 */
    ICE_AQ_LINK_SPEED_100MB,
    ICE_AQ_LINK_SPEED_100MB,
    /* Indices 2 - 6 */
    ICE_AQ_LINK_SPEED_1000MB,
    ICE_AQ_LINK_SPEED_1000MB,
    ICE_AQ_LINK_SPEED_1000MB,
    ICE_AQ_LINK_SPEED_1000MB,
    ICE_AQ_LINK_SPEED_1000MB,
    /* Indices 7 - 9 */
    ICE_AQ_LINK_SPEED_2500MB,
    ICE_AQ_LINK_SPEED_2500MB,
    ICE_AQ_LINK_SPEED_2500MB,
    /* Indices 10 - 11 */
    ICE_AQ_LINK_SPEED_5GB,
    ICE_AQ_LINK_SPEED_5GB,
    /* Indices 12 - 18 */
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    /* Indices 19 - 29 */
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    /* Indices 30 - 35 */
    ICE_AQ_LINK_SPEED_40GB,
    ICE_AQ_LINK_SPEED_40GB,
    ICE_AQ_LINK_SPEED_40GB,
    ICE_AQ_LINK_SPEED_40GB,
    ICE_AQ_LINK_SPEED_40GB,
    ICE_AQ_LINK_SPEED_40GB,
    /* Indices 36 - 50 */
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    /* Indices 51 - 63 */
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    /*
     * These rates are for ICE_PHY_TYPE_HIGH_*: indices 64 - 68, i.e. bit
     * positions 0 - 4 of the high PHY type quad.
     */
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB
};
2919 
/*
 * Help text for the advertised link speed sysctl. Each entry pairs a flag
 * value with the link speed it selects; 0x0 requests automatic selection.
 *
 * ice_sysctl_advertise_speed() rejects written values greater than 0x7FF,
 * so the 0x8000 flag listed here cannot be set through that handler.
 */
#define ICE_SYSCTL_HELP_ADVERTISE_SPEED		\
"\nControl advertised link speed."		\
"\nFlags:"					\
"\n\t   0x0 - Auto"				\
"\n\t   0x1 - 10 Mb"				\
"\n\t   0x2 - 100 Mb"				\
"\n\t   0x4 - 1G"				\
"\n\t   0x8 - 2.5G"				\
"\n\t  0x10 - 5G"				\
"\n\t  0x20 - 10G"				\
"\n\t  0x40 - 20G"				\
"\n\t  0x80 - 25G"				\
"\n\t 0x100 - 40G"				\
"\n\t 0x200 - 50G"				\
"\n\t 0x400 - 100G"				\
"\n\t0x8000 - Unknown"				\
"\n\t"						\
"\nUse \"sysctl -x\" to view flags properly."
2938 
/*
 * PHY type groups, one per link speed. Each macro is the OR of every
 * ICE_PHY_TYPE_LOW_* (or ICE_PHY_TYPE_HIGH_*) flag that runs at that speed.
 * ice_sysctl_speeds_to_aq_phy_types() ORs a group into its output mask when
 * the matching ICE_AQ_LINK_SPEED_* flag is requested.
 *
 * 100G is the only speed with flags in both quads, so it is split into a
 * _LOW and a _HIGH group.
 */
#define ICE_PHYS_100MB			\
    (ICE_PHY_TYPE_LOW_100BASE_TX |	\
     ICE_PHY_TYPE_LOW_100M_SGMII)
#define ICE_PHYS_1000MB			\
    (ICE_PHY_TYPE_LOW_1000BASE_T |	\
     ICE_PHY_TYPE_LOW_1000BASE_SX |	\
     ICE_PHY_TYPE_LOW_1000BASE_LX |	\
     ICE_PHY_TYPE_LOW_1000BASE_KX |	\
     ICE_PHY_TYPE_LOW_1G_SGMII)
#define ICE_PHYS_2500MB			\
    (ICE_PHY_TYPE_LOW_2500BASE_T |	\
     ICE_PHY_TYPE_LOW_2500BASE_X |	\
     ICE_PHY_TYPE_LOW_2500BASE_KX)
#define ICE_PHYS_5GB			\
    (ICE_PHY_TYPE_LOW_5GBASE_T |	\
     ICE_PHY_TYPE_LOW_5GBASE_KR)
#define ICE_PHYS_10GB			\
    (ICE_PHY_TYPE_LOW_10GBASE_T |	\
     ICE_PHY_TYPE_LOW_10G_SFI_DA |	\
     ICE_PHY_TYPE_LOW_10GBASE_SR |	\
     ICE_PHY_TYPE_LOW_10GBASE_LR |	\
     ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 |	\
     ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC |	\
     ICE_PHY_TYPE_LOW_10G_SFI_C2C)
#define ICE_PHYS_25GB			\
    (ICE_PHY_TYPE_LOW_25GBASE_T |	\
     ICE_PHY_TYPE_LOW_25GBASE_CR |	\
     ICE_PHY_TYPE_LOW_25GBASE_CR_S |	\
     ICE_PHY_TYPE_LOW_25GBASE_CR1 |	\
     ICE_PHY_TYPE_LOW_25GBASE_SR |	\
     ICE_PHY_TYPE_LOW_25GBASE_LR |	\
     ICE_PHY_TYPE_LOW_25GBASE_KR |	\
     ICE_PHY_TYPE_LOW_25GBASE_KR_S |	\
     ICE_PHY_TYPE_LOW_25GBASE_KR1 |	\
     ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC |	\
     ICE_PHY_TYPE_LOW_25G_AUI_C2C)
#define ICE_PHYS_40GB			\
    (ICE_PHY_TYPE_LOW_40GBASE_CR4 |	\
     ICE_PHY_TYPE_LOW_40GBASE_SR4 |	\
     ICE_PHY_TYPE_LOW_40GBASE_LR4 |	\
     ICE_PHY_TYPE_LOW_40GBASE_KR4 |	\
     ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC | \
     ICE_PHY_TYPE_LOW_40G_XLAUI)
#define ICE_PHYS_50GB			\
    (ICE_PHY_TYPE_LOW_50GBASE_CR2 |	\
     ICE_PHY_TYPE_LOW_50GBASE_SR2 |	\
     ICE_PHY_TYPE_LOW_50GBASE_LR2 |	\
     ICE_PHY_TYPE_LOW_50GBASE_KR2 |	\
     ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC | \
     ICE_PHY_TYPE_LOW_50G_LAUI2 |	\
     ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC | \
     ICE_PHY_TYPE_LOW_50G_AUI2 |	\
     ICE_PHY_TYPE_LOW_50GBASE_CP |	\
     ICE_PHY_TYPE_LOW_50GBASE_SR |	\
     ICE_PHY_TYPE_LOW_50GBASE_FR |	\
     ICE_PHY_TYPE_LOW_50GBASE_LR |	\
     ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4 |	\
     ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC | \
     ICE_PHY_TYPE_LOW_50G_AUI1)
#define ICE_PHYS_100GB_LOW		\
    (ICE_PHY_TYPE_LOW_100GBASE_CR4 |	\
     ICE_PHY_TYPE_LOW_100GBASE_SR4 |	\
     ICE_PHY_TYPE_LOW_100GBASE_LR4 |	\
     ICE_PHY_TYPE_LOW_100GBASE_KR4 |	\
     ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC | \
     ICE_PHY_TYPE_LOW_100G_CAUI4 |	\
     ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC | \
     ICE_PHY_TYPE_LOW_100G_AUI4 |	\
     ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 | \
     ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 | \
     ICE_PHY_TYPE_LOW_100GBASE_CP2 |	\
     ICE_PHY_TYPE_LOW_100GBASE_SR2 |	\
     ICE_PHY_TYPE_LOW_100GBASE_DR)
#define ICE_PHYS_100GB_HIGH		\
    (ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4 | \
     ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC | \
     ICE_PHY_TYPE_HIGH_100G_CAUI2 |	\
     ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC | \
     ICE_PHY_TYPE_HIGH_100G_AUI2)
3018 
3019 /**
3020  * ice_aq_phy_types_to_link_speeds - Convert the PHY Types to speeds
3021  * @phy_type_low: lower 64-bit PHY Type bitmask
3022  * @phy_type_high: upper 64-bit PHY Type bitmask
3023  *
3024  * Convert the PHY Type fields from Get PHY Abilities and Set PHY Config into
3025  * link speed flags. If phy_type_high has an unknown PHY type, then the return
3026  * value will include the "ICE_AQ_LINK_SPEED_UNKNOWN" flag as well.
3027  */
3028 static u16
3029 ice_aq_phy_types_to_link_speeds(u64 phy_type_low, u64 phy_type_high)
3030 {
3031 	u16 sysctl_speeds = 0;
3032 	int bit;
3033 
3034 	/* coverity[address_of] */
3035 	for_each_set_bit(bit, &phy_type_low, 64)
3036 		sysctl_speeds |= phy_link_speeds[bit];
3037 
3038 	/* coverity[address_of] */
3039 	for_each_set_bit(bit, &phy_type_high, 64) {
3040 		if ((bit + 64) < (int)ARRAY_SIZE(phy_link_speeds))
3041 			sysctl_speeds |= phy_link_speeds[bit + 64];
3042 		else
3043 			sysctl_speeds |= ICE_AQ_LINK_SPEED_UNKNOWN;
3044 	}
3045 
3046 	return (sysctl_speeds);
3047 }
3048 
3049 /**
3050  * ice_sysctl_speeds_to_aq_phy_types - Convert sysctl speed flags to AQ PHY flags
3051  * @sysctl_speeds: 16-bit sysctl speeds or AQ_LINK_SPEED flags
3052  * @phy_type_low: output parameter for lower AQ PHY flags
3053  * @phy_type_high: output parameter for higher AQ PHY flags
3054  *
3055  * Converts the given link speed flags into AQ PHY type flag sets appropriate
3056  * for use in a Set PHY Config command.
3057  */
3058 static void
3059 ice_sysctl_speeds_to_aq_phy_types(u16 sysctl_speeds, u64 *phy_type_low,
3060 				  u64 *phy_type_high)
3061 {
3062 	*phy_type_low = 0, *phy_type_high = 0;
3063 
3064 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_100MB)
3065 		*phy_type_low |= ICE_PHYS_100MB;
3066 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_1000MB)
3067 		*phy_type_low |= ICE_PHYS_1000MB;
3068 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_2500MB)
3069 		*phy_type_low |= ICE_PHYS_2500MB;
3070 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_5GB)
3071 		*phy_type_low |= ICE_PHYS_5GB;
3072 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_10GB)
3073 		*phy_type_low |= ICE_PHYS_10GB;
3074 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_25GB)
3075 		*phy_type_low |= ICE_PHYS_25GB;
3076 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_40GB)
3077 		*phy_type_low |= ICE_PHYS_40GB;
3078 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_50GB)
3079 		*phy_type_low |= ICE_PHYS_50GB;
3080 	if (sysctl_speeds & ICE_AQ_LINK_SPEED_100GB) {
3081 		*phy_type_low |= ICE_PHYS_100GB_LOW;
3082 		*phy_type_high |= ICE_PHYS_100GB_HIGH;
3083 	}
3084 }
3085 
/**
 * @struct ice_phy_data
 * @brief PHY caps and link speeds
 *
 * In/out buffer for ice_intersect_phy_types_and_speeds(). The caller fills in
 * report_mode and user_speeds_orig; the function fills in the PHY types from
 * the firmware report (*_orig) and their intersection with the requested
 * speeds (*_intr).
 *
 * Note that a user_speeds_orig of 0 is overwritten with USHRT_MAX by that
 * function.
 */
struct ice_phy_data {
	u64 phy_low_orig;     /* PHY low quad from report */
	u64 phy_high_orig;    /* PHY high quad from report */
	u64 phy_low_intr;     /* PHY low quad intersection with user speeds */
	u64 phy_high_intr;    /* PHY high quad intersection with user speeds */
	u16 user_speeds_orig; /* Input from caller - See ICE_AQ_LINK_SPEED_* */
	u16 user_speeds_intr; /* Intersect with report speeds */
	u8 report_mode;       /* See ICE_AQC_REPORT_* */
};
3102 
3103 /**
3104  * ice_intersect_phy_types_and_speeds - Return intersection of link speeds
3105  * @sc: device private structure
3106  * @phy_data: device PHY data
3107  *
3108  * On read: Displays the currently supported speeds
3109  * On write: Sets the device's supported speeds
3110  * Valid input flags: see ICE_SYSCTL_HELP_ADVERTISE_SPEED
3111  */
3112 static int
3113 ice_intersect_phy_types_and_speeds(struct ice_softc *sc,
3114 				   struct ice_phy_data *phy_data)
3115 {
3116 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3117 	const char *report_types[5] = { "w/o MEDIA",
3118 					"w/MEDIA",
3119 					"ACTIVE",
3120 					"EDOOFUS", /* Not used */
3121 					"DFLT" };
3122 	struct ice_hw *hw = &sc->hw;
3123 	struct ice_port_info *pi = hw->port_info;
3124 	enum ice_status status;
3125 	u16 report_speeds, temp_speeds;
3126 	u8 report_type;
3127 	bool apply_speed_filter = false;
3128 
3129 	switch (phy_data->report_mode) {
3130 	case ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA:
3131 	case ICE_AQC_REPORT_TOPO_CAP_MEDIA:
3132 	case ICE_AQC_REPORT_ACTIVE_CFG:
3133 	case ICE_AQC_REPORT_DFLT_CFG:
3134 		report_type = phy_data->report_mode >> 1;
3135 		break;
3136 	default:
3137 		device_printf(sc->dev,
3138 		    "%s: phy_data.report_mode \"%u\" doesn't exist\n",
3139 		    __func__, phy_data->report_mode);
3140 		return (EINVAL);
3141 	}
3142 
3143 	/* 0 is treated as "Auto"; the driver will handle selecting the
3144 	 * correct speeds. Including, in some cases, applying an override
3145 	 * if provided.
3146 	 */
3147 	if (phy_data->user_speeds_orig == 0)
3148 		phy_data->user_speeds_orig = USHRT_MAX;
3149 	else if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE))
3150 		apply_speed_filter = true;
3151 
3152 	status = ice_aq_get_phy_caps(pi, false, phy_data->report_mode, &pcaps, NULL);
3153 	if (status != ICE_SUCCESS) {
3154 		device_printf(sc->dev,
3155 		    "%s: ice_aq_get_phy_caps (%s) failed; status %s, aq_err %s\n",
3156 		    __func__, report_types[report_type],
3157 		    ice_status_str(status),
3158 		    ice_aq_str(sc->hw.adminq.sq_last_status));
3159 		return (EIO);
3160 	}
3161 
3162 	phy_data->phy_low_orig = le64toh(pcaps.phy_type_low);
3163 	phy_data->phy_high_orig = le64toh(pcaps.phy_type_high);
3164 	report_speeds = ice_aq_phy_types_to_link_speeds(phy_data->phy_low_orig,
3165 	    phy_data->phy_high_orig);
3166 	if (apply_speed_filter) {
3167 		temp_speeds = ice_apply_supported_speed_filter(report_speeds,
3168 		    pcaps.module_type[0]);
3169 		if ((phy_data->user_speeds_orig & temp_speeds) == 0) {
3170 			device_printf(sc->dev,
3171 			    "User-specified speeds (\"0x%04X\") not supported\n",
3172 			    phy_data->user_speeds_orig);
3173 			return (EINVAL);
3174 		}
3175 		report_speeds = temp_speeds;
3176 	}
3177 	ice_sysctl_speeds_to_aq_phy_types(phy_data->user_speeds_orig,
3178 	    &phy_data->phy_low_intr, &phy_data->phy_high_intr);
3179 	phy_data->user_speeds_intr = phy_data->user_speeds_orig & report_speeds;
3180 	phy_data->phy_low_intr &= phy_data->phy_low_orig;
3181 	phy_data->phy_high_intr &= phy_data->phy_high_orig;
3182 
3183 	return (0);
3184  }
3185 
3186 /**
3187  * ice_sysctl_advertise_speed - Display/change link speeds supported by port
3188  * @oidp: sysctl oid structure
3189  * @arg1: pointer to private data structure
3190  * @arg2: unused
3191  * @req: sysctl request pointer
3192  *
3193  * On read: Displays the currently supported speeds
3194  * On write: Sets the device's supported speeds
3195  * Valid input flags: see ICE_SYSCTL_HELP_ADVERTISE_SPEED
3196  */
3197 static int
3198 ice_sysctl_advertise_speed(SYSCTL_HANDLER_ARGS)
3199 {
3200 	struct ice_softc *sc = (struct ice_softc *)arg1;
3201 	struct ice_port_info *pi = sc->hw.port_info;
3202 	struct ice_phy_data phy_data = { 0 };
3203 	device_t dev = sc->dev;
3204 	u16 sysctl_speeds;
3205 	int ret;
3206 
3207 	UNREFERENCED_PARAMETER(arg2);
3208 
3209 	if (ice_driver_is_detaching(sc))
3210 		return (ESHUTDOWN);
3211 
3212 	/* Get the current speeds from the adapter's "active" configuration. */
3213 	phy_data.report_mode = ICE_AQC_REPORT_ACTIVE_CFG;
3214 	ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
3215 	if (ret) {
3216 		/* Error message already printed within function */
3217 		return (ret);
3218 	}
3219 
3220 	sysctl_speeds = phy_data.user_speeds_intr;
3221 
3222 	ret = sysctl_handle_16(oidp, &sysctl_speeds, 0, req);
3223 	if ((ret) || (req->newptr == NULL))
3224 		return (ret);
3225 
3226 	if (sysctl_speeds > 0x7FF) {
3227 		device_printf(dev,
3228 			      "%s: \"%u\" is outside of the range of acceptable values.\n",
3229 			      __func__, sysctl_speeds);
3230 		return (EINVAL);
3231 	}
3232 
3233 	pi->phy.curr_user_speed_req = sysctl_speeds;
3234 
3235 	if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) && !sc->link_up)
3236 		return 0;
3237 
3238 	/* Apply settings requested by user */
3239 	return ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS);
3240 }
3241 
/*
 * Help text for the FEC configuration sysctl. Each line pairs the short
 * name accepted by ice_sysctl_fec_config() ("auto", "fc", "rs", "none") with
 * the corresponding ICE_FEC_STRING_* description.
 */
#define ICE_SYSCTL_HELP_FEC_CONFIG			\
"\nDisplay or set the port's requested FEC mode."	\
"\n\tauto - " ICE_FEC_STRING_AUTO			\
"\n\tfc - " ICE_FEC_STRING_BASER			\
"\n\trs - " ICE_FEC_STRING_RS				\
"\n\tnone - " ICE_FEC_STRING_NONE			\
"\nEither of the left or right strings above can be used to set the requested mode."
3249 
3250 /**
3251  * ice_sysctl_fec_config - Display/change the configured FEC mode
3252  * @oidp: sysctl oid structure
3253  * @arg1: pointer to private data structure
3254  * @arg2: unused
3255  * @req: sysctl request pointer
3256  *
3257  * On read: Displays the configured FEC mode
3258  * On write: Sets the device's FEC mode to the input string, if it's valid.
3259  * Valid input strings: see ICE_SYSCTL_HELP_FEC_CONFIG
3260  */
3261 static int
3262 ice_sysctl_fec_config(SYSCTL_HANDLER_ARGS)
3263 {
3264 	struct ice_softc *sc = (struct ice_softc *)arg1;
3265 	struct ice_port_info *pi = sc->hw.port_info;
3266 	enum ice_fec_mode new_mode;
3267 	device_t dev = sc->dev;
3268 	char req_fec[32];
3269 	int ret;
3270 
3271 	UNREFERENCED_PARAMETER(arg2);
3272 
3273 	if (ice_driver_is_detaching(sc))
3274 		return (ESHUTDOWN);
3275 
3276 	bzero(req_fec, sizeof(req_fec));
3277 	strlcpy(req_fec, ice_requested_fec_mode(pi), sizeof(req_fec));
3278 
3279 	ret = sysctl_handle_string(oidp, req_fec, sizeof(req_fec), req);
3280 	if ((ret) || (req->newptr == NULL))
3281 		return (ret);
3282 
3283 	if (strcmp(req_fec, "auto") == 0 ||
3284 	    strcmp(req_fec, ice_fec_str(ICE_FEC_AUTO)) == 0) {
3285 		if (sc->allow_no_fec_mod_in_auto)
3286 			new_mode = ICE_FEC_DIS_AUTO;
3287 		else
3288 			new_mode = ICE_FEC_AUTO;
3289 	} else if (strcmp(req_fec, "fc") == 0 ||
3290 	    strcmp(req_fec, ice_fec_str(ICE_FEC_BASER)) == 0) {
3291 		new_mode = ICE_FEC_BASER;
3292 	} else if (strcmp(req_fec, "rs") == 0 ||
3293 	    strcmp(req_fec, ice_fec_str(ICE_FEC_RS)) == 0) {
3294 		new_mode = ICE_FEC_RS;
3295 	} else if (strcmp(req_fec, "none") == 0 ||
3296 	    strcmp(req_fec, ice_fec_str(ICE_FEC_NONE)) == 0) {
3297 		new_mode = ICE_FEC_NONE;
3298 	} else {
3299 		device_printf(dev,
3300 		    "%s: \"%s\" is not a valid FEC mode\n",
3301 		    __func__, req_fec);
3302 		return (EINVAL);
3303 	}
3304 
3305 	/* Cache user FEC mode for later link ups */
3306 	pi->phy.curr_user_fec_req = new_mode;
3307 
3308 	if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) && !sc->link_up)
3309 		return 0;
3310 
3311 	/* Apply settings requested by user */
3312 	return ice_apply_saved_phy_cfg(sc, ICE_APPLY_FEC);
3313 }
3314 
3315 /**
3316  * ice_sysctl_negotiated_fec - Display the negotiated FEC mode on the link
3317  * @oidp: sysctl oid structure
3318  * @arg1: pointer to private data structure
3319  * @arg2: unused
3320  * @req: sysctl request pointer
3321  *
3322  * On read: Displays the negotiated FEC mode, in a string
3323  */
3324 static int
3325 ice_sysctl_negotiated_fec(SYSCTL_HANDLER_ARGS)
3326 {
3327 	struct ice_softc *sc = (struct ice_softc *)arg1;
3328 	struct ice_hw *hw = &sc->hw;
3329 	char neg_fec[32];
3330 	int ret;
3331 
3332 	UNREFERENCED_PARAMETER(arg2);
3333 
3334 	if (ice_driver_is_detaching(sc))
3335 		return (ESHUTDOWN);
3336 
3337 	/* Copy const string into a buffer to drop const qualifier */
3338 	bzero(neg_fec, sizeof(neg_fec));
3339 	strlcpy(neg_fec, ice_negotiated_fec_mode(hw->port_info), sizeof(neg_fec));
3340 
3341 	ret = sysctl_handle_string(oidp, neg_fec, 0, req);
3342 	if (req->newptr != NULL)
3343 		return (EPERM);
3344 
3345 	return (ret);
3346 }
3347 
/*
 * Help text for the flow control configuration sysctl. Each line pairs the
 * number accepted by ice_sysctl_fc_config() with the corresponding
 * ICE_FC_STRING_* description.
 */
#define ICE_SYSCTL_HELP_FC_CONFIG				\
"\nDisplay or set the port's advertised flow control mode.\n"	\
"\t0 - " ICE_FC_STRING_NONE					\
"\n\t1 - " ICE_FC_STRING_RX					\
"\n\t2 - " ICE_FC_STRING_TX					\
"\n\t3 - " ICE_FC_STRING_FULL					\
"\nEither the numbers or the strings above can be used to set the advertised mode."
3355 
3356 /**
3357  * ice_sysctl_fc_config - Display/change the advertised flow control mode
3358  * @oidp: sysctl oid structure
3359  * @arg1: pointer to private data structure
3360  * @arg2: unused
3361  * @req: sysctl request pointer
3362  *
3363  * On read: Displays the configured flow control mode
3364  * On write: Sets the device's flow control mode to the input, if it's valid.
3365  * Valid input strings: see ICE_SYSCTL_HELP_FC_CONFIG
3366  */
3367 static int
3368 ice_sysctl_fc_config(SYSCTL_HANDLER_ARGS)
3369 {
3370 	struct ice_softc *sc = (struct ice_softc *)arg1;
3371 	struct ice_port_info *pi = sc->hw.port_info;
3372 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3373 	enum ice_fc_mode old_mode, new_mode;
3374 	struct ice_hw *hw = &sc->hw;
3375 	device_t dev = sc->dev;
3376 	enum ice_status status;
3377 	int ret, fc_num;
3378 	bool mode_set = false;
3379 	struct sbuf buf;
3380 	char *fc_str_end;
3381 	char fc_str[32];
3382 
3383 	UNREFERENCED_PARAMETER(arg2);
3384 
3385 	if (ice_driver_is_detaching(sc))
3386 		return (ESHUTDOWN);
3387 
3388 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
3389 				     &pcaps, NULL);
3390 	if (status != ICE_SUCCESS) {
3391 		device_printf(dev,
3392 		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
3393 		    __func__, ice_status_str(status),
3394 		    ice_aq_str(hw->adminq.sq_last_status));
3395 		return (EIO);
3396 	}
3397 
3398 	/* Convert HW response format to SW enum value */
3399 	if ((pcaps.caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) &&
3400 	    (pcaps.caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE))
3401 		old_mode = ICE_FC_FULL;
3402 	else if (pcaps.caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE)
3403 		old_mode = ICE_FC_TX_PAUSE;
3404 	else if (pcaps.caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)
3405 		old_mode = ICE_FC_RX_PAUSE;
3406 	else
3407 		old_mode = ICE_FC_NONE;
3408 
3409 	/* Create "old" string for output */
3410 	bzero(fc_str, sizeof(fc_str));
3411 	sbuf_new_for_sysctl(&buf, fc_str, sizeof(fc_str), req);
3412 	sbuf_printf(&buf, "%d<%s>", old_mode, ice_fc_str(old_mode));
3413 	sbuf_finish(&buf);
3414 	sbuf_delete(&buf);
3415 
3416 	ret = sysctl_handle_string(oidp, fc_str, sizeof(fc_str), req);
3417 	if ((ret) || (req->newptr == NULL))
3418 		return (ret);
3419 
3420 	/* Try to parse input as a string, first */
3421 	if (strcasecmp(ice_fc_str(ICE_FC_FULL), fc_str) == 0) {
3422 		new_mode = ICE_FC_FULL;
3423 		mode_set = true;
3424 	}
3425 	else if (strcasecmp(ice_fc_str(ICE_FC_TX_PAUSE), fc_str) == 0) {
3426 		new_mode = ICE_FC_TX_PAUSE;
3427 		mode_set = true;
3428 	}
3429 	else if (strcasecmp(ice_fc_str(ICE_FC_RX_PAUSE), fc_str) == 0) {
3430 		new_mode = ICE_FC_RX_PAUSE;
3431 		mode_set = true;
3432 	}
3433 	else if (strcasecmp(ice_fc_str(ICE_FC_NONE), fc_str) == 0) {
3434 		new_mode = ICE_FC_NONE;
3435 		mode_set = true;
3436 	}
3437 
3438 	/*
3439 	 * Then check if it's an integer, for compatibility with the method
3440 	 * used in older drivers.
3441 	 */
3442 	if (!mode_set) {
3443 		fc_num = strtol(fc_str, &fc_str_end, 0);
3444 		if (fc_str_end == fc_str)
3445 			fc_num = -1;
3446 		switch (fc_num) {
3447 		case 3:
3448 			new_mode = ICE_FC_FULL;
3449 			break;
3450 		case 2:
3451 			new_mode = ICE_FC_TX_PAUSE;
3452 			break;
3453 		case 1:
3454 			new_mode = ICE_FC_RX_PAUSE;
3455 			break;
3456 		case 0:
3457 			new_mode = ICE_FC_NONE;
3458 			break;
3459 		default:
3460 			device_printf(dev,
3461 			    "%s: \"%s\" is not a valid flow control mode\n",
3462 			    __func__, fc_str);
3463 			return (EINVAL);
3464 		}
3465 	}
3466 
3467 	/* Save flow control mode from user */
3468 	pi->phy.curr_user_fc_req = new_mode;
3469 
3470 	/* Turn off Priority Flow Control when Link Flow Control is enabled */
3471 	if ((hw->port_info->qos_cfg.is_sw_lldp) &&
3472 	    (hw->port_info->qos_cfg.local_dcbx_cfg.pfc.pfcena != 0) &&
3473 	    (new_mode != ICE_FC_NONE)) {
3474 		ret = ice_config_pfc(sc, 0x0);
3475 		if (ret)
3476 			return (ret);
3477 	}
3478 
3479 	if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) && !sc->link_up)
3480 		return 0;
3481 
3482 	/* Apply settings requested by user */
3483 	return ice_apply_saved_phy_cfg(sc, ICE_APPLY_FC);
3484 }
3485 
3486 /**
3487  * ice_sysctl_negotiated_fc - Display currently negotiated FC mode
3488  * @oidp: sysctl oid structure
3489  * @arg1: pointer to private data structure
3490  * @arg2: unused
3491  * @req: sysctl request pointer
3492  *
3493  * On read: Displays the currently negotiated flow control settings.
3494  *
3495  * If link is not established, this will report ICE_FC_NONE, as no flow
3496  * control is negotiated while link is down.
3497  */
3498 static int
3499 ice_sysctl_negotiated_fc(SYSCTL_HANDLER_ARGS)
3500 {
3501 	struct ice_softc *sc = (struct ice_softc *)arg1;
3502 	struct ice_port_info *pi = sc->hw.port_info;
3503 	const char *negotiated_fc;
3504 
3505 	UNREFERENCED_PARAMETER(arg2);
3506 
3507 	if (ice_driver_is_detaching(sc))
3508 		return (ESHUTDOWN);
3509 
3510 	negotiated_fc = ice_flowcontrol_mode(pi);
3511 
3512 	return sysctl_handle_string(oidp, __DECONST(char *, negotiated_fc), 0, req);
3513 }
3514 
3515 /**
3516  * __ice_sysctl_phy_type_handler - Display/change supported PHY types/speeds
3517  * @oidp: sysctl oid structure
3518  * @arg1: pointer to private data structure
3519  * @arg2: unused
3520  * @req: sysctl request pointer
3521  * @is_phy_type_high: if true, handle the high PHY type instead of the low PHY type
3522  *
3523  * Private handler for phy_type_high and phy_type_low sysctls.
3524  */
3525 static int
3526 __ice_sysctl_phy_type_handler(SYSCTL_HANDLER_ARGS, bool is_phy_type_high)
3527 {
3528 	struct ice_softc *sc = (struct ice_softc *)arg1;
3529 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3530 	struct ice_aqc_set_phy_cfg_data cfg = { 0 };
3531 	struct ice_hw *hw = &sc->hw;
3532 	device_t dev = sc->dev;
3533 	enum ice_status status;
3534 	uint64_t types;
3535 	int ret;
3536 
3537 	UNREFERENCED_PARAMETER(arg2);
3538 
3539 	if (ice_driver_is_detaching(sc))
3540 		return (ESHUTDOWN);
3541 
3542 	status = ice_aq_get_phy_caps(hw->port_info, false, ICE_AQC_REPORT_ACTIVE_CFG,
3543 				     &pcaps, NULL);
3544 	if (status != ICE_SUCCESS) {
3545 		device_printf(dev,
3546 		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
3547 		    __func__, ice_status_str(status),
3548 		    ice_aq_str(hw->adminq.sq_last_status));
3549 		return (EIO);
3550 	}
3551 
3552 	if (is_phy_type_high)
3553 		types = pcaps.phy_type_high;
3554 	else
3555 		types = pcaps.phy_type_low;
3556 
3557 	ret = sysctl_handle_64(oidp, &types, sizeof(types), req);
3558 	if ((ret) || (req->newptr == NULL))
3559 		return (ret);
3560 
3561 	ice_copy_phy_caps_to_cfg(hw->port_info, &pcaps, &cfg);
3562 
3563 	if (is_phy_type_high)
3564 		cfg.phy_type_high = types & hw->port_info->phy.phy_type_high;
3565 	else
3566 		cfg.phy_type_low = types & hw->port_info->phy.phy_type_low;
3567 	cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
3568 
3569 	status = ice_aq_set_phy_cfg(hw, hw->port_info, &cfg, NULL);
3570 	if (status != ICE_SUCCESS) {
3571 		device_printf(dev,
3572 		    "%s: ice_aq_set_phy_cfg failed; status %s, aq_err %s\n",
3573 		    __func__, ice_status_str(status),
3574 		    ice_aq_str(hw->adminq.sq_last_status));
3575 		return (EIO);
3576 	}
3577 
3578 	return (0);
3579 
3580 }
3581 
3582 /**
3583  * ice_sysctl_phy_type_low - Display/change supported lower PHY types/speeds
3584  * @oidp: sysctl oid structure
3585  * @arg1: pointer to private data structure
3586  * @arg2: unused
3587  * @req: sysctl request pointer
3588  *
3589  * On read: Displays the currently supported lower PHY types
3590  * On write: Sets the device's supported low PHY types
3591  */
3592 static int
3593 ice_sysctl_phy_type_low(SYSCTL_HANDLER_ARGS)
3594 {
3595 	return __ice_sysctl_phy_type_handler(oidp, arg1, arg2, req, false);
3596 }
3597 
3598 /**
3599  * ice_sysctl_phy_type_high - Display/change supported higher PHY types/speeds
3600  * @oidp: sysctl oid structure
3601  * @arg1: pointer to private data structure
3602  * @arg2: unused
3603  * @req: sysctl request pointer
3604  *
3605  * On read: Displays the currently supported higher PHY types
3606  * On write: Sets the device's supported high PHY types
3607  */
3608 static int
3609 ice_sysctl_phy_type_high(SYSCTL_HANDLER_ARGS)
3610 {
3611 	return __ice_sysctl_phy_type_handler(oidp, arg1, arg2, req, true);
3612 }
3613 
3614 /**
3615  * ice_sysctl_phy_caps - Display response from Get PHY abililties
3616  * @oidp: sysctl oid structure
3617  * @arg1: pointer to private data structure
3618  * @arg2: unused
3619  * @req: sysctl request pointer
3620  * @report_mode: the mode to report
3621  *
3622  * On read: Display the response from Get PHY abillities with the given report
3623  * mode.
3624  */
3625 static int
3626 ice_sysctl_phy_caps(SYSCTL_HANDLER_ARGS, u8 report_mode)
3627 {
3628 	struct ice_softc *sc = (struct ice_softc *)arg1;
3629 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3630 	struct ice_hw *hw = &sc->hw;
3631 	struct ice_port_info *pi = hw->port_info;
3632 	device_t dev = sc->dev;
3633 	enum ice_status status;
3634 	int ret;
3635 
3636 	UNREFERENCED_PARAMETER(arg2);
3637 
3638 	ret = priv_check(curthread, PRIV_DRIVER);
3639 	if (ret)
3640 		return (ret);
3641 
3642 	if (ice_driver_is_detaching(sc))
3643 		return (ESHUTDOWN);
3644 
3645 	status = ice_aq_get_phy_caps(pi, true, report_mode, &pcaps, NULL);
3646 	if (status != ICE_SUCCESS) {
3647 		device_printf(dev,
3648 		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
3649 		    __func__, ice_status_str(status),
3650 		    ice_aq_str(hw->adminq.sq_last_status));
3651 		return (EIO);
3652 	}
3653 
3654 	ret = sysctl_handle_opaque(oidp, &pcaps, sizeof(pcaps), req);
3655 	if (req->newptr != NULL)
3656 		return (EPERM);
3657 
3658 	return (ret);
3659 }
3660 
3661 /**
3662  * ice_sysctl_phy_sw_caps - Display response from Get PHY abililties
3663  * @oidp: sysctl oid structure
3664  * @arg1: pointer to private data structure
3665  * @arg2: unused
3666  * @req: sysctl request pointer
3667  *
3668  * On read: Display the response from Get PHY abillities reporting the last
3669  * software configuration.
3670  */
3671 static int
3672 ice_sysctl_phy_sw_caps(SYSCTL_HANDLER_ARGS)
3673 {
3674 	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
3675 				   ICE_AQC_REPORT_ACTIVE_CFG);
3676 }
3677 
3678 /**
3679  * ice_sysctl_phy_nvm_caps - Display response from Get PHY abililties
3680  * @oidp: sysctl oid structure
3681  * @arg1: pointer to private data structure
3682  * @arg2: unused
3683  * @req: sysctl request pointer
3684  *
3685  * On read: Display the response from Get PHY abillities reporting the NVM
3686  * configuration.
3687  */
3688 static int
3689 ice_sysctl_phy_nvm_caps(SYSCTL_HANDLER_ARGS)
3690 {
3691 	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
3692 				   ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA);
3693 }
3694 
3695 /**
3696  * ice_sysctl_phy_topo_caps - Display response from Get PHY abililties
3697  * @oidp: sysctl oid structure
3698  * @arg1: pointer to private data structure
3699  * @arg2: unused
3700  * @req: sysctl request pointer
3701  *
3702  * On read: Display the response from Get PHY abillities reporting the
3703  * topology configuration.
3704  */
3705 static int
3706 ice_sysctl_phy_topo_caps(SYSCTL_HANDLER_ARGS)
3707 {
3708 	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
3709 				   ICE_AQC_REPORT_TOPO_CAP_MEDIA);
3710 }
3711 
3712 /**
3713  * ice_sysctl_phy_link_status - Display response from Get Link Status
3714  * @oidp: sysctl oid structure
3715  * @arg1: pointer to private data structure
3716  * @arg2: unused
3717  * @req: sysctl request pointer
3718  *
3719  * On read: Display the response from firmware for the Get Link Status
3720  * request.
3721  */
3722 static int
3723 ice_sysctl_phy_link_status(SYSCTL_HANDLER_ARGS)
3724 {
3725 	struct ice_aqc_get_link_status_data link_data = { 0 };
3726 	struct ice_softc *sc = (struct ice_softc *)arg1;
3727 	struct ice_hw *hw = &sc->hw;
3728 	struct ice_port_info *pi = hw->port_info;
3729 	struct ice_aqc_get_link_status *resp;
3730 	struct ice_aq_desc desc;
3731 	device_t dev = sc->dev;
3732 	enum ice_status status;
3733 	int ret;
3734 
3735 	UNREFERENCED_PARAMETER(arg2);
3736 
3737 	/*
3738 	 * Ensure that only contexts with driver privilege are allowed to
3739 	 * access this information
3740 	 */
3741 	ret = priv_check(curthread, PRIV_DRIVER);
3742 	if (ret)
3743 		return (ret);
3744 
3745 	if (ice_driver_is_detaching(sc))
3746 		return (ESHUTDOWN);
3747 
3748 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_status);
3749 	resp = &desc.params.get_link_status;
3750 	resp->lport_num = pi->lport;
3751 
3752 	status = ice_aq_send_cmd(hw, &desc, &link_data, sizeof(link_data), NULL);
3753 	if (status != ICE_SUCCESS) {
3754 		device_printf(dev,
3755 		    "%s: ice_aq_send_cmd failed; status %s, aq_err %s\n",
3756 		    __func__, ice_status_str(status),
3757 		    ice_aq_str(hw->adminq.sq_last_status));
3758 		return (EIO);
3759 	}
3760 
3761 	ret = sysctl_handle_opaque(oidp, &link_data, sizeof(link_data), req);
3762 	if (req->newptr != NULL)
3763 		return (EPERM);
3764 
3765 	return (ret);
3766 }
3767 
3768 /**
3769  * ice_sysctl_fw_cur_lldp_persist_status - Display current FW LLDP status
3770  * @oidp: sysctl oid structure
3771  * @arg1: pointer to private softc structure
3772  * @arg2: unused
3773  * @req: sysctl request pointer
3774  *
3775  * On read: Displays current persistent LLDP status.
3776  */
3777 static int
3778 ice_sysctl_fw_cur_lldp_persist_status(SYSCTL_HANDLER_ARGS)
3779 {
3780 	struct ice_softc *sc = (struct ice_softc *)arg1;
3781 	struct ice_hw *hw = &sc->hw;
3782 	device_t dev = sc->dev;
3783 	enum ice_status status;
3784 	struct sbuf *sbuf;
3785 	u32 lldp_state;
3786 
3787 	UNREFERENCED_PARAMETER(arg2);
3788 	UNREFERENCED_PARAMETER(oidp);
3789 
3790 	if (ice_driver_is_detaching(sc))
3791 		return (ESHUTDOWN);
3792 
3793 	status = ice_get_cur_lldp_persist_status(hw, &lldp_state);
3794 	if (status) {
3795 		device_printf(dev,
3796 		    "Could not acquire current LLDP persistence status, err %s aq_err %s\n",
3797 		    ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
3798 		return (EIO);
3799 	}
3800 
3801 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
3802 	sbuf_printf(sbuf, "%s", ice_fw_lldp_status(lldp_state));
3803 	sbuf_finish(sbuf);
3804 	sbuf_delete(sbuf);
3805 
3806 	return (0);
3807 }
3808 
3809 /**
3810  * ice_sysctl_fw_dflt_lldp_persist_status - Display default FW LLDP status
3811  * @oidp: sysctl oid structure
3812  * @arg1: pointer to private softc structure
3813  * @arg2: unused
3814  * @req: sysctl request pointer
3815  *
3816  * On read: Displays default persistent LLDP status.
3817  */
3818 static int
3819 ice_sysctl_fw_dflt_lldp_persist_status(SYSCTL_HANDLER_ARGS)
3820 {
3821 	struct ice_softc *sc = (struct ice_softc *)arg1;
3822 	struct ice_hw *hw = &sc->hw;
3823 	device_t dev = sc->dev;
3824 	enum ice_status status;
3825 	struct sbuf *sbuf;
3826 	u32 lldp_state;
3827 
3828 	UNREFERENCED_PARAMETER(arg2);
3829 	UNREFERENCED_PARAMETER(oidp);
3830 
3831 	if (ice_driver_is_detaching(sc))
3832 		return (ESHUTDOWN);
3833 
3834 	status = ice_get_dflt_lldp_persist_status(hw, &lldp_state);
3835 	if (status) {
3836 		device_printf(dev,
3837 		    "Could not acquire default LLDP persistence status, err %s aq_err %s\n",
3838 		    ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
3839 		return (EIO);
3840 	}
3841 
3842 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
3843 	sbuf_printf(sbuf, "%s", ice_fw_lldp_status(lldp_state));
3844 	sbuf_finish(sbuf);
3845 	sbuf_delete(sbuf);
3846 
3847 	return (0);
3848 }
3849 
3850 /**
3851  * ice_dscp_is_mapped - Check for non-zero DSCP to TC mappings
3852  * @dcbcfg: Configuration struct to check for mappings in
3853  *
3854  * @return true if there exists a non-zero DSCP to TC mapping
3855  * inside the input DCB configuration struct.
3856  */
3857 static bool
3858 ice_dscp_is_mapped(struct ice_dcbx_cfg *dcbcfg)
3859 {
3860 	for (int i = 0; i < ICE_DSCP_NUM_VAL; i++)
3861 		if (dcbcfg->dscp_map[i] != 0)
3862 			return (true);
3863 
3864 	return (false);
3865 }
3866 
3867 #define ICE_SYSCTL_HELP_FW_LLDP_AGENT	\
3868 "\nDisplay or change FW LLDP agent state:" \
3869 "\n\t0 - disabled"			\
3870 "\n\t1 - enabled"
3871 
3872 /**
3873  * ice_sysctl_fw_lldp_agent - Display or change the FW LLDP agent status
3874  * @oidp: sysctl oid structure
3875  * @arg1: pointer to private softc structure
3876  * @arg2: unused
3877  * @req: sysctl request pointer
3878  *
3879  * On read: Displays whether the FW LLDP agent is running
3880  * On write: Persistently enables or disables the FW LLDP agent
3881  */
3882 static int
3883 ice_sysctl_fw_lldp_agent(SYSCTL_HANDLER_ARGS)
3884 {
3885 	struct ice_softc *sc = (struct ice_softc *)arg1;
3886 	struct ice_dcbx_cfg *local_dcbx_cfg;
3887 	struct ice_hw *hw = &sc->hw;
3888 	device_t dev = sc->dev;
3889 	enum ice_status status;
3890 	int ret;
3891 	u32 old_state;
3892 	u8 fw_lldp_enabled;
3893 	bool retried_start_lldp = false;
3894 
3895 	UNREFERENCED_PARAMETER(arg2);
3896 
3897 	if (ice_driver_is_detaching(sc))
3898 		return (ESHUTDOWN);
3899 
3900 	status = ice_get_cur_lldp_persist_status(hw, &old_state);
3901 	if (status) {
3902 		device_printf(dev,
3903 		    "Could not acquire current LLDP persistence status, err %s aq_err %s\n",
3904 		    ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
3905 		return (EIO);
3906 	}
3907 
3908 	if (old_state > ICE_LLDP_ADMINSTATUS_ENA_RXTX) {
3909 		status = ice_get_dflt_lldp_persist_status(hw, &old_state);
3910 		if (status) {
3911 			device_printf(dev,
3912 			    "Could not acquire default LLDP persistence status, err %s aq_err %s\n",
3913 			    ice_status_str(status),
3914 			    ice_aq_str(hw->adminq.sq_last_status));
3915 			return (EIO);
3916 		}
3917 	}
3918 	if (old_state == 0)
3919 		fw_lldp_enabled = false;
3920 	else
3921 		fw_lldp_enabled = true;
3922 
3923 	ret = sysctl_handle_bool(oidp, &fw_lldp_enabled, 0, req);
3924 	if ((ret) || (req->newptr == NULL))
3925 		return (ret);
3926 
3927 	if (old_state == 0 && fw_lldp_enabled == false)
3928 		return (0);
3929 
3930 	if (old_state != 0 && fw_lldp_enabled == true)
3931 		return (0);
3932 
3933 	/* Block transition to FW LLDP if DSCP mode is enabled */
3934 	local_dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
3935 	if ((local_dcbx_cfg->pfc_mode == ICE_QOS_MODE_DSCP) ||
3936 	    ice_dscp_is_mapped(local_dcbx_cfg)) {
3937 		device_printf(dev,
3938 			      "Cannot enable FW-LLDP agent while DSCP QoS is active.\n");
3939 		return (EOPNOTSUPP);
3940 	}
3941 
3942 	if (fw_lldp_enabled == false) {
3943 		status = ice_aq_stop_lldp(hw, true, true, NULL);
3944 		/* EPERM is returned if the LLDP agent is already shutdown */
3945 		if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EPERM) {
3946 			device_printf(dev,
3947 			    "%s: ice_aq_stop_lldp failed; status %s, aq_err %s\n",
3948 			    __func__, ice_status_str(status),
3949 			    ice_aq_str(hw->adminq.sq_last_status));
3950 			return (EIO);
3951 		}
3952 		ice_aq_set_dcb_parameters(hw, true, NULL);
3953 		hw->port_info->qos_cfg.is_sw_lldp = true;
3954 		ice_add_rx_lldp_filter(sc);
3955 	} else {
3956 		ice_del_rx_lldp_filter(sc);
3957 retry_start_lldp:
3958 		status = ice_aq_start_lldp(hw, true, NULL);
3959 		if (status) {
3960 			switch (hw->adminq.sq_last_status) {
3961 			/* EEXIST is returned if the LLDP agent is already started */
3962 			case ICE_AQ_RC_EEXIST:
3963 				break;
3964 			case ICE_AQ_RC_EAGAIN:
3965 				/* Retry command after a 2 second wait */
3966 				if (retried_start_lldp == false) {
3967 					retried_start_lldp = true;
3968 					pause("slldp", ICE_START_LLDP_RETRY_WAIT);
3969 					goto retry_start_lldp;
3970 				}
3971 				/* Fallthrough */
3972 			default:
3973 				device_printf(dev,
3974 				    "%s: ice_aq_start_lldp failed; status %s, aq_err %s\n",
3975 				    __func__, ice_status_str(status),
3976 				    ice_aq_str(hw->adminq.sq_last_status));
3977 				return (EIO);
3978 			}
3979 		}
3980 		ice_start_dcbx_agent(sc);
3981 
3982 		/* Init DCB needs to be done during enabling LLDP to properly
3983 		 * propagate the configuration.
3984 		 */
3985 		status = ice_init_dcb(hw, true);
3986 		if (status) {
3987 			device_printf(dev,
3988 			    "%s: ice_init_dcb failed; status %s, aq_err %s\n",
3989 			    __func__, ice_status_str(status),
3990 			    ice_aq_str(hw->adminq.sq_last_status));
3991 			hw->port_info->qos_cfg.dcbx_status = ICE_DCBX_STATUS_NOT_STARTED;
3992 		}
3993 	}
3994 
3995 	return (ret);
3996 }
3997 
3998 #define ICE_SYSCTL_HELP_ETS_MIN_RATE \
3999 "\nIn FW DCB mode (fw_lldp_agent=1), displays the current ETS bandwidth table." \
4000 "\nIn SW DCB mode, displays and allows setting the table." \
4001 "\nInput must be in the format e.g. 30,10,10,10,10,10,10,10" \
4002 "\nWhere the bandwidth total must add up to 100"
4003 
4004 /**
4005  * ice_sysctl_ets_min_rate - Report/configure ETS bandwidth
4006  * @oidp: sysctl oid structure
4007  * @arg1: pointer to private data structure
4008  * @arg2: unused
4009  * @req: sysctl request pointer
4010  *
4011  * Returns the current ETS TC bandwidth table
4012  * cached by the driver.
4013  *
4014  * In SW DCB mode this sysctl also accepts a value that will
4015  * be sent to the firmware for configuration.
4016  */
4017 static int
4018 ice_sysctl_ets_min_rate(SYSCTL_HANDLER_ARGS)
4019 {
4020 	struct ice_softc *sc = (struct ice_softc *)arg1;
4021 	struct ice_dcbx_cfg *local_dcbx_cfg;
4022 	struct ice_port_info *pi;
4023 	struct ice_hw *hw = &sc->hw;
4024 	device_t dev = sc->dev;
4025 	enum ice_status status;
4026 	struct sbuf *sbuf;
4027 	int ret;
4028 
4029 	/* Store input rates from user */
4030 	char ets_user_buf[128] = "";
4031 	u8 new_ets_table[ICE_MAX_TRAFFIC_CLASS] = {};
4032 
4033 	UNREFERENCED_PARAMETER(arg2);
4034 
4035 	if (ice_driver_is_detaching(sc))
4036 		return (ESHUTDOWN);
4037 
4038 	if (req->oldptr == NULL && req->newptr == NULL) {
4039 		ret = SYSCTL_OUT(req, 0, 128);
4040 		return (ret);
4041 	}
4042 
4043 	pi = hw->port_info;
4044 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4045 
4046 	sbuf = sbuf_new(NULL, ets_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
4047 
4048 	/* Format ETS BW data for output */
4049 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
4050 		sbuf_printf(sbuf, "%d", local_dcbx_cfg->etscfg.tcbwtable[i]);
4051 		if (i != ICE_MAX_TRAFFIC_CLASS - 1)
4052 			sbuf_printf(sbuf, ",");
4053 	}
4054 
4055 	sbuf_finish(sbuf);
4056 	sbuf_delete(sbuf);
4057 
4058 	/* Read in the new ETS values */
4059 	ret = sysctl_handle_string(oidp, ets_user_buf, sizeof(ets_user_buf), req);
4060 	if ((ret) || (req->newptr == NULL))
4061 		return (ret);
4062 
4063 	/* Don't allow setting changes in FW DCB mode */
4064 	if (!hw->port_info->qos_cfg.is_sw_lldp)
4065 		return (EPERM);
4066 
4067 	ret = ice_ets_str_to_tbl(ets_user_buf, new_ets_table, 100);
4068 	if (ret) {
4069 		device_printf(dev, "%s: Could not parse input BW table: %s\n",
4070 		    __func__, ets_user_buf);
4071 		return (ret);
4072 	}
4073 
4074 	if (!ice_check_ets_bw(new_ets_table)) {
4075 		device_printf(dev, "%s: Bandwidth sum does not equal 100: %s\n",
4076 		    __func__, ets_user_buf);
4077 		return (EINVAL);
4078 	}
4079 
4080 	memcpy(local_dcbx_cfg->etscfg.tcbwtable, new_ets_table,
4081 	    sizeof(new_ets_table));
4082 
4083 	/* If BW > 0, then set TSA entry to 2 */
4084 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
4085 		if (new_ets_table[i] > 0)
4086 			local_dcbx_cfg->etscfg.tsatable[i] = 2;
4087 		else
4088 			local_dcbx_cfg->etscfg.tsatable[i] = 0;
4089 	}
4090 	local_dcbx_cfg->etscfg.willing = 0;
4091 	local_dcbx_cfg->etsrec = local_dcbx_cfg->etscfg;
4092 	local_dcbx_cfg->app_mode = ICE_DCBX_APPS_NON_WILLING;
4093 
4094 	status = ice_set_dcb_cfg(pi);
4095 	if (status) {
4096 		device_printf(dev,
4097 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
4098 		    __func__, ice_status_str(status),
4099 		    ice_aq_str(hw->adminq.sq_last_status));
4100 		return (EIO);
4101 	}
4102 
4103 	ice_do_dcb_reconfig(sc, false);
4104 
4105 	return (0);
4106 }
4107 
4108 #define ICE_SYSCTL_HELP_UP2TC_MAP \
4109 "\nIn FW DCB mode (fw_lldp_agent=1), displays the current ETS priority assignment table." \
4110 "\nIn SW DCB mode, displays and allows setting the table." \
4111 "\nInput must be in this format: 0,1,2,3,4,5,6,7" \
4112 "\nWhere the 1st number is the TC for UP0, 2nd number is the TC for UP1, etc"
4113 
4114 /**
4115  * ice_sysctl_up2tc_map - Report or configure UP2TC mapping
4116  * @oidp: sysctl oid structure
4117  * @arg1: pointer to private data structure
4118  * @arg2: unused
4119  * @req: sysctl request pointer
4120  *
4121  * In FW DCB mode, returns the current ETS prio table /
4122  * UP2TC mapping from the local MIB.
4123  *
4124  * In SW DCB mode this sysctl also accepts a value that will
4125  * be sent to the firmware for configuration.
4126  */
4127 static int
4128 ice_sysctl_up2tc_map(SYSCTL_HANDLER_ARGS)
4129 {
4130 	struct ice_softc *sc = (struct ice_softc *)arg1;
4131 	struct ice_dcbx_cfg *local_dcbx_cfg;
4132 	struct ice_port_info *pi;
4133 	struct ice_hw *hw = &sc->hw;
4134 	device_t dev = sc->dev;
4135 	enum ice_status status;
4136 	struct sbuf *sbuf;
4137 	int ret;
4138 
4139 	/* Store input rates from user */
4140 	char up2tc_user_buf[128] = "";
4141 	/* This array is indexed by UP, not TC */
4142 	u8 new_up2tc[ICE_MAX_TRAFFIC_CLASS] = {};
4143 
4144 	UNREFERENCED_PARAMETER(arg2);
4145 
4146 	if (ice_driver_is_detaching(sc))
4147 		return (ESHUTDOWN);
4148 
4149 	if (req->oldptr == NULL && req->newptr == NULL) {
4150 		ret = SYSCTL_OUT(req, 0, 128);
4151 		return (ret);
4152 	}
4153 
4154 	pi = hw->port_info;
4155 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4156 
4157 	sbuf = sbuf_new(NULL, up2tc_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
4158 
4159 	/* Format ETS Priority Mapping Table for output */
4160 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
4161 		sbuf_printf(sbuf, "%d", local_dcbx_cfg->etscfg.prio_table[i]);
4162 		if (i != ICE_MAX_TRAFFIC_CLASS - 1)
4163 			sbuf_printf(sbuf, ",");
4164 	}
4165 
4166 	sbuf_finish(sbuf);
4167 	sbuf_delete(sbuf);
4168 
4169 	/* Read in the new ETS priority mapping */
4170 	ret = sysctl_handle_string(oidp, up2tc_user_buf, sizeof(up2tc_user_buf), req);
4171 	if ((ret) || (req->newptr == NULL))
4172 		return (ret);
4173 
4174 	/* Don't allow setting changes in FW DCB mode */
4175 	if (!hw->port_info->qos_cfg.is_sw_lldp)
4176 		return (EPERM);
4177 
4178 	ret = ice_ets_str_to_tbl(up2tc_user_buf, new_up2tc, 7);
4179 	if (ret) {
4180 		device_printf(dev, "%s: Could not parse input priority assignment table: %s\n",
4181 		    __func__, up2tc_user_buf);
4182 		return (ret);
4183 	}
4184 
4185 	/* Prepare updated ETS CFG/REC TLVs */
4186 	memcpy(local_dcbx_cfg->etscfg.prio_table, new_up2tc,
4187 	    sizeof(new_up2tc));
4188 	memcpy(local_dcbx_cfg->etsrec.prio_table, new_up2tc,
4189 	    sizeof(new_up2tc));
4190 
4191 	status = ice_set_dcb_cfg(pi);
4192 	if (status) {
4193 		device_printf(dev,
4194 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
4195 		    __func__, ice_status_str(status),
4196 		    ice_aq_str(hw->adminq.sq_last_status));
4197 		return (EIO);
4198 	}
4199 
4200 	ice_do_dcb_reconfig(sc, false);
4201 
4202 	return (0);
4203 }
4204 
4205 /**
4206  * ice_config_pfc - helper function to set PFC config in FW
4207  * @sc: device private structure
4208  * @new_mode: bit flags indicating PFC status for TCs
4209  *
4210  * @pre must be in SW DCB mode
4211  *
4212  * Configures the driver's local PFC TLV and sends it to the
4213  * FW for configuration, then reconfigures the driver/VSI
4214  * for DCB if needed.
4215  */
4216 static int
4217 ice_config_pfc(struct ice_softc *sc, u8 new_mode)
4218 {
4219 	struct ice_dcbx_cfg *local_dcbx_cfg;
4220 	struct ice_hw *hw = &sc->hw;
4221 	struct ice_port_info *pi;
4222 	device_t dev = sc->dev;
4223 	enum ice_status status;
4224 
4225 	pi = hw->port_info;
4226 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4227 
4228 	/* Prepare updated PFC TLV */
4229 	local_dcbx_cfg->pfc.pfcena = new_mode;
4230 	local_dcbx_cfg->pfc.pfccap = ICE_MAX_TRAFFIC_CLASS;
4231 	local_dcbx_cfg->pfc.willing = 0;
4232 	local_dcbx_cfg->pfc.mbc = 0;
4233 
4234 	/* Warn if PFC is being disabled with RoCE v2 in use */
4235 	if (new_mode == 0 && sc->rdma_entry.attached)
4236 		device_printf(dev,
4237 		    "WARNING: Recommended that Priority Flow Control is enabled when RoCEv2 is in use\n");
4238 
4239 	status = ice_set_dcb_cfg(pi);
4240 	if (status) {
4241 		device_printf(dev,
4242 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
4243 		    __func__, ice_status_str(status),
4244 		    ice_aq_str(hw->adminq.sq_last_status));
4245 		return (EIO);
4246 	}
4247 
4248 	ice_do_dcb_reconfig(sc, false);
4249 
4250 	return (0);
4251 }
4252 
4253 #define ICE_SYSCTL_HELP_PFC_CONFIG \
4254 "\nIn FW DCB mode (fw_lldp_agent=1), displays the current Priority Flow Control configuration" \
4255 "\nIn SW DCB mode, displays and allows setting the configuration" \
4256 "\nInput/Output is in this format: 0xff" \
4257 "\nWhere bit position # enables/disables PFC for that Traffic Class #"
4258 
4259 /**
4260  * ice_sysctl_pfc_config - Report or configure enabled PFC TCs
4261  * @oidp: sysctl oid structure
4262  * @arg1: pointer to private data structure
4263  * @arg2: unused
4264  * @req: sysctl request pointer
4265  *
4266  * In FW DCB mode, returns a bitmap containing the current TCs
4267  * that have PFC enabled on them.
4268  *
4269  * In SW DCB mode this sysctl also accepts a value that will
4270  * be sent to the firmware for configuration.
4271  */
4272 static int
4273 ice_sysctl_pfc_config(SYSCTL_HANDLER_ARGS)
4274 {
4275 	struct ice_softc *sc = (struct ice_softc *)arg1;
4276 	struct ice_dcbx_cfg *local_dcbx_cfg;
4277 	struct ice_port_info *pi;
4278 	struct ice_hw *hw = &sc->hw;
4279 	int ret;
4280 
4281 	/* Store input flags from user */
4282 	u8 user_pfc;
4283 
4284 	UNREFERENCED_PARAMETER(arg2);
4285 
4286 	if (ice_driver_is_detaching(sc))
4287 		return (ESHUTDOWN);
4288 
4289 	if (req->oldptr == NULL && req->newptr == NULL) {
4290 		ret = SYSCTL_OUT(req, 0, sizeof(u8));
4291 		return (ret);
4292 	}
4293 
4294 	pi = hw->port_info;
4295 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4296 
4297 	/* Format current PFC enable setting for output */
4298 	user_pfc = local_dcbx_cfg->pfc.pfcena;
4299 
4300 	/* Read in the new PFC config */
4301 	ret = sysctl_handle_8(oidp, &user_pfc, 0, req);
4302 	if ((ret) || (req->newptr == NULL))
4303 		return (ret);
4304 
4305 	/* Don't allow setting changes in FW DCB mode */
4306 	if (!hw->port_info->qos_cfg.is_sw_lldp)
4307 		return (EPERM);
4308 
4309 	/* If LFC is active and PFC is going to be turned on, turn LFC off */
4310 	if (user_pfc != 0 && pi->phy.curr_user_fc_req != ICE_FC_NONE) {
4311 		pi->phy.curr_user_fc_req = ICE_FC_NONE;
4312 		if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) ||
4313 			 sc->link_up) {
4314 			ret = ice_apply_saved_phy_cfg(sc, ICE_APPLY_FC);
4315 			if (ret)
4316 				return (ret);
4317 		}
4318 	}
4319 
4320 	return ice_config_pfc(sc, user_pfc);
4321 }
4322 
4323 #define ICE_SYSCTL_HELP_PFC_MODE \
4324 "\nDisplay and set the current QoS mode for the firmware" \
4325 "\n\t0: VLAN UP mode" \
4326 "\n\t1: DSCP mode"
4327 
4328 /**
4329  * ice_sysctl_pfc_mode
4330  * @oidp: sysctl oid structure
4331  * @arg1: pointer to private data structure
4332  * @arg2: unused
4333  * @req: sysctl request pointer
4334  *
4335  * Gets and sets whether the port is in DSCP or VLAN PCP-based
4336  * PFC mode. This is also used to set whether DSCP or VLAN PCP
4337  * -based settings are configured for DCB.
4338  */
4339 static int
4340 ice_sysctl_pfc_mode(SYSCTL_HANDLER_ARGS)
4341 {
4342 	struct ice_softc *sc = (struct ice_softc *)arg1;
4343 	struct ice_dcbx_cfg *local_dcbx_cfg;
4344 	struct ice_port_info *pi;
4345 	struct ice_hw *hw = &sc->hw;
4346 	device_t dev = sc->dev;
4347 	enum ice_status status;
4348 	u8 user_pfc_mode, aq_pfc_mode;
4349 	int ret;
4350 
4351 	UNREFERENCED_PARAMETER(arg2);
4352 
4353 	if (ice_driver_is_detaching(sc))
4354 		return (ESHUTDOWN);
4355 
4356 	if (req->oldptr == NULL && req->newptr == NULL) {
4357 		ret = SYSCTL_OUT(req, 0, sizeof(u8));
4358 		return (ret);
4359 	}
4360 
4361 	pi = hw->port_info;
4362 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4363 
4364 	user_pfc_mode = local_dcbx_cfg->pfc_mode;
4365 
4366 	/* Read in the new mode */
4367 	ret = sysctl_handle_8(oidp, &user_pfc_mode, 0, req);
4368 	if ((ret) || (req->newptr == NULL))
4369 		return (ret);
4370 
4371 	/* Don't allow setting changes in FW DCB mode */
4372 	if (!hw->port_info->qos_cfg.is_sw_lldp)
4373 		return (EPERM);
4374 
4375 	/* Currently, there are only two modes */
4376 	switch (user_pfc_mode) {
4377 	case 0:
4378 		aq_pfc_mode = ICE_AQC_PFC_VLAN_BASED_PFC;
4379 		break;
4380 	case 1:
4381 		aq_pfc_mode = ICE_AQC_PFC_DSCP_BASED_PFC;
4382 		break;
4383 	default:
4384 		device_printf(dev,
4385 		    "%s: Valid input range is 0-1 (input %d)\n",
4386 		    __func__, user_pfc_mode);
4387 		return (EINVAL);
4388 	}
4389 
4390 	status = ice_aq_set_pfc_mode(hw, aq_pfc_mode, NULL);
4391 	if (status == ICE_ERR_NOT_SUPPORTED) {
4392 		device_printf(dev,
4393 		    "%s: Failed to set PFC mode; DCB not supported\n",
4394 		    __func__);
4395 		return (ENODEV);
4396 	}
4397 	if (status) {
4398 		device_printf(dev,
4399 		    "%s: Failed to set PFC mode; status %s, aq_err %s\n",
4400 		    __func__, ice_status_str(status),
4401 		    ice_aq_str(hw->adminq.sq_last_status));
4402 		return (EIO);
4403 	}
4404 
4405 	/* Reset settings to default when mode is changed */
4406 	ice_set_default_local_mib_settings(sc);
4407 	/* Cache current settings and reconfigure */
4408 	local_dcbx_cfg->pfc_mode = user_pfc_mode;
4409 	ice_do_dcb_reconfig(sc, false);
4410 
4411 	return (0);
4412 }
4413 
4414 #define ICE_SYSCTL_HELP_SET_LINK_ACTIVE \
4415 "\nKeep link active after setting interface down:" \
4416 "\n\t0 - disable" \
4417 "\n\t1 - enable"
4418 
4419 /**
4420  * ice_sysctl_set_link_active
4421  * @oidp: sysctl oid structure
4422  * @arg1: pointer to private data structure
4423  * @arg2: unused
4424  * @req: sysctl request pointer
4425  *
4426  * Set the link_active_on_if_down sysctl flag.
4427  */
4428 static int
4429 ice_sysctl_set_link_active(SYSCTL_HANDLER_ARGS)
4430 {
4431 	struct ice_softc *sc = (struct ice_softc *)arg1;
4432 	bool mode;
4433 	int ret;
4434 
4435 	UNREFERENCED_PARAMETER(arg2);
4436 
4437 	mode = ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN);
4438 
4439 	ret = sysctl_handle_bool(oidp, &mode, 0, req);
4440 	if ((ret) || (req->newptr == NULL))
4441 		return (ret);
4442 
4443 	if (mode)
4444 		ice_set_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN);
4445 	else
4446 		ice_clear_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN);
4447 
4448 	return (0);
4449 }
4450 
4451 /**
4452  * ice_sysctl_debug_set_link
4453  * @oidp: sysctl oid structure
4454  * @arg1: pointer to private data structure
4455  * @arg2: unused
4456  * @req: sysctl request pointer
4457  *
4458  * Set link up/down in debug session.
4459  */
4460 static int
4461 ice_sysctl_debug_set_link(SYSCTL_HANDLER_ARGS)
4462 {
4463 	struct ice_softc *sc = (struct ice_softc *)arg1;
4464 	bool mode;
4465 	int ret;
4466 
4467 	UNREFERENCED_PARAMETER(arg2);
4468 
4469 	ret = sysctl_handle_bool(oidp, &mode, 0, req);
4470 	if ((ret) || (req->newptr == NULL))
4471 		return (ret);
4472 
4473 	ice_set_link(sc, mode != 0);
4474 
4475 	return (0);
4476 }
4477 
4478 /**
4479  * ice_add_device_sysctls - add device specific dynamic sysctls
4480  * @sc: device private structure
4481  *
4482  * Add per-device dynamic sysctls which show device configuration or enable
4483  * configuring device functionality. For tunable values which can be set prior
4484  * to load, see ice_add_device_tunables.
4485  *
4486  * This function depends on the sysctl layout setup by ice_add_device_tunables,
4487  * and likely should be called near the end of the attach process.
4488  */
4489 void
4490 ice_add_device_sysctls(struct ice_softc *sc)
4491 {
4492 	struct sysctl_oid *hw_node;
4493 	device_t dev = sc->dev;
4494 
4495 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
4496 	struct sysctl_oid_list *ctx_list =
4497 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
4498 
4499 	SYSCTL_ADD_PROC(ctx, ctx_list,
4500 	    OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD,
4501 	    sc, 0, ice_sysctl_show_fw, "A", "Firmware version");
4502 
4503 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_HAS_PBA)) {
4504 		SYSCTL_ADD_PROC(ctx, ctx_list,
4505 		    OID_AUTO, "pba_number", CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4506 		    ice_sysctl_pba_number, "A", "Product Board Assembly Number");
4507 	}
4508 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_TEMP_SENSOR)) {
4509 		SYSCTL_ADD_PROC(ctx, ctx_list,
4510 		    OID_AUTO, "temp", CTLTYPE_S8 | CTLFLAG_RD,
4511 		    sc, 0, ice_sysctl_temperature, "CU",
4512 		    "Device temperature in degrees Celcius (C)");
4513 	}
4514 
4515 	SYSCTL_ADD_PROC(ctx, ctx_list,
4516 	    OID_AUTO, "ddp_version", CTLTYPE_STRING | CTLFLAG_RD,
4517 	    sc, 0, ice_sysctl_pkg_version, "A", "Active DDP package name and version");
4518 
4519 	SYSCTL_ADD_PROC(ctx, ctx_list,
4520 	    OID_AUTO, "current_speed", CTLTYPE_STRING | CTLFLAG_RD,
4521 	    sc, 0, ice_sysctl_current_speed, "A", "Current Port Link Speed");
4522 
4523 	SYSCTL_ADD_PROC(ctx, ctx_list,
4524 	    OID_AUTO, "requested_fec", CTLTYPE_STRING | CTLFLAG_RW,
4525 	    sc, 0, ice_sysctl_fec_config, "A", ICE_SYSCTL_HELP_FEC_CONFIG);
4526 
4527 	SYSCTL_ADD_PROC(ctx, ctx_list,
4528 	    OID_AUTO, "negotiated_fec", CTLTYPE_STRING | CTLFLAG_RD,
4529 	    sc, 0, ice_sysctl_negotiated_fec, "A", "Current Negotiated FEC mode");
4530 
4531 	SYSCTL_ADD_PROC(ctx, ctx_list,
4532 	    OID_AUTO, "fc", CTLTYPE_STRING | CTLFLAG_RW,
4533 	    sc, 0, ice_sysctl_fc_config, "A", ICE_SYSCTL_HELP_FC_CONFIG);
4534 
4535 	SYSCTL_ADD_PROC(ctx, ctx_list,
4536 	    OID_AUTO, "advertise_speed", CTLTYPE_U16 | CTLFLAG_RW,
4537 	    sc, 0, ice_sysctl_advertise_speed, "SU", ICE_SYSCTL_HELP_ADVERTISE_SPEED);
4538 
4539 	SYSCTL_ADD_PROC(ctx, ctx_list,
4540 	    OID_AUTO, "fw_lldp_agent", CTLTYPE_U8 | CTLFLAG_RWTUN,
4541 	    sc, 0, ice_sysctl_fw_lldp_agent, "CU", ICE_SYSCTL_HELP_FW_LLDP_AGENT);
4542 
4543 	SYSCTL_ADD_PROC(ctx, ctx_list,
4544 	    OID_AUTO, "ets_min_rate", CTLTYPE_STRING | CTLFLAG_RW,
4545 	    sc, 0, ice_sysctl_ets_min_rate, "A", ICE_SYSCTL_HELP_ETS_MIN_RATE);
4546 
4547 	SYSCTL_ADD_PROC(ctx, ctx_list,
4548 	    OID_AUTO, "up2tc_map", CTLTYPE_STRING | CTLFLAG_RW,
4549 	    sc, 0, ice_sysctl_up2tc_map, "A", ICE_SYSCTL_HELP_UP2TC_MAP);
4550 
4551 	SYSCTL_ADD_PROC(ctx, ctx_list,
4552 	    OID_AUTO, "pfc", CTLTYPE_U8 | CTLFLAG_RW,
4553 	    sc, 0, ice_sysctl_pfc_config, "CU", ICE_SYSCTL_HELP_PFC_CONFIG);
4554 
4555 	SYSCTL_ADD_PROC(ctx, ctx_list,
4556 	    OID_AUTO, "pfc_mode", CTLTYPE_U8 | CTLFLAG_RWTUN,
4557 	    sc, 0, ice_sysctl_pfc_mode, "CU", ICE_SYSCTL_HELP_PFC_MODE);
4558 
4559 	SYSCTL_ADD_PROC(ctx, ctx_list,
4560 	    OID_AUTO, "allow_no_fec_modules_in_auto",
4561 	    CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
4562 	    sc, 0, ice_sysctl_allow_no_fec_mod_in_auto, "CU",
4563 	    "Allow \"No FEC\" mode in FEC auto-negotiation");
4564 
4565 	SYSCTL_ADD_PROC(ctx, ctx_list,
4566 	    OID_AUTO, "link_active_on_if_down", CTLTYPE_U8 | CTLFLAG_RWTUN,
4567 	    sc, 0, ice_sysctl_set_link_active, "CU", ICE_SYSCTL_HELP_SET_LINK_ACTIVE);
4568 
4569 	SYSCTL_ADD_PROC(ctx, ctx_list,
4570 	    OID_AUTO, "create_mirror_interface", CTLTYPE_STRING | CTLFLAG_RW,
4571 	    sc, 0, ice_sysctl_create_mirror_interface, "A", "");
4572 
4573 	SYSCTL_ADD_PROC(ctx, ctx_list,
4574 	    OID_AUTO, "destroy_mirror_interface", CTLTYPE_STRING | CTLFLAG_RW,
4575 	    sc, 0, ice_sysctl_destroy_mirror_interface, "A", "");
4576 
4577 	ice_add_dscp2tc_map_sysctls(sc, ctx, ctx_list);
4578 
4579 	/* Differentiate software and hardware statistics, by keeping hw stats
4580 	 * in their own node. This isn't in ice_add_device_tunables, because
4581 	 * we won't have any CTLFLAG_TUN sysctls under this node.
4582 	 */
4583 	hw_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "hw", CTLFLAG_RD,
4584 				  NULL, "Port Hardware Statistics");
4585 
4586 	ice_add_sysctls_mac_stats(ctx, hw_node, &sc->stats.cur);
4587 
4588 	/* Add the main PF VSI stats now. Other VSIs will add their own stats
4589 	 * during creation
4590 	 */
4591 	ice_add_vsi_sysctls(&sc->pf_vsi);
4592 
4593 	/* Add sysctls related to debugging the device driver. This includes
4594 	 * sysctls which display additional internal driver state for use in
4595 	 * understanding what is happening within the driver.
4596 	 */
4597 	ice_add_debug_sysctls(sc);
4598 }
4599 
/**
 * @enum hmc_error_type
 * @brief enumeration of HMC errors
 *
 * Error type codes that may be reported for an HMC error. The numeric
 * values are fixed; codes 3, 10 and 12 are reserved and have no enumerator.
 */
enum hmc_error_type {
	/* Private Memory Function is not valid */
	HMC_ERR_PMF_INVALID			= 0,
	/* Invalid Private Memory Function index for PE enabled VF */
	HMC_ERR_VF_IDX_INVALID			= 1,
	/* Invalid parent PF for PE enabled VF */
	HMC_ERR_VF_PARENT_PF_INVALID		= 2,
	/* 3 is reserved */
	/* Object index too big */
	HMC_ERR_INDEX_TOO_BIG			= 4,
	/* Address extends beyond segment descriptor limit */
	HMC_ERR_ADDRESS_TOO_LARGE		= 5,
	/* Segment descriptor is invalid */
	HMC_ERR_SEGMENT_DESC_INVALID		= 6,
	/* Segment descriptor is too small */
	HMC_ERR_SEGMENT_DESC_TOO_SMALL		= 7,
	/* Page descriptor is invalid */
	HMC_ERR_PAGE_DESC_INVALID		= 8,
	/* Unsupported Request completion received from PCIe */
	HMC_ERR_UNSUPPORTED_REQUEST_COMPLETION	= 9,
	/* 10 is reserved */
	/* Invalid object type */
	HMC_ERR_INVALID_OBJECT_TYPE		= 11,
	/* 12 is reserved */
};
4621 
4622 /**
4623  * ice_log_hmc_error - Log an HMC error message
4624  * @hw: device hw structure
4625  * @dev: the device to pass to device_printf()
4626  *
4627  * Log a message when an HMC error interrupt is triggered.
4628  */
4629 void
4630 ice_log_hmc_error(struct ice_hw *hw, device_t dev)
4631 {
4632 	u32 info, data;
4633 	u8 index, errtype, objtype;
4634 	bool isvf;
4635 
4636 	info = rd32(hw, PFHMC_ERRORINFO);
4637 	data = rd32(hw, PFHMC_ERRORDATA);
4638 
4639 	index = (u8)(info & PFHMC_ERRORINFO_PMF_INDEX_M);
4640 	errtype = (u8)((info & PFHMC_ERRORINFO_HMC_ERROR_TYPE_M) >>
4641 		       PFHMC_ERRORINFO_HMC_ERROR_TYPE_S);
4642 	objtype = (u8)((info & PFHMC_ERRORINFO_HMC_OBJECT_TYPE_M) >>
4643 		       PFHMC_ERRORINFO_HMC_OBJECT_TYPE_S);
4644 
4645 	isvf = info & PFHMC_ERRORINFO_PMF_ISVF_M;
4646 
4647 	device_printf(dev, "%s HMC Error detected on PMF index %d:\n",
4648 		      isvf ? "VF" : "PF", index);
4649 
4650 	device_printf(dev, "error type %d, object type %d, data 0x%08x\n",
4651 		      errtype, objtype, data);
4652 
4653 	switch (errtype) {
4654 	case HMC_ERR_PMF_INVALID:
4655 		device_printf(dev, "Private Memory Function is not valid\n");
4656 		break;
4657 	case HMC_ERR_VF_IDX_INVALID:
4658 		device_printf(dev, "Invalid Private Memory Function index for PE enabled VF\n");
4659 		break;
4660 	case HMC_ERR_VF_PARENT_PF_INVALID:
4661 		device_printf(dev, "Invalid parent PF for PE enabled VF\n");
4662 		break;
4663 	case HMC_ERR_INDEX_TOO_BIG:
4664 		device_printf(dev, "Object index too big\n");
4665 		break;
4666 	case HMC_ERR_ADDRESS_TOO_LARGE:
4667 		device_printf(dev, "Address extends beyond segment descriptor limit\n");
4668 		break;
4669 	case HMC_ERR_SEGMENT_DESC_INVALID:
4670 		device_printf(dev, "Segment descriptor is invalid\n");
4671 		break;
4672 	case HMC_ERR_SEGMENT_DESC_TOO_SMALL:
4673 		device_printf(dev, "Segment descriptor is too small\n");
4674 		break;
4675 	case HMC_ERR_PAGE_DESC_INVALID:
4676 		device_printf(dev, "Page descriptor is invalid\n");
4677 		break;
4678 	case HMC_ERR_UNSUPPORTED_REQUEST_COMPLETION:
4679 		device_printf(dev, "Unsupported Request completion received from PCIe\n");
4680 		break;
4681 	case HMC_ERR_INVALID_OBJECT_TYPE:
4682 		device_printf(dev, "Invalid object type\n");
4683 		break;
4684 	default:
4685 		device_printf(dev, "Unknown HMC error\n");
4686 	}
4687 
4688 	/* Clear the error indication */
4689 	wr32(hw, PFHMC_ERRORINFO, 0);
4690 }
4691 
4692 /**
4693  * @struct ice_sysctl_info
4694  * @brief sysctl information
4695  *
4696  * Structure used to simplify the process of defining the many similar
4697  * statistics sysctls.
4698  */
4699 struct ice_sysctl_info {
4700 	u64		*stat;
4701 	const char	*name;
4702 	const char	*description;
4703 };
4704 
4705 /**
4706  * ice_add_sysctls_eth_stats - Add sysctls for ethernet statistics
4707  * @ctx: sysctl ctx to use
4708  * @parent: the parent node to add sysctls under
4709  * @stats: the ethernet stats structure to source values from
4710  *
4711  * Adds statistics sysctls for the ethernet statistics of the MAC or a VSI.
4712  * Will add them under the parent node specified.
4713  *
4714  * Note that tx_errors is only meaningful for VSIs and not the global MAC/PF
4715  * statistics, so it is not included here. Similarly, rx_discards has different
4716  * descriptions for VSIs and MAC/PF stats, so it is also not included here.
4717  */
4718 void
4719 ice_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx,
4720 			  struct sysctl_oid *parent,
4721 			  struct ice_eth_stats *stats)
4722 {
4723 	const struct ice_sysctl_info ctls[] = {
4724 		/* Rx Stats */
4725 		{ &stats->rx_bytes, "good_octets_rcvd", "Good Octets Received" },
4726 		{ &stats->rx_unicast, "ucast_pkts_rcvd", "Unicast Packets Received" },
4727 		{ &stats->rx_multicast, "mcast_pkts_rcvd", "Multicast Packets Received" },
4728 		{ &stats->rx_broadcast, "bcast_pkts_rcvd", "Broadcast Packets Received" },
4729 		/* Tx Stats */
4730 		{ &stats->tx_bytes, "good_octets_txd", "Good Octets Transmitted" },
4731 		{ &stats->tx_unicast, "ucast_pkts_txd", "Unicast Packets Transmitted" },
4732 		{ &stats->tx_multicast, "mcast_pkts_txd", "Multicast Packets Transmitted" },
4733 		{ &stats->tx_broadcast, "bcast_pkts_txd", "Broadcast Packets Transmitted" },
4734 		/* End */
4735 		{ 0, 0, 0 }
4736 	};
4737 
4738 	struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);
4739 
4740 	const struct ice_sysctl_info *entry = ctls;
4741 	while (entry->stat != 0) {
4742 		SYSCTL_ADD_U64(ctx, parent_list, OID_AUTO, entry->name,
4743 			       CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
4744 			       entry->description);
4745 		entry++;
4746 	}
4747 }
4748 
4749 /**
4750  * ice_sysctl_tx_cso_stat - Display Tx checksum offload statistic
4751  * @oidp: sysctl oid structure
4752  * @arg1: pointer to private data structure
4753  * @arg2: Tx CSO stat to read
4754  * @req: sysctl request pointer
4755  *
4756  * On read: Sums the per-queue Tx CSO stat and displays it.
4757  */
4758 static int
4759 ice_sysctl_tx_cso_stat(SYSCTL_HANDLER_ARGS)
4760 {
4761 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
4762 	enum ice_tx_cso_stat type = (enum ice_tx_cso_stat)arg2;
4763 	u64 stat = 0;
4764 	int i;
4765 
4766 	if (ice_driver_is_detaching(vsi->sc))
4767 		return (ESHUTDOWN);
4768 
4769 	/* Check that the type is valid */
4770 	if (type >= ICE_CSO_STAT_TX_COUNT)
4771 		return (EDOOFUS);
4772 
4773 	/* Sum the stat for each of the Tx queues */
4774 	for (i = 0; i < vsi->num_tx_queues; i++)
4775 		stat += vsi->tx_queues[i].stats.cso[type];
4776 
4777 	return sysctl_handle_64(oidp, NULL, stat, req);
4778 }
4779 
4780 /**
4781  * ice_sysctl_rx_cso_stat - Display Rx checksum offload statistic
4782  * @oidp: sysctl oid structure
4783  * @arg1: pointer to private data structure
4784  * @arg2: Rx CSO stat to read
4785  * @req: sysctl request pointer
4786  *
4787  * On read: Sums the per-queue Rx CSO stat and displays it.
4788  */
4789 static int
4790 ice_sysctl_rx_cso_stat(SYSCTL_HANDLER_ARGS)
4791 {
4792 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
4793 	enum ice_rx_cso_stat type = (enum ice_rx_cso_stat)arg2;
4794 	u64 stat = 0;
4795 	int i;
4796 
4797 	if (ice_driver_is_detaching(vsi->sc))
4798 		return (ESHUTDOWN);
4799 
4800 	/* Check that the type is valid */
4801 	if (type >= ICE_CSO_STAT_RX_COUNT)
4802 		return (EDOOFUS);
4803 
4804 	/* Sum the stat for each of the Rx queues */
4805 	for (i = 0; i < vsi->num_rx_queues; i++)
4806 		stat += vsi->rx_queues[i].stats.cso[type];
4807 
4808 	return sysctl_handle_64(oidp, NULL, stat, req);
4809 }
4810 
4811 /**
4812  * ice_sysctl_rx_errors_stat - Display aggregate of Rx errors
4813  * @oidp: sysctl oid structure
4814  * @arg1: pointer to private data structure
4815  * @arg2: unused
4816  * @req: sysctl request pointer
4817  *
4818  * On read: Sums current values of Rx error statistics and
4819  * displays it.
4820  */
4821 static int
4822 ice_sysctl_rx_errors_stat(SYSCTL_HANDLER_ARGS)
4823 {
4824 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
4825 	struct ice_hw_port_stats *hs = &vsi->sc->stats.cur;
4826 	u64 stat = 0;
4827 	int i, type;
4828 
4829 	UNREFERENCED_PARAMETER(arg2);
4830 
4831 	if (ice_driver_is_detaching(vsi->sc))
4832 		return (ESHUTDOWN);
4833 
4834 	stat += hs->rx_undersize;
4835 	stat += hs->rx_fragments;
4836 	stat += hs->rx_oversize;
4837 	stat += hs->rx_jabber;
4838 	stat += hs->rx_len_errors;
4839 	stat += hs->crc_errors;
4840 	stat += hs->illegal_bytes;
4841 
4842 	/* Checksum error stats */
4843 	for (i = 0; i < vsi->num_rx_queues; i++)
4844 		for (type = ICE_CSO_STAT_RX_IP4_ERR;
4845 		     type < ICE_CSO_STAT_RX_COUNT;
4846 		     type++)
4847 			stat += vsi->rx_queues[i].stats.cso[type];
4848 
4849 	return sysctl_handle_64(oidp, NULL, stat, req);
4850 }
4851 
4852 /**
4853  * @struct ice_rx_cso_stat_info
4854  * @brief sysctl information for an Rx checksum offload statistic
4855  *
4856  * Structure used to simplify the process of defining the checksum offload
4857  * statistics.
4858  */
4859 struct ice_rx_cso_stat_info {
4860 	enum ice_rx_cso_stat	type;
4861 	const char		*name;
4862 	const char		*description;
4863 };
4864 
4865 /**
4866  * @struct ice_tx_cso_stat_info
4867  * @brief sysctl information for a Tx checksum offload statistic
4868  *
4869  * Structure used to simplify the process of defining the checksum offload
4870  * statistics.
4871  */
4872 struct ice_tx_cso_stat_info {
4873 	enum ice_tx_cso_stat	type;
4874 	const char		*name;
4875 	const char		*description;
4876 };
4877 
4878 /**
4879  * ice_add_sysctls_sw_stats - Add sysctls for software statistics
4880  * @vsi: pointer to the VSI to add sysctls for
4881  * @ctx: sysctl ctx to use
4882  * @parent: the parent node to add sysctls under
4883  *
4884  * Add statistics sysctls for software tracked statistics of a VSI.
4885  *
4886  * Currently this only adds checksum offload statistics, but more counters may
4887  * be added in the future.
4888  */
4889 static void
4890 ice_add_sysctls_sw_stats(struct ice_vsi *vsi,
4891 			 struct sysctl_ctx_list *ctx,
4892 			 struct sysctl_oid *parent)
4893 {
4894 	struct sysctl_oid *cso_node;
4895 	struct sysctl_oid_list *cso_list;
4896 
4897 	/* Tx CSO Stats */
4898 	const struct ice_tx_cso_stat_info tx_ctls[] = {
4899 		{ ICE_CSO_STAT_TX_TCP, "tx_tcp", "Transmit TCP Packets marked for HW checksum" },
4900 		{ ICE_CSO_STAT_TX_UDP, "tx_udp", "Transmit UDP Packets marked for HW checksum" },
4901 		{ ICE_CSO_STAT_TX_SCTP, "tx_sctp", "Transmit SCTP Packets marked for HW checksum" },
4902 		{ ICE_CSO_STAT_TX_IP4, "tx_ip4", "Transmit IPv4 Packets marked for HW checksum" },
4903 		{ ICE_CSO_STAT_TX_IP6, "tx_ip6", "Transmit IPv6 Packets marked for HW checksum" },
4904 		{ ICE_CSO_STAT_TX_L3_ERR, "tx_l3_err", "Transmit packets that driver failed to set L3 HW CSO bits for" },
4905 		{ ICE_CSO_STAT_TX_L4_ERR, "tx_l4_err", "Transmit packets that driver failed to set L4 HW CSO bits for" },
4906 		/* End */
4907 		{ ICE_CSO_STAT_TX_COUNT, 0, 0 }
4908 	};
4909 
4910 	/* Rx CSO Stats */
4911 	const struct ice_rx_cso_stat_info rx_ctls[] = {
4912 		{ ICE_CSO_STAT_RX_IP4_ERR, "rx_ip4_err", "Received packets with invalid IPv4 checksum indicated by HW" },
4913 		{ ICE_CSO_STAT_RX_IP6_ERR, "rx_ip6_err", "Received IPv6 packets with extension headers" },
4914 		{ ICE_CSO_STAT_RX_L3_ERR, "rx_l3_err", "Received packets with an unexpected invalid L3 checksum indicated by HW" },
4915 		{ ICE_CSO_STAT_RX_TCP_ERR, "rx_tcp_err", "Received packets with invalid TCP checksum indicated by HW" },
4916 		{ ICE_CSO_STAT_RX_UDP_ERR, "rx_udp_err", "Received packets with invalid UDP checksum indicated by HW" },
4917 		{ ICE_CSO_STAT_RX_SCTP_ERR, "rx_sctp_err", "Received packets with invalid SCTP checksum indicated by HW" },
4918 		{ ICE_CSO_STAT_RX_L4_ERR, "rx_l4_err", "Received packets with an unexpected invalid L4 checksum indicated by HW" },
4919 		/* End */
4920 		{ ICE_CSO_STAT_RX_COUNT, 0, 0 }
4921 	};
4922 
4923 	struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);
4924 
4925 	/* Add a node for statistics tracked by software. */
4926 	cso_node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, "cso", CTLFLAG_RD,
4927 				  NULL, "Checksum offload Statistics");
4928 	cso_list = SYSCTL_CHILDREN(cso_node);
4929 
4930 	const struct ice_tx_cso_stat_info *tx_entry = tx_ctls;
4931 	while (tx_entry->name && tx_entry->description) {
4932 		SYSCTL_ADD_PROC(ctx, cso_list, OID_AUTO, tx_entry->name,
4933 				CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4934 				vsi, tx_entry->type, ice_sysctl_tx_cso_stat, "QU",
4935 				tx_entry->description);
4936 		tx_entry++;
4937 	}
4938 
4939 	const struct ice_rx_cso_stat_info *rx_entry = rx_ctls;
4940 	while (rx_entry->name && rx_entry->description) {
4941 		SYSCTL_ADD_PROC(ctx, cso_list, OID_AUTO, rx_entry->name,
4942 				CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4943 				vsi, rx_entry->type, ice_sysctl_rx_cso_stat, "QU",
4944 				rx_entry->description);
4945 		rx_entry++;
4946 	}
4947 }
4948 
4949 /**
4950  * ice_add_vsi_sysctls - Add sysctls for a VSI
4951  * @vsi: pointer to VSI structure
4952  *
4953  * Add various sysctls for a given VSI.
4954  */
4955 void
4956 ice_add_vsi_sysctls(struct ice_vsi *vsi)
4957 {
4958 	struct sysctl_ctx_list *ctx = &vsi->ctx;
4959 	struct sysctl_oid *hw_node, *sw_node;
4960 	struct sysctl_oid_list *vsi_list, *hw_list;
4961 
4962 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
4963 
4964 	/* Keep hw stats in their own node. */
4965 	hw_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, "hw", CTLFLAG_RD,
4966 				  NULL, "VSI Hardware Statistics");
4967 	hw_list = SYSCTL_CHILDREN(hw_node);
4968 
4969 	/* Add the ethernet statistics for this VSI */
4970 	ice_add_sysctls_eth_stats(ctx, hw_node, &vsi->hw_stats.cur);
4971 
4972 	SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "rx_discards",
4973 			CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.rx_discards,
4974 			0, "Discarded Rx Packets (see rx_errors or rx_no_desc)");
4975 
4976 	SYSCTL_ADD_PROC(ctx, hw_list, OID_AUTO, "rx_errors",
4977 			CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4978 			vsi, 0, ice_sysctl_rx_errors_stat, "QU",
4979 			"Aggregate of all Rx errors");
4980 
4981 	SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "rx_no_desc",
4982 		       CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.rx_no_desc,
4983 		       0, "Rx Packets Discarded Due To Lack Of Descriptors");
4984 
4985 	SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "tx_errors",
4986 			CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.tx_errors,
4987 			0, "Tx Packets Discarded Due To Error");
4988 
4989 	/* Add a node for statistics tracked by software. */
4990 	sw_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, "sw", CTLFLAG_RD,
4991 				  NULL, "VSI Software Statistics");
4992 
4993 	ice_add_sysctls_sw_stats(vsi, ctx, sw_node);
4994 }
4995 
4996 /**
4997  * ice_add_sysctls_mac_pfc_one_stat - Add sysctl node for a PFC statistic
4998  * @ctx: sysctl ctx to use
4999  * @parent_list: parent sysctl list to add sysctls under
5000  * @pfc_stat_location: address of statistic for sysctl to display
5001  * @node_name: Name for statistic node
5002  * @descr: Description used for nodes added in this function
5003  *
5004  * A helper function for ice_add_sysctls_mac_pfc_stats that adds a node
5005  * for a stat and leaves for each traffic class for that stat.
5006  */
5007 static void
5008 ice_add_sysctls_mac_pfc_one_stat(struct sysctl_ctx_list *ctx,
5009 				 struct sysctl_oid_list *parent_list,
5010 				 u64* pfc_stat_location,
5011 				 const char *node_name,
5012 				 const char *descr)
5013 {
5014 	struct sysctl_oid_list *node_list;
5015 	struct sysctl_oid *node;
5016 	struct sbuf *namebuf, *descbuf;
5017 
5018 	node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, node_name, CTLFLAG_RD,
5019 				   NULL, descr);
5020 	node_list = SYSCTL_CHILDREN(node);
5021 
5022 	namebuf = sbuf_new_auto();
5023 	descbuf = sbuf_new_auto();
5024 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
5025 		sbuf_clear(namebuf);
5026 		sbuf_clear(descbuf);
5027 
5028 		sbuf_printf(namebuf, "%d", i);
5029 		sbuf_printf(descbuf, "%s for TC %d", descr, i);
5030 
5031 		sbuf_finish(namebuf);
5032 		sbuf_finish(descbuf);
5033 
5034 		SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, sbuf_data(namebuf),
5035 			CTLFLAG_RD | CTLFLAG_STATS, &pfc_stat_location[i], 0,
5036 			sbuf_data(descbuf));
5037 	}
5038 
5039 	sbuf_delete(namebuf);
5040 	sbuf_delete(descbuf);
5041 }
5042 
5043 /**
5044  * ice_add_sysctls_mac_pfc_stats - Add sysctls for MAC PFC statistics
5045  * @ctx: the sysctl ctx to use
5046  * @parent: parent node to add the sysctls under
5047  * @stats: the hw ports stat structure to pull values from
5048  *
5049  * Add global Priority Flow Control MAC statistics sysctls. These are
5050  * structured as a node with the PFC statistic, where there are eight
5051  * nodes for each traffic class.
5052  */
5053 static void
5054 ice_add_sysctls_mac_pfc_stats(struct sysctl_ctx_list *ctx,
5055 			      struct sysctl_oid *parent,
5056 			      struct ice_hw_port_stats *stats)
5057 {
5058 	struct sysctl_oid_list *parent_list;
5059 
5060 	parent_list = SYSCTL_CHILDREN(parent);
5061 
5062 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xon_rx,
5063 	    "p_xon_recvd", "PFC XON received");
5064 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xoff_rx,
5065 	    "p_xoff_recvd", "PFC XOFF received");
5066 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xon_tx,
5067 	    "p_xon_txd", "PFC XON transmitted");
5068 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xoff_tx,
5069 	    "p_xoff_txd", "PFC XOFF transmitted");
5070 	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xon_2_xoff,
5071 	    "p_xon2xoff", "PFC XON to XOFF transitions");
5072 }
5073 
5074 /**
5075  * ice_add_sysctls_mac_stats - Add sysctls for global MAC statistics
5076  * @ctx: the sysctl ctx to use
5077  * @parent: parent node to add the sysctls under
5078  * @stats: the hw ports stat structure to pull values from
5079  *
5080  * Add global MAC statistics sysctls.
5081  */
5082 void
5083 ice_add_sysctls_mac_stats(struct sysctl_ctx_list *ctx,
5084 			  struct sysctl_oid *parent,
5085 			  struct ice_hw_port_stats *stats)
5086 {
5087 	struct sysctl_oid *mac_node;
5088 	struct sysctl_oid_list *parent_list, *mac_list;
5089 
5090 	parent_list = SYSCTL_CHILDREN(parent);
5091 
5092 	mac_node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, "mac", CTLFLAG_RD,
5093 				   NULL, "Mac Hardware Statistics");
5094 	mac_list = SYSCTL_CHILDREN(mac_node);
5095 
5096 	/* Add the ethernet statistics common to VSI and MAC */
5097 	ice_add_sysctls_eth_stats(ctx, mac_node, &stats->eth);
5098 
5099 	/* Add PFC stats that add per-TC counters */
5100 	ice_add_sysctls_mac_pfc_stats(ctx, mac_node, stats);
5101 
5102 	const struct ice_sysctl_info ctls[] = {
5103 		/* Packet Reception Stats */
5104 		{&stats->rx_size_64, "rx_frames_64", "64 byte frames received"},
5105 		{&stats->rx_size_127, "rx_frames_65_127", "65-127 byte frames received"},
5106 		{&stats->rx_size_255, "rx_frames_128_255", "128-255 byte frames received"},
5107 		{&stats->rx_size_511, "rx_frames_256_511", "256-511 byte frames received"},
5108 		{&stats->rx_size_1023, "rx_frames_512_1023", "512-1023 byte frames received"},
5109 		{&stats->rx_size_1522, "rx_frames_1024_1522", "1024-1522 byte frames received"},
5110 		{&stats->rx_size_big, "rx_frames_big", "1523-9522 byte frames received"},
5111 		{&stats->rx_undersize, "rx_undersize", "Undersized packets received"},
5112 		{&stats->rx_fragments, "rx_fragmented", "Fragmented packets received"},
5113 		{&stats->rx_oversize, "rx_oversized", "Oversized packets received"},
5114 		{&stats->rx_jabber, "rx_jabber", "Received Jabber"},
5115 		{&stats->rx_len_errors, "rx_length_errors", "Receive Length Errors"},
5116 		{&stats->eth.rx_discards, "rx_discards",
5117 		    "Discarded Rx Packets by Port (shortage of storage space)"},
5118 		/* Packet Transmission Stats */
5119 		{&stats->tx_size_64, "tx_frames_64", "64 byte frames transmitted"},
5120 		{&stats->tx_size_127, "tx_frames_65_127", "65-127 byte frames transmitted"},
5121 		{&stats->tx_size_255, "tx_frames_128_255", "128-255 byte frames transmitted"},
5122 		{&stats->tx_size_511, "tx_frames_256_511", "256-511 byte frames transmitted"},
5123 		{&stats->tx_size_1023, "tx_frames_512_1023", "512-1023 byte frames transmitted"},
5124 		{&stats->tx_size_1522, "tx_frames_1024_1522", "1024-1522 byte frames transmitted"},
5125 		{&stats->tx_size_big, "tx_frames_big", "1523-9522 byte frames transmitted"},
5126 		{&stats->tx_dropped_link_down, "tx_dropped", "Tx Dropped Due To Link Down"},
5127 		/* Flow control */
5128 		{&stats->link_xon_tx, "xon_txd", "Link XON transmitted"},
5129 		{&stats->link_xon_rx, "xon_recvd", "Link XON received"},
5130 		{&stats->link_xoff_tx, "xoff_txd", "Link XOFF transmitted"},
5131 		{&stats->link_xoff_rx, "xoff_recvd", "Link XOFF received"},
5132 		/* Other */
5133 		{&stats->crc_errors, "crc_errors", "CRC Errors"},
5134 		{&stats->illegal_bytes, "illegal_bytes", "Illegal Byte Errors"},
5135 		{&stats->mac_local_faults, "local_faults", "MAC Local Faults"},
5136 		{&stats->mac_remote_faults, "remote_faults", "MAC Remote Faults"},
5137 		/* End */
5138 		{ 0, 0, 0 }
5139 	};
5140 
5141 	const struct ice_sysctl_info *entry = ctls;
5142 	while (entry->stat != 0) {
5143 		SYSCTL_ADD_U64(ctx, mac_list, OID_AUTO, entry->name,
5144 			CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
5145 			entry->description);
5146 		entry++;
5147 	}
5148 }
5149 
5150 /**
5151  * ice_configure_misc_interrupts - enable 'other' interrupt causes
5152  * @sc: pointer to device private softc
5153  *
5154  * Enable various "other" interrupt causes, and associate them to interrupt 0,
5155  * which is our administrative interrupt.
5156  */
5157 void
5158 ice_configure_misc_interrupts(struct ice_softc *sc)
5159 {
5160 	struct ice_hw *hw = &sc->hw;
5161 	u32 val;
5162 
5163 	/* Read the OICR register to clear it */
5164 	rd32(hw, PFINT_OICR);
5165 
5166 	/* Enable useful "other" interrupt causes */
5167 	val = (PFINT_OICR_ECC_ERR_M |
5168 	       PFINT_OICR_MAL_DETECT_M |
5169 	       PFINT_OICR_GRST_M |
5170 	       PFINT_OICR_PCI_EXCEPTION_M |
5171 	       PFINT_OICR_VFLR_M |
5172 	       PFINT_OICR_HMC_ERR_M |
5173 	       PFINT_OICR_PE_CRITERR_M);
5174 
5175 	wr32(hw, PFINT_OICR_ENA, val);
5176 
5177 	/* Note that since we're using MSI-X index 0, and ITR index 0, we do
5178 	 * not explicitly program them when writing to the PFINT_*_CTL
5179 	 * registers. Nevertheless, these writes are associating the
5180 	 * interrupts with the ITR 0 vector
5181 	 */
5182 
5183 	/* Associate the OICR interrupt with ITR 0, and enable it */
5184 	wr32(hw, PFINT_OICR_CTL, PFINT_OICR_CTL_CAUSE_ENA_M);
5185 
5186 	/* Associate the Mailbox interrupt with ITR 0, and enable it */
5187 	wr32(hw, PFINT_MBX_CTL, PFINT_MBX_CTL_CAUSE_ENA_M);
5188 
5189 	/* Associate the AdminQ interrupt with ITR 0, and enable it */
5190 	wr32(hw, PFINT_FW_CTL, PFINT_FW_CTL_CAUSE_ENA_M);
5191 }
5192 
5193 /**
5194  * ice_filter_is_mcast - Check if info is a multicast filter
5195  * @vsi: vsi structure addresses are targeted towards
5196  * @info: filter info
5197  *
5198  * @returns true if the provided info is a multicast filter, and false
5199  * otherwise.
5200  */
5201 static bool
5202 ice_filter_is_mcast(struct ice_vsi *vsi, struct ice_fltr_info *info)
5203 {
5204 	const u8 *addr = info->l_data.mac.mac_addr;
5205 
5206 	/*
5207 	 * Check if this info matches a multicast filter added by
5208 	 * ice_add_mac_to_list
5209 	 */
5210 	if ((info->flag == ICE_FLTR_TX) &&
5211 	    (info->src_id == ICE_SRC_ID_VSI) &&
5212 	    (info->lkup_type == ICE_SW_LKUP_MAC) &&
5213 	    (info->vsi_handle == vsi->idx) &&
5214 	    ETHER_IS_MULTICAST(addr) && !ETHER_IS_BROADCAST(addr))
5215 		return true;
5216 
5217 	return false;
5218 }
5219 
5220 /**
5221  * @struct ice_mcast_sync_data
5222  * @brief data used by ice_sync_one_mcast_filter function
5223  *
5224  * Structure used to store data needed for processing by the
5225  * ice_sync_one_mcast_filter. This structure contains a linked list of filters
5226  * to be added, an error indication, and a pointer to the device softc.
5227  */
5228 struct ice_mcast_sync_data {
5229 	struct ice_list_head add_list;
5230 	struct ice_softc *sc;
5231 	int err;
5232 };
5233 
5234 /**
5235  * ice_sync_one_mcast_filter - Check if we need to program the filter
5236  * @p: void pointer to algorithm data
5237  * @sdl: link level socket address
5238  * @count: unused count value
5239  *
5240  * Called by if_foreach_llmaddr to operate on each filter in the ifp filter
5241  * list. For the given address, search our internal list to see if we have
5242  * found the filter. If not, add it to our list of filters that need to be
5243  * programmed.
5244  *
5245  * @returns (1) if we've actually setup the filter to be added
5246  */
5247 static u_int
5248 ice_sync_one_mcast_filter(void *p, struct sockaddr_dl *sdl,
5249 			  u_int __unused count)
5250 {
5251 	struct ice_mcast_sync_data *data = (struct ice_mcast_sync_data *)p;
5252 	struct ice_softc *sc = data->sc;
5253 	struct ice_hw *hw = &sc->hw;
5254 	struct ice_switch_info *sw = hw->switch_info;
5255 	const u8 *sdl_addr = (const u8 *)LLADDR(sdl);
5256 	struct ice_fltr_mgmt_list_entry *itr;
5257 	struct ice_list_head *rules;
5258 	int err;
5259 
5260 	rules = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
5261 
5262 	/*
5263 	 * If a previous filter already indicated an error, there is no need
5264 	 * for us to finish processing the rest of the filters.
5265 	 */
5266 	if (data->err)
5267 		return (0);
5268 
5269 	/* See if this filter has already been programmed */
5270 	LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry) {
5271 		struct ice_fltr_info *info = &itr->fltr_info;
5272 		const u8 *addr = info->l_data.mac.mac_addr;
5273 
5274 		/* Only check multicast filters */
5275 		if (!ice_filter_is_mcast(&sc->pf_vsi, info))
5276 			continue;
5277 
5278 		/*
5279 		 * If this filter matches, mark the internal filter as
5280 		 * "found", and exit.
5281 		 */
5282 		if (bcmp(addr, sdl_addr, ETHER_ADDR_LEN) == 0) {
5283 			itr->marker = ICE_FLTR_FOUND;
5284 			return (1);
5285 		}
5286 	}
5287 
5288 	/*
5289 	 * If we failed to locate the filter in our internal list, we need to
5290 	 * place it into our add list.
5291 	 */
5292 	err = ice_add_mac_to_list(&sc->pf_vsi, &data->add_list, sdl_addr,
5293 				  ICE_FWD_TO_VSI);
5294 	if (err) {
5295 		device_printf(sc->dev,
5296 			      "Failed to place MAC %6D onto add list, err %s\n",
5297 			      sdl_addr, ":", ice_err_str(err));
5298 		data->err = err;
5299 
5300 		return (0);
5301 	}
5302 
5303 	return (1);
5304 }
5305 
5306 /**
5307  * ice_sync_multicast_filters - Synchronize OS and internal filter list
5308  * @sc: device private structure
5309  *
5310  * Called in response to SIOCDELMULTI to synchronize the operating system
5311  * multicast address list with the internal list of filters programmed to
5312  * firmware.
5313  *
5314  * Works in one phase to find added and deleted filters using a marker bit on
5315  * the internal list.
5316  *
5317  * First, a loop over the internal list clears the marker bit. Second, for
5318  * each filter in the ifp list is checked. If we find it in the internal list,
5319  * the marker bit is set. Otherwise, the filter is added to the add list.
5320  * Third, a loop over the internal list determines if any filters have not
5321  * been found. Each of these is added to the delete list. Finally, the add and
5322  * delete lists are programmed to firmware to update the filters.
5323  *
5324  * @returns zero on success or an integer error code on failure.
5325  */
5326 int
5327 ice_sync_multicast_filters(struct ice_softc *sc)
5328 {
5329 	struct ice_hw *hw = &sc->hw;
5330 	struct ice_switch_info *sw = hw->switch_info;
5331 	struct ice_fltr_mgmt_list_entry *itr;
5332 	struct ice_mcast_sync_data data = {};
5333 	struct ice_list_head *rules, remove_list;
5334 	enum ice_status status;
5335 	int err = 0;
5336 
5337 	INIT_LIST_HEAD(&data.add_list);
5338 	INIT_LIST_HEAD(&remove_list);
5339 	data.sc = sc;
5340 	data.err = 0;
5341 
5342 	rules = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
5343 
5344 	/* Acquire the lock for the entire duration */
5345 	ice_acquire_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5346 
5347 	/* (1) Reset the marker state for all filters */
5348 	LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry)
5349 		itr->marker = ICE_FLTR_NOT_FOUND;
5350 
5351 	/* (2) determine which filters need to be added and removed */
5352 	if_foreach_llmaddr(sc->ifp, ice_sync_one_mcast_filter, (void *)&data);
5353 	if (data.err) {
5354 		/* ice_sync_one_mcast_filter already prints an error */
5355 		err = data.err;
5356 		ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5357 		goto free_filter_lists;
5358 	}
5359 
5360 	LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry) {
5361 		struct ice_fltr_info *info = &itr->fltr_info;
5362 		const u8 *addr = info->l_data.mac.mac_addr;
5363 
5364 		/* Only check multicast filters */
5365 		if (!ice_filter_is_mcast(&sc->pf_vsi, info))
5366 			continue;
5367 
5368 		/*
5369 		 * If the filter is not marked as found, then it must no
5370 		 * longer be in the ifp address list, so we need to remove it.
5371 		 */
5372 		if (itr->marker == ICE_FLTR_NOT_FOUND) {
5373 			err = ice_add_mac_to_list(&sc->pf_vsi, &remove_list,
5374 						  addr, ICE_FWD_TO_VSI);
5375 			if (err) {
5376 				device_printf(sc->dev,
5377 					      "Failed to place MAC %6D onto remove list, err %s\n",
5378 					      addr, ":", ice_err_str(err));
5379 				ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5380 				goto free_filter_lists;
5381 			}
5382 		}
5383 	}
5384 
5385 	ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5386 
5387 	status = ice_add_mac(hw, &data.add_list);
5388 	if (status) {
5389 		device_printf(sc->dev,
5390 			      "Could not add new MAC filters, err %s aq_err %s\n",
5391 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
5392 		err = (EIO);
5393 		goto free_filter_lists;
5394 	}
5395 
5396 	status = ice_remove_mac(hw, &remove_list);
5397 	if (status) {
5398 		device_printf(sc->dev,
5399 			      "Could not remove old MAC filters, err %s aq_err %s\n",
5400 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
5401 		err = (EIO);
5402 		goto free_filter_lists;
5403 	}
5404 
5405 free_filter_lists:
5406 	ice_free_fltr_list(&data.add_list);
5407 	ice_free_fltr_list(&remove_list);
5408 
5409 	return (err);
5410 }
5411 
5412 /**
5413  * ice_add_vlan_hw_filters - Add multiple VLAN filters for a given VSI
5414  * @vsi: The VSI to add the filter for
5415  * @vid: array of VLAN ids to add
5416  * @length: length of vid array
5417  *
5418  * Programs HW filters so that the given VSI will receive the specified VLANs.
5419  */
5420 enum ice_status
5421 ice_add_vlan_hw_filters(struct ice_vsi *vsi, u16 *vid, u16 length)
5422 {
5423 	struct ice_hw *hw = &vsi->sc->hw;
5424 	struct ice_list_head vlan_list;
5425 	struct ice_fltr_list_entry *vlan_entries;
5426 	enum ice_status status;
5427 
5428 	MPASS(length > 0);
5429 
5430 	INIT_LIST_HEAD(&vlan_list);
5431 
5432 	vlan_entries = (struct ice_fltr_list_entry *)
5433 	    malloc(sizeof(*vlan_entries) * length, M_ICE, M_NOWAIT | M_ZERO);
5434 	if (!vlan_entries)
5435 		return (ICE_ERR_NO_MEMORY);
5436 
5437 	for (u16 i = 0; i < length; i++) {
5438 		vlan_entries[i].fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
5439 		vlan_entries[i].fltr_info.fltr_act = ICE_FWD_TO_VSI;
5440 		vlan_entries[i].fltr_info.flag = ICE_FLTR_TX;
5441 		vlan_entries[i].fltr_info.src_id = ICE_SRC_ID_VSI;
5442 		vlan_entries[i].fltr_info.vsi_handle = vsi->idx;
5443 		vlan_entries[i].fltr_info.l_data.vlan.vlan_id = vid[i];
5444 
5445 		LIST_ADD(&vlan_entries[i].list_entry, &vlan_list);
5446 	}
5447 
5448 	status = ice_add_vlan(hw, &vlan_list);
5449 	if (!status)
5450 		goto done;
5451 
5452 	device_printf(vsi->sc->dev, "Failed to add VLAN filters:\n");
5453 	for (u16 i = 0; i < length; i++) {
5454 		device_printf(vsi->sc->dev,
5455 		    "- vlan %d, status %d\n",
5456 		    vlan_entries[i].fltr_info.l_data.vlan.vlan_id,
5457 		    vlan_entries[i].status);
5458 	}
5459 done:
5460 	free(vlan_entries, M_ICE);
5461 	return (status);
5462 }
5463 
5464 /**
5465  * ice_add_vlan_hw_filter - Add a VLAN filter for a given VSI
5466  * @vsi: The VSI to add the filter for
5467  * @vid: VLAN to add
5468  *
5469  * Programs a HW filter so that the given VSI will receive the specified VLAN.
5470  */
5471 enum ice_status
5472 ice_add_vlan_hw_filter(struct ice_vsi *vsi, u16 vid)
5473 {
5474 	return ice_add_vlan_hw_filters(vsi, &vid, 1);
5475 }
5476 
5477 /**
5478  * ice_remove_vlan_hw_filters - Remove multiple VLAN filters for a given VSI
5479  * @vsi: The VSI to remove the filters from
5480  * @vid: array of VLAN ids to remove
5481  * @length: length of vid array
5482  *
5483  * Removes previously programmed HW filters for the specified VSI.
5484  */
5485 enum ice_status
5486 ice_remove_vlan_hw_filters(struct ice_vsi *vsi, u16 *vid, u16 length)
5487 {
5488 	struct ice_hw *hw = &vsi->sc->hw;
5489 	struct ice_list_head vlan_list;
5490 	struct ice_fltr_list_entry *vlan_entries;
5491 	enum ice_status status;
5492 
5493 	MPASS(length > 0);
5494 
5495 	INIT_LIST_HEAD(&vlan_list);
5496 
5497 	vlan_entries = (struct ice_fltr_list_entry *)
5498 	    malloc(sizeof(*vlan_entries) * length, M_ICE, M_NOWAIT | M_ZERO);
5499 	if (!vlan_entries)
5500 		return (ICE_ERR_NO_MEMORY);
5501 
5502 	for (u16 i = 0; i < length; i++) {
5503 		vlan_entries[i].fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
5504 		vlan_entries[i].fltr_info.fltr_act = ICE_FWD_TO_VSI;
5505 		vlan_entries[i].fltr_info.flag = ICE_FLTR_TX;
5506 		vlan_entries[i].fltr_info.src_id = ICE_SRC_ID_VSI;
5507 		vlan_entries[i].fltr_info.vsi_handle = vsi->idx;
5508 		vlan_entries[i].fltr_info.l_data.vlan.vlan_id = vid[i];
5509 
5510 		LIST_ADD(&vlan_entries[i].list_entry, &vlan_list);
5511 	}
5512 
5513 	status = ice_remove_vlan(hw, &vlan_list);
5514 	if (!status)
5515 		goto done;
5516 
5517 	device_printf(vsi->sc->dev, "Failed to remove VLAN filters:\n");
5518 	for (u16 i = 0; i < length; i++) {
5519 		device_printf(vsi->sc->dev,
5520 		    "- vlan %d, status %d\n",
5521 		    vlan_entries[i].fltr_info.l_data.vlan.vlan_id,
5522 		    vlan_entries[i].status);
5523 	}
5524 done:
5525 	free(vlan_entries, M_ICE);
5526 	return (status);
5527 }
5528 
5529 /**
5530  * ice_remove_vlan_hw_filter - Remove a VLAN filter for a given VSI
5531  * @vsi: The VSI to remove the filter from
5532  * @vid: VLAN to remove
5533  *
5534  * Removes a previously programmed HW filter for the specified VSI.
5535  */
5536 enum ice_status
5537 ice_remove_vlan_hw_filter(struct ice_vsi *vsi, u16 vid)
5538 {
5539 	return ice_remove_vlan_hw_filters(vsi, &vid, 1);
5540 }
5541 
5542 #define ICE_SYSCTL_HELP_RX_ITR			\
5543 "\nControl Rx interrupt throttle rate."		\
5544 "\n\t0-8160 - sets interrupt rate in usecs"	\
5545 "\n\t    -1 - reset the Rx itr to default"
5546 
5547 /**
5548  * ice_sysctl_rx_itr - Display or change the Rx ITR for a VSI
5549  * @oidp: sysctl oid structure
5550  * @arg1: pointer to private data structure
5551  * @arg2: unused
5552  * @req: sysctl request pointer
5553  *
5554  * On read: Displays the current Rx ITR value
5555  * on write: Sets the Rx ITR value, reconfiguring device if it is up
5556  */
5557 static int
5558 ice_sysctl_rx_itr(SYSCTL_HANDLER_ARGS)
5559 {
5560 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
5561 	struct ice_softc *sc = vsi->sc;
5562 	int increment, ret;
5563 
5564 	UNREFERENCED_PARAMETER(arg2);
5565 
5566 	if (ice_driver_is_detaching(sc))
5567 		return (ESHUTDOWN);
5568 
5569 	ret = sysctl_handle_16(oidp, &vsi->rx_itr, 0, req);
5570 	if ((ret) || (req->newptr == NULL))
5571 		return (ret);
5572 
5573 	if (vsi->rx_itr < 0)
5574 		vsi->rx_itr = ICE_DFLT_RX_ITR;
5575 	if (vsi->rx_itr > ICE_ITR_MAX)
5576 		vsi->rx_itr = ICE_ITR_MAX;
5577 
5578 	/* Assume 2usec increment if it hasn't been loaded yet */
5579 	increment = sc->hw.itr_gran ? : 2;
5580 
5581 	/* We need to round the value to the hardware's ITR granularity */
5582 	vsi->rx_itr = (vsi->rx_itr / increment ) * increment;
5583 
5584 	/* If the driver has finished initializing, then we need to reprogram
5585 	 * the ITR registers now. Otherwise, they will be programmed during
5586 	 * driver initialization.
5587 	 */
5588 	if (ice_test_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
5589 		ice_configure_rx_itr(vsi);
5590 
5591 	return (0);
5592 }
5593 
5594 #define ICE_SYSCTL_HELP_TX_ITR			\
5595 "\nControl Tx interrupt throttle rate."		\
5596 "\n\t0-8160 - sets interrupt rate in usecs"	\
5597 "\n\t    -1 - reset the Tx itr to default"
5598 
5599 /**
5600  * ice_sysctl_tx_itr - Display or change the Tx ITR for a VSI
5601  * @oidp: sysctl oid structure
5602  * @arg1: pointer to private data structure
5603  * @arg2: unused
5604  * @req: sysctl request pointer
5605  *
5606  * On read: Displays the current Tx ITR value
5607  * on write: Sets the Tx ITR value, reconfiguring device if it is up
5608  */
5609 static int
5610 ice_sysctl_tx_itr(SYSCTL_HANDLER_ARGS)
5611 {
5612 	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
5613 	struct ice_softc *sc = vsi->sc;
5614 	int increment, ret;
5615 
5616 	UNREFERENCED_PARAMETER(arg2);
5617 
5618 	if (ice_driver_is_detaching(sc))
5619 		return (ESHUTDOWN);
5620 
5621 	ret = sysctl_handle_16(oidp, &vsi->tx_itr, 0, req);
5622 	if ((ret) || (req->newptr == NULL))
5623 		return (ret);
5624 
5625 	/* Allow configuring a negative value to reset to the default */
5626 	if (vsi->tx_itr < 0)
5627 		vsi->tx_itr = ICE_DFLT_TX_ITR;
5628 	if (vsi->tx_itr > ICE_ITR_MAX)
5629 		vsi->tx_itr = ICE_ITR_MAX;
5630 
5631 	/* Assume 2usec increment if it hasn't been loaded yet */
5632 	increment = sc->hw.itr_gran ? : 2;
5633 
5634 	/* We need to round the value to the hardware's ITR granularity */
5635 	vsi->tx_itr = (vsi->tx_itr / increment ) * increment;
5636 
5637 	/* If the driver has finished initializing, then we need to reprogram
5638 	 * the ITR registers now. Otherwise, they will be programmed during
5639 	 * driver initialization.
5640 	 */
5641 	if (ice_test_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
5642 		ice_configure_tx_itr(vsi);
5643 
5644 	return (0);
5645 }
5646 
5647 /**
5648  * ice_add_vsi_tunables - Add tunables and nodes for a VSI
5649  * @vsi: pointer to VSI structure
5650  * @parent: parent node to add the tunables under
5651  *
5652  * Create a sysctl context for the VSI, so that sysctls for the VSI can be
5653  * dynamically removed upon VSI removal.
5654  *
5655  * Add various tunables and set up the basic node structure for the VSI. Must
5656  * be called *prior* to ice_add_vsi_sysctls. It should be called as soon as
5657  * possible after the VSI memory is initialized.
5658  *
5659  * VSI specific sysctls with CTLFLAG_TUN should be initialized here so that
5660  * their values can be read from loader.conf prior to their first use in the
5661  * driver.
5662  */
5663 void
5664 ice_add_vsi_tunables(struct ice_vsi *vsi, struct sysctl_oid *parent)
5665 {
5666 	struct sysctl_oid_list *vsi_list;
5667 	char vsi_name[32], vsi_desc[32];
5668 
5669 	struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);
5670 
5671 	/* Initialize the sysctl context for this VSI */
5672 	sysctl_ctx_init(&vsi->ctx);
5673 
5674 	/* Add a node to collect this VSI's statistics together */
5675 	snprintf(vsi_name, sizeof(vsi_name), "%u", vsi->idx);
5676 	snprintf(vsi_desc, sizeof(vsi_desc), "VSI %u", vsi->idx);
5677 	vsi->vsi_node = SYSCTL_ADD_NODE(&vsi->ctx, parent_list, OID_AUTO, vsi_name,
5678 					CTLFLAG_RD, NULL, vsi_desc);
5679 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
5680 
5681 	vsi->rx_itr = ICE_DFLT_TX_ITR;
5682 	SYSCTL_ADD_PROC(&vsi->ctx, vsi_list, OID_AUTO, "rx_itr",
5683 			CTLTYPE_S16 | CTLFLAG_RWTUN,
5684 			vsi, 0, ice_sysctl_rx_itr, "S",
5685 			ICE_SYSCTL_HELP_RX_ITR);
5686 
5687 	vsi->tx_itr = ICE_DFLT_TX_ITR;
5688 	SYSCTL_ADD_PROC(&vsi->ctx, vsi_list, OID_AUTO, "tx_itr",
5689 			CTLTYPE_S16 | CTLFLAG_RWTUN,
5690 			vsi, 0, ice_sysctl_tx_itr, "S",
5691 			ICE_SYSCTL_HELP_TX_ITR);
5692 }
5693 
5694 /**
5695  * ice_del_vsi_sysctl_ctx - Delete the sysctl context(s) of a VSI
5696  * @vsi: the VSI to remove contexts for
5697  *
5698  * Free the context for the VSI sysctls. This includes the main context, as
5699  * well as the per-queue sysctls.
5700  */
5701 void
5702 ice_del_vsi_sysctl_ctx(struct ice_vsi *vsi)
5703 {
5704 	device_t dev = vsi->sc->dev;
5705 	int err;
5706 
5707 	if (vsi->vsi_node) {
5708 		err = sysctl_ctx_free(&vsi->ctx);
5709 		if (err)
5710 			device_printf(dev, "failed to free VSI %d sysctl context, err %s\n",
5711 				      vsi->idx, ice_err_str(err));
5712 		vsi->vsi_node = NULL;
5713 	}
5714 }
5715 
5716 /**
5717  * ice_add_dscp2tc_map_sysctls - Add sysctl tree for DSCP to TC mapping
5718  * @sc: pointer to device private softc
5719  * @ctx: the sysctl ctx to use
5720  * @ctx_list: list of sysctl children for device (to add sysctl tree to)
5721  *
5722  * Add a sysctl tree for individual dscp2tc_map sysctls. Each child of this
5723  * node can map 8 DSCPs to TC values; there are 8 of these in turn for a total
5724  * of 64 DSCP to TC map values that the user can configure.
5725  */
5726 void
5727 ice_add_dscp2tc_map_sysctls(struct ice_softc *sc,
5728 			    struct sysctl_ctx_list *ctx,
5729 			    struct sysctl_oid_list *ctx_list)
5730 {
5731 	struct sysctl_oid_list *node_list;
5732 	struct sysctl_oid *node;
5733 	struct sbuf *namebuf, *descbuf;
5734 	int first_dscp_val, last_dscp_val;
5735 
5736 	node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "dscp2tc_map", CTLFLAG_RD,
5737 			       NULL, "Map of DSCP values to DCB TCs");
5738 	node_list = SYSCTL_CHILDREN(node);
5739 
5740 	namebuf = sbuf_new_auto();
5741 	descbuf = sbuf_new_auto();
5742 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
5743 		sbuf_clear(namebuf);
5744 		sbuf_clear(descbuf);
5745 
5746 		first_dscp_val = i * 8;
5747 		last_dscp_val = first_dscp_val + 7;
5748 
5749 		sbuf_printf(namebuf, "%d-%d", first_dscp_val, last_dscp_val);
5750 		sbuf_printf(descbuf, "Map DSCP values %d to %d to TCs",
5751 			    first_dscp_val, last_dscp_val);
5752 
5753 		sbuf_finish(namebuf);
5754 		sbuf_finish(descbuf);
5755 
5756 		SYSCTL_ADD_PROC(ctx, node_list,
5757 		    OID_AUTO, sbuf_data(namebuf), CTLTYPE_STRING | CTLFLAG_RW,
5758 		    sc, i, ice_sysctl_dscp2tc_map, "A", sbuf_data(descbuf));
5759 	}
5760 
5761 	sbuf_delete(namebuf);
5762 	sbuf_delete(descbuf);
5763 }
5764 
5765 /**
5766  * ice_add_device_tunables - Add early tunable sysctls and sysctl nodes
5767  * @sc: device private structure
5768  *
5769  * Add per-device dynamic tunable sysctls, and setup the general sysctl trees
5770  * for re-use by ice_add_device_sysctls.
5771  *
5772  * In order for the sysctl fields to be initialized before use, this function
5773  * should be called as early as possible during attach activities.
5774  *
5775  * Any non-global sysctl marked as CTLFLAG_TUN should likely be initialized
5776  * here in this function, rather than later in ice_add_device_sysctls.
5777  *
5778  * To make things easier, this function is also expected to setup the various
5779  * sysctl nodes in addition to tunables so that other sysctls which can't be
5780  * initialized early can hook into the same nodes.
5781  */
5782 void
5783 ice_add_device_tunables(struct ice_softc *sc)
5784 {
5785 	device_t dev = sc->dev;
5786 
5787 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5788 	struct sysctl_oid_list *ctx_list =
5789 		SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
5790 
5791 	sc->enable_health_events = ice_enable_health_events;
5792 
5793 	SYSCTL_ADD_BOOL(ctx, ctx_list, OID_AUTO, "enable_health_events",
5794 			CTLFLAG_RDTUN, &sc->enable_health_events, 0,
5795 			"Enable FW health event reporting for this PF");
5796 
5797 	/* Add a node to track VSI sysctls. Keep track of the node in the
5798 	 * softc so that we can hook other sysctls into it later. This
5799 	 * includes both the VSI statistics, as well as potentially dynamic
5800 	 * VSIs in the future.
5801 	 */
5802 
5803 	sc->vsi_sysctls = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "vsi",
5804 					  CTLFLAG_RD, NULL, "VSI Configuration and Statistics");
5805 
5806 	/* Add debug tunables */
5807 	ice_add_debug_tunables(sc);
5808 }
5809 
5810 /**
5811  * ice_sysctl_dump_mac_filters - Dump a list of all HW MAC Filters
5812  * @oidp: sysctl oid structure
5813  * @arg1: pointer to private data structure
5814  * @arg2: unused
5815  * @req: sysctl request pointer
5816  *
5817  * Callback for "mac_filters" sysctl to dump the programmed MAC filters.
5818  */
5819 static int
5820 ice_sysctl_dump_mac_filters(SYSCTL_HANDLER_ARGS)
5821 {
5822 	struct ice_softc *sc = (struct ice_softc *)arg1;
5823 	struct ice_hw *hw = &sc->hw;
5824 	struct ice_switch_info *sw = hw->switch_info;
5825 	struct ice_fltr_mgmt_list_entry *fm_entry;
5826 	struct ice_list_head *rule_head;
5827 	struct ice_lock *rule_lock;
5828 	struct ice_fltr_info *fi;
5829 	struct sbuf *sbuf;
5830 	int ret;
5831 
5832 	UNREFERENCED_PARAMETER(oidp);
5833 	UNREFERENCED_PARAMETER(arg2);
5834 
5835 	if (ice_driver_is_detaching(sc))
5836 		return (ESHUTDOWN);
5837 
5838 	/* Wire the old buffer so we can take a non-sleepable lock */
5839 	ret = sysctl_wire_old_buffer(req, 0);
5840 	if (ret)
5841 		return (ret);
5842 
5843 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5844 
5845 	rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock;
5846 	rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
5847 
5848 	sbuf_printf(sbuf, "MAC Filter List");
5849 
5850 	ice_acquire_lock(rule_lock);
5851 
5852 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5853 		fi = &fm_entry->fltr_info;
5854 
5855 		sbuf_printf(sbuf,
5856 			    "\nmac = %6D, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %d",
5857 			    fi->l_data.mac.mac_addr, ":", fi->vsi_handle,
5858 			    ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en,
5859 			    ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id);
5860 
5861 		/* if we have a vsi_list_info, print some information about that */
5862 		if (fm_entry->vsi_list_info) {
5863 			sbuf_printf(sbuf,
5864 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
5865 				    fm_entry->vsi_count,
5866 				    fm_entry->vsi_list_info->vsi_list_id,
5867 				    fm_entry->vsi_list_info->ref_cnt);
5868 		}
5869 	}
5870 
5871 	ice_release_lock(rule_lock);
5872 
5873 	sbuf_finish(sbuf);
5874 	sbuf_delete(sbuf);
5875 
5876 	return (0);
5877 }
5878 
5879 /**
5880  * ice_sysctl_dump_vlan_filters - Dump a list of all HW VLAN Filters
5881  * @oidp: sysctl oid structure
5882  * @arg1: pointer to private data structure
5883  * @arg2: unused
5884  * @req: sysctl request pointer
5885  *
5886  * Callback for "vlan_filters" sysctl to dump the programmed VLAN filters.
5887  */
5888 static int
5889 ice_sysctl_dump_vlan_filters(SYSCTL_HANDLER_ARGS)
5890 {
5891 	struct ice_softc *sc = (struct ice_softc *)arg1;
5892 	struct ice_hw *hw = &sc->hw;
5893 	struct ice_switch_info *sw = hw->switch_info;
5894 	struct ice_fltr_mgmt_list_entry *fm_entry;
5895 	struct ice_list_head *rule_head;
5896 	struct ice_lock *rule_lock;
5897 	struct ice_fltr_info *fi;
5898 	struct sbuf *sbuf;
5899 	int ret;
5900 
5901 	UNREFERENCED_PARAMETER(oidp);
5902 	UNREFERENCED_PARAMETER(arg2);
5903 
5904 	if (ice_driver_is_detaching(sc))
5905 		return (ESHUTDOWN);
5906 
5907 	/* Wire the old buffer so we can take a non-sleepable lock */
5908 	ret = sysctl_wire_old_buffer(req, 0);
5909 	if (ret)
5910 		return (ret);
5911 
5912 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5913 
5914 	rule_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock;
5915 	rule_head = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rules;
5916 
5917 	sbuf_printf(sbuf, "VLAN Filter List");
5918 
5919 	ice_acquire_lock(rule_lock);
5920 
5921 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5922 		fi = &fm_entry->fltr_info;
5923 
5924 		sbuf_printf(sbuf,
5925 			    "\nvlan_id = %4d, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d",
5926 			    fi->l_data.vlan.vlan_id, fi->vsi_handle,
5927 			    ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en,
5928 			    ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id);
5929 
5930 		/* if we have a vsi_list_info, print some information about that */
5931 		if (fm_entry->vsi_list_info) {
5932 			sbuf_printf(sbuf,
5933 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
5934 				    fm_entry->vsi_count,
5935 				    fm_entry->vsi_list_info->vsi_list_id,
5936 				    fm_entry->vsi_list_info->ref_cnt);
5937 		}
5938 	}
5939 
5940 	ice_release_lock(rule_lock);
5941 
5942 	sbuf_finish(sbuf);
5943 	sbuf_delete(sbuf);
5944 
5945 	return (0);
5946 }
5947 
5948 /**
5949  * ice_sysctl_dump_ethertype_filters - Dump a list of all HW Ethertype filters
5950  * @oidp: sysctl oid structure
5951  * @arg1: pointer to private data structure
5952  * @arg2: unused
5953  * @req: sysctl request pointer
5954  *
5955  * Callback for "ethertype_filters" sysctl to dump the programmed Ethertype
5956  * filters.
5957  */
5958 static int
5959 ice_sysctl_dump_ethertype_filters(SYSCTL_HANDLER_ARGS)
5960 {
5961 	struct ice_softc *sc = (struct ice_softc *)arg1;
5962 	struct ice_hw *hw = &sc->hw;
5963 	struct ice_switch_info *sw = hw->switch_info;
5964 	struct ice_fltr_mgmt_list_entry *fm_entry;
5965 	struct ice_list_head *rule_head;
5966 	struct ice_lock *rule_lock;
5967 	struct ice_fltr_info *fi;
5968 	struct sbuf *sbuf;
5969 	int ret;
5970 
5971 	UNREFERENCED_PARAMETER(oidp);
5972 	UNREFERENCED_PARAMETER(arg2);
5973 
5974 	if (ice_driver_is_detaching(sc))
5975 		return (ESHUTDOWN);
5976 
5977 	/* Wire the old buffer so we can take a non-sleepable lock */
5978 	ret = sysctl_wire_old_buffer(req, 0);
5979 	if (ret)
5980 		return (ret);
5981 
5982 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5983 
5984 	rule_lock = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE].filt_rule_lock;
5985 	rule_head = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE].filt_rules;
5986 
5987 	sbuf_printf(sbuf, "Ethertype Filter List");
5988 
5989 	ice_acquire_lock(rule_lock);
5990 
5991 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5992 		fi = &fm_entry->fltr_info;
5993 
5994 		sbuf_printf(sbuf,
5995 			    "\nethertype = 0x%04x, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d",
5996 			fi->l_data.ethertype_mac.ethertype,
5997 			fi->vsi_handle, ice_fltr_flag_str(fi->flag),
5998 			fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act),
5999 			fi->fltr_rule_id);
6000 
6001 		/* if we have a vsi_list_info, print some information about that */
6002 		if (fm_entry->vsi_list_info) {
6003 			sbuf_printf(sbuf,
6004 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
6005 				    fm_entry->vsi_count,
6006 				    fm_entry->vsi_list_info->vsi_list_id,
6007 				    fm_entry->vsi_list_info->ref_cnt);
6008 		}
6009 	}
6010 
6011 	ice_release_lock(rule_lock);
6012 
6013 	sbuf_finish(sbuf);
6014 	sbuf_delete(sbuf);
6015 
6016 	return (0);
6017 }
6018 
6019 /**
6020  * ice_sysctl_dump_ethertype_mac_filters - Dump a list of all HW Ethertype/MAC filters
6021  * @oidp: sysctl oid structure
6022  * @arg1: pointer to private data structure
6023  * @arg2: unused
6024  * @req: sysctl request pointer
6025  *
6026  * Callback for "ethertype_mac_filters" sysctl to dump the programmed
6027  * Ethertype/MAC filters.
6028  */
6029 static int
6030 ice_sysctl_dump_ethertype_mac_filters(SYSCTL_HANDLER_ARGS)
6031 {
6032 	struct ice_softc *sc = (struct ice_softc *)arg1;
6033 	struct ice_hw *hw = &sc->hw;
6034 	struct ice_switch_info *sw = hw->switch_info;
6035 	struct ice_fltr_mgmt_list_entry *fm_entry;
6036 	struct ice_list_head *rule_head;
6037 	struct ice_lock *rule_lock;
6038 	struct ice_fltr_info *fi;
6039 	struct sbuf *sbuf;
6040 	int ret;
6041 
6042 	UNREFERENCED_PARAMETER(oidp);
6043 	UNREFERENCED_PARAMETER(arg2);
6044 
6045 	if (ice_driver_is_detaching(sc))
6046 		return (ESHUTDOWN);
6047 
6048 	/* Wire the old buffer so we can take a non-sleepable lock */
6049 	ret = sysctl_wire_old_buffer(req, 0);
6050 	if (ret)
6051 		return (ret);
6052 
6053 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6054 
6055 	rule_lock = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE_MAC].filt_rule_lock;
6056 	rule_head = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE_MAC].filt_rules;
6057 
6058 	sbuf_printf(sbuf, "Ethertype/MAC Filter List");
6059 
6060 	ice_acquire_lock(rule_lock);
6061 
6062 	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
6063 		fi = &fm_entry->fltr_info;
6064 
6065 		sbuf_printf(sbuf,
6066 			    "\nethertype = 0x%04x, mac = %6D, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d",
6067 			    fi->l_data.ethertype_mac.ethertype,
6068 			    fi->l_data.ethertype_mac.mac_addr, ":",
6069 			    fi->vsi_handle, ice_fltr_flag_str(fi->flag),
6070 			    fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act),
6071 			    fi->fltr_rule_id);
6072 
6073 		/* if we have a vsi_list_info, print some information about that */
6074 		if (fm_entry->vsi_list_info) {
6075 			sbuf_printf(sbuf,
6076 				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
6077 				    fm_entry->vsi_count,
6078 				    fm_entry->vsi_list_info->vsi_list_id,
6079 				    fm_entry->vsi_list_info->ref_cnt);
6080 		}
6081 	}
6082 
6083 	ice_release_lock(rule_lock);
6084 
6085 	sbuf_finish(sbuf);
6086 	sbuf_delete(sbuf);
6087 
6088 	return (0);
6089 }
6090 
6091 /**
6092  * ice_sysctl_dump_state_flags - Dump device driver state flags
6093  * @oidp: sysctl oid structure
6094  * @arg1: pointer to private data structure
6095  * @arg2: unused
6096  * @req: sysctl request pointer
6097  *
6098  * Callback for "state" sysctl to display currently set driver state flags.
6099  */
6100 static int
6101 ice_sysctl_dump_state_flags(SYSCTL_HANDLER_ARGS)
6102 {
6103 	struct ice_softc *sc = (struct ice_softc *)arg1;
6104 	struct sbuf *sbuf;
6105 	u32 copied_state;
6106 	unsigned int i;
6107 	bool at_least_one = false;
6108 
6109 	UNREFERENCED_PARAMETER(oidp);
6110 	UNREFERENCED_PARAMETER(arg2);
6111 
6112 	if (ice_driver_is_detaching(sc))
6113 		return (ESHUTDOWN);
6114 
6115 	/* Make a copy of the state to ensure we display coherent values */
6116 	copied_state = atomic_load_acq_32(&sc->state);
6117 
6118 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6119 
6120 	/* Add the string for each set state to the sbuf */
6121 	for (i = 0; i < 32; i++) {
6122 		if (copied_state & BIT(i)) {
6123 			const char *str = ice_state_to_str((enum ice_state)i);
6124 
6125 			at_least_one = true;
6126 
6127 			if (str)
6128 				sbuf_printf(sbuf, "\n%s", str);
6129 			else
6130 				sbuf_printf(sbuf, "\nBIT(%u)", i);
6131 		}
6132 	}
6133 
6134 	if (!at_least_one)
6135 		sbuf_printf(sbuf, "Nothing set");
6136 
6137 	sbuf_finish(sbuf);
6138 	sbuf_delete(sbuf);
6139 
6140 	return (0);
6141 }
6142 
6143 #define ICE_SYSCTL_DEBUG_MASK_HELP \
6144 "\nSelect debug statements to print to kernel messages"		\
6145 "\nFlags:"							\
6146 "\n\t        0x1 - Function Tracing"				\
6147 "\n\t        0x2 - Driver Initialization"			\
6148 "\n\t        0x4 - Release"					\
6149 "\n\t        0x8 - FW Logging"					\
6150 "\n\t       0x10 - Link"					\
6151 "\n\t       0x20 - PHY"						\
6152 "\n\t       0x40 - Queue Context"				\
6153 "\n\t       0x80 - NVM"						\
6154 "\n\t      0x100 - LAN"						\
6155 "\n\t      0x200 - Flow"					\
6156 "\n\t      0x400 - DCB"						\
6157 "\n\t      0x800 - Diagnostics"					\
6158 "\n\t     0x1000 - Flow Director"				\
6159 "\n\t     0x2000 - Switch"					\
6160 "\n\t     0x4000 - Scheduler"					\
6161 "\n\t     0x8000 - RDMA"					\
6162 "\n\t    0x10000 - DDP Package"					\
6163 "\n\t    0x20000 - Resources"					\
6164 "\n\t    0x40000 - ACL"						\
6165 "\n\t    0x80000 - PTP"						\
6166 "\n\t   0x100000 - Admin Queue messages"			\
6167 "\n\t   0x200000 - Admin Queue descriptors"			\
6168 "\n\t   0x400000 - Admin Queue descriptor buffers"		\
6169 "\n\t   0x800000 - Admin Queue commands"			\
6170 "\n\t  0x1000000 - Parser"					\
6171 "\n\t  ..."							\
6172 "\n\t  0x8000000 - (Reserved for user)"				\
6173 "\n\t"								\
6174 "\nUse \"sysctl -x\" to view flags properly."
6175 
6176 /**
6177  * ice_add_debug_tunables - Add tunables helpful for debugging the device driver
6178  * @sc: device private structure
6179  *
6180  * Add sysctl tunable values related to debugging the device driver. For now,
6181  * this means a tunable to set the debug mask early during driver load.
6182  *
6183  * The debug node will be marked CTLFLAG_SKIP unless INVARIANTS is defined, so
6184  * that in normal kernel builds, these will all be hidden, but on a debug
6185  * kernel they will be more easily visible.
6186  */
6187 static void
6188 ice_add_debug_tunables(struct ice_softc *sc)
6189 {
6190 	struct sysctl_oid_list *debug_list;
6191 	device_t dev = sc->dev;
6192 
6193 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
6194 	struct sysctl_oid_list *ctx_list =
6195 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
6196 
6197 	sc->debug_sysctls = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "debug",
6198 					    ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6199 					    NULL, "Debug Sysctls");
6200 	debug_list = SYSCTL_CHILDREN(sc->debug_sysctls);
6201 
6202 	SYSCTL_ADD_U64(ctx, debug_list, OID_AUTO, "debug_mask",
6203 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RWTUN,
6204 		       &sc->hw.debug_mask, 0,
6205 		       ICE_SYSCTL_DEBUG_MASK_HELP);
6206 
6207 	/* Load the default value from the global sysctl first */
6208 	sc->enable_tx_fc_filter = ice_enable_tx_fc_filter;
6209 
6210 	SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "enable_tx_fc_filter",
6211 			ICE_CTLFLAG_DEBUG | CTLFLAG_RDTUN,
6212 			&sc->enable_tx_fc_filter, 0,
6213 			"Drop Ethertype 0x8808 control frames originating from software on this PF");
6214 
6215 	sc->tx_balance_en = ice_tx_balance_en;
6216 	SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "tx_balance",
6217 			ICE_CTLFLAG_DEBUG | CTLFLAG_RWTUN,
6218 			&sc->tx_balance_en, 0,
6219 			"Enable 5-layer scheduler topology");
6220 
6221 	/* Load the default value from the global sysctl first */
6222 	sc->enable_tx_lldp_filter = ice_enable_tx_lldp_filter;
6223 
6224 	SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "enable_tx_lldp_filter",
6225 			ICE_CTLFLAG_DEBUG | CTLFLAG_RDTUN,
6226 			&sc->enable_tx_lldp_filter, 0,
6227 			"Drop Ethertype 0x88cc LLDP frames originating from software on this PF");
6228 
6229 	ice_add_fw_logging_tunables(sc, sc->debug_sysctls);
6230 }
6231 
6232 #define ICE_SYSCTL_HELP_REQUEST_RESET		\
6233 "\nRequest the driver to initiate a reset."	\
6234 "\n\tpfr - Initiate a PF reset"			\
6235 "\n\tcorer - Initiate a CORE reset"		\
6236 "\n\tglobr - Initiate a GLOBAL reset"
6237 
6238 /**
6239  * @var rl_sysctl_ticks
6240  * @brief timestamp for latest reset request sysctl call
6241  *
6242  * Helps rate-limit the call to the sysctl which resets the device
6243  */
6244 int rl_sysctl_ticks = 0;
6245 
6246 /**
6247  * ice_sysctl_request_reset - Request that the driver initiate a reset
6248  * @oidp: sysctl oid structure
6249  * @arg1: pointer to private data structure
6250  * @arg2: unused
6251  * @req: sysctl request pointer
6252  *
6253  * Callback for "request_reset" sysctl to request that the driver initiate
6254  * a reset. Expects to be passed one of the following strings
6255  *
6256  * "pfr" - Initiate a PF reset
6257  * "corer" - Initiate a CORE reset
6258  * "globr" - Initiate a Global reset
6259  */
6260 static int
6261 ice_sysctl_request_reset(SYSCTL_HANDLER_ARGS)
6262 {
6263 	struct ice_softc *sc = (struct ice_softc *)arg1;
6264 	struct ice_hw *hw = &sc->hw;
6265 	enum ice_status status;
6266 	enum ice_reset_req reset_type = ICE_RESET_INVAL;
6267 	const char *reset_message;
6268 	int ret;
6269 
6270 	/* Buffer to store the requested reset string. Must contain enough
6271 	 * space to store the largest expected reset string, which currently
6272 	 * means 6 bytes of space.
6273 	 */
6274 	char reset[6] = "";
6275 
6276 	UNREFERENCED_PARAMETER(arg2);
6277 
6278 	ret = priv_check(curthread, PRIV_DRIVER);
6279 	if (ret)
6280 		return (ret);
6281 
6282 	if (ice_driver_is_detaching(sc))
6283 		return (ESHUTDOWN);
6284 
6285 	/* Read in the requested reset type. */
6286 	ret = sysctl_handle_string(oidp, reset, sizeof(reset), req);
6287 	if ((ret) || (req->newptr == NULL))
6288 		return (ret);
6289 
6290 	if (strcmp(reset, "pfr") == 0) {
6291 		reset_message = "Requesting a PF reset";
6292 		reset_type = ICE_RESET_PFR;
6293 	} else if (strcmp(reset, "corer") == 0) {
6294 		reset_message = "Initiating a CORE reset";
6295 		reset_type = ICE_RESET_CORER;
6296 	} else if (strcmp(reset, "globr") == 0) {
6297 		reset_message = "Initiating a GLOBAL reset";
6298 		reset_type = ICE_RESET_GLOBR;
6299 	} else if (strcmp(reset, "empr") == 0) {
6300 		device_printf(sc->dev, "Triggering an EMP reset via software is not currently supported\n");
6301 		return (EOPNOTSUPP);
6302 	}
6303 
6304 	if (reset_type == ICE_RESET_INVAL) {
6305 		device_printf(sc->dev, "%s is not a valid reset request\n", reset);
6306 		return (EINVAL);
6307 	}
6308 
6309 	/*
6310 	 * Rate-limit the frequency at which this function is called.
6311 	 * Assuming this is called successfully once, typically,
6312 	 * everything should be handled within the allotted time frame.
6313 	 * However, in the odd setup situations, we've also put in
6314 	 * guards for when the reset has finished, but we're in the
6315 	 * process of rebuilding. And instead of queueing an intent,
6316 	 * simply error out and let the caller retry, if so desired.
6317 	 */
6318 	if (TICKS_2_MSEC(ticks - rl_sysctl_ticks) < 500) {
6319 		device_printf(sc->dev,
6320 		    "Call frequency too high. Operation aborted.\n");
6321 		return (EBUSY);
6322 	}
6323 	rl_sysctl_ticks = ticks;
6324 
6325 	if (TICKS_2_MSEC(ticks - sc->rebuild_ticks) < 100) {
6326 		device_printf(sc->dev, "Device rebuilding. Operation aborted.\n");
6327 		return (EBUSY);
6328 	}
6329 
6330 	if (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_DEVSTATE_M) {
6331 		device_printf(sc->dev, "Device in reset. Operation aborted.\n");
6332 		return (EBUSY);
6333 	}
6334 
6335 	device_printf(sc->dev, "%s\n", reset_message);
6336 
6337 	/* Initiate the PF reset during the admin status task */
6338 	if (reset_type == ICE_RESET_PFR) {
6339 		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
6340 		return (0);
6341 	}
6342 
6343 	/*
6344 	 * Other types of resets including CORE and GLOBAL resets trigger an
6345 	 * interrupt on all PFs. Initiate the reset now. Preparation and
6346 	 * rebuild logic will be handled by the admin status task.
6347 	 */
6348 	status = ice_reset(hw, reset_type);
6349 
6350 	/*
6351 	 * Resets can take a long time and we still don't want another call
6352 	 * to this function before we settle down.
6353 	 */
6354 	rl_sysctl_ticks = ticks;
6355 
6356 	if (status) {
6357 		device_printf(sc->dev, "failed to initiate device reset, err %s\n",
6358 			      ice_status_str(status));
6359 		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
6360 		return (EFAULT);
6361 	}
6362 
6363 	return (0);
6364 }
6365 
6366 #define ICE_AQC_DBG_DUMP_CLUSTER_ID_INVALID	(0xFFFFFF)
6367 #define ICE_SYSCTL_HELP_FW_DEBUG_DUMP_CLUSTER_SETTING		\
6368 "\nSelect clusters to dump with \"dump\" sysctl"		\
6369 "\nFlags:"							\
6370 "\n\t      0x1 - Switch"					\
6371 "\n\t      0x2 - ACL"						\
6372 "\n\t      0x4 - Tx Scheduler"					\
6373 "\n\t      0x8 - Profile Configuration"			\
6374 "\n\t     0x20 - Link"						\
6375 "\n\t     0x80 - DCB"						\
6376 "\n\t    0x100 - L2P"						\
6377 "\n\t 0x400000 - Manageability Transactions"			\
6378 "\n\t"								\
6379 "\nUse \"sysctl -x\" to view flags properly."
6380 
6381 /**
6382  * ice_sysctl_fw_debug_dump_cluster_setting - Set which clusters to dump
6383  *     from FW when FW debug dump occurs
6384  * @oidp: sysctl oid structure
6385  * @arg1: pointer to private data structure
6386  * @arg2: unused
6387  * @req: sysctl request pointer
6388  */
6389 static int
6390 ice_sysctl_fw_debug_dump_cluster_setting(SYSCTL_HANDLER_ARGS)
6391 {
6392 	struct ice_softc *sc = (struct ice_softc *)arg1;
6393 	device_t dev = sc->dev;
6394 	u32 clusters;
6395 	int ret;
6396 
6397 	UNREFERENCED_PARAMETER(arg2);
6398 
6399 	ret = priv_check(curthread, PRIV_DRIVER);
6400 	if (ret)
6401 		return (ret);
6402 
6403 	if (ice_driver_is_detaching(sc))
6404 		return (ESHUTDOWN);
6405 
6406 	clusters = sc->fw_debug_dump_cluster_mask;
6407 
6408 	ret = sysctl_handle_32(oidp, &clusters, 0, req);
6409 	if ((ret) || (req->newptr == NULL))
6410 		return (ret);
6411 
6412 	if (clusters & ~(ICE_FW_DEBUG_DUMP_VALID_CLUSTER_MASK)) {
6413 		device_printf(dev,
6414 		    "%s: ERROR: Incorrect settings requested\n",
6415 		    __func__);
6416 		sc->fw_debug_dump_cluster_mask = ICE_AQC_DBG_DUMP_CLUSTER_ID_INVALID;
6417 		return (EINVAL);
6418 	}
6419 
6420 	sc->fw_debug_dump_cluster_mask = clusters;
6421 
6422 	return (0);
6423 }
6424 
/**
 * @def ICE_FW_DUMP_AQ_COUNT_LIMIT
 * @brief cap on loop iterations when dumping one FW debug cluster
 *
 * ice_fw_debug_dump_print_cluster stops reading a cluster, and logs a
 * message, once its per-cluster iteration counter reaches this value.
 */
#define ICE_FW_DUMP_AQ_COUNT_LIMIT	(10000)
6426 
6427 /**
6428  * ice_fw_debug_dump_print_cluster - Print formatted cluster data from FW
6429  * @sc: the device softc
6430  * @sbuf: initialized sbuf to print data to
6431  * @cluster_id: FW cluster ID to print data from
6432  *
6433  * Reads debug data from the specified cluster id in the FW and prints it to
6434  * the input sbuf. This function issues multiple AQ commands to the FW in
6435  * order to get all of the data in the cluster.
6436  *
6437  * @remark Only intended to be used by the sysctl handler
6438  * ice_sysctl_fw_debug_dump_do_dump
6439  */
6440 static u16
6441 ice_fw_debug_dump_print_cluster(struct ice_softc *sc, struct sbuf *sbuf, u16 cluster_id)
6442 {
6443 	struct ice_hw *hw = &sc->hw;
6444 	device_t dev = sc->dev;
6445 	u16 data_buf_size = ICE_AQ_MAX_BUF_LEN;
6446 	const u8 reserved_buf[8] = {};
6447 	enum ice_status status;
6448 	int counter = 0;
6449 	u8 *data_buf;
6450 
6451 	/* Input parameters / loop variables */
6452 	u16 table_id = 0;
6453 	u32 offset = 0;
6454 
6455 	/* Output from the Get Internal Data AQ command */
6456 	u16 ret_buf_size = 0;
6457 	u16 ret_next_cluster = 0;
6458 	u16 ret_next_table = 0;
6459 	u32 ret_next_index = 0;
6460 
6461 	/* Other setup */
6462 	data_buf = (u8 *)malloc(data_buf_size, M_ICE, M_NOWAIT | M_ZERO);
6463 	if (!data_buf)
6464 		return ret_next_cluster;
6465 
6466 	ice_debug(hw, ICE_DBG_DIAG, "%s: dumping cluster id %d\n", __func__,
6467 	    cluster_id);
6468 
6469 	for (;;) {
6470 		/* Do not trust the FW behavior to be completely correct */
6471 		if (counter++ >= ICE_FW_DUMP_AQ_COUNT_LIMIT) {
6472 			device_printf(dev,
6473 			    "%s: Exceeded counter limit for cluster %d\n",
6474 			    __func__, cluster_id);
6475 			break;
6476 		}
6477 
6478 		ice_debug(hw, ICE_DBG_DIAG, "---\n");
6479 		ice_debug(hw, ICE_DBG_DIAG,
6480 		    "table_id 0x%04x offset 0x%08x buf_size %d\n",
6481 		    table_id, offset, data_buf_size);
6482 
6483 		status = ice_aq_get_internal_data(hw, cluster_id, table_id,
6484 		    offset, data_buf, data_buf_size, &ret_buf_size,
6485 		    &ret_next_cluster, &ret_next_table, &ret_next_index, NULL);
6486 		if (status) {
6487 			device_printf(dev,
6488 			    "%s: ice_aq_get_internal_data in cluster %d: err %s aq_err %s\n",
6489 			    __func__, cluster_id, ice_status_str(status),
6490 			    ice_aq_str(hw->adminq.sq_last_status));
6491 			break;
6492 		}
6493 
6494 		ice_debug(hw, ICE_DBG_DIAG,
6495 		    "ret_table_id 0x%04x ret_offset 0x%08x ret_buf_size %d\n",
6496 		    ret_next_table, ret_next_index, ret_buf_size);
6497 
6498 		/* Print cluster id */
6499 		u32 print_cluster_id = (u32)cluster_id;
6500 		sbuf_bcat(sbuf, &print_cluster_id, sizeof(print_cluster_id));
6501 		/* Print table id */
6502 		u32 print_table_id = (u32)table_id;
6503 		sbuf_bcat(sbuf, &print_table_id, sizeof(print_table_id));
6504 		/* Print table length */
6505 		u32 print_table_length = (u32)ret_buf_size;
6506 		sbuf_bcat(sbuf, &print_table_length, sizeof(print_table_length));
6507 		/* Print current offset */
6508 		u32 print_curr_offset = offset;
6509 		sbuf_bcat(sbuf, &print_curr_offset, sizeof(print_curr_offset));
6510 		/* Print reserved bytes */
6511 		sbuf_bcat(sbuf, reserved_buf, sizeof(reserved_buf));
6512 		/* Print data */
6513 		sbuf_bcat(sbuf, data_buf, ret_buf_size);
6514 
6515 		/* Adjust loop variables */
6516 		memset(data_buf, 0, data_buf_size);
6517 		bool same_table_next = (table_id == ret_next_table);
6518 		bool last_table_next = (ret_next_table == 0xff || ret_next_table == 0xffff);
6519 		bool last_offset_next = (ret_next_index == 0xffffffff || ret_next_index == 0);
6520 
6521 		if ((!same_table_next && !last_offset_next) ||
6522 		    (same_table_next && last_table_next)) {
6523 			device_printf(dev,
6524 			    "%s: Unexpected conditions for same_table_next(%d) last_table_next(%d) last_offset_next(%d), ending cluster (%d)\n",
6525 			    __func__, same_table_next, last_table_next, last_offset_next, cluster_id);
6526 			break;
6527 		}
6528 
6529 		if (!same_table_next && !last_table_next && last_offset_next) {
6530 			/* We've hit the end of the table */
6531 			table_id = ret_next_table;
6532 			offset = 0;
6533 		}
6534 		else if (!same_table_next && last_table_next && last_offset_next) {
6535 			/* We've hit the end of the cluster */
6536 			break;
6537 		}
6538 		else if (same_table_next && !last_table_next && last_offset_next) {
6539 			if (cluster_id == 0x1 && table_id < 39)
6540 				table_id += 1;
6541 			else
6542 				break;
6543 		}
6544 		else { /* if (same_table_next && !last_table_next && !last_offset_next) */
6545 			/* More data left in the table */
6546 			offset = ret_next_index;
6547 		}
6548 	}
6549 
6550 	free(data_buf, M_ICE);
6551 	return ret_next_cluster;
6552 }
6553 
6554 #define ICE_SYSCTL_HELP_FW_DEBUG_DUMP_DO_DUMP \
6555 "\nWrite 1 to output a FW debug dump containing the clusters specified by the \"clusters\" sysctl" \
6556 "\nThe \"-b\" flag must be used in order to dump this data as binary data because" \
6557 "\nthis data is opaque and not a string."
6558 
6559 #define ICE_FW_DUMP_BASE_TEXT_SIZE	(1024 * 1024)
6560 #define ICE_FW_DUMP_ALL_TEXT_SIZE	(10 * 1024 * 1024)
6561 #define ICE_FW_DUMP_CLUST0_TEXT_SIZE	(2 * 1024 * 1024)
6562 #define ICE_FW_DUMP_CLUST1_TEXT_SIZE	(128 * 1024)
6563 #define ICE_FW_DUMP_CLUST2_TEXT_SIZE	(2 * 1024 * 1024)
6564 
6565 /**
6566  * ice_sysctl_fw_debug_dump_do_dump - Dump data from FW to sysctl output
6567  * @oidp: sysctl oid structure
6568  * @arg1: pointer to private data structure
6569  * @arg2: unused
6570  * @req: sysctl request pointer
6571  *
6572  * Sysctl handler for the debug.dump.dump sysctl. Prints out a specially-
6573  * formatted dump of some debug FW data intended to be processed by a special
6574  * Intel tool. Prints out the cluster data specified by the "clusters"
6575  * sysctl.
6576  *
6577  * @remark The actual AQ calls and printing are handled by a helper
6578  * function above.
6579  */
6580 static int
6581 ice_sysctl_fw_debug_dump_do_dump(SYSCTL_HANDLER_ARGS)
6582 {
6583 	struct ice_softc *sc = (struct ice_softc *)arg1;
6584 	device_t dev = sc->dev;
6585 	struct sbuf *sbuf;
6586 	int bit, ret;
6587 
6588 	UNREFERENCED_PARAMETER(arg2);
6589 
6590 	ret = priv_check(curthread, PRIV_DRIVER);
6591 	if (ret)
6592 		return (ret);
6593 
6594 	if (ice_driver_is_detaching(sc))
6595 		return (ESHUTDOWN);
6596 
6597 	/* If the user hasn't written "1" to this sysctl yet: */
6598 	if (!ice_test_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP)) {
6599 		/* Avoid output on the first set of reads to this sysctl in
6600 		 * order to prevent a null byte from being written to the
6601 		 * end result when called via sysctl(8).
6602 		 */
6603 		if (req->oldptr == NULL && req->newptr == NULL) {
6604 			ret = SYSCTL_OUT(req, 0, 0);
6605 			return (ret);
6606 		}
6607 
6608 		char input_buf[2] = "";
6609 		ret = sysctl_handle_string(oidp, input_buf, sizeof(input_buf), req);
6610 		if ((ret) || (req->newptr == NULL))
6611 			return (ret);
6612 
6613 		/* If we get '1', then indicate we'll do a dump in the next
6614 		 * sysctl read call.
6615 		 */
6616 		if (input_buf[0] == '1') {
6617 			if (sc->fw_debug_dump_cluster_mask == ICE_AQC_DBG_DUMP_CLUSTER_ID_INVALID) {
6618 				device_printf(dev,
6619 				    "%s: Debug Dump failed because an invalid cluster was specified.\n",
6620 				    __func__);
6621 				return (EINVAL);
6622 			}
6623 
6624 			ice_set_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP);
6625 			return (0);
6626 		}
6627 
6628 		return (EINVAL);
6629 	}
6630 
6631 	/* --- FW debug dump state is set --- */
6632 
6633 
6634 	/* Caller just wants the upper bound for size */
6635 	if (req->oldptr == NULL && req->newptr == NULL) {
6636 		size_t est_output_len = ICE_FW_DUMP_BASE_TEXT_SIZE;
6637 		if (sc->fw_debug_dump_cluster_mask == 0)
6638 			est_output_len += ICE_FW_DUMP_ALL_TEXT_SIZE;
6639 		else {
6640 			if (sc->fw_debug_dump_cluster_mask & 0x1)
6641 				est_output_len += ICE_FW_DUMP_CLUST0_TEXT_SIZE;
6642 			if (sc->fw_debug_dump_cluster_mask & 0x2)
6643 				est_output_len += ICE_FW_DUMP_CLUST1_TEXT_SIZE;
6644 			if (sc->fw_debug_dump_cluster_mask & 0x4)
6645 				est_output_len += ICE_FW_DUMP_CLUST2_TEXT_SIZE;
6646 		}
6647 
6648 		ret = SYSCTL_OUT(req, 0, est_output_len);
6649 		return (ret);
6650 	}
6651 
6652 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6653 	sbuf_clear_flags(sbuf, SBUF_INCLUDENUL);
6654 
6655 	ice_debug(&sc->hw, ICE_DBG_DIAG, "%s: Debug Dump running...\n", __func__);
6656 
6657 	if (sc->fw_debug_dump_cluster_mask) {
6658 		for_each_set_bit(bit, &sc->fw_debug_dump_cluster_mask,
6659 		    sizeof(sc->fw_debug_dump_cluster_mask) * 8)
6660 			ice_fw_debug_dump_print_cluster(sc, sbuf, bit);
6661 	} else {
6662 		u16 next_cluster_id = 0;
6663 		/* We don't support QUEUE_MNG and FULL_CSR_SPACE */
6664 		do {
6665 			next_cluster_id = ice_fw_debug_dump_print_cluster(sc, sbuf, next_cluster_id);
6666 		} while (next_cluster_id != 0 && next_cluster_id < ICE_AQC_DBG_DUMP_CLUSTER_ID_QUEUE_MNG);
6667 	}
6668 
6669 	sbuf_finish(sbuf);
6670 	sbuf_delete(sbuf);
6671 
6672 	ice_clear_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP);
6673 	return (ret);
6674 }
6675 
6676 /**
6677  * ice_add_debug_sysctls - Add sysctls helpful for debugging the device driver
6678  * @sc: device private structure
6679  *
6680  * Add sysctls related to debugging the device driver. Generally these should
6681  * simply be sysctls which dump internal driver state, to aid in understanding
6682  * what the driver is doing.
6683  */
6684 static void
6685 ice_add_debug_sysctls(struct ice_softc *sc)
6686 {
6687 	struct sysctl_oid *sw_node, *dump_node;
6688 	struct sysctl_oid_list *debug_list, *sw_list, *dump_list;
6689 	device_t dev = sc->dev;
6690 
6691 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
6692 
6693 	debug_list = SYSCTL_CHILDREN(sc->debug_sysctls);
6694 
6695 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "request_reset",
6696 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_WR, sc, 0,
6697 			ice_sysctl_request_reset, "A",
6698 			ICE_SYSCTL_HELP_REQUEST_RESET);
6699 
6700 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "pfr_count",
6701 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6702 		       &sc->soft_stats.pfr_count, 0,
6703 		       "# of PF resets handled");
6704 
6705 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "corer_count",
6706 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6707 		       &sc->soft_stats.corer_count, 0,
6708 		       "# of CORE resets handled");
6709 
6710 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "globr_count",
6711 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6712 		       &sc->soft_stats.globr_count, 0,
6713 		       "# of Global resets handled");
6714 
6715 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "empr_count",
6716 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6717 		       &sc->soft_stats.empr_count, 0,
6718 		       "# of EMP resets handled");
6719 
6720 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "tx_mdd_count",
6721 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6722 		       &sc->soft_stats.tx_mdd_count, 0,
6723 		       "# of Tx MDD events detected");
6724 
6725 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "rx_mdd_count",
6726 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6727 		       &sc->soft_stats.rx_mdd_count, 0,
6728 		       "# of Rx MDD events detected");
6729 
6730 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "state",
6731 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6732 			ice_sysctl_dump_state_flags, "A",
6733 			"Driver State Flags");
6734 
6735 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "set_link",
6736 			ICE_CTLFLAG_DEBUG | CTLTYPE_U8 | CTLFLAG_RW, sc, 0,
6737 			ice_sysctl_debug_set_link, "CU", "Set link");
6738 
6739 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_type_low",
6740 			ICE_CTLFLAG_DEBUG | CTLTYPE_U64 | CTLFLAG_RW, sc, 0,
6741 			ice_sysctl_phy_type_low, "QU",
6742 			"PHY type Low from Get PHY Caps/Set PHY Cfg");
6743 
6744 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_type_high",
6745 			ICE_CTLFLAG_DEBUG | CTLTYPE_U64 | CTLFLAG_RW, sc, 0,
6746 			ice_sysctl_phy_type_high, "QU",
6747 			"PHY type High from Get PHY Caps/Set PHY Cfg");
6748 
6749 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_sw_caps",
6750 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6751 			ice_sysctl_phy_sw_caps, "",
6752 			"Get PHY Capabilities (Software configuration)");
6753 
6754 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_nvm_caps",
6755 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6756 			ice_sysctl_phy_nvm_caps, "",
6757 			"Get PHY Capabilities (NVM configuration)");
6758 
6759 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_topo_caps",
6760 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6761 			ice_sysctl_phy_topo_caps, "",
6762 			"Get PHY Capabilities (Topology configuration)");
6763 
6764 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_link_status",
6765 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6766 			ice_sysctl_phy_link_status, "",
6767 			"Get PHY Link Status");
6768 
6769 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "read_i2c_diag_data",
6770 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6771 			ice_sysctl_read_i2c_diag_data, "A",
6772 			"Dump selected diagnostic data from FW");
6773 
6774 	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "fw_build",
6775 		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD, &sc->hw.fw_build, 0,
6776 		       "FW Build ID");
6777 
6778 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "os_ddp_version",
6779 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6780 			ice_sysctl_os_pkg_version, "A",
6781 			"DDP package name and version found in ice_ddp");
6782 
6783 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "cur_lldp_persist_status",
6784 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6785 			ice_sysctl_fw_cur_lldp_persist_status, "A",
6786 			"Current LLDP persistent status");
6787 
6788 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "dflt_lldp_persist_status",
6789 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6790 			ice_sysctl_fw_dflt_lldp_persist_status, "A",
6791 			"Default LLDP persistent status");
6792 
6793 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "negotiated_fc",
6794 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6795 			ice_sysctl_negotiated_fc, "A",
6796 			"Current Negotiated Flow Control mode");
6797 
6798 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "local_dcbx_cfg",
6799 			CTLTYPE_STRING | CTLFLAG_RD, sc, ICE_AQ_LLDP_MIB_LOCAL,
6800 			ice_sysctl_dump_dcbx_cfg, "A",
6801 			"Dumps Local MIB information from firmware");
6802 
6803 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "remote_dcbx_cfg",
6804 			CTLTYPE_STRING | CTLFLAG_RD, sc, ICE_AQ_LLDP_MIB_REMOTE,
6805 			ice_sysctl_dump_dcbx_cfg, "A",
6806 			"Dumps Remote MIB information from firmware");
6807 
6808 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "pf_vsi_cfg", CTLTYPE_STRING | CTLFLAG_RD,
6809 			sc, 0, ice_sysctl_dump_vsi_cfg, "A",
6810 			"Dumps Selected PF VSI parameters from firmware");
6811 
6812 	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "query_port_ets", CTLTYPE_STRING | CTLFLAG_RD,
6813 			sc, 0, ice_sysctl_query_port_ets, "A",
6814 			"Prints selected output from Query Port ETS AQ command");
6815 
6816 	sw_node = SYSCTL_ADD_NODE(ctx, debug_list, OID_AUTO, "switch",
6817 				  ICE_CTLFLAG_DEBUG | CTLFLAG_RD, NULL,
6818 				  "Switch Configuration");
6819 	sw_list = SYSCTL_CHILDREN(sw_node);
6820 
6821 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "mac_filters",
6822 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6823 			ice_sysctl_dump_mac_filters, "A",
6824 			"MAC Filters");
6825 
6826 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "vlan_filters",
6827 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6828 			ice_sysctl_dump_vlan_filters, "A",
6829 			"VLAN Filters");
6830 
6831 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "ethertype_filters",
6832 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6833 			ice_sysctl_dump_ethertype_filters, "A",
6834 			"Ethertype Filters");
6835 
6836 	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "ethertype_mac_filters",
6837 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6838 			ice_sysctl_dump_ethertype_mac_filters, "A",
6839 			"Ethertype/MAC Filters");
6840 
6841 	dump_node = SYSCTL_ADD_NODE(ctx, debug_list, OID_AUTO, "dump",
6842 				  ICE_CTLFLAG_DEBUG | CTLFLAG_RD, NULL,
6843 				  "Internal FW Dump");
6844 	dump_list = SYSCTL_CHILDREN(dump_node);
6845 
6846 	SYSCTL_ADD_PROC(ctx, dump_list, OID_AUTO, "clusters",
6847 			ICE_CTLFLAG_DEBUG | CTLTYPE_U32 | CTLFLAG_RW, sc, 0,
6848 			ice_sysctl_fw_debug_dump_cluster_setting, "SU",
6849 			ICE_SYSCTL_HELP_FW_DEBUG_DUMP_CLUSTER_SETTING);
6850 
6851 	SYSCTL_ADD_PROC(ctx, dump_list, OID_AUTO, "dump",
6852 			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
6853 			ice_sysctl_fw_debug_dump_do_dump, "",
6854 			ICE_SYSCTL_HELP_FW_DEBUG_DUMP_DO_DUMP);
6855 }
6856 
6857 /**
6858  * ice_vsi_disable_tx - Disable (unconfigure) Tx queues for a VSI
6859  * @vsi: the VSI to disable
6860  *
6861  * Disables the Tx queues associated with this VSI. Essentially the opposite
6862  * of ice_cfg_vsi_for_tx.
6863  */
6864 int
6865 ice_vsi_disable_tx(struct ice_vsi *vsi)
6866 {
6867 	struct ice_softc *sc = vsi->sc;
6868 	struct ice_hw *hw = &sc->hw;
6869 	enum ice_status status;
6870 	u32 *q_teids;
6871 	u16 *q_ids, *q_handles;
6872 	size_t q_teids_size, q_ids_size, q_handles_size;
6873 	int tc, j, buf_idx, err = 0;
6874 
6875 	if (vsi->num_tx_queues > 255)
6876 		return (ENOSYS);
6877 
6878 	q_teids_size = sizeof(*q_teids) * vsi->num_tx_queues;
6879 	q_teids = (u32 *)malloc(q_teids_size, M_ICE, M_NOWAIT|M_ZERO);
6880 	if (!q_teids)
6881 		return (ENOMEM);
6882 
6883 	q_ids_size = sizeof(*q_ids) * vsi->num_tx_queues;
6884 	q_ids = (u16 *)malloc(q_ids_size, M_ICE, M_NOWAIT|M_ZERO);
6885 	if (!q_ids) {
6886 		err = (ENOMEM);
6887 		goto free_q_teids;
6888 	}
6889 
6890 	q_handles_size = sizeof(*q_handles) * vsi->num_tx_queues;
6891 	q_handles = (u16 *)malloc(q_handles_size, M_ICE, M_NOWAIT|M_ZERO);
6892 	if (!q_handles) {
6893 		err = (ENOMEM);
6894 		goto free_q_ids;
6895 	}
6896 
6897 	ice_for_each_traffic_class(tc) {
6898 		struct ice_tc_info *tc_info = &vsi->tc_info[tc];
6899 		u16 start_idx, end_idx;
6900 
6901 		/* Skip rest of disabled TCs once the first
6902 		 * disabled TC is found */
6903 		if (!(vsi->tc_map & BIT(tc)))
6904 			break;
6905 
6906 		/* Fill out TX queue information for this TC */
6907 		start_idx = tc_info->qoffset;
6908 		end_idx = start_idx + tc_info->qcount_tx;
6909 		buf_idx = 0;
6910 		for (j = start_idx; j < end_idx; j++) {
6911 			struct ice_tx_queue *txq = &vsi->tx_queues[j];
6912 
6913 			q_ids[buf_idx] = vsi->tx_qmap[j];
6914 			q_handles[buf_idx] = txq->q_handle;
6915 			q_teids[buf_idx] = txq->q_teid;
6916 			buf_idx++;
6917 		}
6918 
6919 		status = ice_dis_vsi_txq(hw->port_info, vsi->idx, tc, buf_idx,
6920 					 q_handles, q_ids, q_teids, ICE_NO_RESET, 0, NULL);
6921 		if (status == ICE_ERR_DOES_NOT_EXIST) {
6922 			; /* Queues have already been disabled, no need to report this as an error */
6923 		} else if (status == ICE_ERR_RESET_ONGOING) {
6924 			device_printf(sc->dev,
6925 				      "Reset in progress. LAN Tx queues already disabled\n");
6926 			break;
6927 		} else if (status) {
6928 			device_printf(sc->dev,
6929 				      "Failed to disable LAN Tx queues: err %s aq_err %s\n",
6930 				      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
6931 			err = (ENODEV);
6932 			break;
6933 		}
6934 
6935 		/* Clear buffers */
6936 		memset(q_teids, 0, q_teids_size);
6937 		memset(q_ids, 0, q_ids_size);
6938 		memset(q_handles, 0, q_handles_size);
6939 	}
6940 
6941 /* free_q_handles: */
6942 	free(q_handles, M_ICE);
6943 free_q_ids:
6944 	free(q_ids, M_ICE);
6945 free_q_teids:
6946 	free(q_teids, M_ICE);
6947 
6948 	return err;
6949 }
6950 
6951 /**
6952  * ice_vsi_set_rss_params - Set the RSS parameters for the VSI
6953  * @vsi: the VSI to configure
6954  *
6955  * Sets the RSS table size and lookup table type for the VSI based on its
6956  * VSI type.
6957  */
6958 static void
6959 ice_vsi_set_rss_params(struct ice_vsi *vsi)
6960 {
6961 	struct ice_softc *sc = vsi->sc;
6962 	struct ice_hw_common_caps *cap;
6963 
6964 	cap = &sc->hw.func_caps.common_cap;
6965 
6966 	switch (vsi->type) {
6967 	case ICE_VSI_PF:
6968 		/* The PF VSI inherits RSS instance of the PF */
6969 		vsi->rss_table_size = cap->rss_table_size;
6970 		vsi->rss_lut_type = ICE_LUT_PF;
6971 		break;
6972 	case ICE_VSI_VF:
6973 	case ICE_VSI_VMDQ2:
6974 		vsi->rss_table_size = ICE_VSIQF_HLUT_ARRAY_SIZE;
6975 		vsi->rss_lut_type = ICE_LUT_VSI;
6976 		break;
6977 	default:
6978 		device_printf(sc->dev,
6979 			      "VSI %d: RSS not supported for VSI type %d\n",
6980 			      vsi->idx, vsi->type);
6981 		break;
6982 	}
6983 }
6984 
6985 /**
6986  * ice_vsi_add_txqs_ctx - Create a sysctl context and node to store txq sysctls
6987  * @vsi: The VSI to add the context for
6988  *
6989  * Creates a sysctl context for storing txq sysctls. Additionally creates
6990  * a node rooted at the given VSI's main sysctl node. This context will be
6991  * used to store per-txq sysctls which may need to be released during the
6992  * driver's lifetime.
6993  */
6994 void
6995 ice_vsi_add_txqs_ctx(struct ice_vsi *vsi)
6996 {
6997 	struct sysctl_oid_list *vsi_list;
6998 
6999 	sysctl_ctx_init(&vsi->txqs_ctx);
7000 
7001 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
7002 
7003 	vsi->txqs_node = SYSCTL_ADD_NODE(&vsi->txqs_ctx, vsi_list, OID_AUTO, "txqs",
7004 					 CTLFLAG_RD, NULL, "Tx Queues");
7005 }
7006 
7007 /**
7008  * ice_vsi_add_rxqs_ctx - Create a sysctl context and node to store rxq sysctls
7009  * @vsi: The VSI to add the context for
7010  *
7011  * Creates a sysctl context for storing rxq sysctls. Additionally creates
7012  * a node rooted at the given VSI's main sysctl node. This context will be
7013  * used to store per-rxq sysctls which may need to be released during the
7014  * driver's lifetime.
7015  */
7016 void
7017 ice_vsi_add_rxqs_ctx(struct ice_vsi *vsi)
7018 {
7019 	struct sysctl_oid_list *vsi_list;
7020 
7021 	sysctl_ctx_init(&vsi->rxqs_ctx);
7022 
7023 	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
7024 
7025 	vsi->rxqs_node = SYSCTL_ADD_NODE(&vsi->rxqs_ctx, vsi_list, OID_AUTO, "rxqs",
7026 					 CTLFLAG_RD, NULL, "Rx Queues");
7027 }
7028 
7029 /**
7030  * ice_vsi_del_txqs_ctx - Delete the Tx queue sysctl context for this VSI
7031  * @vsi: The VSI to delete from
7032  *
7033  * Frees the txq sysctl context created for storing the per-queue Tx sysctls.
7034  * Must be called prior to freeing the Tx queue memory, in order to avoid
7035  * having sysctls point at stale memory.
7036  */
7037 void
7038 ice_vsi_del_txqs_ctx(struct ice_vsi *vsi)
7039 {
7040 	device_t dev = vsi->sc->dev;
7041 	int err;
7042 
7043 	if (vsi->txqs_node) {
7044 		err = sysctl_ctx_free(&vsi->txqs_ctx);
7045 		if (err)
7046 			device_printf(dev, "failed to free VSI %d txqs_ctx, err %s\n",
7047 				      vsi->idx, ice_err_str(err));
7048 		vsi->txqs_node = NULL;
7049 	}
7050 }
7051 
7052 /**
7053  * ice_vsi_del_rxqs_ctx - Delete the Rx queue sysctl context for this VSI
7054  * @vsi: The VSI to delete from
7055  *
7056  * Frees the rxq sysctl context created for storing the per-queue Rx sysctls.
7057  * Must be called prior to freeing the Rx queue memory, in order to avoid
7058  * having sysctls point at stale memory.
7059  */
7060 void
7061 ice_vsi_del_rxqs_ctx(struct ice_vsi *vsi)
7062 {
7063 	device_t dev = vsi->sc->dev;
7064 	int err;
7065 
7066 	if (vsi->rxqs_node) {
7067 		err = sysctl_ctx_free(&vsi->rxqs_ctx);
7068 		if (err)
7069 			device_printf(dev, "failed to free VSI %d rxqs_ctx, err %s\n",
7070 				      vsi->idx, ice_err_str(err));
7071 		vsi->rxqs_node = NULL;
7072 	}
7073 }
7074 
7075 /**
7076  * ice_add_txq_sysctls - Add per-queue sysctls for a Tx queue
7077  * @txq: pointer to the Tx queue
7078  *
7079 * Add per-queue sysctls for a given Tx queue. Can't be called during
7080 * ice_add_vsi_sysctls, since the queue memory has not yet been setup.
7081  */
7082 void
7083 ice_add_txq_sysctls(struct ice_tx_queue *txq)
7084 {
7085 	struct ice_vsi *vsi = txq->vsi;
7086 	struct sysctl_ctx_list *ctx = &vsi->txqs_ctx;
7087 	struct sysctl_oid_list *txqs_list, *this_txq_list;
7088 	struct sysctl_oid *txq_node;
7089 	char txq_name[32], txq_desc[32];
7090 
7091 	const struct ice_sysctl_info ctls[] = {
7092 		{ &txq->stats.tx_packets, "tx_packets", "Queue Packets Transmitted" },
7093 		{ &txq->stats.tx_bytes, "tx_bytes", "Queue Bytes Transmitted" },
7094 		{ &txq->stats.mss_too_small, "mss_too_small", "TSO sends with an MSS less than 64" },
7095 		{ 0, 0, 0 }
7096 	};
7097 
7098 	const struct ice_sysctl_info *entry = ctls;
7099 
7100 	txqs_list = SYSCTL_CHILDREN(vsi->txqs_node);
7101 
7102 	snprintf(txq_name, sizeof(txq_name), "%u", txq->me);
7103 	snprintf(txq_desc, sizeof(txq_desc), "Tx Queue %u", txq->me);
7104 	txq_node = SYSCTL_ADD_NODE(ctx, txqs_list, OID_AUTO, txq_name,
7105 				   CTLFLAG_RD, NULL, txq_desc);
7106 	this_txq_list = SYSCTL_CHILDREN(txq_node);
7107 
7108 	/* Add the Tx queue statistics */
7109 	while (entry->stat != 0) {
7110 		SYSCTL_ADD_U64(ctx, this_txq_list, OID_AUTO, entry->name,
7111 			       CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
7112 			       entry->description);
7113 		entry++;
7114 	}
7115 
7116 	SYSCTL_ADD_U8(ctx, this_txq_list, OID_AUTO, "tc",
7117 		       CTLFLAG_RD, &txq->tc, 0,
7118 		       "Traffic Class that Queue belongs to");
7119 }
7120 
7121 /**
7122  * ice_add_rxq_sysctls - Add per-queue sysctls for an Rx queue
7123  * @rxq: pointer to the Rx queue
7124  *
7125  * Add per-queue sysctls for a given Rx queue. Can't be called during
7126  * ice_add_vsi_sysctls, since the queue memory has not yet been setup.
7127  */
7128 void
7129 ice_add_rxq_sysctls(struct ice_rx_queue *rxq)
7130 {
7131 	struct ice_vsi *vsi = rxq->vsi;
7132 	struct sysctl_ctx_list *ctx = &vsi->rxqs_ctx;
7133 	struct sysctl_oid_list *rxqs_list, *this_rxq_list;
7134 	struct sysctl_oid *rxq_node;
7135 	char rxq_name[32], rxq_desc[32];
7136 
7137 	const struct ice_sysctl_info ctls[] = {
7138 		{ &rxq->stats.rx_packets, "rx_packets", "Queue Packets Received" },
7139 		{ &rxq->stats.rx_bytes, "rx_bytes", "Queue Bytes Received" },
7140 		{ &rxq->stats.desc_errs, "rx_desc_errs", "Queue Rx Descriptor Errors" },
7141 		{ 0, 0, 0 }
7142 	};
7143 
7144 	const struct ice_sysctl_info *entry = ctls;
7145 
7146 	rxqs_list = SYSCTL_CHILDREN(vsi->rxqs_node);
7147 
7148 	snprintf(rxq_name, sizeof(rxq_name), "%u", rxq->me);
7149 	snprintf(rxq_desc, sizeof(rxq_desc), "Rx Queue %u", rxq->me);
7150 	rxq_node = SYSCTL_ADD_NODE(ctx, rxqs_list, OID_AUTO, rxq_name,
7151 				   CTLFLAG_RD, NULL, rxq_desc);
7152 	this_rxq_list = SYSCTL_CHILDREN(rxq_node);
7153 
7154 	/* Add the Rx queue statistics */
7155 	while (entry->stat != 0) {
7156 		SYSCTL_ADD_U64(ctx, this_rxq_list, OID_AUTO, entry->name,
7157 			       CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
7158 			       entry->description);
7159 		entry++;
7160 	}
7161 
7162 	SYSCTL_ADD_U8(ctx, this_rxq_list, OID_AUTO, "tc",
7163 		       CTLFLAG_RD, &rxq->tc, 0,
7164 		       "Traffic Class that Queue belongs to");
7165 }
7166 
7167 /**
7168  * ice_get_default_rss_key - Obtain a default RSS key
7169  * @seed: storage for the RSS key data
7170  *
7171  * Copies a pre-generated RSS key into the seed memory. The seed pointer must
7172  * point to a block of memory that is at least 40 bytes in size.
7173  *
7174  * The key isn't randomly generated each time this function is called because
7175  * that makes the RSS key change every time we reconfigure RSS. This does mean
7176  * that we're hard coding a possibly 'well known' key. We might want to
7177  * investigate randomly generating this key once during the first call.
7178  */
7179 static void
7180 ice_get_default_rss_key(u8 *seed)
7181 {
7182 	const u8 default_seed[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE] = {
7183 		0x39, 0xed, 0xff, 0x4d, 0x43, 0x58, 0x42, 0xc3, 0x5f, 0xb8,
7184 		0xa5, 0x32, 0x95, 0x65, 0x81, 0xcd, 0x36, 0x79, 0x71, 0x97,
7185 		0xde, 0xa4, 0x41, 0x40, 0x6f, 0x27, 0xe9, 0x81, 0x13, 0xa0,
7186 		0x95, 0x93, 0x5b, 0x1e, 0x9d, 0x27, 0x9d, 0x24, 0x84, 0xb5,
7187 	};
7188 
7189 	bcopy(default_seed, seed, ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE);
7190 }
7191 
7192 /**
7193  * ice_set_rss_key - Configure a given VSI with the default RSS key
7194  * @vsi: the VSI to configure
7195  *
7196  * Program the hardware RSS key. We use rss_getkey to grab the kernel RSS key.
7197  * If the kernel RSS interface is not available, this will fall back to our
7198  * pre-generated hash seed from ice_get_default_rss_key().
7199  */
7200 static int
7201 ice_set_rss_key(struct ice_vsi *vsi)
7202 {
7203 	struct ice_aqc_get_set_rss_keys keydata = { .standard_rss_key = {0} };
7204 	struct ice_softc *sc = vsi->sc;
7205 	struct ice_hw *hw = &sc->hw;
7206 	enum ice_status status;
7207 
7208 	/*
7209 	 * If the RSS kernel interface is disabled, this will return the
7210 	 * default RSS key above.
7211 	 */
7212 	rss_getkey(keydata.standard_rss_key);
7213 
7214 	status = ice_aq_set_rss_key(hw, vsi->idx, &keydata);
7215 	if (status) {
7216 		device_printf(sc->dev,
7217 			      "ice_aq_set_rss_key status %s, error %s\n",
7218 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7219 		return (EIO);
7220 	}
7221 
7222 	return (0);
7223 }
7224 
7225 /**
7226  * ice_set_rss_flow_flds - Program the RSS hash flows after package init
7227  * @vsi: the VSI to configure
7228  *
7229  * If the package file is initialized, the default RSS flows are reset. We
7230  * need to reprogram the expected hash configuration. We'll use
7231  * rss_gethashconfig() to determine which flows to enable. If RSS kernel
7232  * support is not enabled, this macro will fall back to suitable defaults.
7233  */
7234 static void
7235 ice_set_rss_flow_flds(struct ice_vsi *vsi)
7236 {
7237 	struct ice_softc *sc = vsi->sc;
7238 	struct ice_hw *hw = &sc->hw;
7239 	struct ice_rss_hash_cfg rss_cfg = { 0, 0, ICE_RSS_ANY_HEADERS, false };
7240 	device_t dev = sc->dev;
7241 	enum ice_status status;
7242 	u_int rss_hash_config;
7243 
7244 	rss_hash_config = rss_gethashconfig();
7245 
7246 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) {
7247 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4;
7248 		rss_cfg.hash_flds = ICE_FLOW_HASH_IPV4;
7249 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7250 		if (status)
7251 			device_printf(dev,
7252 				      "ice_add_rss_cfg on VSI %d failed for ipv4 flow, err %s aq_err %s\n",
7253 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7254 	}
7255 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) {
7256 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_TCP;
7257 		rss_cfg.hash_flds = ICE_HASH_TCP_IPV4;
7258 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7259 		if (status)
7260 			device_printf(dev,
7261 				      "ice_add_rss_cfg on VSI %d failed for tcp4 flow, err %s aq_err %s\n",
7262 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7263 	}
7264 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) {
7265 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_UDP;
7266 		rss_cfg.hash_flds = ICE_HASH_UDP_IPV4;
7267 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7268 		if (status)
7269 			device_printf(dev,
7270 				      "ice_add_rss_cfg on VSI %d failed for udp4 flow, err %s aq_err %s\n",
7271 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7272 	}
7273 	if (rss_hash_config & (RSS_HASHTYPE_RSS_IPV6 | RSS_HASHTYPE_RSS_IPV6_EX)) {
7274 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6;
7275 		rss_cfg.hash_flds = ICE_FLOW_HASH_IPV6;
7276 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7277 		if (status)
7278 			device_printf(dev,
7279 				      "ice_add_rss_cfg on VSI %d failed for ipv6 flow, err %s aq_err %s\n",
7280 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7281 	}
7282 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) {
7283 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_TCP;
7284 		rss_cfg.hash_flds = ICE_HASH_TCP_IPV6;
7285 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7286 		if (status)
7287 			device_printf(dev,
7288 				      "ice_add_rss_cfg on VSI %d failed for tcp6 flow, err %s aq_err %s\n",
7289 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7290 	}
7291 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) {
7292 		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_UDP;
7293 		rss_cfg.hash_flds = ICE_HASH_UDP_IPV6;
7294 		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7295 		if (status)
7296 			device_printf(dev,
7297 				      "ice_add_rss_cfg on VSI %d failed for udp6 flow, err %s aq_err %s\n",
7298 				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7299 	}
7300 
7301 	/* Warn about RSS hash types which are not supported */
7302 	/* coverity[dead_error_condition] */
7303 	if (rss_hash_config & ~ICE_DEFAULT_RSS_HASH_CONFIG) {
7304 		device_printf(dev,
7305 			      "ice_add_rss_cfg on VSI %d could not configure every requested hash type\n",
7306 			      vsi->idx);
7307 	}
7308 }
7309 
7310 /**
7311  * ice_set_rss_lut - Program the RSS lookup table for a VSI
7312  * @vsi: the VSI to configure
7313  *
7314  * Programs the RSS lookup table for a given VSI. We use
7315  * rss_get_indirection_to_bucket which will use the indirection table provided
7316  * by the kernel RSS interface when available. If the kernel RSS interface is
7317  * not available, we will fall back to a simple round-robin fashion queue
7318  * assignment.
7319  */
7320 static int
7321 ice_set_rss_lut(struct ice_vsi *vsi)
7322 {
7323 	struct ice_softc *sc = vsi->sc;
7324 	struct ice_hw *hw = &sc->hw;
7325 	device_t dev = sc->dev;
7326 	struct ice_aq_get_set_rss_lut_params lut_params;
7327 	enum ice_status status;
7328 	int i, err = 0;
7329 	u8 *lut;
7330 
7331 	lut = (u8 *)malloc(vsi->rss_table_size, M_ICE, M_NOWAIT|M_ZERO);
7332 	if (!lut) {
7333 		device_printf(dev, "Failed to allocate RSS lut memory\n");
7334 		return (ENOMEM);
7335 	}
7336 
7337 	/* Populate the LUT with max no. of queues. If the RSS kernel
7338 	 * interface is disabled, this will assign the lookup table in
7339 	 * a simple round robin fashion
7340 	 */
7341 	for (i = 0; i < vsi->rss_table_size; i++) {
7342 		/* XXX: this needs to be changed if num_rx_queues ever counts
7343 		 * more than just the RSS queues */
7344 		lut[i] = rss_get_indirection_to_bucket(i) % vsi->num_rx_queues;
7345 	}
7346 
7347 	lut_params.vsi_handle = vsi->idx;
7348 	lut_params.lut_size = vsi->rss_table_size;
7349 	lut_params.lut_type = vsi->rss_lut_type;
7350 	lut_params.lut = lut;
7351 	lut_params.global_lut_id = 0;
7352 	status = ice_aq_set_rss_lut(hw, &lut_params);
7353 	if (status) {
7354 		device_printf(dev,
7355 			      "Cannot set RSS lut, err %s aq_err %s\n",
7356 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7357 		err = (EIO);
7358 	}
7359 
7360 	free(lut, M_ICE);
7361 	return err;
7362 }
7363 
7364 /**
7365  * ice_config_rss - Configure RSS for a VSI
7366  * @vsi: the VSI to configure
7367  *
7368  * If FEATURE_RSS is enabled, configures the RSS lookup table and hash key for
7369  * a given VSI.
7370  */
7371 int
7372 ice_config_rss(struct ice_vsi *vsi)
7373 {
7374 	int err;
7375 
7376 	/* Nothing to do, if RSS is not enabled */
7377 	if (!ice_is_bit_set(vsi->sc->feat_en, ICE_FEATURE_RSS))
7378 		return 0;
7379 
7380 	err = ice_set_rss_key(vsi);
7381 	if (err)
7382 		return err;
7383 
7384 	ice_set_rss_flow_flds(vsi);
7385 
7386 	return ice_set_rss_lut(vsi);
7387 }
7388 
7389 /**
7390  * ice_log_pkg_init - Log a message about status of DDP initialization
7391  * @sc: the device softc pointer
7392  * @pkg_status: the status result of ice_copy_and_init_pkg
7393  *
7394  * Called by ice_load_pkg after an attempt to download the DDP package
7395  * contents to the device to log an appropriate message for the system
7396  * administrator about download status.
7397  *
7398  * @post ice_is_init_pkg_successful function is used to determine
7399  * whether the download was successful and DDP package is compatible
7400  * with this driver. Otherwise driver will transition to Safe Mode.
7401  */
7402 void
7403 ice_log_pkg_init(struct ice_softc *sc, enum ice_ddp_state pkg_status)
7404 {
7405 	struct ice_hw *hw = &sc->hw;
7406 	device_t dev = sc->dev;
7407 	struct sbuf *active_pkg, *os_pkg;
7408 
7409 	active_pkg = sbuf_new_auto();
7410 	ice_active_pkg_version_str(hw, active_pkg);
7411 	sbuf_finish(active_pkg);
7412 
7413 	os_pkg = sbuf_new_auto();
7414 	ice_os_pkg_version_str(hw, os_pkg);
7415 	sbuf_finish(os_pkg);
7416 
7417 	switch (pkg_status) {
7418 	case ICE_DDP_PKG_SUCCESS:
7419 		device_printf(dev,
7420 			      "The DDP package was successfully loaded: %s.\n",
7421 			      sbuf_data(active_pkg));
7422 		break;
7423 	case ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED:
7424 	case ICE_DDP_PKG_ALREADY_LOADED:
7425 		device_printf(dev,
7426 			      "DDP package already present on device: %s.\n",
7427 			      sbuf_data(active_pkg));
7428 		break;
7429 	case ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED:
7430 		device_printf(dev,
7431 			      "The driver could not load the DDP package file because a compatible DDP package is already present on the device.  The device has package %s.  The ice_ddp module has package: %s.\n",
7432 			      sbuf_data(active_pkg),
7433 			      sbuf_data(os_pkg));
7434 		break;
7435 	case ICE_DDP_PKG_FILE_VERSION_TOO_HIGH:
7436 		device_printf(dev,
7437 			      "The device has a DDP package that is higher than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7438 			      sbuf_data(active_pkg),
7439 			      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7440 		break;
7441 	case ICE_DDP_PKG_FILE_VERSION_TOO_LOW:
7442 		device_printf(dev,
7443 			      "The device has a DDP package that is lower than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7444 			      sbuf_data(active_pkg),
7445 			      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7446 		break;
7447 	case ICE_DDP_PKG_ALREADY_LOADED_NOT_SUPPORTED:
7448 		/*
7449 		 * This assumes that the active_pkg_ver will not be
7450 		 * initialized if the ice_ddp package version is not
7451 		 * supported.
7452 		 */
7453 		if (pkg_ver_empty(&hw->active_pkg_ver, hw->active_pkg_name)) {
7454 			/* The ice_ddp version is not supported */
7455 			if (pkg_ver_compatible(&hw->pkg_ver) > 0) {
7456 				device_printf(dev,
7457 					      "The DDP package in the ice_ddp module is higher than the driver supports.  The ice_ddp module has package %s.  The driver requires version %d.%d.x.x.  Please use an updated driver.  Entering Safe Mode.\n",
7458 					      sbuf_data(os_pkg),
7459 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7460 			} else if (pkg_ver_compatible(&hw->pkg_ver) < 0) {
7461 				device_printf(dev,
7462 					      "The DDP package in the ice_ddp module is lower than the driver supports.  The ice_ddp module has package %s.  The driver requires version %d.%d.x.x.  Please use an updated ice_ddp module.  Entering Safe Mode.\n",
7463 					      sbuf_data(os_pkg),
7464 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7465 			} else {
7466 				device_printf(dev,
7467 					      "An unknown error occurred when loading the DDP package.  The ice_ddp module has package %s.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7468 					      sbuf_data(os_pkg),
7469 					      sbuf_data(active_pkg),
7470 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7471 			}
7472 		} else {
7473 			if (pkg_ver_compatible(&hw->active_pkg_ver) > 0) {
7474 				device_printf(dev,
7475 					      "The device has a DDP package that is higher than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7476 					      sbuf_data(active_pkg),
7477 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7478 			} else if (pkg_ver_compatible(&hw->active_pkg_ver) < 0) {
7479 				device_printf(dev,
7480 					      "The device has a DDP package that is lower than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7481 					      sbuf_data(active_pkg),
7482 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7483 			} else {
7484 				device_printf(dev,
7485 					      "An unknown error occurred when loading the DDP package.  The ice_ddp module has package %s.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7486 					      sbuf_data(os_pkg),
7487 					      sbuf_data(active_pkg),
7488 					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7489 			}
7490 		}
7491 		break;
7492 	case ICE_DDP_PKG_INVALID_FILE:
7493 		device_printf(dev,
7494 			      "The DDP package in the ice_ddp module is invalid.  Entering Safe Mode\n");
7495 		break;
7496 	case ICE_DDP_PKG_FW_MISMATCH:
7497 		device_printf(dev,
7498 			      "The firmware loaded on the device is not compatible with the DDP package.  Please update the device's NVM.  Entering safe mode.\n");
7499 		break;
7500 	case ICE_DDP_PKG_NO_SEC_MANIFEST:
7501 	case ICE_DDP_PKG_FILE_SIGNATURE_INVALID:
7502 		device_printf(dev,
7503 			      "The DDP package in the ice_ddp module cannot be loaded because its signature is not valid.  Please use a valid ice_ddp module.  Entering Safe Mode.\n");
7504 		break;
7505 	case ICE_DDP_PKG_SECURE_VERSION_NBR_TOO_LOW:
7506 		device_printf(dev,
7507 			      "The DDP package in the ice_ddp module could not be loaded because its security revision is too low.  Please use an updated ice_ddp module.  Entering Safe Mode.\n");
7508 		break;
7509 	case ICE_DDP_PKG_MANIFEST_INVALID:
7510 	case ICE_DDP_PKG_BUFFER_INVALID:
7511 		device_printf(dev,
7512 			      "An error occurred on the device while loading the DDP package.  Entering Safe Mode.\n");
7513 		break;
7514 	default:
7515 		device_printf(dev,
7516 			 "An unknown error occurred when loading the DDP package.  Entering Safe Mode.\n");
7517 		break;
7518 	}
7519 
7520 	sbuf_delete(active_pkg);
7521 	sbuf_delete(os_pkg);
7522 }
7523 
7524 /**
7525  * ice_load_pkg_file - Load the DDP package file using firmware_get
7526  * @sc: device private softc
7527  *
7528  * Use firmware_get to load the DDP package memory and then request that
7529  * firmware download the package contents and program the relevant hardware
7530  * bits.
7531  *
7532  * This function makes a copy of the DDP package memory which is tracked in
7533  * the ice_hw structure. The copy will be managed and released by
7534  * ice_deinit_hw(). This allows the firmware reference to be immediately
7535  * released using firmware_put.
7536  */
7537 enum ice_status
7538 ice_load_pkg_file(struct ice_softc *sc)
7539 {
7540 	struct ice_hw *hw = &sc->hw;
7541 	device_t dev = sc->dev;
7542 	enum ice_ddp_state state;
7543 	const struct firmware *pkg;
7544 	enum ice_status status = ICE_SUCCESS;
7545 	u8 cached_layer_count;
7546 	u8 *buf_copy;
7547 
7548 	pkg = firmware_get("ice_ddp");
7549 	if (!pkg) {
7550 		device_printf(dev,
7551 		    "The DDP package module (ice_ddp) failed to load or could not be found. Entering Safe Mode.\n");
7552 		if (cold)
7553 			device_printf(dev,
7554 			    "The DDP package module cannot be automatically loaded while booting. You may want to specify ice_ddp_load=\"YES\" in your loader.conf\n");
7555 		status = ICE_ERR_CFG;
7556 		goto err_load_pkg;
7557 	}
7558 
7559 	/* Check for topology change */
7560 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_TX_BALANCE)) {
7561 		cached_layer_count = hw->num_tx_sched_layers;
7562 		buf_copy = (u8 *)malloc(pkg->datasize, M_ICE, M_NOWAIT);
7563 		if (buf_copy == NULL)
7564 			return ICE_ERR_NO_MEMORY;
7565 		memcpy(buf_copy, pkg->data, pkg->datasize);
7566 		status = ice_cfg_tx_topo(&sc->hw, buf_copy, pkg->datasize);
7567 		free(buf_copy, M_ICE);
7568 		/* Success indicates a change was made */
7569 		if (status == ICE_SUCCESS) {
7570 			/* 9 -> 5 */
7571 			if (cached_layer_count == 9)
7572 				device_printf(dev,
7573 				    "Transmit balancing feature enabled\n");
7574 			else
7575 				device_printf(dev,
7576 				    "Transmit balancing feature disabled\n");
7577 			ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_en);
7578 			return (status);
7579 		} else if (status == ICE_ERR_CFG) {
7580 			/* Status is ICE_ERR_CFG when DDP does not support transmit balancing */
7581 			device_printf(dev,
7582 			    "DDP package does not support transmit balancing feature - please update to the latest DDP package and try again\n");
7583 		}
7584 	}
7585 
7586 	/* Copy and download the pkg contents */
7587 	state = ice_copy_and_init_pkg(hw, (const u8 *)pkg->data, pkg->datasize);
7588 
7589 	/* Release the firmware reference */
7590 	firmware_put(pkg, FIRMWARE_UNLOAD);
7591 
7592 	/* Check the active DDP package version and log a message */
7593 	ice_log_pkg_init(sc, state);
7594 
7595 	/* Place the driver into safe mode */
7596 	if (ice_is_init_pkg_successful(state))
7597 		return (ICE_ERR_ALREADY_EXISTS);
7598 
7599 err_load_pkg:
7600 	ice_zero_bitmap(sc->feat_cap, ICE_FEATURE_COUNT);
7601 	ice_zero_bitmap(sc->feat_en, ICE_FEATURE_COUNT);
7602 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
7603 	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
7604 
7605 	return (status);
7606 }
7607 
7608 /**
7609  * ice_get_ifnet_counter - Retrieve counter value for a given ifnet counter
7610  * @vsi: the vsi to retrieve the value for
7611  * @counter: the counter type to retrieve
7612  *
7613  * Returns the value for a given ifnet counter. To do so, we calculate the
7614  * value based on the matching hardware statistics.
7615  */
7616 uint64_t
7617 ice_get_ifnet_counter(struct ice_vsi *vsi, ift_counter counter)
7618 {
7619 	struct ice_hw_port_stats *hs = &vsi->sc->stats.cur;
7620 	struct ice_eth_stats *es = &vsi->hw_stats.cur;
7621 
7622 	/* For some statistics, especially those related to error flows, we do
7623 	 * not have per-VSI counters. In this case, we just report the global
7624 	 * counters.
7625 	 */
7626 
7627 	switch (counter) {
7628 	case IFCOUNTER_IPACKETS:
7629 		return (es->rx_unicast + es->rx_multicast + es->rx_broadcast);
7630 	case IFCOUNTER_IERRORS:
7631 		return (hs->crc_errors + hs->illegal_bytes +
7632 			hs->mac_local_faults + hs->mac_remote_faults +
7633 			hs->rx_len_errors + hs->rx_undersize +
7634 			hs->rx_oversize + hs->rx_fragments + hs->rx_jabber);
7635 	case IFCOUNTER_OPACKETS:
7636 		return (es->tx_unicast + es->tx_multicast + es->tx_broadcast);
7637 	case IFCOUNTER_OERRORS:
7638 		return (es->tx_errors);
7639 	case IFCOUNTER_COLLISIONS:
7640 		return (0);
7641 	case IFCOUNTER_IBYTES:
7642 		return (es->rx_bytes);
7643 	case IFCOUNTER_OBYTES:
7644 		return (es->tx_bytes);
7645 	case IFCOUNTER_IMCASTS:
7646 		return (es->rx_multicast);
7647 	case IFCOUNTER_OMCASTS:
7648 		return (es->tx_multicast);
7649 	case IFCOUNTER_IQDROPS:
7650 		return (es->rx_discards);
7651 	case IFCOUNTER_OQDROPS:
7652 		return (hs->tx_dropped_link_down);
7653 	case IFCOUNTER_NOPROTO:
7654 		return (es->rx_unknown_protocol);
7655 	default:
7656 		return if_get_counter_default(vsi->sc->ifp, counter);
7657 	}
7658 }
7659 
7660 /**
7661  * ice_save_pci_info - Save PCI configuration fields in HW struct
7662  * @hw: the ice_hw struct to save the PCI information in
7663  * @dev: the device to get the PCI information from
7664  *
7665  * This should only be called once, early in the device attach
7666  * process.
7667  */
7668 void
7669 ice_save_pci_info(struct ice_hw *hw, device_t dev)
7670 {
7671 	hw->vendor_id = pci_get_vendor(dev);
7672 	hw->device_id = pci_get_device(dev);
7673 	hw->subsystem_vendor_id = pci_get_subvendor(dev);
7674 	hw->subsystem_device_id = pci_get_subdevice(dev);
7675 	hw->revision_id = pci_get_revid(dev);
7676 	hw->bus.device = pci_get_slot(dev);
7677 	hw->bus.func = pci_get_function(dev);
7678 }
7679 
7680 /**
7681  * ice_replay_all_vsi_cfg - Replace configuration for all VSIs after reset
7682  * @sc: the device softc
7683  *
7684  * Replace the configuration for each VSI, and then cleanup replay
7685  * information. Called after a hardware reset in order to reconfigure the
7686  * active VSIs.
7687  */
7688 int
7689 ice_replay_all_vsi_cfg(struct ice_softc *sc)
7690 {
7691 	struct ice_hw *hw = &sc->hw;
7692 	enum ice_status status;
7693 	int i;
7694 
7695 	for (i = 0 ; i < sc->num_available_vsi; i++) {
7696 		struct ice_vsi *vsi = sc->all_vsi[i];
7697 
7698 		if (!vsi)
7699 			continue;
7700 
7701 		status = ice_replay_vsi(hw, vsi->idx);
7702 		if (status) {
7703 			device_printf(sc->dev, "Failed to replay VSI %d, err %s aq_err %s\n",
7704 				      vsi->idx, ice_status_str(status),
7705 				      ice_aq_str(hw->adminq.sq_last_status));
7706 			return (EIO);
7707 		}
7708 	}
7709 
7710 	/* Cleanup replay filters after successful reconfiguration */
7711 	ice_replay_post(hw);
7712 	return (0);
7713 }
7714 
7715 /**
7716  * ice_clean_vsi_rss_cfg - Cleanup RSS configuration for a given VSI
7717  * @vsi: pointer to the VSI structure
7718  *
7719  * Cleanup the advanced RSS configuration for a given VSI. This is necessary
7720  * during driver removal to ensure that all RSS resources are properly
7721  * released.
7722  *
7723  * @remark this function doesn't report an error as it is expected to be
7724  * called during driver reset and unload, and there isn't much the driver can
7725  * do if freeing RSS resources fails.
7726  */
7727 static void
7728 ice_clean_vsi_rss_cfg(struct ice_vsi *vsi)
7729 {
7730 	struct ice_softc *sc = vsi->sc;
7731 	struct ice_hw *hw = &sc->hw;
7732 	device_t dev = sc->dev;
7733 	enum ice_status status;
7734 
7735 	status = ice_rem_vsi_rss_cfg(hw, vsi->idx);
7736 	if (status)
7737 		device_printf(dev,
7738 			      "Failed to remove RSS configuration for VSI %d, err %s\n",
7739 			      vsi->idx, ice_status_str(status));
7740 
7741 	/* Remove this VSI from the RSS list */
7742 	ice_rem_vsi_rss_list(hw, vsi->idx);
7743 }
7744 
7745 /**
7746  * ice_clean_all_vsi_rss_cfg - Cleanup RSS configuration for all VSIs
7747  * @sc: the device softc pointer
7748  *
7749  * Cleanup the advanced RSS configuration for all VSIs on a given PF
7750  * interface.
7751  *
7752  * @remark This should be called while preparing for a reset, to cleanup stale
7753  * RSS configuration for all VSIs.
7754  */
7755 void
7756 ice_clean_all_vsi_rss_cfg(struct ice_softc *sc)
7757 {
7758 	int i;
7759 
7760 	/* No need to cleanup if RSS is not enabled */
7761 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_RSS))
7762 		return;
7763 
7764 	for (i = 0; i < sc->num_available_vsi; i++) {
7765 		struct ice_vsi *vsi = sc->all_vsi[i];
7766 
7767 		if (vsi)
7768 			ice_clean_vsi_rss_cfg(vsi);
7769 	}
7770 }
7771 
7772 /**
7773  * ice_requested_fec_mode - Return the requested FEC mode as a string
7774  * @pi: The port info structure
7775  *
7776  * Return a string representing the requested FEC mode.
7777  */
7778 static const char *
7779 ice_requested_fec_mode(struct ice_port_info *pi)
7780 {
7781 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
7782 	enum ice_status status;
7783 
7784 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
7785 				     &pcaps, NULL);
7786 	if (status)
7787 		/* Just report unknown if we can't get capabilities */
7788 		return "Unknown";
7789 
7790 	/* Check if RS-FEC has been requested first */
7791 	if (pcaps.link_fec_options & (ICE_AQC_PHY_FEC_25G_RS_528_REQ |
7792 				      ICE_AQC_PHY_FEC_25G_RS_544_REQ))
7793 		return ice_fec_str(ICE_FEC_RS);
7794 
7795 	/* If RS FEC has not been requested, then check BASE-R */
7796 	if (pcaps.link_fec_options & (ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ |
7797 				      ICE_AQC_PHY_FEC_25G_KR_REQ))
7798 		return ice_fec_str(ICE_FEC_BASER);
7799 
7800 	return ice_fec_str(ICE_FEC_NONE);
7801 }
7802 
7803 /**
7804  * ice_negotiated_fec_mode - Return the negotiated FEC mode as a string
7805  * @pi: The port info structure
7806  *
7807  * Return a string representing the current FEC mode.
7808  */
7809 static const char *
7810 ice_negotiated_fec_mode(struct ice_port_info *pi)
7811 {
7812 	/* First, check if RS has been requested first */
7813 	if (pi->phy.link_info.fec_info & (ICE_AQ_LINK_25G_RS_528_FEC_EN |
7814 					  ICE_AQ_LINK_25G_RS_544_FEC_EN))
7815 		return ice_fec_str(ICE_FEC_RS);
7816 
7817 	/* If RS FEC has not been requested, then check BASE-R */
7818 	if (pi->phy.link_info.fec_info & ICE_AQ_LINK_25G_KR_FEC_EN)
7819 		return ice_fec_str(ICE_FEC_BASER);
7820 
7821 	return ice_fec_str(ICE_FEC_NONE);
7822 }
7823 
7824 /**
7825  * ice_autoneg_mode - Return string indicating of autoneg completed
7826  * @pi: The port info structure
7827  *
7828  * Return "True" if autonegotiation is completed, "False" otherwise.
7829  */
7830 static const char *
7831 ice_autoneg_mode(struct ice_port_info *pi)
7832 {
7833 	if (pi->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)
7834 		return "True";
7835 	else
7836 		return "False";
7837 }
7838 
7839 /**
7840  * ice_flowcontrol_mode - Return string indicating the Flow Control mode
7841  * @pi: The port info structure
7842  *
7843  * Returns the current Flow Control mode as a string.
7844  */
7845 static const char *
7846 ice_flowcontrol_mode(struct ice_port_info *pi)
7847 {
7848 	return ice_fc_str(pi->fc.current_mode);
7849 }
7850 
7851 /**
7852  * ice_link_up_msg - Log a link up message with associated info
7853  * @sc: the device private softc
7854  *
7855  * Log a link up message with LOG_NOTICE message level. Include information
7856  * about the duplex, FEC mode, autonegotiation and flow control.
7857  */
7858 void
7859 ice_link_up_msg(struct ice_softc *sc)
7860 {
7861 	struct ice_hw *hw = &sc->hw;
7862 	struct ifnet *ifp = sc->ifp;
7863 	const char *speed, *req_fec, *neg_fec, *autoneg, *flowcontrol;
7864 
7865 	speed = ice_aq_speed_to_str(hw->port_info);
7866 	req_fec = ice_requested_fec_mode(hw->port_info);
7867 	neg_fec = ice_negotiated_fec_mode(hw->port_info);
7868 	autoneg = ice_autoneg_mode(hw->port_info);
7869 	flowcontrol = ice_flowcontrol_mode(hw->port_info);
7870 
7871 	log(LOG_NOTICE, "%s: Link is up, %s Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
7872 	    if_name(ifp), speed, req_fec, neg_fec, autoneg, flowcontrol);
7873 }
7874 
7875 /**
7876  * ice_update_laa_mac - Update MAC address if Locally Administered
7877  * @sc: the device softc
7878  *
7879  * Update the device MAC address when a Locally Administered Address is
7880  * assigned.
7881  *
7882  * This function does *not* update the MAC filter list itself. Instead, it
7883  * should be called after ice_rm_pf_default_mac_filters, so that the previous
7884  * address filter will be removed, and before ice_cfg_pf_default_mac_filters,
7885  * so that the new address filter will be assigned.
7886  */
7887 int
7888 ice_update_laa_mac(struct ice_softc *sc)
7889 {
7890 	const u8 *lladdr = (const u8 *)if_getlladdr(sc->ifp);
7891 	struct ice_hw *hw = &sc->hw;
7892 	enum ice_status status;
7893 
7894 	/* If the address is the same, then there is nothing to update */
7895 	if (!memcmp(lladdr, hw->port_info->mac.lan_addr, ETHER_ADDR_LEN))
7896 		return (0);
7897 
7898 	/* Reject Multicast addresses */
7899 	if (ETHER_IS_MULTICAST(lladdr))
7900 		return (EINVAL);
7901 
7902 	status = ice_aq_manage_mac_write(hw, lladdr, ICE_AQC_MAN_MAC_UPDATE_LAA_WOL, NULL);
7903 	if (status) {
7904 		device_printf(sc->dev, "Failed to write mac %6D to firmware, err %s aq_err %s\n",
7905 			      lladdr, ":", ice_status_str(status),
7906 			      ice_aq_str(hw->adminq.sq_last_status));
7907 		return (EFAULT);
7908 	}
7909 
7910 	/* Copy the address into place of the LAN address. */
7911 	bcopy(lladdr, hw->port_info->mac.lan_addr, ETHER_ADDR_LEN);
7912 
7913 	return (0);
7914 }
7915 
7916 /**
7917  * ice_get_and_print_bus_info - Save (PCI) bus info and print messages
7918  * @sc: device softc
7919  *
7920  * This will potentially print out a warning message if bus bandwidth
7921  * is insufficient for full-speed operation.
7922  *
7923  * This should only be called once, during the attach process, after
7924  * hw->port_info has been filled out with port link topology information
7925  * (from the Get PHY Capabilities Admin Queue command).
7926  */
7927 void
7928 ice_get_and_print_bus_info(struct ice_softc *sc)
7929 {
7930 	struct ice_hw *hw = &sc->hw;
7931 	device_t dev = sc->dev;
7932 	u16 pci_link_status;
7933 	int offset;
7934 
7935 	pci_find_cap(dev, PCIY_EXPRESS, &offset);
7936 	pci_link_status = pci_read_config(dev, offset + PCIER_LINK_STA, 2);
7937 
7938 	/* Fill out hw struct with PCIE link status info */
7939 	ice_set_pci_link_status_data(hw, pci_link_status);
7940 
7941 	/* Use info to print out bandwidth messages */
7942 	ice_print_bus_link_data(dev, hw);
7943 
7944 	if (ice_pcie_bandwidth_check(sc)) {
7945 		device_printf(dev,
7946 		    "PCI-Express bandwidth available for this device may be insufficient for optimal performance.\n");
7947 		device_printf(dev,
7948 		    "Please move the device to a different PCI-e link with more lanes and/or higher transfer rate.\n");
7949 	}
7950 }
7951 
7952 /**
7953  * ice_pcie_bus_speed_to_rate - Convert driver bus speed enum value to
7954  * a 64-bit baudrate.
7955  * @speed: enum value to convert
7956  *
7957  * This only goes up to PCIE Gen 4.
7958  */
7959 static uint64_t
7960 ice_pcie_bus_speed_to_rate(enum ice_pcie_bus_speed speed)
7961 {
7962 	/* If the PCI-E speed is Gen1 or Gen2, then report
7963 	 * only 80% of bus speed to account for encoding overhead.
7964 	 */
7965 	switch (speed) {
7966 	case ice_pcie_speed_2_5GT:
7967 		return IF_Gbps(2);
7968 	case ice_pcie_speed_5_0GT:
7969 		return IF_Gbps(4);
7970 	case ice_pcie_speed_8_0GT:
7971 		return IF_Gbps(8);
7972 	case ice_pcie_speed_16_0GT:
7973 		return IF_Gbps(16);
7974 	case ice_pcie_speed_unknown:
7975 	default:
7976 		return 0;
7977 	}
7978 }
7979 
7980 /**
7981  * ice_pcie_lnk_width_to_int - Convert driver pci-e width enum value to
7982  * a 32-bit number.
7983  * @width: enum value to convert
7984  */
7985 static int
7986 ice_pcie_lnk_width_to_int(enum ice_pcie_link_width width)
7987 {
7988 	switch (width) {
7989 	case ice_pcie_lnk_x1:
7990 		return (1);
7991 	case ice_pcie_lnk_x2:
7992 		return (2);
7993 	case ice_pcie_lnk_x4:
7994 		return (4);
7995 	case ice_pcie_lnk_x8:
7996 		return (8);
7997 	case ice_pcie_lnk_x12:
7998 		return (12);
7999 	case ice_pcie_lnk_x16:
8000 		return (16);
8001 	case ice_pcie_lnk_x32:
8002 		return (32);
8003 	case ice_pcie_lnk_width_resrv:
8004 	case ice_pcie_lnk_width_unknown:
8005 	default:
8006 		return (0);
8007 	}
8008 }
8009 
8010 /**
8011  * ice_pcie_bandwidth_check - Check if PCI-E bandwidth is sufficient for
8012  * full-speed device operation.
8013  * @sc: adapter softc
8014  *
8015  * Returns 0 if sufficient; 1 if not.
8016  */
8017 static uint8_t
8018 ice_pcie_bandwidth_check(struct ice_softc *sc)
8019 {
8020 	struct ice_hw *hw = &sc->hw;
8021 	int num_ports, pcie_width;
8022 	u64 pcie_speed, port_speed;
8023 
8024 	MPASS(hw->port_info);
8025 
8026 	num_ports = bitcount32(hw->func_caps.common_cap.valid_functions);
8027 	port_speed = ice_phy_types_to_max_rate(hw->port_info);
8028 	pcie_speed = ice_pcie_bus_speed_to_rate(hw->bus.speed);
8029 	pcie_width = ice_pcie_lnk_width_to_int(hw->bus.width);
8030 
8031 	/*
8032 	 * If 2x100, clamp ports to 1 -- 2nd port is intended for
8033 	 * failover.
8034 	 */
8035 	if (port_speed == IF_Gbps(100))
8036 		num_ports = 1;
8037 
8038 	return !!((num_ports * port_speed) > pcie_speed * pcie_width);
8039 }
8040 
8041 /**
8042  * ice_print_bus_link_data - Print PCI-E bandwidth information
8043  * @dev: device to print string for
8044  * @hw: hw struct with PCI-e link information
8045  */
8046 static void
8047 ice_print_bus_link_data(device_t dev, struct ice_hw *hw)
8048 {
8049         device_printf(dev, "PCI Express Bus: Speed %s %s\n",
8050             ((hw->bus.speed == ice_pcie_speed_16_0GT) ? "16.0GT/s" :
8051             (hw->bus.speed == ice_pcie_speed_8_0GT) ? "8.0GT/s" :
8052             (hw->bus.speed == ice_pcie_speed_5_0GT) ? "5.0GT/s" :
8053             (hw->bus.speed == ice_pcie_speed_2_5GT) ? "2.5GT/s" : "Unknown"),
8054             (hw->bus.width == ice_pcie_lnk_x32) ? "Width x32" :
8055             (hw->bus.width == ice_pcie_lnk_x16) ? "Width x16" :
8056             (hw->bus.width == ice_pcie_lnk_x12) ? "Width x12" :
8057             (hw->bus.width == ice_pcie_lnk_x8) ? "Width x8" :
8058             (hw->bus.width == ice_pcie_lnk_x4) ? "Width x4" :
8059             (hw->bus.width == ice_pcie_lnk_x2) ? "Width x2" :
8060             (hw->bus.width == ice_pcie_lnk_x1) ? "Width x1" : "Width Unknown");
8061 }
8062 
8063 /**
8064  * ice_set_pci_link_status_data - store PCI bus info
8065  * @hw: pointer to hardware structure
8066  * @link_status: the link status word from PCI config space
8067  *
8068  * Stores the PCI bus info (speed, width, type) within the ice_hw structure
8069  **/
8070 static void
8071 ice_set_pci_link_status_data(struct ice_hw *hw, u16 link_status)
8072 {
8073 	u16 reg;
8074 
8075 	hw->bus.type = ice_bus_pci_express;
8076 
8077 	reg = (link_status & PCIEM_LINK_STA_WIDTH) >> 4;
8078 
8079 	switch (reg) {
8080 	case ice_pcie_lnk_x1:
8081 	case ice_pcie_lnk_x2:
8082 	case ice_pcie_lnk_x4:
8083 	case ice_pcie_lnk_x8:
8084 	case ice_pcie_lnk_x12:
8085 	case ice_pcie_lnk_x16:
8086 	case ice_pcie_lnk_x32:
8087 		hw->bus.width = (enum ice_pcie_link_width)reg;
8088 		break;
8089 	default:
8090 		hw->bus.width = ice_pcie_lnk_width_unknown;
8091 		break;
8092 	}
8093 
8094 	reg = (link_status & PCIEM_LINK_STA_SPEED) + 0x13;
8095 
8096 	switch (reg) {
8097 	case ice_pcie_speed_2_5GT:
8098 	case ice_pcie_speed_5_0GT:
8099 	case ice_pcie_speed_8_0GT:
8100 	case ice_pcie_speed_16_0GT:
8101 		hw->bus.speed = (enum ice_pcie_bus_speed)reg;
8102 		break;
8103 	default:
8104 		hw->bus.speed = ice_pcie_speed_unknown;
8105 		break;
8106 	}
8107 }
8108 
8109 /**
8110  * ice_init_link_events - Initialize Link Status Events mask
8111  * @sc: the device softc
8112  *
8113  * Initialize the Link Status Events mask to disable notification of link
8114  * events we don't care about in software. Also request that link status
8115  * events be enabled.
8116  */
8117 int
8118 ice_init_link_events(struct ice_softc *sc)
8119 {
8120 	struct ice_hw *hw = &sc->hw;
8121 	enum ice_status status;
8122 	u16 wanted_events;
8123 
8124 	/* Set the bits for the events that we want to be notified by */
8125 	wanted_events = (ICE_AQ_LINK_EVENT_UPDOWN |
8126 			 ICE_AQ_LINK_EVENT_MEDIA_NA |
8127 			 ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL);
8128 
8129 	/* request that every event except the wanted events be masked */
8130 	status = ice_aq_set_event_mask(hw, hw->port_info->lport, ~wanted_events, NULL);
8131 	if (status) {
8132 		device_printf(sc->dev,
8133 			      "Failed to set link status event mask, err %s aq_err %s\n",
8134 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
8135 		return (EIO);
8136 	}
8137 
8138 	/* Request link info with the LSE bit set to enable link status events */
8139 	status = ice_aq_get_link_info(hw->port_info, true, NULL, NULL);
8140 	if (status) {
8141 		device_printf(sc->dev,
8142 			      "Failed to enable link status events, err %s aq_err %s\n",
8143 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
8144 		return (EIO);
8145 	}
8146 
8147 	return (0);
8148 }
8149 
8150 /**
8151  * ice_handle_mdd_event - Handle possibly malicious events
8152  * @sc: the device softc
8153  *
8154  * Called by the admin task if an MDD detection interrupt is triggered.
8155  * Identifies possibly malicious events coming from VFs. Also triggers for
8156  * similar incorrect behavior from the PF as well.
8157  */
8158 void
8159 ice_handle_mdd_event(struct ice_softc *sc)
8160 {
8161 	struct ice_hw *hw = &sc->hw;
8162 	bool mdd_detected = false, request_reinit = false;
8163 	device_t dev = sc->dev;
8164 	u32 reg;
8165 
8166 	if (!ice_testandclear_state(&sc->state, ICE_STATE_MDD_PENDING))
8167 		return;
8168 
8169 	reg = rd32(hw, GL_MDET_TX_TCLAN);
8170 	if (reg & GL_MDET_TX_TCLAN_VALID_M) {
8171 		u8 pf_num  = (reg & GL_MDET_TX_TCLAN_PF_NUM_M) >> GL_MDET_TX_TCLAN_PF_NUM_S;
8172 		u16 vf_num = (reg & GL_MDET_TX_TCLAN_VF_NUM_M) >> GL_MDET_TX_TCLAN_VF_NUM_S;
8173 		u8 event   = (reg & GL_MDET_TX_TCLAN_MAL_TYPE_M) >> GL_MDET_TX_TCLAN_MAL_TYPE_S;
8174 		u16 queue  = (reg & GL_MDET_TX_TCLAN_QNUM_M) >> GL_MDET_TX_TCLAN_QNUM_S;
8175 
8176 		device_printf(dev, "Malicious Driver Detection Tx Descriptor check event '%s' on Tx queue %u PF# %u VF# %u\n",
8177 			      ice_mdd_tx_tclan_str(event), queue, pf_num, vf_num);
8178 
8179 		/* Only clear this event if it matches this PF, that way other
8180 		 * PFs can read the event and determine VF and queue number.
8181 		 */
8182 		if (pf_num == hw->pf_id)
8183 			wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff);
8184 
8185 		mdd_detected = true;
8186 	}
8187 
8188 	/* Determine what triggered the MDD event */
8189 	reg = rd32(hw, GL_MDET_TX_PQM);
8190 	if (reg & GL_MDET_TX_PQM_VALID_M) {
8191 		u8 pf_num  = (reg & GL_MDET_TX_PQM_PF_NUM_M) >> GL_MDET_TX_PQM_PF_NUM_S;
8192 		u16 vf_num = (reg & GL_MDET_TX_PQM_VF_NUM_M) >> GL_MDET_TX_PQM_VF_NUM_S;
8193 		u8 event   = (reg & GL_MDET_TX_PQM_MAL_TYPE_M) >> GL_MDET_TX_PQM_MAL_TYPE_S;
8194 		u16 queue  = (reg & GL_MDET_TX_PQM_QNUM_M) >> GL_MDET_TX_PQM_QNUM_S;
8195 
8196 		device_printf(dev, "Malicious Driver Detection Tx Quanta check event '%s' on Tx queue %u PF# %u VF# %u\n",
8197 			      ice_mdd_tx_pqm_str(event), queue, pf_num, vf_num);
8198 
8199 		/* Only clear this event if it matches this PF, that way other
8200 		 * PFs can read the event and determine VF and queue number.
8201 		 */
8202 		if (pf_num == hw->pf_id)
8203 			wr32(hw, GL_MDET_TX_PQM, 0xffffffff);
8204 
8205 		mdd_detected = true;
8206 	}
8207 
8208 	reg = rd32(hw, GL_MDET_RX);
8209 	if (reg & GL_MDET_RX_VALID_M) {
8210 		u8 pf_num  = (reg & GL_MDET_RX_PF_NUM_M) >> GL_MDET_RX_PF_NUM_S;
8211 		u16 vf_num = (reg & GL_MDET_RX_VF_NUM_M) >> GL_MDET_RX_VF_NUM_S;
8212 		u8 event   = (reg & GL_MDET_RX_MAL_TYPE_M) >> GL_MDET_RX_MAL_TYPE_S;
8213 		u16 queue  = (reg & GL_MDET_RX_QNUM_M) >> GL_MDET_RX_QNUM_S;
8214 
8215 		device_printf(dev, "Malicious Driver Detection Rx event '%s' on Rx queue %u PF# %u VF# %u\n",
8216 			      ice_mdd_rx_str(event), queue, pf_num, vf_num);
8217 
8218 		/* Only clear this event if it matches this PF, that way other
8219 		 * PFs can read the event and determine VF and queue number.
8220 		 */
8221 		if (pf_num == hw->pf_id)
8222 			wr32(hw, GL_MDET_RX, 0xffffffff);
8223 
8224 		mdd_detected = true;
8225 	}
8226 
8227 	/* Now, confirm that this event actually affects this PF, by checking
8228 	 * the PF registers.
8229 	 */
8230 	if (mdd_detected) {
8231 		reg = rd32(hw, PF_MDET_TX_TCLAN);
8232 		if (reg & PF_MDET_TX_TCLAN_VALID_M) {
8233 			wr32(hw, PF_MDET_TX_TCLAN, 0xffff);
8234 			sc->soft_stats.tx_mdd_count++;
8235 			request_reinit = true;
8236 		}
8237 
8238 		reg = rd32(hw, PF_MDET_TX_PQM);
8239 		if (reg & PF_MDET_TX_PQM_VALID_M) {
8240 			wr32(hw, PF_MDET_TX_PQM, 0xffff);
8241 			sc->soft_stats.tx_mdd_count++;
8242 			request_reinit = true;
8243 		}
8244 
8245 		reg = rd32(hw, PF_MDET_RX);
8246 		if (reg & PF_MDET_RX_VALID_M) {
8247 			wr32(hw, PF_MDET_RX, 0xffff);
8248 			sc->soft_stats.rx_mdd_count++;
8249 			request_reinit = true;
8250 		}
8251 	}
8252 
8253 	/* TODO: Implement logic to detect and handle events caused by VFs. */
8254 
8255 	/* request that the upper stack re-initialize the Tx/Rx queues */
8256 	if (request_reinit)
8257 		ice_request_stack_reinit(sc);
8258 
8259 	ice_flush(hw);
8260 }
8261 
8262 /**
8263  * ice_start_dcbx_agent - Start DCBX agent in FW via AQ command
8264  * @sc: the device softc
8265  *
8266  * @pre device is DCB capable and the FW LLDP agent has started
8267  *
8268  * Checks DCBX status and starts the DCBX agent if it is not in
8269  * a valid state via an AQ command.
8270  */
8271 static void
8272 ice_start_dcbx_agent(struct ice_softc *sc)
8273 {
8274 	struct ice_hw *hw = &sc->hw;
8275 	device_t dev = sc->dev;
8276 	bool dcbx_agent_status;
8277 	enum ice_status status;
8278 
8279 	hw->port_info->qos_cfg.dcbx_status = ice_get_dcbx_status(hw);
8280 
8281 	if (hw->port_info->qos_cfg.dcbx_status != ICE_DCBX_STATUS_DONE &&
8282 	    hw->port_info->qos_cfg.dcbx_status != ICE_DCBX_STATUS_IN_PROGRESS) {
8283 		/*
8284 		 * Start DCBX agent, but not LLDP. The return value isn't
8285 		 * checked here because a more detailed dcbx agent status is
8286 		 * retrieved and checked in ice_init_dcb() and elsewhere.
8287 		 */
8288 		status = ice_aq_start_stop_dcbx(hw, true, &dcbx_agent_status, NULL);
8289 		if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EPERM)
8290 			device_printf(dev,
8291 			    "start_stop_dcbx failed, err %s aq_err %s\n",
8292 			    ice_status_str(status),
8293 			    ice_aq_str(hw->adminq.sq_last_status));
8294 	}
8295 }
8296 
8297 /**
8298  * ice_init_dcb_setup - Initialize DCB settings for HW
8299  * @sc: the device softc
8300  *
8301  * This needs to be called after the fw_lldp_agent sysctl is added, since that
8302  * can update the device's LLDP agent status if a tunable value is set.
8303  *
8304  * Get and store the initial state of DCB settings on driver load. Print out
8305  * informational messages as well.
8306  */
8307 void
8308 ice_init_dcb_setup(struct ice_softc *sc)
8309 {
8310 	struct ice_dcbx_cfg *local_dcbx_cfg;
8311 	struct ice_hw *hw = &sc->hw;
8312 	device_t dev = sc->dev;
8313 	enum ice_status status;
8314 	u8 pfcmode_ret;
8315 
8316 	/* Don't do anything if DCB isn't supported */
8317 	if (!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_DCB)) {
8318 		device_printf(dev, "%s: No DCB support\n", __func__);
8319 		return;
8320 	}
8321 
8322 	/* Starts DCBX agent if it needs starting */
8323 	ice_start_dcbx_agent(sc);
8324 
8325 	/* This sets hw->port_info->qos_cfg.is_sw_lldp */
8326 	status = ice_init_dcb(hw, true);
8327 
8328 	/* If there is an error, then FW LLDP is not in a usable state */
8329 	if (status != 0 && status != ICE_ERR_NOT_READY) {
8330 		/* Don't print an error message if the return code from the AQ
8331 		 * cmd performed in ice_init_dcb() is EPERM; that means the
8332 		 * FW LLDP engine is disabled, and that is a valid state.
8333 		 */
8334 		if (!(status == ICE_ERR_AQ_ERROR &&
8335 		      hw->adminq.sq_last_status == ICE_AQ_RC_EPERM)) {
8336 			device_printf(dev, "DCB init failed, err %s aq_err %s\n",
8337 				      ice_status_str(status),
8338 				      ice_aq_str(hw->adminq.sq_last_status));
8339 		}
8340 		hw->port_info->qos_cfg.dcbx_status = ICE_DCBX_STATUS_NOT_STARTED;
8341 	}
8342 
8343 	switch (hw->port_info->qos_cfg.dcbx_status) {
8344 	case ICE_DCBX_STATUS_DIS:
8345 		ice_debug(hw, ICE_DBG_DCB, "DCBX disabled\n");
8346 		break;
8347 	case ICE_DCBX_STATUS_NOT_STARTED:
8348 		ice_debug(hw, ICE_DBG_DCB, "DCBX not started\n");
8349 		break;
8350 	case ICE_DCBX_STATUS_MULTIPLE_PEERS:
8351 		ice_debug(hw, ICE_DBG_DCB, "DCBX detected multiple peers\n");
8352 		break;
8353 	default:
8354 		break;
8355 	}
8356 
8357 	/* LLDP disabled in FW */
8358 	if (hw->port_info->qos_cfg.is_sw_lldp) {
8359 		ice_add_rx_lldp_filter(sc);
8360 		device_printf(dev, "Firmware LLDP agent disabled\n");
8361 	}
8362 
8363 	/* Query and cache PFC mode */
8364 	status = ice_aq_query_pfc_mode(hw, &pfcmode_ret, NULL);
8365 	if (status) {
8366 		device_printf(dev, "PFC mode query failed, err %s aq_err %s\n",
8367 			      ice_status_str(status),
8368 			      ice_aq_str(hw->adminq.sq_last_status));
8369 	}
8370 	local_dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
8371 	switch (pfcmode_ret) {
8372 	case ICE_AQC_PFC_VLAN_BASED_PFC:
8373 		local_dcbx_cfg->pfc_mode = ICE_QOS_MODE_VLAN;
8374 		break;
8375 	case ICE_AQC_PFC_DSCP_BASED_PFC:
8376 		local_dcbx_cfg->pfc_mode = ICE_QOS_MODE_DSCP;
8377 		break;
8378 	default:
8379 		/* DCB is disabled, but we shouldn't get here */
8380 		break;
8381 	}
8382 
8383 	/* Set default SW MIB for init */
8384 	ice_set_default_local_mib_settings(sc);
8385 
8386 	ice_set_bit(ICE_FEATURE_DCB, sc->feat_en);
8387 }
8388 
8389 /**
8390  * ice_dcb_get_tc_map - Scans config to get bitmap of enabled TCs
8391  * @dcbcfg: DCB configuration to examine
8392  *
8393  * Scans a TC mapping table inside dcbcfg to find traffic classes
8394  * enabled and @returns a bitmask of enabled TCs
8395  */
8396 u8
8397 ice_dcb_get_tc_map(const struct ice_dcbx_cfg *dcbcfg)
8398 {
8399 	u8 tc_map = 0;
8400 	int i = 0;
8401 
8402 	switch (dcbcfg->pfc_mode) {
8403 	case ICE_QOS_MODE_VLAN:
8404 		/* XXX: "i" is actually "User Priority" here, not
8405 		 * Traffic Class, but the max for both is 8, so it works
8406 		 * out here.
8407 		 */
8408 		for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
8409 			tc_map |= BIT(dcbcfg->etscfg.prio_table[i]);
8410 		break;
8411 	case ICE_QOS_MODE_DSCP:
8412 		for (i = 0; i < ICE_DSCP_NUM_VAL; i++)
8413 			tc_map |= BIT(dcbcfg->dscp_map[i]);
8414 		break;
8415 	default:
8416 		/* Invalid Mode */
8417 		tc_map = ICE_DFLT_TRAFFIC_CLASS;
8418 		break;
8419 	}
8420 
8421 	return (tc_map);
8422 }
8423 
8424 /**
8425  * ice_dcb_get_num_tc - Get the number of TCs from DCBX config
8426  * @dcbcfg: config to retrieve number of TCs from
8427  *
8428  * @return number of contiguous TCs found in dcbcfg's ETS Configuration
8429  * Priority Assignment Table, a value from 1 to 8. If there are
8430  * non-contiguous TCs used (e.g. assigning 1 and 3 without using 2),
8431  * then returns 0.
8432  */
8433 static u8
8434 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg)
8435 {
8436 	u8 tc_map;
8437 
8438 	tc_map = ice_dcb_get_tc_map(dcbcfg);
8439 
8440 	return (ice_dcb_tc_contig(tc_map));
8441 }
8442 
8443 /**
8444  * ice_debug_print_mib_change_event - helper function to log LLDP MIB change events
8445  * @sc: the device private softc
8446  * @event: event received on a control queue
8447  *
8448  * Prints out the type and contents of an LLDP MIB change event in a DCB debug message.
8449  */
8450 static void
8451 ice_debug_print_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event)
8452 {
8453 	struct ice_aqc_lldp_get_mib *params =
8454 	    (struct ice_aqc_lldp_get_mib *)&event->desc.params.lldp_get_mib;
8455 	u8 mib_type, bridge_type, tx_status;
8456 
8457 	static const char* mib_type_strings[] = {
8458 	    "Local MIB",
8459 	    "Remote MIB",
8460 	    "Reserved",
8461 	    "Reserved"
8462 	};
8463 	static const char* bridge_type_strings[] = {
8464 	    "Nearest Bridge",
8465 	    "Non-TPMR Bridge",
8466 	    "Reserved",
8467 	    "Reserved"
8468 	};
8469 	static const char* tx_status_strings[] = {
8470 	    "Port's TX active",
8471 	    "Port's TX suspended and drained",
8472 	    "Reserved",
8473 	    "Port's TX suspended and drained; blocked TC pipe flushed"
8474 	};
8475 
8476 	mib_type = (params->type & ICE_AQ_LLDP_MIB_TYPE_M) >>
8477 	    ICE_AQ_LLDP_MIB_TYPE_S;
8478 	bridge_type = (params->type & ICE_AQ_LLDP_BRID_TYPE_M) >>
8479 	    ICE_AQ_LLDP_BRID_TYPE_S;
8480 	tx_status = (params->type & ICE_AQ_LLDP_TX_M) >>
8481 	    ICE_AQ_LLDP_TX_S;
8482 
8483 	ice_debug(&sc->hw, ICE_DBG_DCB, "LLDP MIB Change Event (%s, %s, %s)\n",
8484 	    mib_type_strings[mib_type], bridge_type_strings[bridge_type],
8485 	    tx_status_strings[tx_status]);
8486 
8487 	/* Nothing else to report */
8488 	if (!event->msg_buf)
8489 		return;
8490 
8491 	ice_debug(&sc->hw, ICE_DBG_DCB, "- %s contents:\n", mib_type_strings[mib_type]);
8492 	ice_debug_array(&sc->hw, ICE_DBG_DCB, 16, 1, event->msg_buf,
8493 			event->msg_len);
8494 }
8495 
8496 /**
8497  * ice_dcb_needs_reconfig - Returns true if driver needs to reconfigure
8498  * @sc: the device private softc
8499  * @old_cfg: Old DCBX configuration to compare against
8500  * @new_cfg: New DCBX configuration to check
8501  *
8502  * @return true if something changed in new_cfg that requires the driver
8503  * to do some reconfiguration.
8504  */
8505 static bool
8506 ice_dcb_needs_reconfig(struct ice_softc *sc, struct ice_dcbx_cfg *old_cfg,
8507     struct ice_dcbx_cfg *new_cfg)
8508 {
8509 	struct ice_hw *hw = &sc->hw;
8510 	bool needs_reconfig = false;
8511 
8512 	/* No change detected in DCBX config */
8513 	if (!memcmp(old_cfg, new_cfg, sizeof(*old_cfg))) {
8514 		ice_debug(hw, ICE_DBG_DCB,
8515 		    "No change detected in local DCBX configuration\n");
8516 		return (false);
8517 	}
8518 
8519 	/* Check if ETS config has changed */
8520 	if (memcmp(&new_cfg->etscfg, &old_cfg->etscfg,
8521 		   sizeof(new_cfg->etscfg))) {
8522 		/* If Priority Table has changed, then driver reconfig is needed */
8523 		if (memcmp(&new_cfg->etscfg.prio_table,
8524 			   &old_cfg->etscfg.prio_table,
8525 			   sizeof(new_cfg->etscfg.prio_table))) {
8526 			ice_debug(hw, ICE_DBG_DCB, "ETS UP2TC changed\n");
8527 			needs_reconfig = true;
8528 		}
8529 
8530 		/* These are just informational */
8531 		if (memcmp(&new_cfg->etscfg.tcbwtable,
8532 			   &old_cfg->etscfg.tcbwtable,
8533 			   sizeof(new_cfg->etscfg.tcbwtable))) {
8534 			ice_debug(hw, ICE_DBG_DCB, "ETS TCBW table changed\n");
8535 			needs_reconfig = true;
8536 		}
8537 
8538 		if (memcmp(&new_cfg->etscfg.tsatable,
8539 			   &old_cfg->etscfg.tsatable,
8540 			   sizeof(new_cfg->etscfg.tsatable))) {
8541 			ice_debug(hw, ICE_DBG_DCB, "ETS TSA table changed\n");
8542 			needs_reconfig = true;
8543 		}
8544 	}
8545 
8546 	/* Check if PFC config has changed */
8547 	if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) {
8548 		ice_debug(hw, ICE_DBG_DCB, "PFC config changed\n");
8549 		needs_reconfig = true;
8550 	}
8551 
8552 	/* Check if APP table has changed */
8553 	if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app)))
8554 		ice_debug(hw, ICE_DBG_DCB, "APP Table changed\n");
8555 
8556 	ice_debug(hw, ICE_DBG_DCB, "%s result: %d\n", __func__, needs_reconfig);
8557 
8558 	return (needs_reconfig);
8559 }
8560 
8561 /**
8562  * ice_stop_pf_vsi - Stop queues for PF LAN VSI
8563  * @sc: the device private softc
8564  *
8565  * Flushes interrupts and stops the queues associated with the PF LAN VSI.
8566  */
8567 static void
8568 ice_stop_pf_vsi(struct ice_softc *sc)
8569 {
8570 	/* Dissociate the Tx and Rx queues from the interrupts */
8571 	ice_flush_txq_interrupts(&sc->pf_vsi);
8572 	ice_flush_rxq_interrupts(&sc->pf_vsi);
8573 
8574 	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
8575 		return;
8576 
8577 	/* Disable the Tx and Rx queues */
8578 	ice_vsi_disable_tx(&sc->pf_vsi);
8579 	ice_control_all_rx_queues(&sc->pf_vsi, false);
8580 }
8581 
8582 /**
8583  * ice_vsi_setup_q_map - Setup a VSI queue map
8584  * @vsi: the VSI being configured
8585  * @ctxt: VSI context structure
8586  */
8587 static void
8588 ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
8589 {
8590 	u16 qcounts[ICE_MAX_TRAFFIC_CLASS] = {};
8591 	u16 offset = 0, qmap = 0, pow = 0;
8592 	u16 num_q_per_tc, qcount_rx, rem_queues;
8593 	int i, j, k;
8594 
8595 	if (vsi->num_tcs == 0) {
8596 		/* at least TC0 should be enabled by default */
8597 		vsi->num_tcs = 1;
8598 		vsi->tc_map = 0x1;
8599 	}
8600 
8601 	qcount_rx = vsi->num_rx_queues;
8602 	num_q_per_tc = min(qcount_rx / vsi->num_tcs, ICE_MAX_RXQS_PER_TC);
8603 
8604 	if (!num_q_per_tc)
8605 		num_q_per_tc = 1;
8606 
8607 	/* Set initial values for # of queues to use for each active TC */
8608 	ice_for_each_traffic_class(i)
8609 		if (i < vsi->num_tcs)
8610 			qcounts[i] = num_q_per_tc;
8611 
8612 	/* If any queues are unassigned, add them to TC 0 */
8613 	rem_queues = qcount_rx % vsi->num_tcs;
8614 	if (rem_queues > 0)
8615 		qcounts[0] += rem_queues;
8616 
8617 	/* TC mapping is a function of the number of Rx queues assigned to the
8618 	 * VSI for each traffic class and the offset of these queues.
8619 	 * The first 10 bits are for queue offset for TC0, next 4 bits for no:of
8620 	 * queues allocated to TC0. No:of queues is a power-of-2.
8621 	 *
8622 	 * If TC is not enabled, the queue offset is set to 0, and allocate one
8623 	 * queue, this way, traffic for the given TC will be sent to the default
8624 	 * queue.
8625 	 *
8626 	 * Setup number and offset of Rx queues for all TCs for the VSI
8627 	 */
8628 	ice_for_each_traffic_class(i) {
8629 		if (!(vsi->tc_map & BIT(i))) {
8630 			/* TC is not enabled */
8631 			vsi->tc_info[i].qoffset = 0;
8632 			vsi->tc_info[i].qcount_rx = 1;
8633 			vsi->tc_info[i].qcount_tx = 1;
8634 
8635 			ctxt->info.tc_mapping[i] = 0;
8636 			continue;
8637 		}
8638 
8639 		/* TC is enabled */
8640 		vsi->tc_info[i].qoffset = offset;
8641 		vsi->tc_info[i].qcount_rx = qcounts[i];
8642 		vsi->tc_info[i].qcount_tx = qcounts[i];
8643 
8644 		/* find the (rounded up) log-2 of queue count for current TC */
8645 		pow = fls(qcounts[i] - 1);
8646 
8647 		qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
8648 			ICE_AQ_VSI_TC_Q_OFFSET_M) |
8649 			((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
8650 			 ICE_AQ_VSI_TC_Q_NUM_M);
8651 		ctxt->info.tc_mapping[i] = CPU_TO_LE16(qmap);
8652 
8653 		/* Store traffic class and handle data in queue structures */
8654 		for (j = offset, k = 0; j < offset + qcounts[i]; j++, k++) {
8655 			vsi->tx_queues[j].q_handle = k;
8656 			vsi->tx_queues[j].tc = i;
8657 
8658 			vsi->rx_queues[j].tc = i;
8659 		}
8660 
8661 		offset += qcounts[i];
8662 	}
8663 
8664 	/* Rx queue mapping */
8665 	ctxt->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_CONTIG);
8666 	ctxt->info.q_mapping[0] = CPU_TO_LE16(vsi->rx_qmap[0]);
8667 	ctxt->info.q_mapping[1] = CPU_TO_LE16(vsi->num_rx_queues);
8668 }
8669 
8670 /**
8671  * ice_pf_vsi_cfg_tc - Configure PF VSI for a given TC map
8672  * @sc: the device private softc
8673  * @tc_map: traffic class bitmap
8674  *
8675  * @pre VSI queues are stopped
8676  *
8677  * @return 0 if configuration is successful
8678  * @return EIO if Update VSI AQ cmd fails
8679  * @return ENODEV if updating Tx Scheduler fails
8680  */
8681 static int
8682 ice_pf_vsi_cfg_tc(struct ice_softc *sc, u8 tc_map)
8683 {
8684 	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
8685 	struct ice_vsi *vsi = &sc->pf_vsi;
8686 	struct ice_hw *hw = &sc->hw;
8687 	struct ice_vsi_ctx ctx = { 0 };
8688 	device_t dev = sc->dev;
8689 	enum ice_status status;
8690 	u8 num_tcs = 0;
8691 	int i = 0;
8692 
8693 	/* Count the number of enabled Traffic Classes */
8694 	ice_for_each_traffic_class(i)
8695 		if (tc_map & BIT(i))
8696 			num_tcs++;
8697 
8698 	vsi->tc_map = tc_map;
8699 	vsi->num_tcs = num_tcs;
8700 
8701 	/* Set default parameters for context */
8702 	ctx.vf_num = 0;
8703 	ctx.info = vsi->info;
8704 
8705 	/* Setup queue map */
8706 	ice_vsi_setup_q_map(vsi, &ctx);
8707 
8708 	/* Update VSI configuration in firmware (RX queues) */
8709 	ctx.info.valid_sections = CPU_TO_LE16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
8710 	status = ice_update_vsi(hw, vsi->idx, &ctx, NULL);
8711 	if (status) {
8712 		device_printf(dev,
8713 		    "%s: Update VSI AQ call failed, err %s aq_err %s\n",
8714 		    __func__, ice_status_str(status),
8715 		    ice_aq_str(hw->adminq.sq_last_status));
8716 		return (EIO);
8717 	}
8718 	vsi->info = ctx.info;
8719 
8720 	/* Use values derived in ice_vsi_setup_q_map() */
8721 	for (i = 0; i < num_tcs; i++)
8722 		max_txqs[i] = vsi->tc_info[i].qcount_tx;
8723 
8724 	if (hw->debug_mask & ICE_DBG_DCB) {
8725 		device_printf(dev, "%s: max_txqs:", __func__);
8726 		ice_for_each_traffic_class(i)
8727 			printf(" %d", max_txqs[i]);
8728 		printf("\n");
8729 	}
8730 
8731 	/* Update LAN Tx queue info in firmware */
8732 	status = ice_cfg_vsi_lan(hw->port_info, vsi->idx, vsi->tc_map,
8733 				 max_txqs);
8734 	if (status) {
8735 		device_printf(dev,
8736 		    "%s: Failed VSI lan queue config, err %s aq_err %s\n",
8737 		    __func__, ice_status_str(status),
8738 		    ice_aq_str(hw->adminq.sq_last_status));
8739 		return (ENODEV);
8740 	}
8741 
8742 	vsi->info.valid_sections = 0;
8743 
8744 	return (0);
8745 }
8746 
8747 /**
8748  * ice_dcb_tc_contig - Count TCs if they're contiguous
8749  * @tc_map: pointer to priority table
8750  *
8751  * @return The number of traffic classes in
8752  * an 8-bit TC bitmap, or if there is a gap, then returns 0.
8753  */
8754 static u8
8755 ice_dcb_tc_contig(u8 tc_map)
8756 {
8757 	bool tc_unused = false;
8758 	u8 ret = 0;
8759 
8760 	/* Scan bitmask for contiguous TCs starting with TC0 */
8761 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
8762 		if (tc_map & BIT(i)) {
8763 			if (!tc_unused) {
8764 				ret++;
8765 			} else {
8766 				/* Non-contiguous TCs detected */
8767 				return (0);
8768 			}
8769 		} else
8770 			tc_unused = true;
8771 	}
8772 
8773 	return (ret);
8774 }
8775 
8776 /**
8777  * ice_dcb_recfg - Reconfigure VSI with new DCB settings
8778  * @sc: the device private softc
8779  *
8780  * @pre All VSIs have been disabled/stopped
8781  *
8782  * Reconfigures VSI settings based on local_dcbx_cfg.
8783  */
8784 static void
8785 ice_dcb_recfg(struct ice_softc *sc)
8786 {
8787 	struct ice_dcbx_cfg *dcbcfg =
8788 	    &sc->hw.port_info->qos_cfg.local_dcbx_cfg;
8789 	device_t dev = sc->dev;
8790 	u8 tc_map = 0;
8791 	int ret;
8792 
8793 	tc_map = ice_dcb_get_tc_map(dcbcfg);
8794 
8795 	/* If non-contiguous TCs are used, then configure
8796 	 * the default TC instead. There's no support for
8797 	 * non-contiguous TCs being used.
8798 	 */
8799 	if (ice_dcb_tc_contig(tc_map) == 0) {
8800 		tc_map = ICE_DFLT_TRAFFIC_CLASS;
8801 		ice_set_default_local_lldp_mib(sc);
8802 	}
8803 
8804 	/* Reconfigure VSI queues to add/remove traffic classes */
8805 	ret = ice_pf_vsi_cfg_tc(sc, tc_map);
8806 	if (ret)
8807 		device_printf(dev,
8808 		    "Failed to configure TCs for PF VSI, err %s\n",
8809 		    ice_err_str(ret));
8810 
8811 }
8812 
8813 /**
8814  * ice_set_default_local_mib_settings - Set Local LLDP MIB to default settings
8815  * @sc: device softc structure
8816  *
8817  * Overwrites the driver's SW local LLDP MIB with default settings. This
8818  * ensures the driver has a valid MIB when it next uses the Set Local LLDP MIB
8819  * admin queue command.
8820  */
8821 static void
8822 ice_set_default_local_mib_settings(struct ice_softc *sc)
8823 {
8824 	struct ice_dcbx_cfg *dcbcfg;
8825 	struct ice_hw *hw = &sc->hw;
8826 	struct ice_port_info *pi;
8827 	u8 maxtcs, maxtcs_ets, old_pfc_mode;
8828 
8829 	pi = hw->port_info;
8830 
8831 	dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
8832 
8833 	maxtcs = hw->func_caps.common_cap.maxtc;
8834 	/* This value is only 3 bits; 8 TCs maps to 0 */
8835 	maxtcs_ets = maxtcs & ICE_IEEE_ETS_MAXTC_M;
8836 
8837 	/* VLAN vs DSCP mode needs to be preserved */
8838 	old_pfc_mode = dcbcfg->pfc_mode;
8839 
8840 	/**
8841 	 * Setup the default settings used by the driver for the Set Local
8842 	 * LLDP MIB Admin Queue command (0x0A08). (1TC w/ 100% BW, ETS, no
8843 	 * PFC, TSA=2).
8844 	 */
8845 	memset(dcbcfg, 0, sizeof(*dcbcfg));
8846 
8847 	dcbcfg->etscfg.willing = 1;
8848 	dcbcfg->etscfg.tcbwtable[0] = 100;
8849 	dcbcfg->etscfg.maxtcs = maxtcs_ets;
8850 	dcbcfg->etscfg.tsatable[0] = 2;
8851 
8852 	dcbcfg->etsrec = dcbcfg->etscfg;
8853 	dcbcfg->etsrec.willing = 0;
8854 
8855 	dcbcfg->pfc.willing = 1;
8856 	dcbcfg->pfc.pfccap = maxtcs;
8857 
8858 	dcbcfg->pfc_mode = old_pfc_mode;
8859 }
8860 
8861 /**
8862  * ice_do_dcb_reconfig - notify RDMA and reconfigure PF LAN VSI
8863  * @sc: the device private softc
8864  * @pending_mib: FW has a pending MIB change to execute
8865  *
8866  * @pre Determined that the DCB configuration requires a change
8867  *
8868  * Reconfigures the PF LAN VSI based on updated DCB configuration
8869  * found in the hw struct's/port_info's/ local dcbx configuration.
8870  */
8871 void
8872 ice_do_dcb_reconfig(struct ice_softc *sc, bool pending_mib)
8873 {
8874 	struct ice_aqc_port_ets_elem port_ets = { 0 };
8875 	struct ice_dcbx_cfg *local_dcbx_cfg;
8876 	struct ice_hw *hw = &sc->hw;
8877 	struct ice_port_info *pi;
8878 	device_t dev = sc->dev;
8879 	enum ice_status status;
8880 
8881 	pi = sc->hw.port_info;
8882 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
8883 
8884 	ice_rdma_notify_dcb_qos_change(sc);
8885 	/* If there's a pending MIB, tell the FW to execute the MIB change
8886 	 * now.
8887 	 */
8888 	if (pending_mib) {
8889 		status = ice_lldp_execute_pending_mib(hw);
8890 		if ((status == ICE_ERR_AQ_ERROR) &&
8891 		    (hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)) {
8892 			device_printf(dev,
8893 			    "Execute Pending LLDP MIB AQ call failed, no pending MIB\n");
8894 		} else if (status) {
8895 			device_printf(dev,
8896 			    "Execute Pending LLDP MIB AQ call failed, err %s aq_err %s\n",
8897 			    ice_status_str(status),
8898 			    ice_aq_str(hw->adminq.sq_last_status));
8899 			/* This won't break traffic, but QoS will not work as expected */
8900 		}
8901 	}
8902 
8903 	/* Set state when there's more than one TC */
8904 	if (ice_dcb_get_num_tc(local_dcbx_cfg) > 1) {
8905 		device_printf(dev, "Multiple traffic classes enabled\n");
8906 		ice_set_state(&sc->state, ICE_STATE_MULTIPLE_TCS);
8907 	} else {
8908 		device_printf(dev, "Multiple traffic classes disabled\n");
8909 		ice_clear_state(&sc->state, ICE_STATE_MULTIPLE_TCS);
8910 	}
8911 
8912 	/* Disable PF VSI since it's going to be reconfigured */
8913 	ice_stop_pf_vsi(sc);
8914 
8915 	/* Query ETS configuration and update SW Tx scheduler info */
8916 	status = ice_query_port_ets(pi, &port_ets, sizeof(port_ets), NULL);
8917 	if (status != ICE_SUCCESS) {
8918 		device_printf(dev,
8919 		    "Query Port ETS AQ call failed, err %s aq_err %s\n",
8920 		    ice_status_str(status),
8921 		    ice_aq_str(hw->adminq.sq_last_status));
8922 		/* This won't break traffic, but QoS will not work as expected */
8923 	}
8924 
8925 	/* Change PF VSI configuration */
8926 	ice_dcb_recfg(sc);
8927 
8928 	/* Send new configuration to RDMA client driver */
8929 	ice_rdma_dcb_qos_update(sc, pi);
8930 
8931 	ice_request_stack_reinit(sc);
8932 }
8933 
8934 /**
8935  * ice_handle_mib_change_event - helper function to handle LLDP MIB change events
8936  * @sc: the device private softc
8937  * @event: event received on a control queue
8938  *
8939  * Checks the updated MIB it receives and possibly reconfigures the PF LAN
8940  * VSI depending on what has changed. This will also print out some debug
8941  * information about the MIB event if ICE_DBG_DCB is enabled in the debug_mask.
8942  */
8943 static void
8944 ice_handle_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event)
8945 {
8946 	struct ice_aqc_lldp_get_mib *params =
8947 	    (struct ice_aqc_lldp_get_mib *)&event->desc.params.lldp_get_mib;
8948 	struct ice_dcbx_cfg tmp_dcbx_cfg, *local_dcbx_cfg;
8949 	struct ice_port_info *pi;
8950 	device_t dev = sc->dev;
8951 	struct ice_hw *hw = &sc->hw;
8952 	bool needs_reconfig, mib_is_pending;
8953 	enum ice_status status;
8954 	u8 mib_type, bridge_type;
8955 
8956 	ASSERT_CFG_LOCKED(sc);
8957 
8958 	ice_debug_print_mib_change_event(sc, event);
8959 
8960 	pi = sc->hw.port_info;
8961 
8962 	mib_type = (params->type & ICE_AQ_LLDP_MIB_TYPE_M) >>
8963 	    ICE_AQ_LLDP_MIB_TYPE_S;
8964 	bridge_type = (params->type & ICE_AQ_LLDP_BRID_TYPE_M) >>
8965 	    ICE_AQ_LLDP_BRID_TYPE_S;
8966 	mib_is_pending = (params->state & ICE_AQ_LLDP_MIB_CHANGE_STATE_M) >>
8967 	    ICE_AQ_LLDP_MIB_CHANGE_STATE_S;
8968 
8969 	/* Ignore if event is not for Nearest Bridge */
8970 	if (bridge_type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID)
8971 		return;
8972 
8973 	/* Check MIB Type and return if event for Remote MIB update */
8974 	if (mib_type == ICE_AQ_LLDP_MIB_REMOTE) {
8975 		/* Update the cached remote MIB and return */
8976 		status = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
8977 					 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID,
8978 					 &pi->qos_cfg.remote_dcbx_cfg);
8979 		if (status)
8980 			device_printf(dev,
8981 			    "%s: Failed to get Remote DCB config; status %s, aq_err %s\n",
8982 			    __func__, ice_status_str(status),
8983 			    ice_aq_str(hw->adminq.sq_last_status));
8984 		/* Not fatal if this fails */
8985 		return;
8986 	}
8987 
8988 	/* Save line length by aliasing the local dcbx cfg */
8989 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
8990 	/* Save off the old configuration and clear current config */
8991 	tmp_dcbx_cfg = *local_dcbx_cfg;
8992 	memset(local_dcbx_cfg, 0, sizeof(*local_dcbx_cfg));
8993 
8994 	/* Update the current local_dcbx_cfg with new data */
8995 	if (mib_is_pending) {
8996 		ice_get_dcb_cfg_from_mib_change(pi, event);
8997 	} else {
8998 		/* Get updated DCBX data from firmware */
8999 		status = ice_get_dcb_cfg(pi);
9000 		if (status) {
9001 			device_printf(dev,
9002 			    "%s: Failed to get Local DCB config; status %s, aq_err %s\n",
9003 			    __func__, ice_status_str(status),
9004 			    ice_aq_str(hw->adminq.sq_last_status));
9005 			return;
9006 		}
9007 	}
9008 
9009 	/* Check to see if DCB needs reconfiguring */
9010 	needs_reconfig = ice_dcb_needs_reconfig(sc, &tmp_dcbx_cfg,
9011 	    local_dcbx_cfg);
9012 
9013 	if (!needs_reconfig && !mib_is_pending)
9014 		return;
9015 
9016 	/* Reconfigure -- this will also notify FW that configuration is done,
9017 	 * if the FW MIB change is only pending instead of executed.
9018 	 */
9019 	ice_do_dcb_reconfig(sc, mib_is_pending);
9020 }
9021 
9022 /**
9023  * ice_send_version - Send driver version to firmware
9024  * @sc: the device private softc
9025  *
9026  * Send the driver version to the firmware. This must be called as early as
9027  * possible after ice_init_hw().
9028  */
9029 int
9030 ice_send_version(struct ice_softc *sc)
9031 {
9032 	struct ice_driver_ver driver_version = {0};
9033 	struct ice_hw *hw = &sc->hw;
9034 	device_t dev = sc->dev;
9035 	enum ice_status status;
9036 
9037 	driver_version.major_ver = ice_major_version;
9038 	driver_version.minor_ver = ice_minor_version;
9039 	driver_version.build_ver = ice_patch_version;
9040 	driver_version.subbuild_ver = ice_rc_version;
9041 
9042 	strlcpy((char *)driver_version.driver_string, ice_driver_version,
9043 		sizeof(driver_version.driver_string));
9044 
9045 	status = ice_aq_send_driver_ver(hw, &driver_version, NULL);
9046 	if (status) {
9047 		device_printf(dev, "Unable to send driver version to firmware, err %s aq_err %s\n",
9048 			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
9049 		return (EIO);
9050 	}
9051 
9052 	return (0);
9053 }
9054 
9055 /**
9056  * ice_handle_lan_overflow_event - helper function to log LAN overflow events
9057  * @sc: device softc
9058  * @event: event received on a control queue
9059  *
9060  * Prints out a message when a LAN overflow event is detected on a receive
9061  * queue.
9062  */
9063 static void
9064 ice_handle_lan_overflow_event(struct ice_softc *sc, struct ice_rq_event_info *event)
9065 {
9066 	struct ice_aqc_event_lan_overflow *params =
9067 	    (struct ice_aqc_event_lan_overflow *)&event->desc.params.lan_overflow;
9068 	struct ice_hw *hw = &sc->hw;
9069 
9070 	ice_debug(hw, ICE_DBG_DCB, "LAN overflow event detected, prtdcb_ruptq=0x%08x, qtx_ctl=0x%08x\n",
9071 		  LE32_TO_CPU(params->prtdcb_ruptq),
9072 		  LE32_TO_CPU(params->qtx_ctl));
9073 }
9074 
9075 /**
9076  * ice_add_ethertype_to_list - Add an Ethertype filter to a filter list
9077  * @vsi: the VSI to target packets to
9078  * @list: the list to add the filter to
9079  * @ethertype: the Ethertype to filter on
9080  * @direction: The direction of the filter (Tx or Rx)
9081  * @action: the action to take
9082  *
9083  * Add an Ethertype filter to a filter list. Used to forward a series of
9084  * filters to the firmware for configuring the switch.
9085  *
9086  * Returns 0 on success, and an error code on failure.
9087  */
9088 static int
9089 ice_add_ethertype_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
9090 			  u16 ethertype, u16 direction,
9091 			  enum ice_sw_fwd_act_type action)
9092 {
9093 	struct ice_fltr_list_entry *entry;
9094 
9095 	MPASS((direction == ICE_FLTR_TX) || (direction == ICE_FLTR_RX));
9096 
9097 	entry = (__typeof(entry))malloc(sizeof(*entry), M_ICE, M_NOWAIT|M_ZERO);
9098 	if (!entry)
9099 		return (ENOMEM);
9100 
9101 	entry->fltr_info.flag = direction;
9102 	entry->fltr_info.src_id = ICE_SRC_ID_VSI;
9103 	entry->fltr_info.lkup_type = ICE_SW_LKUP_ETHERTYPE;
9104 	entry->fltr_info.fltr_act = action;
9105 	entry->fltr_info.vsi_handle = vsi->idx;
9106 	entry->fltr_info.l_data.ethertype_mac.ethertype = ethertype;
9107 
9108 	LIST_ADD(&entry->list_entry, list);
9109 
9110 	return 0;
9111 }
9112 
/* Ethertype values matched by the Ethertype switch filters set up below */
#define ETHERTYPE_PAUSE_FRAMES 0x8808	/* PAUSE (flow control) frames */
#define ETHERTYPE_LLDP_FRAMES 0x88cc	/* LLDP frames */
9115 
9116 /**
9117  * ice_cfg_pf_ethertype_filters - Configure switch to drop ethertypes
9118  * @sc: the device private softc
9119  *
9120  * Configure the switch to drop PAUSE frames and LLDP frames transmitted from
9121  * the host. This prevents malicious VFs from sending these frames and being
9122  * able to control or configure the network.
9123  */
9124 int
9125 ice_cfg_pf_ethertype_filters(struct ice_softc *sc)
9126 {
9127 	struct ice_list_head ethertype_list;
9128 	struct ice_vsi *vsi = &sc->pf_vsi;
9129 	struct ice_hw *hw = &sc->hw;
9130 	device_t dev = sc->dev;
9131 	enum ice_status status;
9132 	int err = 0;
9133 
9134 	INIT_LIST_HEAD(&ethertype_list);
9135 
9136 	/*
9137 	 * Note that the switch filters will ignore the VSI index for the drop
9138 	 * action, so we only need to program drop filters once for the main
9139 	 * VSI.
9140 	 */
9141 
9142 	/* Configure switch to drop all Tx pause frames coming from any VSI. */
9143 	if (sc->enable_tx_fc_filter) {
9144 		err = ice_add_ethertype_to_list(vsi, &ethertype_list,
9145 						ETHERTYPE_PAUSE_FRAMES,
9146 						ICE_FLTR_TX, ICE_DROP_PACKET);
9147 		if (err)
9148 			goto free_ethertype_list;
9149 	}
9150 
9151 	/* Configure switch to drop LLDP frames coming from any VSI */
9152 	if (sc->enable_tx_lldp_filter) {
9153 		err = ice_add_ethertype_to_list(vsi, &ethertype_list,
9154 						ETHERTYPE_LLDP_FRAMES,
9155 						ICE_FLTR_TX, ICE_DROP_PACKET);
9156 		if (err)
9157 			goto free_ethertype_list;
9158 	}
9159 
9160 	status = ice_add_eth_mac(hw, &ethertype_list);
9161 	if (status) {
9162 		device_printf(dev,
9163 			      "Failed to add Tx Ethertype filters, err %s aq_err %s\n",
9164 			      ice_status_str(status),
9165 			      ice_aq_str(hw->adminq.sq_last_status));
9166 		err = (EIO);
9167 	}
9168 
9169 free_ethertype_list:
9170 	ice_free_fltr_list(&ethertype_list);
9171 	return err;
9172 }
9173 
9174 /**
9175  * ice_add_rx_lldp_filter - add ethertype filter for Rx LLDP frames
9176  * @sc: the device private structure
9177  *
9178  * Add a switch ethertype filter which forwards the LLDP frames to the main PF
9179  * VSI. Called when the fw_lldp_agent is disabled, to allow the LLDP frames to
9180  * be forwarded to the stack.
9181  */
9182 void
9183 ice_add_rx_lldp_filter(struct ice_softc *sc)
9184 {
9185 	struct ice_list_head ethertype_list;
9186 	struct ice_vsi *vsi = &sc->pf_vsi;
9187 	struct ice_hw *hw = &sc->hw;
9188 	device_t dev = sc->dev;
9189 	enum ice_status status;
9190 	int err;
9191 	u16 vsi_num;
9192 
9193 	/*
9194 	 * If FW is new enough, use a direct AQ command to perform the filter
9195 	 * addition.
9196 	 */
9197 	if (ice_fw_supports_lldp_fltr_ctrl(hw)) {
9198 		vsi_num = ice_get_hw_vsi_num(hw, vsi->idx);
9199 		status = ice_lldp_fltr_add_remove(hw, vsi_num, true);
9200 		if (status) {
9201 			device_printf(dev,
9202 			    "Failed to add Rx LLDP filter, err %s aq_err %s\n",
9203 			    ice_status_str(status),
9204 			    ice_aq_str(hw->adminq.sq_last_status));
9205 		} else
9206 			ice_set_state(&sc->state,
9207 			    ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER);
9208 		return;
9209 	}
9210 
9211 	INIT_LIST_HEAD(&ethertype_list);
9212 
9213 	/* Forward Rx LLDP frames to the stack */
9214 	err = ice_add_ethertype_to_list(vsi, &ethertype_list,
9215 					ETHERTYPE_LLDP_FRAMES,
9216 					ICE_FLTR_RX, ICE_FWD_TO_VSI);
9217 	if (err) {
9218 		device_printf(dev,
9219 			      "Failed to add Rx LLDP filter, err %s\n",
9220 			      ice_err_str(err));
9221 		goto free_ethertype_list;
9222 	}
9223 
9224 	status = ice_add_eth_mac(hw, &ethertype_list);
9225 	if (status && status != ICE_ERR_ALREADY_EXISTS) {
9226 		device_printf(dev,
9227 			      "Failed to add Rx LLDP filter, err %s aq_err %s\n",
9228 			      ice_status_str(status),
9229 			      ice_aq_str(hw->adminq.sq_last_status));
9230 	} else {
9231 		/*
9232 		 * If status == ICE_ERR_ALREADY_EXISTS, we won't treat an
9233 		 * already existing filter as an error case.
9234 		 */
9235 		ice_set_state(&sc->state, ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER);
9236 	}
9237 
9238 free_ethertype_list:
9239 	ice_free_fltr_list(&ethertype_list);
9240 }
9241 
9242 /**
9243  * ice_del_rx_lldp_filter - Remove ethertype filter for Rx LLDP frames
9244  * @sc: the device private structure
9245  *
9246  * Remove the switch filter forwarding LLDP frames to the main PF VSI, called
9247  * when the firmware LLDP agent is enabled, to stop routing LLDP frames to the
9248  * stack.
9249  */
9250 static void
9251 ice_del_rx_lldp_filter(struct ice_softc *sc)
9252 {
9253 	struct ice_list_head ethertype_list;
9254 	struct ice_vsi *vsi = &sc->pf_vsi;
9255 	struct ice_hw *hw = &sc->hw;
9256 	device_t dev = sc->dev;
9257 	enum ice_status status;
9258 	int err;
9259 	u16 vsi_num;
9260 
9261 	/*
9262 	 * Only in the scenario where the driver added the filter during
9263 	 * this session (while the driver was loaded) would we be able to
9264 	 * delete this filter.
9265 	 */
9266 	if (!ice_test_state(&sc->state, ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER))
9267 		return;
9268 
9269 	/*
9270 	 * If FW is new enough, use a direct AQ command to perform the filter
9271 	 * removal.
9272 	 */
9273 	if (ice_fw_supports_lldp_fltr_ctrl(hw)) {
9274 		vsi_num = ice_get_hw_vsi_num(hw, vsi->idx);
9275 		status = ice_lldp_fltr_add_remove(hw, vsi_num, false);
9276 		if (status) {
9277 			device_printf(dev,
9278 			    "Failed to remove Rx LLDP filter, err %s aq_err %s\n",
9279 			    ice_status_str(status),
9280 			    ice_aq_str(hw->adminq.sq_last_status));
9281 		}
9282 		return;
9283 	}
9284 
9285 	INIT_LIST_HEAD(&ethertype_list);
9286 
9287 	/* Remove filter forwarding Rx LLDP frames to the stack */
9288 	err = ice_add_ethertype_to_list(vsi, &ethertype_list,
9289 					ETHERTYPE_LLDP_FRAMES,
9290 					ICE_FLTR_RX, ICE_FWD_TO_VSI);
9291 	if (err) {
9292 		device_printf(dev,
9293 			      "Failed to remove Rx LLDP filter, err %s\n",
9294 			      ice_err_str(err));
9295 		goto free_ethertype_list;
9296 	}
9297 
9298 	status = ice_remove_eth_mac(hw, &ethertype_list);
9299 	if (status == ICE_ERR_DOES_NOT_EXIST) {
9300 		; /* Don't complain if we try to remove a filter that doesn't exist */
9301 	} else if (status) {
9302 		device_printf(dev,
9303 			      "Failed to remove Rx LLDP filter, err %s aq_err %s\n",
9304 			      ice_status_str(status),
9305 			      ice_aq_str(hw->adminq.sq_last_status));
9306 	}
9307 
9308 free_ethertype_list:
9309 	ice_free_fltr_list(&ethertype_list);
9310 }
9311 
9312 /**
9313  * ice_init_link_configuration -- Setup link in different ways depending
9314  * on whether media is available or not.
9315  * @sc: device private structure
9316  *
9317  * Called at the end of the attach process to either set default link
9318  * parameters if there is media available, or force HW link down and
9319  * set a state bit if there is no media.
9320  */
9321 void
9322 ice_init_link_configuration(struct ice_softc *sc)
9323 {
9324 	struct ice_port_info *pi = sc->hw.port_info;
9325 	struct ice_hw *hw = &sc->hw;
9326 	device_t dev = sc->dev;
9327 	enum ice_status status;
9328 
9329 	pi->phy.get_link_info = true;
9330 	status = ice_get_link_status(pi, &sc->link_up);
9331 	if (status != ICE_SUCCESS) {
9332 		device_printf(dev,
9333 		    "%s: ice_get_link_status failed; status %s, aq_err %s\n",
9334 		    __func__, ice_status_str(status),
9335 		    ice_aq_str(hw->adminq.sq_last_status));
9336 		return;
9337 	}
9338 
9339 	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
9340 		ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
9341 		/* Apply default link settings */
9342 		if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN)) {
9343 			ice_set_link(sc, false);
9344 			ice_set_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
9345 		} else
9346 			ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
9347 	} else {
9348 		 /* Set link down, and poll for media available in timer. This prevents the
9349 		  * driver from receiving spurious link-related events.
9350 		  */
9351 		ice_set_state(&sc->state, ICE_STATE_NO_MEDIA);
9352 		status = ice_aq_set_link_restart_an(pi, false, NULL);
9353 		if (status != ICE_SUCCESS && hw->adminq.sq_last_status != ICE_AQ_RC_EMODE)
9354 			device_printf(dev,
9355 			    "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n",
9356 			    __func__, ice_status_str(status),
9357 			    ice_aq_str(hw->adminq.sq_last_status));
9358 	}
9359 }
9360 
9361 /**
9362  * ice_apply_saved_phy_req_to_cfg -- Write saved user PHY settings to cfg data
9363  * @sc: device private structure
9364  * @cfg: new PHY config data to be modified
9365  *
9366  * Applies user settings for advertised speeds to the PHY type fields in the
9367  * supplied PHY config struct. It uses the data from pcaps to check if the
9368  * saved settings are invalid and uses the pcaps data instead if they are
9369  * invalid.
9370  */
9371 static int
9372 ice_apply_saved_phy_req_to_cfg(struct ice_softc *sc,
9373 			       struct ice_aqc_set_phy_cfg_data *cfg)
9374 {
9375 	struct ice_phy_data phy_data = { 0 };
9376 	struct ice_port_info *pi = sc->hw.port_info;
9377 	u64 phy_low = 0, phy_high = 0;
9378 	u16 link_speeds;
9379 	int ret;
9380 
9381 	link_speeds = pi->phy.curr_user_speed_req;
9382 
9383 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LINK_MGMT_VER_2)) {
9384 		memset(&phy_data, 0, sizeof(phy_data));
9385 		phy_data.report_mode = ICE_AQC_REPORT_DFLT_CFG;
9386 		phy_data.user_speeds_orig = link_speeds;
9387 		ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9388 		if (ret != 0) {
9389 			/* Error message already printed within function */
9390 			return (ret);
9391 		}
9392 		phy_low = phy_data.phy_low_intr;
9393 		phy_high = phy_data.phy_high_intr;
9394 
9395 		if (link_speeds == 0 || phy_data.user_speeds_intr)
9396 			goto finalize_link_speed;
9397 		if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE)) {
9398 			memset(&phy_data, 0, sizeof(phy_data));
9399 			phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA;
9400 			phy_data.user_speeds_orig = link_speeds;
9401 			ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9402 			if (ret != 0) {
9403 				/* Error message already printed within function */
9404 				return (ret);
9405 			}
9406 			phy_low = phy_data.phy_low_intr;
9407 			phy_high = phy_data.phy_high_intr;
9408 
9409 			if (!phy_data.user_speeds_intr) {
9410 				phy_low = phy_data.phy_low_orig;
9411 				phy_high = phy_data.phy_high_orig;
9412 			}
9413 			goto finalize_link_speed;
9414 		}
9415 		/* If we're here, then it means the benefits of Version 2
9416 		 * link management aren't utilized.  We fall through to
9417 		 * handling Strict Link Mode the same as Version 1 link
9418 		 * management.
9419 		 */
9420 	}
9421 
9422 	memset(&phy_data, 0, sizeof(phy_data));
9423 	if ((link_speeds == 0) &&
9424 	    (sc->ldo_tlv.phy_type_low || sc->ldo_tlv.phy_type_high))
9425 		phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA;
9426 	else
9427 		phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_MEDIA;
9428 	phy_data.user_speeds_orig = link_speeds;
9429 	ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9430 	if (ret != 0) {
9431 		/* Error message already printed within function */
9432 		return (ret);
9433 	}
9434 	phy_low = phy_data.phy_low_intr;
9435 	phy_high = phy_data.phy_high_intr;
9436 
9437 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE)) {
9438 		if (phy_low == 0 && phy_high == 0) {
9439 			device_printf(sc->dev,
9440 			    "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
9441 			return (EINVAL);
9442 		}
9443 	} else {
9444 		if (link_speeds == 0) {
9445 			if (sc->ldo_tlv.phy_type_low & phy_low ||
9446 			    sc->ldo_tlv.phy_type_high & phy_high) {
9447 				phy_low &= sc->ldo_tlv.phy_type_low;
9448 				phy_high &= sc->ldo_tlv.phy_type_high;
9449 			}
9450 		} else if (phy_low == 0 && phy_high == 0) {
9451 			memset(&phy_data, 0, sizeof(phy_data));
9452 			phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA;
9453 			phy_data.user_speeds_orig = link_speeds;
9454 			ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9455 			if (ret != 0) {
9456 				/* Error message already printed within function */
9457 				return (ret);
9458 			}
9459 			phy_low = phy_data.phy_low_intr;
9460 			phy_high = phy_data.phy_high_intr;
9461 
9462 			if (!phy_data.user_speeds_intr) {
9463 				phy_low = phy_data.phy_low_orig;
9464 				phy_high = phy_data.phy_high_orig;
9465 			}
9466 		}
9467 	}
9468 
9469 finalize_link_speed:
9470 
9471 	/* Cache new user settings for speeds */
9472 	pi->phy.curr_user_speed_req = phy_data.user_speeds_intr;
9473 	cfg->phy_type_low = htole64(phy_low);
9474 	cfg->phy_type_high = htole64(phy_high);
9475 
9476 	return (ret);
9477 }
9478 
9479 /**
9480  * ice_apply_saved_fec_req_to_cfg -- Write saved user FEC mode to cfg data
9481  * @sc: device private structure
9482  * @cfg: new PHY config data to be modified
9483  *
9484  * Applies user setting for FEC mode to PHY config struct. It uses the data
9485  * from pcaps to check if the saved settings are invalid and uses the pcaps
9486  * data instead if they are invalid.
9487  */
9488 static int
9489 ice_apply_saved_fec_req_to_cfg(struct ice_softc *sc,
9490 			       struct ice_aqc_set_phy_cfg_data *cfg)
9491 {
9492 	struct ice_port_info *pi = sc->hw.port_info;
9493 	enum ice_status status;
9494 
9495 	cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC;
9496 	status = ice_cfg_phy_fec(pi, cfg, pi->phy.curr_user_fec_req);
9497 	if (status)
9498 		return (EIO);
9499 
9500 	return (0);
9501 }
9502 
9503 /**
9504  * ice_apply_saved_fc_req_to_cfg -- Write saved user flow control mode to cfg data
9505  * @pi: port info struct
9506  * @cfg: new PHY config data to be modified
9507  *
9508  * Applies user setting for flow control mode to PHY config struct. There are
9509  * no invalid flow control mode settings; if there are, then this function
9510  * treats them like "ICE_FC_NONE".
9511  */
9512 static void
9513 ice_apply_saved_fc_req_to_cfg(struct ice_port_info *pi,
9514 			      struct ice_aqc_set_phy_cfg_data *cfg)
9515 {
9516 	cfg->caps &= ~(ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY |
9517 		       ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY);
9518 
9519 	switch (pi->phy.curr_user_fc_req) {
9520 	case ICE_FC_FULL:
9521 		cfg->caps |= ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY |
9522 			     ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY;
9523 		break;
9524 	case ICE_FC_RX_PAUSE:
9525 		cfg->caps |= ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY;
9526 		break;
9527 	case ICE_FC_TX_PAUSE:
9528 		cfg->caps |= ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY;
9529 		break;
9530 	default:
9531 		/* ICE_FC_NONE */
9532 		break;
9533 	}
9534 }
9535 
9536 /**
9537  * ice_apply_saved_phy_cfg -- Re-apply user PHY config settings
9538  * @sc: device private structure
9539  * @settings: which settings to apply
9540  *
9541  * Applies user settings for advertised speeds, FEC mode, and flow
9542  * control mode to a PHY config struct; it uses the data from pcaps
9543  * to check if the saved settings are invalid and uses the pcaps
9544  * data instead if they are invalid.
9545  *
9546  * For things like sysctls where only one setting needs to be
9547  * updated, the bitmap allows the caller to specify which setting
9548  * to update.
9549  */
9550 int
9551 ice_apply_saved_phy_cfg(struct ice_softc *sc, u8 settings)
9552 {
9553 	struct ice_aqc_set_phy_cfg_data cfg = { 0 };
9554 	struct ice_port_info *pi = sc->hw.port_info;
9555 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
9556 	struct ice_hw *hw = &sc->hw;
9557 	device_t dev = sc->dev;
9558 	u64 phy_low, phy_high;
9559 	enum ice_status status;
9560 	enum ice_fec_mode dflt_fec_mode;
9561 	u16 dflt_user_speed;
9562 
9563 	if (!settings || settings > ICE_APPLY_LS_FEC_FC) {
9564 		ice_debug(hw, ICE_DBG_LINK, "Settings out-of-bounds: %u\n",
9565 		    settings);
9566 	}
9567 
9568 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
9569 				     &pcaps, NULL);
9570 	if (status != ICE_SUCCESS) {
9571 		device_printf(dev,
9572 		    "%s: ice_aq_get_phy_caps (ACTIVE) failed; status %s, aq_err %s\n",
9573 		    __func__, ice_status_str(status),
9574 		    ice_aq_str(hw->adminq.sq_last_status));
9575 		return (EIO);
9576 	}
9577 
9578 	phy_low = le64toh(pcaps.phy_type_low);
9579 	phy_high = le64toh(pcaps.phy_type_high);
9580 
9581 	/* Save off initial config parameters */
9582 	dflt_user_speed = ice_aq_phy_types_to_link_speeds(phy_low, phy_high);
9583 	dflt_fec_mode = ice_caps_to_fec_mode(pcaps.caps, pcaps.link_fec_options);
9584 
9585 	/* Setup new PHY config */
9586 	ice_copy_phy_caps_to_cfg(pi, &pcaps, &cfg);
9587 
9588 	/* On error, restore active configuration values */
9589 	if ((settings & ICE_APPLY_LS) &&
9590 	    ice_apply_saved_phy_req_to_cfg(sc, &cfg)) {
9591 		pi->phy.curr_user_speed_req = dflt_user_speed;
9592 		cfg.phy_type_low = pcaps.phy_type_low;
9593 		cfg.phy_type_high = pcaps.phy_type_high;
9594 	}
9595 	if ((settings & ICE_APPLY_FEC) &&
9596 	    ice_apply_saved_fec_req_to_cfg(sc, &cfg)) {
9597 		pi->phy.curr_user_fec_req = dflt_fec_mode;
9598 	}
9599 	if (settings & ICE_APPLY_FC) {
9600 		/* No real error indicators for this process,
9601 		 * so we'll just have to assume it works. */
9602 		ice_apply_saved_fc_req_to_cfg(pi, &cfg);
9603 	}
9604 
9605 	/* Enable link and re-negotiate it */
9606 	cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK;
9607 
9608 	status = ice_aq_set_phy_cfg(hw, pi, &cfg, NULL);
9609 	if (status != ICE_SUCCESS) {
9610 		/* Don't indicate failure if there's no media in the port.
9611 		 * The settings have been saved and will apply when media
9612 		 * is inserted.
9613 		 */
9614 		if ((status == ICE_ERR_AQ_ERROR) &&
9615 		    (hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY)) {
9616 			device_printf(dev,
9617 			    "%s: Setting will be applied when media is inserted\n",
9618 			    __func__);
9619 			return (0);
9620 		} else {
9621 			device_printf(dev,
9622 			    "%s: ice_aq_set_phy_cfg failed; status %s, aq_err %s\n",
9623 			    __func__, ice_status_str(status),
9624 			    ice_aq_str(hw->adminq.sq_last_status));
9625 			return (EIO);
9626 		}
9627 	}
9628 
9629 	return (0);
9630 }
9631 
9632 /**
9633  * ice_print_ldo_tlv - Print out LDO TLV information
9634  * @sc: device private structure
9635  * @tlv: LDO TLV information from the adapter NVM
9636  *
9637  * Dump out the information in tlv to the kernel message buffer; intended for
9638  * debugging purposes.
9639  */
9640 static void
9641 ice_print_ldo_tlv(struct ice_softc *sc, struct ice_link_default_override_tlv *tlv)
9642 {
9643 	device_t dev = sc->dev;
9644 
9645 	device_printf(dev, "TLV: -options     0x%02x\n", tlv->options);
9646 	device_printf(dev, "     -phy_config  0x%02x\n", tlv->phy_config);
9647 	device_printf(dev, "     -fec_options 0x%02x\n", tlv->fec_options);
9648 	device_printf(dev, "     -phy_high    0x%016llx\n",
9649 	    (unsigned long long)tlv->phy_type_high);
9650 	device_printf(dev, "     -phy_low     0x%016llx\n",
9651 	    (unsigned long long)tlv->phy_type_low);
9652 }
9653 
9654 /**
9655  * ice_set_link_management_mode -- Strict or lenient link management
9656  * @sc: device private structure
9657  *
9658  * Some NVMs give the adapter the option to advertise a superset of link
9659  * configurations.  This checks to see if that option is enabled.
9660  * Further, the NVM could also provide a specific set of configurations
9661  * to try; these are cached in the driver's private structure if they
9662  * are available.
9663  */
9664 void
9665 ice_set_link_management_mode(struct ice_softc *sc)
9666 {
9667 	struct ice_port_info *pi = sc->hw.port_info;
9668 	device_t dev = sc->dev;
9669 	struct ice_link_default_override_tlv tlv = { 0 };
9670 	enum ice_status status;
9671 
9672 	/* Port must be in strict mode if FW version is below a certain
9673 	 * version. (i.e. Don't set lenient mode features)
9674 	 */
9675 	if (!(ice_fw_supports_link_override(&sc->hw)))
9676 		return;
9677 
9678 	status = ice_get_link_default_override(&tlv, pi);
9679 	if (status != ICE_SUCCESS) {
9680 		device_printf(dev,
9681 		    "%s: ice_get_link_default_override failed; status %s, aq_err %s\n",
9682 		    __func__, ice_status_str(status),
9683 		    ice_aq_str(sc->hw.adminq.sq_last_status));
9684 		return;
9685 	}
9686 
9687 	if (sc->hw.debug_mask & ICE_DBG_LINK)
9688 		ice_print_ldo_tlv(sc, &tlv);
9689 
9690 	/* Set lenient link mode */
9691 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LENIENT_LINK_MODE) &&
9692 	    (!(tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE)))
9693 		ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_en);
9694 
9695 	/* FW supports reporting a default configuration */
9696 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LINK_MGMT_VER_2) &&
9697 	    ice_fw_supports_report_dflt_cfg(&sc->hw)) {
9698 		ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_en);
9699 		/* Knowing we're at a high enough firmware revision to
9700 		 * support this link management configuration, we don't
9701 		 * need to check/support earlier versions.
9702 		 */
9703 		return;
9704 	}
9705 
9706 	/* Default overrides only work if in lenient link mode */
9707 	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LINK_MGMT_VER_1) &&
9708 	    ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE) &&
9709 	    (tlv.options & ICE_LINK_OVERRIDE_EN))
9710 		ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_en);
9711 
9712 	/* Cache the LDO TLV structure in the driver, since it
9713 	 * won't change during the driver's lifetime.
9714 	 */
9715 	sc->ldo_tlv = tlv;
9716 }
9717 
9718 /**
9719  * ice_set_link -- Set up/down link on phy
9720  * @sc: device private structure
9721  * @enabled: link status to set up
9722  *
9723  * This should be called when change of link status is needed.
9724  */
9725 void
9726 ice_set_link(struct ice_softc *sc, bool enabled)
9727 {
9728 	struct ice_hw *hw = &sc->hw;
9729 	device_t dev = sc->dev;
9730 	enum ice_status status;
9731 
9732 	if (ice_driver_is_detaching(sc))
9733 		return;
9734 
9735 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA))
9736 		return;
9737 
9738 	if (enabled)
9739 		ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
9740 	else {
9741 		status = ice_aq_set_link_restart_an(hw->port_info, false, NULL);
9742 		if (status != ICE_SUCCESS) {
9743 			if (hw->adminq.sq_last_status == ICE_AQ_RC_EMODE)
9744 				device_printf(dev,
9745 				    "%s: Link control not enabled in current device mode\n",
9746 				    __func__);
9747 			else
9748 				device_printf(dev,
9749 				    "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n",
9750 				    __func__, ice_status_str(status),
9751 				    ice_aq_str(hw->adminq.sq_last_status));
9752 		} else
9753 			sc->link_up = false;
9754 	}
9755 }
9756 
9757 /**
9758  * ice_init_saved_phy_cfg -- Set cached user PHY cfg settings with NVM defaults
9759  * @sc: device private structure
9760  *
9761  * This should be called before the tunables for these link settings
9762  * (e.g. advertise_speed) are added -- so that these defaults don't overwrite
9763  * the cached values that the sysctl handlers will write.
9764  *
9765  * This also needs to be called before ice_init_link_configuration, to ensure
9766  * that there are sane values that can be written if there is media available
9767  * in the port.
9768  */
9769 void
9770 ice_init_saved_phy_cfg(struct ice_softc *sc)
9771 {
9772 	struct ice_port_info *pi = sc->hw.port_info;
9773 	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
9774 	struct ice_hw *hw = &sc->hw;
9775 	device_t dev = sc->dev;
9776 	enum ice_status status;
9777 	u64 phy_low, phy_high;
9778 	u8 report_mode = ICE_AQC_REPORT_TOPO_CAP_MEDIA;
9779 
9780 	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LINK_MGMT_VER_2))
9781 		report_mode = ICE_AQC_REPORT_DFLT_CFG;
9782 	status = ice_aq_get_phy_caps(pi, false, report_mode, &pcaps, NULL);
9783 	if (status != ICE_SUCCESS) {
9784 		device_printf(dev,
9785 		    "%s: ice_aq_get_phy_caps (%s) failed; status %s, aq_err %s\n",
9786 		    __func__,
9787 		    report_mode == ICE_AQC_REPORT_DFLT_CFG ? "DFLT" : "w/MEDIA",
9788 		    ice_status_str(status),
9789 		    ice_aq_str(hw->adminq.sq_last_status));
9790 		return;
9791 	}
9792 
9793 	phy_low = le64toh(pcaps.phy_type_low);
9794 	phy_high = le64toh(pcaps.phy_type_high);
9795 
9796 	/* Save off initial config parameters */
9797 	pi->phy.curr_user_speed_req =
9798 	   ice_aq_phy_types_to_link_speeds(phy_low, phy_high);
9799 	pi->phy.curr_user_fec_req = ice_caps_to_fec_mode(pcaps.caps,
9800 	    pcaps.link_fec_options);
9801 	pi->phy.curr_user_fc_req = ice_caps_to_fc_mode(pcaps.caps);
9802 }
9803 
/**
 * ice_module_init - Driver callback to handle module load
 *
 * Runs once when the module is loaded. Anything that must exist for the
 * whole lifetime of the device driver is set up here; at present that is
 * the RDMA interface state.
 *
 * Always returns 0.
 */
static int
ice_module_init(void)
{
	/* One-time RDMA setup; it reports no status to check */
	ice_rdma_init();

	return (0);
}
9816 
/**
 * ice_module_exit - Driver callback to handle module exit
 *
 * Runs when the module is unloaded and undoes the setup performed by
 * ice_module_init.
 *
 * A non-zero return value would prevent the module from being unloaded. It
 * should be reserved for cases where unloading is impossible, such as
 * outstanding memory references that cannot be revoked. This
 * implementation always returns 0.
 */
static int
ice_module_exit(void)
{
	/* Counterpart of the ice_rdma_init() call made at load time */
	ice_rdma_exit();

	return (0);
}
9833 
9834 /**
9835  * ice_module_event_handler - Callback for module events
9836  * @mod: unused module_t parameter
9837  * @what: the event requested
9838  * @arg: unused event argument
9839  *
9840  * Callback used to handle module events from the stack. Used to allow the
9841  * driver to define custom behavior that should happen at module load and
9842  * unload.
9843  */
9844 int
9845 ice_module_event_handler(module_t __unused mod, int what, void __unused *arg)
9846 {
9847 	switch (what) {
9848 	case MOD_LOAD:
9849 		return ice_module_init();
9850 	case MOD_UNLOAD:
9851 		return ice_module_exit();
9852 	default:
9853 		/* TODO: do we need to handle MOD_QUIESCE and MOD_SHUTDOWN? */
9854 		return (EOPNOTSUPP);
9855 	}
9856 }
9857 
9858 /**
9859  * ice_handle_nvm_access_ioctl - Handle an NVM access ioctl request
9860  * @sc: the device private softc
9861  * @ifd: ifdrv ioctl request pointer
9862  */
9863 int
9864 ice_handle_nvm_access_ioctl(struct ice_softc *sc, struct ifdrv *ifd)
9865 {
9866 	union ice_nvm_access_data *data;
9867 	struct ice_nvm_access_cmd *cmd;
9868 	size_t ifd_len = ifd->ifd_len, malloc_len;
9869 	struct ice_hw *hw = &sc->hw;
9870 	device_t dev = sc->dev;
9871 	enum ice_status status;
9872 	u8 *nvm_buffer;
9873 	int err;
9874 
9875 	/*
9876 	 * ifioctl forwards SIOCxDRVSPEC to iflib without performing
9877 	 * a privilege check. In turn, iflib forwards the ioctl to the driver
9878 	 * without performing a privilege check. Perform one here to ensure
9879 	 * that non-privileged threads cannot access this interface.
9880 	 */
9881 	err = priv_check(curthread, PRIV_DRIVER);
9882 	if (err)
9883 		return (err);
9884 
9885 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
9886 		device_printf(dev, "%s: Driver must rebuild data structures after a reset. Operation aborted.\n",
9887 			      __func__);
9888 		return (EBUSY);
9889 	}
9890 
9891 	if (ifd_len < sizeof(struct ice_nvm_access_cmd)) {
9892 		device_printf(dev, "%s: ifdrv length is too small. Got %zu, but expected %zu\n",
9893 			      __func__, ifd_len, sizeof(struct ice_nvm_access_cmd));
9894 		return (EINVAL);
9895 	}
9896 
9897 	if (ifd->ifd_data == NULL) {
9898 		device_printf(dev, "%s: ifd data buffer not present.\n",
9899 			      __func__);
9900 		return (EINVAL);
9901 	}
9902 
9903 	/*
9904 	 * If everything works correctly, ice_handle_nvm_access should not
9905 	 * modify data past the size of the ioctl length. However, it could
9906 	 * lead to memory corruption if it did. Make sure to allocate at least
9907 	 * enough space for the command and data regardless. This
9908 	 * ensures that any access to the data union will not access invalid
9909 	 * memory.
9910 	 */
9911 	malloc_len = max(ifd_len, sizeof(*data) + sizeof(*cmd));
9912 
9913 	nvm_buffer = (u8 *)malloc(malloc_len, M_ICE, M_ZERO | M_WAITOK);
9914 	if (!nvm_buffer)
9915 		return (ENOMEM);
9916 
9917 	/* Copy the NVM access command and data in from user space */
9918 	/* coverity[tainted_data_argument] */
9919 	err = copyin(ifd->ifd_data, nvm_buffer, ifd_len);
9920 	if (err) {
9921 		device_printf(dev, "%s: Copying request from user space failed, err %s\n",
9922 			      __func__, ice_err_str(err));
9923 		goto cleanup_free_nvm_buffer;
9924 	}
9925 
9926 	/*
9927 	 * The NVM command structure is immediately followed by data which
9928 	 * varies in size based on the command.
9929 	 */
9930 	cmd = (struct ice_nvm_access_cmd *)nvm_buffer;
9931 	data = (union ice_nvm_access_data *)(nvm_buffer + sizeof(struct ice_nvm_access_cmd));
9932 
9933 	/* Handle the NVM access request */
9934 	status = ice_handle_nvm_access(hw, cmd, data);
9935 	if (status)
9936 		ice_debug(hw, ICE_DBG_NVM,
9937 			  "NVM access request failed, err %s\n",
9938 			  ice_status_str(status));
9939 
9940 	/* Copy the possibly modified contents of the handled request out */
9941 	err = copyout(nvm_buffer, ifd->ifd_data, ifd_len);
9942 	if (err) {
9943 		device_printf(dev, "%s: Copying response back to user space failed, err %s\n",
9944 			      __func__, ice_err_str(err));
9945 		goto cleanup_free_nvm_buffer;
9946 	}
9947 
9948 	/* Convert private status to an error code for proper ioctl response */
9949 	switch (status) {
9950 	case ICE_SUCCESS:
9951 		err = (0);
9952 		break;
9953 	case ICE_ERR_NO_MEMORY:
9954 		err = (ENOMEM);
9955 		break;
9956 	case ICE_ERR_OUT_OF_RANGE:
9957 		err = (ENOTTY);
9958 		break;
9959 	case ICE_ERR_PARAM:
9960 	default:
9961 		err = (EINVAL);
9962 		break;
9963 	}
9964 
9965 cleanup_free_nvm_buffer:
9966 	free(nvm_buffer, M_ICE);
9967 	return err;
9968 }
9969 
9970 /**
9971  * ice_read_sff_eeprom - Read data from SFF eeprom
9972  * @sc: device softc
9973  * @dev_addr: I2C device address (typically 0xA0 or 0xA2)
9974  * @offset: offset into the eeprom
9975  * @data: pointer to data buffer to store read data in
9976  * @length: length to read; max length is 16
9977  *
9978  * Read from the SFF eeprom in the module for this PF's port. For more details
9979  * on the contents of an SFF eeprom, refer to SFF-8724 (SFP), SFF-8636 (QSFP),
9980  * and SFF-8024 (both).
9981  */
9982 int
9983 ice_read_sff_eeprom(struct ice_softc *sc, u16 dev_addr, u16 offset, u8* data, u16 length)
9984 {
9985 	struct ice_hw *hw = &sc->hw;
9986 	int ret = 0, retries = 0;
9987 	enum ice_status status;
9988 
9989 	if (length > 16)
9990 		return (EINVAL);
9991 
9992 	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
9993 		return (ENOSYS);
9994 
9995 	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA))
9996 		return (ENXIO);
9997 
9998 	do {
9999 		status = ice_aq_sff_eeprom(hw, 0, dev_addr,
10000 					   offset, 0, 0, data, length,
10001 					   false, NULL);
10002 		if (!status) {
10003 			ret = 0;
10004 			break;
10005 		}
10006 		if (status == ICE_ERR_AQ_ERROR &&
10007 		    hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY) {
10008 			ret = EBUSY;
10009 			continue;
10010 		}
10011 		if (status == ICE_ERR_AQ_ERROR &&
10012 		    hw->adminq.sq_last_status == ICE_AQ_RC_EACCES) {
10013 			/* FW says I2C access isn't supported */
10014 			ret = EACCES;
10015 			break;
10016 		}
10017 		if (status == ICE_ERR_AQ_ERROR &&
10018 		    hw->adminq.sq_last_status == ICE_AQ_RC_EPERM) {
10019 			device_printf(sc->dev,
10020 				  "%s: Module pointer location specified in command does not permit the required operation.\n",
10021 				  __func__);
10022 			ret = EPERM;
10023 			break;
10024 		} else {
10025 			device_printf(sc->dev,
10026 				  "%s: Error reading I2C data: err %s aq_err %s\n",
10027 				  __func__, ice_status_str(status),
10028 				  ice_aq_str(hw->adminq.sq_last_status));
10029 			ret = EIO;
10030 			break;
10031 		}
10032 	} while (retries++ < ICE_I2C_MAX_RETRIES);
10033 
10034 	if (ret == EBUSY)
10035 		device_printf(sc->dev,
10036 			  "%s: Error reading I2C data after %d retries\n",
10037 			  __func__, ICE_I2C_MAX_RETRIES);
10038 
10039 	return (ret);
10040 }
10041 
10042 /**
10043  * ice_handle_i2c_req - Driver independent I2C request handler
10044  * @sc: device softc
10045  * @req: The I2C parameters to use
10046  *
10047  * Read from the port's I2C eeprom using the parameters from the ioctl.
10048  */
10049 int
10050 ice_handle_i2c_req(struct ice_softc *sc, struct ifi2creq *req)
10051 {
10052 	return ice_read_sff_eeprom(sc, req->dev_addr, req->offset, req->data, req->len);
10053 }
10054 
10055 /**
10056  * ice_sysctl_read_i2c_diag_data - Read some module diagnostic data via i2c
10057  * @oidp: sysctl oid structure
10058  * @arg1: pointer to private data structure
10059  * @arg2: unused
10060  * @req: sysctl request pointer
10061  *
10062  * Read 8 bytes of diagnostic data from the SFF eeprom in the (Q)SFP module
10063  * inserted into the port.
10064  *
10065  *             | SFP A2  | QSFP Lower Page
10066  * ------------|---------|----------------
10067  * Temperature | 96-97	 | 22-23
10068  * Vcc         | 98-99   | 26-27
10069  * TX power    | 102-103 | 34-35..40-41
10070  * RX power    | 104-105 | 50-51..56-57
10071  */
10072 static int
10073 ice_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS)
10074 {
10075 	struct ice_softc *sc = (struct ice_softc *)arg1;
10076 	device_t dev = sc->dev;
10077 	struct sbuf *sbuf;
10078 	int ret;
10079 	u8 data[16];
10080 
10081 	UNREFERENCED_PARAMETER(arg2);
10082 	UNREFERENCED_PARAMETER(oidp);
10083 
10084 	if (ice_driver_is_detaching(sc))
10085 		return (ESHUTDOWN);
10086 
10087 	if (req->oldptr == NULL) {
10088 		ret = SYSCTL_OUT(req, 0, 128);
10089 		return (ret);
10090 	}
10091 
10092 	ret = ice_read_sff_eeprom(sc, 0xA0, 0, data, 1);
10093 	if (ret)
10094 		return (ret);
10095 
10096 	/* 0x3 for SFP; 0xD/0x11 for QSFP+/QSFP28 */
10097 	if (data[0] == 0x3) {
10098 		/*
10099 		 * Check for:
10100 		 * - Internally calibrated data
10101 		 * - Diagnostic monitoring is implemented
10102 		 */
10103 		ice_read_sff_eeprom(sc, 0xA0, 92, data, 1);
10104 		if (!(data[0] & 0x60)) {
10105 			device_printf(dev, "Module doesn't support diagnostics: 0xA0[92] = %02X\n", data[0]);
10106 			return (ENODEV);
10107 		}
10108 
10109 		sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10110 
10111 		ice_read_sff_eeprom(sc, 0xA2, 96, data, 4);
10112 		for (int i = 0; i < 4; i++)
10113 			sbuf_printf(sbuf, "%02X ", data[i]);
10114 
10115 		ice_read_sff_eeprom(sc, 0xA2, 102, data, 4);
10116 		for (int i = 0; i < 4; i++)
10117 			sbuf_printf(sbuf, "%02X ", data[i]);
10118 	} else if (data[0] == 0xD || data[0] == 0x11) {
10119 		/*
10120 		 * QSFP+ modules are always internally calibrated, and must indicate
10121 		 * what types of diagnostic monitoring are implemented
10122 		 */
10123 		sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10124 
10125 		ice_read_sff_eeprom(sc, 0xA0, 22, data, 2);
10126 		for (int i = 0; i < 2; i++)
10127 			sbuf_printf(sbuf, "%02X ", data[i]);
10128 
10129 		ice_read_sff_eeprom(sc, 0xA0, 26, data, 2);
10130 		for (int i = 0; i < 2; i++)
10131 			sbuf_printf(sbuf, "%02X ", data[i]);
10132 
10133 		ice_read_sff_eeprom(sc, 0xA0, 34, data, 2);
10134 		for (int i = 0; i < 2; i++)
10135 			sbuf_printf(sbuf, "%02X ", data[i]);
10136 
10137 		ice_read_sff_eeprom(sc, 0xA0, 50, data, 2);
10138 		for (int i = 0; i < 2; i++)
10139 			sbuf_printf(sbuf, "%02X ", data[i]);
10140 	} else {
10141 		device_printf(dev, "Module is not SFP/SFP+/SFP28/QSFP+ (%02X)\n", data[0]);
10142 		return (ENODEV);
10143 	}
10144 
10145 	sbuf_finish(sbuf);
10146 	sbuf_delete(sbuf);
10147 
10148 	return (0);
10149 }
10150 
10151 /**
10152  * ice_alloc_intr_tracking - Setup interrupt tracking structures
10153  * @sc: device softc structure
10154  *
10155  * Sets up the resource manager for keeping track of interrupt allocations,
10156  * and initializes the tracking maps for the PF's interrupt allocations.
10157  *
10158  * Unlike the scheme for queues, this is done in one step since both the
10159  * manager and the maps both have the same lifetime.
10160  *
10161  * @returns 0 on success, or an error code on failure.
10162  */
10163 int
10164 ice_alloc_intr_tracking(struct ice_softc *sc)
10165 {
10166 	struct ice_hw *hw = &sc->hw;
10167 	device_t dev = sc->dev;
10168 	int err;
10169 
10170 	/* Initialize the interrupt allocation manager */
10171 	err = ice_resmgr_init_contig_only(&sc->dev_imgr,
10172 	    hw->func_caps.common_cap.num_msix_vectors);
10173 	if (err) {
10174 		device_printf(dev, "Unable to initialize PF interrupt manager: %s\n",
10175 			      ice_err_str(err));
10176 		return (err);
10177 	}
10178 
10179 	/* Allocate PF interrupt mapping storage */
10180 	if (!(sc->pf_imap =
10181 	      (u16 *)malloc(sizeof(u16) * hw->func_caps.common_cap.num_msix_vectors,
10182 	      M_ICE, M_NOWAIT))) {
10183 		device_printf(dev, "Unable to allocate PF imap memory\n");
10184 		err = ENOMEM;
10185 		goto free_imgr;
10186 	}
10187 	if (!(sc->rdma_imap =
10188 	      (u16 *)malloc(sizeof(u16) * hw->func_caps.common_cap.num_msix_vectors,
10189 	      M_ICE, M_NOWAIT))) {
10190 		device_printf(dev, "Unable to allocate RDMA imap memory\n");
10191 		err = ENOMEM;
10192 		free(sc->pf_imap, M_ICE);
10193 		goto free_imgr;
10194 	}
10195 	for (u32 i = 0; i < hw->func_caps.common_cap.num_msix_vectors; i++) {
10196 		sc->pf_imap[i] = ICE_INVALID_RES_IDX;
10197 		sc->rdma_imap[i] = ICE_INVALID_RES_IDX;
10198 	}
10199 
10200 	return (0);
10201 
10202 free_imgr:
10203 	ice_resmgr_destroy(&sc->dev_imgr);
10204 	return (err);
10205 }
10206 
10207 /**
10208  * ice_free_intr_tracking - Free PF interrupt tracking structures
10209  * @sc: device softc structure
10210  *
10211  * Frees the interrupt resource allocation manager and the PF's owned maps.
10212  *
10213  * VF maps are released when the owning VF's are destroyed, which should always
10214  * happen before this function is called.
10215  */
10216 void
10217 ice_free_intr_tracking(struct ice_softc *sc)
10218 {
10219 	if (sc->pf_imap) {
10220 		ice_resmgr_release_map(&sc->dev_imgr, sc->pf_imap,
10221 				       sc->lan_vectors);
10222 		free(sc->pf_imap, M_ICE);
10223 		sc->pf_imap = NULL;
10224 	}
10225 	if (sc->rdma_imap) {
10226 		ice_resmgr_release_map(&sc->dev_imgr, sc->rdma_imap,
10227 				       sc->lan_vectors);
10228 		free(sc->rdma_imap, M_ICE);
10229 		sc->rdma_imap = NULL;
10230 	}
10231 
10232 	ice_resmgr_destroy(&sc->dev_imgr);
10233 
10234 	ice_resmgr_destroy(&sc->os_imgr);
10235 }
10236 
10237 /**
10238  * ice_apply_supported_speed_filter - Mask off unsupported speeds
10239  * @report_speeds: bit-field for the desired link speeds
10240  * @mod_type: type of module/sgmii connection we have
10241  *
10242  * Given a bitmap of the desired lenient mode link speeds,
10243  * this function will mask off the speeds that are not currently
10244  * supported by the device.
10245  */
10246 static u16
10247 ice_apply_supported_speed_filter(u16 report_speeds, u8 mod_type)
10248 {
10249 	u16 speed_mask;
10250 	enum { IS_SGMII, IS_SFP, IS_QSFP } module;
10251 
10252 	/*
10253 	 * The SFF specification says 0 is unknown, so we'll
10254 	 * treat it like we're connected through SGMII for now.
10255 	 * This may need revisiting if a new type is supported
10256 	 * in the future.
10257 	 */
10258 	switch (mod_type) {
10259 	case 0:
10260 		module = IS_SGMII;
10261 		break;
10262 	case 3:
10263 		module = IS_SFP;
10264 		break;
10265 	default:
10266 		module = IS_QSFP;
10267 		break;
10268 	}
10269 
10270 	/* We won't offer anything lower than 100M for any part,
10271 	 * but we'll need to mask off other speeds based on the
10272 	 * device and module type.
10273 	 */
10274 	speed_mask = ~((u16)ICE_AQ_LINK_SPEED_100MB - 1);
10275 	if ((report_speeds & ICE_AQ_LINK_SPEED_10GB) && (module == IS_SFP))
10276 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1);
10277 	if (report_speeds & ICE_AQ_LINK_SPEED_25GB)
10278 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1);
10279 	if (report_speeds & ICE_AQ_LINK_SPEED_50GB) {
10280 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1);
10281 		if (module == IS_QSFP)
10282 			speed_mask = ~((u16)ICE_AQ_LINK_SPEED_10GB - 1);
10283 	}
10284 	if (report_speeds & ICE_AQ_LINK_SPEED_100GB)
10285 		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_25GB - 1);
10286 	return (report_speeds & speed_mask);
10287 }
10288 
10289 /**
10290  * ice_init_health_events - Enable FW health event reporting
10291  * @sc: device softc
10292  *
10293  * Will try to enable firmware health event reporting, but shouldn't
10294  * cause any grief (to the caller) if this fails.
10295  */
10296 void
10297 ice_init_health_events(struct ice_softc *sc)
10298 {
10299 	enum ice_status status;
10300 	u8 health_mask;
10301 
10302 	if ((!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_HEALTH_STATUS)) ||
10303 		(!sc->enable_health_events))
10304 		return;
10305 
10306 	health_mask = ICE_AQC_HEALTH_STATUS_SET_PF_SPECIFIC_MASK |
10307 		      ICE_AQC_HEALTH_STATUS_SET_GLOBAL_MASK;
10308 
10309 	status = ice_aq_set_health_status_config(&sc->hw, health_mask, NULL);
10310 	if (status)
10311 		device_printf(sc->dev,
10312 		    "Failed to enable firmware health events, err %s aq_err %s\n",
10313 		    ice_status_str(status),
10314 		    ice_aq_str(sc->hw.adminq.sq_last_status));
10315 	else
10316 		ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_en);
10317 }
10318 
10319 /**
10320  * ice_print_health_status_string - Print message for given FW health event
10321  * @dev: the PCIe device
10322  * @elem: health status element containing status code
10323  *
10324  * A rather large list of possible health status codes and their associated
10325  * messages.
10326  */
10327 static void
10328 ice_print_health_status_string(device_t dev,
10329 			       struct ice_aqc_health_status_elem *elem)
10330 {
10331 	u16 status_code = le16toh(elem->health_status_code);
10332 
10333 	switch (status_code) {
10334 	case ICE_AQC_HEALTH_STATUS_INFO_RECOVERY:
10335 		device_printf(dev, "The device is in firmware recovery mode.\n");
10336 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10337 		break;
10338 	case ICE_AQC_HEALTH_STATUS_ERR_FLASH_ACCESS:
10339 		device_printf(dev, "The flash chip cannot be accessed.\n");
10340 		device_printf(dev, "Possible Solution: If issue persists, call customer support.\n");
10341 		break;
10342 	case ICE_AQC_HEALTH_STATUS_ERR_NVM_AUTH:
10343 		device_printf(dev, "NVM authentication failed.\n");
10344 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10345 		break;
10346 	case ICE_AQC_HEALTH_STATUS_ERR_OROM_AUTH:
10347 		device_printf(dev, "Option ROM authentication failed.\n");
10348 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10349 		break;
10350 	case ICE_AQC_HEALTH_STATUS_ERR_DDP_AUTH:
10351 		device_printf(dev, "DDP package failed.\n");
10352 		device_printf(dev, "Possible Solution: Update to latest base driver and DDP package.\n");
10353 		break;
10354 	case ICE_AQC_HEALTH_STATUS_ERR_NVM_COMPAT:
10355 		device_printf(dev, "NVM image is incompatible.\n");
10356 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10357 		break;
10358 	case ICE_AQC_HEALTH_STATUS_ERR_OROM_COMPAT:
10359 		device_printf(dev, "Option ROM is incompatible.\n");
10360 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10361 		break;
10362 	case ICE_AQC_HEALTH_STATUS_ERR_DCB_MIB:
10363 		device_printf(dev, "Supplied MIB file is invalid. DCB reverted to default configuration.\n");
10364 		device_printf(dev, "Possible Solution: Disable FW-LLDP and check DCBx system configuration.\n");
10365 		break;
10366 	case ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_STRICT:
10367 		device_printf(dev, "An unsupported module was detected.\n");
10368 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10369 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10370 		break;
10371 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_TYPE:
10372 		device_printf(dev, "Module type is not supported.\n");
10373 		device_printf(dev, "Possible Solution: Change or replace the module or cable.\n");
10374 		break;
10375 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_QUAL:
10376 		device_printf(dev, "Module is not qualified.\n");
10377 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10378 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10379 		device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n");
10380 		break;
10381 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_COMM:
10382 		device_printf(dev, "Device cannot communicate with the module.\n");
10383 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10384 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10385 		device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n");
10386 		break;
10387 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_CONFLICT:
10388 		device_printf(dev, "Unresolved module conflict.\n");
10389 		device_printf(dev, "Possible Solution 1: Manually set speed/duplex or use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10390 		device_printf(dev, "Possible Solution 2: If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.\n");
10391 		break;
10392 	case ICE_AQC_HEALTH_STATUS_ERR_MOD_NOT_PRESENT:
10393 		device_printf(dev, "Module is not present.\n");
10394 		device_printf(dev, "Possible Solution 1: Check that the module is inserted correctly.\n");
10395 		device_printf(dev, "Possible Solution 2: If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.\n");
10396 		break;
10397 	case ICE_AQC_HEALTH_STATUS_INFO_MOD_UNDERUTILIZED:
10398 		device_printf(dev, "Underutilized module.\n");
10399 		device_printf(dev, "Possible Solution 1: Change or replace the module or cable.\n");
10400 		device_printf(dev, "Possible Solution 2: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10401 		break;
10402 	case ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_LENIENT:
10403 		device_printf(dev, "An unsupported module was detected.\n");
10404 		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10405 		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10406 		device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n");
10407 		break;
10408 	case ICE_AQC_HEALTH_STATUS_ERR_INVALID_LINK_CFG:
10409 		device_printf(dev, "Invalid link configuration.\n");
10410 		break;
10411 	case ICE_AQC_HEALTH_STATUS_ERR_PORT_ACCESS:
10412 		device_printf(dev, "Port hardware access error.\n");
10413 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10414 		break;
10415 	case ICE_AQC_HEALTH_STATUS_ERR_PORT_UNREACHABLE:
10416 		device_printf(dev, "A port is unreachable.\n");
10417 		device_printf(dev, "Possible Solution 1: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10418 		device_printf(dev, "Possible Solution 2: Update to the latest NVM image.\n");
10419 		break;
10420 	case ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_MOD_LIMITED:
10421 		device_printf(dev, "Port speed is limited due to module.\n");
10422 		device_printf(dev, "Possible Solution: Change the module or use Intel(R) Ethernet Port Configuration Tool to configure the port option to match the current module speed.\n");
10423 		break;
10424 	case ICE_AQC_HEALTH_STATUS_ERR_PARALLEL_FAULT:
10425 		device_printf(dev, "A parallel fault was detected.\n");
10426 		device_printf(dev, "Possible Solution: Check link partner connection and configuration.\n");
10427 		break;
10428 	case ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_PHY_LIMITED:
10429 		device_printf(dev, "Port speed is limited by PHY capabilities.\n");
10430 		device_printf(dev, "Possible Solution 1: Change the module to align to port option.\n");
10431 		device_printf(dev, "Possible Solution 2: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10432 		break;
10433 	case ICE_AQC_HEALTH_STATUS_ERR_NETLIST_TOPO:
10434 		device_printf(dev, "LOM topology netlist is corrupted.\n");
10435 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10436 		break;
10437 	case ICE_AQC_HEALTH_STATUS_ERR_NETLIST:
10438 		device_printf(dev, "Unrecoverable netlist error.\n");
10439 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10440 		break;
10441 	case ICE_AQC_HEALTH_STATUS_ERR_TOPO_CONFLICT:
10442 		device_printf(dev, "Port topology conflict.\n");
10443 		device_printf(dev, "Possible Solution 1: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10444 		device_printf(dev, "Possible Solution 2: Update to the latest NVM image.\n");
10445 		break;
10446 	case ICE_AQC_HEALTH_STATUS_ERR_LINK_HW_ACCESS:
10447 		device_printf(dev, "Unrecoverable hardware access error.\n");
10448 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10449 		break;
10450 	case ICE_AQC_HEALTH_STATUS_ERR_LINK_RUNTIME:
10451 		device_printf(dev, "Unrecoverable runtime error.\n");
10452 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10453 		break;
10454 	case ICE_AQC_HEALTH_STATUS_ERR_DNL_INIT:
10455 		device_printf(dev, "Link management engine failed to initialize.\n");
10456 		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10457 		break;
10458 	default:
10459 		break;
10460 	}
10461 }
10462 
10463 /**
10464  * ice_handle_health_status_event - helper function to output health status
10465  * @sc: device softc structure
10466  * @event: event received on a control queue
10467  *
10468  * Prints out the appropriate string based on the given Health Status Event
10469  * code.
10470  */
10471 static void
10472 ice_handle_health_status_event(struct ice_softc *sc,
10473 			       struct ice_rq_event_info *event)
10474 {
10475 	struct ice_aqc_health_status_elem *health_info;
10476 	u16 status_count;
10477 	int i;
10478 
10479 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_HEALTH_STATUS))
10480 		return;
10481 
10482 	health_info = (struct ice_aqc_health_status_elem *)event->msg_buf;
10483 	status_count = le16toh(event->desc.params.get_health_status.health_status_count);
10484 
10485 	if (status_count > (event->buf_len / sizeof(*health_info))) {
10486 		device_printf(sc->dev, "Received a health status event with invalid event count\n");
10487 		return;
10488 	}
10489 
10490 	for (i = 0; i < status_count; i++) {
10491 		ice_print_health_status_string(sc->dev, health_info);
10492 		health_info++;
10493 	}
10494 }
10495 
10496 /**
10497  * ice_set_default_local_lldp_mib - Possibly apply local LLDP MIB to FW
10498  * @sc: device softc structure
10499  *
10500  * This function needs to be called after link up; it makes sure the FW has
10501  * certain PFC/DCB settings. In certain configurations this will re-apply a
10502  * default local LLDP MIB configuration; this is intended to workaround a FW
10503  * behavior where these settings seem to be cleared on link up.
10504  */
10505 void
10506 ice_set_default_local_lldp_mib(struct ice_softc *sc)
10507 {
10508 	struct ice_hw *hw = &sc->hw;
10509 	struct ice_port_info *pi;
10510 	device_t dev = sc->dev;
10511 	enum ice_status status;
10512 
10513 	/* Set Local MIB can disrupt flow control settings for
10514 	 * non-DCB-supported devices.
10515 	 */
10516 	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_DCB))
10517 		return;
10518 
10519 	pi = hw->port_info;
10520 
10521 	/* Don't overwrite a custom SW configuration */
10522 	if (!pi->qos_cfg.is_sw_lldp &&
10523 	    !ice_test_state(&sc->state, ICE_STATE_MULTIPLE_TCS))
10524 		ice_set_default_local_mib_settings(sc);
10525 
10526 	status = ice_set_dcb_cfg(pi);
10527 
10528 	if (status)
10529 		device_printf(dev,
10530 		    "Error setting Local LLDP MIB: %s aq_err %s\n",
10531 		    ice_status_str(status),
10532 		    ice_aq_str(hw->adminq.sq_last_status));
10533 }
10534 
10535 /**
10536  * ice_sbuf_print_ets_cfg - Helper function to print ETS cfg
10537  * @sbuf: string buffer to print to
10538  * @name: prefix string to use
10539  * @ets: structure to pull values from
10540  *
10541  * A helper function for ice_sysctl_dump_dcbx_cfg(), this
10542  * formats the ETS rec and cfg TLVs into text.
10543  */
10544 static void
10545 ice_sbuf_print_ets_cfg(struct sbuf *sbuf, const char *name, struct ice_dcb_ets_cfg *ets)
10546 {
10547 	sbuf_printf(sbuf, "%s.willing: %u\n", name, ets->willing);
10548 	sbuf_printf(sbuf, "%s.cbs: %u\n", name, ets->cbs);
10549 	sbuf_printf(sbuf, "%s.maxtcs: %u\n", name, ets->maxtcs);
10550 
10551 	sbuf_printf(sbuf, "%s.prio_table:", name);
10552 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10553 		sbuf_printf(sbuf, " %d", ets->prio_table[i]);
10554 	sbuf_printf(sbuf, "\n");
10555 
10556 	sbuf_printf(sbuf, "%s.tcbwtable:", name);
10557 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10558 		sbuf_printf(sbuf, " %d", ets->tcbwtable[i]);
10559 	sbuf_printf(sbuf, "\n");
10560 
10561 	sbuf_printf(sbuf, "%s.tsatable:", name);
10562 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10563 		sbuf_printf(sbuf, " %d", ets->tsatable[i]);
10564 	sbuf_printf(sbuf, "\n");
10565 }
10566 
10567 /**
10568  * ice_sysctl_dump_dcbx_cfg - Print out DCBX/DCB config info
10569  * @oidp: sysctl oid structure
10570  * @arg1: pointer to private data structure
10571  * @arg2: AQ define for either Local or Remote MIB
10572  * @req: sysctl request pointer
10573  *
10574  * Prints out DCB/DCBX configuration, including the contents
10575  * of either the local or remote MIB, depending on the value
10576  * used in arg2.
10577  */
10578 static int
10579 ice_sysctl_dump_dcbx_cfg(SYSCTL_HANDLER_ARGS)
10580 {
10581 	struct ice_softc *sc = (struct ice_softc *)arg1;
10582 	struct ice_aqc_get_cee_dcb_cfg_resp cee_cfg = {};
10583 	struct ice_dcbx_cfg dcb_buf = {};
10584 	struct ice_dcbx_cfg *dcbcfg;
10585 	struct ice_hw *hw = &sc->hw;
10586 	device_t dev = sc->dev;
10587 	struct sbuf *sbuf;
10588 	enum ice_status status;
10589 	u8 maxtcs, dcbx_status, is_sw_lldp;
10590 
10591 	UNREFERENCED_PARAMETER(oidp);
10592 
10593 	if (ice_driver_is_detaching(sc))
10594 		return (ESHUTDOWN);
10595 
10596 	is_sw_lldp = hw->port_info->qos_cfg.is_sw_lldp;
10597 
10598 	/* The driver doesn't receive a Remote MIB via SW */
10599 	if (is_sw_lldp && arg2 == ICE_AQ_LLDP_MIB_REMOTE)
10600 		return (ENOENT);
10601 
10602 	dcbcfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
10603 	if (!is_sw_lldp) {
10604 		/* Collect information from the FW in FW LLDP mode */
10605 		dcbcfg = &dcb_buf;
10606 		status = ice_aq_get_dcb_cfg(hw, (u8)arg2,
10607 		    ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbcfg);
10608 		if (status && arg2 == ICE_AQ_LLDP_MIB_REMOTE &&
10609 		    hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) {
10610 			device_printf(dev,
10611 			    "Unable to query Remote MIB; port has not received one yet\n");
10612 			return (ENOENT);
10613 		}
10614 		if (status) {
10615 			device_printf(dev, "Unable to query LLDP MIB, err %s aq_err %s\n",
10616 			    ice_status_str(status),
10617 			    ice_aq_str(hw->adminq.sq_last_status));
10618 			return (EIO);
10619 		}
10620 	}
10621 
10622 	status = ice_aq_get_cee_dcb_cfg(hw, &cee_cfg, NULL);
10623 	if (status == ICE_SUCCESS)
10624 		dcbcfg->dcbx_mode = ICE_DCBX_MODE_CEE;
10625 	else if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)
10626 		dcbcfg->dcbx_mode = ICE_DCBX_MODE_IEEE;
10627 	else
10628 		device_printf(dev, "Get CEE DCB Cfg AQ cmd err %s aq_err %s\n",
10629 		    ice_status_str(status),
10630 		    ice_aq_str(hw->adminq.sq_last_status));
10631 
10632 	maxtcs = hw->func_caps.common_cap.maxtc;
10633 	dcbx_status = ice_get_dcbx_status(hw);
10634 
10635 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10636 
10637 	/* Do the actual printing */
10638 	sbuf_printf(sbuf, "\n");
10639 	sbuf_printf(sbuf, "SW LLDP mode: %d\n", is_sw_lldp);
10640 	sbuf_printf(sbuf, "Function caps maxtcs: %d\n", maxtcs);
10641 	sbuf_printf(sbuf, "dcbx_status: %d\n", dcbx_status);
10642 
10643 	sbuf_printf(sbuf, "numapps: %u\n", dcbcfg->numapps);
10644 	sbuf_printf(sbuf, "CEE TLV status: %u\n", dcbcfg->tlv_status);
10645 	sbuf_printf(sbuf, "pfc_mode: %s\n", (dcbcfg->pfc_mode == ICE_QOS_MODE_DSCP) ?
10646 	    "DSCP" : "VLAN");
10647 	sbuf_printf(sbuf, "dcbx_mode: %s\n",
10648 	    (dcbcfg->dcbx_mode == ICE_DCBX_MODE_IEEE) ? "IEEE" :
10649 	    (dcbcfg->dcbx_mode == ICE_DCBX_MODE_CEE) ? "CEE" :
10650 	    "Unknown");
10651 
10652 	ice_sbuf_print_ets_cfg(sbuf, "etscfg", &dcbcfg->etscfg);
10653 	ice_sbuf_print_ets_cfg(sbuf, "etsrec", &dcbcfg->etsrec);
10654 
10655 	sbuf_printf(sbuf, "pfc.willing: %u\n", dcbcfg->pfc.willing);
10656 	sbuf_printf(sbuf, "pfc.mbc: %u\n", dcbcfg->pfc.mbc);
10657 	sbuf_printf(sbuf, "pfc.pfccap: 0x%0x\n", dcbcfg->pfc.pfccap);
10658 	sbuf_printf(sbuf, "pfc.pfcena: 0x%0x\n", dcbcfg->pfc.pfcena);
10659 
10660 	if (arg2 == ICE_AQ_LLDP_MIB_LOCAL) {
10661 		sbuf_printf(sbuf, "dscp_map:\n");
10662 		for (int i = 0; i < 8; i++) {
10663 			for (int j = 0; j < 8; j++)
10664 				sbuf_printf(sbuf, " %d",
10665 					    dcbcfg->dscp_map[i * 8 + j]);
10666 			sbuf_printf(sbuf, "\n");
10667 		}
10668 
10669 		sbuf_printf(sbuf, "\nLocal registers:\n");
10670 		sbuf_printf(sbuf, "PRTDCB_GENC.NUMTC: %d\n",
10671 		    (rd32(hw, PRTDCB_GENC) & PRTDCB_GENC_NUMTC_M)
10672 		        >> PRTDCB_GENC_NUMTC_S);
10673 		sbuf_printf(sbuf, "PRTDCB_TUP2TC: 0x%0x\n",
10674 		    (rd32(hw, PRTDCB_TUP2TC)));
10675 		sbuf_printf(sbuf, "PRTDCB_RUP2TC: 0x%0x\n",
10676 		    (rd32(hw, PRTDCB_RUP2TC)));
10677 		sbuf_printf(sbuf, "GLDCB_TC2PFC: 0x%0x\n",
10678 		    (rd32(hw, GLDCB_TC2PFC)));
10679 	}
10680 
10681 	/* Finish */
10682 	sbuf_finish(sbuf);
10683 	sbuf_delete(sbuf);
10684 
10685 	return (0);
10686 }
10687 
10688 /**
10689  * ice_sysctl_dump_vsi_cfg - print PF LAN VSI configuration
10690  * @oidp: sysctl oid structure
10691  * @arg1: pointer to private data structure
10692  * @arg2: unused
10693  * @req: sysctl request pointer
10694  *
10695  * XXX: This could be extended to apply to arbitrary PF-owned VSIs,
10696  * but for simplicity, this only works on the PF's LAN VSI.
10697  */
10698 static int
10699 ice_sysctl_dump_vsi_cfg(SYSCTL_HANDLER_ARGS)
10700 {
10701 	struct ice_softc *sc = (struct ice_softc *)arg1;
10702 	struct ice_vsi_ctx ctx = { 0 };
10703 	struct ice_hw *hw = &sc->hw;
10704 	device_t dev = sc->dev;
10705 	struct sbuf *sbuf;
10706 	enum ice_status status;
10707 
10708 	UNREFERENCED_PARAMETER(oidp);
10709 	UNREFERENCED_PARAMETER(arg2);
10710 
10711 	if (ice_driver_is_detaching(sc))
10712 		return (ESHUTDOWN);
10713 
10714 	/* Get HW absolute index of a VSI */
10715 	ctx.vsi_num = ice_get_hw_vsi_num(hw, sc->pf_vsi.idx);
10716 
10717 	status = ice_aq_get_vsi_params(hw, &ctx, NULL);
10718 	if (status != ICE_SUCCESS) {
10719 		device_printf(dev,
10720 		    "Get VSI AQ call failed, err %s aq_err %s\n",
10721 		    ice_status_str(status),
10722 		    ice_aq_str(hw->adminq.sq_last_status));
10723 		return (EIO);
10724 	}
10725 
10726 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10727 
10728 	/* Do the actual printing */
10729 	sbuf_printf(sbuf, "\n");
10730 
10731 	sbuf_printf(sbuf, "VSI NUM: %d\n", ctx.vsi_num);
10732 	sbuf_printf(sbuf, "VF  NUM: %d\n", ctx.vf_num);
10733 	sbuf_printf(sbuf, "VSIs allocated: %d\n", ctx.vsis_allocd);
10734 	sbuf_printf(sbuf, "VSIs unallocated: %d\n", ctx.vsis_unallocated);
10735 
10736 	sbuf_printf(sbuf, "Rx Queue Map method: %d\n",
10737 	    LE16_TO_CPU(ctx.info.mapping_flags));
10738 	/* The PF VSI is always contiguous, so there's no if-statement here */
10739 	sbuf_printf(sbuf, "Rx Queue base: %d\n",
10740 	    LE16_TO_CPU(ctx.info.q_mapping[0]));
10741 	sbuf_printf(sbuf, "Rx Queue count: %d\n",
10742 	    LE16_TO_CPU(ctx.info.q_mapping[1]));
10743 
10744 	sbuf_printf(sbuf, "TC qbases  :");
10745 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10746 		sbuf_printf(sbuf, " %4d",
10747 		    ctx.info.tc_mapping[i] & ICE_AQ_VSI_TC_Q_OFFSET_M);
10748 	}
10749 	sbuf_printf(sbuf, "\n");
10750 
10751 	sbuf_printf(sbuf, "TC qcounts :");
10752 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10753 		sbuf_printf(sbuf, " %4d",
10754 		    1 << (ctx.info.tc_mapping[i] >> ICE_AQ_VSI_TC_Q_NUM_S));
10755 	}
10756 
10757 	/* Finish */
10758 	sbuf_finish(sbuf);
10759 	sbuf_delete(sbuf);
10760 
10761 	return (0);
10762 }
10763 
10764 /**
10765  * ice_ets_str_to_tbl - Parse string into ETS table
10766  * @str: input string to parse
10767  * @table: output eight values used for ETS values
10768  * @limit: max valid value to accept for ETS values
10769  *
10770  * Parses a string and converts the eight values within
10771  * into a table that can be used in setting ETS settings
10772  * in a MIB.
10773  *
10774  * @return 0 on success, EINVAL if a parsed value is
10775  * not between 0 and limit.
10776  */
10777 static int
10778 ice_ets_str_to_tbl(const char *str, u8 *table, u8 limit)
10779 {
10780 	const char *str_start = str;
10781 	char *str_end;
10782 	long token;
10783 
10784 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10785 		token = strtol(str_start, &str_end, 0);
10786 		if (token < 0 || token > limit)
10787 			return (EINVAL);
10788 
10789 		table[i] = (u8)token;
10790 		str_start = (str_end + 1);
10791 	}
10792 
10793 	return (0);
10794 }
10795 
10796 /**
10797  * ice_check_ets_bw - Check if ETS bw vals are valid
10798  * @table: eight values used for ETS bandwidth
10799  *
10800  * @return true if the sum of all 8 values in table
10801  * equals 100.
10802  */
10803 static bool
10804 ice_check_ets_bw(u8 *table)
10805 {
10806 	int sum = 0;
10807 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10808 		sum += (int)table[i];
10809 
10810 	return (sum == 100);
10811 }
10812 
10813 /**
10814  * ice_cfg_pba_num - Determine if PBA Number is retrievable
10815  * @sc: the device private softc structure
10816  *
10817  * Sets the feature flag for the existence of a PBA number
10818  * based on the success of the read command.  This does not
10819  * cache the result.
10820  */
10821 void
10822 ice_cfg_pba_num(struct ice_softc *sc)
10823 {
10824 	u8 pba_string[32] = "";
10825 
10826 	if ((ice_is_bit_set(sc->feat_cap, ICE_FEATURE_HAS_PBA)) &&
10827 	    (ice_read_pba_string(&sc->hw, pba_string, sizeof(pba_string)) == 0))
10828 		ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_en);
10829 }
10830 
10831 /**
10832  * ice_sysctl_query_port_ets - print Port ETS Config from AQ
10833  * @oidp: sysctl oid structure
10834  * @arg1: pointer to private data structure
10835  * @arg2: unused
10836  * @req: sysctl request pointer
10837  */
10838 static int
10839 ice_sysctl_query_port_ets(SYSCTL_HANDLER_ARGS)
10840 {
10841 	struct ice_softc *sc = (struct ice_softc *)arg1;
10842 	struct ice_aqc_port_ets_elem port_ets = { 0 };
10843 	struct ice_hw *hw = &sc->hw;
10844 	struct ice_port_info *pi;
10845 	device_t dev = sc->dev;
10846 	struct sbuf *sbuf;
10847 	enum ice_status status;
10848 	int i = 0;
10849 
10850 	UNREFERENCED_PARAMETER(oidp);
10851 	UNREFERENCED_PARAMETER(arg2);
10852 
10853 	if (ice_driver_is_detaching(sc))
10854 		return (ESHUTDOWN);
10855 
10856 	pi = hw->port_info;
10857 
10858 	status = ice_aq_query_port_ets(pi, &port_ets, sizeof(port_ets), NULL);
10859 	if (status != ICE_SUCCESS) {
10860 		device_printf(dev,
10861 		    "Query Port ETS AQ call failed, err %s aq_err %s\n",
10862 		    ice_status_str(status),
10863 		    ice_aq_str(hw->adminq.sq_last_status));
10864 		return (EIO);
10865 	}
10866 
10867 	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10868 
10869 	/* Do the actual printing */
10870 	sbuf_printf(sbuf, "\n");
10871 
10872 	sbuf_printf(sbuf, "Valid TC map: 0x%x\n", port_ets.tc_valid_bits);
10873 
10874 	sbuf_printf(sbuf, "TC BW %%:");
10875 	ice_for_each_traffic_class(i) {
10876 		sbuf_printf(sbuf, " %3d", port_ets.tc_bw_share[i]);
10877 	}
10878 	sbuf_printf(sbuf, "\n");
10879 
10880 	sbuf_printf(sbuf, "EIR profile ID: %d\n", port_ets.port_eir_prof_id);
10881 	sbuf_printf(sbuf, "CIR profile ID: %d\n", port_ets.port_cir_prof_id);
10882 	sbuf_printf(sbuf, "TC Node prio: 0x%x\n", port_ets.tc_node_prio);
10883 
10884 	sbuf_printf(sbuf, "TC Node TEIDs:\n");
10885 	ice_for_each_traffic_class(i) {
10886 		sbuf_printf(sbuf, "%d: %d\n", i, port_ets.tc_node_teid[i]);
10887 	}
10888 
10889 	/* Finish */
10890 	sbuf_finish(sbuf);
10891 	sbuf_delete(sbuf);
10892 
10893 	return (0);
10894 }
10895 
10896 /**
10897  * ice_sysctl_dscp2tc_map - Map DSCP to hardware TCs
10898  * @oidp: sysctl oid structure
10899  * @arg1: pointer to private data structure
10900  * @arg2: which eight DSCP to UP mappings to configure (0 - 7)
10901  * @req: sysctl request pointer
10902  *
10903  * Gets or sets the current DSCP to UP table cached by the driver. Since there
10904  * are 64 possible DSCP values to configure, this sysctl only configures
10905  * chunks of 8 in that space at a time.
10906  *
10907  * This sysctl is only relevant in DSCP mode, and will only function in SW DCB
10908  * mode.
10909  */
10910 static int
10911 ice_sysctl_dscp2tc_map(SYSCTL_HANDLER_ARGS)
10912 {
10913 	struct ice_softc *sc = (struct ice_softc *)arg1;
10914 	struct ice_dcbx_cfg *local_dcbx_cfg;
10915 	struct ice_port_info *pi;
10916 	struct ice_hw *hw = &sc->hw;
10917 	device_t dev = sc->dev;
10918 	enum ice_status status;
10919 	struct sbuf *sbuf;
10920 	int ret;
10921 
10922 	/* Store input rates from user */
10923 	char dscp_user_buf[128] = "";
10924 	u8 new_dscp_table_seg[ICE_MAX_TRAFFIC_CLASS] = {};
10925 
10926 	if (ice_driver_is_detaching(sc))
10927 		return (ESHUTDOWN);
10928 
10929 	if (req->oldptr == NULL && req->newptr == NULL) {
10930 		ret = SYSCTL_OUT(req, 0, 128);
10931 		return (ret);
10932 	}
10933 
10934 	pi = hw->port_info;
10935 	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
10936 
10937 	sbuf = sbuf_new(NULL, dscp_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
10938 
10939 	/* Format DSCP-to-UP data for output */
10940 	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10941 		sbuf_printf(sbuf, "%d", local_dcbx_cfg->dscp_map[arg2 * 8 + i]);
10942 		if (i != ICE_MAX_TRAFFIC_CLASS - 1)
10943 			sbuf_printf(sbuf, ",");
10944 	}
10945 
10946 	sbuf_finish(sbuf);
10947 	sbuf_delete(sbuf);
10948 
10949 	/* Read in the new DSCP mapping values */
10950 	ret = sysctl_handle_string(oidp, dscp_user_buf, sizeof(dscp_user_buf), req);
10951 	if ((ret) || (req->newptr == NULL))
10952 		return (ret);
10953 
10954 	/* Don't allow setting changes in FW DCB mode */
10955 	if (!hw->port_info->qos_cfg.is_sw_lldp) {
10956 		device_printf(dev, "%s: DSCP mapping is not allowed in FW DCBX mode\n",
10957 		    __func__);
10958 		return (EINVAL);
10959 	}
10960 
10961 	/* Convert 8 values in a string to a table; this is similar to what
10962 	 * needs to be done for ETS settings, so this function can be re-used
10963 	 * for that purpose.
10964 	 */
10965 	ret = ice_ets_str_to_tbl(dscp_user_buf, new_dscp_table_seg, 8);
10966 	if (ret) {
10967 		device_printf(dev, "%s: Could not parse input DSCP2TC table: %s\n",
10968 		    __func__, dscp_user_buf);
10969 		return (ret);
10970 	}
10971 
10972 	memcpy(&local_dcbx_cfg->dscp_map[arg2 * 8], new_dscp_table_seg,
10973 	    sizeof(new_dscp_table_seg));
10974 
10975 	local_dcbx_cfg->app_mode = ICE_DCBX_APPS_NON_WILLING;
10976 
10977 	status = ice_set_dcb_cfg(pi);
10978 	if (status) {
10979 		device_printf(dev,
10980 		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
10981 		    __func__, ice_status_str(status),
10982 		    ice_aq_str(hw->adminq.sq_last_status));
10983 		return (EIO);
10984 	}
10985 
10986 	ice_do_dcb_reconfig(sc, false);
10987 
10988 	return (0);
10989 }
10990 
10991 /**
10992  * ice_handle_debug_dump_ioctl - Handle a debug dump ioctl request
10993  * @sc: the device private softc
10994  * @ifd: ifdrv ioctl request pointer
10995  */
10996 int
10997 ice_handle_debug_dump_ioctl(struct ice_softc *sc, struct ifdrv *ifd)
10998 {
10999 	size_t ifd_len = ifd->ifd_len;
11000 	struct ice_hw *hw = &sc->hw;
11001 	device_t dev = sc->dev;
11002 	struct ice_debug_dump_cmd *ddc;
11003 	enum ice_status status;
11004 	int err = 0;
11005 
11006 	/* Returned arguments from the Admin Queue */
11007 	u16 ret_buf_size = 0;
11008 	u16 ret_next_cluster = 0;
11009 	u16 ret_next_table = 0;
11010 	u32 ret_next_index = 0;
11011 
11012 	/*
11013 	 * ifioctl forwards SIOCxDRVSPEC to iflib without performing
11014 	 * a privilege check. In turn, iflib forwards the ioctl to the driver
11015 	 * without performing a privilege check. Perform one here to ensure
11016 	 * that non-privileged threads cannot access this interface.
11017 	 */
11018 	err = priv_check(curthread, PRIV_DRIVER);
11019 	if (err)
11020 		return (err);
11021 
11022 	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
11023 		device_printf(dev,
11024 		    "%s: Driver must rebuild data structures after a reset. Operation aborted.\n",
11025 		    __func__);
11026 		return (EBUSY);
11027 	}
11028 
11029 	if (ifd_len < sizeof(*ddc)) {
11030 		device_printf(dev,
11031 		    "%s: ifdrv length is too small. Got %zu, but expected %zu\n",
11032 		    __func__, ifd_len, sizeof(*ddc));
11033 		return (EINVAL);
11034 	}
11035 
11036 	if (ifd->ifd_data == NULL) {
11037 		device_printf(dev, "%s: ifd data buffer not present.\n",
11038 		     __func__);
11039 		return (EINVAL);
11040 	}
11041 
11042 	ddc = (struct ice_debug_dump_cmd *)malloc(ifd_len, M_ICE, M_ZERO | M_NOWAIT);
11043 	if (!ddc)
11044 		return (ENOMEM);
11045 
11046 	/* Copy the NVM access command and data in from user space */
11047 	/* coverity[tainted_data_argument] */
11048 	err = copyin(ifd->ifd_data, ddc, ifd_len);
11049 	if (err) {
11050 		device_printf(dev, "%s: Copying request from user space failed, err %s\n",
11051 			      __func__, ice_err_str(err));
11052 		goto out;
11053 	}
11054 
11055 	/* The data_size arg must be at least 1 for the AQ cmd to work */
11056 	if (ddc->data_size == 0) {
11057 		device_printf(dev,
11058 		    "%s: data_size must be greater than 0\n", __func__);
11059 		err = EINVAL;
11060 		goto out;
11061 	}
11062 	/* ...and it can't be too long */
11063 	if (ddc->data_size > (ifd_len - sizeof(*ddc))) {
11064 		device_printf(dev,
11065 		    "%s: data_size (%d) is larger than ifd_len space (%zu)?\n", __func__,
11066 		    ddc->data_size, ifd_len - sizeof(*ddc));
11067 		err = EINVAL;
11068 		goto out;
11069 	}
11070 
11071 	/* Make sure any possible data buffer space is zeroed */
11072 	memset(ddc->data, 0, ifd_len - sizeof(*ddc));
11073 
11074 	status = ice_aq_get_internal_data(hw, ddc->cluster_id, ddc->table_id, ddc->offset,
11075 	    (u8 *)ddc->data, ddc->data_size, &ret_buf_size,
11076 	    &ret_next_cluster, &ret_next_table, &ret_next_index, NULL);
11077 	ice_debug(hw, ICE_DBG_DIAG, "%s: ret_buf_size %d, ret_next_table %d, ret_next_index %d\n",
11078 	    __func__, ret_buf_size, ret_next_table, ret_next_index);
11079 	if (status) {
11080 		device_printf(dev,
11081 		    "%s: Get Internal Data AQ command failed, err %s aq_err %s\n",
11082 		    __func__,
11083 		    ice_status_str(status),
11084 		    ice_aq_str(hw->adminq.sq_last_status));
11085 		goto aq_error;
11086 	}
11087 
11088 	ddc->table_id = ret_next_table;
11089 	ddc->offset = ret_next_index;
11090 	ddc->data_size = ret_buf_size;
11091 	ddc->cluster_id = ret_next_cluster;
11092 
11093 	/* Copy the possibly modified contents of the handled request out */
11094 	err = copyout(ddc, ifd->ifd_data, ifd->ifd_len);
11095 	if (err) {
11096 		device_printf(dev, "%s: Copying response back to user space failed, err %s\n",
11097 			      __func__, ice_err_str(err));
11098 		goto out;
11099 	}
11100 
11101 aq_error:
11102 	/* Convert private status to an error code for proper ioctl response */
11103 	switch (status) {
11104 	case ICE_SUCCESS:
11105 		err = (0);
11106 		break;
11107 	case ICE_ERR_NO_MEMORY:
11108 		err = (ENOMEM);
11109 		break;
11110 	case ICE_ERR_OUT_OF_RANGE:
11111 		err = (ENOTTY);
11112 		break;
11113 	case ICE_ERR_AQ_ERROR:
11114 		err = (EIO);
11115 		break;
11116 	case ICE_ERR_PARAM:
11117 	default:
11118 		err = (EINVAL);
11119 		break;
11120 	}
11121 
11122 out:
11123 	free(ddc, M_ICE);
11124 	return (err);
11125 }
11126 
11127 /**
11128  * ice_sysctl_allow_no_fec_mod_in_auto - Change Auto FEC behavior
11129  * @oidp: sysctl oid structure
11130  * @arg1: pointer to private data structure
11131  * @arg2: unused
11132  * @req: sysctl request pointer
11133  *
11134  * Allows user to let "No FEC" mode to be used in "Auto"
11135  * FEC mode during FEC negotiation. This is only supported
11136  * on newer firmware versions.
11137  */
11138 static int
11139 ice_sysctl_allow_no_fec_mod_in_auto(SYSCTL_HANDLER_ARGS)
11140 {
11141 	struct ice_softc *sc = (struct ice_softc *)arg1;
11142 	struct ice_hw *hw = &sc->hw;
11143 	device_t dev = sc->dev;
11144 	u8 user_flag;
11145 	int ret;
11146 
11147 	UNREFERENCED_PARAMETER(arg2);
11148 
11149 	ret = priv_check(curthread, PRIV_DRIVER);
11150 	if (ret)
11151 		return (ret);
11152 
11153 	if (ice_driver_is_detaching(sc))
11154 		return (ESHUTDOWN);
11155 
11156 	user_flag = (u8)sc->allow_no_fec_mod_in_auto;
11157 
11158 	ret = sysctl_handle_bool(oidp, &user_flag, 0, req);
11159 	if ((ret) || (req->newptr == NULL))
11160 		return (ret);
11161 
11162 	if (!ice_fw_supports_fec_dis_auto(hw)) {
11163 		log(LOG_INFO,
11164 		    "%s: Enabling or disabling of auto configuration of modules that don't support FEC is unsupported by the current firmware\n",
11165 		    device_get_nameunit(dev));
11166 		return (ENODEV);
11167 	}
11168 
11169 	if (user_flag == (bool)sc->allow_no_fec_mod_in_auto)
11170 		return (0);
11171 
11172 	sc->allow_no_fec_mod_in_auto = (u8)user_flag;
11173 
11174 	if (sc->allow_no_fec_mod_in_auto)
11175 		log(LOG_INFO, "%s: Enabled auto configuration of No FEC modules\n",
11176 		    device_get_nameunit(dev));
11177 	else
11178 		log(LOG_INFO,
11179 		    "%s: Auto configuration of No FEC modules reset to NVM defaults\n",
11180 		    device_get_nameunit(dev));
11181 
11182 	return (0);
11183 }
11184 
11185 /**
11186  * ice_sysctl_temperature - Retrieve NIC temp via AQ command
11187  * @oidp: sysctl oid structure
11188  * @arg1: pointer to private data structure
11189  * @arg2: unused
11190  * @req: sysctl request pointer
11191  *
11192  * If ICE_DBG_DIAG is set in the debug.debug_mask sysctl, then this will print
11193  * temperature threshold information in the kernel message log, too.
11194  */
11195 static int
11196 ice_sysctl_temperature(SYSCTL_HANDLER_ARGS)
11197 {
11198 	struct ice_aqc_get_sensor_reading_resp resp;
11199 	struct ice_softc *sc = (struct ice_softc *)arg1;
11200 	struct ice_hw *hw = &sc->hw;
11201 	device_t dev = sc->dev;
11202 	enum ice_status status;
11203 
11204 	UNREFERENCED_PARAMETER(oidp);
11205 	UNREFERENCED_PARAMETER(arg2);
11206 
11207 	if (ice_driver_is_detaching(sc))
11208 		return (ESHUTDOWN);
11209 
11210 	status = ice_aq_get_sensor_reading(hw, ICE_AQC_INT_TEMP_SENSOR,
11211 	    ICE_AQC_INT_TEMP_FORMAT, &resp, NULL);
11212 	if (status != ICE_SUCCESS) {
11213 		device_printf(dev,
11214 		    "Get Sensor Reading AQ call failed, err %s aq_err %s\n",
11215 		    ice_status_str(status),
11216 		    ice_aq_str(hw->adminq.sq_last_status));
11217 		return (EIO);
11218 	}
11219 
11220 	ice_debug(hw, ICE_DBG_DIAG, "%s: Warning Temp Threshold: %d\n", __func__,
11221 	    resp.data.s0f0.temp_warning_threshold);
11222 	ice_debug(hw, ICE_DBG_DIAG, "%s: Critical Temp Threshold: %d\n", __func__,
11223 	    resp.data.s0f0.temp_critical_threshold);
11224 	ice_debug(hw, ICE_DBG_DIAG, "%s: Fatal Temp Threshold: %d\n", __func__,
11225 	    resp.data.s0f0.temp_fatal_threshold);
11226 
11227 	return sysctl_handle_8(oidp, &resp.data.s0f0.temp, 0, req);
11228 }
11229 
11230 /**
11231  * ice_sysctl_create_mirror_interface - Create a new ifnet that monitors
11232  *     traffic from the main PF VSI
11233  */
11234 static int
11235 ice_sysctl_create_mirror_interface(SYSCTL_HANDLER_ARGS)
11236 {
11237 	struct ice_softc *sc = (struct ice_softc *)arg1;
11238 	device_t dev = sc->dev;
11239 	int ret;
11240 
11241 	UNREFERENCED_PARAMETER(arg2);
11242 
11243 	ret = priv_check(curthread, PRIV_DRIVER);
11244 	if (ret)
11245 		return (ret);
11246 
11247 	if (ice_driver_is_detaching(sc))
11248 		return (ESHUTDOWN);
11249 
11250 	/* If the user hasn't written "1" to this sysctl yet: */
11251 	if (!ice_test_state(&sc->state, ICE_STATE_DO_CREATE_MIRR_INTFC)) {
11252 		/* Avoid output on the first set of reads to this sysctl in
11253 		 * order to prevent a null byte from being written to the
11254 		 * end result when called via sysctl(8).
11255 		 */
11256 		if (req->oldptr == NULL && req->newptr == NULL) {
11257 			ret = SYSCTL_OUT(req, 0, 0);
11258 			return (ret);
11259 		}
11260 
11261 		char input_buf[2] = "";
11262 		ret = sysctl_handle_string(oidp, input_buf, sizeof(input_buf), req);
11263 		if ((ret) || (req->newptr == NULL))
11264 			return (ret);
11265 
11266 		/* If we get '1', then indicate we'll create the interface in
11267 		 * the next sysctl read call.
11268 		 */
11269 		if (input_buf[0] == '1') {
11270 			if (sc->mirr_if) {
11271 				device_printf(dev,
11272 				    "Mirror interface %s already exists!\n",
11273 				    if_name(sc->mirr_if->ifp));
11274 				return (EEXIST);
11275 			}
11276 			ice_set_state(&sc->state, ICE_STATE_DO_CREATE_MIRR_INTFC);
11277 			return (0);
11278 		}
11279 
11280 		return (EINVAL);
11281 	}
11282 
11283 	/* --- "Do Create Mirror Interface" is set --- */
11284 
11285 	/* Caller just wants the upper bound for size */
11286 	if (req->oldptr == NULL && req->newptr == NULL) {
11287 		ret = SYSCTL_OUT(req, 0, 128);
11288 		return (ret);
11289 	}
11290 
11291 	device_printf(dev, "Creating new mirroring interface...\n");
11292 
11293 	ret = ice_create_mirror_interface(sc);
11294 	if (ret)
11295 		return (ret);
11296 
11297 	ice_clear_state(&sc->state, ICE_STATE_DO_CREATE_MIRR_INTFC);
11298 
11299 	ret = sysctl_handle_string(oidp, __DECONST(char *, "Interface attached"), 0, req);
11300 	return (ret);
11301 }
11302 
11303 /**
11304  * ice_sysctl_destroy_mirror_interface - Destroy network interface that monitors
11305  *     traffic from the main PF VSI
11306  */
11307 static int
11308 ice_sysctl_destroy_mirror_interface(SYSCTL_HANDLER_ARGS)
11309 {
11310 	struct ice_softc *sc = (struct ice_softc *)arg1;
11311 	device_t dev = sc->dev;
11312 	int ret;
11313 
11314 	UNREFERENCED_PARAMETER(arg2);
11315 
11316 	ret = priv_check(curthread, PRIV_DRIVER);
11317 	if (ret)
11318 		return (ret);
11319 
11320 	if (ice_driver_is_detaching(sc))
11321 		return (ESHUTDOWN);
11322 
11323 	/* If the user hasn't written "1" to this sysctl yet: */
11324 	if (!ice_test_state(&sc->state, ICE_STATE_DO_DESTROY_MIRR_INTFC)) {
11325 		/* Avoid output on the first set of reads to this sysctl in
11326 		 * order to prevent a null byte from being written to the
11327 		 * end result when called via sysctl(8).
11328 		 */
11329 		if (req->oldptr == NULL && req->newptr == NULL) {
11330 			ret = SYSCTL_OUT(req, 0, 0);
11331 			return (ret);
11332 		}
11333 
11334 		char input_buf[2] = "";
11335 		ret = sysctl_handle_string(oidp, input_buf, sizeof(input_buf), req);
11336 		if ((ret) || (req->newptr == NULL))
11337 			return (ret);
11338 
11339 		/* If we get '1', then indicate we'll create the interface in
11340 		 * the next sysctl read call.
11341 		 */
11342 		if (input_buf[0] == '1') {
11343 			if (!sc->mirr_if) {
11344 				device_printf(dev,
11345 				    "No mirror interface exists!\n");
11346 				return (EINVAL);
11347 			}
11348 			ice_set_state(&sc->state, ICE_STATE_DO_DESTROY_MIRR_INTFC);
11349 			return (0);
11350 		}
11351 
11352 		return (EINVAL);
11353 	}
11354 
11355 	/* --- "Do Destroy Mirror Interface" is set --- */
11356 
11357 	/* Caller just wants the upper bound for size */
11358 	if (req->oldptr == NULL && req->newptr == NULL) {
11359 		ret = SYSCTL_OUT(req, 0, 128);
11360 		return (ret);
11361 	}
11362 
11363 	device_printf(dev, "Destroying mirroring interface...\n");
11364 
11365 	ice_destroy_mirror_interface(sc);
11366 
11367 	ice_clear_state(&sc->state, ICE_STATE_DO_DESTROY_MIRR_INTFC);
11368 
11369 	ret = sysctl_handle_string(oidp, __DECONST(char *, "Interface destroyed"), 0, req);
11370 	return (ret);
11371 }
11372