xref: /linux/drivers/net/ethernet/intel/ice/ice_main.c (revision effa76856f2d7111f8c44de49f15ebdfccea8ccc)
1  // SPDX-License-Identifier: GPL-2.0
2  /* Copyright (c) 2018, Intel Corporation. */
3  
4  /* Intel(R) Ethernet Connection E800 Series Linux Driver */
5  
6  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7  
8  #include <generated/utsrelease.h>
9  #include "ice.h"
10  #include "ice_base.h"
11  #include "ice_lib.h"
12  #include "ice_fltr.h"
13  #include "ice_dcb_lib.h"
14  #include "ice_dcb_nl.h"
15  #include "ice_devlink.h"
16  /* Including ice_trace.h with CREATE_TRACE_POINTS defined will generate the
17   * ice tracepoint functions. This must be done exactly once across the
18   * ice driver.
19   */
20  #define CREATE_TRACE_POINTS
21  #include "ice_trace.h"
22  #include "ice_eswitch.h"
23  #include "ice_tc_lib.h"
24  #include "ice_vsi_vlan_ops.h"
25  
26  #define DRV_SUMMARY	"Intel(R) Ethernet Connection E800 Series Linux Driver"
27  static const char ice_driver_string[] = DRV_SUMMARY;
28  static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation.";
29  
30  /* DDP Package file located in firmware search paths (e.g. /lib/firmware/) */
31  #define ICE_DDP_PKG_PATH	"intel/ice/ddp/"
32  #define ICE_DDP_PKG_FILE	ICE_DDP_PKG_PATH "ice.pkg"
33  
34  MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
35  MODULE_DESCRIPTION(DRV_SUMMARY);
36  MODULE_LICENSE("GPL v2");
37  MODULE_FIRMWARE(ICE_DDP_PKG_FILE);
38  
39  static int debug = -1;
40  module_param(debug, int, 0644);
41  #ifndef CONFIG_DYNAMIC_DEBUG
42  MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXXXXX)");
43  #else
44  MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
45  #endif /* !CONFIG_DYNAMIC_DEBUG */
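/* Example (illustrative, not from the driver sources): loading the module with
 * "modprobe ice debug=16" selects the maximum netif message level; when
 * CONFIG_DYNAMIC_DEBUG is not enabled, a value with the top bit set
 * (0x8XXXXXXX) is instead taken as the hw debug_mask, per the parameter
 * description above.
 */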
46  
47  static DEFINE_IDA(ice_aux_ida);
48  DEFINE_STATIC_KEY_FALSE(ice_xdp_locking_key);
49  EXPORT_SYMBOL(ice_xdp_locking_key);
50  
51  /**
52   * ice_hw_to_dev - Get device pointer from the hardware structure
53   * @hw: pointer to the device HW structure
54   *
55   * Used to access the device pointer from compilation units which can't easily
56   * include the definition of struct ice_pf without leading to circular header
57   * dependencies.
58   */
59  struct device *ice_hw_to_dev(struct ice_hw *hw)
60  {
61  	struct ice_pf *pf = container_of(hw, struct ice_pf, hw);
62  
63  	return &pf->pdev->dev;
64  }
65  
66  static struct workqueue_struct *ice_wq;
67  static const struct net_device_ops ice_netdev_safe_mode_ops;
68  static const struct net_device_ops ice_netdev_ops;
69  
70  static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type);
71  
72  static void ice_vsi_release_all(struct ice_pf *pf);
73  
74  static int ice_rebuild_channels(struct ice_pf *pf);
75  static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_adv_fltr);
76  
77  static int
78  ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch,
79  		     void *cb_priv, enum tc_setup_type type, void *type_data,
80  		     void *data,
81  		     void (*cleanup)(struct flow_block_cb *block_cb));
82  
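/**
 * netif_is_ice - check if the given netdev is owned by the ice driver
 * @dev: net device to check
 *
 * Returns true when @dev is non-NULL and its netdev_ops point at the ice
 * driver's ops table.
 */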
83  bool netif_is_ice(struct net_device *dev)
84  {
85  	return dev && (dev->netdev_ops == &ice_netdev_ops);
86  }
87  
88  /**
89   * ice_get_tx_pending - returns number of Tx descriptors not processed
90   * @ring: the ring of descriptors
91   */
92  static u16 ice_get_tx_pending(struct ice_tx_ring *ring)
93  {
94  	u16 head, tail;
95  
96  	head = ring->next_to_clean;
97  	tail = ring->next_to_use;
98  
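	/* Count the descriptors between next_to_clean and next_to_use; when
	 * next_to_use has wrapped around behind next_to_clean, add
	 * ring->count to account for the wrap.
	 */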
99  	if (head != tail)
100  		return (head < tail) ?
101  			tail - head : (tail + ring->count - head);
102  	return 0;
103  }
104  
105  /**
106   * ice_check_for_hang_subtask - check for and recover hung queues
107   * @pf: pointer to PF struct
108   */
109  static void ice_check_for_hang_subtask(struct ice_pf *pf)
110  {
111  	struct ice_vsi *vsi = NULL;
112  	struct ice_hw *hw;
113  	unsigned int i;
114  	int packets;
115  	u32 v;
116  
117  	ice_for_each_vsi(pf, v)
118  		if (pf->vsi[v] && pf->vsi[v]->type == ICE_VSI_PF) {
119  			vsi = pf->vsi[v];
120  			break;
121  		}
122  
123  	if (!vsi || test_bit(ICE_VSI_DOWN, vsi->state))
124  		return;
125  
126  	if (!(vsi->netdev && netif_carrier_ok(vsi->netdev)))
127  		return;
128  
129  	hw = &vsi->back->hw;
130  
131  	ice_for_each_txq(vsi, i) {
132  		struct ice_tx_ring *tx_ring = vsi->tx_rings[i];
133  		struct ice_ring_stats *ring_stats;
134  
135  		if (!tx_ring)
136  			continue;
137  		if (ice_ring_ch_enabled(tx_ring))
138  			continue;
139  
140  		ring_stats = tx_ring->ring_stats;
141  		if (!ring_stats)
142  			continue;
143  
144  		if (tx_ring->desc) {
145  			/* If packet counter has not changed the queue is
146  			 * likely stalled, so force an interrupt for this
147  			 * queue.
148  			 *
149  			 * prev_pkt would be negative if there was no
150  			 * pending work.
151  			 */
152  			packets = ring_stats->stats.pkts & INT_MAX;
153  			if (ring_stats->tx_stats.prev_pkt == packets) {
154  				/* Trigger sw interrupt to revive the queue */
155  				ice_trigger_sw_intr(hw, tx_ring->q_vector);
156  				continue;
157  			}
158  
159  			/* Memory barrier between read of packet count and call
160  			 * to ice_get_tx_pending()
161  			 */
162  			smp_rmb();
163  			ring_stats->tx_stats.prev_pkt =
164  			    ice_get_tx_pending(tx_ring) ? packets : -1;
165  		}
166  	}
167  }
168  
169  /**
170   * ice_init_mac_fltr - Set initial MAC filters
171   * @pf: board private structure
172   *
173   * Set initial set of MAC filters for PF VSI; configure filters for permanent
174   * address and broadcast address. If an error is encountered, netdevice will be
175   * unregistered.
176   */
177  static int ice_init_mac_fltr(struct ice_pf *pf)
178  {
179  	struct ice_vsi *vsi;
180  	u8 *perm_addr;
181  
182  	vsi = ice_get_main_vsi(pf);
183  	if (!vsi)
184  		return -EINVAL;
185  
186  	perm_addr = vsi->port_info->mac.perm_addr;
187  	return ice_fltr_add_mac_and_broadcast(vsi, perm_addr, ICE_FWD_TO_VSI);
188  }
189  
190  /**
191   * ice_add_mac_to_sync_list - creates list of MAC addresses to be synced
192   * @netdev: the net device on which the sync is happening
193   * @addr: MAC address to sync
194   *
195   * This is a callback function which is called by the in kernel device sync
196   * functions (like __dev_uc_sync, __dev_mc_sync, etc). This function only
197   * populates the tmp_sync_list, which is later used by ice_add_mac to add the
198   * MAC filters to the hardware.
199   */
200  static int ice_add_mac_to_sync_list(struct net_device *netdev, const u8 *addr)
201  {
202  	struct ice_netdev_priv *np = netdev_priv(netdev);
203  	struct ice_vsi *vsi = np->vsi;
204  
205  	if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_sync_list, addr,
206  				     ICE_FWD_TO_VSI))
207  		return -EINVAL;
208  
209  	return 0;
210  }
211  
212  /**
213   * ice_add_mac_to_unsync_list - creates list of MAC addresses to be unsynced
214   * @netdev: the net device on which the unsync is happening
215   * @addr: MAC address to unsync
216   *
217   * This is a callback function which is called by the in kernel device unsync
218   * functions (like __dev_uc_unsync, __dev_mc_unsync, etc). This function only
219   * populates the tmp_unsync_list, which is later used by ice_remove_mac to
220   * delete the MAC filters from the hardware.
221   */
222  static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr)
223  {
224  	struct ice_netdev_priv *np = netdev_priv(netdev);
225  	struct ice_vsi *vsi = np->vsi;
226  
227  	/* Under some circumstances, we might receive a request to delete our
228  	 * own device address from our uc list. Because we store the device
229  	 * address in the VSI's MAC filter list, we need to ignore such
230  	 * requests and not delete our device address from this list.
231  	 */
232  	if (ether_addr_equal(addr, netdev->dev_addr))
233  		return 0;
234  
235  	if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_unsync_list, addr,
236  				     ICE_FWD_TO_VSI))
237  		return -EINVAL;
238  
239  	return 0;
240  }
241  
242  /**
243   * ice_vsi_fltr_changed - check if filter state changed
244   * @vsi: VSI to be checked
245   *
246   * returns true if filter state has changed, false otherwise.
247   */
248  static bool ice_vsi_fltr_changed(struct ice_vsi *vsi)
249  {
250  	return test_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state) ||
251  	       test_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
252  }
253  
254  /**
255   * ice_set_promisc - Enable promiscuous mode for a given PF VSI
256   * @vsi: the VSI being configured
257   * @promisc_m: mask of promiscuous config bits
258   *
259   */
260  static int ice_set_promisc(struct ice_vsi *vsi, u8 promisc_m)
261  {
262  	int status;
263  
264  	if (vsi->type != ICE_VSI_PF)
265  		return 0;
266  
267  	if (ice_vsi_has_non_zero_vlans(vsi)) {
268  		promisc_m |= (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX);
269  		status = ice_fltr_set_vlan_vsi_promisc(&vsi->back->hw, vsi,
270  						       promisc_m);
271  	} else {
272  		status = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
273  						  promisc_m, 0);
274  	}
275  	if (status && status != -EEXIST)
276  		return status;
277  
278  	return 0;
279  }
280  
281  /**
282   * ice_clear_promisc - Disable promiscuous mode for a given PF VSI
283   * @vsi: the VSI being configured
284   * @promisc_m: mask of promiscuous config bits
285   *
286   */
287  static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m)
288  {
289  	int status;
290  
291  	if (vsi->type != ICE_VSI_PF)
292  		return 0;
293  
294  	if (ice_vsi_has_non_zero_vlans(vsi)) {
295  		promisc_m |= (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX);
296  		status = ice_fltr_clear_vlan_vsi_promisc(&vsi->back->hw, vsi,
297  							 promisc_m);
298  	} else {
299  		status = ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
300  						    promisc_m, 0);
301  	}
302  
303  	return status;
304  }
305  
306  /**
307   * ice_vsi_sync_fltr - Update the VSI filter list to the HW
308   * @vsi: ptr to the VSI
309   *
310   * Push any outstanding VSI filter changes through the AdminQ.
311   */
312  static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
313  {
314  	struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
315  	struct device *dev = ice_pf_to_dev(vsi->back);
316  	struct net_device *netdev = vsi->netdev;
317  	bool promisc_forced_on = false;
318  	struct ice_pf *pf = vsi->back;
319  	struct ice_hw *hw = &pf->hw;
320  	u32 changed_flags = 0;
321  	int err;
322  
323  	if (!vsi->netdev)
324  		return -EINVAL;
325  
326  	while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
327  		usleep_range(1000, 2000);
328  
329  	changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
330  	vsi->current_netdev_flags = vsi->netdev->flags;
331  
332  	INIT_LIST_HEAD(&vsi->tmp_sync_list);
333  	INIT_LIST_HEAD(&vsi->tmp_unsync_list);
334  
335  	if (ice_vsi_fltr_changed(vsi)) {
336  		clear_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
337  		clear_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
338  
339  		/* grab the netdev's addr_list_lock */
340  		netif_addr_lock_bh(netdev);
341  		__dev_uc_sync(netdev, ice_add_mac_to_sync_list,
342  			      ice_add_mac_to_unsync_list);
343  		__dev_mc_sync(netdev, ice_add_mac_to_sync_list,
344  			      ice_add_mac_to_unsync_list);
345  		/* our temp lists are populated. release lock */
346  		netif_addr_unlock_bh(netdev);
347  	}
348  
349  	/* Remove MAC addresses in the unsync list */
350  	err = ice_fltr_remove_mac_list(vsi, &vsi->tmp_unsync_list);
351  	ice_fltr_free_list(dev, &vsi->tmp_unsync_list);
352  	if (err) {
353  		netdev_err(netdev, "Failed to delete MAC filters\n");
354  		/* if we failed because of alloc failures, just bail */
355  		if (err == -ENOMEM)
356  			goto out;
357  	}
358  
359  	/* Add MAC addresses in the sync list */
360  	err = ice_fltr_add_mac_list(vsi, &vsi->tmp_sync_list);
361  	ice_fltr_free_list(dev, &vsi->tmp_sync_list);
362  	/* If the filter was added successfully or already exists, do not treat
363  	 * it as an error; instead continue processing the rest of the
364  	 * function.
365  	 */
366  	if (err && err != -EEXIST) {
367  		netdev_err(netdev, "Failed to add MAC filters\n");
368  		/* If there is no more space for new umac filters, VSI
369  		 * should go into promiscuous mode. There should be some
370  		 * space reserved for promiscuous filters.
371  		 */
372  		if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOSPC &&
373  		    !test_and_set_bit(ICE_FLTR_OVERFLOW_PROMISC,
374  				      vsi->state)) {
375  			promisc_forced_on = true;
376  			netdev_warn(netdev, "Reached MAC filter limit, forcing promisc mode on VSI %d\n",
377  				    vsi->vsi_num);
378  		} else {
379  			goto out;
380  		}
381  	}
382  	err = 0;
383  	/* check for changes in promiscuous modes */
384  	if (changed_flags & IFF_ALLMULTI) {
385  		if (vsi->current_netdev_flags & IFF_ALLMULTI) {
386  			err = ice_set_promisc(vsi, ICE_MCAST_PROMISC_BITS);
387  			if (err) {
388  				vsi->current_netdev_flags &= ~IFF_ALLMULTI;
389  				goto out_promisc;
390  			}
391  		} else {
392  			/* !(vsi->current_netdev_flags & IFF_ALLMULTI) */
393  			err = ice_clear_promisc(vsi, ICE_MCAST_PROMISC_BITS);
394  			if (err) {
395  				vsi->current_netdev_flags |= IFF_ALLMULTI;
396  				goto out_promisc;
397  			}
398  		}
399  	}
400  
401  	if (((changed_flags & IFF_PROMISC) || promisc_forced_on) ||
402  	    test_bit(ICE_VSI_PROMISC_CHANGED, vsi->state)) {
403  		clear_bit(ICE_VSI_PROMISC_CHANGED, vsi->state);
404  		if (vsi->current_netdev_flags & IFF_PROMISC) {
405  			/* Apply Rx filter rule to get traffic from wire */
406  			if (!ice_is_dflt_vsi_in_use(vsi->port_info)) {
407  				err = ice_set_dflt_vsi(vsi);
408  				if (err && err != -EEXIST) {
409  					netdev_err(netdev, "Error %d setting default VSI %i Rx rule\n",
410  						   err, vsi->vsi_num);
411  					vsi->current_netdev_flags &=
412  						~IFF_PROMISC;
413  					goto out_promisc;
414  				}
415  				err = 0;
416  				vlan_ops->dis_rx_filtering(vsi);
417  			}
418  		} else {
419  			/* Clear Rx filter to remove traffic from wire */
420  			if (ice_is_vsi_dflt_vsi(vsi)) {
421  				err = ice_clear_dflt_vsi(vsi);
422  				if (err) {
423  					netdev_err(netdev, "Error %d clearing default VSI %i Rx rule\n",
424  						   err, vsi->vsi_num);
425  					vsi->current_netdev_flags |=
426  						IFF_PROMISC;
427  					goto out_promisc;
428  				}
429  				if (vsi->netdev->features &
430  				    NETIF_F_HW_VLAN_CTAG_FILTER)
431  					vlan_ops->ena_rx_filtering(vsi);
432  			}
433  		}
434  	}
435  	goto exit;
436  
437  out_promisc:
438  	set_bit(ICE_VSI_PROMISC_CHANGED, vsi->state);
439  	goto exit;
440  out:
441  	/* if something went wrong then set the changed flag so we try again */
442  	set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
443  	set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
444  exit:
445  	clear_bit(ICE_CFG_BUSY, vsi->state);
446  	return err;
447  }
448  
449  /**
450   * ice_sync_fltr_subtask - Sync the VSI filter list with HW
451   * @pf: board private structure
452   */
453  static void ice_sync_fltr_subtask(struct ice_pf *pf)
454  {
455  	int v;
456  
457  	if (!pf || !(test_bit(ICE_FLAG_FLTR_SYNC, pf->flags)))
458  		return;
459  
460  	clear_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
461  
462  	ice_for_each_vsi(pf, v)
463  		if (pf->vsi[v] && ice_vsi_fltr_changed(pf->vsi[v]) &&
464  		    ice_vsi_sync_fltr(pf->vsi[v])) {
465  			/* come back and try again later */
466  			set_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
467  			break;
468  		}
469  }
470  
471  /**
472   * ice_pf_dis_all_vsi - Pause all VSIs on a PF
473   * @pf: the PF
474   * @locked: is the rtnl_lock already held
475   */
476  static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
477  {
478  	int node;
479  	int v;
480  
481  	ice_for_each_vsi(pf, v)
482  		if (pf->vsi[v])
483  			ice_dis_vsi(pf->vsi[v], locked);
484  
485  	for (node = 0; node < ICE_MAX_PF_AGG_NODES; node++)
486  		pf->pf_agg_node[node].num_vsis = 0;
487  
488  	for (node = 0; node < ICE_MAX_VF_AGG_NODES; node++)
489  		pf->vf_agg_node[node].num_vsis = 0;
490  }
491  
492  /**
493   * ice_clear_sw_switch_recipes - clear switch recipes
494   * @pf: board private structure
495   *
496   * Mark switch recipes as not created in sw structures. There are cases where
497   * rules (especially advanced rules) need to be restored, either re-read from
498   * hardware or added again, for example after a reset. The 'recp_created'
499   * flag prevents that from happening and needs to be cleared upfront.
500   */
501  static void ice_clear_sw_switch_recipes(struct ice_pf *pf)
502  {
503  	struct ice_sw_recipe *recp;
504  	u8 i;
505  
506  	recp = pf->hw.switch_info->recp_list;
507  	for (i = 0; i < ICE_MAX_NUM_RECIPES; i++)
508  		recp[i].recp_created = false;
509  }
510  
511  /**
512   * ice_prepare_for_reset - prep for reset
513   * @pf: board private structure
514   * @reset_type: reset type requested
515   *
516   * Inform or close all dependent features in prep for reset.
517   */
518  static void
519  ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
520  {
521  	struct ice_hw *hw = &pf->hw;
522  	struct ice_vsi *vsi;
523  	struct ice_vf *vf;
524  	unsigned int bkt;
525  
526  	dev_dbg(ice_pf_to_dev(pf), "reset_type=%d\n", reset_type);
527  
528  	/* already prepared for reset */
529  	if (test_bit(ICE_PREPARED_FOR_RESET, pf->state))
530  		return;
531  
532  	ice_unplug_aux_dev(pf);
533  
534  	/* Notify VFs of impending reset */
535  	if (ice_check_sq_alive(hw, &hw->mailboxq))
536  		ice_vc_notify_reset(pf);
537  
538  	/* Disable VFs until reset is completed */
539  	mutex_lock(&pf->vfs.table_lock);
540  	ice_for_each_vf(pf, bkt, vf)
541  		ice_set_vf_state_qs_dis(vf);
542  	mutex_unlock(&pf->vfs.table_lock);
543  
544  	if (ice_is_eswitch_mode_switchdev(pf)) {
545  		if (reset_type != ICE_RESET_PFR)
546  			ice_clear_sw_switch_recipes(pf);
547  	}
548  
549  	/* release ADQ specific HW and SW resources */
550  	vsi = ice_get_main_vsi(pf);
551  	if (!vsi)
552  		goto skip;
553  
554  	/* to be on safe side, reset orig_rss_size so that normal flow
555  	 * of deciding rss_size can take precedence
556  	 */
557  	vsi->orig_rss_size = 0;
558  
559  	if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
560  		if (reset_type == ICE_RESET_PFR) {
561  			vsi->old_ena_tc = vsi->all_enatc;
562  			vsi->old_numtc = vsi->all_numtc;
563  		} else {
564  			ice_remove_q_channels(vsi, true);
565  
566  			/* for other reset type, do not support channel rebuild
567  			 * hence reset needed info
568  			 */
569  			vsi->old_ena_tc = 0;
570  			vsi->all_enatc = 0;
571  			vsi->old_numtc = 0;
572  			vsi->all_numtc = 0;
573  			vsi->req_txq = 0;
574  			vsi->req_rxq = 0;
575  			clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
576  			memset(&vsi->mqprio_qopt, 0, sizeof(vsi->mqprio_qopt));
577  		}
578  	}
579  skip:
580  
581  	/* clear SW filtering DB */
582  	ice_clear_hw_tbls(hw);
583  	/* disable the VSIs and their queues that are not already DOWN */
584  	ice_pf_dis_all_vsi(pf, false);
585  
586  	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
587  		ice_ptp_prepare_for_reset(pf);
588  
589  	if (ice_is_feature_supported(pf, ICE_F_GNSS))
590  		ice_gnss_exit(pf);
591  
592  	if (hw->port_info)
593  		ice_sched_clear_port(hw->port_info);
594  
595  	ice_shutdown_all_ctrlq(hw);
596  
597  	set_bit(ICE_PREPARED_FOR_RESET, pf->state);
598  }
599  
600  /**
601   * ice_do_reset - Initiate one of many types of resets
602   * @pf: board private structure
603   * @reset_type: reset type requested before this function was called.
604   */
605  static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
606  {
607  	struct device *dev = ice_pf_to_dev(pf);
608  	struct ice_hw *hw = &pf->hw;
609  
610  	dev_dbg(dev, "reset_type 0x%x requested\n", reset_type);
611  
612  	ice_prepare_for_reset(pf, reset_type);
613  
614  	/* trigger the reset */
615  	if (ice_reset(hw, reset_type)) {
616  		dev_err(dev, "reset %d failed\n", reset_type);
617  		set_bit(ICE_RESET_FAILED, pf->state);
618  		clear_bit(ICE_RESET_OICR_RECV, pf->state);
619  		clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
620  		clear_bit(ICE_PFR_REQ, pf->state);
621  		clear_bit(ICE_CORER_REQ, pf->state);
622  		clear_bit(ICE_GLOBR_REQ, pf->state);
623  		wake_up(&pf->reset_wait_queue);
624  		return;
625  	}
626  
627  	/* PFR is a bit of a special case because it doesn't result in an OICR
628  	 * interrupt. So for PFR, rebuild after the reset and clear the reset-
629  	 * associated state bits.
630  	 */
631  	if (reset_type == ICE_RESET_PFR) {
632  		pf->pfr_count++;
633  		ice_rebuild(pf, reset_type);
634  		clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
635  		clear_bit(ICE_PFR_REQ, pf->state);
636  		wake_up(&pf->reset_wait_queue);
637  		ice_reset_all_vfs(pf);
638  	}
639  }
640  
641  /**
642   * ice_reset_subtask - Set up for resetting the device and driver
643   * @pf: board private structure
644   */
645  static void ice_reset_subtask(struct ice_pf *pf)
646  {
647  	enum ice_reset_req reset_type = ICE_RESET_INVAL;
648  
649  	/* When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an
650  	 * OICR interrupt. The OICR handler (ice_misc_intr) determines what type
651  	 * of reset is pending and sets bits in pf->state indicating the reset
652  	 * type and ICE_RESET_OICR_RECV. So, if the latter bit is set
653  	 * prepare for pending reset if not already (for PF software-initiated
654  	 * global resets the software should already be prepared for it as
655  	 * indicated by ICE_PREPARED_FOR_RESET; for global resets initiated
656  	 * by firmware or software on other PFs, that bit is not set so prepare
657  	 * for the reset now), poll for reset done, rebuild and return.
658  	 */
659  	if (test_bit(ICE_RESET_OICR_RECV, pf->state)) {
660  		/* Perform the largest reset requested */
661  		if (test_and_clear_bit(ICE_CORER_RECV, pf->state))
662  			reset_type = ICE_RESET_CORER;
663  		if (test_and_clear_bit(ICE_GLOBR_RECV, pf->state))
664  			reset_type = ICE_RESET_GLOBR;
665  		if (test_and_clear_bit(ICE_EMPR_RECV, pf->state))
666  			reset_type = ICE_RESET_EMPR;
667  		/* return if no valid reset type requested */
668  		if (reset_type == ICE_RESET_INVAL)
669  			return;
670  		ice_prepare_for_reset(pf, reset_type);
671  
672  		/* make sure we are ready to rebuild */
673  		if (ice_check_reset(&pf->hw)) {
674  			set_bit(ICE_RESET_FAILED, pf->state);
675  		} else {
676  			/* done with reset. start rebuild */
677  			pf->hw.reset_ongoing = false;
678  			ice_rebuild(pf, reset_type);
679  			/* clear bit to resume normal operations, but
680  			 * ICE_NEEDS_RESTART bit is set in case rebuild failed
681  			 */
682  			clear_bit(ICE_RESET_OICR_RECV, pf->state);
683  			clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
684  			clear_bit(ICE_PFR_REQ, pf->state);
685  			clear_bit(ICE_CORER_REQ, pf->state);
686  			clear_bit(ICE_GLOBR_REQ, pf->state);
687  			wake_up(&pf->reset_wait_queue);
688  			ice_reset_all_vfs(pf);
689  		}
690  
691  		return;
692  	}
693  
694  	/* No pending resets to finish processing. Check for new resets */
695  	if (test_bit(ICE_PFR_REQ, pf->state))
696  		reset_type = ICE_RESET_PFR;
697  	if (test_bit(ICE_CORER_REQ, pf->state))
698  		reset_type = ICE_RESET_CORER;
699  	if (test_bit(ICE_GLOBR_REQ, pf->state))
700  		reset_type = ICE_RESET_GLOBR;
701  	/* If no valid reset type requested just return */
702  	if (reset_type == ICE_RESET_INVAL)
703  		return;
704  
705  	/* reset if not already down or busy */
706  	if (!test_bit(ICE_DOWN, pf->state) &&
707  	    !test_bit(ICE_CFG_BUSY, pf->state)) {
708  		ice_do_reset(pf, reset_type);
709  	}
710  }
711  
712  /**
713   * ice_print_topo_conflict - print topology conflict message
714   * @vsi: the VSI whose topology status is being checked
715   */
716  static void ice_print_topo_conflict(struct ice_vsi *vsi)
717  {
718  	switch (vsi->port_info->phy.link_info.topo_media_conflict) {
719  	case ICE_AQ_LINK_TOPO_CONFLICT:
720  	case ICE_AQ_LINK_MEDIA_CONFLICT:
721  	case ICE_AQ_LINK_TOPO_UNREACH_PRT:
722  	case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT:
723  	case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA:
724  		netdev_info(vsi->netdev, "Potential misconfiguration of the Ethernet port detected. If it was not intended, please use the Intel (R) Ethernet Port Configuration Tool to address the issue.\n");
725  		break;
726  	case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA:
727  		if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, vsi->back->flags))
728  			netdev_warn(vsi->netdev, "An unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules\n");
729  		else
730  			netdev_err(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
731  		break;
732  	default:
733  		break;
734  	}
735  }
736  
737  /**
738   * ice_print_link_msg - print link up or down message
739   * @vsi: the VSI whose link status is being queried
740   * @isup: boolean for if the link is now up or down
741   */
742  void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
743  {
744  	struct ice_aqc_get_phy_caps_data *caps;
745  	const char *an_advertised;
746  	const char *fec_req;
747  	const char *speed;
748  	const char *fec;
749  	const char *fc;
750  	const char *an;
751  	int status;
752  
753  	if (!vsi)
754  		return;
755  
756  	if (vsi->current_isup == isup)
757  		return;
758  
759  	vsi->current_isup = isup;
760  
761  	if (!isup) {
762  		netdev_info(vsi->netdev, "NIC Link is Down\n");
763  		return;
764  	}
765  
766  	switch (vsi->port_info->phy.link_info.link_speed) {
767  	case ICE_AQ_LINK_SPEED_100GB:
768  		speed = "100 G";
769  		break;
770  	case ICE_AQ_LINK_SPEED_50GB:
771  		speed = "50 G";
772  		break;
773  	case ICE_AQ_LINK_SPEED_40GB:
774  		speed = "40 G";
775  		break;
776  	case ICE_AQ_LINK_SPEED_25GB:
777  		speed = "25 G";
778  		break;
779  	case ICE_AQ_LINK_SPEED_20GB:
780  		speed = "20 G";
781  		break;
782  	case ICE_AQ_LINK_SPEED_10GB:
783  		speed = "10 G";
784  		break;
785  	case ICE_AQ_LINK_SPEED_5GB:
786  		speed = "5 G";
787  		break;
788  	case ICE_AQ_LINK_SPEED_2500MB:
789  		speed = "2.5 G";
790  		break;
791  	case ICE_AQ_LINK_SPEED_1000MB:
792  		speed = "1 G";
793  		break;
794  	case ICE_AQ_LINK_SPEED_100MB:
795  		speed = "100 M";
796  		break;
797  	default:
798  		speed = "Unknown ";
799  		break;
800  	}
801  
802  	switch (vsi->port_info->fc.current_mode) {
803  	case ICE_FC_FULL:
804  		fc = "Rx/Tx";
805  		break;
806  	case ICE_FC_TX_PAUSE:
807  		fc = "Tx";
808  		break;
809  	case ICE_FC_RX_PAUSE:
810  		fc = "Rx";
811  		break;
812  	case ICE_FC_NONE:
813  		fc = "None";
814  		break;
815  	default:
816  		fc = "Unknown";
817  		break;
818  	}
819  
820  	/* Get FEC mode based on negotiated link info */
821  	switch (vsi->port_info->phy.link_info.fec_info) {
822  	case ICE_AQ_LINK_25G_RS_528_FEC_EN:
823  	case ICE_AQ_LINK_25G_RS_544_FEC_EN:
824  		fec = "RS-FEC";
825  		break;
826  	case ICE_AQ_LINK_25G_KR_FEC_EN:
827  		fec = "FC-FEC/BASE-R";
828  		break;
829  	default:
830  		fec = "NONE";
831  		break;
832  	}
833  
834  	/* check if autoneg completed, might be false due to not supported */
835  	if (vsi->port_info->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)
836  		an = "True";
837  	else
838  		an = "False";
839  
840  	/* Get FEC mode requested based on PHY caps last SW configuration */
841  	caps = kzalloc(sizeof(*caps), GFP_KERNEL);
842  	if (!caps) {
843  		fec_req = "Unknown";
844  		an_advertised = "Unknown";
845  		goto done;
846  	}
847  
848  	status = ice_aq_get_phy_caps(vsi->port_info, false,
849  				     ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL);
850  	if (status)
851  		netdev_info(vsi->netdev, "Get phy capability failed.\n");
852  
853  	an_advertised = ice_is_phy_caps_an_enabled(caps) ? "On" : "Off";
854  
855  	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
856  	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
857  		fec_req = "RS-FEC";
858  	else if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
859  		 caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
860  		fec_req = "FC-FEC/BASE-R";
861  	else
862  		fec_req = "NONE";
863  
864  	kfree(caps);
865  
866  done:
867  	netdev_info(vsi->netdev, "NIC Link is up %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg Advertised: %s, Autoneg Negotiated: %s, Flow Control: %s\n",
868  		    speed, fec_req, fec, an_advertised, an, fc);
869  	ice_print_topo_conflict(vsi);
870  }
871  
872  /**
873   * ice_vsi_link_event - update the VSI's netdev
874   * @vsi: the VSI on which the link event occurred
875   * @link_up: whether or not the VSI needs to be set up or down
876   */
877  static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up)
878  {
879  	if (!vsi)
880  		return;
881  
882  	if (test_bit(ICE_VSI_DOWN, vsi->state) || !vsi->netdev)
883  		return;
884  
885  	if (vsi->type == ICE_VSI_PF) {
886  		if (link_up == netif_carrier_ok(vsi->netdev))
887  			return;
888  
889  		if (link_up) {
890  			netif_carrier_on(vsi->netdev);
891  			netif_tx_wake_all_queues(vsi->netdev);
892  		} else {
893  			netif_carrier_off(vsi->netdev);
894  			netif_tx_stop_all_queues(vsi->netdev);
895  		}
896  	}
897  }
898  
899  /**
900   * ice_set_dflt_mib - send a default config MIB to the FW
901   * @pf: private PF struct
902   *
903   * This function sends a default configuration MIB to the FW.
904   *
905   * If this function errors out at any point, the driver is still able to
906   * function.  The main impact is that LFC may not operate as expected.
907   * Therefore an error in this function should only be logged with a debug
908   * message, and driver rebuild/re-enable should continue regardless.
909   */
910  static void ice_set_dflt_mib(struct ice_pf *pf)
911  {
912  	struct device *dev = ice_pf_to_dev(pf);
913  	u8 mib_type, *buf, *lldpmib = NULL;
914  	u16 len, typelen, offset = 0;
915  	struct ice_lldp_org_tlv *tlv;
916  	struct ice_hw *hw = &pf->hw;
917  	u32 ouisubtype;
918  
919  	mib_type = SET_LOCAL_MIB_TYPE_LOCAL_MIB;
920  	lldpmib = kzalloc(ICE_LLDPDU_SIZE, GFP_KERNEL);
921  	if (!lldpmib) {
922  		dev_dbg(dev, "%s Failed to allocate MIB memory\n",
923  			__func__);
924  		return;
925  	}
926  
927  	/* Add ETS CFG TLV */
928  	tlv = (struct ice_lldp_org_tlv *)lldpmib;
929  	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
930  		   ICE_IEEE_ETS_TLV_LEN);
931  	tlv->typelen = htons(typelen);
932  	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
933  		      ICE_IEEE_SUBTYPE_ETS_CFG);
934  	tlv->ouisubtype = htonl(ouisubtype);
935  
936  	buf = tlv->tlvinfo;
937  	buf[0] = 0;
938  
939  	/* ETS CFG all UPs map to TC 0. Next 4 (1 - 4) Octets = 0.
940  	 * Octets 5 - 12 are BW values, set octet 5 to 100% BW.
941  	 * Octets 13 - 20 are TSA values - leave as zeros
942  	 */
943  	buf[5] = 0x64;
944  	len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
945  	offset += len + 2;
946  	tlv = (struct ice_lldp_org_tlv *)
947  		((char *)tlv + sizeof(tlv->typelen) + len);
948  
949  	/* Add ETS REC TLV */
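	/* The ETS REC TLV carries the same type and length as the ETS CFG TLV
	 * built above, so typelen is reused here without being recomputed.
	 */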
950  	buf = tlv->tlvinfo;
951  	tlv->typelen = htons(typelen);
952  
953  	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
954  		      ICE_IEEE_SUBTYPE_ETS_REC);
955  	tlv->ouisubtype = htonl(ouisubtype);
956  
957  	/* First octet of buf is reserved
958  	 * Octets 1 - 4 map UP to TC - all UPs map to zero
959  	 * Octets 5 - 12 are BW values - set TC 0 to 100%.
960  	 * Octets 13 - 20 are TSA value - leave as zeros
961  	 */
962  	buf[5] = 0x64;
963  	offset += len + 2;
964  	tlv = (struct ice_lldp_org_tlv *)
965  		((char *)tlv + sizeof(tlv->typelen) + len);
966  
967  	/* Add PFC CFG TLV */
968  	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
969  		   ICE_IEEE_PFC_TLV_LEN);
970  	tlv->typelen = htons(typelen);
971  
972  	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
973  		      ICE_IEEE_SUBTYPE_PFC_CFG);
974  	tlv->ouisubtype = htonl(ouisubtype);
975  
976  	/* Octet 1 left as all zeros - PFC disabled */
977  	buf[0] = 0x08;
978  	len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
979  	offset += len + 2;
980  
981  	if (ice_aq_set_lldp_mib(hw, mib_type, (void *)lldpmib, offset, NULL))
982  		dev_dbg(dev, "%s Failed to set default LLDP MIB\n", __func__);
983  
984  	kfree(lldpmib);
985  }
986  
987  /**
988   * ice_check_phy_fw_load - check if PHY FW load failed
989   * @pf: pointer to PF struct
990   * @link_cfg_err: bitmap from the link info structure
991   *
992   * check if external PHY FW load failed and print an error message if it did
993   */
994  static void ice_check_phy_fw_load(struct ice_pf *pf, u8 link_cfg_err)
995  {
996  	if (!(link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE)) {
997  		clear_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags);
998  		return;
999  	}
1000  
1001  	if (test_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags))
1002  		return;
1003  
1004  	if (link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE) {
1005  		dev_err(ice_pf_to_dev(pf), "Device failed to load the FW for the external PHY. Please download and install the latest NVM for your device and try again\n");
1006  		set_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags);
1007  	}
1008  }
1009  
1010  /**
1011   * ice_check_module_power - check module power level
1012   * @pf: pointer to PF struct
1013   * @link_cfg_err: bitmap from the link info structure
1014   *
1015   * check module power level returned by a previous call to aq_get_link_info
1016   * and print error messages if module power level is not supported
1017   */
1018  static void ice_check_module_power(struct ice_pf *pf, u8 link_cfg_err)
1019  {
1020  	/* if module power level is supported, clear the flag */
1021  	if (!(link_cfg_err & (ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT |
1022  			      ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED))) {
1023  		clear_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
1024  		return;
1025  	}
1026  
1027  	/* if ICE_FLAG_MOD_POWER_UNSUPPORTED was previously set and the
1028  	 * above block didn't clear this bit, there's nothing to do
1029  	 */
1030  	if (test_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags))
1031  		return;
1032  
1033  	if (link_cfg_err & ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT) {
1034  		dev_err(ice_pf_to_dev(pf), "The installed module is incompatible with the device's NVM image. Cannot start link\n");
1035  		set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
1036  	} else if (link_cfg_err & ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED) {
1037  		dev_err(ice_pf_to_dev(pf), "The module's power requirements exceed the device's power supply. Cannot start link\n");
1038  		set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
1039  	}
1040  }
1041  
1042  /**
1043   * ice_check_link_cfg_err - check if link configuration failed
1044   * @pf: pointer to the PF struct
1045   * @link_cfg_err: bitmap from the link info structure
1046   *
1047   * Check the link_cfg_err bitmap from the link info structure and print an
1048   * error message for any link configuration failure it indicates
1049   */
1050  static void ice_check_link_cfg_err(struct ice_pf *pf, u8 link_cfg_err)
1051  {
1052  	ice_check_module_power(pf, link_cfg_err);
1053  	ice_check_phy_fw_load(pf, link_cfg_err);
1054  }
1055  
1056  /**
1057   * ice_link_event - process the link event
1058   * @pf: PF that the link event is associated with
1059   * @pi: port_info for the port that the link event is associated with
1060   * @link_up: true if the physical link is up and false if it is down
1061   * @link_speed: current link speed received from the link event
1062   *
1063   * Returns 0 on success and negative on failure
1064   */
1065  static int
1066  ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
1067  	       u16 link_speed)
1068  {
1069  	struct device *dev = ice_pf_to_dev(pf);
1070  	struct ice_phy_info *phy_info;
1071  	struct ice_vsi *vsi;
1072  	u16 old_link_speed;
1073  	bool old_link;
1074  	int status;
1075  
1076  	phy_info = &pi->phy;
1077  	phy_info->link_info_old = phy_info->link_info;
1078  
1079  	old_link = !!(phy_info->link_info_old.link_info & ICE_AQ_LINK_UP);
1080  	old_link_speed = phy_info->link_info_old.link_speed;
1081  
1082  	/* update the link info structures and re-enable link events,
1083  	 * don't bail on failure, since other bookkeeping is still needed
1084  	 */
1085  	status = ice_update_link_info(pi);
1086  	if (status)
1087  		dev_dbg(dev, "Failed to update link status on port %d, err %d aq_err %s\n",
1088  			pi->lport, status,
1089  			ice_aq_str(pi->hw->adminq.sq_last_status));
1090  
1091  	ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err);
1092  
1093  	/* Check if the link state is up after updating link info, and treat
1094  	 * this event as an UP event since the link is actually UP now.
1095  	 */
1096  	if (phy_info->link_info.link_info & ICE_AQ_LINK_UP)
1097  		link_up = true;
1098  
1099  	vsi = ice_get_main_vsi(pf);
1100  	if (!vsi || !vsi->port_info)
1101  		return -EINVAL;
1102  
1103  	/* turn off PHY if media was removed */
1104  	if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags) &&
1105  	    !(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
1106  		set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
1107  		ice_set_link(vsi, false);
1108  	}
1109  
1110  	/* if the old link up/down and speed is the same as the new */
1111  	if (link_up == old_link && link_speed == old_link_speed)
1112  		return 0;
1113  
1114  	ice_ptp_link_change(pf, pf->hw.pf_id, link_up);
1115  
1116  	if (ice_is_dcb_active(pf)) {
1117  		if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
1118  			ice_dcb_rebuild(pf);
1119  	} else {
1120  		if (link_up)
1121  			ice_set_dflt_mib(pf);
1122  	}
1123  	ice_vsi_link_event(vsi, link_up);
1124  	ice_print_link_msg(vsi, link_up);
1125  
1126  	ice_vc_notify_link_state(pf);
1127  
1128  	return 0;
1129  }
1130  
1131  /**
1132   * ice_watchdog_subtask - periodic tasks not using event driven scheduling
1133   * @pf: board private structure
1134   */
1135  static void ice_watchdog_subtask(struct ice_pf *pf)
1136  {
1137  	int i;
1138  
1139  	/* if interface is down do nothing */
1140  	if (test_bit(ICE_DOWN, pf->state) ||
1141  	    test_bit(ICE_CFG_BUSY, pf->state))
1142  		return;
1143  
1144  	/* make sure we don't do these things too often */
1145  	if (time_before(jiffies,
1146  			pf->serv_tmr_prev + pf->serv_tmr_period))
1147  		return;
1148  
1149  	pf->serv_tmr_prev = jiffies;
1150  
1151  	/* Update the stats for active netdevs so the network stack
1152  	 * can look at updated numbers whenever it cares to
1153  	 */
1154  	ice_update_pf_stats(pf);
1155  	ice_for_each_vsi(pf, i)
1156  		if (pf->vsi[i] && pf->vsi[i]->netdev)
1157  			ice_update_vsi_stats(pf->vsi[i]);
1158  }
1159  
1160  /**
1161   * ice_init_link_events - enable/initialize link events
1162   * @pi: pointer to the port_info instance
1163   *
1164   * Returns -EIO on failure, 0 on success
1165   */
1166  static int ice_init_link_events(struct ice_port_info *pi)
1167  {
1168  	u16 mask;
1169  
1170  	mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA |
1171  		       ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL |
1172  		       ICE_AQ_LINK_EVENT_PHY_FW_LOAD_FAIL));
1173  
1174  	if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) {
1175  		dev_dbg(ice_hw_to_dev(pi->hw), "Failed to set link event mask for port %d\n",
1176  			pi->lport);
1177  		return -EIO;
1178  	}
1179  
1180  	if (ice_aq_get_link_info(pi, true, NULL, NULL)) {
1181  		dev_dbg(ice_hw_to_dev(pi->hw), "Failed to enable link events for port %d\n",
1182  			pi->lport);
1183  		return -EIO;
1184  	}
1185  
1186  	return 0;
1187  }
1188  
1189  /**
1190   * ice_handle_link_event - handle link event via ARQ
1191   * @pf: PF that the link event is associated with
1192   * @event: event structure containing link status info
1193   */
1194  static int
1195  ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event)
1196  {
1197  	struct ice_aqc_get_link_status_data *link_data;
1198  	struct ice_port_info *port_info;
1199  	int status;
1200  
1201  	link_data = (struct ice_aqc_get_link_status_data *)event->msg_buf;
1202  	port_info = pf->hw.port_info;
1203  	if (!port_info)
1204  		return -EINVAL;
1205  
1206  	status = ice_link_event(pf, port_info,
1207  				!!(link_data->link_info & ICE_AQ_LINK_UP),
1208  				le16_to_cpu(link_data->link_speed));
1209  	if (status)
1210  		dev_dbg(ice_pf_to_dev(pf), "Could not process link event, error %d\n",
1211  			status);
1212  
1213  	return status;
1214  }
1215  
1216  enum ice_aq_task_state {
1217  	ICE_AQ_TASK_WAITING = 0,
1218  	ICE_AQ_TASK_COMPLETE,
1219  	ICE_AQ_TASK_CANCELED,
1220  };
1221  
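/* One outstanding wait for an AdminQ completion. Entries are queued on
 * pf->aq_wait_list by ice_aq_wait_for_event(), filled in and marked
 * ICE_AQ_TASK_COMPLETE by ice_aq_check_events(), and marked
 * ICE_AQ_TASK_CANCELED by ice_aq_cancel_waiting_tasks().
 */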
1222  struct ice_aq_task {
1223  	struct hlist_node entry;
1224  
1225  	u16 opcode;
1226  	struct ice_rq_event_info *event;
1227  	enum ice_aq_task_state state;
1228  };
1229  
1230  /**
1231   * ice_aq_wait_for_event - Wait for an AdminQ event from firmware
1232   * @pf: pointer to the PF private structure
1233   * @opcode: the opcode to wait for
1234   * @timeout: how long to wait, in jiffies
1235   * @event: storage for the event info
1236   *
1237   * Waits for a specific AdminQ completion event on the ARQ for a given PF. The
1238   * current thread will be put to sleep until the specified event occurs or
1239   * until the given timeout is reached.
1240   *
1241   * To obtain only the descriptor contents, pass an event without an allocated
1242   * msg_buf. If the complete data buffer is desired, allocate the
1243   * event->msg_buf with enough space ahead of time.
1244   *
1245   * Returns: zero on success, or a negative error code on failure.
1246   */
1247  int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout,
1248  			  struct ice_rq_event_info *event)
1249  {
1250  	struct device *dev = ice_pf_to_dev(pf);
1251  	struct ice_aq_task *task;
1252  	unsigned long start;
1253  	long ret;
1254  	int err;
1255  
1256  	task = kzalloc(sizeof(*task), GFP_KERNEL);
1257  	if (!task)
1258  		return -ENOMEM;
1259  
1260  	INIT_HLIST_NODE(&task->entry);
1261  	task->opcode = opcode;
1262  	task->event = event;
1263  	task->state = ICE_AQ_TASK_WAITING;
1264  
1265  	spin_lock_bh(&pf->aq_wait_lock);
1266  	hlist_add_head(&task->entry, &pf->aq_wait_list);
1267  	spin_unlock_bh(&pf->aq_wait_lock);
1268  
1269  	start = jiffies;
1270  
1271  	ret = wait_event_interruptible_timeout(pf->aq_wait_queue, task->state,
1272  					       timeout);
1273  	switch (task->state) {
1274  	case ICE_AQ_TASK_WAITING:
1275  		err = ret < 0 ? ret : -ETIMEDOUT;
1276  		break;
1277  	case ICE_AQ_TASK_CANCELED:
1278  		err = ret < 0 ? ret : -ECANCELED;
1279  		break;
1280  	case ICE_AQ_TASK_COMPLETE:
1281  		err = ret < 0 ? ret : 0;
1282  		break;
1283  	default:
1284  		WARN(1, "Unexpected AdminQ wait task state %u", task->state);
1285  		err = -EINVAL;
1286  		break;
1287  	}
1288  
1289  	dev_dbg(dev, "Waited %u msecs (max %u msecs) for firmware response to op 0x%04x\n",
1290  		jiffies_to_msecs(jiffies - start),
1291  		jiffies_to_msecs(timeout),
1292  		opcode);
1293  
1294  	spin_lock_bh(&pf->aq_wait_lock);
1295  	hlist_del(&task->entry);
1296  	spin_unlock_bh(&pf->aq_wait_lock);
1297  	kfree(task);
1298  
1299  	return err;
1300  }
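/* Illustrative usage (a sketch, not code from this file): a caller expecting a
 * firmware completion fills in a struct ice_rq_event_info, optionally pointing
 * event.msg_buf at a buffer and setting event.buf_len for the response
 * payload, then calls ice_aq_wait_for_event(pf, opcode, timeout_jiffies,
 * &event) and handles 0, -ETIMEDOUT or -ECANCELED.
 */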
1301  
1302  /**
1303   * ice_aq_check_events - Check if any thread is waiting for an AdminQ event
1304   * @pf: pointer to the PF private structure
1305   * @opcode: the opcode of the event
1306   * @event: the event to check
1307   *
1308   * Loops over the current list of pending threads waiting for an AdminQ event.
1309   * For each matching task, copy the contents of the event into the task
1310   * structure and wake up the thread.
1311   *
1312   * If multiple threads wait for the same opcode, they will all be woken up.
1313   *
1314   * Note that event->msg_buf will only be duplicated if the event has a buffer
1315   * with enough space already allocated. Otherwise, only the descriptor and
1316   * message length will be copied.
1319   */
1320  static void ice_aq_check_events(struct ice_pf *pf, u16 opcode,
1321  				struct ice_rq_event_info *event)
1322  {
1323  	struct ice_aq_task *task;
1324  	bool found = false;
1325  
1326  	spin_lock_bh(&pf->aq_wait_lock);
1327  	hlist_for_each_entry(task, &pf->aq_wait_list, entry) {
1328  		if (task->state || task->opcode != opcode)
1329  			continue;
1330  
1331  		memcpy(&task->event->desc, &event->desc, sizeof(event->desc));
1332  		task->event->msg_len = event->msg_len;
1333  
1334  		/* Only copy the data buffer if a destination was set */
1335  		if (task->event->msg_buf &&
1336  		    task->event->buf_len > event->buf_len) {
1337  			memcpy(task->event->msg_buf, event->msg_buf,
1338  			       event->buf_len);
1339  			task->event->buf_len = event->buf_len;
1340  		}
1341  
1342  		task->state = ICE_AQ_TASK_COMPLETE;
1343  		found = true;
1344  	}
1345  	spin_unlock_bh(&pf->aq_wait_lock);
1346  
1347  	if (found)
1348  		wake_up(&pf->aq_wait_queue);
1349  }
1350  
1351  /**
1352   * ice_aq_cancel_waiting_tasks - Immediately cancel all waiting tasks
1353   * @pf: the PF private structure
1354   *
1355   * Set all waiting tasks to ICE_AQ_TASK_CANCELED, and wake up their threads.
1356   * This will then cause ice_aq_wait_for_event to exit with -ECANCELED.
1357   */
1358  static void ice_aq_cancel_waiting_tasks(struct ice_pf *pf)
1359  {
1360  	struct ice_aq_task *task;
1361  
1362  	spin_lock_bh(&pf->aq_wait_lock);
1363  	hlist_for_each_entry(task, &pf->aq_wait_list, entry)
1364  		task->state = ICE_AQ_TASK_CANCELED;
1365  	spin_unlock_bh(&pf->aq_wait_lock);
1366  
1367  	wake_up(&pf->aq_wait_queue);
1368  }
1369  
1370  /**
1371   * __ice_clean_ctrlq - helper function to clean controlq rings
1372   * @pf: ptr to struct ice_pf
1373   * @q_type: specific Control queue type
1374   */
1375  static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
1376  {
1377  	struct device *dev = ice_pf_to_dev(pf);
1378  	struct ice_rq_event_info event;
1379  	struct ice_hw *hw = &pf->hw;
1380  	struct ice_ctl_q_info *cq;
1381  	u16 pending, i = 0;
1382  	const char *qtype;
1383  	u32 oldval, val;
1384  
1385  	/* Do not clean control queue if/when PF reset fails */
1386  	if (test_bit(ICE_RESET_FAILED, pf->state))
1387  		return 0;
1388  
1389  	switch (q_type) {
1390  	case ICE_CTL_Q_ADMIN:
1391  		cq = &hw->adminq;
1392  		qtype = "Admin";
1393  		break;
1394  	case ICE_CTL_Q_SB:
1395  		cq = &hw->sbq;
1396  		qtype = "Sideband";
1397  		break;
1398  	case ICE_CTL_Q_MAILBOX:
1399  		cq = &hw->mailboxq;
1400  		qtype = "Mailbox";
1401  		/* we are going to try to detect a malicious VF, so set the
1402  		 * state to begin detection
1403  		 */
1404  		hw->mbx_snapshot.mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
1405  		break;
1406  	default:
1407  		dev_warn(dev, "Unknown control queue type 0x%x\n", q_type);
1408  		return 0;
1409  	}
1410  
1411  	/* check for error indications - PF_xx_AxQLEN register layout for
1412  	 * FW/MBX/SB are identical so just use defines for PF_FW_AxQLEN.
1413  	 */
1414  	val = rd32(hw, cq->rq.len);
1415  	if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
1416  		   PF_FW_ARQLEN_ARQCRIT_M)) {
1417  		oldval = val;
1418  		if (val & PF_FW_ARQLEN_ARQVFE_M)
1419  			dev_dbg(dev, "%s Receive Queue VF Error detected\n",
1420  				qtype);
1421  		if (val & PF_FW_ARQLEN_ARQOVFL_M) {
1422  			dev_dbg(dev, "%s Receive Queue Overflow Error detected\n",
1423  				qtype);
1424  		}
1425  		if (val & PF_FW_ARQLEN_ARQCRIT_M)
1426  			dev_dbg(dev, "%s Receive Queue Critical Error detected\n",
1427  				qtype);
1428  		val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
1429  			 PF_FW_ARQLEN_ARQCRIT_M);
1430  		if (oldval != val)
1431  			wr32(hw, cq->rq.len, val);
1432  	}
1433  
1434  	val = rd32(hw, cq->sq.len);
1435  	if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
1436  		   PF_FW_ATQLEN_ATQCRIT_M)) {
1437  		oldval = val;
1438  		if (val & PF_FW_ATQLEN_ATQVFE_M)
1439  			dev_dbg(dev, "%s Send Queue VF Error detected\n",
1440  				qtype);
1441  		if (val & PF_FW_ATQLEN_ATQOVFL_M) {
1442  			dev_dbg(dev, "%s Send Queue Overflow Error detected\n",
1443  				qtype);
1444  		}
1445  		if (val & PF_FW_ATQLEN_ATQCRIT_M)
1446  			dev_dbg(dev, "%s Send Queue Critical Error detected\n",
1447  				qtype);
1448  		val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
1449  			 PF_FW_ATQLEN_ATQCRIT_M);
1450  		if (oldval != val)
1451  			wr32(hw, cq->sq.len, val);
1452  	}
1453  
1454  	event.buf_len = cq->rq_buf_size;
1455  	event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
1456  	if (!event.msg_buf)
1457  		return 0;
1458  
1459  	do {
1460  		u16 opcode;
1461  		int ret;
1462  
1463  		ret = ice_clean_rq_elem(hw, cq, &event, &pending);
1464  		if (ret == -EALREADY)
1465  			break;
1466  		if (ret) {
1467  			dev_err(dev, "%s Receive Queue event error %d\n", qtype,
1468  				ret);
1469  			break;
1470  		}
1471  
1472  		opcode = le16_to_cpu(event.desc.opcode);
1473  
1474  		/* Notify any thread that might be waiting for this event */
1475  		ice_aq_check_events(pf, opcode, &event);
1476  
1477  		switch (opcode) {
1478  		case ice_aqc_opc_get_link_status:
1479  			if (ice_handle_link_event(pf, &event))
1480  				dev_err(dev, "Could not handle link event\n");
1481  			break;
1482  		case ice_aqc_opc_event_lan_overflow:
1483  			ice_vf_lan_overflow_event(pf, &event);
1484  			break;
1485  		case ice_mbx_opc_send_msg_to_pf:
1486  			if (!ice_is_malicious_vf(pf, &event, i, pending))
1487  				ice_vc_process_vf_msg(pf, &event);
1488  			break;
1489  		case ice_aqc_opc_fw_logging:
1490  			ice_output_fw_log(hw, &event.desc, event.msg_buf);
1491  			break;
1492  		case ice_aqc_opc_lldp_set_mib_change:
1493  			ice_dcb_process_lldp_set_mib_change(pf, &event);
1494  			break;
1495  		default:
1496  			dev_dbg(dev, "%s Receive Queue unknown event 0x%04x ignored\n",
1497  				qtype, opcode);
1498  			break;
1499  		}
1500  	} while (pending && (i++ < ICE_DFLT_IRQ_WORK));
1501  
1502  	kfree(event.msg_buf);
1503  
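	/* A non-zero return tells callers such as ice_clean_adminq_subtask()
	 * to bail out without clearing their EVENT_PENDING bit, so the queue
	 * is serviced again on a later pass.
	 */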
1504  	return pending && (i == ICE_DFLT_IRQ_WORK);
1505  }
1506  
1507  /**
1508   * ice_ctrlq_pending - check if there is a difference between ntc and ntu
1509   * @hw: pointer to hardware info
1510   * @cq: control queue information
1511   *
1512   * returns true if there are pending messages in a queue, false if there aren't
1513   */
1514  static bool ice_ctrlq_pending(struct ice_hw *hw, struct ice_ctl_q_info *cq)
1515  {
1516  	u16 ntu;
1517  
1518  	ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
1519  	return cq->rq.next_to_clean != ntu;
1520  }
1521  
1522  /**
1523   * ice_clean_adminq_subtask - clean the AdminQ rings
1524   * @pf: board private structure
1525   */
1526  static void ice_clean_adminq_subtask(struct ice_pf *pf)
1527  {
1528  	struct ice_hw *hw = &pf->hw;
1529  
1530  	if (!test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state))
1531  		return;
1532  
1533  	if (__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN))
1534  		return;
1535  
1536  	clear_bit(ICE_ADMINQ_EVENT_PENDING, pf->state);
1537  
1538  	/* There might be a situation where new messages arrive to a control
1539  	 * queue between processing the last message and clearing the
1540  	 * EVENT_PENDING bit. So before exiting, check queue head again (using
1541  	 * ice_ctrlq_pending) and process new messages if any.
1542  	 */
1543  	if (ice_ctrlq_pending(hw, &hw->adminq))
1544  		__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN);
1545  
1546  	ice_flush(hw);
1547  }
1548  
1549  /**
1550   * ice_clean_mailboxq_subtask - clean the MailboxQ rings
1551   * @pf: board private structure
1552   */
1553  static void ice_clean_mailboxq_subtask(struct ice_pf *pf)
1554  {
1555  	struct ice_hw *hw = &pf->hw;
1556  
1557  	if (!test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state))
1558  		return;
1559  
1560  	if (__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX))
1561  		return;
1562  
1563  	clear_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state);
1564  
1565  	if (ice_ctrlq_pending(hw, &hw->mailboxq))
1566  		__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX);
1567  
1568  	ice_flush(hw);
1569  }
1570  
1571  /**
1572   * ice_clean_sbq_subtask - clean the Sideband Queue rings
1573   * @pf: board private structure
1574   */
1575  static void ice_clean_sbq_subtask(struct ice_pf *pf)
1576  {
1577  	struct ice_hw *hw = &pf->hw;
1578  
1579  	/* Nothing to do here if sideband queue is not supported */
1580  	if (!ice_is_sbq_supported(hw)) {
1581  		clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);
1582  		return;
1583  	}
1584  
1585  	if (!test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state))
1586  		return;
1587  
1588  	if (__ice_clean_ctrlq(pf, ICE_CTL_Q_SB))
1589  		return;
1590  
1591  	clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);
1592  
1593  	if (ice_ctrlq_pending(hw, &hw->sbq))
1594  		__ice_clean_ctrlq(pf, ICE_CTL_Q_SB);
1595  
1596  	ice_flush(hw);
1597  }
1598  
1599  /**
1600   * ice_service_task_schedule - schedule the service task to wake up
1601   * @pf: board private structure
1602   *
1603   * If not already scheduled, this puts the task into the work queue.
1604   */
1605  void ice_service_task_schedule(struct ice_pf *pf)
1606  {
1607  	if (!test_bit(ICE_SERVICE_DIS, pf->state) &&
1608  	    !test_and_set_bit(ICE_SERVICE_SCHED, pf->state) &&
1609  	    !test_bit(ICE_NEEDS_RESTART, pf->state))
1610  		queue_work(ice_wq, &pf->serv_task);
1611  }
1612  
1613  /**
1614   * ice_service_task_complete - finish up the service task
1615   * @pf: board private structure
1616   */
1617  static void ice_service_task_complete(struct ice_pf *pf)
1618  {
1619  	WARN_ON(!test_bit(ICE_SERVICE_SCHED, pf->state));
1620  
1621  	/* force memory (pf->state) to sync before next service task */
1622  	smp_mb__before_atomic();
1623  	clear_bit(ICE_SERVICE_SCHED, pf->state);
1624  }
1625  
1626  /**
1627   * ice_service_task_stop - stop service task and cancel work
1628   * @pf: board private structure
1629   *
1630   * Return 0 if the ICE_SERVICE_DIS bit was not already set,
1631   * 1 otherwise.
1632   */
1633  static int ice_service_task_stop(struct ice_pf *pf)
1634  {
1635  	int ret;
1636  
1637  	ret = test_and_set_bit(ICE_SERVICE_DIS, pf->state);
1638  
1639  	if (pf->serv_tmr.function)
1640  		del_timer_sync(&pf->serv_tmr);
1641  	if (pf->serv_task.func)
1642  		cancel_work_sync(&pf->serv_task);
1643  
1644  	clear_bit(ICE_SERVICE_SCHED, pf->state);
1645  	return ret;
1646  }
1647  
1648  /**
1649   * ice_service_task_restart - restart service task and schedule work
1650   * @pf: board private structure
1651   *
1652   * This function is needed for the suspend and resume flows (e.g. the WoL scenario)
1653   */
1654  static void ice_service_task_restart(struct ice_pf *pf)
1655  {
1656  	clear_bit(ICE_SERVICE_DIS, pf->state);
1657  	ice_service_task_schedule(pf);
1658  }
1659  
1660  /**
1661   * ice_service_timer - timer callback to schedule service task
1662   * @t: pointer to timer_list
1663   */
1664  static void ice_service_timer(struct timer_list *t)
1665  {
1666  	struct ice_pf *pf = from_timer(pf, t, serv_tmr);
1667  
1668  	mod_timer(&pf->serv_tmr, round_jiffies(pf->serv_tmr_period + jiffies));
1669  	ice_service_task_schedule(pf);
1670  }
1671  
1672  /**
1673   * ice_handle_mdd_event - handle malicious driver detect event
1674   * @pf: pointer to the PF structure
1675   *
1676   * Called from service task. OICR interrupt handler indicates MDD event.
1677   * VF MDD logging is guarded by net_ratelimit. Additional PF and VF log
1678   * messages are wrapped by netif_msg_[rx|tx]_err. Since VF Rx MDD events
1679   * disable the queue, the PF can be configured to reset the VF using ethtool
1680   * private flag mdd-auto-reset-vf.
1681   */
1682  static void ice_handle_mdd_event(struct ice_pf *pf)
1683  {
1684  	struct device *dev = ice_pf_to_dev(pf);
1685  	struct ice_hw *hw = &pf->hw;
1686  	struct ice_vf *vf;
1687  	unsigned int bkt;
1688  	u32 reg;
1689  
1690  	if (!test_and_clear_bit(ICE_MDD_EVENT_PENDING, pf->state)) {
1691  		/* Since the VF MDD event logging is rate limited, check if
1692  		 * there are pending MDD events.
1693  		 */
1694  		ice_print_vfs_mdd_events(pf);
1695  		return;
1696  	}
1697  
1698  	/* find what triggered an MDD event */
1699  	reg = rd32(hw, GL_MDET_TX_PQM);
1700  	if (reg & GL_MDET_TX_PQM_VALID_M) {
1701  		u8 pf_num = (reg & GL_MDET_TX_PQM_PF_NUM_M) >>
1702  				GL_MDET_TX_PQM_PF_NUM_S;
1703  		u16 vf_num = (reg & GL_MDET_TX_PQM_VF_NUM_M) >>
1704  				GL_MDET_TX_PQM_VF_NUM_S;
1705  		u8 event = (reg & GL_MDET_TX_PQM_MAL_TYPE_M) >>
1706  				GL_MDET_TX_PQM_MAL_TYPE_S;
1707  		u16 queue = ((reg & GL_MDET_TX_PQM_QNUM_M) >>
1708  				GL_MDET_TX_PQM_QNUM_S);
1709  
1710  		if (netif_msg_tx_err(pf))
1711  			dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
1712  				 event, queue, pf_num, vf_num);
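      		/* clear the recorded event so the next MDD event can be latched */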
1713  		wr32(hw, GL_MDET_TX_PQM, 0xffffffff);
1714  	}
1715  
1716  	reg = rd32(hw, GL_MDET_TX_TCLAN);
1717  	if (reg & GL_MDET_TX_TCLAN_VALID_M) {
1718  		u8 pf_num = (reg & GL_MDET_TX_TCLAN_PF_NUM_M) >>
1719  				GL_MDET_TX_TCLAN_PF_NUM_S;
1720  		u16 vf_num = (reg & GL_MDET_TX_TCLAN_VF_NUM_M) >>
1721  				GL_MDET_TX_TCLAN_VF_NUM_S;
1722  		u8 event = (reg & GL_MDET_TX_TCLAN_MAL_TYPE_M) >>
1723  				GL_MDET_TX_TCLAN_MAL_TYPE_S;
1724  		u16 queue = ((reg & GL_MDET_TX_TCLAN_QNUM_M) >>
1725  				GL_MDET_TX_TCLAN_QNUM_S);
1726  
1727  		if (netif_msg_tx_err(pf))
1728  			dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
1729  				 event, queue, pf_num, vf_num);
1730  		wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff);
1731  	}
1732  
1733  	reg = rd32(hw, GL_MDET_RX);
1734  	if (reg & GL_MDET_RX_VALID_M) {
1735  		u8 pf_num = (reg & GL_MDET_RX_PF_NUM_M) >>
1736  				GL_MDET_RX_PF_NUM_S;
1737  		u16 vf_num = (reg & GL_MDET_RX_VF_NUM_M) >>
1738  				GL_MDET_RX_VF_NUM_S;
1739  		u8 event = (reg & GL_MDET_RX_MAL_TYPE_M) >>
1740  				GL_MDET_RX_MAL_TYPE_S;
1741  		u16 queue = ((reg & GL_MDET_RX_QNUM_M) >>
1742  				GL_MDET_RX_QNUM_S);
1743  
1744  		if (netif_msg_rx_err(pf))
1745  			dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n",
1746  				 event, queue, pf_num, vf_num);
1747  		wr32(hw, GL_MDET_RX, 0xffffffff);
1748  	}
1749  
1750  	/* check to see if this PF caused an MDD event */
1751  	reg = rd32(hw, PF_MDET_TX_PQM);
1752  	if (reg & PF_MDET_TX_PQM_VALID_M) {
1753  		wr32(hw, PF_MDET_TX_PQM, 0xFFFF);
1754  		if (netif_msg_tx_err(pf))
1755  			dev_info(dev, "Malicious Driver Detection event TX_PQM detected on PF\n");
1756  	}
1757  
1758  	reg = rd32(hw, PF_MDET_TX_TCLAN);
1759  	if (reg & PF_MDET_TX_TCLAN_VALID_M) {
1760  		wr32(hw, PF_MDET_TX_TCLAN, 0xFFFF);
1761  		if (netif_msg_tx_err(pf))
1762  			dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on PF\n");
1763  	}
1764  
1765  	reg = rd32(hw, PF_MDET_RX);
1766  	if (reg & PF_MDET_RX_VALID_M) {
1767  		wr32(hw, PF_MDET_RX, 0xFFFF);
1768  		if (netif_msg_rx_err(pf))
1769  			dev_info(dev, "Malicious Driver Detection event RX detected on PF\n");
1770  	}
1771  
1772  	/* Check to see if one of the VFs caused an MDD event, and then
1773  	 * increment counters and set print pending
1774  	 */
1775  	mutex_lock(&pf->vfs.table_lock);
1776  	ice_for_each_vf(pf, bkt, vf) {
1777  		reg = rd32(hw, VP_MDET_TX_PQM(vf->vf_id));
1778  		if (reg & VP_MDET_TX_PQM_VALID_M) {
1779  			wr32(hw, VP_MDET_TX_PQM(vf->vf_id), 0xFFFF);
1780  			vf->mdd_tx_events.count++;
1781  			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
1782  			if (netif_msg_tx_err(pf))
1783  				dev_info(dev, "Malicious Driver Detection event TX_PQM detected on VF %d\n",
1784  					 vf->vf_id);
1785  		}
1786  
1787  		reg = rd32(hw, VP_MDET_TX_TCLAN(vf->vf_id));
1788  		if (reg & VP_MDET_TX_TCLAN_VALID_M) {
1789  			wr32(hw, VP_MDET_TX_TCLAN(vf->vf_id), 0xFFFF);
1790  			vf->mdd_tx_events.count++;
1791  			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
1792  			if (netif_msg_tx_err(pf))
1793  				dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n",
1794  					 vf->vf_id);
1795  		}
1796  
1797  		reg = rd32(hw, VP_MDET_TX_TDPU(vf->vf_id));
1798  		if (reg & VP_MDET_TX_TDPU_VALID_M) {
1799  			wr32(hw, VP_MDET_TX_TDPU(vf->vf_id), 0xFFFF);
1800  			vf->mdd_tx_events.count++;
1801  			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
1802  			if (netif_msg_tx_err(pf))
1803  				dev_info(dev, "Malicious Driver Detection event TX_TDPU detected on VF %d\n",
1804  					 vf->vf_id);
1805  		}
1806  
1807  		reg = rd32(hw, VP_MDET_RX(vf->vf_id));
1808  		if (reg & VP_MDET_RX_VALID_M) {
1809  			wr32(hw, VP_MDET_RX(vf->vf_id), 0xFFFF);
1810  			vf->mdd_rx_events.count++;
1811  			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
1812  			if (netif_msg_rx_err(pf))
1813  				dev_info(dev, "Malicious Driver Detection event RX detected on VF %d\n",
1814  					 vf->vf_id);
1815  
1816  			/* Since the queue is disabled on VF Rx MDD events, the
1817  			 * PF can be configured to reset the VF through ethtool
1818  			 * private flag mdd-auto-reset-vf.
1819  			 */
1820  			if (test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags)) {
1821  				/* VF MDD event counters will be cleared by
1822  				 * reset, so print the event prior to reset.
1823  				 */
1824  				ice_print_vf_rx_mdd_event(vf);
1825  				ice_reset_vf(vf, ICE_VF_RESET_LOCK);
1826  			}
1827  		}
1828  	}
1829  	mutex_unlock(&pf->vfs.table_lock);
1830  
1831  	ice_print_vfs_mdd_events(pf);
1832  }
1833  
1834  /**
1835   * ice_force_phys_link_state - Force the physical link state
1836   * @vsi: VSI to force the physical link state to up/down
1837   * @link_up: true/false indicates to set the physical link to up/down
1838   *
1839   * Force the physical link state by getting the current PHY capabilities from
1840   * hardware and setting the PHY config based on the determined capabilities. If
1841   * link changes, a link event will be triggered because both the Enable Automatic
1842   * Link Update and LESM Enable bits are set when setting the PHY capabilities.
1843   *
1844   * Returns 0 on success, negative on failure
1845   */
1846  static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
1847  {
1848  	struct ice_aqc_get_phy_caps_data *pcaps;
1849  	struct ice_aqc_set_phy_cfg_data *cfg;
1850  	struct ice_port_info *pi;
1851  	struct device *dev;
1852  	int retcode;
1853  
1854  	if (!vsi || !vsi->port_info || !vsi->back)
1855  		return -EINVAL;
1856  	if (vsi->type != ICE_VSI_PF)
1857  		return 0;
1858  
1859  	dev = ice_pf_to_dev(vsi->back);
1860  
1861  	pi = vsi->port_info;
1862  
1863  	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
1864  	if (!pcaps)
1865  		return -ENOMEM;
1866  
1867  	retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
1868  				      NULL);
1869  	if (retcode) {
1870  		dev_err(dev, "Failed to get phy capabilities, VSI %d error %d\n",
1871  			vsi->vsi_num, retcode);
1872  		retcode = -EIO;
1873  		goto out;
1874  	}
1875  
1876  	/* No change in link */
1877  	if (link_up == !!(pcaps->caps & ICE_AQC_PHY_EN_LINK) &&
1878  	    link_up == !!(pi->phy.link_info.link_info & ICE_AQ_LINK_UP))
1879  		goto out;
1880  
1881  	/* Use the current user PHY configuration. The current user PHY
1882  	 * configuration is initialized during probe from PHY capabilities
1883  	 * software mode, and updated on set PHY configuration.
1884  	 */
1885  	cfg = kmemdup(&pi->phy.curr_user_phy_cfg, sizeof(*cfg), GFP_KERNEL);
1886  	if (!cfg) {
1887  		retcode = -ENOMEM;
1888  		goto out;
1889  	}
1890  
1891  	cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
1892  	if (link_up)
1893  		cfg->caps |= ICE_AQ_PHY_ENA_LINK;
1894  	else
1895  		cfg->caps &= ~ICE_AQ_PHY_ENA_LINK;
1896  
1897  	retcode = ice_aq_set_phy_cfg(&vsi->back->hw, pi, cfg, NULL);
1898  	if (retcode) {
1899  		dev_err(dev, "Failed to set phy config, VSI %d error %d\n",
1900  			vsi->vsi_num, retcode);
1901  		retcode = -EIO;
1902  	}
1903  
1904  	kfree(cfg);
1905  out:
1906  	kfree(pcaps);
1907  	return retcode;
1908  }
1909  
1910  /**
1911   * ice_init_nvm_phy_type - Initialize the NVM PHY type
1912   * @pi: port info structure
1913   *
1914   * Initialize nvm_phy_type_[low|high] for link lenient mode support
1915   */
1916  static int ice_init_nvm_phy_type(struct ice_port_info *pi)
1917  {
1918  	struct ice_aqc_get_phy_caps_data *pcaps;
1919  	struct ice_pf *pf = pi->hw->back;
1920  	int err;
1921  
1922  	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
1923  	if (!pcaps)
1924  		return -ENOMEM;
1925  
1926  	err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA,
1927  				  pcaps, NULL);
1928  
1929  	if (err) {
1930  		dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
1931  		goto out;
1932  	}
1933  
1934  	pf->nvm_phy_type_hi = pcaps->phy_type_high;
1935  	pf->nvm_phy_type_lo = pcaps->phy_type_low;
1936  
1937  out:
1938  	kfree(pcaps);
1939  	return err;
1940  }
1941  
1942  /**
1943   * ice_init_link_dflt_override - Initialize link default override
1944   * @pi: port info structure
1945   *
1946   * Initialize link default override and PHY total port shutdown during probe
1947   */
1948  static void ice_init_link_dflt_override(struct ice_port_info *pi)
1949  {
1950  	struct ice_link_default_override_tlv *ldo;
1951  	struct ice_pf *pf = pi->hw->back;
1952  
1953  	ldo = &pf->link_dflt_override;
1954  	if (ice_get_link_default_override(ldo, pi))
1955  		return;
1956  
1957  	if (!(ldo->options & ICE_LINK_OVERRIDE_PORT_DIS))
1958  		return;
1959  
1960  	/* Enable Total Port Shutdown (override/replace link-down-on-close
1961  	 * ethtool private flag) for ports with Port Disable bit set.
1962  	 */
1963  	set_bit(ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags);
1964  	set_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags);
1965  }
1966  
1967  /**
1968   * ice_init_phy_cfg_dflt_override - Initialize PHY cfg default override settings
1969   * @pi: port info structure
1970   *
1971   * If default override is enabled, initialize the user PHY cfg speed and FEC
1972   * settings using the default override mask from the NVM.
1973   *
1974   * The PHY should only be configured with the default override settings the
1975   * first time media is available. The ICE_LINK_DEFAULT_OVERRIDE_PENDING state
1976   * is used to indicate that the user PHY cfg default override is initialized
1977   * and the PHY has not been configured with the default override settings. The
1978   * state is set here, and cleared in ice_configure_phy the first time the PHY is
1979   * configured.
1980   *
1981   * This function should be called only if the FW doesn't support default
1982   * configuration mode, as reported by ice_fw_supports_report_dflt_cfg.
1983   */
1984  static void ice_init_phy_cfg_dflt_override(struct ice_port_info *pi)
1985  {
1986  	struct ice_link_default_override_tlv *ldo;
1987  	struct ice_aqc_set_phy_cfg_data *cfg;
1988  	struct ice_phy_info *phy = &pi->phy;
1989  	struct ice_pf *pf = pi->hw->back;
1990  
1991  	ldo = &pf->link_dflt_override;
1992  
1993  	/* If link default override is enabled, use it to mask the NVM PHY
1994  	 * capabilities for the speed and FEC default configuration.
1995  	 */
1996  	cfg = &phy->curr_user_phy_cfg;
1997  
1998  	if (ldo->phy_type_low || ldo->phy_type_high) {
1999  		cfg->phy_type_low = pf->nvm_phy_type_lo &
2000  				    cpu_to_le64(ldo->phy_type_low);
2001  		cfg->phy_type_high = pf->nvm_phy_type_hi &
2002  				     cpu_to_le64(ldo->phy_type_high);
2003  	}
2004  	cfg->link_fec_opt = ldo->fec_options;
2005  	phy->curr_user_fec_req = ICE_FEC_AUTO;
2006  
2007  	set_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING, pf->state);
2008  }
2009  
2010  /**
2011   * ice_init_phy_user_cfg - Initialize the PHY user configuration
2012   * @pi: port info structure
2013   *
2014   * Initialize the current user PHY configuration, speed, FEC, and FC requested
2015   * mode to default. The PHY defaults are from get PHY capabilities topology
2016   * with media, so call this when media is first available. An error is returned
2017   * if called when media is not available. The PHY initialization completed state is
2018   * set here.
2019   *
2020   * These configurations are used when setting PHY
2021   * configuration. The user PHY configuration is updated on set PHY
2022   * configuration. Returns 0 on success, negative on failure
2023   */
2024  static int ice_init_phy_user_cfg(struct ice_port_info *pi)
2025  {
2026  	struct ice_aqc_get_phy_caps_data *pcaps;
2027  	struct ice_phy_info *phy = &pi->phy;
2028  	struct ice_pf *pf = pi->hw->back;
2029  	int err;
2030  
2031  	if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
2032  		return -EIO;
2033  
2034  	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
2035  	if (!pcaps)
2036  		return -ENOMEM;
2037  
2038  	if (ice_fw_supports_report_dflt_cfg(pi->hw))
2039  		err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG,
2040  					  pcaps, NULL);
2041  	else
2042  		err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
2043  					  pcaps, NULL);
2044  	if (err) {
2045  		dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
2046  		goto err_out;
2047  	}
2048  
2049  	ice_copy_phy_caps_to_cfg(pi, pcaps, &pi->phy.curr_user_phy_cfg);
2050  
2051  	/* check if lenient mode is supported and enabled */
2052  	if (ice_fw_supports_link_override(pi->hw) &&
2053  	    !(pcaps->module_compliance_enforcement &
2054  	      ICE_AQC_MOD_ENFORCE_STRICT_MODE)) {
2055  		set_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags);
2056  
2057  		/* if the FW supports default PHY configuration mode, then the driver
2058  		 * does not have to apply link override settings. If not,
2059  		 * initialize user PHY configuration with link override values
2060  		 */
2061  		if (!ice_fw_supports_report_dflt_cfg(pi->hw) &&
2062  		    (pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN)) {
2063  			ice_init_phy_cfg_dflt_override(pi);
2064  			goto out;
2065  		}
2066  	}
2067  
2068  	/* if link default override is not enabled, set user flow control and
2069  	 * FEC settings based on what get_phy_caps returned
2070  	 */
2071  	phy->curr_user_fec_req = ice_caps_to_fec_mode(pcaps->caps,
2072  						      pcaps->link_fec_options);
2073  	phy->curr_user_fc_req = ice_caps_to_fc_mode(pcaps->caps);
2074  
2075  out:
2076  	phy->curr_user_speed_req = ICE_AQ_LINK_SPEED_M;
2077  	set_bit(ICE_PHY_INIT_COMPLETE, pf->state);
2078  err_out:
2079  	kfree(pcaps);
2080  	return err;
2081  }
2082  
2083  /**
2084   * ice_configure_phy - configure PHY
2085   * @vsi: VSI of PHY
2086   *
2087   * Set the PHY configuration. If the current PHY configuration is the same as
2088   * the curr_user_phy_cfg, then do nothing to avoid link flap. Otherwise
2089   * configure based on get PHY capabilities for topology with media.
2090   */
2091  static int ice_configure_phy(struct ice_vsi *vsi)
2092  {
2093  	struct device *dev = ice_pf_to_dev(vsi->back);
2094  	struct ice_port_info *pi = vsi->port_info;
2095  	struct ice_aqc_get_phy_caps_data *pcaps;
2096  	struct ice_aqc_set_phy_cfg_data *cfg;
2097  	struct ice_phy_info *phy = &pi->phy;
2098  	struct ice_pf *pf = vsi->back;
2099  	int err;
2100  
2101  	/* Ensure we have media as we cannot configure a medialess port */
2102  	if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
2103  		return -EPERM;
2104  
2105  	ice_print_topo_conflict(vsi);
2106  
2107  	if (!test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags) &&
2108  	    phy->link_info.topo_media_conflict == ICE_AQ_LINK_TOPO_UNSUPP_MEDIA)
2109  		return -EPERM;
2110  
2111  	if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags))
2112  		return ice_force_phys_link_state(vsi, true);
2113  
2114  	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
2115  	if (!pcaps)
2116  		return -ENOMEM;
2117  
2118  	/* Get current PHY config */
2119  	err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
2120  				  NULL);
2121  	if (err) {
2122  		dev_err(dev, "Failed to get PHY configuration, VSI %d error %d\n",
2123  			vsi->vsi_num, err);
2124  		goto done;
2125  	}
2126  
2127  	/* If PHY enable link is configured and configuration has not changed,
2128  	 * there's nothing to do
2129  	 */
2130  	if (pcaps->caps & ICE_AQC_PHY_EN_LINK &&
2131  	    ice_phy_caps_equals_cfg(pcaps, &phy->curr_user_phy_cfg))
2132  		goto done;
2133  
2134  	/* Use PHY topology as baseline for configuration */
2135  	memset(pcaps, 0, sizeof(*pcaps));
2136  	if (ice_fw_supports_report_dflt_cfg(pi->hw))
2137  		err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG,
2138  					  pcaps, NULL);
2139  	else
2140  		err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
2141  					  pcaps, NULL);
2142  	if (err) {
2143  		dev_err(dev, "Failed to get PHY caps, VSI %d error %d\n",
2144  			vsi->vsi_num, err);
2145  		goto done;
2146  	}
2147  
2148  	cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
2149  	if (!cfg) {
2150  		err = -ENOMEM;
2151  		goto done;
2152  	}
2153  
2154  	ice_copy_phy_caps_to_cfg(pi, pcaps, cfg);
2155  
2156  	/* Speed - If default override pending, use curr_user_phy_cfg set in
2157  	 * ice_init_phy_cfg_dflt_override.
2158  	 */
2159  	if (test_and_clear_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING,
2160  			       vsi->back->state)) {
2161  		cfg->phy_type_low = phy->curr_user_phy_cfg.phy_type_low;
2162  		cfg->phy_type_high = phy->curr_user_phy_cfg.phy_type_high;
2163  	} else {
2164  		u64 phy_low = 0, phy_high = 0;
2165  
2166  		ice_update_phy_type(&phy_low, &phy_high,
2167  				    pi->phy.curr_user_speed_req);
2168  		cfg->phy_type_low = pcaps->phy_type_low & cpu_to_le64(phy_low);
2169  		cfg->phy_type_high = pcaps->phy_type_high &
2170  				     cpu_to_le64(phy_high);
2171  	}
2172  
2173  	/* Can't provide what was requested; use PHY capabilities */
2174  	if (!cfg->phy_type_low && !cfg->phy_type_high) {
2175  		cfg->phy_type_low = pcaps->phy_type_low;
2176  		cfg->phy_type_high = pcaps->phy_type_high;
2177  	}
2178  
2179  	/* FEC */
2180  	ice_cfg_phy_fec(pi, cfg, phy->curr_user_fec_req);
2181  
2182  	/* Can't provide what was requested; use PHY capabilities */
2183  	if (cfg->link_fec_opt !=
2184  	    (cfg->link_fec_opt & pcaps->link_fec_options)) {
2185  		cfg->caps |= pcaps->caps & ICE_AQC_PHY_EN_AUTO_FEC;
2186  		cfg->link_fec_opt = pcaps->link_fec_options;
2187  	}
2188  
2189  	/* Flow Control - always supported; no need to check against
2190  	 * capabilities
2191  	 */
2192  	ice_cfg_phy_fc(pi, cfg, phy->curr_user_fc_req);
2193  
2194  	/* Enable link and link update */
2195  	cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK;
2196  
2197  	err = ice_aq_set_phy_cfg(&pf->hw, pi, cfg, NULL);
2198  	if (err)
2199  		dev_err(dev, "Failed to set phy config, VSI %d error %d\n",
2200  			vsi->vsi_num, err);
2201  
2202  	kfree(cfg);
2203  done:
2204  	kfree(pcaps);
2205  	return err;
2206  }
2207  
2208  /**
2209   * ice_check_media_subtask - Check for media
2210   * @pf: pointer to PF struct
2211   *
2212   * If media is available, then initialize the PHY user configuration if it has
2213   * not been done yet, and configure the PHY if the interface is up.
2214   */
2215  static void ice_check_media_subtask(struct ice_pf *pf)
2216  {
2217  	struct ice_port_info *pi;
2218  	struct ice_vsi *vsi;
2219  	int err;
2220  
2221  	/* No need to check for media if it's already present */
2222  	if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags))
2223  		return;
2224  
2225  	vsi = ice_get_main_vsi(pf);
2226  	if (!vsi)
2227  		return;
2228  
2229  	/* Refresh link info and check if media is present */
2230  	pi = vsi->port_info;
2231  	err = ice_update_link_info(pi);
2232  	if (err)
2233  		return;
2234  
2235  	ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err);
2236  
2237  	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
2238  		if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state))
2239  			ice_init_phy_user_cfg(pi);
2240  
2241  		/* PHY settings are reset on media insertion, reconfigure
2242  		 * PHY to preserve settings.
2243  		 */
2244  		if (test_bit(ICE_VSI_DOWN, vsi->state) &&
2245  		    test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags))
2246  			return;
2247  
2248  		err = ice_configure_phy(vsi);
2249  		if (!err)
2250  			clear_bit(ICE_FLAG_NO_MEDIA, pf->flags);
2251  
2252  		/* A Link Status Event will be generated; the event handler
2253  		 * will complete bringing the interface up
2254  		 */
2255  	}
2256  }
2257  
2258  /**
2259   * ice_service_task - manage and run subtasks
2260   * @work: pointer to work_struct contained by the PF struct
2261   */
2262  static void ice_service_task(struct work_struct *work)
2263  {
2264  	struct ice_pf *pf = container_of(work, struct ice_pf, serv_task);
2265  	unsigned long start_time = jiffies;
2266  
2267  	/* subtasks */
2268  
2269  	/* process reset requests first */
2270  	ice_reset_subtask(pf);
2271  
2272  	/* bail if a reset/recovery cycle is pending or rebuild failed */
2273  	if (ice_is_reset_in_progress(pf->state) ||
2274  	    test_bit(ICE_SUSPENDED, pf->state) ||
2275  	    test_bit(ICE_NEEDS_RESTART, pf->state)) {
2276  		ice_service_task_complete(pf);
2277  		return;
2278  	}
2279  
2280  	if (test_and_clear_bit(ICE_AUX_ERR_PENDING, pf->state)) {
2281  		struct iidc_event *event;
2282  
2283  		event = kzalloc(sizeof(*event), GFP_KERNEL);
2284  		if (event) {
2285  			set_bit(IIDC_EVENT_CRIT_ERR, event->type);
2286  			/* report the entire OICR value to AUX driver */
2287  			swap(event->reg, pf->oicr_err_reg);
2288  			ice_send_event_to_aux(pf, event);
2289  			kfree(event);
2290  		}
2291  	}
2292  
2293  	if (test_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags)) {
2294  		/* Plug aux device per request */
2295  		ice_plug_aux_dev(pf);
2296  
2297  		/* Mark plugging as done but check whether unplug was
2298  		 * requested during ice_plug_aux_dev() call
2299  		 * (e.g. from ice_clear_rdma_cap()) and if so then
2300  		 * unplug the aux device.
2301  		 */
2302  		if (!test_and_clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags))
2303  			ice_unplug_aux_dev(pf);
2304  	}
2305  
2306  	if (test_and_clear_bit(ICE_FLAG_MTU_CHANGED, pf->flags)) {
2307  		struct iidc_event *event;
2308  
2309  		event = kzalloc(sizeof(*event), GFP_KERNEL);
2310  		if (event) {
2311  			set_bit(IIDC_EVENT_AFTER_MTU_CHANGE, event->type);
2312  			ice_send_event_to_aux(pf, event);
2313  			kfree(event);
2314  		}
2315  	}
2316  
2317  	ice_clean_adminq_subtask(pf);
2318  	ice_check_media_subtask(pf);
2319  	ice_check_for_hang_subtask(pf);
2320  	ice_sync_fltr_subtask(pf);
2321  	ice_handle_mdd_event(pf);
2322  	ice_watchdog_subtask(pf);
2323  
2324  	if (ice_is_safe_mode(pf)) {
2325  		ice_service_task_complete(pf);
2326  		return;
2327  	}
2328  
2329  	ice_process_vflr_event(pf);
2330  	ice_clean_mailboxq_subtask(pf);
2331  	ice_clean_sbq_subtask(pf);
2332  	ice_sync_arfs_fltrs(pf);
2333  	ice_flush_fdir_ctx(pf);
2334  
2335  	/* Clear ICE_SERVICE_SCHED flag to allow scheduling next event */
2336  	ice_service_task_complete(pf);
2337  
2338  	/* If the tasks have taken longer than one service timer period
2339  	 * or there is more work to be done, reset the service timer to
2340  	 * schedule the service task now.
2341  	 */
2342  	if (time_after(jiffies, (start_time + pf->serv_tmr_period)) ||
2343  	    test_bit(ICE_MDD_EVENT_PENDING, pf->state) ||
2344  	    test_bit(ICE_VFLR_EVENT_PENDING, pf->state) ||
2345  	    test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state) ||
2346  	    test_bit(ICE_FD_VF_FLUSH_CTX, pf->state) ||
2347  	    test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state) ||
2348  	    test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state))
2349  		mod_timer(&pf->serv_tmr, jiffies);
2350  }
2351  
2352  /**
2353   * ice_set_ctrlq_len - helper function to set controlq length
2354   * @hw: pointer to the HW instance
2355   */
2356  static void ice_set_ctrlq_len(struct ice_hw *hw)
2357  {
2358  	hw->adminq.num_rq_entries = ICE_AQ_LEN;
2359  	hw->adminq.num_sq_entries = ICE_AQ_LEN;
2360  	hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;
2361  	hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
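      	/* size the mailbox receive queue to the hardware maximum (full ARQLEN field) */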
2362  	hw->mailboxq.num_rq_entries = PF_MBX_ARQLEN_ARQLEN_M;
2363  	hw->mailboxq.num_sq_entries = ICE_MBXSQ_LEN;
2364  	hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
2365  	hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
2366  	hw->sbq.num_rq_entries = ICE_SBQ_LEN;
2367  	hw->sbq.num_sq_entries = ICE_SBQ_LEN;
2368  	hw->sbq.rq_buf_size = ICE_SBQ_MAX_BUF_LEN;
2369  	hw->sbq.sq_buf_size = ICE_SBQ_MAX_BUF_LEN;
2370  }
2371  
2372  /**
2373   * ice_schedule_reset - schedule a reset
2374   * @pf: board private structure
2375   * @reset: reset being requested
2376   */
2377  int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset)
2378  {
2379  	struct device *dev = ice_pf_to_dev(pf);
2380  
2381  	/* bail out if earlier reset has failed */
2382  	if (test_bit(ICE_RESET_FAILED, pf->state)) {
2383  		dev_dbg(dev, "earlier reset has failed\n");
2384  		return -EIO;
2385  	}
2386  	/* bail if reset/recovery already in progress */
2387  	if (ice_is_reset_in_progress(pf->state)) {
2388  		dev_dbg(dev, "Reset already in progress\n");
2389  		return -EBUSY;
2390  	}
2391  
2392  	switch (reset) {
2393  	case ICE_RESET_PFR:
2394  		set_bit(ICE_PFR_REQ, pf->state);
2395  		break;
2396  	case ICE_RESET_CORER:
2397  		set_bit(ICE_CORER_REQ, pf->state);
2398  		break;
2399  	case ICE_RESET_GLOBR:
2400  		set_bit(ICE_GLOBR_REQ, pf->state);
2401  		break;
2402  	default:
2403  		return -EINVAL;
2404  	}
2405  
2406  	ice_service_task_schedule(pf);
2407  	return 0;
2408  }
2409  
2410  /**
2411   * ice_irq_affinity_notify - Callback for affinity changes
2412   * @notify: context as to what irq was changed
2413   * @mask: the new affinity mask
2414   *
2415   * This is a callback function used by the irq_set_affinity_notifier function
2416   * so that we may register to receive changes to the irq affinity masks.
2417   */
2418  static void
2419  ice_irq_affinity_notify(struct irq_affinity_notify *notify,
2420  			const cpumask_t *mask)
2421  {
2422  	struct ice_q_vector *q_vector =
2423  		container_of(notify, struct ice_q_vector, affinity_notify);
2424  
2425  	cpumask_copy(&q_vector->affinity_mask, mask);
2426  }
2427  
2428  /**
2429   * ice_irq_affinity_release - Callback for affinity notifier release
2430   * @ref: internal core kernel usage
2431   *
2432   * This is a callback function used by the irq_set_affinity_notifier function
2433   * to inform the current notification subscriber that they will no longer
2434   * receive notifications.
2435   */
2436  static void ice_irq_affinity_release(struct kref __always_unused *ref) {}
2437  
2438  /**
2439   * ice_vsi_ena_irq - Enable IRQ for the given VSI
2440   * @vsi: the VSI being configured
2441   */
2442  static int ice_vsi_ena_irq(struct ice_vsi *vsi)
2443  {
2444  	struct ice_hw *hw = &vsi->back->hw;
2445  	int i;
2446  
2447  	ice_for_each_q_vector(vsi, i)
2448  		ice_irq_dynamic_ena(hw, vsi, vsi->q_vectors[i]);
2449  
2450  	ice_flush(hw);
2451  	return 0;
2452  }
2453  
2454  /**
2455   * ice_vsi_req_irq_msix - get MSI-X vectors from the OS for the VSI
2456   * @vsi: the VSI being configured
2457   * @basename: name for the vector
2458   */
2459  static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
2460  {
2461  	int q_vectors = vsi->num_q_vectors;
2462  	struct ice_pf *pf = vsi->back;
2463  	int base = vsi->base_vector;
2464  	struct device *dev;
2465  	int rx_int_idx = 0;
2466  	int tx_int_idx = 0;
2467  	int vector, err;
2468  	int irq_num;
2469  
2470  	dev = ice_pf_to_dev(pf);
2471  	for (vector = 0; vector < q_vectors; vector++) {
2472  		struct ice_q_vector *q_vector = vsi->q_vectors[vector];
2473  
2474  		irq_num = pf->msix_entries[base + vector].vector;
2475  
2476  		if (q_vector->tx.tx_ring && q_vector->rx.rx_ring) {
2477  			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
2478  				 "%s-%s-%d", basename, "TxRx", rx_int_idx++);
2479  			tx_int_idx++;
2480  		} else if (q_vector->rx.rx_ring) {
2481  			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
2482  				 "%s-%s-%d", basename, "rx", rx_int_idx++);
2483  		} else if (q_vector->tx.tx_ring) {
2484  			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
2485  				 "%s-%s-%d", basename, "tx", tx_int_idx++);
2486  		} else {
2487  			/* skip this unused q_vector */
2488  			continue;
2489  		}
2490  		if (vsi->type == ICE_VSI_CTRL && vsi->vf)
2491  			err = devm_request_irq(dev, irq_num, vsi->irq_handler,
2492  					       IRQF_SHARED, q_vector->name,
2493  					       q_vector);
2494  		else
2495  			err = devm_request_irq(dev, irq_num, vsi->irq_handler,
2496  					       0, q_vector->name, q_vector);
2497  		if (err) {
2498  			netdev_err(vsi->netdev, "MSIX request_irq failed, error: %d\n",
2499  				   err);
2500  			goto free_q_irqs;
2501  		}
2502  
2503  		/* register for affinity change notifications */
2504  		if (!IS_ENABLED(CONFIG_RFS_ACCEL)) {
2505  			struct irq_affinity_notify *affinity_notify;
2506  
2507  			affinity_notify = &q_vector->affinity_notify;
2508  			affinity_notify->notify = ice_irq_affinity_notify;
2509  			affinity_notify->release = ice_irq_affinity_release;
2510  			irq_set_affinity_notifier(irq_num, affinity_notify);
2511  		}
2512  
2513  		/* assign the mask for this irq */
2514  		irq_set_affinity_hint(irq_num, &q_vector->affinity_mask);
2515  	}
2516  
2517  	err = ice_set_cpu_rx_rmap(vsi);
2518  	if (err) {
2519  		netdev_err(vsi->netdev, "Failed to setup CPU RMAP on VSI %u: %pe\n",
2520  			   vsi->vsi_num, ERR_PTR(err));
2521  		goto free_q_irqs;
2522  	}
2523  
2524  	vsi->irqs_ready = true;
2525  	return 0;
2526  
2527  free_q_irqs:
2528  	while (vector) {
2529  		vector--;
2530  		irq_num = pf->msix_entries[base + vector].vector;
2531  		if (!IS_ENABLED(CONFIG_RFS_ACCEL))
2532  			irq_set_affinity_notifier(irq_num, NULL);
2533  		irq_set_affinity_hint(irq_num, NULL);
2534  		devm_free_irq(dev, irq_num, &vsi->q_vectors[vector]);
2535  	}
2536  	return err;
2537  }
2538  
2539  /**
2540   * ice_xdp_alloc_setup_rings - Allocate and setup Tx rings for XDP
2541   * @vsi: VSI to setup Tx rings used by XDP
2542   *
2543   * Return 0 on success and negative value on error
2544   */
2545  static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
2546  {
2547  	struct device *dev = ice_pf_to_dev(vsi->back);
2548  	struct ice_tx_desc *tx_desc;
2549  	int i, j;
2550  
2551  	ice_for_each_xdp_txq(vsi, i) {
2552  		u16 xdp_q_idx = vsi->alloc_txq + i;
2553  		struct ice_ring_stats *ring_stats;
2554  		struct ice_tx_ring *xdp_ring;
2555  
2556  		xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL);
2557  		if (!xdp_ring)
2558  			goto free_xdp_rings;
2559  
2560  		ring_stats = kzalloc(sizeof(*ring_stats), GFP_KERNEL);
2561  		if (!ring_stats) {
2562  			ice_free_tx_ring(xdp_ring);
2563  			goto free_xdp_rings;
2564  		}
2565  
2566  		xdp_ring->ring_stats = ring_stats;
2567  		xdp_ring->q_index = xdp_q_idx;
2568  		xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx];
2569  		xdp_ring->vsi = vsi;
2570  		xdp_ring->netdev = NULL;
2571  		xdp_ring->dev = dev;
2572  		xdp_ring->count = vsi->num_tx_desc;
2573  		xdp_ring->next_dd = ICE_RING_QUARTER(xdp_ring) - 1;
2574  		xdp_ring->next_rs = ICE_RING_QUARTER(xdp_ring) - 1;
2575  		WRITE_ONCE(vsi->xdp_rings[i], xdp_ring);
2576  		if (ice_setup_tx_ring(xdp_ring))
2577  			goto free_xdp_rings;
2578  		ice_set_ring_xdp(xdp_ring);
2579  		spin_lock_init(&xdp_ring->tx_lock);
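      		/* zero each descriptor so no stale DD (descriptor done) bits are seen */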
2580  		for (j = 0; j < xdp_ring->count; j++) {
2581  			tx_desc = ICE_TX_DESC(xdp_ring, j);
2582  			tx_desc->cmd_type_offset_bsz = 0;
2583  		}
2584  	}
2585  
2586  	return 0;
2587  
2588  free_xdp_rings:
2589  	for (; i >= 0; i--) {
2590  		if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc) {
2591  			kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu);
2592  			vsi->xdp_rings[i]->ring_stats = NULL;
2593  			ice_free_tx_ring(vsi->xdp_rings[i]);
2594  		}
2595  	}
2596  	return -ENOMEM;
2597  }
2598  
2599  /**
2600   * ice_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI
2601   * @vsi: VSI to set the bpf prog on
2602   * @prog: the bpf prog pointer
2603   */
2604  static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog)
2605  {
2606  	struct bpf_prog *old_prog;
2607  	int i;
2608  
2609  	old_prog = xchg(&vsi->xdp_prog, prog);
2610  	if (old_prog)
2611  		bpf_prog_put(old_prog);
2612  
2613  	ice_for_each_rxq(vsi, i)
2614  		WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
2615  }
2616  
2617  /**
2618   * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
2619   * @vsi: VSI to bring up Tx rings used by XDP
2620   * @prog: bpf program that will be assigned to VSI
2621   *
2622   * Return 0 on success and negative value on error
2623   */
2624  int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
2625  {
2626  	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
2627  	int xdp_rings_rem = vsi->num_xdp_txq;
2628  	struct ice_pf *pf = vsi->back;
2629  	struct ice_qs_cfg xdp_qs_cfg = {
2630  		.qs_mutex = &pf->avail_q_mutex,
2631  		.pf_map = pf->avail_txqs,
2632  		.pf_map_size = pf->max_pf_txqs,
2633  		.q_count = vsi->num_xdp_txq,
2634  		.scatter_count = ICE_MAX_SCATTER_TXQS,
2635  		.vsi_map = vsi->txq_map,
2636  		.vsi_map_offset = vsi->alloc_txq,
2637  		.mapping_mode = ICE_VSI_MAP_CONTIG
2638  	};
2639  	struct device *dev;
2640  	int i, v_idx;
2641  	int status;
2642  
2643  	dev = ice_pf_to_dev(pf);
2644  	vsi->xdp_rings = devm_kcalloc(dev, vsi->num_xdp_txq,
2645  				      sizeof(*vsi->xdp_rings), GFP_KERNEL);
2646  	if (!vsi->xdp_rings)
2647  		return -ENOMEM;
2648  
2649  	vsi->xdp_mapping_mode = xdp_qs_cfg.mapping_mode;
2650  	if (__ice_vsi_get_qs(&xdp_qs_cfg))
2651  		goto err_map_xdp;
2652  
2653  	if (static_key_enabled(&ice_xdp_locking_key))
2654  		netdev_warn(vsi->netdev,
2655  			    "Could not allocate one XDP Tx ring per CPU, XDP_TX/XDP_REDIRECT actions will be slower\n");
2656  
2657  	if (ice_xdp_alloc_setup_rings(vsi))
2658  		goto clear_xdp_rings;
2659  
2660  	/* follow the logic from ice_vsi_map_rings_to_vectors */
2661  	ice_for_each_q_vector(vsi, v_idx) {
2662  		struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
2663  		int xdp_rings_per_v, q_id, q_base;
2664  
2665  		xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
2666  					       vsi->num_q_vectors - v_idx);
2667  		q_base = vsi->num_xdp_txq - xdp_rings_rem;
2668  
2669  		for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
2670  			struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_id];
2671  
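      			/* push the XDP ring onto the head of this vector's Tx ring list */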
2672  			xdp_ring->q_vector = q_vector;
2673  			xdp_ring->next = q_vector->tx.tx_ring;
2674  			q_vector->tx.tx_ring = xdp_ring;
2675  		}
2676  		xdp_rings_rem -= xdp_rings_per_v;
2677  	}
2678  
2679  	ice_for_each_rxq(vsi, i) {
2680  		if (static_key_enabled(&ice_xdp_locking_key)) {
2681  			vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq];
2682  		} else {
2683  			struct ice_q_vector *q_vector = vsi->rx_rings[i]->q_vector;
2684  			struct ice_tx_ring *ring;
2685  
2686  			ice_for_each_tx_ring(ring, q_vector->tx) {
2687  				if (ice_ring_is_xdp(ring)) {
2688  					vsi->rx_rings[i]->xdp_ring = ring;
2689  					break;
2690  				}
2691  			}
2692  		}
2693  		ice_tx_xsk_pool(vsi, i);
2694  	}
2695  
2696  	/* omit the scheduler update if in reset path; XDP queues will be
2697  	 * taken into account at the end of ice_vsi_rebuild, where
2698  	 * ice_cfg_vsi_lan is being called
2699  	 */
2700  	if (ice_is_reset_in_progress(pf->state))
2701  		return 0;
2702  
2703  	/* tell the Tx scheduler that right now we have
2704  	 * additional queues
2705  	 */
2706  	for (i = 0; i < vsi->tc_cfg.numtc; i++)
2707  		max_txqs[i] = vsi->num_txq + vsi->num_xdp_txq;
2708  
2709  	status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
2710  				 max_txqs);
2711  	if (status) {
2712  		dev_err(dev, "Failed VSI LAN queue config for XDP, error: %d\n",
2713  			status);
2714  		goto clear_xdp_rings;
2715  	}
2716  
2717  	/* assign the prog only when it's not already present on VSI;
2718  	 * this flow is a subject of both ethtool -L and ndo_bpf flows;
2719  	 * VSI rebuild that happens under ethtool -L can expose us to
2720  	 * the bpf_prog refcount issues as we would be swapping same
2721  	 * bpf_prog pointers from vsi->xdp_prog and calling bpf_prog_put
2722  	 * on it as it would be treated as an 'old_prog'; for ndo_bpf
2723  	 * this is not harmful as dev_xdp_install bumps the refcount
2724  	 * before calling the op exposed by the driver;
2725  	 */
2726  	if (!ice_is_xdp_ena_vsi(vsi))
2727  		ice_vsi_assign_bpf_prog(vsi, prog);
2728  
2729  	return 0;
2730  clear_xdp_rings:
2731  	ice_for_each_xdp_txq(vsi, i)
2732  		if (vsi->xdp_rings[i]) {
2733  			kfree_rcu(vsi->xdp_rings[i], rcu);
2734  			vsi->xdp_rings[i] = NULL;
2735  		}
2736  
2737  err_map_xdp:
2738  	mutex_lock(&pf->avail_q_mutex);
2739  	ice_for_each_xdp_txq(vsi, i) {
2740  		clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
2741  		vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
2742  	}
2743  	mutex_unlock(&pf->avail_q_mutex);
2744  
2745  	devm_kfree(dev, vsi->xdp_rings);
2746  	return -ENOMEM;
2747  }
2748  
2749  /**
2750   * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings
2751   * @vsi: VSI to remove XDP rings
2752   *
2753   * Detach XDP rings from irq vectors, clean up the PF bitmap and free
2754   * resources
2755   */
2756  int ice_destroy_xdp_rings(struct ice_vsi *vsi)
2757  {
2758  	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
2759  	struct ice_pf *pf = vsi->back;
2760  	int i, v_idx;
2761  
2762  	/* q_vectors are freed in reset path so there's no point in detaching
2763  	 * rings; if the rebuild was triggered by something other than a reset,
2764  	 * the reset bits in pf->state won't be set, so additionally check the
2765  	 * first q_vector against NULL
2766  	 */
2767  	if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
2768  		goto free_qmap;
2769  
2770  	ice_for_each_q_vector(vsi, v_idx) {
2771  		struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
2772  		struct ice_tx_ring *ring;
2773  
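      		/* walk past the XDP rings linked at the head of the list and stop
      		 * at the first LAN Tx ring
      		 */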
2774  		ice_for_each_tx_ring(ring, q_vector->tx)
2775  			if (!ring->tx_buf || !ice_ring_is_xdp(ring))
2776  				break;
2777  
2778  		/* restore the value of last node prior to XDP setup */
2779  		q_vector->tx.tx_ring = ring;
2780  	}
2781  
2782  free_qmap:
2783  	mutex_lock(&pf->avail_q_mutex);
2784  	ice_for_each_xdp_txq(vsi, i) {
2785  		clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
2786  		vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
2787  	}
2788  	mutex_unlock(&pf->avail_q_mutex);
2789  
2790  	ice_for_each_xdp_txq(vsi, i)
2791  		if (vsi->xdp_rings[i]) {
2792  			if (vsi->xdp_rings[i]->desc) {
2793  				synchronize_rcu();
2794  				ice_free_tx_ring(vsi->xdp_rings[i]);
2795  			}
2796  			kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu);
2797  			vsi->xdp_rings[i]->ring_stats = NULL;
2798  			kfree_rcu(vsi->xdp_rings[i], rcu);
2799  			vsi->xdp_rings[i] = NULL;
2800  		}
2801  
2802  	devm_kfree(ice_pf_to_dev(pf), vsi->xdp_rings);
2803  	vsi->xdp_rings = NULL;
2804  
2805  	if (static_key_enabled(&ice_xdp_locking_key))
2806  		static_branch_dec(&ice_xdp_locking_key);
2807  
2808  	if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
2809  		return 0;
2810  
2811  	ice_vsi_assign_bpf_prog(vsi, NULL);
2812  
2813  	/* notify Tx scheduler that we destroyed XDP queues and bring
2814  	 * back the old number of child nodes
2815  	 */
2816  	for (i = 0; i < vsi->tc_cfg.numtc; i++)
2817  		max_txqs[i] = vsi->num_txq;
2818  
2819  	/* change number of XDP Tx queues to 0 */
2820  	vsi->num_xdp_txq = 0;
2821  
2822  	return ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
2823  			       max_txqs);
2824  }
2825  
2826  /**
2827   * ice_vsi_rx_napi_schedule - Schedule napi on RX queues from VSI
2828   * @vsi: VSI to schedule napi on
2829   */
2830  static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi)
2831  {
2832  	int i;
2833  
2834  	ice_for_each_rxq(vsi, i) {
2835  		struct ice_rx_ring *rx_ring = vsi->rx_rings[i];
2836  
2837  		if (rx_ring->xsk_pool)
2838  			napi_schedule(&rx_ring->q_vector->napi);
2839  	}
2840  }
2841  
2842  /**
2843   * ice_vsi_determine_xdp_res - figure out how many Tx qs can XDP have
2844   * @vsi: VSI to determine the count of XDP Tx qs
2845   *
2846   * returns 0 if Tx qs count is higher than at least half of CPU count,
2847   * Returns 0 if the available Tx queue count is at least half the CPU count,
2848   * -ENOMEM otherwise
2849  int ice_vsi_determine_xdp_res(struct ice_vsi *vsi)
2850  {
2851  	u16 avail = ice_get_avail_txq_count(vsi->back);
2852  	u16 cpus = num_possible_cpus();
2853  
2854  	if (avail < cpus / 2)
2855  		return -ENOMEM;
2856  
2857  	vsi->num_xdp_txq = min_t(u16, avail, cpus);
2858  
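      	/* fewer XDP Tx queues than CPUs means rings are shared, so enable
      	 * the locked XDP Tx path
      	 */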
2859  	if (vsi->num_xdp_txq < cpus)
2860  		static_branch_inc(&ice_xdp_locking_key);
2861  
2862  	return 0;
2863  }
2864  
2865  /**
2866   * ice_xdp_setup_prog - Add or remove XDP eBPF program
2867   * @vsi: VSI to setup XDP for
2868   * @prog: XDP program
2869   * @extack: netlink extended ack
2870   */
2871  static int
2872  ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
2873  		   struct netlink_ext_ack *extack)
2874  {
2875  	int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
2876  	bool if_running = netif_running(vsi->netdev);
2877  	int ret = 0, xdp_ring_err = 0;
2878  
2879  	if (frame_size > vsi->rx_buf_len) {
2880  		NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP");
2881  		return -EOPNOTSUPP;
2882  	}
2883  
2884  	/* need to stop netdev while setting up the program for Rx rings */
2885  	if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
2886  		ret = ice_down(vsi);
2887  		if (ret) {
2888  			NL_SET_ERR_MSG_MOD(extack, "Preparing device for XDP attach failed");
2889  			return ret;
2890  		}
2891  	}
2892  
2893  	if (!ice_is_xdp_ena_vsi(vsi) && prog) {
2894  		xdp_ring_err = ice_vsi_determine_xdp_res(vsi);
2895  		if (xdp_ring_err) {
2896  			NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP");
2897  		} else {
2898  			xdp_ring_err = ice_prepare_xdp_rings(vsi, prog);
2899  			if (xdp_ring_err)
2900  				NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed");
2901  		}
2902  		/* reallocate Rx queues that are used for zero-copy */
2903  		xdp_ring_err = ice_realloc_zc_buf(vsi, true);
2904  		if (xdp_ring_err)
2905  			NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed");
2906  	} else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
2907  		xdp_ring_err = ice_destroy_xdp_rings(vsi);
2908  		if (xdp_ring_err)
2909  			NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed");
2910  		/* reallocate Rx queues that were used for zero-copy */
2911  		xdp_ring_err = ice_realloc_zc_buf(vsi, false);
2912  		if (xdp_ring_err)
2913  			NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed");
2914  	} else {
2915  		/* safe to call even when prog == vsi->xdp_prog as
2916  		 * dev_xdp_install in net/core/dev.c incremented prog's
2917  		 * refcount so corresponding bpf_prog_put won't cause
2918  		 * underflow
2919  		 */
2920  		ice_vsi_assign_bpf_prog(vsi, prog);
2921  	}
2922  
2923  	if (if_running)
2924  		ret = ice_up(vsi);
2925  
2926  	if (!ret && prog)
2927  		ice_vsi_rx_napi_schedule(vsi);
2928  
2929  	return (ret || xdp_ring_err) ? -ENOMEM : 0;
2930  }
2931  
2932  /**
2933   * ice_xdp_safe_mode - XDP handler for safe mode
2934   * @dev: netdevice
2935   * @xdp: XDP command
2936   */
2937  static int ice_xdp_safe_mode(struct net_device __always_unused *dev,
2938  			     struct netdev_bpf *xdp)
2939  {
2940  	NL_SET_ERR_MSG_MOD(xdp->extack,
2941  			   "Please provide working DDP firmware package in order to use XDP\n"
2942  			   "Refer to Documentation/networking/device_drivers/ethernet/intel/ice.rst");
2943  	return -EOPNOTSUPP;
2944  }
2945  
2946  /**
2947   * ice_xdp - implements XDP handler
2948   * @dev: netdevice
2949   * @xdp: XDP command
2950   */
2951  static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
2952  {
2953  	struct ice_netdev_priv *np = netdev_priv(dev);
2954  	struct ice_vsi *vsi = np->vsi;
2955  
2956  	if (vsi->type != ICE_VSI_PF) {
2957  		NL_SET_ERR_MSG_MOD(xdp->extack, "XDP can be loaded only on PF VSI");
2958  		return -EINVAL;
2959  	}
2960  
2961  	switch (xdp->command) {
2962  	case XDP_SETUP_PROG:
2963  		return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
2964  	case XDP_SETUP_XSK_POOL:
2965  		return ice_xsk_pool_setup(vsi, xdp->xsk.pool,
2966  					  xdp->xsk.queue_id);
2967  	default:
2968  		return -EINVAL;
2969  	}
2970  }
2971  
2972  /**
2973   * ice_ena_misc_vector - enable the non-queue interrupts
2974   * @pf: board private structure
2975   */
2976  static void ice_ena_misc_vector(struct ice_pf *pf)
2977  {
2978  	struct ice_hw *hw = &pf->hw;
2979  	u32 val;
2980  
2981  	/* Disable anti-spoof detection interrupt to prevent spurious event
2982  	 * interrupts during a function reset. Anti-spoof functionality is
2983  	 * still supported.
2984  	 */
2985  	val = rd32(hw, GL_MDCK_TX_TDPU);
2986  	val |= GL_MDCK_TX_TDPU_RCU_ANTISPOOF_ITR_DIS_M;
2987  	wr32(hw, GL_MDCK_TX_TDPU, val);
2988  
2989  	/* clear things first */
2990  	wr32(hw, PFINT_OICR_ENA, 0);	/* disable all */
2991  	rd32(hw, PFINT_OICR);		/* read to clear */
2992  
2993  	val = (PFINT_OICR_ECC_ERR_M |
2994  	       PFINT_OICR_MAL_DETECT_M |
2995  	       PFINT_OICR_GRST_M |
2996  	       PFINT_OICR_PCI_EXCEPTION_M |
2997  	       PFINT_OICR_VFLR_M |
2998  	       PFINT_OICR_HMC_ERR_M |
2999  	       PFINT_OICR_PE_PUSH_M |
3000  	       PFINT_OICR_PE_CRITERR_M);
3001  
3002  	wr32(hw, PFINT_OICR_ENA, val);
3003  
3004  	/* SW_ITR_IDX = 0, but don't change INTENA */
3005  	wr32(hw, GLINT_DYN_CTL(pf->oicr_idx),
3006  	     GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M);
3007  }
3008  
3009  /**
3010   * ice_misc_intr - misc interrupt handler
3011   * @irq: interrupt number
3012   * @data: pointer to the PF structure
3013   */
3014  static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
3015  {
3016  	struct ice_pf *pf = (struct ice_pf *)data;
3017  	struct ice_hw *hw = &pf->hw;
3018  	irqreturn_t ret = IRQ_NONE;
3019  	struct device *dev;
3020  	u32 oicr, ena_mask;
3021  
3022  	dev = ice_pf_to_dev(pf);
3023  	set_bit(ICE_ADMINQ_EVENT_PENDING, pf->state);
3024  	set_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state);
3025  	set_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);
3026  
3027  	oicr = rd32(hw, PFINT_OICR);
3028  	ena_mask = rd32(hw, PFINT_OICR_ENA);
3029  
3030  	if (oicr & PFINT_OICR_SWINT_M) {
3031  		ena_mask &= ~PFINT_OICR_SWINT_M;
3032  		pf->sw_int_count++;
3033  	}
3034  
3035  	if (oicr & PFINT_OICR_MAL_DETECT_M) {
3036  		ena_mask &= ~PFINT_OICR_MAL_DETECT_M;
3037  		set_bit(ICE_MDD_EVENT_PENDING, pf->state);
3038  	}
3039  	if (oicr & PFINT_OICR_VFLR_M) {
3040  		/* disable any further VFLR event notifications */
3041  		if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) {
3042  			u32 reg = rd32(hw, PFINT_OICR_ENA);
3043  
3044  			reg &= ~PFINT_OICR_VFLR_M;
3045  			wr32(hw, PFINT_OICR_ENA, reg);
3046  		} else {
3047  			ena_mask &= ~PFINT_OICR_VFLR_M;
3048  			set_bit(ICE_VFLR_EVENT_PENDING, pf->state);
3049  		}
3050  	}
3051  
3052  	if (oicr & PFINT_OICR_GRST_M) {
3053  		u32 reset;
3054  
3055  		/* we have a reset warning */
3056  		ena_mask &= ~PFINT_OICR_GRST_M;
3057  		reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
3058  			GLGEN_RSTAT_RESET_TYPE_S;
3059  
3060  		if (reset == ICE_RESET_CORER)
3061  			pf->corer_count++;
3062  		else if (reset == ICE_RESET_GLOBR)
3063  			pf->globr_count++;
3064  		else if (reset == ICE_RESET_EMPR)
3065  			pf->empr_count++;
3066  		else
3067  			dev_dbg(dev, "Invalid reset type %d\n", reset);
3068  
3069  		/* If a reset cycle isn't already in progress, we set a bit in
3070  		 * pf->state so that the service task can start a reset/rebuild.
3071  		 */
3072  		if (!test_and_set_bit(ICE_RESET_OICR_RECV, pf->state)) {
3073  			if (reset == ICE_RESET_CORER)
3074  				set_bit(ICE_CORER_RECV, pf->state);
3075  			else if (reset == ICE_RESET_GLOBR)
3076  				set_bit(ICE_GLOBR_RECV, pf->state);
3077  			else
3078  				set_bit(ICE_EMPR_RECV, pf->state);
3079  
3080  			/* There are a couple of different bits at play here.
3081  			 * hw->reset_ongoing indicates whether the hardware is
3082  			 * in reset. This is set to true when a reset interrupt
3083  			 * is received and set back to false after the driver
3084  			 * has determined that the hardware is out of reset.
3085  			 *
3086  			 * ICE_RESET_OICR_RECV in pf->state indicates
3087  			 * that a post reset rebuild is required before the
3088  			 * driver is operational again. This is set above.
3089  			 *
3090  			 * As this is the start of the reset/rebuild cycle, set
3091  			 * both to indicate that.
3092  			 */
3093  			hw->reset_ongoing = true;
3094  		}
3095  	}
3096  
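      	/* Tx timestamp ready: defer handling to ice_misc_intr_thread_fn() */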
3097  	if (oicr & PFINT_OICR_TSYN_TX_M) {
3098  		ena_mask &= ~PFINT_OICR_TSYN_TX_M;
3099  		if (!hw->reset_ongoing)
3100  			ret = IRQ_WAKE_THREAD;
3101  	}
3102  
3103  	if (oicr & PFINT_OICR_TSYN_EVNT_M) {
3104  		u8 tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
3105  		u32 gltsyn_stat = rd32(hw, GLTSYN_STAT(tmr_idx));
3106  
3107  		/* Save EVENTs from GLTSYN register */
3108  		pf->ptp.ext_ts_irq |= gltsyn_stat & (GLTSYN_STAT_EVENT0_M |
3109  						     GLTSYN_STAT_EVENT1_M |
3110  						     GLTSYN_STAT_EVENT2_M);
3111  		ena_mask &= ~PFINT_OICR_TSYN_EVNT_M;
3112  		kthread_queue_work(pf->ptp.kworker, &pf->ptp.extts_work);
3113  	}
3114  
3115  #define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M)
3116  	if (oicr & ICE_AUX_CRIT_ERR) {
3117  		pf->oicr_err_reg |= oicr;
3118  		set_bit(ICE_AUX_ERR_PENDING, pf->state);
3119  		ena_mask &= ~ICE_AUX_CRIT_ERR;
3120  	}
3121  
3122  	/* Report any remaining unexpected interrupts */
3123  	oicr &= ena_mask;
3124  	if (oicr) {
3125  		dev_dbg(dev, "unhandled interrupt oicr=0x%08x\n", oicr);
3126  		/* If a critical error is pending there is no choice but to
3127  		 * reset the device.
3128  		 */
3129  		if (oicr & (PFINT_OICR_PCI_EXCEPTION_M |
3130  			    PFINT_OICR_ECC_ERR_M)) {
3131  			set_bit(ICE_PFR_REQ, pf->state);
3132  			ice_service_task_schedule(pf);
3133  		}
3134  	}
3135  	if (!ret)
3136  		ret = IRQ_HANDLED;
3137  
3138  	ice_service_task_schedule(pf);
3139  	ice_irq_dynamic_ena(hw, NULL, NULL);
3140  
3141  	return ret;
3142  }
3143  
3144  /**
3145   * ice_misc_intr_thread_fn - misc interrupt thread function
3146   * @irq: interrupt number
3147   * @data: pointer to the PF structure
3148   */
3149  static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data)
3150  {
3151  	struct ice_pf *pf = data;
3152  
3153  	if (ice_is_reset_in_progress(pf->state))
3154  		return IRQ_HANDLED;
3155  
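      	/* poll until all outstanding Tx timestamps have been processed */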
3156  	while (!ice_ptp_process_ts(pf))
3157  		usleep_range(50, 100);
3158  
3159  	return IRQ_HANDLED;
3160  }
3161  
3162  /**
3163   * ice_dis_ctrlq_interrupts - disable control queue interrupts
3164   * @hw: pointer to HW structure
3165   */
3166  static void ice_dis_ctrlq_interrupts(struct ice_hw *hw)
3167  {
3168  	/* disable Admin queue Interrupt causes */
3169  	wr32(hw, PFINT_FW_CTL,
3170  	     rd32(hw, PFINT_FW_CTL) & ~PFINT_FW_CTL_CAUSE_ENA_M);
3171  
3172  	/* disable Mailbox queue Interrupt causes */
3173  	wr32(hw, PFINT_MBX_CTL,
3174  	     rd32(hw, PFINT_MBX_CTL) & ~PFINT_MBX_CTL_CAUSE_ENA_M);
3175  
3176  	wr32(hw, PFINT_SB_CTL,
3177  	     rd32(hw, PFINT_SB_CTL) & ~PFINT_SB_CTL_CAUSE_ENA_M);
3178  
3179  	/* disable Control queue Interrupt causes */
3180  	wr32(hw, PFINT_OICR_CTL,
3181  	     rd32(hw, PFINT_OICR_CTL) & ~PFINT_OICR_CTL_CAUSE_ENA_M);
3182  
3183  	ice_flush(hw);
3184  }
3185  
3186  /**
3187   * ice_free_irq_msix_misc - Unroll misc vector setup
3188   * @pf: board private structure
3189   */
3190  static void ice_free_irq_msix_misc(struct ice_pf *pf)
3191  {
3192  	struct ice_hw *hw = &pf->hw;
3193  
3194  	ice_dis_ctrlq_interrupts(hw);
3195  
3196  	/* disable OICR interrupt */
3197  	wr32(hw, PFINT_OICR_ENA, 0);
3198  	ice_flush(hw);
3199  
3200  	if (pf->msix_entries) {
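      		/* wait for any in-flight misc interrupt handler to finish first */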
3201  		synchronize_irq(pf->msix_entries[pf->oicr_idx].vector);
3202  		devm_free_irq(ice_pf_to_dev(pf),
3203  			      pf->msix_entries[pf->oicr_idx].vector, pf);
3204  	}
3205  
3206  	pf->num_avail_sw_msix += 1;
3207  	ice_free_res(pf->irq_tracker, pf->oicr_idx, ICE_RES_MISC_VEC_ID);
3208  }
3209  
3210  /**
3211   * ice_ena_ctrlq_interrupts - enable control queue interrupts
3212   * @hw: pointer to HW structure
3213   * @reg_idx: HW vector index to associate the control queue interrupts with
3214   */
3215  static void ice_ena_ctrlq_interrupts(struct ice_hw *hw, u16 reg_idx)
3216  {
3217  	u32 val;
3218  
3219  	val = ((reg_idx & PFINT_OICR_CTL_MSIX_INDX_M) |
3220  	       PFINT_OICR_CTL_CAUSE_ENA_M);
3221  	wr32(hw, PFINT_OICR_CTL, val);
3222  
3223  	/* enable Admin queue Interrupt causes */
3224  	val = ((reg_idx & PFINT_FW_CTL_MSIX_INDX_M) |
3225  	       PFINT_FW_CTL_CAUSE_ENA_M);
3226  	wr32(hw, PFINT_FW_CTL, val);
3227  
3228  	/* enable Mailbox queue Interrupt causes */
3229  	val = ((reg_idx & PFINT_MBX_CTL_MSIX_INDX_M) |
3230  	       PFINT_MBX_CTL_CAUSE_ENA_M);
3231  	wr32(hw, PFINT_MBX_CTL, val);
3232  
3233  	/* enable Sideband queue Interrupt causes */
3234  	val = ((reg_idx & PFINT_SB_CTL_MSIX_INDX_M) |
3235  	       PFINT_SB_CTL_CAUSE_ENA_M);
3236  	wr32(hw, PFINT_SB_CTL, val);
3237  
3238  	ice_flush(hw);
3239  }
3240  
3241  /**
3242   * ice_req_irq_msix_misc - Setup the misc vector to handle non queue events
3243   * @pf: board private structure
3244   *
3245   * This sets up the handler for MSIX 0, which is used to manage the
3246   * non-queue interrupts, e.g. AdminQ and errors. This is not used
3247   * when in MSI or Legacy interrupt mode.
3248   */
3249  static int ice_req_irq_msix_misc(struct ice_pf *pf)
3250  {
3251  	struct device *dev = ice_pf_to_dev(pf);
3252  	struct ice_hw *hw = &pf->hw;
3253  	int oicr_idx, err = 0;
3254  
3255  	if (!pf->int_name[0])
3256  		snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc",
3257  			 dev_driver_string(dev), dev_name(dev));
3258  
3259  	/* Do not request IRQ but do enable OICR interrupt since settings are
3260  	 * lost during reset. Note that this function is called only during
3261  	 * rebuild path and not while reset is in progress.
3262  	 */
3263  	if (ice_is_reset_in_progress(pf->state))
3264  		goto skip_req_irq;
3265  
3266  	/* reserve one vector in irq_tracker for misc interrupts */
3267  	oicr_idx = ice_get_res(pf, pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
3268  	if (oicr_idx < 0)
3269  		return oicr_idx;
3270  
3271  	pf->num_avail_sw_msix -= 1;
3272  	pf->oicr_idx = (u16)oicr_idx;
3273  
3274  	err = devm_request_threaded_irq(dev,
3275  					pf->msix_entries[pf->oicr_idx].vector,
3276  					ice_misc_intr, ice_misc_intr_thread_fn,
3277  					0, pf->int_name, pf);
3278  	if (err) {
3279  		dev_err(dev, "devm_request_threaded_irq for %s failed: %d\n",
3280  			pf->int_name, err);
3281  		ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
3282  		pf->num_avail_sw_msix += 1;
3283  		return err;
3284  	}
3285  
3286  skip_req_irq:
3287  	ice_ena_misc_vector(pf);
3288  
3289  	ice_ena_ctrlq_interrupts(hw, pf->oicr_idx);
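      	/* program a default interrupt throttle rate for the misc/OICR vector */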
3290  	wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_idx),
3291  	     ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S);
3292  
3293  	ice_flush(hw);
3294  	ice_irq_dynamic_ena(hw, NULL, NULL);
3295  
3296  	return 0;
3297  }
3298  
3299  /**
3300   * ice_napi_add - register NAPI handler for the VSI
3301   * @vsi: VSI for which NAPI handler is to be registered
3302   *
3303   * This function is only called in the driver's load path. Registering the NAPI
3304   * handler is done in ice_vsi_alloc_q_vector() for all other cases (i.e. resume,
3305   * reset/rebuild, etc.)
3306   */
3307  static void ice_napi_add(struct ice_vsi *vsi)
3308  {
3309  	int v_idx;
3310  
3311  	if (!vsi->netdev)
3312  		return;
3313  
3314  	ice_for_each_q_vector(vsi, v_idx)
3315  		netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi,
3316  			       ice_napi_poll);
3317  }
3318  
3319  /**
3320   * ice_set_ops - set netdev and ethtool ops for the given netdev
3321   * @netdev: netdev instance
3322   */
3323  static void ice_set_ops(struct net_device *netdev)
3324  {
3325  	struct ice_pf *pf = ice_netdev_to_pf(netdev);
3326  
3327  	if (ice_is_safe_mode(pf)) {
3328  		netdev->netdev_ops = &ice_netdev_safe_mode_ops;
3329  		ice_set_ethtool_safe_mode_ops(netdev);
3330  		return;
3331  	}
3332  
3333  	netdev->netdev_ops = &ice_netdev_ops;
3334  	netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic;
3335  	ice_set_ethtool_ops(netdev);
3336  }
3337  
3338  /**
3339   * ice_set_netdev_features - set features for the given netdev
3340   * @netdev: netdev instance
3341   */
3342  static void ice_set_netdev_features(struct net_device *netdev)
3343  {
3344  	struct ice_pf *pf = ice_netdev_to_pf(netdev);
3345  	bool is_dvm_ena = ice_is_dvm_ena(&pf->hw);
3346  	netdev_features_t csumo_features;
3347  	netdev_features_t vlano_features;
3348  	netdev_features_t dflt_features;
3349  	netdev_features_t tso_features;
3350  
3351  	if (ice_is_safe_mode(pf)) {
3352  		/* safe mode */
3353  		netdev->features = NETIF_F_SG | NETIF_F_HIGHDMA;
3354  		netdev->hw_features = netdev->features;
3355  		return;
3356  	}
3357  
3358  	dflt_features = NETIF_F_SG	|
3359  			NETIF_F_HIGHDMA	|
3360  			NETIF_F_NTUPLE	|
3361  			NETIF_F_RXHASH;
3362  
3363  	csumo_features = NETIF_F_RXCSUM	  |
3364  			 NETIF_F_IP_CSUM  |
3365  			 NETIF_F_SCTP_CRC |
3366  			 NETIF_F_IPV6_CSUM;
3367  
3368  	vlano_features = NETIF_F_HW_VLAN_CTAG_FILTER |
3369  			 NETIF_F_HW_VLAN_CTAG_TX     |
3370  			 NETIF_F_HW_VLAN_CTAG_RX;
3371  
3372  	/* Enable CTAG/STAG filtering by default in Double VLAN Mode (DVM) */
3373  	if (is_dvm_ena)
3374  		vlano_features |= NETIF_F_HW_VLAN_STAG_FILTER;
3375  
3376  	tso_features = NETIF_F_TSO			|
3377  		       NETIF_F_TSO_ECN			|
3378  		       NETIF_F_TSO6			|
3379  		       NETIF_F_GSO_GRE			|
3380  		       NETIF_F_GSO_UDP_TUNNEL		|
3381  		       NETIF_F_GSO_GRE_CSUM		|
3382  		       NETIF_F_GSO_UDP_TUNNEL_CSUM	|
3383  		       NETIF_F_GSO_PARTIAL		|
3384  		       NETIF_F_GSO_IPXIP4		|
3385  		       NETIF_F_GSO_IPXIP6		|
3386  		       NETIF_F_GSO_UDP_L4;
3387  
3388  	netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM |
3389  					NETIF_F_GSO_GRE_CSUM;
3390  	/* set features that user can change */
3391  	netdev->hw_features = dflt_features | csumo_features |
3392  			      vlano_features | tso_features;
3393  
3394  	/* add support for HW_CSUM on packets with MPLS header */
3395  	netdev->mpls_features =  NETIF_F_HW_CSUM |
3396  				 NETIF_F_TSO     |
3397  				 NETIF_F_TSO6;
3398  
3399  	/* enable features */
3400  	netdev->features |= netdev->hw_features;
3401  
3402  	netdev->hw_features |= NETIF_F_HW_TC;
3403  	netdev->hw_features |= NETIF_F_LOOPBACK;
3404  
3405  	/* encap and VLAN devices inherit default, csumo and tso features */
3406  	netdev->hw_enc_features |= dflt_features | csumo_features |
3407  				   tso_features;
3408  	netdev->vlan_features |= dflt_features | csumo_features |
3409  				 tso_features;
3410  
3411  	/* advertise support but don't enable by default since only one type of
3412  	 * VLAN offload can be enabled at a time (i.e. CTAG or STAG). When one
3413  	 * type is turned on, the other has to be turned off. This is enforced by the
3414  	 * ice_fix_features() ndo callback.
3415  	 */
3416  	if (is_dvm_ena)
3417  		netdev->hw_features |= NETIF_F_HW_VLAN_STAG_RX |
3418  			NETIF_F_HW_VLAN_STAG_TX;
3419  
3420  	/* Leave CRC / FCS stripping enabled by default, but allow the value to
3421  	 * be changed at runtime
3422  	 */
3423  	netdev->hw_features |= NETIF_F_RXFCS;
3424  }
3425  
3426  /**
3427   * ice_cfg_netdev - Allocate, configure and register a netdev
3428   * @vsi: the VSI associated with the new netdev
3429   *
3430   * Returns 0 on success, negative value on failure
3431   */
3432  static int ice_cfg_netdev(struct ice_vsi *vsi)
3433  {
3434  	struct ice_netdev_priv *np;
3435  	struct net_device *netdev;
3436  	u8 mac_addr[ETH_ALEN];
3437  
3438  	netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq,
3439  				    vsi->alloc_rxq);
3440  	if (!netdev)
3441  		return -ENOMEM;
3442  
3443  	set_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
3444  	vsi->netdev = netdev;
3445  	np = netdev_priv(netdev);
3446  	np->vsi = vsi;
3447  
3448  	ice_set_netdev_features(netdev);
3449  
3450  	ice_set_ops(netdev);
3451  
3452  	if (vsi->type == ICE_VSI_PF) {
3453  		SET_NETDEV_DEV(netdev, ice_pf_to_dev(vsi->back));
3454  		ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
3455  		eth_hw_addr_set(netdev, mac_addr);
3456  		ether_addr_copy(netdev->perm_addr, mac_addr);
3457  	}
3458  
3459  	netdev->priv_flags |= IFF_UNICAST_FLT;
3460  
3461  	/* Setup netdev TC information */
3462  	ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc);
3463  
3464  	/* set up the watchdog timeout value to be 5 seconds */
3465  	netdev->watchdog_timeo = 5 * HZ;
3466  
3467  	netdev->min_mtu = ETH_MIN_MTU;
3468  	netdev->max_mtu = ICE_MAX_MTU;
3469  
3470  	return 0;
3471  }
3472  
3473  /**
3474   * ice_fill_rss_lut - Fill the RSS lookup table with default values
3475   * @lut: Lookup table
3476   * @rss_table_size: Lookup table size
3477   * @rss_size: Range of queue number for hashing
3478   */
3479  void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size)
3480  {
3481  	u16 i;
3482  
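      	/* assign queue indices round-robin across rss_size queues */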
3483  	for (i = 0; i < rss_table_size; i++)
3484  		lut[i] = i % rss_size;
3485  }
3486  
3487  /**
3488   * ice_pf_vsi_setup - Set up a PF VSI
3489   * @pf: board private structure
3490   * @pi: pointer to the port_info instance
3491   *
3492   * Returns pointer to the successfully allocated VSI software struct
3493   * on success, otherwise returns NULL on failure.
3494   */
3495  static struct ice_vsi *
3496  ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
3497  {
3498  	return ice_vsi_setup(pf, pi, ICE_VSI_PF, NULL, NULL);
3499  }
3500  
3501  static struct ice_vsi *
3502  ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
3503  		   struct ice_channel *ch)
3504  {
3505  	return ice_vsi_setup(pf, pi, ICE_VSI_CHNL, NULL, ch);
3506  }
3507  
3508  /**
3509   * ice_ctrl_vsi_setup - Set up a control VSI
3510   * @pf: board private structure
3511   * @pi: pointer to the port_info instance
3512   *
3513   * Returns pointer to the successfully allocated VSI software struct
3514   * on success, otherwise returns NULL on failure.
3515   */
3516  static struct ice_vsi *
3517  ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
3518  {
3519  	return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, NULL, NULL);
3520  }
3521  
3522  /**
3523   * ice_lb_vsi_setup - Set up a loopback VSI
3524   * @pf: board private structure
3525   * @pi: pointer to the port_info instance
3526   *
3527   * Returns pointer to the successfully allocated VSI software struct
3528   * on success, otherwise returns NULL on failure.
3529   */
3530  struct ice_vsi *
3531  ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
3532  {
3533  	return ice_vsi_setup(pf, pi, ICE_VSI_LB, NULL, NULL);
3534  }
3535  
3536  /**
3537   * ice_vlan_rx_add_vid - Add a VLAN ID filter to HW offload
3538   * @netdev: network interface to be adjusted
3539   * @proto: VLAN TPID
3540   * @vid: VLAN ID to be added
3541   *
3542   * net_device_ops implementation for adding VLAN IDs
3543   */
3544  static int
3545  ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
3546  {
3547  	struct ice_netdev_priv *np = netdev_priv(netdev);
3548  	struct ice_vsi_vlan_ops *vlan_ops;
3549  	struct ice_vsi *vsi = np->vsi;
3550  	struct ice_vlan vlan;
3551  	int ret;
3552  
3553  	/* VLAN 0 is added by default during load/reset */
3554  	if (!vid)
3555  		return 0;
3556  
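      	/* serialize VLAN and promiscuous updates on this VSI */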
3557  	while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
3558  		usleep_range(1000, 2000);
3559  
3560  	/* Add multicast promisc rule for the VLAN ID to be added if
3561  	 * all-multicast is currently enabled.
3562  	 */
3563  	if (vsi->current_netdev_flags & IFF_ALLMULTI) {
3564  		ret = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
3565  					       ICE_MCAST_VLAN_PROMISC_BITS,
3566  					       vid);
3567  		if (ret)
3568  			goto finish;
3569  	}
3570  
3571  	vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
3572  
3573  	/* Add a switch rule for this VLAN ID so its corresponding VLAN tagged
3574  	 * packets aren't pruned by the device's internal switch on Rx
3575  	 */
3576  	vlan = ICE_VLAN(be16_to_cpu(proto), vid, 0);
3577  	ret = vlan_ops->add_vlan(vsi, &vlan);
3578  	if (ret)
3579  		goto finish;
3580  
3581  	/* If all-multicast is currently enabled and this VLAN ID is the only one
3582  	 * besides VLAN 0, update the look-up type of the multicast promisc rule
3583  	 * for VLAN 0 from ICE_SW_LKUP_PROMISC to ICE_SW_LKUP_PROMISC_VLAN.
3584  	 */
3585  	if ((vsi->current_netdev_flags & IFF_ALLMULTI) &&
3586  	    ice_vsi_num_non_zero_vlans(vsi) == 1) {
3587  		ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
3588  					   ICE_MCAST_PROMISC_BITS, 0);
3589  		ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
3590  					 ICE_MCAST_VLAN_PROMISC_BITS, 0);
3591  	}
3592  
3593  finish:
3594  	clear_bit(ICE_CFG_BUSY, vsi->state);
3595  
3596  	return ret;
3597  }
3598  
3599  /**
3600   * ice_vlan_rx_kill_vid - Remove a VLAN ID filter from HW offload
3601   * @netdev: network interface to be adjusted
3602   * @proto: VLAN TPID
3603   * @vid: VLAN ID to be removed
3604   *
3605   * net_device_ops implementation for removing VLAN IDs
3606   */
3607  static int
3608  ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
3609  {
3610  	struct ice_netdev_priv *np = netdev_priv(netdev);
3611  	struct ice_vsi_vlan_ops *vlan_ops;
3612  	struct ice_vsi *vsi = np->vsi;
3613  	struct ice_vlan vlan;
3614  	int ret;
3615  
3616  	/* don't allow removal of VLAN 0 */
3617  	if (!vid)
3618  		return 0;
3619  
3620  	while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
3621  		usleep_range(1000, 2000);
3622  
3623  	ret = ice_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
3624  				    ICE_MCAST_VLAN_PROMISC_BITS, vid);
3625  	if (ret) {
3626  		netdev_err(netdev, "Error clearing multicast promiscuous mode on VSI %i\n",
3627  			   vsi->vsi_num);
3628  		vsi->current_netdev_flags |= IFF_ALLMULTI;
3629  	}
3630  
3631  	vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
3632  
3633  	/* Make sure VLAN delete is successful before updating VLAN
3634  	 * information
3635  	 */
3636  	vlan = ICE_VLAN(be16_to_cpu(proto), vid, 0);
3637  	ret = vlan_ops->del_vlan(vsi, &vlan);
3638  	if (ret)
3639  		goto finish;
3640  
3641  	/* Remove multicast promisc rule for the removed VLAN ID if
3642  	 * all-multicast is enabled.
3643  	 */
3644  	if (vsi->current_netdev_flags & IFF_ALLMULTI)
3645  		ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
3646  					   ICE_MCAST_VLAN_PROMISC_BITS, vid);
3647  
3648  	if (!ice_vsi_has_non_zero_vlans(vsi)) {
3649  		/* Update look-up type of multicast promisc rule for VLAN 0
3650  		 * from ICE_SW_LKUP_PROMISC_VLAN to ICE_SW_LKUP_PROMISC when
3651  		 * all-multicast is enabled and VLAN 0 is the only VLAN rule.
3652  		 */
3653  		if (vsi->current_netdev_flags & IFF_ALLMULTI) {
3654  			ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
3655  						   ICE_MCAST_VLAN_PROMISC_BITS,
3656  						   0);
3657  			ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
3658  						 ICE_MCAST_PROMISC_BITS, 0);
3659  		}
3660  	}
3661  
3662  finish:
3663  	clear_bit(ICE_CFG_BUSY, vsi->state);
3664  
3665  	return ret;
3666  }
3667  
3668  /**
3669   * ice_rep_indr_tc_block_unbind - remove and free indirect TC block private data
3670   * @cb_priv: indirection block private data
3671   */
3672  static void ice_rep_indr_tc_block_unbind(void *cb_priv)
3673  {
3674  	struct ice_indr_block_priv *indr_priv = cb_priv;
3675  
3676  	list_del(&indr_priv->list);
3677  	kfree(indr_priv);
3678  }
3679  
3680  /**
3681   * ice_tc_indir_block_unregister - Unregister TC indirect block notifications
3682   * @vsi: VSI struct which has the netdev
3683   */
3684  static void ice_tc_indir_block_unregister(struct ice_vsi *vsi)
3685  {
3686  	struct ice_netdev_priv *np = netdev_priv(vsi->netdev);
3687  
3688  	flow_indr_dev_unregister(ice_indr_setup_tc_cb, np,
3689  				 ice_rep_indr_tc_block_unbind);
3690  }
3691  
3692  /**
3693   * ice_tc_indir_block_remove - clean indirect TC block notifications
3694   * @pf: PF structure
3695   */
3696  static void ice_tc_indir_block_remove(struct ice_pf *pf)
3697  {
3698  	struct ice_vsi *pf_vsi = ice_get_main_vsi(pf);
3699  
3700  	if (!pf_vsi)
3701  		return;
3702  
3703  	ice_tc_indir_block_unregister(pf_vsi);
3704  }
3705  
3706  /**
3707   * ice_tc_indir_block_register - Register TC indirect block notifications
3708   * @vsi: VSI struct which has the netdev
3709   *
3710   * Returns 0 on success, negative value on failure
3711   */
3712  static int ice_tc_indir_block_register(struct ice_vsi *vsi)
3713  {
3714  	struct ice_netdev_priv *np;
3715  
3716  	if (!vsi || !vsi->netdev)
3717  		return -EINVAL;
3718  
3719  	np = netdev_priv(vsi->netdev);
3720  
3721  	INIT_LIST_HEAD(&np->tc_indr_block_priv_list);
3722  	return flow_indr_dev_register(ice_indr_setup_tc_cb, np);
3723  }
3724  
3725  /**
3726   * ice_setup_pf_sw - Setup the HW switch on startup or after reset
3727   * @pf: board private structure
3728   *
3729   * Returns 0 on success, negative value on failure
3730   */
3731  static int ice_setup_pf_sw(struct ice_pf *pf)
3732  {
3733  	struct device *dev = ice_pf_to_dev(pf);
3734  	bool dvm = ice_is_dvm_ena(&pf->hw);
3735  	struct ice_vsi *vsi;
3736  	int status;
3737  
3738  	if (ice_is_reset_in_progress(pf->state))
3739  		return -EBUSY;
3740  
3741  	status = ice_aq_set_port_params(pf->hw.port_info, dvm, NULL);
3742  	if (status)
3743  		return -EIO;
3744  
3745  	vsi = ice_pf_vsi_setup(pf, pf->hw.port_info);
3746  	if (!vsi)
3747  		return -ENOMEM;
3748  
3749  	/* init channel list */
3750  	INIT_LIST_HEAD(&vsi->ch_list);
3751  
3752  	status = ice_cfg_netdev(vsi);
3753  	if (status)
3754  		goto unroll_vsi_setup;
3755  	/* netdev has to be configured before setting frame size */
3756  	ice_vsi_cfg_frame_size(vsi);
3757  
3758  	/* init indirect block notifications */
3759  	status = ice_tc_indir_block_register(vsi);
3760  	if (status) {
3761  		dev_err(dev, "Failed to register netdev notifier\n");
3762  		goto unroll_cfg_netdev;
3763  	}
3764  
3765  	/* Setup DCB netlink interface */
3766  	ice_dcbnl_setup(vsi);
3767  
3768  	/* registering the NAPI handler requires both the queues and
3769  	 * netdev to be created, which are done in ice_pf_vsi_setup()
3770  	 * and ice_cfg_netdev() respectively
3771  	 */
3772  	ice_napi_add(vsi);
3773  
3774  	status = ice_init_mac_fltr(pf);
3775  	if (status)
3776  		goto unroll_napi_add;
3777  
3778  	return 0;
3779  
3780  unroll_napi_add:
3781  	ice_tc_indir_block_unregister(vsi);
3782  unroll_cfg_netdev:
3783  	if (vsi) {
3784  		ice_napi_del(vsi);
3785  		if (vsi->netdev) {
3786  			clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
3787  			free_netdev(vsi->netdev);
3788  			vsi->netdev = NULL;
3789  		}
3790  	}
3791  
3792  unroll_vsi_setup:
3793  	ice_vsi_release(vsi);
3794  	return status;
3795  }
3796  
3797  /**
3798   * ice_get_avail_q_count - Get count of queues not in use
3799   * @pf_qmap: bitmap to get queue use count from
3800   * @lock: pointer to a mutex that protects access to pf_qmap
3801   * @size: size of the bitmap
3802   */
3803  static u16
3804  ice_get_avail_q_count(unsigned long *pf_qmap, struct mutex *lock, u16 size)
3805  {
3806  	unsigned long bit;
3807  	u16 count = 0;
3808  
3809  	mutex_lock(lock);
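      	/* a clear bit in pf_qmap means the queue has not been claimed */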
3810  	for_each_clear_bit(bit, pf_qmap, size)
3811  		count++;
3812  	mutex_unlock(lock);
3813  
3814  	return count;
3815  }
3816  
3817  /**
3818   * ice_get_avail_txq_count - Get count of Tx queues not in use
3819   * @pf: pointer to an ice_pf instance
3820   */
3821  u16 ice_get_avail_txq_count(struct ice_pf *pf)
3822  {
3823  	return ice_get_avail_q_count(pf->avail_txqs, &pf->avail_q_mutex,
3824  				     pf->max_pf_txqs);
3825  }
3826  
3827  /**
3828   * ice_get_avail_rxq_count - Get count of Rx queues not in use
3829   * @pf: pointer to an ice_pf instance
3830   */
3831  u16 ice_get_avail_rxq_count(struct ice_pf *pf)
3832  {
3833  	return ice_get_avail_q_count(pf->avail_rxqs, &pf->avail_q_mutex,
3834  				     pf->max_pf_rxqs);
3835  }
3836  
3837  /**
3838   * ice_deinit_pf - Unrolls initializations done by ice_init_pf
3839   * @pf: board private structure to initialize
3840   */
3841  static void ice_deinit_pf(struct ice_pf *pf)
3842  {
3843  	ice_service_task_stop(pf);
3844  	mutex_destroy(&pf->adev_mutex);
3845  	mutex_destroy(&pf->sw_mutex);
3846  	mutex_destroy(&pf->tc_mutex);
3847  	mutex_destroy(&pf->avail_q_mutex);
3848  	mutex_destroy(&pf->vfs.table_lock);
3849  
3850  	if (pf->avail_txqs) {
3851  		bitmap_free(pf->avail_txqs);
3852  		pf->avail_txqs = NULL;
3853  	}
3854  
3855  	if (pf->avail_rxqs) {
3856  		bitmap_free(pf->avail_rxqs);
3857  		pf->avail_rxqs = NULL;
3858  	}
3859  
3860  	if (pf->ptp.clock)
3861  		ptp_clock_unregister(pf->ptp.clock);
3862  }
3863  
3864  /**
3865   * ice_set_pf_caps - set PFs capability flags
3866   * @pf: pointer to the PF instance
3867   */
3868  static void ice_set_pf_caps(struct ice_pf *pf)
3869  {
3870  	struct ice_hw_func_caps *func_caps = &pf->hw.func_caps;
3871  
3872  	clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
3873  	if (func_caps->common_cap.rdma)
3874  		set_bit(ICE_FLAG_RDMA_ENA, pf->flags);
3875  	clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
3876  	if (func_caps->common_cap.dcb)
3877  		set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
3878  	clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
3879  	if (func_caps->common_cap.sr_iov_1_1) {
3880  		set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
3881  		pf->vfs.num_supported = min_t(int, func_caps->num_allocd_vfs,
3882  					      ICE_MAX_SRIOV_VFS);
3883  	}
3884  	clear_bit(ICE_FLAG_RSS_ENA, pf->flags);
3885  	if (func_caps->common_cap.rss_table_size)
3886  		set_bit(ICE_FLAG_RSS_ENA, pf->flags);
3887  
3888  	clear_bit(ICE_FLAG_FD_ENA, pf->flags);
3889  	if (func_caps->fd_fltr_guar > 0 || func_caps->fd_fltr_best_effort > 0) {
3890  		u16 unused;
3891  
3892  		/* ctrl_vsi_idx will be set to a valid value when flow director
3893  		 * is set up by ice_init_fdir
3894  		 */
3895  		pf->ctrl_vsi_idx = ICE_NO_VSI;
3896  		set_bit(ICE_FLAG_FD_ENA, pf->flags);
3897  		/* force guaranteed filter pool for PF */
3898  		ice_alloc_fd_guar_item(&pf->hw, &unused,
3899  				       func_caps->fd_fltr_guar);
3900  		/* force shared filter pool for PF */
3901  		ice_alloc_fd_shrd_item(&pf->hw, &unused,
3902  				       func_caps->fd_fltr_best_effort);
3903  	}
3904  
3905  	clear_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags);
3906  	if (func_caps->common_cap.ieee_1588)
3907  		set_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags);
3908  
3909  	pf->max_pf_txqs = func_caps->common_cap.num_txq;
3910  	pf->max_pf_rxqs = func_caps->common_cap.num_rxq;
3911  }
3912  
3913  /**
3914   * ice_init_pf - Initialize general software structures (struct ice_pf)
3915   * @pf: board private structure to initialize
3916   */
3917  static int ice_init_pf(struct ice_pf *pf)
3918  {
3919  	ice_set_pf_caps(pf);
3920  
3921  	mutex_init(&pf->sw_mutex);
3922  	mutex_init(&pf->tc_mutex);
3923  	mutex_init(&pf->adev_mutex);
3924  
3925  	INIT_HLIST_HEAD(&pf->aq_wait_list);
3926  	spin_lock_init(&pf->aq_wait_lock);
3927  	init_waitqueue_head(&pf->aq_wait_queue);
3928  
3929  	init_waitqueue_head(&pf->reset_wait_queue);
3930  
3931  	/* setup service timer and periodic service task */
3932  	timer_setup(&pf->serv_tmr, ice_service_timer, 0);
3933  	pf->serv_tmr_period = HZ;
3934  	INIT_WORK(&pf->serv_task, ice_service_task);
3935  	clear_bit(ICE_SERVICE_SCHED, pf->state);
3936  
3937  	mutex_init(&pf->avail_q_mutex);
3938  	pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL);
3939  	if (!pf->avail_txqs)
3940  		return -ENOMEM;
3941  
3942  	pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL);
3943  	if (!pf->avail_rxqs) {
3944  		bitmap_free(pf->avail_txqs);
3945  		pf->avail_txqs = NULL;
3946  		return -ENOMEM;
3947  	}
3948  
3949  	mutex_init(&pf->vfs.table_lock);
3950  	hash_init(pf->vfs.table);
3951  
3952  	return 0;
3953  }
3954  
3955  /**
3956   * ice_reduce_msix_usage - Reduce usage of MSI-X vectors
3957   * @pf: board private structure
3958   * @v_remain: number of remaining MSI-X vectors to be distributed
3959   *
3960   * Reduce the usage of MSI-X vectors when the entire request cannot be
3961   * fulfilled. pf->num_lan_msix and pf->num_rdma_msix are set based on the
3962   * number of remaining vectors.
3963   */
3964  static void ice_reduce_msix_usage(struct ice_pf *pf, int v_remain)
3965  {
3966  	int v_rdma;
3967  
3968  	if (!ice_is_rdma_ena(pf)) {
3969  		pf->num_lan_msix = v_remain;
3970  		return;
3971  	}
3972  
3973  	/* RDMA needs at least 1 interrupt in addition to AEQ MSIX */
3974  	v_rdma = ICE_RDMA_NUM_AEQ_MSIX + 1;
3975  
3976  	if (v_remain < ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_RDMA_MSIX) {
3977  		dev_warn(ice_pf_to_dev(pf), "Not enough MSI-X vectors to support RDMA.\n");
3978  		clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
3979  
3980  		pf->num_rdma_msix = 0;
3981  		pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX;
3982  	} else if ((v_remain < ICE_MIN_LAN_TXRX_MSIX + v_rdma) ||
3983  		   (v_remain - v_rdma < v_rdma)) {
3984  		/* Support minimum RDMA and give remaining vectors to LAN MSIX */
3985  		pf->num_rdma_msix = ICE_MIN_RDMA_MSIX;
3986  		pf->num_lan_msix = v_remain - ICE_MIN_RDMA_MSIX;
3987  	} else {
3988  		/* Split remaining MSIX with RDMA after accounting for AEQ MSIX
3989  		 */
3990  		pf->num_rdma_msix = (v_remain - ICE_RDMA_NUM_AEQ_MSIX) / 2 +
3991  				    ICE_RDMA_NUM_AEQ_MSIX;
3992  		pf->num_lan_msix = v_remain - pf->num_rdma_msix;
3993  	}
3994  }
3995  
3996  /**
3997   * ice_ena_msix_range - Request a range of MSIX vectors from the OS
3998   * @pf: board private structure
3999   *
4000   * Compute the number of MSIX vectors wanted and request from the OS. Adjust
4001   * device usage if there are not enough vectors. Return the number of vectors
4002   * reserved or negative on failure.
4003   */
4004  static int ice_ena_msix_range(struct ice_pf *pf)
4005  {
4006  	int num_cpus, hw_num_msix, v_other, v_wanted, v_actual;
4007  	struct device *dev = ice_pf_to_dev(pf);
4008  	int err, i;
4009  
4010  	hw_num_msix = pf->hw.func_caps.common_cap.num_msix_vectors;
4011  	num_cpus = num_online_cpus();
4012  
4013  	/* LAN miscellaneous handler */
4014  	v_other = ICE_MIN_LAN_OICR_MSIX;
4015  
4016  	/* Flow Director */
4017  	if (test_bit(ICE_FLAG_FD_ENA, pf->flags))
4018  		v_other += ICE_FDIR_MSIX;
4019  
4020  	/* switchdev */
4021  	v_other += ICE_ESWITCH_MSIX;
4022  
4023  	v_wanted = v_other;
4024  
4025  	/* LAN traffic */
4026  	pf->num_lan_msix = num_cpus;
4027  	v_wanted += pf->num_lan_msix;
4028  
4029  	/* RDMA auxiliary driver */
4030  	if (ice_is_rdma_ena(pf)) {
4031  		pf->num_rdma_msix = num_cpus + ICE_RDMA_NUM_AEQ_MSIX;
4032  		v_wanted += pf->num_rdma_msix;
4033  	}
4034  
4035  	if (v_wanted > hw_num_msix) {
4036  		int v_remain;
4037  
4038  		dev_warn(dev, "not enough device MSI-X vectors. wanted = %d, available = %d\n",
4039  			 v_wanted, hw_num_msix);
4040  
4041  		if (hw_num_msix < ICE_MIN_MSIX) {
4042  			err = -ERANGE;
4043  			goto exit_err;
4044  		}
4045  
4046  		v_remain = hw_num_msix - v_other;
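      		/* if not even the minimum LAN vectors remain, shrink the
      		 * non-queue budget so the minimum LAN Tx/Rx vectors still
      		 * fit within ICE_MIN_MSIX
      		 */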
4047  		if (v_remain < ICE_MIN_LAN_TXRX_MSIX) {
4048  			v_other = ICE_MIN_MSIX - ICE_MIN_LAN_TXRX_MSIX;
4049  			v_remain = ICE_MIN_LAN_TXRX_MSIX;
4050  		}
4051  
4052  		ice_reduce_msix_usage(pf, v_remain);
4053  		v_wanted = pf->num_lan_msix + pf->num_rdma_msix + v_other;
4054  
4055  		dev_notice(dev, "Reducing request to %d MSI-X vectors for LAN traffic.\n",
4056  			   pf->num_lan_msix);
4057  		if (ice_is_rdma_ena(pf))
4058  			dev_notice(dev, "Reducing request to %d MSI-X vectors for RDMA.\n",
4059  				   pf->num_rdma_msix);
4060  	}
4061  
4062  	pf->msix_entries = devm_kcalloc(dev, v_wanted,
4063  					sizeof(*pf->msix_entries), GFP_KERNEL);
4064  	if (!pf->msix_entries) {
4065  		err = -ENOMEM;
4066  		goto exit_err;
4067  	}
4068  
4069  	for (i = 0; i < v_wanted; i++)
4070  		pf->msix_entries[i].entry = i;
4071  
4072  	/* actually reserve the vectors */
4073  	v_actual = pci_enable_msix_range(pf->pdev, pf->msix_entries,
4074  					 ICE_MIN_MSIX, v_wanted);
4075  	if (v_actual < 0) {
4076  		dev_err(dev, "unable to reserve MSI-X vectors\n");
4077  		err = v_actual;
4078  		goto msix_err;
4079  	}
4080  
4081  	if (v_actual < v_wanted) {
4082  		dev_warn(dev, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n",
4083  			 v_wanted, v_actual);
4084  
4085  		if (v_actual < ICE_MIN_MSIX) {
4086  			/* error if we can't get minimum vectors */
4087  			pci_disable_msix(pf->pdev);
4088  			err = -ERANGE;
4089  			goto msix_err;
4090  		} else {
4091  			int v_remain = v_actual - v_other;
4092  
4093  			if (v_remain < ICE_MIN_LAN_TXRX_MSIX)
4094  				v_remain = ICE_MIN_LAN_TXRX_MSIX;
4095  
4096  			ice_reduce_msix_usage(pf, v_remain);
4097  
4098  			dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n",
4099  				   pf->num_lan_msix);
4100  
4101  			if (ice_is_rdma_ena(pf))
4102  				dev_notice(dev, "Enabled %d MSI-X vectors for RDMA.\n",
4103  					   pf->num_rdma_msix);
4104  		}
4105  	}
4106  
4107  	return v_actual;
4108  
4109  msix_err:
4110  	devm_kfree(dev, pf->msix_entries);
4111  
4112  exit_err:
4113  	pf->num_rdma_msix = 0;
4114  	pf->num_lan_msix = 0;
4115  	return err;
4116  }
4117  
4118  /**
4119   * ice_dis_msix - Disable MSI-X interrupt setup in OS
4120   * @pf: board private structure
4121   */
4122  static void ice_dis_msix(struct ice_pf *pf)
4123  {
4124  	pci_disable_msix(pf->pdev);
4125  	devm_kfree(ice_pf_to_dev(pf), pf->msix_entries);
4126  	pf->msix_entries = NULL;
4127  }
4128  
4129  /**
4130   * ice_clear_interrupt_scheme - Undo things done by ice_init_interrupt_scheme
4131   * @pf: board private structure
4132   */
4133  static void ice_clear_interrupt_scheme(struct ice_pf *pf)
4134  {
4135  	ice_dis_msix(pf);
4136  
4137  	if (pf->irq_tracker) {
4138  		devm_kfree(ice_pf_to_dev(pf), pf->irq_tracker);
4139  		pf->irq_tracker = NULL;
4140  	}
4141  }
4142  
4143  /**
4144   * ice_init_interrupt_scheme - Determine proper interrupt scheme
4145   * @pf: board private structure to initialize
4146   */
4147  static int ice_init_interrupt_scheme(struct ice_pf *pf)
4148  {
4149  	int vectors;
4150  
4151  	vectors = ice_ena_msix_range(pf);
4152  
4153  	if (vectors < 0)
4154  		return vectors;
4155  
4156  	/* set up vector assignment tracking */
4157  	pf->irq_tracker = devm_kzalloc(ice_pf_to_dev(pf),
4158  				       struct_size(pf->irq_tracker, list, vectors),
4159  				       GFP_KERNEL);
4160  	if (!pf->irq_tracker) {
4161  		ice_dis_msix(pf);
4162  		return -ENOMEM;
4163  	}
4164  
4165  	/* populate the SW interrupt pool with the number of OS-granted IRQs */
4166  	pf->num_avail_sw_msix = (u16)vectors;
4167  	pf->irq_tracker->num_entries = (u16)vectors;
4168  	pf->irq_tracker->end = pf->irq_tracker->num_entries;
4169  
4170  	return 0;
4171  }
4172  
4173  /**
4174   * ice_is_wol_supported - check if WoL is supported
4175   * @hw: pointer to hardware info
4176   *
4177   * Check if WoL is supported based on the HW configuration.
4178   * Returns true if NVM supports and enables WoL for this port, false otherwise
4179   */
4180  bool ice_is_wol_supported(struct ice_hw *hw)
4181  {
4182  	u16 wol_ctrl;
4183  
4184  	/* A bit set to 1 in the NVM Software Reserved Word 2 (WoL control
4185  	 * word) indicates WoL is not supported on the corresponding PF ID.
4186  	 */
4187  	if (ice_read_sr_word(hw, ICE_SR_NVM_WOL_CFG, &wol_ctrl))
4188  		return false;
4189  
4190  	return !(BIT(hw->port_info->lport) & wol_ctrl);
4191  }
4192  
4193  /**
4194   * ice_vsi_recfg_qs - Change the number of queues on a VSI
4195   * @vsi: VSI being changed
4196   * @new_rx: new number of Rx queues
4197   * @new_tx: new number of Tx queues
4198   *
4199   * Only change the number of queues if new_tx or new_rx is non-zero.
4200   *
4201   * Returns 0 on success.
4202   */
4203  int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx)
4204  {
4205  	struct ice_pf *pf = vsi->back;
4206  	int err = 0, timeout = 50;
4207  
4208  	if (!new_rx && !new_tx)
4209  		return -EINVAL;
4210  
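      	/* wait up to ~100 ms for any configuration in progress to finish */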
4211  	while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) {
4212  		timeout--;
4213  		if (!timeout)
4214  			return -EBUSY;
4215  		usleep_range(1000, 2000);
4216  	}
4217  
4218  	if (new_tx)
4219  		vsi->req_txq = (u16)new_tx;
4220  	if (new_rx)
4221  		vsi->req_rxq = (u16)new_rx;
4222  
4223  	/* set for the next time the netdev is started */
4224  	if (!netif_running(vsi->netdev)) {
4225  		ice_vsi_rebuild(vsi, false);
4226  		dev_dbg(ice_pf_to_dev(pf), "Link is down, queue count change happens when link is brought up\n");
4227  		goto done;
4228  	}
4229  
4230  	ice_vsi_close(vsi);
4231  	ice_vsi_rebuild(vsi, false);
4232  	ice_pf_dcb_recfg(pf);
4233  	ice_vsi_open(vsi);
4234  done:
4235  	clear_bit(ICE_CFG_BUSY, pf->state);
4236  	return err;
4237  }
4238  
4239  /**
4240   * ice_set_safe_mode_vlan_cfg - configure PF VSI to allow all VLANs in safe mode
4241   * @pf: PF to configure
4242   *
4243   * No VLAN offloads/filtering are advertised in safe mode so make sure the PF
4244   * VSI can still Tx/Rx VLAN tagged packets.
4245   */
4246  static void ice_set_safe_mode_vlan_cfg(struct ice_pf *pf)
4247  {
4248  	struct ice_vsi *vsi = ice_get_main_vsi(pf);
4249  	struct ice_vsi_ctx *ctxt;
4250  	struct ice_hw *hw;
4251  	int status;
4252  
4253  	if (!vsi)
4254  		return;
4255  
4256  	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
4257  	if (!ctxt)
4258  		return;
4259  
4260  	hw = &pf->hw;
4261  	ctxt->info = vsi->info;
4262  
4263  	ctxt->info.valid_sections =
4264  		cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID |
4265  			    ICE_AQ_VSI_PROP_SECURITY_VALID |
4266  			    ICE_AQ_VSI_PROP_SW_VALID);
4267  
4268  	/* disable VLAN anti-spoof */
4269  	ctxt->info.sec_flags &= ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
4270  				  ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
4271  
4272  	/* disable VLAN pruning and keep all other settings */
4273  	ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
4274  
4275  	/* allow all VLANs on Tx and don't strip on Rx */
4276  	ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL |
4277  		ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING;
4278  
4279  	status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
4280  	if (status) {
4281  		dev_err(ice_pf_to_dev(vsi->back), "Failed to update VSI for safe mode VLANs, err %d aq_err %s\n",
4282  			status, ice_aq_str(hw->adminq.sq_last_status));
4283  	} else {
4284  		vsi->info.sec_flags = ctxt->info.sec_flags;
4285  		vsi->info.sw_flags2 = ctxt->info.sw_flags2;
4286  		vsi->info.inner_vlan_flags = ctxt->info.inner_vlan_flags;
4287  	}
4288  
4289  	kfree(ctxt);
4290  }
4291  
4292  /**
4293   * ice_log_pkg_init - log result of DDP package load
4294   * @hw: pointer to hardware info
4295   * @state: state of package load
4296   */
4297  static void ice_log_pkg_init(struct ice_hw *hw, enum ice_ddp_state state)
4298  {
4299  	struct ice_pf *pf = hw->back;
4300  	struct device *dev;
4301  
4302  	dev = ice_pf_to_dev(pf);
4303  
4304  	switch (state) {
4305  	case ICE_DDP_PKG_SUCCESS:
4306  		dev_info(dev, "The DDP package was successfully loaded: %s version %d.%d.%d.%d\n",
4307  			 hw->active_pkg_name,
4308  			 hw->active_pkg_ver.major,
4309  			 hw->active_pkg_ver.minor,
4310  			 hw->active_pkg_ver.update,
4311  			 hw->active_pkg_ver.draft);
4312  		break;
4313  	case ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED:
4314  		dev_info(dev, "DDP package already present on device: %s version %d.%d.%d.%d\n",
4315  			 hw->active_pkg_name,
4316  			 hw->active_pkg_ver.major,
4317  			 hw->active_pkg_ver.minor,
4318  			 hw->active_pkg_ver.update,
4319  			 hw->active_pkg_ver.draft);
4320  		break;
4321  	case ICE_DDP_PKG_ALREADY_LOADED_NOT_SUPPORTED:
4322  		dev_err(dev, "The device has a DDP package that is not supported by the driver.  The device has package '%s' version %d.%d.x.x.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
4323  			hw->active_pkg_name,
4324  			hw->active_pkg_ver.major,
4325  			hw->active_pkg_ver.minor,
4326  			ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
4327  		break;
4328  	case ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED:
4329  		dev_info(dev, "The driver could not load the DDP package file because a compatible DDP package is already present on the device.  The device has package '%s' version %d.%d.%d.%d.  The package file found by the driver: '%s' version %d.%d.%d.%d.\n",
4330  			 hw->active_pkg_name,
4331  			 hw->active_pkg_ver.major,
4332  			 hw->active_pkg_ver.minor,
4333  			 hw->active_pkg_ver.update,
4334  			 hw->active_pkg_ver.draft,
4335  			 hw->pkg_name,
4336  			 hw->pkg_ver.major,
4337  			 hw->pkg_ver.minor,
4338  			 hw->pkg_ver.update,
4339  			 hw->pkg_ver.draft);
4340  		break;
4341  	case ICE_DDP_PKG_FW_MISMATCH:
4342  		dev_err(dev, "The firmware loaded on the device is not compatible with the DDP package.  Please update the device's NVM.  Entering safe mode.\n");
4343  		break;
4344  	case ICE_DDP_PKG_INVALID_FILE:
4345  		dev_err(dev, "The DDP package file is invalid. Entering Safe Mode.\n");
4346  		break;
4347  	case ICE_DDP_PKG_FILE_VERSION_TOO_HIGH:
4348  		dev_err(dev, "The DDP package file version is higher than the driver supports.  Please use an updated driver.  Entering Safe Mode.\n");
4349  		break;
4350  	case ICE_DDP_PKG_FILE_VERSION_TOO_LOW:
4351  		dev_err(dev, "The DDP package file version is lower than the driver supports.  The driver requires version %d.%d.x.x.  Please use an updated DDP Package file.  Entering Safe Mode.\n",
4352  			ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
4353  		break;
4354  	case ICE_DDP_PKG_FILE_SIGNATURE_INVALID:
4355  		dev_err(dev, "The DDP package could not be loaded because its signature is not valid.  Please use a valid DDP Package.  Entering Safe Mode.\n");
4356  		break;
4357  	case ICE_DDP_PKG_FILE_REVISION_TOO_LOW:
4358  		dev_err(dev, "The DDP Package could not be loaded because its security revision is too low.  Please use an updated DDP Package.  Entering Safe Mode.\n");
4359  		break;
4360  	case ICE_DDP_PKG_LOAD_ERROR:
4361  		dev_err(dev, "An error occurred on the device while loading the DDP package.  The device will be reset.\n");
4362  		/* poll for reset to complete */
4363  		if (ice_check_reset(hw))
4364  			dev_err(dev, "Error resetting device. Please reload the driver\n");
4365  		break;
4366  	case ICE_DDP_PKG_ERR:
4367  	default:
4368  		dev_err(dev, "An unknown error occurred when loading the DDP package.  Entering Safe Mode.\n");
4369  		break;
4370  	}
4371  }
4372  
4373  /**
4374   * ice_load_pkg - load/reload the DDP Package file
4375   * @firmware: firmware structure when firmware requested or NULL for reload
4376   * @pf: pointer to the PF instance
4377   *
4378   * Called on probe and post CORER/GLOBR rebuild to load DDP Package and
4379   * initialize HW tables.
4380   */
4381  static void
4382  ice_load_pkg(const struct firmware *firmware, struct ice_pf *pf)
4383  {
4384  	enum ice_ddp_state state = ICE_DDP_PKG_ERR;
4385  	struct device *dev = ice_pf_to_dev(pf);
4386  	struct ice_hw *hw = &pf->hw;
4387  
4388  	/* Load DDP Package */
4389  	if (firmware && !hw->pkg_copy) {
4390  		state = ice_copy_and_init_pkg(hw, firmware->data,
4391  					      firmware->size);
4392  		ice_log_pkg_init(hw, state);
4393  	} else if (!firmware && hw->pkg_copy) {
4394  		/* Reload package during rebuild after CORER/GLOBR reset */
4395  		state = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
4396  		ice_log_pkg_init(hw, state);
4397  	} else {
4398  		dev_err(dev, "The DDP package file failed to load. Entering Safe Mode.\n");
4399  	}
4400  
4401  	if (!ice_is_init_pkg_successful(state)) {
4402  		/* Safe Mode */
4403  		clear_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
4404  		return;
4405  	}
4406  
4407  	/* A successful package download is the precondition for advanced
4408  	 * features, hence setting the ICE_FLAG_ADV_FEATURES flag
4409  	 */
4410  	set_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
4411  }
4412  
4413  /**
4414   * ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines
4415   * @pf: pointer to the PF structure
4416   *
4417   * There is no error returned here because the driver should be able to handle
4418   * 128 Byte cache lines, so we only print a warning in case issues are seen,
4419   * specifically with Tx.
4420   */
4421  static void ice_verify_cacheline_size(struct ice_pf *pf)
4422  {
4423  	if (rd32(&pf->hw, GLPCI_CNF2) & GLPCI_CNF2_CACHELINE_SIZE_M)
4424  		dev_warn(ice_pf_to_dev(pf), "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n",
4425  			 ICE_CACHE_LINE_BYTES);
4426  }
4427  
4428  /**
4429   * ice_send_version - update firmware with driver version
4430   * @pf: PF struct
4431   *
4432   * Returns 0 on success, else error code
4433   */
4434  static int ice_send_version(struct ice_pf *pf)
4435  {
4436  	struct ice_driver_ver dv;
4437  
4438  	dv.major_ver = 0xff;
4439  	dv.minor_ver = 0xff;
4440  	dv.build_ver = 0xff;
4441  	dv.subbuild_ver = 0;
4442  	strscpy((char *)dv.driver_string, UTS_RELEASE,
4443  		sizeof(dv.driver_string));
4444  	return ice_aq_send_driver_ver(&pf->hw, &dv, NULL);
4445  }
4446  
4447  /**
4448   * ice_init_fdir - Initialize flow director VSI and configuration
4449   * @pf: pointer to the PF instance
4450   *
4451   * returns 0 on success, negative on error
4452   */
4453  static int ice_init_fdir(struct ice_pf *pf)
4454  {
4455  	struct device *dev = ice_pf_to_dev(pf);
4456  	struct ice_vsi *ctrl_vsi;
4457  	int err;
4458  
4459  	/* Side Band Flow Director needs to have a control VSI.
4460  	 * Allocate it and store it in the PF.
4461  	 */
4462  	ctrl_vsi = ice_ctrl_vsi_setup(pf, pf->hw.port_info);
4463  	if (!ctrl_vsi) {
4464  		dev_dbg(dev, "could not create control VSI\n");
4465  		return -ENOMEM;
4466  	}
4467  
4468  	err = ice_vsi_open_ctrl(ctrl_vsi);
4469  	if (err) {
4470  		dev_dbg(dev, "could not open control VSI\n");
4471  		goto err_vsi_open;
4472  	}
4473  
4474  	mutex_init(&pf->hw.fdir_fltr_lock);
4475  
4476  	err = ice_fdir_create_dflt_rules(pf);
4477  	if (err)
4478  		goto err_fdir_rule;
4479  
4480  	return 0;
4481  
4482  err_fdir_rule:
4483  	ice_fdir_release_flows(&pf->hw);
4484  	ice_vsi_close(ctrl_vsi);
4485  err_vsi_open:
4486  	ice_vsi_release(ctrl_vsi);
4487  	if (pf->ctrl_vsi_idx != ICE_NO_VSI) {
4488  		pf->vsi[pf->ctrl_vsi_idx] = NULL;
4489  		pf->ctrl_vsi_idx = ICE_NO_VSI;
4490  	}
4491  	return err;
4492  }
4493  
4494  /**
4495   * ice_get_opt_fw_name - return optional firmware file name or NULL
4496   * @pf: pointer to the PF instance
4497   */
4498  static char *ice_get_opt_fw_name(struct ice_pf *pf)
4499  {
4500  	/* The optional firmware name is the same as the default, with an added
4501  	 * dash followed by an EUI-64 identifier (PCIe Device Serial Number)
4502  	 */
4503  	struct pci_dev *pdev = pf->pdev;
4504  	char *opt_fw_filename;
4505  	u64 dsn;
4506  
4507  	/* Determine the name of the optional file using the DSN (two
4508  	 * dwords following the start of the DSN Capability).
4509  	 */
4510  	dsn = pci_get_dsn(pdev);
4511  	if (!dsn)
4512  		return NULL;
4513  
4514  	opt_fw_filename = kzalloc(NAME_MAX, GFP_KERNEL);
4515  	if (!opt_fw_filename)
4516  		return NULL;
4517  
4518  	snprintf(opt_fw_filename, NAME_MAX, "%sice-%016llx.pkg",
4519  		 ICE_DDP_PKG_PATH, dsn);
4520  
4521  	return opt_fw_filename;
4522  }
4523  
4524  /**
4525   * ice_request_fw - Device initialization routine
4526   * @pf: pointer to the PF instance
4527   */
4528  static void ice_request_fw(struct ice_pf *pf)
4529  {
4530  	char *opt_fw_filename = ice_get_opt_fw_name(pf);
4531  	const struct firmware *firmware = NULL;
4532  	struct device *dev = ice_pf_to_dev(pf);
4533  	int err = 0;
4534  
4535  	/* An optional device-specific DDP package (if present) overrides the
4536  	 * default DDP package file. The kernel logs a debug message if the file
4537  	 * doesn't exist, and warning messages for other errors.
4538  	 */
4539  	if (opt_fw_filename) {
4540  		err = firmware_request_nowarn(&firmware, opt_fw_filename, dev);
4541  		if (err) {
4542  			kfree(opt_fw_filename);
4543  			goto dflt_pkg_load;
4544  		}
4545  
4546  		/* request for firmware was successful. Download to device */
4547  		ice_load_pkg(firmware, pf);
4548  		kfree(opt_fw_filename);
4549  		release_firmware(firmware);
4550  		return;
4551  	}
4552  
4553  dflt_pkg_load:
4554  	err = request_firmware(&firmware, ICE_DDP_PKG_FILE, dev);
4555  	if (err) {
4556  		dev_err(dev, "The DDP package file was not found or could not be read. Entering Safe Mode\n");
4557  		return;
4558  	}
4559  
4560  	/* request for firmware was successful. Download to device */
4561  	ice_load_pkg(firmware, pf);
4562  	release_firmware(firmware);
4563  }
4564  
4565  /**
4566   * ice_print_wake_reason - show the wake up cause in the log
4567   * @pf: pointer to the PF struct
4568   */
4569  static void ice_print_wake_reason(struct ice_pf *pf)
4570  {
4571  	u32 wus = pf->wakeup_reason;
4572  	const char *wake_str;
4573  
4574  	/* if no wake event, nothing to print */
4575  	if (!wus)
4576  		return;
4577  
4578  	if (wus & PFPM_WUS_LNKC_M)
4579  		wake_str = "Link\n";
4580  	else if (wus & PFPM_WUS_MAG_M)
4581  		wake_str = "Magic Packet\n";
4582  	else if (wus & PFPM_WUS_MNG_M)
4583  		wake_str = "Management\n";
4584  	else if (wus & PFPM_WUS_FW_RST_WK_M)
4585  		wake_str = "Firmware Reset\n";
4586  	else
4587  		wake_str = "Unknown\n";
4588  
4589  	dev_info(ice_pf_to_dev(pf), "Wake reason: %s", wake_str);
4590  }
4591  
4592  /**
4593   * ice_register_netdev - register netdev and devlink port
4594   * @pf: pointer to the PF struct
4595   */
4596  static int ice_register_netdev(struct ice_pf *pf)
4597  {
4598  	struct ice_vsi *vsi;
4599  	int err = 0;
4600  
4601  	vsi = ice_get_main_vsi(pf);
4602  	if (!vsi || !vsi->netdev)
4603  		return -EIO;
4604  
4605  	err = ice_devlink_create_pf_port(pf);
4606  	if (err)
4607  		goto err_devlink_create;
4608  
4609  	SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port);
4610  	err = register_netdev(vsi->netdev);
4611  	if (err)
4612  		goto err_register_netdev;
4613  
4614  	set_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
4615  	netif_carrier_off(vsi->netdev);
4616  	netif_tx_stop_all_queues(vsi->netdev);
4617  
4618  	return 0;
4619  err_register_netdev:
4620  	ice_devlink_destroy_pf_port(pf);
4621  err_devlink_create:
4622  	free_netdev(vsi->netdev);
4623  	vsi->netdev = NULL;
4624  	clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
4625  	return err;
4626  }
4627  
4628  /**
4629   * ice_probe - Device initialization routine
4630   * @pdev: PCI device information struct
4631   * @ent: entry in ice_pci_tbl
4632   *
4633   * Returns 0 on success, negative on failure
4634   */
4635  static int
4636  ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
4637  {
4638  	struct device *dev = &pdev->dev;
4639  	struct ice_pf *pf;
4640  	struct ice_hw *hw;
4641  	int i, err;
4642  
4643  	if (pdev->is_virtfn) {
4644  		dev_err(dev, "can't probe a virtual function\n");
4645  		return -EINVAL;
4646  	}
4647  
4648  	/* this driver uses devres, see
4649  	 * Documentation/driver-api/driver-model/devres.rst
4650  	 */
4651  	err = pcim_enable_device(pdev);
4652  	if (err)
4653  		return err;
4654  
4655  	err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), dev_driver_string(dev));
4656  	if (err) {
4657  		dev_err(dev, "BAR0 I/O map error %d\n", err);
4658  		return err;
4659  	}
4660  
4661  	pf = ice_allocate_pf(dev);
4662  	if (!pf)
4663  		return -ENOMEM;
4664  
4665  	/* initialize Auxiliary index to invalid value */
4666  	pf->aux_idx = -1;
4667  
4668  	/* set up for high or low DMA */
4669  	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
4670  	if (err) {
4671  		dev_err(dev, "DMA configuration failed: 0x%x\n", err);
4672  		return err;
4673  	}
4674  
4675  	pci_enable_pcie_error_reporting(pdev);
4676  	pci_set_master(pdev);
4677  
4678  	pf->pdev = pdev;
4679  	pci_set_drvdata(pdev, pf);
4680  	set_bit(ICE_DOWN, pf->state);
4681  	/* Disable service task until DOWN bit is cleared */
4682  	set_bit(ICE_SERVICE_DIS, pf->state);
4683  
4684  	hw = &pf->hw;
4685  	hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0];
4686  	pci_save_state(pdev);
4687  
4688  	hw->back = pf;
4689  	hw->vendor_id = pdev->vendor;
4690  	hw->device_id = pdev->device;
4691  	pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
4692  	hw->subsystem_vendor_id = pdev->subsystem_vendor;
4693  	hw->subsystem_device_id = pdev->subsystem_device;
4694  	hw->bus.device = PCI_SLOT(pdev->devfn);
4695  	hw->bus.func = PCI_FUNC(pdev->devfn);
4696  	ice_set_ctrlq_len(hw);
4697  
4698  	pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M);
4699  
4700  #ifndef CONFIG_DYNAMIC_DEBUG
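      	/* a debug value below -1 is used directly as the HW debug_mask */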
4701  	if (debug < -1)
4702  		hw->debug_mask = debug;
4703  #endif
4704  
4705  	err = ice_init_hw(hw);
4706  	if (err) {
4707  		dev_err(dev, "ice_init_hw failed: %d\n", err);
4708  		err = -EIO;
4709  		goto err_exit_unroll;
4710  	}
4711  
4712  	ice_init_feature_support(pf);
4713  
4714  	ice_request_fw(pf);
4715  
4716  	/* if ice_request_fw fails, the ICE_FLAG_ADV_FEATURES bit won't be
4717  	 * set in pf->flags, which will cause ice_is_safe_mode to return
4718  	 * true
4719  	 */
4720  	if (ice_is_safe_mode(pf)) {
4721  		/* we already got function/device capabilities but these don't
4722  		 * reflect what the driver needs to do in safe mode. Instead of
4723  		 * adding conditional logic everywhere to ignore these
4724  		 * device/function capabilities, override them.
4725  		 */
4726  		ice_set_safe_mode_caps(hw);
4727  	}
4728  
4729  	err = ice_init_pf(pf);
4730  	if (err) {
4731  		dev_err(dev, "ice_init_pf failed: %d\n", err);
4732  		goto err_init_pf_unroll;
4733  	}
4734  
4735  	ice_devlink_init_regions(pf);
4736  
4737  	pf->hw.udp_tunnel_nic.set_port = ice_udp_tunnel_set_port;
4738  	pf->hw.udp_tunnel_nic.unset_port = ice_udp_tunnel_unset_port;
4739  	pf->hw.udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
4740  	pf->hw.udp_tunnel_nic.shared = &pf->hw.udp_tunnel_shared;
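      	/* expose one udp_tunnel_nic port table per tunnel type the
      	 * device reports support for: VXLAN first, then GENEVE
      	 */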
4741  	i = 0;
4742  	if (pf->hw.tnl.valid_count[TNL_VXLAN]) {
4743  		pf->hw.udp_tunnel_nic.tables[i].n_entries =
4744  			pf->hw.tnl.valid_count[TNL_VXLAN];
4745  		pf->hw.udp_tunnel_nic.tables[i].tunnel_types =
4746  			UDP_TUNNEL_TYPE_VXLAN;
4747  		i++;
4748  	}
4749  	if (pf->hw.tnl.valid_count[TNL_GENEVE]) {
4750  		pf->hw.udp_tunnel_nic.tables[i].n_entries =
4751  			pf->hw.tnl.valid_count[TNL_GENEVE];
4752  		pf->hw.udp_tunnel_nic.tables[i].tunnel_types =
4753  			UDP_TUNNEL_TYPE_GENEVE;
4754  		i++;
4755  	}
4756  
4757  	pf->num_alloc_vsi = hw->func_caps.guar_num_vsi;
4758  	if (!pf->num_alloc_vsi) {
4759  		err = -EIO;
4760  		goto err_init_pf_unroll;
4761  	}
4762  	if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) {
4763  		dev_warn(&pf->pdev->dev,
4764  			 "limiting the VSI count due to UDP tunnel limitation %d > %d\n",
4765  			 pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES);
4766  		pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES;
4767  	}
4768  
4769  	pf->vsi = devm_kcalloc(dev, pf->num_alloc_vsi, sizeof(*pf->vsi),
4770  			       GFP_KERNEL);
4771  	if (!pf->vsi) {
4772  		err = -ENOMEM;
4773  		goto err_init_pf_unroll;
4774  	}
4775  
4776  	pf->vsi_stats = devm_kcalloc(dev, pf->num_alloc_vsi,
4777  				     sizeof(*pf->vsi_stats), GFP_KERNEL);
4778  	if (!pf->vsi_stats) {
4779  		err = -ENOMEM;
4780  		goto err_init_vsi_unroll;
4781  	}
4782  
4783  	err = ice_init_interrupt_scheme(pf);
4784  	if (err) {
4785  		dev_err(dev, "ice_init_interrupt_scheme failed: %d\n", err);
4786  		err = -EIO;
4787  		goto err_init_vsi_stats_unroll;
4788  	}
4789  
4790  	/* In case of MSIX we are going to set up the misc vector right here
4791  	 * to handle admin queue events etc. In case of legacy and MSI,
4792  	 * the misc functionality and queue processing is combined in
4793  	 * the same vector and that gets set up at open.
4794  	 */
4795  	err = ice_req_irq_msix_misc(pf);
4796  	if (err) {
4797  		dev_err(dev, "setup of misc vector failed: %d\n", err);
4798  		goto err_init_interrupt_unroll;
4799  	}
4800  
4801  	/* create switch struct for the switch element created by FW on boot */
4802  	pf->first_sw = devm_kzalloc(dev, sizeof(*pf->first_sw), GFP_KERNEL);
4803  	if (!pf->first_sw) {
4804  		err = -ENOMEM;
4805  		goto err_msix_misc_unroll;
4806  	}
4807  
4808  	if (hw->evb_veb)
4809  		pf->first_sw->bridge_mode = BRIDGE_MODE_VEB;
4810  	else
4811  		pf->first_sw->bridge_mode = BRIDGE_MODE_VEPA;
4812  
4813  	pf->first_sw->pf = pf;
4814  
4815  	/* record the sw_id available for later use */
4816  	pf->first_sw->sw_id = hw->port_info->sw_id;
4817  
4818  	err = ice_setup_pf_sw(pf);
4819  	if (err) {
4820  		dev_err(dev, "probe failed due to setup PF switch: %d\n", err);
4821  		goto err_alloc_sw_unroll;
4822  	}
4823  
4824  	clear_bit(ICE_SERVICE_DIS, pf->state);
4825  
4826  	/* tell the firmware we are up */
4827  	err = ice_send_version(pf);
4828  	if (err) {
4829  		dev_err(dev, "probe failed sending driver version %s. error: %d\n",
4830  			UTS_RELEASE, err);
4831  		goto err_send_version_unroll;
4832  	}
4833  
4834  	/* since everything is good, start the service timer */
4835  	mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
4836  
4837  	err = ice_init_link_events(pf->hw.port_info);
4838  	if (err) {
4839  		dev_err(dev, "ice_init_link_events failed: %d\n", err);
4840  		goto err_send_version_unroll;
4841  	}
4842  
4843  	/* not a fatal error if this fails */
4844  	err = ice_init_nvm_phy_type(pf->hw.port_info);
4845  	if (err)
4846  		dev_err(dev, "ice_init_nvm_phy_type failed: %d\n", err);
4847  
4848  	/* not a fatal error if this fails */
4849  	err = ice_update_link_info(pf->hw.port_info);
4850  	if (err)
4851  		dev_err(dev, "ice_update_link_info failed: %d\n", err);
4852  
4853  	ice_init_link_dflt_override(pf->hw.port_info);
4854  
4855  	ice_check_link_cfg_err(pf,
4856  			       pf->hw.port_info->phy.link_info.link_cfg_err);
4857  
4858  	/* if media available, initialize PHY settings */
4859  	if (pf->hw.port_info->phy.link_info.link_info &
4860  	    ICE_AQ_MEDIA_AVAILABLE) {
4861  		/* not a fatal error if this fails */
4862  		err = ice_init_phy_user_cfg(pf->hw.port_info);
4863  		if (err)
4864  			dev_err(dev, "ice_init_phy_user_cfg failed: %d\n", err);
4865  
4866  		if (!test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) {
4867  			struct ice_vsi *vsi = ice_get_main_vsi(pf);
4868  
4869  			if (vsi)
4870  				ice_configure_phy(vsi);
4871  		}
4872  	} else {
4873  		set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
4874  	}
4875  
4876  	ice_verify_cacheline_size(pf);
4877  
4878  	/* Save wakeup reason register for later use */
4879  	pf->wakeup_reason = rd32(hw, PFPM_WUS);
4880  
4881  	/* check for a power management event */
4882  	ice_print_wake_reason(pf);
4883  
4884  	/* clear wake status, all bits */
4885  	wr32(hw, PFPM_WUS, U32_MAX);
4886  
4887  	/* Disable WoL at init, wait for user to enable */
4888  	device_set_wakeup_enable(dev, false);
4889  
4890  	if (ice_is_safe_mode(pf)) {
4891  		ice_set_safe_mode_vlan_cfg(pf);
4892  		goto probe_done;
4893  	}
4894  
4895  	/* initialize DDP driven features */
4896  	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
4897  		ice_ptp_init(pf);
4898  
4899  	if (ice_is_feature_supported(pf, ICE_F_GNSS))
4900  		ice_gnss_init(pf);
4901  
4902  	/* Note: Flow director init failure is non-fatal to load */
4903  	if (ice_init_fdir(pf))
4904  		dev_err(dev, "could not initialize flow director\n");
4905  
4906  	/* Note: DCB init failure is non-fatal to load */
4907  	if (ice_init_pf_dcb(pf, false)) {
4908  		clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
4909  		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
4910  	} else {
4911  		ice_cfg_lldp_mib_change(&pf->hw, true);
4912  	}
4913  
4914  	if (ice_init_lag(pf))
4915  		dev_warn(dev, "Failed to init link aggregation support\n");
4916  
4917  	/* print PCI link speed and width */
4918  	pcie_print_link_status(pf->pdev);
4919  
4920  probe_done:
4921  	err = ice_register_netdev(pf);
4922  	if (err)
4923  		goto err_netdev_reg;
4924  
4925  	err = ice_devlink_register_params(pf);
4926  	if (err)
4927  		goto err_netdev_reg;
4928  
4929  	/* ready to go, so clear down state bit */
4930  	clear_bit(ICE_DOWN, pf->state);
4931  	if (ice_is_rdma_ena(pf)) {
4932  		pf->aux_idx = ida_alloc(&ice_aux_ida, GFP_KERNEL);
4933  		if (pf->aux_idx < 0) {
4934  			dev_err(dev, "Failed to allocate device ID for AUX driver\n");
4935  			err = -ENOMEM;
4936  			goto err_devlink_reg_param;
4937  		}
4938  
4939  		err = ice_init_rdma(pf);
4940  		if (err) {
4941  			dev_err(dev, "Failed to initialize RDMA: %d\n", err);
4942  			err = -EIO;
4943  			goto err_init_aux_unroll;
4944  		}
4945  	} else {
4946  		dev_warn(dev, "RDMA is not supported on this device\n");
4947  	}
4948  
4949  	ice_devlink_register(pf);
4950  	return 0;
4951  
4952  err_init_aux_unroll:
4953  	pf->adev = NULL;
4954  	ida_free(&ice_aux_ida, pf->aux_idx);
4955  err_devlink_reg_param:
4956  	ice_devlink_unregister_params(pf);
4957  err_netdev_reg:
4958  err_send_version_unroll:
4959  	ice_vsi_release_all(pf);
4960  err_alloc_sw_unroll:
4961  	set_bit(ICE_SERVICE_DIS, pf->state);
4962  	set_bit(ICE_DOWN, pf->state);
4963  	devm_kfree(dev, pf->first_sw);
4964  err_msix_misc_unroll:
4965  	ice_free_irq_msix_misc(pf);
4966  err_init_interrupt_unroll:
4967  	ice_clear_interrupt_scheme(pf);
4968  err_init_vsi_stats_unroll:
4969  	devm_kfree(dev, pf->vsi_stats);
4970  	pf->vsi_stats = NULL;
4971  err_init_vsi_unroll:
4972  	devm_kfree(dev, pf->vsi);
4973  err_init_pf_unroll:
4974  	ice_deinit_pf(pf);
4975  	ice_devlink_destroy_regions(pf);
4976  	ice_deinit_hw(hw);
4977  err_exit_unroll:
4978  	pci_disable_pcie_error_reporting(pdev);
4979  	pci_disable_device(pdev);
4980  	return err;
4981  }
4982  
4983  /**
4984   * ice_set_wake - enable or disable Wake on LAN
4985   * @pf: pointer to the PF struct
4986   *
4987   * Simple helper for WoL control
4988   */
4989  static void ice_set_wake(struct ice_pf *pf)
4990  {
4991  	struct ice_hw *hw = &pf->hw;
4992  	bool wol = pf->wol_ena;
4993  
4994  	/* clear wake state, otherwise new wake events won't fire */
4995  	wr32(hw, PFPM_WUS, U32_MAX);
4996  
4997  	/* enable / disable APM wake up, no RMW needed */
4998  	wr32(hw, PFPM_APM, wol ? PFPM_APM_APME_M : 0);
4999  
5000  	/* set magic packet filter enabled */
5001  	wr32(hw, PFPM_WUFC, wol ? PFPM_WUFC_MAG_M : 0);
5002  }
5003  
5004  /**
5005   * ice_setup_mc_magic_wake - setup device to wake on multicast magic packet
5006   * @pf: pointer to the PF struct
5007   *
5008   * Issue firmware command to enable multicast magic wake, making
5009   * sure that any locally administered address (LAA) is used for
5010   * wake, and that PF reset doesn't undo the LAA.
5011   */
5012  static void ice_setup_mc_magic_wake(struct ice_pf *pf)
5013  {
5014  	struct device *dev = ice_pf_to_dev(pf);
5015  	struct ice_hw *hw = &pf->hw;
5016  	u8 mac_addr[ETH_ALEN];
5017  	struct ice_vsi *vsi;
5018  	int status;
5019  	u8 flags;
5020  
5021  	if (!pf->wol_ena)
5022  		return;
5023  
5024  	vsi = ice_get_main_vsi(pf);
5025  	if (!vsi)
5026  		return;
5027  
5028  	/* Get current MAC address in case it's an LAA */
5029  	if (vsi->netdev)
5030  		ether_addr_copy(mac_addr, vsi->netdev->dev_addr);
5031  	else
5032  		ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
5033  
5034  	flags = ICE_AQC_MAN_MAC_WR_MC_MAG_EN |
5035  		ICE_AQC_MAN_MAC_UPDATE_LAA_WOL |
5036  		ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP;
5037  
5038  	status = ice_aq_manage_mac_write(hw, mac_addr, flags, NULL);
5039  	if (status)
5040  		dev_err(dev, "Failed to enable Multicast Magic Packet wake, err %d aq_err %s\n",
5041  			status, ice_aq_str(hw->adminq.sq_last_status));
5042  }
5043  
5044  /**
5045   * ice_remove - Device removal routine
5046   * @pdev: PCI device information struct
5047   */
5048  static void ice_remove(struct pci_dev *pdev)
5049  {
5050  	struct ice_pf *pf = pci_get_drvdata(pdev);
5051  	int i;
5052  
5053  	ice_devlink_unregister(pf);
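      	/* wait for any pending reset to finish before tearing down the device */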
5054  	for (i = 0; i < ICE_MAX_RESET_WAIT; i++) {
5055  		if (!ice_is_reset_in_progress(pf->state))
5056  			break;
5057  		msleep(100);
5058  	}
5059  
5060  	ice_tc_indir_block_remove(pf);
5061  
5062  	if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) {
5063  		set_bit(ICE_VF_RESETS_DISABLED, pf->state);
5064  		ice_free_vfs(pf);
5065  	}
5066  
5067  	ice_service_task_stop(pf);
5068  
5069  	ice_aq_cancel_waiting_tasks(pf);
5070  	ice_unplug_aux_dev(pf);
5071  	if (pf->aux_idx >= 0)
5072  		ida_free(&ice_aux_ida, pf->aux_idx);
5073  	ice_devlink_unregister_params(pf);
5074  	set_bit(ICE_DOWN, pf->state);
5075  
5076  	ice_deinit_lag(pf);
5077  	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
5078  		ice_ptp_release(pf);
5079  	if (ice_is_feature_supported(pf, ICE_F_GNSS))
5080  		ice_gnss_exit(pf);
5081  	if (!ice_is_safe_mode(pf))
5082  		ice_remove_arfs(pf);
5083  	ice_setup_mc_magic_wake(pf);
5084  	ice_vsi_release_all(pf);
5085  	mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
5086  	ice_set_wake(pf);
5087  	ice_free_irq_msix_misc(pf);
5088  	ice_for_each_vsi(pf, i) {
5089  		if (!pf->vsi[i])
5090  			continue;
5091  		ice_vsi_free_q_vectors(pf->vsi[i]);
5092  	}
5093  	devm_kfree(&pdev->dev, pf->vsi_stats);
5094  	pf->vsi_stats = NULL;
5095  	ice_deinit_pf(pf);
5096  	ice_devlink_destroy_regions(pf);
5097  	ice_deinit_hw(&pf->hw);
5098  
5099  	/* Issue a PFR as part of the prescribed driver unload flow.  Do not
5100  	 * do it via ice_schedule_reset() since there is no need to rebuild
5101  	 * and the service task is already stopped.
5102  	 */
5103  	ice_reset(&pf->hw, ICE_RESET_PFR);
5104  	pci_wait_for_pending_transaction(pdev);
5105  	ice_clear_interrupt_scheme(pf);
5106  	pci_disable_pcie_error_reporting(pdev);
5107  	pci_disable_device(pdev);
5108  }
5109  
5110  /**
5111   * ice_shutdown - PCI callback for shutting down device
5112   * @pdev: PCI device information struct
5113   */
5114  static void ice_shutdown(struct pci_dev *pdev)
5115  {
5116  	struct ice_pf *pf = pci_get_drvdata(pdev);
5117  
5118  	ice_remove(pdev);
5119  
5120  	if (system_state == SYSTEM_POWER_OFF) {
5121  		pci_wake_from_d3(pdev, pf->wol_ena);
5122  		pci_set_power_state(pdev, PCI_D3hot);
5123  	}
5124  }
5125  
5126  #ifdef CONFIG_PM
5127  /**
5128   * ice_prepare_for_shutdown - prep for PCI shutdown
5129   * @pf: board private structure
5130   *
5131   * Inform or close all dependent features in prep for PCI device shutdown
5132   */
5133  static void ice_prepare_for_shutdown(struct ice_pf *pf)
5134  {
5135  	struct ice_hw *hw = &pf->hw;
5136  	u32 v;
5137  
5138  	/* Notify VFs of impending reset */
5139  	if (ice_check_sq_alive(hw, &hw->mailboxq))
5140  		ice_vc_notify_reset(pf);
5141  
5142  	dev_dbg(ice_pf_to_dev(pf), "Tearing down internal switch for shutdown\n");
5143  
5144  	/* disable the VSIs and their queues that are not already DOWN */
5145  	ice_pf_dis_all_vsi(pf, false);
5146  
5147  	ice_for_each_vsi(pf, v)
5148  		if (pf->vsi[v])
5149  			pf->vsi[v]->vsi_num = 0;
5150  
5151  	ice_shutdown_all_ctrlq(hw);
5152  }
5153  
5154  /**
5155   * ice_reinit_interrupt_scheme - Reinitialize interrupt scheme
5156   * @pf: board private structure to reinitialize
5157   *
5158   * This routine reinitializes the interrupt scheme that was cleared during
5159   * the power management suspend callback.
5160   *
5161   * This should be called during the resume routine to re-allocate the q_vectors
5162   * and reacquire interrupts.
5163   */
5164  static int ice_reinit_interrupt_scheme(struct ice_pf *pf)
5165  {
5166  	struct device *dev = ice_pf_to_dev(pf);
5167  	int ret, v;
5168  
5169  	/* Since we clear MSIX flag during suspend, we need to
5170  	 * set it back during resume...
5171  	 */
5172  
5173  	ret = ice_init_interrupt_scheme(pf);
5174  	if (ret) {
5175  		dev_err(dev, "Failed to re-initialize interrupt %d\n", ret);
5176  		return ret;
5177  	}
5178  
5179  	/* Remap vectors and rings after successfully re-initializing interrupts */
5180  	ice_for_each_vsi(pf, v) {
5181  		if (!pf->vsi[v])
5182  			continue;
5183  
5184  		ret = ice_vsi_alloc_q_vectors(pf->vsi[v]);
5185  		if (ret)
5186  			goto err_reinit;
5187  		ice_vsi_map_rings_to_vectors(pf->vsi[v]);
5188  	}
5189  
5190  	ret = ice_req_irq_msix_misc(pf);
5191  	if (ret) {
5192  		dev_err(dev, "Setting up misc vector failed after device suspend %d\n",
5193  			ret);
5194  		goto err_reinit;
5195  	}
5196  
5197  	return 0;
5198  
5199  err_reinit:
5200  	while (v--)
5201  		if (pf->vsi[v])
5202  			ice_vsi_free_q_vectors(pf->vsi[v]);
5203  
5204  	return ret;
5205  }
5206  
5207  /**
5208   * ice_suspend
5209   * @dev: generic device information structure
5210   *
5211   * Power Management callback to quiesce the device and prepare
5212   * for D3 transition.
5213   */
5214  static int __maybe_unused ice_suspend(struct device *dev)
5215  {
5216  	struct pci_dev *pdev = to_pci_dev(dev);
5217  	struct ice_pf *pf;
5218  	int disabled, v;
5219  
5220  	pf = pci_get_drvdata(pdev);
5221  
5222  	if (!ice_pf_state_is_nominal(pf)) {
5223  		dev_err(dev, "Device is not ready, no need to suspend it\n");
5224  		return -EBUSY;
5225  	}
5226  
5227  	/* Stop watchdog tasks until resume completion.
5228  	 * Even though it is most likely that the service task is
5229  	 * disabled if the device is suspended or down, the service task's
5230  	 * state is controlled by a different state bit, and we should
5231  	 * store and honor whatever state that bit is in at this point.
5232  	 */
5233  	disabled = ice_service_task_stop(pf);
5234  
5235  	ice_unplug_aux_dev(pf);
5236  
5237  	/* Already suspended? Then there is nothing to do */
5238  	if (test_and_set_bit(ICE_SUSPENDED, pf->state)) {
5239  		if (!disabled)
5240  			ice_service_task_restart(pf);
5241  		return 0;
5242  	}
5243  
5244  	if (test_bit(ICE_DOWN, pf->state) ||
5245  	    ice_is_reset_in_progress(pf->state)) {
5246  		dev_err(dev, "can't suspend device in reset or already down\n");
5247  		if (!disabled)
5248  			ice_service_task_restart(pf);
5249  		return 0;
5250  	}
5251  
5252  	ice_setup_mc_magic_wake(pf);
5253  
5254  	ice_prepare_for_shutdown(pf);
5255  
5256  	ice_set_wake(pf);
5257  
5258  	/* Free vectors, clear the interrupt scheme and release IRQs
5259  	 * for proper hibernation, especially with large number of CPUs.
5260  	 * Otherwise hibernation might fail when mapping all the vectors back
5261  	 * to CPU0.
5262  	 */
5263  	ice_free_irq_msix_misc(pf);
5264  	ice_for_each_vsi(pf, v) {
5265  		if (!pf->vsi[v])
5266  			continue;
5267  		ice_vsi_free_q_vectors(pf->vsi[v]);
5268  	}
5269  	ice_clear_interrupt_scheme(pf);
5270  
5271  	pci_save_state(pdev);
5272  	pci_wake_from_d3(pdev, pf->wol_ena);
5273  	pci_set_power_state(pdev, PCI_D3hot);
5274  	return 0;
5275  }
5276  
5277  /**
5278   * ice_resume - PM callback for waking up from D3
5279   * @dev: generic device information structure
5280   */
5281  static int __maybe_unused ice_resume(struct device *dev)
5282  {
5283  	struct pci_dev *pdev = to_pci_dev(dev);
5284  	enum ice_reset_req reset_type;
5285  	struct ice_pf *pf;
5286  	struct ice_hw *hw;
5287  	int ret;
5288  
5289  	pci_set_power_state(pdev, PCI_D0);
5290  	pci_restore_state(pdev);
5291  	pci_save_state(pdev);
5292  
5293  	if (!pci_device_is_present(pdev))
5294  		return -ENODEV;
5295  
5296  	ret = pci_enable_device_mem(pdev);
5297  	if (ret) {
5298  		dev_err(dev, "Cannot enable device after suspend\n");
5299  		return ret;
5300  	}
5301  
5302  	pf = pci_get_drvdata(pdev);
5303  	hw = &pf->hw;
5304  
5305  	pf->wakeup_reason = rd32(hw, PFPM_WUS);
5306  	ice_print_wake_reason(pf);
5307  
5308  	/* We cleared the interrupt scheme when we suspended, so we need to
5309  	 * restore it now to resume device functionality.
5310  	 */
5311  	ret = ice_reinit_interrupt_scheme(pf);
5312  	if (ret)
5313  		dev_err(dev, "Cannot restore interrupt scheme: %d\n", ret);
5314  
5315  	clear_bit(ICE_DOWN, pf->state);
5316  	/* Now perform PF reset and rebuild */
5317  	reset_type = ICE_RESET_PFR;
5318  	/* re-enable service task for reset, but allow reset to schedule it */
5319  	clear_bit(ICE_SERVICE_DIS, pf->state);
5320  
5321  	if (ice_schedule_reset(pf, reset_type))
5322  		dev_err(dev, "Reset during resume failed.\n");
5323  
5324  	clear_bit(ICE_SUSPENDED, pf->state);
5325  	ice_service_task_restart(pf);
5326  
5327  	/* Restart the service task */
5328  	mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
5329  
5330  	return 0;
5331  }
5332  #endif /* CONFIG_PM */
5333  
5334  /**
5335   * ice_pci_err_detected - warning that PCI error has been detected
5336   * @pdev: PCI device information struct
5337   * @err: the type of PCI error
5338   *
5339   * Called to warn that something happened on the PCI bus and the error handling
5340   * is in progress.  Allows the driver to gracefully prepare/handle PCI errors.
5341   */
5342  static pci_ers_result_t
5343  ice_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t err)
5344  {
5345  	struct ice_pf *pf = pci_get_drvdata(pdev);
5346  
5347  	if (!pf) {
5348  		dev_err(&pdev->dev, "%s: unrecoverable device error %d\n",
5349  			__func__, err);
5350  		return PCI_ERS_RESULT_DISCONNECT;
5351  	}
5352  
5353  	if (!test_bit(ICE_SUSPENDED, pf->state)) {
5354  		ice_service_task_stop(pf);
5355  
5356  		if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) {
5357  			set_bit(ICE_PFR_REQ, pf->state);
5358  			ice_prepare_for_reset(pf, ICE_RESET_PFR);
5359  		}
5360  	}
5361  
5362  	return PCI_ERS_RESULT_NEED_RESET;
5363  }
5364  
5365  /**
5366   * ice_pci_err_slot_reset - a PCI slot reset has just happened
5367   * @pdev: PCI device information struct
5368   *
5369   * Called to determine if the driver can recover from the PCI slot reset by
5370   * using a register read to determine if the device is recoverable.
5371   */
5372  static pci_ers_result_t ice_pci_err_slot_reset(struct pci_dev *pdev)
5373  {
5374  	struct ice_pf *pf = pci_get_drvdata(pdev);
5375  	pci_ers_result_t result;
5376  	int err;
5377  	u32 reg;
5378  
5379  	err = pci_enable_device_mem(pdev);
5380  	if (err) {
5381  		dev_err(&pdev->dev, "Cannot re-enable PCI device after reset, error %d\n",
5382  			err);
5383  		result = PCI_ERS_RESULT_DISCONNECT;
5384  	} else {
5385  		pci_set_master(pdev);
5386  		pci_restore_state(pdev);
5387  		pci_save_state(pdev);
5388  		pci_wake_from_d3(pdev, false);
5389  
5390  		/* Check for life */
5391  		reg = rd32(&pf->hw, GLGEN_RTRIG);
5392  		if (!reg)
5393  			result = PCI_ERS_RESULT_RECOVERED;
5394  		else
5395  			result = PCI_ERS_RESULT_DISCONNECT;
5396  	}
5397  
5398  	return result;
5399  }
5400  
5401  /**
5402   * ice_pci_err_resume - restart operations after PCI error recovery
5403   * @pdev: PCI device information struct
5404   *
5405   * Called to allow the driver to bring things back up after PCI error and/or
5406   * reset recovery have finished
5407   */
5408  static void ice_pci_err_resume(struct pci_dev *pdev)
5409  {
5410  	struct ice_pf *pf = pci_get_drvdata(pdev);
5411  
5412  	if (!pf) {
5413  		dev_err(&pdev->dev, "%s failed, device is unrecoverable\n",
5414  			__func__);
5415  		return;
5416  	}
5417  
5418  	if (test_bit(ICE_SUSPENDED, pf->state)) {
5419  		dev_dbg(&pdev->dev, "%s failed to resume normal operations!\n",
5420  			__func__);
5421  		return;
5422  	}
5423  
5424  	ice_restore_all_vfs_msi_state(pdev);
5425  
5426  	ice_do_reset(pf, ICE_RESET_PFR);
5427  	ice_service_task_restart(pf);
5428  	mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
5429  }
5430  
5431  /**
5432   * ice_pci_err_reset_prepare - prepare device driver for PCI reset
5433   * @pdev: PCI device information struct
5434   */
5435  static void ice_pci_err_reset_prepare(struct pci_dev *pdev)
5436  {
5437  	struct ice_pf *pf = pci_get_drvdata(pdev);
5438  
5439  	if (!test_bit(ICE_SUSPENDED, pf->state)) {
5440  		ice_service_task_stop(pf);
5441  
5442  		if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) {
5443  			set_bit(ICE_PFR_REQ, pf->state);
5444  			ice_prepare_for_reset(pf, ICE_RESET_PFR);
5445  		}
5446  	}
5447  }
5448  
5449  /**
5450   * ice_pci_err_reset_done - PCI reset done, device driver reset can begin
5451   * @pdev: PCI device information struct
5452   */
5453  static void ice_pci_err_reset_done(struct pci_dev *pdev)
5454  {
5455  	ice_pci_err_resume(pdev);
5456  }
5457  
5458  /* ice_pci_tbl - PCI Device ID Table
5459   *
5460   * Wildcard entries (PCI_ANY_ID) should come last
5461   * Last entry must be all 0s
5462   *
5463   * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
5464   *   Class, Class Mask, private data (not used) }
5465   */
5466  static const struct pci_device_id ice_pci_tbl[] = {
5467  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE), 0 },
5468  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP), 0 },
5469  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP), 0 },
5470  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_BACKPLANE), 0 },
5471  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_QSFP), 0 },
5472  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_SFP), 0 },
5473  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_BACKPLANE), 0 },
5474  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_QSFP), 0 },
5475  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SFP), 0 },
5476  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_10G_BASE_T), 0 },
5477  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SGMII), 0 },
5478  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_BACKPLANE), 0 },
5479  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_QSFP), 0 },
5480  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SFP), 0 },
5481  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_10G_BASE_T), 0 },
5482  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SGMII), 0 },
5483  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_BACKPLANE), 0 },
5484  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SFP), 0 },
5485  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_10G_BASE_T), 0 },
5486  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SGMII), 0 },
5487  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_BACKPLANE), 0 },
5488  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_SFP), 0 },
5489  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_10G_BASE_T), 0 },
5490  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_1GBE), 0 },
5491  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_QSFP), 0 },
5492  	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822_SI_DFLT), 0 },
5493  	/* required last entry */
5494  	{ 0, }
5495  };
5496  MODULE_DEVICE_TABLE(pci, ice_pci_tbl);
5497  
5498  static __maybe_unused SIMPLE_DEV_PM_OPS(ice_pm_ops, ice_suspend, ice_resume);
5499  
5500  static const struct pci_error_handlers ice_pci_err_handler = {
5501  	.error_detected = ice_pci_err_detected,
5502  	.slot_reset = ice_pci_err_slot_reset,
5503  	.reset_prepare = ice_pci_err_reset_prepare,
5504  	.reset_done = ice_pci_err_reset_done,
5505  	.resume = ice_pci_err_resume
5506  };
5507  
5508  static struct pci_driver ice_driver = {
5509  	.name = KBUILD_MODNAME,
5510  	.id_table = ice_pci_tbl,
5511  	.probe = ice_probe,
5512  	.remove = ice_remove,
5513  #ifdef CONFIG_PM
5514  	.driver.pm = &ice_pm_ops,
5515  #endif /* CONFIG_PM */
5516  	.shutdown = ice_shutdown,
5517  	.sriov_configure = ice_sriov_configure,
5518  	.err_handler = &ice_pci_err_handler
5519  };
5520  
5521  /**
5522   * ice_module_init - Driver registration routine
5523   *
5524   * ice_module_init is the first routine called when the driver is
5525   * loaded. All it does is register with the PCI subsystem.
5526   */
5527  static int __init ice_module_init(void)
5528  {
5529  	int status;
5530  
5531  	pr_info("%s\n", ice_driver_string);
5532  	pr_info("%s\n", ice_copyright);
5533  
5534  	ice_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, KBUILD_MODNAME);
5535  	if (!ice_wq) {
5536  		pr_err("Failed to create workqueue\n");
5537  		return -ENOMEM;
5538  	}
5539  
5540  	status = pci_register_driver(&ice_driver);
5541  	if (status) {
5542  		pr_err("failed to register PCI driver, err %d\n", status);
5543  		destroy_workqueue(ice_wq);
5544  	}
5545  
5546  	return status;
5547  }
5548  module_init(ice_module_init);
5549  
5550  /**
5551   * ice_module_exit - Driver exit cleanup routine
5552   *
5553   * ice_module_exit is called just before the driver is removed
5554   * from memory.
5555   */
5556  static void __exit ice_module_exit(void)
5557  {
5558  	pci_unregister_driver(&ice_driver);
5559  	destroy_workqueue(ice_wq);
5560  	pr_info("module unloaded\n");
5561  }
5562  module_exit(ice_module_exit);
5563  
5564  /**
5565   * ice_set_mac_address - NDO callback to set MAC address
5566   * @netdev: network interface device structure
5567   * @pi: pointer to an address structure
5568   *
5569   * Returns 0 on success, negative on failure
5570   */
5571  static int ice_set_mac_address(struct net_device *netdev, void *pi)
5572  {
5573  	struct ice_netdev_priv *np = netdev_priv(netdev);
5574  	struct ice_vsi *vsi = np->vsi;
5575  	struct ice_pf *pf = vsi->back;
5576  	struct ice_hw *hw = &pf->hw;
5577  	struct sockaddr *addr = pi;
5578  	u8 old_mac[ETH_ALEN];
5579  	u8 flags = 0;
5580  	u8 *mac;
5581  	int err;
5582  
5583  	mac = (u8 *)addr->sa_data;
5584  
5585  	if (!is_valid_ether_addr(mac))
5586  		return -EADDRNOTAVAIL;
5587  
5588  	if (ether_addr_equal(netdev->dev_addr, mac)) {
5589  		netdev_dbg(netdev, "already using mac %pM\n", mac);
5590  		return 0;
5591  	}
5592  
5593  	if (test_bit(ICE_DOWN, pf->state) ||
5594  	    ice_is_reset_in_progress(pf->state)) {
5595  		netdev_err(netdev, "can't set mac %pM. device not ready\n",
5596  			   mac);
5597  		return -EBUSY;
5598  	}
5599  
5600  	if (ice_chnl_dmac_fltr_cnt(pf)) {
5601  		netdev_err(netdev, "can't set mac %pM. Device has tc-flower filters, delete all of them and try again\n",
5602  			   mac);
5603  		return -EAGAIN;
5604  	}
5605  
5606  	netif_addr_lock_bh(netdev);
5607  	ether_addr_copy(old_mac, netdev->dev_addr);
5608  	/* change the netdev's MAC address */
5609  	eth_hw_addr_set(netdev, mac);
5610  	netif_addr_unlock_bh(netdev);
5611  
5612  	/* Clean up old MAC filter. Not an error if old filter doesn't exist */
5613  	err = ice_fltr_remove_mac(vsi, old_mac, ICE_FWD_TO_VSI);
5614  	if (err && err != -ENOENT) {
5615  		err = -EADDRNOTAVAIL;
5616  		goto err_update_filters;
5617  	}
5618  
5619  	/* Add filter for new MAC. If filter exists, return success */
5620  	err = ice_fltr_add_mac(vsi, mac, ICE_FWD_TO_VSI);
5621  	if (err == -EEXIST) {
5622  		/* Although this MAC filter is already present in hardware it's
5623  		 * possible in some cases (e.g. bonding) that dev_addr was
5624  		 * modified outside of the driver and needs to be restored back
5625  		 * to this value.
5626  		 */
5627  		netdev_dbg(netdev, "filter for MAC %pM already exists\n", mac);
5628  
5629  		return 0;
5630  	} else if (err) {
5631  		/* error if the new filter addition failed */
5632  		err = -EADDRNOTAVAIL;
5633  	}
5634  
5635  err_update_filters:
5636  	if (err) {
5637  		netdev_err(netdev, "can't set MAC %pM. filter update failed\n",
5638  			   mac);
5639  		netif_addr_lock_bh(netdev);
5640  		eth_hw_addr_set(netdev, old_mac);
5641  		netif_addr_unlock_bh(netdev);
5642  		return err;
5643  	}
5644  
5645  	netdev_dbg(vsi->netdev, "updated MAC address to %pM\n",
5646  		   netdev->dev_addr);
5647  
5648  	/* write new MAC address to the firmware */
5649  	flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL;
5650  	err = ice_aq_manage_mac_write(hw, mac, flags, NULL);
5651  	if (err) {
5652  		netdev_err(netdev, "can't set MAC %pM. write to firmware failed error %d\n",
5653  			   mac, err);
5654  	}
5655  	return 0;
5656  }
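
/* Usage sketch (assumption, not part of the driver): this NDO is reached via
 * the generic dev_set_mac_address() path, e.g.
 *
 *   ip link set dev <iface> address 00:11:22:33:44:55
 *
 * On success the VSI unicast filter and the firmware LAA are updated; if the
 * filter update fails, the previous address is restored on the netdev.
 */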
5657  
5658  /**
5659   * ice_set_rx_mode - NDO callback to set the netdev filters
5660   * @netdev: network interface device structure
5661   */
5662  static void ice_set_rx_mode(struct net_device *netdev)
5663  {
5664  	struct ice_netdev_priv *np = netdev_priv(netdev);
5665  	struct ice_vsi *vsi = np->vsi;
5666  
5667  	if (!vsi)
5668  		return;
5669  
5670  	/* Set the flags to synchronize filters
5671  	 * ndo_set_rx_mode may be triggered even without a change in netdev
5672  	 * flags
5673  	 */
5674  	set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
5675  	set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
5676  	set_bit(ICE_FLAG_FLTR_SYNC, vsi->back->flags);
5677  
5678  	/* schedule our worker thread which will take care of
5679  	 * applying the new filter changes
5680  	 */
5681  	ice_service_task_schedule(vsi->back);
5682  }
5683  
5684  /**
5685   * ice_set_tx_maxrate - NDO callback to set the maximum per-queue bitrate
5686   * @netdev: network interface device structure
5687   * @queue_index: Queue ID
5688   * @maxrate: maximum bandwidth in Mbps
5689   */
5690  static int
5691  ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate)
5692  {
5693  	struct ice_netdev_priv *np = netdev_priv(netdev);
5694  	struct ice_vsi *vsi = np->vsi;
5695  	u16 q_handle;
5696  	int status;
5697  	u8 tc;
5698  
5699  	/* Validate maxrate requested is within permitted range */
5700  	if (maxrate && (maxrate > (ICE_SCHED_MAX_BW / 1000))) {
5701  		netdev_err(netdev, "Invalid max rate %d specified for the queue %d\n",
5702  			   maxrate, queue_index);
5703  		return -EINVAL;
5704  	}
5705  
5706  	q_handle = vsi->tx_rings[queue_index]->q_handle;
5707  	tc = ice_dcb_get_tc(vsi, queue_index);
5708  
5709  	/* Set BW back to default when the user sets maxrate to 0 */
5710  	if (!maxrate)
5711  		status = ice_cfg_q_bw_dflt_lmt(vsi->port_info, vsi->idx, tc,
5712  					       q_handle, ICE_MAX_BW);
5713  	else
5714  		status = ice_cfg_q_bw_lmt(vsi->port_info, vsi->idx, tc,
5715  					  q_handle, ICE_MAX_BW, maxrate * 1000);
5716  	if (status)
5717  		netdev_err(netdev, "Unable to set Tx max rate, error %d\n",
5718  			   status);
5719  
5720  	return status;
5721  }
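
/* Usage sketch (assumption, not part of the driver): ndo_set_tx_maxrate is
 * normally driven by the per-queue sysfs attribute, e.g.
 *
 *   echo 500 > /sys/class/net/<iface>/queues/tx-0/tx_maxrate
 *
 * which requests a 500 Mbps cap on Tx queue 0. The value is converted to
 * Kbps above (maxrate * 1000) before programming the scheduler node, and
 * writing 0 restores the default (unlimited) queue bandwidth.
 */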
5722  
5723  /**
5724   * ice_fdb_add - add an entry to the hardware database
5725   * @ndm: the input from the stack
5726   * @tb: pointer to array of nladdr (unused)
5727   * @dev: the net device pointer
5728   * @addr: the MAC address entry being added
5729   * @vid: VLAN ID
5730   * @flags: instructions from stack about fdb operation
5731   * @extack: netlink extended ack
5732   */
5733  static int
5734  ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[],
5735  	    struct net_device *dev, const unsigned char *addr, u16 vid,
5736  	    u16 flags, struct netlink_ext_ack __always_unused *extack)
5737  {
5738  	int err;
5739  
5740  	if (vid) {
5741  		netdev_err(dev, "VLANs aren't supported yet for dev_uc|mc_add()\n");
5742  		return -EINVAL;
5743  	}
5744  	if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
5745  		netdev_err(dev, "FDB only supports static addresses\n");
5746  		return -EINVAL;
5747  	}
5748  
5749  	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
5750  		err = dev_uc_add_excl(dev, addr);
5751  	else if (is_multicast_ether_addr(addr))
5752  		err = dev_mc_add_excl(dev, addr);
5753  	else
5754  		err = -EINVAL;
5755  
5756  	/* Only return duplicate errors if NLM_F_EXCL is set */
5757  	if (err == -EEXIST && !(flags & NLM_F_EXCL))
5758  		err = 0;
5759  
5760  	return err;
5761  }
5762  
5763  /**
5764   * ice_fdb_del - delete an entry from the hardware database
5765   * @ndm: the input from the stack
5766   * @tb: pointer to array of nladdr (unused)
5767   * @dev: the net device pointer
5768   * @addr: the MAC address entry being added
5769   * @vid: VLAN ID
5770   * @extack: netlink extended ack
5771   */
5772  static int
5773  ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[],
5774  	    struct net_device *dev, const unsigned char *addr,
5775  	    __always_unused u16 vid, struct netlink_ext_ack *extack)
5776  {
5777  	int err;
5778  
5779  	if (ndm->ndm_state & NUD_PERMANENT) {
5780  		netdev_err(dev, "FDB only supports static addresses\n");
5781  		return -EINVAL;
5782  	}
5783  
5784  	if (is_unicast_ether_addr(addr))
5785  		err = dev_uc_del(dev, addr);
5786  	else if (is_multicast_ether_addr(addr))
5787  		err = dev_mc_del(dev, addr);
5788  	else
5789  		err = -EINVAL;
5790  
5791  	return err;
5792  }
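
/* Usage sketch (assumption, not part of the driver): these FDB callbacks are
 * reached through the bridge netlink interface, e.g.
 *
 *   bridge fdb add 01:00:5e:00:00:42 dev <iface>
 *   bridge fdb del 01:00:5e:00:00:42 dev <iface>
 *
 * ice_fdb_add() rejects VLAN-qualified entries, and both callbacks route
 * unicast/multicast addresses to the dev_uc_*()/dev_mc_*() list helpers.
 */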
5793  
5794  #define NETIF_VLAN_OFFLOAD_FEATURES	(NETIF_F_HW_VLAN_CTAG_RX | \
5795  					 NETIF_F_HW_VLAN_CTAG_TX | \
5796  					 NETIF_F_HW_VLAN_STAG_RX | \
5797  					 NETIF_F_HW_VLAN_STAG_TX)
5798  
5799  #define NETIF_VLAN_STRIPPING_FEATURES	(NETIF_F_HW_VLAN_CTAG_RX | \
5800  					 NETIF_F_HW_VLAN_STAG_RX)
5801  
5802  #define NETIF_VLAN_FILTERING_FEATURES	(NETIF_F_HW_VLAN_CTAG_FILTER | \
5803  					 NETIF_F_HW_VLAN_STAG_FILTER)
5804  
5805  /**
5806   * ice_fix_features - fix the netdev features flags based on device limitations
5807   * @netdev: ptr to the netdev that flags are being fixed on
5808   * @features: features that need to be checked and possibly fixed
5809   *
5810   * Make sure any fixups are made to features in this callback. This enables the
5811   * driver to not have to check unsupported configurations throughout the driver
5812   * because that's the responsibility of this callback.
5813   *
5814   * Single VLAN Mode (SVM) Supported Features:
5815   *	NETIF_F_HW_VLAN_CTAG_FILTER
5816   *	NETIF_F_HW_VLAN_CTAG_RX
5817   *	NETIF_F_HW_VLAN_CTAG_TX
5818   *
5819   * Double VLAN Mode (DVM) Supported Features:
5820   *	NETIF_F_HW_VLAN_CTAG_FILTER
5821   *	NETIF_F_HW_VLAN_CTAG_RX
5822   *	NETIF_F_HW_VLAN_CTAG_TX
5823   *
5824   *	NETIF_F_HW_VLAN_STAG_FILTER
5825   *	NETIF_F_HW_VLAN_STAG_RX
5826   *	NETIF_F_HW_VLAN_STAG_TX
5827   *
5828   * Features that need fixing:
5829   *	Cannot simultaneously enable CTAG and STAG stripping and/or insertion.
5830   *	These are mutually exclusive as the VSI context cannot support multiple
5831   *	VLAN ethertypes simultaneously for stripping and/or insertion. If this
5832   *	is not done, then default to clearing the requested STAG offload
5833   *	settings.
5834   *
5835   *	All supported filtering has to be enabled or disabled together. For
5836   *	example, in DVM, CTAG and STAG filtering have to be enabled and disabled
5837   *	together. If this is not done, then default to VLAN filtering disabled.
5838   *	These are mutually exclusive as there is currently no way to
5839   *	enable/disable VLAN filtering based on VLAN ethertype when using VLAN
5840   *	prune rules.
5841   */
5842  static netdev_features_t
5843  ice_fix_features(struct net_device *netdev, netdev_features_t features)
5844  {
5845  	struct ice_netdev_priv *np = netdev_priv(netdev);
5846  	netdev_features_t req_vlan_fltr, cur_vlan_fltr;
5847  	bool cur_ctag, cur_stag, req_ctag, req_stag;
5848  
5849  	cur_vlan_fltr = netdev->features & NETIF_VLAN_FILTERING_FEATURES;
5850  	cur_ctag = cur_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;
5851  	cur_stag = cur_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;
5852  
5853  	req_vlan_fltr = features & NETIF_VLAN_FILTERING_FEATURES;
5854  	req_ctag = req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;
5855  	req_stag = req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;
5856  
5857  	if (req_vlan_fltr != cur_vlan_fltr) {
5858  		if (ice_is_dvm_ena(&np->vsi->back->hw)) {
5859  			if (req_ctag && req_stag) {
5860  				features |= NETIF_VLAN_FILTERING_FEATURES;
5861  			} else if (!req_ctag && !req_stag) {
5862  				features &= ~NETIF_VLAN_FILTERING_FEATURES;
5863  			} else if ((!cur_ctag && req_ctag && !cur_stag) ||
5864  				   (!cur_stag && req_stag && !cur_ctag)) {
5865  				features |= NETIF_VLAN_FILTERING_FEATURES;
5866  				netdev_warn(netdev, "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been enabled for both types.\n");
5867  			} else if ((cur_ctag && !req_ctag && cur_stag) ||
5868  				   (cur_stag && !req_stag && cur_ctag)) {
5869  				features &= ~NETIF_VLAN_FILTERING_FEATURES;
5870  				netdev_warn(netdev, "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been disabled for both types.\n");
5871  			}
5872  		} else {
5873  			if (req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER)
5874  				netdev_warn(netdev, "cannot support requested 802.1ad filtering setting in SVM mode\n");
5875  
5876  			if (req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER)
5877  				features |= NETIF_F_HW_VLAN_CTAG_FILTER;
5878  		}
5879  	}
5880  
5881  	if ((features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX)) &&
5882  	    (features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX))) {
5883  		netdev_warn(netdev, "cannot support CTAG and STAG VLAN stripping and/or insertion simultaneously since CTAG and STAG offloads are mutually exclusive, clearing STAG offload settings\n");
5884  		features &= ~(NETIF_F_HW_VLAN_STAG_RX |
5885  			      NETIF_F_HW_VLAN_STAG_TX);
5886  	}
5887  
5888  	if (!(netdev->features & NETIF_F_RXFCS) &&
5889  	    (features & NETIF_F_RXFCS) &&
5890  	    (features & NETIF_VLAN_STRIPPING_FEATURES) &&
5891  	    !ice_vsi_has_non_zero_vlans(np->vsi)) {
5892  		netdev_warn(netdev, "Disabling VLAN stripping as FCS/CRC stripping is also disabled and there is no VLAN configured\n");
5893  		features &= ~NETIF_VLAN_STRIPPING_FEATURES;
5894  	}
5895  
5896  	return features;
5897  }
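
/* Worked example of the filtering fixup above (illustration only): with DVM
 * enabled, both filters currently off, and a request that enables only CTAG
 * filtering:
 *
 *   cur: CTAG_FILTER=0, STAG_FILTER=0
 *   req: CTAG_FILTER=1, STAG_FILTER=0
 *
 * the result is CTAG_FILTER=1, STAG_FILTER=1 plus a warning, because prune
 * rules cannot filter on just one VLAN ethertype at a time.
 */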
5898  
5899  /**
5900   * ice_set_vlan_offload_features - set VLAN offload features for the PF VSI
5901   * @vsi: PF's VSI
5902   * @features: features used to determine VLAN offload settings
5903   *
5904   * First, determine the vlan_ethertype based on the VLAN offload bits in
5905   * features. Then determine if stripping and insertion should be enabled or
5906   * disabled. Finally enable or disable VLAN stripping and insertion.
5907   */
5908  static int
5909  ice_set_vlan_offload_features(struct ice_vsi *vsi, netdev_features_t features)
5910  {
5911  	bool enable_stripping = true, enable_insertion = true;
5912  	struct ice_vsi_vlan_ops *vlan_ops;
5913  	int strip_err = 0, insert_err = 0;
5914  	u16 vlan_ethertype = 0;
5915  
5916  	vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
5917  
5918  	if (features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX))
5919  		vlan_ethertype = ETH_P_8021AD;
5920  	else if (features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX))
5921  		vlan_ethertype = ETH_P_8021Q;
5922  
5923  	if (!(features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_CTAG_RX)))
5924  		enable_stripping = false;
5925  	if (!(features & (NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_CTAG_TX)))
5926  		enable_insertion = false;
5927  
5928  	if (enable_stripping)
5929  		strip_err = vlan_ops->ena_stripping(vsi, vlan_ethertype);
5930  	else
5931  		strip_err = vlan_ops->dis_stripping(vsi);
5932  
5933  	if (enable_insertion)
5934  		insert_err = vlan_ops->ena_insertion(vsi, vlan_ethertype);
5935  	else
5936  		insert_err = vlan_ops->dis_insertion(vsi);
5937  
5938  	if (strip_err || insert_err)
5939  		return -EIO;
5940  
5941  	return 0;
5942  }
5943  
5944  /**
5945   * ice_set_vlan_filtering_features - set VLAN filtering features for the PF VSI
5946   * @vsi: PF's VSI
5947   * @features: features used to determine VLAN filtering settings
5948   *
5949   * Enable or disable Rx VLAN filtering based on the VLAN filtering bits in the
5950   * features.
5951   */
5952  static int
5953  ice_set_vlan_filtering_features(struct ice_vsi *vsi, netdev_features_t features)
5954  {
5955  	struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
5956  	int err = 0;
5957  
5958  	/* support Single VLAN Mode (SVM) and Double VLAN Mode (DVM) by checking
5959  	 * if either bit is set
5960  	 */
5961  	if (features &
5962  	    (NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER))
5963  		err = vlan_ops->ena_rx_filtering(vsi);
5964  	else
5965  		err = vlan_ops->dis_rx_filtering(vsi);
5966  
5967  	return err;
5968  }
5969  
5970  /**
5971   * ice_set_vlan_features - set VLAN settings based on suggested feature set
5972   * @netdev: ptr to the netdev being adjusted
5973   * @features: the feature set that the stack is suggesting
5974   *
5975   * Only update VLAN settings if the requested_vlan_features are different than
5976   * the current_vlan_features.
5977   */
5978  static int
5979  ice_set_vlan_features(struct net_device *netdev, netdev_features_t features)
5980  {
5981  	netdev_features_t current_vlan_features, requested_vlan_features;
5982  	struct ice_netdev_priv *np = netdev_priv(netdev);
5983  	struct ice_vsi *vsi = np->vsi;
5984  	int err;
5985  
5986  	current_vlan_features = netdev->features & NETIF_VLAN_OFFLOAD_FEATURES;
5987  	requested_vlan_features = features & NETIF_VLAN_OFFLOAD_FEATURES;
5988  	if (current_vlan_features ^ requested_vlan_features) {
5989  		if ((features & NETIF_F_RXFCS) &&
5990  		    (features & NETIF_VLAN_STRIPPING_FEATURES)) {
5991  			dev_err(ice_pf_to_dev(vsi->back),
5992  				"To enable VLAN stripping, you must first enable FCS/CRC stripping\n");
5993  			return -EIO;
5994  		}
5995  
5996  		err = ice_set_vlan_offload_features(vsi, features);
5997  		if (err)
5998  			return err;
5999  	}
6000  
6001  	current_vlan_features = netdev->features &
6002  		NETIF_VLAN_FILTERING_FEATURES;
6003  	requested_vlan_features = features & NETIF_VLAN_FILTERING_FEATURES;
6004  	if (current_vlan_features ^ requested_vlan_features) {
6005  		err = ice_set_vlan_filtering_features(vsi, features);
6006  		if (err)
6007  			return err;
6008  	}
6009  
6010  	return 0;
6011  }
6012  
6013  /**
6014   * ice_set_loopback - turn on/off loopback mode on underlying PF
6015   * @vsi: ptr to VSI
6016   * @ena: flag to indicate the on/off setting
6017   */
6018  static int ice_set_loopback(struct ice_vsi *vsi, bool ena)
6019  {
6020  	bool if_running = netif_running(vsi->netdev);
6021  	int ret;
6022  
6023  	if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
6024  		ret = ice_down(vsi);
6025  		if (ret) {
6026  			netdev_err(vsi->netdev, "Preparing device to toggle loopback failed\n");
6027  			return ret;
6028  		}
6029  	}
6030  	ret = ice_aq_set_mac_loopback(&vsi->back->hw, ena, NULL);
6031  	if (ret)
6032  		netdev_err(vsi->netdev, "Failed to toggle loopback state\n");
6033  	if (if_running)
6034  		ret = ice_up(vsi);
6035  
6036  	return ret;
6037  }
6038  
6039  /**
6040   * ice_set_features - set the netdev feature flags
6041   * @netdev: ptr to the netdev being adjusted
6042   * @features: the feature set that the stack is suggesting
6043   */
6044  static int
6045  ice_set_features(struct net_device *netdev, netdev_features_t features)
6046  {
6047  	netdev_features_t changed = netdev->features ^ features;
6048  	struct ice_netdev_priv *np = netdev_priv(netdev);
6049  	struct ice_vsi *vsi = np->vsi;
6050  	struct ice_pf *pf = vsi->back;
6051  	int ret = 0;
6052  
6053  	/* Don't set any netdev advanced features with device in Safe Mode */
6054  	if (ice_is_safe_mode(pf)) {
6055  		dev_err(ice_pf_to_dev(pf),
6056  			"Device is in Safe Mode - not enabling advanced netdev features\n");
6057  		return ret;
6058  	}
6059  
6060  	/* Do not change setting during reset */
6061  	if (ice_is_reset_in_progress(pf->state)) {
6062  		dev_err(ice_pf_to_dev(pf),
6063  			"Device is resetting, changing advanced netdev features temporarily unavailable.\n");
6064  		return -EBUSY;
6065  	}
6066  
6067  	/* Multiple features can be changed in one call so keep features in
6068  	 * separate if/else statements to guarantee each feature is checked
6069  	 */
6070  	if (changed & NETIF_F_RXHASH)
6071  		ice_vsi_manage_rss_lut(vsi, !!(features & NETIF_F_RXHASH));
6072  
6073  	ret = ice_set_vlan_features(netdev, features);
6074  	if (ret)
6075  		return ret;
6076  
6077  	/* Turn on receive of FCS aka CRC, and after setting this
6078  	 * flag the packet data will have the 4 byte CRC appended
6079  	 */
6080  	if (changed & NETIF_F_RXFCS) {
6081  		if ((features & NETIF_F_RXFCS) &&
6082  		    (features & NETIF_VLAN_STRIPPING_FEATURES)) {
6083  			dev_err(ice_pf_to_dev(vsi->back),
6084  				"To disable FCS/CRC stripping, you must first disable VLAN stripping\n");
6085  			return -EIO;
6086  		}
6087  
6088  		ice_vsi_cfg_crc_strip(vsi, !!(features & NETIF_F_RXFCS));
6089  		ret = ice_down_up(vsi);
6090  		if (ret)
6091  			return ret;
6092  	}
6093  
6094  	if (changed & NETIF_F_NTUPLE) {
6095  		bool ena = !!(features & NETIF_F_NTUPLE);
6096  
6097  		ice_vsi_manage_fdir(vsi, ena);
6098  		ena ? ice_init_arfs(vsi) : ice_clear_arfs(vsi);
6099  	}
6100  
6101  	/* don't turn off hw_tc_offload when ADQ is already enabled */
6102  	if (!(features & NETIF_F_HW_TC) && ice_is_adq_active(pf)) {
6103  		dev_err(ice_pf_to_dev(pf), "ADQ is active, can't turn hw_tc_offload off\n");
6104  		return -EACCES;
6105  	}
6106  
6107  	if (changed & NETIF_F_HW_TC) {
6108  		bool ena = !!(features & NETIF_F_HW_TC);
6109  
6110  		ena ? set_bit(ICE_FLAG_CLS_FLOWER, pf->flags) :
6111  		      clear_bit(ICE_FLAG_CLS_FLOWER, pf->flags);
6112  	}
6113  
6114  	if (changed & NETIF_F_LOOPBACK)
6115  		ret = ice_set_loopback(vsi, !!(features & NETIF_F_LOOPBACK));
6116  
6117  	return ret;
6118  }
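
/* Usage sketch (assumption, not part of the driver): the feature bits handled
 * above correspond to ethtool feature strings defined by the core, e.g.
 *
 *   ethtool -K <iface> rx-ntuple-filter on   # NETIF_F_NTUPLE, Flow Director
 *   ethtool -K <iface> hw-tc-offload on      # NETIF_F_HW_TC, tc-flower
 *   ethtool -K <iface> rx-fcs on             # NETIF_F_RXFCS, keep CRC
 *
 * The string names above are the assumed core mappings, listed only as an
 * illustration.
 */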
6119  
6120  /**
6121   * ice_vsi_vlan_setup - Setup VLAN offload properties on a PF VSI
6122   * @vsi: VSI to setup VLAN properties for
6123   */
6124  static int ice_vsi_vlan_setup(struct ice_vsi *vsi)
6125  {
6126  	int err;
6127  
6128  	err = ice_set_vlan_offload_features(vsi, vsi->netdev->features);
6129  	if (err)
6130  		return err;
6131  
6132  	err = ice_set_vlan_filtering_features(vsi, vsi->netdev->features);
6133  	if (err)
6134  		return err;
6135  
6136  	return ice_vsi_add_vlan_zero(vsi);
6137  }
6138  
6139  /**
6140   * ice_vsi_cfg - Setup the VSI
6141   * @vsi: the VSI being configured
6142   *
6143   * Return 0 on success and negative value on error
6144   */
6145  int ice_vsi_cfg(struct ice_vsi *vsi)
6146  {
6147  	int err;
6148  
6149  	if (vsi->netdev) {
6150  		ice_set_rx_mode(vsi->netdev);
6151  
6152  		if (vsi->type != ICE_VSI_LB) {
6153  			err = ice_vsi_vlan_setup(vsi);
6154  
6155  			if (err)
6156  				return err;
6157  		}
6158  	}
6159  	ice_vsi_cfg_dcb_rings(vsi);
6160  
6161  	err = ice_vsi_cfg_lan_txqs(vsi);
6162  	if (!err && ice_is_xdp_ena_vsi(vsi))
6163  		err = ice_vsi_cfg_xdp_txqs(vsi);
6164  	if (!err)
6165  		err = ice_vsi_cfg_rxqs(vsi);
6166  
6167  	return err;
6168  }
6169  
6170  /* THEORY OF MODERATION:
6171   * The ice driver hardware works differently than the hardware that DIMLIB was
6172   * originally made for. ice hardware doesn't have packet count limits that
6173   * can trigger an interrupt, but it *does* have interrupt rate limit support,
6174   * which is hard-coded to a limit of 250,000 ints/second.
6175   * If not using dynamic moderation, the INTRL value can be modified
6176   * by ethtool rx-usecs-high.
6177   */
6178  struct ice_dim {
6179  	/* the throttle rate for interrupts, basically worst case delay before
6180  	 * an initial interrupt fires, value is stored in microseconds.
6181  	 */
6182  	u16 itr;
6183  };
6184  
6185  /* Make a different profile for Rx that doesn't allow quite so aggressive
6186   * moderation at the high end (it maxes out at 126us or about 8k interrupts a
6187   * second).
6188   */
6189  static const struct ice_dim rx_profile[] = {
6190  	{2},    /* 500,000 ints/s, capped at 250K by INTRL */
6191  	{8},    /* 125,000 ints/s */
6192  	{16},   /*  62,500 ints/s */
6193  	{62},   /*  16,129 ints/s */
6194  	{126}   /*   7,936 ints/s */
6195  };
6196  
6197  /* The transmit profile, which has the same sorts of values
6198   * as the previous struct
6199   */
6200  static const struct ice_dim tx_profile[] = {
6201  	{2},    /* 500,000 ints/s, capped at 250K by INTRL */
6202  	{8},    /* 125,000 ints/s */
6203  	{40},   /*  25,000 ints/s */
6204  	{128},  /*   7,812 ints/s */
6205  	{256}   /*   3,906 ints/s */
6206  };
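
/* The ints/s figures in the two tables above follow directly from the ITR
 * value in microseconds: rate ~= 1,000,000 / itr. For example,
 * 1,000,000 / 8 = 125,000 ints/s and 1,000,000 / 126 ~= 7,936 ints/s, with
 * the 2us entries capped at 250,000 ints/s by the hard-coded INTRL limit
 * described in the theory-of-moderation comment.
 */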
6207  
6208  static void ice_tx_dim_work(struct work_struct *work)
6209  {
6210  	struct ice_ring_container *rc;
6211  	struct dim *dim;
6212  	u16 itr;
6213  
6214  	dim = container_of(work, struct dim, work);
6215  	rc = (struct ice_ring_container *)dim->priv;
6216  
6217  	WARN_ON(dim->profile_ix >= ARRAY_SIZE(tx_profile));
6218  
6219  	/* look up the values in our local table */
6220  	itr = tx_profile[dim->profile_ix].itr;
6221  
6222  	ice_trace(tx_dim_work, container_of(rc, struct ice_q_vector, tx), dim);
6223  	ice_write_itr(rc, itr);
6224  
6225  	dim->state = DIM_START_MEASURE;
6226  }
6227  
6228  static void ice_rx_dim_work(struct work_struct *work)
6229  {
6230  	struct ice_ring_container *rc;
6231  	struct dim *dim;
6232  	u16 itr;
6233  
6234  	dim = container_of(work, struct dim, work);
6235  	rc = (struct ice_ring_container *)dim->priv;
6236  
6237  	WARN_ON(dim->profile_ix >= ARRAY_SIZE(rx_profile));
6238  
6239  	/* look up the values in our local table */
6240  	itr = rx_profile[dim->profile_ix].itr;
6241  
6242  	ice_trace(rx_dim_work, container_of(rc, struct ice_q_vector, rx), dim);
6243  	ice_write_itr(rc, itr);
6244  
6245  	dim->state = DIM_START_MEASURE;
6246  }
6247  
6248  #define ICE_DIM_DEFAULT_PROFILE_IX 1
6249  
6250  /**
6251   * ice_init_moderation - set up interrupt moderation
6252   * @q_vector: the vector containing rings to be configured
6253   *
6254   * Set up interrupt moderation registers, with the intent to do the right thing
6255   * when called from reset or from probe, and whether or not dynamic moderation
6256   * is enabled or not. Take special care to write all the registers in both
6257   * dynamic moderation mode or not in order to make sure hardware is in a known
6258   * state.
6259   */
6260  static void ice_init_moderation(struct ice_q_vector *q_vector)
6261  {
6262  	struct ice_ring_container *rc;
6263  	bool tx_dynamic, rx_dynamic;
6264  
6265  	rc = &q_vector->tx;
6266  	INIT_WORK(&rc->dim.work, ice_tx_dim_work);
6267  	rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
6268  	rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX;
6269  	rc->dim.priv = rc;
6270  	tx_dynamic = ITR_IS_DYNAMIC(rc);
6271  
6272  	/* set the initial TX ITR to match the above */
6273  	ice_write_itr(rc, tx_dynamic ?
6274  		      tx_profile[rc->dim.profile_ix].itr : rc->itr_setting);
6275  
6276  	rc = &q_vector->rx;
6277  	INIT_WORK(&rc->dim.work, ice_rx_dim_work);
6278  	rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
6279  	rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX;
6280  	rc->dim.priv = rc;
6281  	rx_dynamic = ITR_IS_DYNAMIC(rc);
6282  
6283  	/* set the initial RX ITR to match the above */
6284  	ice_write_itr(rc, rx_dynamic ? rx_profile[rc->dim.profile_ix].itr :
6285  				       rc->itr_setting);
6286  
6287  	ice_set_q_vector_intrl(q_vector);
6288  }
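
/* Usage sketch (assumption, not part of the driver): whether a ring container
 * uses the dynamic (DIM) profile or the fixed itr_setting is normally chosen
 * via ethtool interrupt coalescing, e.g.
 *
 *   ethtool -C <iface> adaptive-rx on adaptive-tx on   # dynamic moderation
 *   ethtool -C <iface> adaptive-rx off rx-usecs 50     # fixed 50us Rx ITR
 *
 * ITR_IS_DYNAMIC() reflects that choice when the initial ITR is written here.
 */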
6289  
6290  /**
6291   * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI
6292   * @vsi: the VSI being configured
6293   */
6294  static void ice_napi_enable_all(struct ice_vsi *vsi)
6295  {
6296  	int q_idx;
6297  
6298  	if (!vsi->netdev)
6299  		return;
6300  
6301  	ice_for_each_q_vector(vsi, q_idx) {
6302  		struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
6303  
6304  		ice_init_moderation(q_vector);
6305  
6306  		if (q_vector->rx.rx_ring || q_vector->tx.tx_ring)
6307  			napi_enable(&q_vector->napi);
6308  	}
6309  }
6310  
6311  /**
6312   * ice_up_complete - Finish the last steps of bringing up a connection
6313   * @vsi: The VSI being configured
6314   *
6315   * Return 0 on success and negative value on error
6316   */
6317  static int ice_up_complete(struct ice_vsi *vsi)
6318  {
6319  	struct ice_pf *pf = vsi->back;
6320  	int err;
6321  
6322  	ice_vsi_cfg_msix(vsi);
6323  
6324  	/* Enable only Rx rings, Tx rings were enabled by the FW when the
6325  	 * Tx queue group list was configured and the context bits were
6326  	 * programmed using ice_vsi_cfg_txqs
6327  	 */
6328  	err = ice_vsi_start_all_rx_rings(vsi);
6329  	if (err)
6330  		return err;
6331  
6332  	clear_bit(ICE_VSI_DOWN, vsi->state);
6333  	ice_napi_enable_all(vsi);
6334  	ice_vsi_ena_irq(vsi);
6335  
6336  	if (vsi->port_info &&
6337  	    (vsi->port_info->phy.link_info.link_info & ICE_AQ_LINK_UP) &&
6338  	    vsi->netdev) {
6339  		ice_print_link_msg(vsi, true);
6340  		netif_tx_start_all_queues(vsi->netdev);
6341  		netif_carrier_on(vsi->netdev);
6342  		ice_ptp_link_change(pf, pf->hw.pf_id, true);
6343  	}
6344  
6345  	/* Perform an initial read of the statistics registers now to
6346  	 * set the baseline so counters are ready when interface is up
6347  	 */
6348  	ice_update_eth_stats(vsi);
6349  	ice_service_task_schedule(pf);
6350  
6351  	return 0;
6352  }
6353  
6354  /**
6355   * ice_up - Bring the connection back up after being down
6356   * @vsi: VSI being configured
6357   */
6358  int ice_up(struct ice_vsi *vsi)
6359  {
6360  	int err;
6361  
6362  	err = ice_vsi_cfg(vsi);
6363  	if (!err)
6364  		err = ice_up_complete(vsi);
6365  
6366  	return err;
6367  }
6368  
6369  /**
6370   * ice_fetch_u64_stats_per_ring - get packets and bytes stats per ring
6371   * @syncp: pointer to u64_stats_sync
6372   * @stats: stats that pkts and bytes count will be taken from
6373   * @pkts: packets stats counter
6374   * @bytes: bytes stats counter
6375   *
6376   * This function fetches stats from the ring considering the atomic operations
6377   * that need to be performed to read u64 values on a 32 bit machine.
6378   */
6379  void
6380  ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp,
6381  			     struct ice_q_stats stats, u64 *pkts, u64 *bytes)
6382  {
6383  	unsigned int start;
6384  
6385  	do {
6386  		start = u64_stats_fetch_begin(syncp);
6387  		*pkts = stats.pkts;
6388  		*bytes = stats.bytes;
6389  	} while (u64_stats_fetch_retry(syncp, start));
6390  }
6391  
6392  /**
6393   * ice_update_vsi_tx_ring_stats - Update VSI Tx ring stats counters
6394   * @vsi: the VSI to be updated
6395   * @vsi_stats: the stats struct to be updated
6396   * @rings: rings to work on
6397   * @count: number of rings
6398   */
6399  static void
6400  ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi,
6401  			     struct rtnl_link_stats64 *vsi_stats,
6402  			     struct ice_tx_ring **rings, u16 count)
6403  {
6404  	u16 i;
6405  
6406  	for (i = 0; i < count; i++) {
6407  		struct ice_tx_ring *ring;
6408  		u64 pkts = 0, bytes = 0;
6409  
6410  		ring = READ_ONCE(rings[i]);
6411  		if (!ring || !ring->ring_stats)
6412  			continue;
6413  		ice_fetch_u64_stats_per_ring(&ring->ring_stats->syncp,
6414  					     ring->ring_stats->stats, &pkts,
6415  					     &bytes);
6416  		vsi_stats->tx_packets += pkts;
6417  		vsi_stats->tx_bytes += bytes;
6418  		vsi->tx_restart += ring->ring_stats->tx_stats.restart_q;
6419  		vsi->tx_busy += ring->ring_stats->tx_stats.tx_busy;
6420  		vsi->tx_linearize += ring->ring_stats->tx_stats.tx_linearize;
6421  	}
6422  }
6423  
6424  /**
6425   * ice_update_vsi_ring_stats - Update VSI stats counters
6426   * @vsi: the VSI to be updated
6427   */
6428  static void ice_update_vsi_ring_stats(struct ice_vsi *vsi)
6429  {
6430  	struct rtnl_link_stats64 *net_stats, *stats_prev;
6431  	struct rtnl_link_stats64 *vsi_stats;
6432  	u64 pkts, bytes;
6433  	int i;
6434  
6435  	vsi_stats = kzalloc(sizeof(*vsi_stats), GFP_ATOMIC);
6436  	if (!vsi_stats)
6437  		return;
6438  
6439  	/* reset non-netdev (extended) stats */
6440  	vsi->tx_restart = 0;
6441  	vsi->tx_busy = 0;
6442  	vsi->tx_linearize = 0;
6443  	vsi->rx_buf_failed = 0;
6444  	vsi->rx_page_failed = 0;
6445  
6446  	rcu_read_lock();
6447  
6448  	/* update Tx rings counters */
6449  	ice_update_vsi_tx_ring_stats(vsi, vsi_stats, vsi->tx_rings,
6450  				     vsi->num_txq);
6451  
6452  	/* update Rx rings counters */
6453  	ice_for_each_rxq(vsi, i) {
6454  		struct ice_rx_ring *ring = READ_ONCE(vsi->rx_rings[i]);
6455  		struct ice_ring_stats *ring_stats;
6456  
6457  		ring_stats = ring->ring_stats;
6458  		ice_fetch_u64_stats_per_ring(&ring_stats->syncp,
6459  					     ring_stats->stats, &pkts,
6460  					     &bytes);
6461  		vsi_stats->rx_packets += pkts;
6462  		vsi_stats->rx_bytes += bytes;
6463  		vsi->rx_buf_failed += ring_stats->rx_stats.alloc_buf_failed;
6464  		vsi->rx_page_failed += ring_stats->rx_stats.alloc_page_failed;
6465  	}
6466  
6467  	/* update XDP Tx rings counters */
6468  	if (ice_is_xdp_ena_vsi(vsi))
6469  		ice_update_vsi_tx_ring_stats(vsi, vsi_stats, vsi->xdp_rings,
6470  					     vsi->num_xdp_txq);
6471  
6472  	rcu_read_unlock();
6473  
6474  	net_stats = &vsi->net_stats;
6475  	stats_prev = &vsi->net_stats_prev;
6476  
6477  	/* clear prev counters after reset */
6478  	if (vsi_stats->tx_packets < stats_prev->tx_packets ||
6479  	    vsi_stats->rx_packets < stats_prev->rx_packets) {
6480  		stats_prev->tx_packets = 0;
6481  		stats_prev->tx_bytes = 0;
6482  		stats_prev->rx_packets = 0;
6483  		stats_prev->rx_bytes = 0;
6484  	}
6485  
6486  	/* update netdev counters */
6487  	net_stats->tx_packets += vsi_stats->tx_packets - stats_prev->tx_packets;
6488  	net_stats->tx_bytes += vsi_stats->tx_bytes - stats_prev->tx_bytes;
6489  	net_stats->rx_packets += vsi_stats->rx_packets - stats_prev->rx_packets;
6490  	net_stats->rx_bytes += vsi_stats->rx_bytes - stats_prev->rx_bytes;
6491  
6492  	stats_prev->tx_packets = vsi_stats->tx_packets;
6493  	stats_prev->tx_bytes = vsi_stats->tx_bytes;
6494  	stats_prev->rx_packets = vsi_stats->rx_packets;
6495  	stats_prev->rx_bytes = vsi_stats->rx_bytes;
6496  
6497  	kfree(vsi_stats);
6498  }
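
/* Worked example of the delta accumulation above (illustration only): if the
 * rings report tx_packets = 1500 while net_stats_prev.tx_packets = 1000, the
 * netdev counter grows by 500 and prev advances to 1500. After a reset the
 * ring counters restart from zero, so cur < prev is detected and prev is
 * cleared first, keeping the netdev counters monotonic across resets.
 */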
6499  
6500  /**
6501   * ice_update_vsi_stats - Update VSI stats counters
6502   * @vsi: the VSI to be updated
6503   */
6504  void ice_update_vsi_stats(struct ice_vsi *vsi)
6505  {
6506  	struct rtnl_link_stats64 *cur_ns = &vsi->net_stats;
6507  	struct ice_eth_stats *cur_es = &vsi->eth_stats;
6508  	struct ice_pf *pf = vsi->back;
6509  
6510  	if (test_bit(ICE_VSI_DOWN, vsi->state) ||
6511  	    test_bit(ICE_CFG_BUSY, pf->state))
6512  		return;
6513  
6514  	/* get stats as recorded by Tx/Rx rings */
6515  	ice_update_vsi_ring_stats(vsi);
6516  
6517  	/* get VSI stats as recorded by the hardware */
6518  	ice_update_eth_stats(vsi);
6519  
6520  	cur_ns->tx_errors = cur_es->tx_errors;
6521  	cur_ns->rx_dropped = cur_es->rx_discards;
6522  	cur_ns->tx_dropped = cur_es->tx_discards;
6523  	cur_ns->multicast = cur_es->rx_multicast;
6524  
6525  	/* update some more netdev stats if this is main VSI */
6526  	if (vsi->type == ICE_VSI_PF) {
6527  		cur_ns->rx_crc_errors = pf->stats.crc_errors;
6528  		cur_ns->rx_errors = pf->stats.crc_errors +
6529  				    pf->stats.illegal_bytes +
6530  				    pf->stats.rx_len_errors +
6531  				    pf->stats.rx_undersize +
6532  				    pf->hw_csum_rx_error +
6533  				    pf->stats.rx_jabber +
6534  				    pf->stats.rx_fragments +
6535  				    pf->stats.rx_oversize;
6536  		cur_ns->rx_length_errors = pf->stats.rx_len_errors;
6537  		/* record drops from the port level */
6538  		cur_ns->rx_missed_errors = pf->stats.eth.rx_discards;
6539  	}
6540  }
6541  
6542  /**
6543   * ice_update_pf_stats - Update PF port stats counters
6544   * @pf: PF whose stats needs to be updated
6545   */
6546  void ice_update_pf_stats(struct ice_pf *pf)
6547  {
6548  	struct ice_hw_port_stats *prev_ps, *cur_ps;
6549  	struct ice_hw *hw = &pf->hw;
6550  	u16 fd_ctr_base;
6551  	u8 port;
6552  
6553  	port = hw->port_info->lport;
6554  	prev_ps = &pf->stats_prev;
6555  	cur_ps = &pf->stats;
6556  
6557  	if (ice_is_reset_in_progress(pf->state))
6558  		pf->stat_prev_loaded = false;
6559  
6560  	ice_stat_update40(hw, GLPRT_GORCL(port), pf->stat_prev_loaded,
6561  			  &prev_ps->eth.rx_bytes,
6562  			  &cur_ps->eth.rx_bytes);
6563  
6564  	ice_stat_update40(hw, GLPRT_UPRCL(port), pf->stat_prev_loaded,
6565  			  &prev_ps->eth.rx_unicast,
6566  			  &cur_ps->eth.rx_unicast);
6567  
6568  	ice_stat_update40(hw, GLPRT_MPRCL(port), pf->stat_prev_loaded,
6569  			  &prev_ps->eth.rx_multicast,
6570  			  &cur_ps->eth.rx_multicast);
6571  
6572  	ice_stat_update40(hw, GLPRT_BPRCL(port), pf->stat_prev_loaded,
6573  			  &prev_ps->eth.rx_broadcast,
6574  			  &cur_ps->eth.rx_broadcast);
6575  
6576  	ice_stat_update32(hw, PRTRPB_RDPC, pf->stat_prev_loaded,
6577  			  &prev_ps->eth.rx_discards,
6578  			  &cur_ps->eth.rx_discards);
6579  
6580  	ice_stat_update40(hw, GLPRT_GOTCL(port), pf->stat_prev_loaded,
6581  			  &prev_ps->eth.tx_bytes,
6582  			  &cur_ps->eth.tx_bytes);
6583  
6584  	ice_stat_update40(hw, GLPRT_UPTCL(port), pf->stat_prev_loaded,
6585  			  &prev_ps->eth.tx_unicast,
6586  			  &cur_ps->eth.tx_unicast);
6587  
6588  	ice_stat_update40(hw, GLPRT_MPTCL(port), pf->stat_prev_loaded,
6589  			  &prev_ps->eth.tx_multicast,
6590  			  &cur_ps->eth.tx_multicast);
6591  
6592  	ice_stat_update40(hw, GLPRT_BPTCL(port), pf->stat_prev_loaded,
6593  			  &prev_ps->eth.tx_broadcast,
6594  			  &cur_ps->eth.tx_broadcast);
6595  
6596  	ice_stat_update32(hw, GLPRT_TDOLD(port), pf->stat_prev_loaded,
6597  			  &prev_ps->tx_dropped_link_down,
6598  			  &cur_ps->tx_dropped_link_down);
6599  
6600  	ice_stat_update40(hw, GLPRT_PRC64L(port), pf->stat_prev_loaded,
6601  			  &prev_ps->rx_size_64, &cur_ps->rx_size_64);
6602  
6603  	ice_stat_update40(hw, GLPRT_PRC127L(port), pf->stat_prev_loaded,
6604  			  &prev_ps->rx_size_127, &cur_ps->rx_size_127);
6605  
6606  	ice_stat_update40(hw, GLPRT_PRC255L(port), pf->stat_prev_loaded,
6607  			  &prev_ps->rx_size_255, &cur_ps->rx_size_255);
6608  
6609  	ice_stat_update40(hw, GLPRT_PRC511L(port), pf->stat_prev_loaded,
6610  			  &prev_ps->rx_size_511, &cur_ps->rx_size_511);
6611  
6612  	ice_stat_update40(hw, GLPRT_PRC1023L(port), pf->stat_prev_loaded,
6613  			  &prev_ps->rx_size_1023, &cur_ps->rx_size_1023);
6614  
6615  	ice_stat_update40(hw, GLPRT_PRC1522L(port), pf->stat_prev_loaded,
6616  			  &prev_ps->rx_size_1522, &cur_ps->rx_size_1522);
6617  
6618  	ice_stat_update40(hw, GLPRT_PRC9522L(port), pf->stat_prev_loaded,
6619  			  &prev_ps->rx_size_big, &cur_ps->rx_size_big);
6620  
6621  	ice_stat_update40(hw, GLPRT_PTC64L(port), pf->stat_prev_loaded,
6622  			  &prev_ps->tx_size_64, &cur_ps->tx_size_64);
6623  
6624  	ice_stat_update40(hw, GLPRT_PTC127L(port), pf->stat_prev_loaded,
6625  			  &prev_ps->tx_size_127, &cur_ps->tx_size_127);
6626  
6627  	ice_stat_update40(hw, GLPRT_PTC255L(port), pf->stat_prev_loaded,
6628  			  &prev_ps->tx_size_255, &cur_ps->tx_size_255);
6629  
6630  	ice_stat_update40(hw, GLPRT_PTC511L(port), pf->stat_prev_loaded,
6631  			  &prev_ps->tx_size_511, &cur_ps->tx_size_511);
6632  
6633  	ice_stat_update40(hw, GLPRT_PTC1023L(port), pf->stat_prev_loaded,
6634  			  &prev_ps->tx_size_1023, &cur_ps->tx_size_1023);
6635  
6636  	ice_stat_update40(hw, GLPRT_PTC1522L(port), pf->stat_prev_loaded,
6637  			  &prev_ps->tx_size_1522, &cur_ps->tx_size_1522);
6638  
6639  	ice_stat_update40(hw, GLPRT_PTC9522L(port), pf->stat_prev_loaded,
6640  			  &prev_ps->tx_size_big, &cur_ps->tx_size_big);
6641  
6642  	fd_ctr_base = hw->fd_ctr_base;
6643  
6644  	ice_stat_update40(hw,
6645  			  GLSTAT_FD_CNT0L(ICE_FD_SB_STAT_IDX(fd_ctr_base)),
6646  			  pf->stat_prev_loaded, &prev_ps->fd_sb_match,
6647  			  &cur_ps->fd_sb_match);
6648  	ice_stat_update32(hw, GLPRT_LXONRXC(port), pf->stat_prev_loaded,
6649  			  &prev_ps->link_xon_rx, &cur_ps->link_xon_rx);
6650  
6651  	ice_stat_update32(hw, GLPRT_LXOFFRXC(port), pf->stat_prev_loaded,
6652  			  &prev_ps->link_xoff_rx, &cur_ps->link_xoff_rx);
6653  
6654  	ice_stat_update32(hw, GLPRT_LXONTXC(port), pf->stat_prev_loaded,
6655  			  &prev_ps->link_xon_tx, &cur_ps->link_xon_tx);
6656  
6657  	ice_stat_update32(hw, GLPRT_LXOFFTXC(port), pf->stat_prev_loaded,
6658  			  &prev_ps->link_xoff_tx, &cur_ps->link_xoff_tx);
6659  
6660  	ice_update_dcb_stats(pf);
6661  
6662  	ice_stat_update32(hw, GLPRT_CRCERRS(port), pf->stat_prev_loaded,
6663  			  &prev_ps->crc_errors, &cur_ps->crc_errors);
6664  
6665  	ice_stat_update32(hw, GLPRT_ILLERRC(port), pf->stat_prev_loaded,
6666  			  &prev_ps->illegal_bytes, &cur_ps->illegal_bytes);
6667  
6668  	ice_stat_update32(hw, GLPRT_MLFC(port), pf->stat_prev_loaded,
6669  			  &prev_ps->mac_local_faults,
6670  			  &cur_ps->mac_local_faults);
6671  
6672  	ice_stat_update32(hw, GLPRT_MRFC(port), pf->stat_prev_loaded,
6673  			  &prev_ps->mac_remote_faults,
6674  			  &cur_ps->mac_remote_faults);
6675  
6676  	ice_stat_update32(hw, GLPRT_RLEC(port), pf->stat_prev_loaded,
6677  			  &prev_ps->rx_len_errors, &cur_ps->rx_len_errors);
6678  
6679  	ice_stat_update32(hw, GLPRT_RUC(port), pf->stat_prev_loaded,
6680  			  &prev_ps->rx_undersize, &cur_ps->rx_undersize);
6681  
6682  	ice_stat_update32(hw, GLPRT_RFC(port), pf->stat_prev_loaded,
6683  			  &prev_ps->rx_fragments, &cur_ps->rx_fragments);
6684  
6685  	ice_stat_update32(hw, GLPRT_ROC(port), pf->stat_prev_loaded,
6686  			  &prev_ps->rx_oversize, &cur_ps->rx_oversize);
6687  
6688  	ice_stat_update32(hw, GLPRT_RJC(port), pf->stat_prev_loaded,
6689  			  &prev_ps->rx_jabber, &cur_ps->rx_jabber);
6690  
6691  	cur_ps->fd_sb_status = test_bit(ICE_FLAG_FD_ENA, pf->flags) ? 1 : 0;
6692  
6693  	pf->stat_prev_loaded = true;
6694  }
6695  
6696  /**
6697   * ice_get_stats64 - get statistics for network device structure
6698   * @netdev: network interface device structure
6699   * @stats: main device statistics structure
6700   */
6701  static
6702  void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
6703  {
6704  	struct ice_netdev_priv *np = netdev_priv(netdev);
6705  	struct rtnl_link_stats64 *vsi_stats;
6706  	struct ice_vsi *vsi = np->vsi;
6707  
6708  	vsi_stats = &vsi->net_stats;
6709  
6710  	if (!vsi->num_txq || !vsi->num_rxq)
6711  		return;
6712  
6713  	/* netdev packet/byte stats come from ring counters. These are obtained
6714  	 * by summing up ring counters (done by ice_update_vsi_ring_stats).
6715  	 * But, only call the update routine and read the registers if VSI is
6716  	 * not down.
6717  	 */
6718  	if (!test_bit(ICE_VSI_DOWN, vsi->state))
6719  		ice_update_vsi_ring_stats(vsi);
6720  	stats->tx_packets = vsi_stats->tx_packets;
6721  	stats->tx_bytes = vsi_stats->tx_bytes;
6722  	stats->rx_packets = vsi_stats->rx_packets;
6723  	stats->rx_bytes = vsi_stats->rx_bytes;
6724  
6725  	/* The rest of the stats can be read from the hardware but instead we
6726  	 * just return values that the watchdog task has already obtained from
6727  	 * the hardware.
6728  	 */
6729  	stats->multicast = vsi_stats->multicast;
6730  	stats->tx_errors = vsi_stats->tx_errors;
6731  	stats->tx_dropped = vsi_stats->tx_dropped;
6732  	stats->rx_errors = vsi_stats->rx_errors;
6733  	stats->rx_dropped = vsi_stats->rx_dropped;
6734  	stats->rx_crc_errors = vsi_stats->rx_crc_errors;
6735  	stats->rx_length_errors = vsi_stats->rx_length_errors;
6736  }
6737  
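/*
 * Illustrative sketch (not part of the driver): how a single ring's
 * packet/byte counters are typically folded into the VSI totals reported by
 * ice_get_stats64(). The helper name and the raw counter pointers are
 * hypothetical stand-ins for the real per-ring stats layout; the actual
 * aggregation is performed by ice_update_vsi_ring_stats().
 */
static inline void ice_sketch_sum_ring_stats(struct u64_stats_sync *syncp,
					     const u64 *pkts, const u64 *bytes,
					     u64 *tot_pkts, u64 *tot_bytes)
{
	unsigned int start;
	u64 p, b;

	/* read a consistent snapshot of the 64-bit counters */
	do {
		start = u64_stats_fetch_begin(syncp);
		p = *pkts;
		b = *bytes;
	} while (u64_stats_fetch_retry(syncp, start));

	*tot_pkts += p;
	*tot_bytes += b;
}
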
6738  /**
6739   * ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI
6740   * @vsi: VSI having NAPI disabled
6741   */
6742  static void ice_napi_disable_all(struct ice_vsi *vsi)
6743  {
6744  	int q_idx;
6745  
6746  	if (!vsi->netdev)
6747  		return;
6748  
6749  	ice_for_each_q_vector(vsi, q_idx) {
6750  		struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
6751  
6752  		if (q_vector->rx.rx_ring || q_vector->tx.tx_ring)
6753  			napi_disable(&q_vector->napi);
6754  
6755  		cancel_work_sync(&q_vector->tx.dim.work);
6756  		cancel_work_sync(&q_vector->rx.dim.work);
6757  	}
6758  }
6759  
6760  /**
6761   * ice_down - Shutdown the connection
6762   * @vsi: The VSI being stopped
6763   *
6764   * Caller of this function is expected to set the vsi->state ICE_VSI_DOWN bit
6765   */
6766  int ice_down(struct ice_vsi *vsi)
6767  {
6768  	int i, tx_err, rx_err, vlan_err = 0;
6769  
6770  	WARN_ON(!test_bit(ICE_VSI_DOWN, vsi->state));
6771  
6772  	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
6773  		vlan_err = ice_vsi_del_vlan_zero(vsi);
6774  		ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false);
6775  		netif_carrier_off(vsi->netdev);
6776  		netif_tx_disable(vsi->netdev);
6777  	} else if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) {
6778  		ice_eswitch_stop_all_tx_queues(vsi->back);
6779  	}
6780  
6781  	ice_vsi_dis_irq(vsi);
6782  
6783  	tx_err = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
6784  	if (tx_err)
6785  		netdev_err(vsi->netdev, "Failed stop Tx rings, VSI %d error %d\n",
6786  			   vsi->vsi_num, tx_err);
6787  	if (!tx_err && ice_is_xdp_ena_vsi(vsi)) {
6788  		tx_err = ice_vsi_stop_xdp_tx_rings(vsi);
6789  		if (tx_err)
6790  			netdev_err(vsi->netdev, "Failed stop XDP rings, VSI %d error %d\n",
6791  				   vsi->vsi_num, tx_err);
6792  	}
6793  
6794  	rx_err = ice_vsi_stop_all_rx_rings(vsi);
6795  	if (rx_err)
6796  		netdev_err(vsi->netdev, "Failed stop Rx rings, VSI %d error %d\n",
6797  			   vsi->vsi_num, rx_err);
6798  
6799  	ice_napi_disable_all(vsi);
6800  
6801  	ice_for_each_txq(vsi, i)
6802  		ice_clean_tx_ring(vsi->tx_rings[i]);
6803  
6804  	ice_for_each_rxq(vsi, i)
6805  		ice_clean_rx_ring(vsi->rx_rings[i]);
6806  
6807  	if (tx_err || rx_err || vlan_err) {
6808  		netdev_err(vsi->netdev, "Failed to close VSI 0x%04X on switch 0x%04X\n",
6809  			   vsi->vsi_num, vsi->vsw->sw_id);
6810  		return -EIO;
6811  	}
6812  
6813  	return 0;
6814  }
6815  
6816  /**
6817   * ice_down_up - shutdown the VSI connection and bring it up
6818   * @vsi: the VSI to be reconnected
6819   */
6820  int ice_down_up(struct ice_vsi *vsi)
6821  {
6822  	int ret;
6823  
6824  	/* if DOWN already set, nothing to do */
6825  	if (test_and_set_bit(ICE_VSI_DOWN, vsi->state))
6826  		return 0;
6827  
6828  	ret = ice_down(vsi);
6829  	if (ret)
6830  		return ret;
6831  
6832  	ret = ice_up(vsi);
6833  	if (ret) {
6834  		netdev_err(vsi->netdev, "reallocating resources failed during netdev features change, may need to reload driver\n");
6835  		return ret;
6836  	}
6837  
6838  	return 0;
6839  }
6840  
6841  /**
6842   * ice_vsi_setup_tx_rings - Allocate VSI Tx queue resources
6843   * @vsi: VSI having resources allocated
6844   *
6845   * Return 0 on success, negative on failure
6846   */
6847  int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
6848  {
6849  	int i, err = 0;
6850  
6851  	if (!vsi->num_txq) {
6852  		dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Tx queues\n",
6853  			vsi->vsi_num);
6854  		return -EINVAL;
6855  	}
6856  
6857  	ice_for_each_txq(vsi, i) {
6858  		struct ice_tx_ring *ring = vsi->tx_rings[i];
6859  
6860  		if (!ring)
6861  			return -EINVAL;
6862  
6863  		if (vsi->netdev)
6864  			ring->netdev = vsi->netdev;
6865  		err = ice_setup_tx_ring(ring);
6866  		if (err)
6867  			break;
6868  	}
6869  
6870  	return err;
6871  }
6872  
6873  /**
6874   * ice_vsi_setup_rx_rings - Allocate VSI Rx queue resources
6875   * @vsi: VSI having resources allocated
6876   *
6877   * Return 0 on success, negative on failure
6878   */
6879  int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
6880  {
6881  	int i, err = 0;
6882  
6883  	if (!vsi->num_rxq) {
6884  		dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Rx queues\n",
6885  			vsi->vsi_num);
6886  		return -EINVAL;
6887  	}
6888  
6889  	ice_for_each_rxq(vsi, i) {
6890  		struct ice_rx_ring *ring = vsi->rx_rings[i];
6891  
6892  		if (!ring)
6893  			return -EINVAL;
6894  
6895  		if (vsi->netdev)
6896  			ring->netdev = vsi->netdev;
6897  		err = ice_setup_rx_ring(ring);
6898  		if (err)
6899  			break;
6900  	}
6901  
6902  	return err;
6903  }
6904  
6905  /**
6906   * ice_vsi_open_ctrl - open control VSI for use
6907   * @vsi: the VSI to open
6908   *
6909   * Initialization of the Control VSI
6910   *
6911   * Returns 0 on success, negative value on error
6912   */
6913  int ice_vsi_open_ctrl(struct ice_vsi *vsi)
6914  {
6915  	char int_name[ICE_INT_NAME_STR_LEN];
6916  	struct ice_pf *pf = vsi->back;
6917  	struct device *dev;
6918  	int err;
6919  
6920  	dev = ice_pf_to_dev(pf);
6921  	/* allocate descriptors */
6922  	err = ice_vsi_setup_tx_rings(vsi);
6923  	if (err)
6924  		goto err_setup_tx;
6925  
6926  	err = ice_vsi_setup_rx_rings(vsi);
6927  	if (err)
6928  		goto err_setup_rx;
6929  
6930  	err = ice_vsi_cfg(vsi);
6931  	if (err)
6932  		goto err_setup_rx;
6933  
6934  	snprintf(int_name, sizeof(int_name) - 1, "%s-%s:ctrl",
6935  		 dev_driver_string(dev), dev_name(dev));
6936  	err = ice_vsi_req_irq_msix(vsi, int_name);
6937  	if (err)
6938  		goto err_setup_rx;
6939  
6940  	ice_vsi_cfg_msix(vsi);
6941  
6942  	err = ice_vsi_start_all_rx_rings(vsi);
6943  	if (err)
6944  		goto err_up_complete;
6945  
6946  	clear_bit(ICE_VSI_DOWN, vsi->state);
6947  	ice_vsi_ena_irq(vsi);
6948  
6949  	return 0;
6950  
6951  err_up_complete:
6952  	ice_down(vsi);
6953  err_setup_rx:
6954  	ice_vsi_free_rx_rings(vsi);
6955  err_setup_tx:
6956  	ice_vsi_free_tx_rings(vsi);
6957  
6958  	return err;
6959  }
6960  
6961  /**
6962   * ice_vsi_open - Called when a network interface is made active
6963   * @vsi: the VSI to open
6964   *
6965   * Initialization of the VSI
6966   *
6967   * Returns 0 on success, negative value on error
6968   */
6969  int ice_vsi_open(struct ice_vsi *vsi)
6970  {
6971  	char int_name[ICE_INT_NAME_STR_LEN];
6972  	struct ice_pf *pf = vsi->back;
6973  	int err;
6974  
6975  	/* allocate descriptors */
6976  	err = ice_vsi_setup_tx_rings(vsi);
6977  	if (err)
6978  		goto err_setup_tx;
6979  
6980  	err = ice_vsi_setup_rx_rings(vsi);
6981  	if (err)
6982  		goto err_setup_rx;
6983  
6984  	err = ice_vsi_cfg(vsi);
6985  	if (err)
6986  		goto err_setup_rx;
6987  
6988  	snprintf(int_name, sizeof(int_name) - 1, "%s-%s",
6989  		 dev_driver_string(ice_pf_to_dev(pf)), vsi->netdev->name);
6990  	err = ice_vsi_req_irq_msix(vsi, int_name);
6991  	if (err)
6992  		goto err_setup_rx;
6993  
6994  	ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc);
6995  
6996  	if (vsi->type == ICE_VSI_PF) {
6997  		/* Notify the stack of the actual queue counts. */
6998  		err = netif_set_real_num_tx_queues(vsi->netdev, vsi->num_txq);
6999  		if (err)
7000  			goto err_set_qs;
7001  
7002  		err = netif_set_real_num_rx_queues(vsi->netdev, vsi->num_rxq);
7003  		if (err)
7004  			goto err_set_qs;
7005  	}
7006  
7007  	err = ice_up_complete(vsi);
7008  	if (err)
7009  		goto err_up_complete;
7010  
7011  	return 0;
7012  
7013  err_up_complete:
7014  	ice_down(vsi);
7015  err_set_qs:
7016  	ice_vsi_free_irq(vsi);
7017  err_setup_rx:
7018  	ice_vsi_free_rx_rings(vsi);
7019  err_setup_tx:
7020  	ice_vsi_free_tx_rings(vsi);
7021  
7022  	return err;
7023  }
7024  
7025  /**
7026   * ice_vsi_release_all - Delete all VSIs
7027   * @pf: PF from which all VSIs are being removed
7028   */
7029  static void ice_vsi_release_all(struct ice_pf *pf)
7030  {
7031  	int err, i;
7032  
7033  	if (!pf->vsi)
7034  		return;
7035  
7036  	ice_for_each_vsi(pf, i) {
7037  		if (!pf->vsi[i])
7038  			continue;
7039  
7040  		if (pf->vsi[i]->type == ICE_VSI_CHNL)
7041  			continue;
7042  
7043  		err = ice_vsi_release(pf->vsi[i]);
7044  		if (err)
7045  			dev_dbg(ice_pf_to_dev(pf), "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n",
7046  				i, err, pf->vsi[i]->vsi_num);
7047  	}
7048  }
7049  
7050  /**
7051   * ice_vsi_rebuild_by_type - Rebuild VSI of a given type
7052   * @pf: pointer to the PF instance
7053   * @type: VSI type to rebuild
7054   *
7055   * Iterates through the pf->vsi array and rebuilds VSIs of the requested type
7056   */
7057  static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type)
7058  {
7059  	struct device *dev = ice_pf_to_dev(pf);
7060  	int i, err;
7061  
7062  	ice_for_each_vsi(pf, i) {
7063  		struct ice_vsi *vsi = pf->vsi[i];
7064  
7065  		if (!vsi || vsi->type != type)
7066  			continue;
7067  
7068  		/* rebuild the VSI */
7069  		err = ice_vsi_rebuild(vsi, true);
7070  		if (err) {
7071  			dev_err(dev, "rebuild VSI failed, err %d, VSI index %d, type %s\n",
7072  				err, vsi->idx, ice_vsi_type_str(type));
7073  			return err;
7074  		}
7075  
7076  		/* replay filters for the VSI */
7077  		err = ice_replay_vsi(&pf->hw, vsi->idx);
7078  		if (err) {
7079  			dev_err(dev, "replay VSI failed, error %d, VSI index %d, type %s\n",
7080  				err, vsi->idx, ice_vsi_type_str(type));
7081  			return err;
7082  		}
7083  
7084  		/* Re-map HW VSI number, using VSI handle that has been
7085  		 * previously validated in ice_replay_vsi() call above
7086  		 */
7087  		vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx);
7088  
7089  		/* enable the VSI */
7090  		err = ice_ena_vsi(vsi, false);
7091  		if (err) {
7092  			dev_err(dev, "enable VSI failed, err %d, VSI index %d, type %s\n",
7093  				err, vsi->idx, ice_vsi_type_str(type));
7094  			return err;
7095  		}
7096  
7097  		dev_info(dev, "VSI rebuilt. VSI index %d, type %s\n", vsi->idx,
7098  			 ice_vsi_type_str(type));
7099  	}
7100  
7101  	return 0;
7102  }
7103  
7104  /**
7105   * ice_update_pf_netdev_link - Update PF netdev link status
7106   * @pf: pointer to the PF instance
7107   */
7108  static void ice_update_pf_netdev_link(struct ice_pf *pf)
7109  {
7110  	bool link_up;
7111  	int i;
7112  
7113  	ice_for_each_vsi(pf, i) {
7114  		struct ice_vsi *vsi = pf->vsi[i];
7115  
7116  		if (!vsi || vsi->type != ICE_VSI_PF)
7117  			return;
7118  
7119  		ice_get_link_status(pf->vsi[i]->port_info, &link_up);
7120  		if (link_up) {
7121  			netif_carrier_on(pf->vsi[i]->netdev);
7122  			netif_tx_wake_all_queues(pf->vsi[i]->netdev);
7123  		} else {
7124  			netif_carrier_off(pf->vsi[i]->netdev);
7125  			netif_tx_stop_all_queues(pf->vsi[i]->netdev);
7126  		}
7127  	}
7128  }
7129  
7130  /**
7131   * ice_rebuild - rebuild after reset
7132   * @pf: PF to rebuild
7133   * @reset_type: type of reset
7134   *
7135   * Do not rebuild VF VSI in this flow because that is already handled via
7136   * ice_reset_all_vfs(). This is because requirements for resetting a VF after a
7137   * PFR/CORER/GLOBR/etc. are different from the normal flow. Also, we don't want
7138   * to reset/rebuild all the VF VSIs twice.
7139   */
7140  static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
7141  {
7142  	struct device *dev = ice_pf_to_dev(pf);
7143  	struct ice_hw *hw = &pf->hw;
7144  	bool dvm;
7145  	int err;
7146  
7147  	if (test_bit(ICE_DOWN, pf->state))
7148  		goto clear_recovery;
7149  
7150  	dev_dbg(dev, "rebuilding PF after reset_type=%d\n", reset_type);
7151  
7152  #define ICE_EMP_RESET_SLEEP_MS 5000
7153  	if (reset_type == ICE_RESET_EMPR) {
7154  		/* If an EMP reset has occurred, any previously pending flash
7155  		 * update will have completed. We no longer know whether or
7156  		 * not the NVM update EMP reset is restricted.
7157  		 */
7158  		pf->fw_emp_reset_disabled = false;
7159  
7160  		msleep(ICE_EMP_RESET_SLEEP_MS);
7161  	}
7162  
7163  	err = ice_init_all_ctrlq(hw);
7164  	if (err) {
7165  		dev_err(dev, "control queues init failed %d\n", err);
7166  		goto err_init_ctrlq;
7167  	}
7168  
7169  	/* if DDP was previously loaded successfully */
7170  	if (!ice_is_safe_mode(pf)) {
7171  		/* reload the SW DB of filter tables */
7172  		if (reset_type == ICE_RESET_PFR)
7173  			ice_fill_blk_tbls(hw);
7174  		else
7175  			/* Reload DDP Package after CORER/GLOBR reset */
7176  			ice_load_pkg(NULL, pf);
7177  	}
7178  
7179  	err = ice_clear_pf_cfg(hw);
7180  	if (err) {
7181  		dev_err(dev, "clear PF configuration failed %d\n", err);
7182  		goto err_init_ctrlq;
7183  	}
7184  
7185  	ice_clear_pxe_mode(hw);
7186  
7187  	err = ice_init_nvm(hw);
7188  	if (err) {
7189  		dev_err(dev, "ice_init_nvm failed %d\n", err);
7190  		goto err_init_ctrlq;
7191  	}
7192  
7193  	err = ice_get_caps(hw);
7194  	if (err) {
7195  		dev_err(dev, "ice_get_caps failed %d\n", err);
7196  		goto err_init_ctrlq;
7197  	}
7198  
7199  	err = ice_aq_set_mac_cfg(hw, ICE_AQ_SET_MAC_FRAME_SIZE_MAX, NULL);
7200  	if (err) {
7201  		dev_err(dev, "set_mac_cfg failed %d\n", err);
7202  		goto err_init_ctrlq;
7203  	}
7204  
7205  	dvm = ice_is_dvm_ena(hw);
7206  
7207  	err = ice_aq_set_port_params(pf->hw.port_info, dvm, NULL);
7208  	if (err)
7209  		goto err_init_ctrlq;
7210  
7211  	err = ice_sched_init_port(hw->port_info);
7212  	if (err)
7213  		goto err_sched_init_port;
7214  
7215  	/* start misc vector */
7216  	err = ice_req_irq_msix_misc(pf);
7217  	if (err) {
7218  		dev_err(dev, "misc vector setup failed: %d\n", err);
7219  		goto err_sched_init_port;
7220  	}
7221  
7222  	if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
7223  		wr32(hw, PFQF_FD_ENA, PFQF_FD_ENA_FD_ENA_M);
7224  		if (!rd32(hw, PFQF_FD_SIZE)) {
7225  			u16 unused, guar, b_effort;
7226  
7227  			guar = hw->func_caps.fd_fltr_guar;
7228  			b_effort = hw->func_caps.fd_fltr_best_effort;
7229  
7230  			/* force guaranteed filter pool for PF */
7231  			ice_alloc_fd_guar_item(hw, &unused, guar);
7232  			/* force shared filter pool for PF */
7233  			ice_alloc_fd_shrd_item(hw, &unused, b_effort);
7234  		}
7235  	}
7236  
7237  	if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
7238  		ice_dcb_rebuild(pf);
7239  
7240  	/* If the PF previously had PTP enabled, PTP init needs to happen
7241  	 * before the VSI rebuild; if it does not, the PTP link status events
7242  	 * fail.
7243  	 */
7244  	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
7245  		ice_ptp_reset(pf);
7246  
7247  	if (ice_is_feature_supported(pf, ICE_F_GNSS))
7248  		ice_gnss_init(pf);
7249  
7250  	/* rebuild PF VSI */
7251  	err = ice_vsi_rebuild_by_type(pf, ICE_VSI_PF);
7252  	if (err) {
7253  		dev_err(dev, "PF VSI rebuild failed: %d\n", err);
7254  		goto err_vsi_rebuild;
7255  	}
7256  
7257  	/* configure PTP timestamping after VSI rebuild */
7258  	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
7259  		ice_ptp_cfg_timestamp(pf, false);
7260  
7261  	err = ice_vsi_rebuild_by_type(pf, ICE_VSI_SWITCHDEV_CTRL);
7262  	if (err) {
7263  		dev_err(dev, "Switchdev CTRL VSI rebuild failed: %d\n", err);
7264  		goto err_vsi_rebuild;
7265  	}
7266  
7267  	if (reset_type == ICE_RESET_PFR) {
7268  		err = ice_rebuild_channels(pf);
7269  		if (err) {
7270  			dev_err(dev, "failed to rebuild and replay ADQ VSIs, err %d\n",
7271  				err);
7272  			goto err_vsi_rebuild;
7273  		}
7274  	}
7275  
7276  	/* If Flow Director is active */
7277  	if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
7278  		err = ice_vsi_rebuild_by_type(pf, ICE_VSI_CTRL);
7279  		if (err) {
7280  			dev_err(dev, "control VSI rebuild failed: %d\n", err);
7281  			goto err_vsi_rebuild;
7282  		}
7283  
7284  		/* replay HW Flow Director recipes */
7285  		if (hw->fdir_prof)
7286  			ice_fdir_replay_flows(hw);
7287  
7288  		/* replay Flow Director filters */
7289  		ice_fdir_replay_fltrs(pf);
7290  
7291  		ice_rebuild_arfs(pf);
7292  	}
7293  
7294  	ice_update_pf_netdev_link(pf);
7295  
7296  	/* tell the firmware we are up */
7297  	err = ice_send_version(pf);
7298  	if (err) {
7299  		dev_err(dev, "Rebuild failed due to error sending driver version: %d\n",
7300  			err);
7301  		goto err_vsi_rebuild;
7302  	}
7303  
7304  	ice_replay_post(hw);
7305  
7306  	/* if we get here, reset flow is successful */
7307  	clear_bit(ICE_RESET_FAILED, pf->state);
7308  
7309  	ice_plug_aux_dev(pf);
7310  	return;
7311  
7312  err_vsi_rebuild:
7313  err_sched_init_port:
7314  	ice_sched_cleanup_all(hw);
7315  err_init_ctrlq:
7316  	ice_shutdown_all_ctrlq(hw);
7317  	set_bit(ICE_RESET_FAILED, pf->state);
7318  clear_recovery:
7319  	/* set this bit in PF state to control service task scheduling */
7320  	set_bit(ICE_NEEDS_RESTART, pf->state);
7321  	dev_err(dev, "Rebuild failed, unload and reload driver\n");
7322  }
7323  
7324  /**
7325   * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
7326   * @vsi: Pointer to VSI structure
7327   */
7328  static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
7329  {
7330  	if (PAGE_SIZE >= 8192 || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
7331  		return ICE_RXBUF_2048 - XDP_PACKET_HEADROOM;
7332  	else
7333  		return ICE_RXBUF_3072;
7334  }
7335  
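/*
 * Worked example (illustrative, not part of the driver): with 4 KiB pages
 * and legacy-rx disabled, ice_max_xdp_frame_size() returns ICE_RXBUF_3072,
 * so ice_change_mtu() below rejects any MTU larger than
 * ICE_RXBUF_3072 - ICE_ETH_PKT_HDR_PAD. A hypothetical helper expressing
 * that bound:
 */
static inline int ice_sketch_max_xdp_mtu(struct ice_vsi *vsi)
{
	/* largest MTU that ice_change_mtu() accepts while XDP is attached */
	return ice_max_xdp_frame_size(vsi) - ICE_ETH_PKT_HDR_PAD;
}
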
7336  /**
7337   * ice_change_mtu - NDO callback to change the MTU
7338   * @netdev: network interface device structure
7339   * @new_mtu: new value for maximum frame size
7340   *
7341   * Returns 0 on success, negative on failure
7342   */
7343  static int ice_change_mtu(struct net_device *netdev, int new_mtu)
7344  {
7345  	struct ice_netdev_priv *np = netdev_priv(netdev);
7346  	struct ice_vsi *vsi = np->vsi;
7347  	struct ice_pf *pf = vsi->back;
7348  	u8 count = 0;
7349  	int err = 0;
7350  
7351  	if (new_mtu == (int)netdev->mtu) {
7352  		netdev_warn(netdev, "MTU is already %u\n", netdev->mtu);
7353  		return 0;
7354  	}
7355  
7356  	if (ice_is_xdp_ena_vsi(vsi)) {
7357  		int frame_size = ice_max_xdp_frame_size(vsi);
7358  
7359  		if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) {
7360  			netdev_err(netdev, "max MTU for XDP usage is %d\n",
7361  				   frame_size - ICE_ETH_PKT_HDR_PAD);
7362  			return -EINVAL;
7363  		}
7364  	}
7365  
7366  	/* if a reset is in progress, wait for some time for it to complete */
7367  	do {
7368  		if (ice_is_reset_in_progress(pf->state)) {
7369  			count++;
7370  			usleep_range(1000, 2000);
7371  		} else {
7372  			break;
7373  		}
7374  
7375  	} while (count < 100);
7376  
7377  	if (count == 100) {
7378  		netdev_err(netdev, "can't change MTU. Device is busy\n");
7379  		return -EBUSY;
7380  	}
7381  
7382  	netdev->mtu = (unsigned int)new_mtu;
7383  
7384  	/* if VSI is up, bring it down and then back up */
7385  	if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
7386  		err = ice_down(vsi);
7387  		if (err) {
7388  			netdev_err(netdev, "change MTU if_down err %d\n", err);
7389  			return err;
7390  		}
7391  
7392  		err = ice_up(vsi);
7393  		if (err) {
7394  			netdev_err(netdev, "change MTU if_up err %d\n", err);
7395  			return err;
7396  		}
7397  	}
7398  
7399  	netdev_dbg(netdev, "changed MTU to %d\n", new_mtu);
7400  	set_bit(ICE_FLAG_MTU_CHANGED, pf->flags);
7401  
7402  	return err;
7403  }
7404  
7405  /**
7406   * ice_eth_ioctl - Access the hwtstamp interface
7407   * @netdev: network interface device structure
7408   * @ifr: interface request data
7409   * @cmd: ioctl command
7410   */
7411  static int ice_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
7412  {
7413  	struct ice_netdev_priv *np = netdev_priv(netdev);
7414  	struct ice_pf *pf = np->vsi->back;
7415  
7416  	switch (cmd) {
7417  	case SIOCGHWTSTAMP:
7418  		return ice_ptp_get_ts_config(pf, ifr);
7419  	case SIOCSHWTSTAMP:
7420  		return ice_ptp_set_ts_config(pf, ifr);
7421  	default:
7422  		return -EOPNOTSUPP;
7423  	}
7424  }
7425  
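/*
 * Illustrative userspace sketch (not driver code): the SIOCGHWTSTAMP and
 * SIOCSHWTSTAMP requests handled by ice_eth_ioctl() are normally issued
 * with an ifreq whose ifr_data points at a struct hwtstamp_config, e.g.
 * (sock_fd is an open AF_INET datagram socket, "eth0" a hypothetical
 * interface name):
 *
 *	struct hwtstamp_config cfg = {
 *		.tx_type = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_ALL,
 *	};
 *	struct ifreq ifr = { };
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(sock_fd, SIOCSHWTSTAMP, &ifr);
 */
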
7426  /**
7427   * ice_aq_str - convert AQ err code to a string
7428   * @aq_err: the AQ error code to convert
7429   */
7430  const char *ice_aq_str(enum ice_aq_err aq_err)
7431  {
7432  	switch (aq_err) {
7433  	case ICE_AQ_RC_OK:
7434  		return "OK";
7435  	case ICE_AQ_RC_EPERM:
7436  		return "ICE_AQ_RC_EPERM";
7437  	case ICE_AQ_RC_ENOENT:
7438  		return "ICE_AQ_RC_ENOENT";
7439  	case ICE_AQ_RC_ENOMEM:
7440  		return "ICE_AQ_RC_ENOMEM";
7441  	case ICE_AQ_RC_EBUSY:
7442  		return "ICE_AQ_RC_EBUSY";
7443  	case ICE_AQ_RC_EEXIST:
7444  		return "ICE_AQ_RC_EEXIST";
7445  	case ICE_AQ_RC_EINVAL:
7446  		return "ICE_AQ_RC_EINVAL";
7447  	case ICE_AQ_RC_ENOSPC:
7448  		return "ICE_AQ_RC_ENOSPC";
7449  	case ICE_AQ_RC_ENOSYS:
7450  		return "ICE_AQ_RC_ENOSYS";
7451  	case ICE_AQ_RC_EMODE:
7452  		return "ICE_AQ_RC_EMODE";
7453  	case ICE_AQ_RC_ENOSEC:
7454  		return "ICE_AQ_RC_ENOSEC";
7455  	case ICE_AQ_RC_EBADSIG:
7456  		return "ICE_AQ_RC_EBADSIG";
7457  	case ICE_AQ_RC_ESVN:
7458  		return "ICE_AQ_RC_ESVN";
7459  	case ICE_AQ_RC_EBADMAN:
7460  		return "ICE_AQ_RC_EBADMAN";
7461  	case ICE_AQ_RC_EBADBUF:
7462  		return "ICE_AQ_RC_EBADBUF";
7463  	}
7464  
7465  	return "ICE_AQ_RC_UNKNOWN";
7466  }
7467  
7468  /**
7469   * ice_set_rss_lut - Set RSS LUT
7470   * @vsi: Pointer to VSI structure
7471   * @lut: Lookup table
7472   * @lut_size: Lookup table size
7473   *
7474   * Returns 0 on success, negative on failure
7475   */
7476  int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size)
7477  {
7478  	struct ice_aq_get_set_rss_lut_params params = {};
7479  	struct ice_hw *hw = &vsi->back->hw;
7480  	int status;
7481  
7482  	if (!lut)
7483  		return -EINVAL;
7484  
7485  	params.vsi_handle = vsi->idx;
7486  	params.lut_size = lut_size;
7487  	params.lut_type = vsi->rss_lut_type;
7488  	params.lut = lut;
7489  
7490  	status = ice_aq_set_rss_lut(hw, &params);
7491  	if (status)
7492  		dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS lut, err %d aq_err %s\n",
7493  			status, ice_aq_str(hw->adminq.sq_last_status));
7494  
7495  	return status;
7496  }
7497  
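/*
 * Illustrative sketch (not part of the driver): callers of ice_set_rss_lut()
 * typically fill the table by spreading entries round-robin across the
 * active Rx queues (ice_fill_rss_lut() in ice_lib.c does this for the
 * driver). The helper name below is hypothetical.
 */
static inline void ice_sketch_fill_rss_lut(u8 *lut, u16 lut_size, u16 num_rxq)
{
	u16 i;

	for (i = 0; i < lut_size; i++)
		lut[i] = i % num_rxq;	/* map each LUT entry to an Rx queue */
}
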
7498  /**
7499   * ice_set_rss_key - Set RSS key
7500   * @vsi: Pointer to the VSI structure
7501   * @seed: RSS hash seed
7502   *
7503   * Returns 0 on success, negative on failure
7504   */
7505  int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed)
7506  {
7507  	struct ice_hw *hw = &vsi->back->hw;
7508  	int status;
7509  
7510  	if (!seed)
7511  		return -EINVAL;
7512  
7513  	status = ice_aq_set_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed);
7514  	if (status)
7515  		dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS key, err %d aq_err %s\n",
7516  			status, ice_aq_str(hw->adminq.sq_last_status));
7517  
7518  	return status;
7519  }
7520  
7521  /**
7522   * ice_get_rss_lut - Get RSS LUT
7523   * @vsi: Pointer to VSI structure
7524   * @lut: Buffer to store the lookup table entries
7525   * @lut_size: Size of buffer to store the lookup table entries
7526   *
7527   * Returns 0 on success, negative on failure
7528   */
7529  int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size)
7530  {
7531  	struct ice_aq_get_set_rss_lut_params params = {};
7532  	struct ice_hw *hw = &vsi->back->hw;
7533  	int status;
7534  
7535  	if (!lut)
7536  		return -EINVAL;
7537  
7538  	params.vsi_handle = vsi->idx;
7539  	params.lut_size = lut_size;
7540  	params.lut_type = vsi->rss_lut_type;
7541  	params.lut = lut;
7542  
7543  	status = ice_aq_get_rss_lut(hw, &params);
7544  	if (status)
7545  		dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS lut, err %d aq_err %s\n",
7546  			status, ice_aq_str(hw->adminq.sq_last_status));
7547  
7548  	return status;
7549  }
7550  
7551  /**
7552   * ice_get_rss_key - Get RSS key
7553   * @vsi: Pointer to VSI structure
7554   * @seed: Buffer to store the key in
7555   *
7556   * Returns 0 on success, negative on failure
7557   */
7558  int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed)
7559  {
7560  	struct ice_hw *hw = &vsi->back->hw;
7561  	int status;
7562  
7563  	if (!seed)
7564  		return -EINVAL;
7565  
7566  	status = ice_aq_get_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed);
7567  	if (status)
7568  		dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS key, err %d aq_err %s\n",
7569  			status, ice_aq_str(hw->adminq.sq_last_status));
7570  
7571  	return status;
7572  }
7573  
7574  /**
7575   * ice_bridge_getlink - Get the hardware bridge mode
7576   * @skb: skb buff
7577   * @pid: process ID
7578   * @seq: RTNL message seq
7579   * @dev: the netdev being configured
7580   * @filter_mask: filter mask passed in
7581   * @nlflags: netlink flags passed in
7582   *
7583   * Return the bridge mode (VEB/VEPA)
7584   */
7585  static int
7586  ice_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
7587  		   struct net_device *dev, u32 filter_mask, int nlflags)
7588  {
7589  	struct ice_netdev_priv *np = netdev_priv(dev);
7590  	struct ice_vsi *vsi = np->vsi;
7591  	struct ice_pf *pf = vsi->back;
7592  	u16 bmode;
7593  
7594  	bmode = pf->first_sw->bridge_mode;
7595  
7596  	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, bmode, 0, 0, nlflags,
7597  				       filter_mask, NULL);
7598  }
7599  
7600  /**
7601   * ice_vsi_update_bridge_mode - Update VSI for switching bridge mode (VEB/VEPA)
7602   * @vsi: Pointer to VSI structure
7603   * @bmode: Hardware bridge mode (VEB/VEPA)
7604   *
7605   * Returns 0 on success, negative on failure
7606   */
7607  static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode)
7608  {
7609  	struct ice_aqc_vsi_props *vsi_props;
7610  	struct ice_hw *hw = &vsi->back->hw;
7611  	struct ice_vsi_ctx *ctxt;
7612  	int ret;
7613  
7614  	vsi_props = &vsi->info;
7615  
7616  	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
7617  	if (!ctxt)
7618  		return -ENOMEM;
7619  
7620  	ctxt->info = vsi->info;
7621  
7622  	if (bmode == BRIDGE_MODE_VEB)
7623  		/* change from VEPA to VEB mode */
7624  		ctxt->info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
7625  	else
7626  		/* change from VEB to VEPA mode */
7627  		ctxt->info.sw_flags &= ~ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
7628  	ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID);
7629  
7630  	ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
7631  	if (ret) {
7632  		dev_err(ice_pf_to_dev(vsi->back), "update VSI for bridge mode failed, bmode = %d err %d aq_err %s\n",
7633  			bmode, ret, ice_aq_str(hw->adminq.sq_last_status));
7634  		goto out;
7635  	}
7636  	/* Update sw flags for book keeping */
7637  	vsi_props->sw_flags = ctxt->info.sw_flags;
7638  
7639  out:
7640  	kfree(ctxt);
7641  	return ret;
7642  }
7643  
7644  /**
7645   * ice_bridge_setlink - Set the hardware bridge mode
7646   * @dev: the netdev being configured
7647   * @nlh: RTNL message
7648   * @flags: bridge setlink flags
7649   * @extack: netlink extended ack
7650   *
7651   * Sets the bridge mode (VEB/VEPA) of the switch to which the netdev (VSI) is
7652   * hooked up. Iterates through the PF VSI list and sets the loopback mode (if
7653   * not already set) for all VSIs connected to this switch, and also updates the
7654   * unicast switch filter rules for the corresponding switch of the netdev.
7655   */
7656  static int
7657  ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
7658  		   u16 __always_unused flags,
7659  		   struct netlink_ext_ack __always_unused *extack)
7660  {
7661  	struct ice_netdev_priv *np = netdev_priv(dev);
7662  	struct ice_pf *pf = np->vsi->back;
7663  	struct nlattr *attr, *br_spec;
7664  	struct ice_hw *hw = &pf->hw;
7665  	struct ice_sw *pf_sw;
7666  	int rem, v, err = 0;
7667  
7668  	pf_sw = pf->first_sw;
7669  	/* find the attribute in the netlink message */
7670  	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
7671  
7672  	nla_for_each_nested(attr, br_spec, rem) {
7673  		__u16 mode;
7674  
7675  		if (nla_type(attr) != IFLA_BRIDGE_MODE)
7676  			continue;
7677  		mode = nla_get_u16(attr);
7678  		if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
7679  			return -EINVAL;
7680  		/* Continue if bridge mode is not being flipped */
7681  		if (mode == pf_sw->bridge_mode)
7682  			continue;
7683  		/* Iterate through the PF VSI list and update the loopback
7684  		 * mode of each VSI
7685  		 */
7686  		ice_for_each_vsi(pf, v) {
7687  			if (!pf->vsi[v])
7688  				continue;
7689  			err = ice_vsi_update_bridge_mode(pf->vsi[v], mode);
7690  			if (err)
7691  				return err;
7692  		}
7693  
7694  		hw->evb_veb = (mode == BRIDGE_MODE_VEB);
7695  		/* Update the unicast switch filter rules for the corresponding
7696  		 * switch of the netdev
7697  		 */
7698  		err = ice_update_sw_rule_bridge_mode(hw);
7699  		if (err) {
7700  			netdev_err(dev, "switch rule update failed, mode = %d err %d aq_err %s\n",
7701  				   mode, err,
7702  				   ice_aq_str(hw->adminq.sq_last_status));
7703  			/* revert hw->evb_veb */
7704  			hw->evb_veb = (pf_sw->bridge_mode == BRIDGE_MODE_VEB);
7705  			return err;
7706  		}
7707  
7708  		pf_sw->bridge_mode = mode;
7709  	}
7710  
7711  	return 0;
7712  }
7713  
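/*
 * Usage note (illustrative): ice_bridge_setlink() above is reached through
 * the standard ndo_bridge_setlink path, typically driven from userspace
 * with iproute2, e.g.:
 *
 *	bridge link set dev <pf-netdev> hwmode veb
 *	bridge link set dev <pf-netdev> hwmode vepa
 */
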
7714  /**
7715   * ice_tx_timeout - Respond to a Tx Hang
7716   * @netdev: network interface device structure
7717   * @txqueue: Tx queue
7718   */
7719  static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue)
7720  {
7721  	struct ice_netdev_priv *np = netdev_priv(netdev);
7722  	struct ice_tx_ring *tx_ring = NULL;
7723  	struct ice_vsi *vsi = np->vsi;
7724  	struct ice_pf *pf = vsi->back;
7725  	u32 i;
7726  
7727  	pf->tx_timeout_count++;
7728  
7729  	/* Check if PFC is enabled for the TC to which the queue belongs.
7730  	 * If so, the Tx timeout is not caused by a hung queue and there is
7731  	 * no need to reset and rebuild
7732  	 */
7733  	if (ice_is_pfc_causing_hung_q(pf, txqueue)) {
7734  		dev_info(ice_pf_to_dev(pf), "Fake Tx hang detected on queue %u, timeout caused by PFC storm\n",
7735  			 txqueue);
7736  		return;
7737  	}
7738  
7739  	/* now that we have an index, find the tx_ring struct */
7740  	ice_for_each_txq(vsi, i)
7741  		if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
7742  			if (txqueue == vsi->tx_rings[i]->q_index) {
7743  				tx_ring = vsi->tx_rings[i];
7744  				break;
7745  			}
7746  
7747  	/* Reset recovery level if enough time has elapsed after last timeout.
7748  	 * Also ensure no new reset action happens before next timeout period.
7749  	 */
7750  	if (time_after(jiffies, (pf->tx_timeout_last_recovery + HZ * 20)))
7751  		pf->tx_timeout_recovery_level = 1;
7752  	else if (time_before(jiffies, (pf->tx_timeout_last_recovery +
7753  				       netdev->watchdog_timeo)))
7754  		return;
7755  
7756  	if (tx_ring) {
7757  		struct ice_hw *hw = &pf->hw;
7758  		u32 head, val = 0;
7759  
7760  		head = (rd32(hw, QTX_COMM_HEAD(vsi->txq_map[txqueue])) &
7761  			QTX_COMM_HEAD_HEAD_M) >> QTX_COMM_HEAD_HEAD_S;
7762  		/* Read interrupt register */
7763  		val = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx));
7764  
7765  		netdev_info(netdev, "tx_timeout: VSI_num: %d, Q %u, NTC: 0x%x, HW_HEAD: 0x%x, NTU: 0x%x, INT: 0x%x\n",
7766  			    vsi->vsi_num, txqueue, tx_ring->next_to_clean,
7767  			    head, tx_ring->next_to_use, val);
7768  	}
7769  
7770  	pf->tx_timeout_last_recovery = jiffies;
7771  	netdev_info(netdev, "tx_timeout recovery level %d, txqueue %u\n",
7772  		    pf->tx_timeout_recovery_level, txqueue);
7773  
7774  	switch (pf->tx_timeout_recovery_level) {
7775  	case 1:
7776  		set_bit(ICE_PFR_REQ, pf->state);
7777  		break;
7778  	case 2:
7779  		set_bit(ICE_CORER_REQ, pf->state);
7780  		break;
7781  	case 3:
7782  		set_bit(ICE_GLOBR_REQ, pf->state);
7783  		break;
7784  	default:
7785  		netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n");
7786  		set_bit(ICE_DOWN, pf->state);
7787  		set_bit(ICE_VSI_NEEDS_RESTART, vsi->state);
7788  		set_bit(ICE_SERVICE_DIS, pf->state);
7789  		break;
7790  	}
7791  
7792  	ice_service_task_schedule(pf);
7793  	pf->tx_timeout_recovery_level++;
7794  }
7795  
7796  /**
7797   * ice_setup_tc_cls_flower - flower classifier offloads
7798   * @np: net device to configure
7799   * @filter_dev: device on which filter is added
7800   * @cls_flower: offload data
7801   */
7802  static int
7803  ice_setup_tc_cls_flower(struct ice_netdev_priv *np,
7804  			struct net_device *filter_dev,
7805  			struct flow_cls_offload *cls_flower)
7806  {
7807  	struct ice_vsi *vsi = np->vsi;
7808  
7809  	if (cls_flower->common.chain_index)
7810  		return -EOPNOTSUPP;
7811  
7812  	switch (cls_flower->command) {
7813  	case FLOW_CLS_REPLACE:
7814  		return ice_add_cls_flower(filter_dev, vsi, cls_flower);
7815  	case FLOW_CLS_DESTROY:
7816  		return ice_del_cls_flower(vsi, cls_flower);
7817  	default:
7818  		return -EINVAL;
7819  	}
7820  }
7821  
7822  /**
7823   * ice_setup_tc_block_cb - callback handler registered for TC block
7824   * @type: TC SETUP type
7825   * @type_data: TC flower offload data that contains user input
7826   * @cb_priv: netdev private data
7827   */
7828  static int
7829  ice_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
7830  {
7831  	struct ice_netdev_priv *np = cb_priv;
7832  
7833  	switch (type) {
7834  	case TC_SETUP_CLSFLOWER:
7835  		return ice_setup_tc_cls_flower(np, np->vsi->netdev,
7836  					       type_data);
7837  	default:
7838  		return -EOPNOTSUPP;
7839  	}
7840  }
7841  
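/*
 * Usage note (illustrative): the flower offload dispatched above is
 * typically installed from userspace with tc. For ADQ, traffic is steered
 * to a channel by targeting its traffic class, e.g. (interface name and
 * match values are examples only):
 *
 *	tc qdisc add dev <pf-netdev> clsact
 *	tc filter add dev <pf-netdev> protocol ip ingress \
 *		flower dst_ip 192.168.1.10 ip_proto tcp dst_port 5001 \
 *		skip_sw hw_tc 1
 */
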
7842  /**
7843   * ice_validate_mqprio_qopt - Validate TCF input parameters
7844   * @vsi: Pointer to VSI
7845   * @mqprio_qopt: input parameters for mqprio queue configuration
7846   *
7847   * This function validates MQPRIO params, such as qcount (power of 2 wherever
7848   * needed), and makes sure the user doesn't specify a qcount or BW rate limit
7849   * for more TCs than "num_tc"
7850   */
7851  static int
7852  ice_validate_mqprio_qopt(struct ice_vsi *vsi,
7853  			 struct tc_mqprio_qopt_offload *mqprio_qopt)
7854  {
7855  	u64 sum_max_rate = 0, sum_min_rate = 0;
7856  	int non_power_of_2_qcount = 0;
7857  	struct ice_pf *pf = vsi->back;
7858  	int max_rss_q_cnt = 0;
7859  	struct device *dev;
7860  	int i, speed;
7861  	u8 num_tc;
7862  
7863  	if (vsi->type != ICE_VSI_PF)
7864  		return -EINVAL;
7865  
7866  	if (mqprio_qopt->qopt.offset[0] != 0 ||
7867  	    mqprio_qopt->qopt.num_tc < 1 ||
7868  	    mqprio_qopt->qopt.num_tc > ICE_CHNL_MAX_TC)
7869  		return -EINVAL;
7870  
7871  	dev = ice_pf_to_dev(pf);
7872  	vsi->ch_rss_size = 0;
7873  	num_tc = mqprio_qopt->qopt.num_tc;
7874  
7875  	for (i = 0; num_tc; i++) {
7876  		int qcount = mqprio_qopt->qopt.count[i];
7877  		u64 max_rate, min_rate, rem;
7878  
7879  		if (!qcount)
7880  			return -EINVAL;
7881  
7882  		if (is_power_of_2(qcount)) {
7883  			if (non_power_of_2_qcount &&
7884  			    qcount > non_power_of_2_qcount) {
7885  				dev_err(dev, "qcount[%d] cannot be greater than non power of 2 qcount[%d]\n",
7886  					qcount, non_power_of_2_qcount);
7887  				return -EINVAL;
7888  			}
7889  			if (qcount > max_rss_q_cnt)
7890  				max_rss_q_cnt = qcount;
7891  		} else {
7892  			if (non_power_of_2_qcount &&
7893  			    qcount != non_power_of_2_qcount) {
7894  				dev_err(dev, "Only one non power of 2 qcount allowed[%d,%d]\n",
7895  					qcount, non_power_of_2_qcount);
7896  				return -EINVAL;
7897  			}
7898  			if (qcount < max_rss_q_cnt) {
7899  				dev_err(dev, "non power of 2 qcount[%d] cannot be less than other qcount[%d]\n",
7900  					qcount, max_rss_q_cnt);
7901  				return -EINVAL;
7902  			}
7903  			max_rss_q_cnt = qcount;
7904  			non_power_of_2_qcount = qcount;
7905  		}
7906  
7907  		/* TC command takes input in K/M/Gbps or K/M/Gbit etc but
7908  		 * converts the bandwidth rate limit into Bytes/s when
7909  		 * passing it down to the driver. So convert input bandwidth
7910  		 * from Bytes/s to Kbps
7911  		 */
7912  		max_rate = mqprio_qopt->max_rate[i];
7913  		max_rate = div_u64(max_rate, ICE_BW_KBPS_DIVISOR);
7914  		sum_max_rate += max_rate;
7915  
7916  		/* min_rate is minimum guaranteed rate and it can't be zero */
7917  		min_rate = mqprio_qopt->min_rate[i];
7918  		min_rate = div_u64(min_rate, ICE_BW_KBPS_DIVISOR);
7919  		sum_min_rate += min_rate;
7920  
7921  		if (min_rate && min_rate < ICE_MIN_BW_LIMIT) {
7922  			dev_err(dev, "TC%d: min_rate(%llu Kbps) < %u Kbps\n", i,
7923  				min_rate, ICE_MIN_BW_LIMIT);
7924  			return -EINVAL;
7925  		}
7926  
7927  		iter_div_u64_rem(min_rate, ICE_MIN_BW_LIMIT, &rem);
7928  		if (rem) {
7929  			dev_err(dev, "TC%d: Min Rate not multiple of %u Kbps",
7930  				i, ICE_MIN_BW_LIMIT);
7931  			return -EINVAL;
7932  		}
7933  
7934  		iter_div_u64_rem(max_rate, ICE_MIN_BW_LIMIT, &rem);
7935  		if (rem) {
7936  			dev_err(dev, "TC%d: Max Rate not multiple of %u Kbps",
7937  				i, ICE_MIN_BW_LIMIT);
7938  			return -EINVAL;
7939  		}
7940  
7941  		/* min_rate can't be more than max_rate, except when max_rate
7942  		 * is zero (implies max_rate sought is max line rate). In such
7943  		 * a case min_rate can be more than max.
7944  		 */
7945  		if (max_rate && min_rate > max_rate) {
7946  			dev_err(dev, "min_rate %llu Kbps can't be more than max_rate %llu Kbps\n",
7947  				min_rate, max_rate);
7948  			return -EINVAL;
7949  		}
7950  
7951  		if (i >= mqprio_qopt->qopt.num_tc - 1)
7952  			break;
7953  		if (mqprio_qopt->qopt.offset[i + 1] !=
7954  		    (mqprio_qopt->qopt.offset[i] + qcount))
7955  			return -EINVAL;
7956  	}
7957  	if (vsi->num_rxq <
7958  	    (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
7959  		return -EINVAL;
7960  	if (vsi->num_txq <
7961  	    (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
7962  		return -EINVAL;
7963  
7964  	speed = ice_get_link_speed_kbps(vsi);
7965  	if (sum_max_rate && sum_max_rate > (u64)speed) {
7966  		dev_err(dev, "Invalid max Tx rate(%llu) Kbps > speed(%u) Kbps specified\n",
7967  			sum_max_rate, speed);
7968  		return -EINVAL;
7969  	}
7970  	if (sum_min_rate && sum_min_rate > (u64)speed) {
7971  		dev_err(dev, "Invalid min Tx rate(%llu) Kbps > speed (%u) Kbps specified\n",
7972  			sum_min_rate, speed);
7973  		return -EINVAL;
7974  	}
7975  
7976  	/* make sure vsi->ch_rss_size is set correctly based on TC's qcount */
7977  	vsi->ch_rss_size = max_rss_q_cnt;
7978  
7979  	return 0;
7980  }
7981  
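/*
 * Usage note (illustrative): the mqprio parameters validated above come from
 * a channel-mode qdisc configured from userspace, e.g. (interface name,
 * queue counts, offsets and rates are examples only, and the summed max_rate
 * must not exceed the link speed):
 *
 *	tc qdisc add dev <pf-netdev> root mqprio \
 *		num_tc 2 map 0 0 0 0 1 1 1 1 queues 4@0 4@4 \
 *		hw 1 mode channel shaper bw_rlimit \
 *		min_rate 1Gbit 2Gbit max_rate 4Gbit 5Gbit
 *
 * Here each qcount is a power of two, offset[1] equals
 * offset[0] + count[0], and the per-TC rates are multiples of
 * ICE_MIN_BW_LIMIT, so the checks above pass.
 */
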
7982  /**
7983   * ice_add_vsi_to_fdir - add a VSI to the flow director group for PF
7984   * @pf: ptr to PF device
7985   * @vsi: ptr to VSI
7986   */
7987  static int ice_add_vsi_to_fdir(struct ice_pf *pf, struct ice_vsi *vsi)
7988  {
7989  	struct device *dev = ice_pf_to_dev(pf);
7990  	bool added = false;
7991  	struct ice_hw *hw;
7992  	int flow;
7993  
7994  	if (!(vsi->num_gfltr || vsi->num_bfltr))
7995  		return -EINVAL;
7996  
7997  	hw = &pf->hw;
7998  	for (flow = 0; flow < ICE_FLTR_PTYPE_MAX; flow++) {
7999  		struct ice_fd_hw_prof *prof;
8000  		int tun, status;
8001  		u64 entry_h;
8002  
8003  		if (!(hw->fdir_prof && hw->fdir_prof[flow] &&
8004  		      hw->fdir_prof[flow]->cnt))
8005  			continue;
8006  
8007  		for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
8008  			enum ice_flow_priority prio;
8009  			u64 prof_id;
8010  
8011  			/* add this VSI to FDir profile for this flow */
8012  			prio = ICE_FLOW_PRIO_NORMAL;
8013  			prof = hw->fdir_prof[flow];
8014  			prof_id = flow + tun * ICE_FLTR_PTYPE_MAX;
8015  			status = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id,
8016  						    prof->vsi_h[0], vsi->idx,
8017  						    prio, prof->fdir_seg[tun],
8018  						    &entry_h);
8019  			if (status) {
8020  				dev_err(dev, "channel VSI idx %d, not able to add to group %d\n",
8021  					vsi->idx, flow);
8022  				continue;
8023  			}
8024  
8025  			prof->entry_h[prof->cnt][tun] = entry_h;
8026  		}
8027  
8028  		/* store VSI for filter replay and delete */
8029  		prof->vsi_h[prof->cnt] = vsi->idx;
8030  		prof->cnt++;
8031  
8032  		added = true;
8033  		dev_dbg(dev, "VSI idx %d added to fdir group %d\n", vsi->idx,
8034  			flow);
8035  	}
8036  
8037  	if (!added)
8038  		dev_dbg(dev, "VSI idx %d not added to fdir groups\n", vsi->idx);
8039  
8040  	return 0;
8041  }
8042  
8043  /**
8044   * ice_add_channel - add a channel by adding VSI
8045   * @pf: ptr to PF device
8046   * @sw_id: underlying HW switching element ID
8047   * @ch: ptr to channel structure
8048   *
8049   * Add a channel (VSI) using add_vsi and queue_map
8050   */
8051  static int ice_add_channel(struct ice_pf *pf, u16 sw_id, struct ice_channel *ch)
8052  {
8053  	struct device *dev = ice_pf_to_dev(pf);
8054  	struct ice_vsi *vsi;
8055  
8056  	if (ch->type != ICE_VSI_CHNL) {
8057  		dev_err(dev, "add new VSI failed, ch->type %d\n", ch->type);
8058  		return -EINVAL;
8059  	}
8060  
8061  	vsi = ice_chnl_vsi_setup(pf, pf->hw.port_info, ch);
8062  	if (!vsi || vsi->type != ICE_VSI_CHNL) {
8063  		dev_err(dev, "create chnl VSI failure\n");
8064  		return -EINVAL;
8065  	}
8066  
8067  	ice_add_vsi_to_fdir(pf, vsi);
8068  
8069  	ch->sw_id = sw_id;
8070  	ch->vsi_num = vsi->vsi_num;
8071  	ch->info.mapping_flags = vsi->info.mapping_flags;
8072  	ch->ch_vsi = vsi;
8073  	/* set the back pointer of channel for newly created VSI */
8074  	vsi->ch = ch;
8075  
8076  	memcpy(&ch->info.q_mapping, &vsi->info.q_mapping,
8077  	       sizeof(vsi->info.q_mapping));
8078  	memcpy(&ch->info.tc_mapping, vsi->info.tc_mapping,
8079  	       sizeof(vsi->info.tc_mapping));
8080  
8081  	return 0;
8082  }
8083  
8084  /**
8085   * ice_chnl_cfg_res - configure channel specific resources
8086   * @vsi: the VSI being setup
8087   * @ch: ptr to channel structure
8088   *
8089   * Configure channel specific resources such as rings and vectors.
8090   */
8091  static void ice_chnl_cfg_res(struct ice_vsi *vsi, struct ice_channel *ch)
8092  {
8093  	int i;
8094  
8095  	for (i = 0; i < ch->num_txq; i++) {
8096  		struct ice_q_vector *tx_q_vector, *rx_q_vector;
8097  		struct ice_ring_container *rc;
8098  		struct ice_tx_ring *tx_ring;
8099  		struct ice_rx_ring *rx_ring;
8100  
8101  		tx_ring = vsi->tx_rings[ch->base_q + i];
8102  		rx_ring = vsi->rx_rings[ch->base_q + i];
8103  		if (!tx_ring || !rx_ring)
8104  			continue;
8105  
8106  		/* mark the rings as channel enabled */
8107  		tx_ring->ch = ch;
8108  		rx_ring->ch = ch;
8109  
8110  		/* following code block sets up vector specific attributes */
8111  		tx_q_vector = tx_ring->q_vector;
8112  		rx_q_vector = rx_ring->q_vector;
8113  		if (!tx_q_vector && !rx_q_vector)
8114  			continue;
8115  
8116  		if (tx_q_vector) {
8117  			tx_q_vector->ch = ch;
8118  			/* setup Tx and Rx ITR setting if DIM is off */
8119  			rc = &tx_q_vector->tx;
8120  			if (!ITR_IS_DYNAMIC(rc))
8121  				ice_write_itr(rc, rc->itr_setting);
8122  		}
8123  		if (rx_q_vector) {
8124  			rx_q_vector->ch = ch;
8125  			/* setup Tx and Rx ITR setting if DIM is off */
8126  			rc = &rx_q_vector->rx;
8127  			if (!ITR_IS_DYNAMIC(rc))
8128  				ice_write_itr(rc, rc->itr_setting);
8129  		}
8130  	}
8131  
8132  	/* it is safe to assume that, if the channel has a non-zero num_txq or
8133  	 * num_rxq, the GLINT_ITR register will have been written to perform an
8134  	 * in-context update, hence perform the flush
8135  	 */
8136  	if (ch->num_txq || ch->num_rxq)
8137  		ice_flush(&vsi->back->hw);
8138  }
8139  
8140  /**
8141   * ice_cfg_chnl_all_res - configure channel resources
8142   * @vsi: ptr to main VSI
8143   * @ch: ptr to channel structure
8144   *
8145   * This function configures channel specific resources such as flow-director
8146   * counter index, and other resources such as queues, vectors, ITR settings
8147   */
8148  static void
8149  ice_cfg_chnl_all_res(struct ice_vsi *vsi, struct ice_channel *ch)
8150  {
8151  	/* configure channel (aka ADQ) resources such as queues, vectors,
8152  	 * ITR settings for channel specific vectors and anything else
8153  	 */
8154  	ice_chnl_cfg_res(vsi, ch);
8155  }
8156  
8157  /**
8158   * ice_setup_hw_channel - setup new channel
8159   * @pf: ptr to PF device
8160   * @vsi: the VSI being setup
8161   * @ch: ptr to channel structure
8162   * @sw_id: underlying HW switching element ID
8163   * @type: type of channel to be created (VMDq2/VF)
8164   *
8165   * Setup new channel (VSI) based on specified type (VMDq2/VF)
8166   * and configures Tx rings accordingly
8167   */
8168  static int
8169  ice_setup_hw_channel(struct ice_pf *pf, struct ice_vsi *vsi,
8170  		     struct ice_channel *ch, u16 sw_id, u8 type)
8171  {
8172  	struct device *dev = ice_pf_to_dev(pf);
8173  	int ret;
8174  
8175  	ch->base_q = vsi->next_base_q;
8176  	ch->type = type;
8177  
8178  	ret = ice_add_channel(pf, sw_id, ch);
8179  	if (ret) {
8180  		dev_err(dev, "failed to add_channel using sw_id %u\n", sw_id);
8181  		return ret;
8182  	}
8183  
8184  	/* configure/setup ADQ specific resources */
8185  	ice_cfg_chnl_all_res(vsi, ch);
8186  
8187  	/* make sure to update the next_base_q so that subsequent channel's
8188  	 * (aka ADQ) VSI queue map is correct
8189  	 */
8190  	vsi->next_base_q = vsi->next_base_q + ch->num_rxq;
8191  	dev_dbg(dev, "added channel: vsi_num %u, num_rxq %u\n", ch->vsi_num,
8192  		ch->num_rxq);
8193  
8194  	return 0;
8195  }
8196  
8197  /**
8198   * ice_setup_channel - setup new channel using uplink element
8199   * @pf: ptr to PF device
8200   * @vsi: the VSI being setup
8201   * @ch: ptr to channel structure
8202   *
8203   * Setup new channel (VSI) based on specified type (VMDq2/VF)
8204   * and uplink switching element
8205   */
8206  static bool
8207  ice_setup_channel(struct ice_pf *pf, struct ice_vsi *vsi,
8208  		  struct ice_channel *ch)
8209  {
8210  	struct device *dev = ice_pf_to_dev(pf);
8211  	u16 sw_id;
8212  	int ret;
8213  
8214  	if (vsi->type != ICE_VSI_PF) {
8215  		dev_err(dev, "unsupported parent VSI type(%d)\n", vsi->type);
8216  		return false;
8217  	}
8218  
8219  	sw_id = pf->first_sw->sw_id;
8220  
8221  	/* create channel (VSI) */
8222  	ret = ice_setup_hw_channel(pf, vsi, ch, sw_id, ICE_VSI_CHNL);
8223  	if (ret) {
8224  		dev_err(dev, "failed to setup hw_channel\n");
8225  		return false;
8226  	}
8227  	dev_dbg(dev, "successfully created channel()\n");
8228  
8229  	return ch->ch_vsi ? true : false;
8230  }
8231  
8232  /**
8233   * ice_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate
8234   * @vsi: VSI to be configured
8235   * @max_tx_rate: max Tx rate in Kbps to be configured as maximum BW limit
8236   * @min_tx_rate: min Tx rate in Kbps to be configured as minimum BW limit
8237   */
8238  static int
8239  ice_set_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate, u64 min_tx_rate)
8240  {
8241  	int err;
8242  
8243  	err = ice_set_min_bw_limit(vsi, min_tx_rate);
8244  	if (err)
8245  		return err;
8246  
8247  	return ice_set_max_bw_limit(vsi, max_tx_rate);
8248  }
8249  
8250  /**
8251   * ice_create_q_channel - function to create channel
8252   * @vsi: VSI to be configured
8253   * @ch: ptr to channel (it contains channel specific params)
8254   *
8255   * This function creates channel (VSI) using num_queues specified by user,
8256   * reconfigs RSS if needed.
8257   */
8258  static int ice_create_q_channel(struct ice_vsi *vsi, struct ice_channel *ch)
8259  {
8260  	struct ice_pf *pf = vsi->back;
8261  	struct device *dev;
8262  
8263  	if (!ch)
8264  		return -EINVAL;
8265  
8266  	dev = ice_pf_to_dev(pf);
8267  	if (!ch->num_txq || !ch->num_rxq) {
8268  		dev_err(dev, "Invalid num_queues requested: %d\n", ch->num_rxq);
8269  		return -EINVAL;
8270  	}
8271  
8272  	if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_txq) {
8273  		dev_err(dev, "cnt_q_avail (%u) less than num_queues %d\n",
8274  			vsi->cnt_q_avail, ch->num_txq);
8275  		return -EINVAL;
8276  	}
8277  
8278  	if (!ice_setup_channel(pf, vsi, ch)) {
8279  		dev_info(dev, "Failed to setup channel\n");
8280  		return -EINVAL;
8281  	}
8282  	/* configure BW rate limit */
8283  	if (ch->ch_vsi && (ch->max_tx_rate || ch->min_tx_rate)) {
8284  		int ret;
8285  
8286  		ret = ice_set_bw_limit(ch->ch_vsi, ch->max_tx_rate,
8287  				       ch->min_tx_rate);
8288  		if (ret)
8289  			dev_err(dev, "failed to set Tx rate of %llu Kbps for VSI(%u)\n",
8290  				ch->max_tx_rate, ch->ch_vsi->vsi_num);
8291  		else
8292  			dev_dbg(dev, "set Tx rate of %llu Kbps for VSI(%u)\n",
8293  				ch->max_tx_rate, ch->ch_vsi->vsi_num);
8294  	}
8295  
8296  	vsi->cnt_q_avail -= ch->num_txq;
8297  
8298  	return 0;
8299  }
8300  
8301  /**
8302   * ice_rem_all_chnl_fltrs - removes all channel filters
8303   * @pf: ptr to PF, TC-flower based filter are tracked at PF level
8304   *
8305   * Remove all advanced switch filters only if they are channel specific
8306   * tc-flower based filter
8307   */
8308  static void ice_rem_all_chnl_fltrs(struct ice_pf *pf)
8309  {
8310  	struct ice_tc_flower_fltr *fltr;
8311  	struct hlist_node *node;
8312  
8313  	/* to remove all channel filters, iterate an ordered list of filters */
8314  	hlist_for_each_entry_safe(fltr, node,
8315  				  &pf->tc_flower_fltr_list,
8316  				  tc_flower_node) {
8317  		struct ice_rule_query_data rule;
8318  		int status;
8319  
8320  		/* for now process only channel specific filters */
8321  		if (!ice_is_chnl_fltr(fltr))
8322  			continue;
8323  
8324  		rule.rid = fltr->rid;
8325  		rule.rule_id = fltr->rule_id;
8326  		rule.vsi_handle = fltr->dest_vsi_handle;
8327  		status = ice_rem_adv_rule_by_id(&pf->hw, &rule);
8328  		if (status) {
8329  			if (status == -ENOENT)
8330  				dev_dbg(ice_pf_to_dev(pf), "TC flower filter (rule_id %u) does not exist\n",
8331  					rule.rule_id);
8332  			else
8333  				dev_err(ice_pf_to_dev(pf), "failed to delete TC flower filter, status %d\n",
8334  					status);
8335  		} else if (fltr->dest_vsi) {
8336  			/* update advanced switch filter count */
8337  			if (fltr->dest_vsi->type == ICE_VSI_CHNL) {
8338  				u32 flags = fltr->flags;
8339  
8340  				fltr->dest_vsi->num_chnl_fltr--;
8341  				if (flags & (ICE_TC_FLWR_FIELD_DST_MAC |
8342  					     ICE_TC_FLWR_FIELD_ENC_DST_MAC))
8343  					pf->num_dmac_chnl_fltrs--;
8344  			}
8345  		}
8346  
8347  		hlist_del(&fltr->tc_flower_node);
8348  		kfree(fltr);
8349  	}
8350  }
8351  
8352  /**
8353   * ice_remove_q_channels - Remove queue channels for the TCs
8354   * @vsi: VSI to be configured
8355   * @rem_fltr: delete advanced switch filter or not
8356   *
8357   * Remove queue channels for the TCs
8358   */
8359  static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_fltr)
8360  {
8361  	struct ice_channel *ch, *ch_tmp;
8362  	struct ice_pf *pf = vsi->back;
8363  	int i;
8364  
8365  	/* remove all tc-flower based filters if they are channel filters only */
8366  	if (rem_fltr)
8367  		ice_rem_all_chnl_fltrs(pf);
8368  
8369  	/* remove ntuple filters since queue configuration is being changed */
8370  	if (vsi->netdev->features & NETIF_F_NTUPLE) {
8371  		struct ice_hw *hw = &pf->hw;
8372  
8373  		mutex_lock(&hw->fdir_fltr_lock);
8374  		ice_fdir_del_all_fltrs(vsi);
8375  		mutex_unlock(&hw->fdir_fltr_lock);
8376  	}
8377  
8378  	/* perform cleanup for channels if they exist */
8379  	list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
8380  		struct ice_vsi *ch_vsi;
8381  
8382  		list_del(&ch->list);
8383  		ch_vsi = ch->ch_vsi;
8384  		if (!ch_vsi) {
8385  			kfree(ch);
8386  			continue;
8387  		}
8388  
8389  		/* Reset queue contexts */
8390  		for (i = 0; i < ch->num_rxq; i++) {
8391  			struct ice_tx_ring *tx_ring;
8392  			struct ice_rx_ring *rx_ring;
8393  
8394  			tx_ring = vsi->tx_rings[ch->base_q + i];
8395  			rx_ring = vsi->rx_rings[ch->base_q + i];
8396  			if (tx_ring) {
8397  				tx_ring->ch = NULL;
8398  				if (tx_ring->q_vector)
8399  					tx_ring->q_vector->ch = NULL;
8400  			}
8401  			if (rx_ring) {
8402  				rx_ring->ch = NULL;
8403  				if (rx_ring->q_vector)
8404  					rx_ring->q_vector->ch = NULL;
8405  			}
8406  		}
8407  
8408  		/* Release FD resources for the channel VSI */
8409  		ice_fdir_rem_adq_chnl(&pf->hw, ch->ch_vsi->idx);
8410  
8411  		/* clear the VSI from scheduler tree */
8412  		ice_rm_vsi_lan_cfg(ch->ch_vsi->port_info, ch->ch_vsi->idx);
8413  
8414  		/* Delete VSI from FW */
8415  		ice_vsi_delete(ch->ch_vsi);
8416  
8417  		/* Delete VSI from PF and HW VSI arrays */
8418  		ice_vsi_clear(ch->ch_vsi);
8419  
8420  		/* free the channel */
8421  		kfree(ch);
8422  	}
8423  
8424  	/* clear the channel VSI map which is stored in main VSI */
8425  	ice_for_each_chnl_tc(i)
8426  		vsi->tc_map_vsi[i] = NULL;
8427  
8428  	/* reset main VSI's all TC information */
8429  	vsi->all_enatc = 0;
8430  	vsi->all_numtc = 0;
8431  }
8432  
8433  /**
8434   * ice_rebuild_channels - rebuild channel
8435   * @pf: ptr to PF
8436   *
8437   * Recreate channel VSIs and replay filters
8438   */
8439  static int ice_rebuild_channels(struct ice_pf *pf)
8440  {
8441  	struct device *dev = ice_pf_to_dev(pf);
8442  	struct ice_vsi *main_vsi;
8443  	bool rem_adv_fltr = true;
8444  	struct ice_channel *ch;
8445  	struct ice_vsi *vsi;
8446  	int tc_idx = 1;
8447  	int i, err;
8448  
8449  	main_vsi = ice_get_main_vsi(pf);
8450  	if (!main_vsi)
8451  		return 0;
8452  
8453  	if (!test_bit(ICE_FLAG_TC_MQPRIO, pf->flags) ||
8454  	    main_vsi->old_numtc == 1)
8455  		return 0; /* nothing to be done */
8456  
8457  	/* reconfigure main VSI based on old value of TC and cached values
8458  	 * for MQPRIO opts
8459  	 */
8460  	err = ice_vsi_cfg_tc(main_vsi, main_vsi->old_ena_tc);
8461  	if (err) {
8462  		dev_err(dev, "failed configuring TC(ena_tc:0x%02x) for HW VSI=%u\n",
8463  			main_vsi->old_ena_tc, main_vsi->vsi_num);
8464  		return err;
8465  	}
8466  
8467  	/* rebuild ADQ VSIs */
8468  	ice_for_each_vsi(pf, i) {
8469  		enum ice_vsi_type type;
8470  
8471  		vsi = pf->vsi[i];
8472  		if (!vsi || vsi->type != ICE_VSI_CHNL)
8473  			continue;
8474  
8475  		type = vsi->type;
8476  
8477  		/* rebuild ADQ VSI */
8478  		err = ice_vsi_rebuild(vsi, true);
8479  		if (err) {
8480  			dev_err(dev, "VSI (type:%s) at index %d rebuild failed, err %d\n",
8481  				ice_vsi_type_str(type), vsi->idx, err);
8482  			goto cleanup;
8483  		}
8484  
8485  		/* Re-map HW VSI number, using VSI handle that has been
8486  		 * previously validated in ice_replay_vsi() call above
8487  		 */
8488  		vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx);
8489  
8490  		/* replay filters for the VSI */
8491  		err = ice_replay_vsi(&pf->hw, vsi->idx);
8492  		if (err) {
8493  			dev_err(dev, "VSI (type:%s) replay failed, err %d, VSI index %d\n",
8494  				ice_vsi_type_str(type), err, vsi->idx);
8495  			rem_adv_fltr = false;
8496  			goto cleanup;
8497  		}
8498  		dev_info(dev, "VSI (type:%s) at index %d rebuilt successfully\n",
8499  			 ice_vsi_type_str(type), vsi->idx);
8500  
8501  		/* store ADQ VSI at correct TC index in main VSI's
8502  		 * map of TC to VSI
8503  		 */
8504  		main_vsi->tc_map_vsi[tc_idx++] = vsi;
8505  	}
8506  
8507  	/* ADQ VSI(s) has been rebuilt successfully, so setup
8508  	 * channel for main VSI's Tx and Rx rings
8509  	 */
8510  	list_for_each_entry(ch, &main_vsi->ch_list, list) {
8511  		struct ice_vsi *ch_vsi;
8512  
8513  		ch_vsi = ch->ch_vsi;
8514  		if (!ch_vsi)
8515  			continue;
8516  
8517  		/* reconfig channel resources */
8518  		ice_cfg_chnl_all_res(main_vsi, ch);
8519  
8520  		/* replay BW rate limit if it is non-zero */
8521  		if (!ch->max_tx_rate && !ch->min_tx_rate)
8522  			continue;
8523  
8524  		err = ice_set_bw_limit(ch_vsi, ch->max_tx_rate,
8525  				       ch->min_tx_rate);
8526  		if (err)
8527  			dev_err(dev, "failed (err:%d) to rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n",
8528  				err, ch->max_tx_rate, ch->min_tx_rate,
8529  				ch_vsi->vsi_num);
8530  		else
8531  			dev_dbg(dev, "successfully rebuilt BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n",
8532  				ch->max_tx_rate, ch->min_tx_rate,
8533  				ch_vsi->vsi_num);
8534  	}
8535  
8536  	/* reconfig RSS for main VSI */
8537  	if (main_vsi->ch_rss_size)
8538  		ice_vsi_cfg_rss_lut_key(main_vsi);
8539  
8540  	return 0;
8541  
8542  cleanup:
8543  	ice_remove_q_channels(main_vsi, rem_adv_fltr);
8544  	return err;
8545  }
8546  
8547  /**
8548   * ice_create_q_channels - Add queue channel for the given TCs
8549   * @vsi: VSI to be configured
8550   *
8551   * Configures queue channel mapping for the given TCs
8552   */
8553  static int ice_create_q_channels(struct ice_vsi *vsi)
8554  {
8555  	struct ice_pf *pf = vsi->back;
8556  	struct ice_channel *ch;
8557  	int ret = 0, i;
8558  
8559  	ice_for_each_chnl_tc(i) {
8560  		if (!(vsi->all_enatc & BIT(i)))
8561  			continue;
8562  
8563  		ch = kzalloc(sizeof(*ch), GFP_KERNEL);
8564  		if (!ch) {
8565  			ret = -ENOMEM;
8566  			goto err_free;
8567  		}
8568  		INIT_LIST_HEAD(&ch->list);
8569  		ch->num_rxq = vsi->mqprio_qopt.qopt.count[i];
8570  		ch->num_txq = vsi->mqprio_qopt.qopt.count[i];
8571  		ch->base_q = vsi->mqprio_qopt.qopt.offset[i];
8572  		ch->max_tx_rate = vsi->mqprio_qopt.max_rate[i];
8573  		ch->min_tx_rate = vsi->mqprio_qopt.min_rate[i];
8574  
8575  		/* convert to Kbits/s */
8576  		if (ch->max_tx_rate)
8577  			ch->max_tx_rate = div_u64(ch->max_tx_rate,
8578  						  ICE_BW_KBPS_DIVISOR);
8579  		if (ch->min_tx_rate)
8580  			ch->min_tx_rate = div_u64(ch->min_tx_rate,
8581  						  ICE_BW_KBPS_DIVISOR);
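		/* Illustrative note: the mqprio max_rate/min_rate values arrive
		 * from the stack in bytes per second; assuming ICE_BW_KBPS_DIVISOR
		 * is 125 (8 bits / 1000), e.g. 125000000 B/s becomes 1000000 Kbps
		 * (1 Gbps) after the divisions above.
		 */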
8582  
8583  		ret = ice_create_q_channel(vsi, ch);
8584  		if (ret) {
8585  			dev_err(ice_pf_to_dev(pf),
8586  				"failed creating channel TC:%d\n", i);
8587  			kfree(ch);
8588  			goto err_free;
8589  		}
8590  		list_add_tail(&ch->list, &vsi->ch_list);
8591  		vsi->tc_map_vsi[i] = ch->ch_vsi;
8592  		dev_dbg(ice_pf_to_dev(pf),
8593  			"successfully created channel: VSI %pK\n", ch->ch_vsi);
8594  	}
8595  	return 0;
8596  
8597  err_free:
8598  	ice_remove_q_channels(vsi, false);
8599  
8600  	return ret;
8601  }
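/* Illustrative usage (not driver code): an ADQ request such as
 *
 *   tc qdisc add dev <iface> root mqprio num_tc 2 map 0 0 0 0 1 1 1 1 \
 *       queues 4@0 4@4 hw 1 mode channel
 *
 * would, assuming channel TCs start at TC1, lead ice_create_q_channels()
 * to build a single channel for TC1 with num_txq = num_rxq = 4 and
 * base_q = 4, while TC0 traffic keeps using the main VSI queues.
 */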
8602  
8603  /**
8604   * ice_setup_tc_mqprio_qdisc - configure multiple traffic classes
8605   * @netdev: net device to configure
8606   * @type_data: TC offload data
8607   */
8608  static int ice_setup_tc_mqprio_qdisc(struct net_device *netdev, void *type_data)
8609  {
8610  	struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
8611  	struct ice_netdev_priv *np = netdev_priv(netdev);
8612  	struct ice_vsi *vsi = np->vsi;
8613  	struct ice_pf *pf = vsi->back;
8614  	u16 mode, ena_tc_qdisc = 0;
8615  	int cur_txq, cur_rxq;
8616  	u8 hw = 0, num_tcf;
8617  	struct device *dev;
8618  	int ret, i;
8619  
8620  	dev = ice_pf_to_dev(pf);
8621  	num_tcf = mqprio_qopt->qopt.num_tc;
8622  	hw = mqprio_qopt->qopt.hw;
8623  	mode = mqprio_qopt->mode;
8624  	if (!hw) {
8625  		clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
8626  		vsi->ch_rss_size = 0;
8627  		memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
8628  		goto config_tcf;
8629  	}
8630  
8631  	/* Generate queue region map for number of TCF requested */
8632  	for (i = 0; i < num_tcf; i++)
8633  		ena_tc_qdisc |= BIT(i);
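	/* e.g. num_tcf = 3 yields ena_tc_qdisc = 0x7 (TCs 0-2 enabled) */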
8634  
8635  	switch (mode) {
8636  	case TC_MQPRIO_MODE_CHANNEL:
8637  
8638  		if (pf->hw.port_info->is_custom_tx_enabled) {
8639  			dev_err(dev, "Custom Tx scheduler feature enabled, can't configure ADQ\n");
8640  			return -EBUSY;
8641  		}
8642  		ice_tear_down_devlink_rate_tree(pf);
8643  
8644  		ret = ice_validate_mqprio_qopt(vsi, mqprio_qopt);
8645  		if (ret) {
8646  			netdev_err(netdev, "failed to validate_mqprio_qopt(), ret %d\n",
8647  				   ret);
8648  			return ret;
8649  		}
8650  		memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
8651  		set_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
8652  		/* don't assume the state of hw_tc_offload during driver load;
8653  		 * set the flag for TC flower filters if hw_tc_offload is
8654  		 * already ON
8655  		 */
8656  		if (vsi->netdev->features & NETIF_F_HW_TC)
8657  			set_bit(ICE_FLAG_CLS_FLOWER, pf->flags);
8658  		break;
8659  	default:
8660  		return -EINVAL;
8661  	}
8662  
8663  config_tcf:
8664  
8665  	/* Requesting same TCF configuration as already enabled */
8666  	if (ena_tc_qdisc == vsi->tc_cfg.ena_tc &&
8667  	    mode != TC_MQPRIO_MODE_CHANNEL)
8668  		return 0;
8669  
8670  	/* Pause VSI queues */
8671  	ice_dis_vsi(vsi, true);
8672  
8673  	if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
8674  		ice_remove_q_channels(vsi, true);
8675  
8676  	if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
8677  		vsi->req_txq = min_t(int, ice_get_avail_txq_count(pf),
8678  				     num_online_cpus());
8679  		vsi->req_rxq = min_t(int, ice_get_avail_rxq_count(pf),
8680  				     num_online_cpus());
8681  	} else {
8682  		/* logic to rebuild the VSI, same as for ethtool -L */
8683  		u16 offset = 0, qcount_tx = 0, qcount_rx = 0;
8684  
8685  		for (i = 0; i < num_tcf; i++) {
8686  			if (!(ena_tc_qdisc & BIT(i)))
8687  				continue;
8688  
8689  			offset = vsi->mqprio_qopt.qopt.offset[i];
8690  			qcount_rx = vsi->mqprio_qopt.qopt.count[i];
8691  			qcount_tx = vsi->mqprio_qopt.qopt.count[i];
8692  		}
8693  		vsi->req_txq = offset + qcount_tx;
8694  		vsi->req_rxq = offset + qcount_rx;
8695  
8696  		/* store away the original rss_size info so that it can be
8697  		 * reused by ice_vsi_rebuild during the tc-qdisc delete stage
8698  		 * to determine what the rss_size for the main VSI should be
8699  		 */
8700  		vsi->orig_rss_size = vsi->rss_size;
8701  	}
8702  
8703  	/* save current values of Tx and Rx queues before calling VSI rebuild
8704  	 * for fallback option
8705  	 */
8706  	cur_txq = vsi->num_txq;
8707  	cur_rxq = vsi->num_rxq;
8708  
8709  	/* proceed with rebuilding the main VSI using the correct number of queues */
8710  	ret = ice_vsi_rebuild(vsi, false);
8711  	if (ret) {
8712  		/* fallback to current number of queues */
8713  		dev_info(dev, "Rebuild failed with new queues, retrying with current number of queues\n");
8714  		vsi->req_txq = cur_txq;
8715  		vsi->req_rxq = cur_rxq;
8716  		clear_bit(ICE_RESET_FAILED, pf->state);
8717  		if (ice_vsi_rebuild(vsi, false)) {
8718  			dev_err(dev, "Rebuild of main VSI failed again\n");
8719  			return ret;
8720  		}
8721  	}
8722  
8723  	vsi->all_numtc = num_tcf;
8724  	vsi->all_enatc = ena_tc_qdisc;
8725  	ret = ice_vsi_cfg_tc(vsi, ena_tc_qdisc);
8726  	if (ret) {
8727  		netdev_err(netdev, "failed configuring TC for VSI id=%d\n",
8728  			   vsi->vsi_num);
8729  		goto exit;
8730  	}
8731  
8732  	if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
8733  		u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
8734  		u64 min_tx_rate = vsi->mqprio_qopt.min_rate[0];
8735  
8736  		/* set TC0 rate limit if specified */
8737  		if (max_tx_rate || min_tx_rate) {
8738  			/* convert to Kbits/s */
8739  			if (max_tx_rate)
8740  				max_tx_rate = div_u64(max_tx_rate, ICE_BW_KBPS_DIVISOR);
8741  			if (min_tx_rate)
8742  				min_tx_rate = div_u64(min_tx_rate, ICE_BW_KBPS_DIVISOR);
8743  
8744  			ret = ice_set_bw_limit(vsi, max_tx_rate, min_tx_rate);
8745  			if (!ret) {
8746  				dev_dbg(dev, "set Tx rate max %llu min %llu for VSI(%u)\n",
8747  					max_tx_rate, min_tx_rate, vsi->vsi_num);
8748  			} else {
8749  				dev_err(dev, "failed to set Tx rate max %llu min %llu for VSI(%u)\n",
8750  					max_tx_rate, min_tx_rate, vsi->vsi_num);
8751  				goto exit;
8752  			}
8753  		}
8754  		ret = ice_create_q_channels(vsi);
8755  		if (ret) {
8756  			netdev_err(netdev, "failed configuring queue channels\n");
8757  			goto exit;
8758  		} else {
8759  			netdev_dbg(netdev, "successfully configured channels\n");
8760  		}
8761  	}
8762  
8763  	if (vsi->ch_rss_size)
8764  		ice_vsi_cfg_rss_lut_key(vsi);
8765  
8766  exit:
8767  	/* on error, reset all_numtc and all_enatc */
8768  	if (ret) {
8769  		vsi->all_numtc = 0;
8770  		vsi->all_enatc = 0;
8771  	}
8772  	/* resume VSI */
8773  	ice_ena_vsi(vsi, true);
8774  
8775  	return ret;
8776  }
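/* Illustrative usage (not driver code): removing the offloaded qdisc, e.g.
 *
 *   tc qdisc del dev <iface> root
 *
 * reaches this function with qopt.hw == 0, so ICE_FLAG_TC_MQPRIO is
 * cleared, the ADQ channels are torn down and the main VSI is rebuilt
 * with Tx/Rx queue counts capped at the number of online CPUs.
 */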
8777  
8778  static LIST_HEAD(ice_block_cb_list);
8779  
8780  static int
8781  ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
8782  	     void *type_data)
8783  {
8784  	struct ice_netdev_priv *np = netdev_priv(netdev);
8785  	struct ice_pf *pf = np->vsi->back;
8786  	int err;
8787  
8788  	switch (type) {
8789  	case TC_SETUP_BLOCK:
8790  		return flow_block_cb_setup_simple(type_data,
8791  						  &ice_block_cb_list,
8792  						  ice_setup_tc_block_cb,
8793  						  np, np, true);
8794  	case TC_SETUP_QDISC_MQPRIO:
8795  		/* setup traffic classifier for receive side */
8796  		mutex_lock(&pf->tc_mutex);
8797  		err = ice_setup_tc_mqprio_qdisc(netdev, type_data);
8798  		mutex_unlock(&pf->tc_mutex);
8799  		return err;
8800  	default:
8801  		return -EOPNOTSUPP;
8802  	}
8803  	return -EOPNOTSUPP;
8804  }
8805  
8806  static struct ice_indr_block_priv *
8807  ice_indr_block_priv_lookup(struct ice_netdev_priv *np,
8808  			   struct net_device *netdev)
8809  {
8810  	struct ice_indr_block_priv *cb_priv;
8811  
8812  	list_for_each_entry(cb_priv, &np->tc_indr_block_priv_list, list) {
8813  		if (!cb_priv->netdev)
8814  			return NULL;
8815  		if (cb_priv->netdev == netdev)
8816  			return cb_priv;
8817  	}
8818  	return NULL;
8819  }
8820  
8821  static int
8822  ice_indr_setup_block_cb(enum tc_setup_type type, void *type_data,
8823  			void *indr_priv)
8824  {
8825  	struct ice_indr_block_priv *priv = indr_priv;
8826  	struct ice_netdev_priv *np = priv->np;
8827  
8828  	switch (type) {
8829  	case TC_SETUP_CLSFLOWER:
8830  		return ice_setup_tc_cls_flower(np, priv->netdev,
8831  					       (struct flow_cls_offload *)
8832  					       type_data);
8833  	default:
8834  		return -EOPNOTSUPP;
8835  	}
8836  }
8837  
8838  static int
8839  ice_indr_setup_tc_block(struct net_device *netdev, struct Qdisc *sch,
8840  			struct ice_netdev_priv *np,
8841  			struct flow_block_offload *f, void *data,
8842  			void (*cleanup)(struct flow_block_cb *block_cb))
8843  {
8844  	struct ice_indr_block_priv *indr_priv;
8845  	struct flow_block_cb *block_cb;
8846  
8847  	if (!ice_is_tunnel_supported(netdev) &&
8848  	    !(is_vlan_dev(netdev) &&
8849  	      vlan_dev_real_dev(netdev) == np->vsi->netdev))
8850  		return -EOPNOTSUPP;
8851  
8852  	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
8853  		return -EOPNOTSUPP;
8854  
8855  	switch (f->command) {
8856  	case FLOW_BLOCK_BIND:
8857  		indr_priv = ice_indr_block_priv_lookup(np, netdev);
8858  		if (indr_priv)
8859  			return -EEXIST;
8860  
8861  		indr_priv = kzalloc(sizeof(*indr_priv), GFP_KERNEL);
8862  		if (!indr_priv)
8863  			return -ENOMEM;
8864  
8865  		indr_priv->netdev = netdev;
8866  		indr_priv->np = np;
8867  		list_add(&indr_priv->list, &np->tc_indr_block_priv_list);
8868  
8869  		block_cb =
8870  			flow_indr_block_cb_alloc(ice_indr_setup_block_cb,
8871  						 indr_priv, indr_priv,
8872  						 ice_rep_indr_tc_block_unbind,
8873  						 f, netdev, sch, data, np,
8874  						 cleanup);
8875  
8876  		if (IS_ERR(block_cb)) {
8877  			list_del(&indr_priv->list);
8878  			kfree(indr_priv);
8879  			return PTR_ERR(block_cb);
8880  		}
8881  		flow_block_cb_add(block_cb, f);
8882  		list_add_tail(&block_cb->driver_list, &ice_block_cb_list);
8883  		break;
8884  	case FLOW_BLOCK_UNBIND:
8885  		indr_priv = ice_indr_block_priv_lookup(np, netdev);
8886  		if (!indr_priv)
8887  			return -ENOENT;
8888  
8889  		block_cb = flow_block_cb_lookup(f->block,
8890  						ice_indr_setup_block_cb,
8891  						indr_priv);
8892  		if (!block_cb)
8893  			return -ENOENT;
8894  
8895  		flow_indr_block_cb_remove(block_cb, f);
8896  
8897  		list_del(&block_cb->driver_list);
8898  		break;
8899  	default:
8900  		return -EOPNOTSUPP;
8901  	}
8902  	return 0;
8903  }
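/* Illustrative note: the indirect block path above is exercised when a TC
 * flower filter is attached to a supported tunnel device (e.g. a VXLAN
 * netdev) or to a VLAN upper of the PF netdev rather than to the PF
 * itself. FLOW_BLOCK_BIND allocates an ice_indr_block_priv entry, and
 * subsequent TC_SETUP_CLSFLOWER requests for that device are dispatched
 * to ice_indr_setup_block_cb().
 */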
8904  
8905  static int
8906  ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch,
8907  		     void *cb_priv, enum tc_setup_type type, void *type_data,
8908  		     void *data,
8909  		     void (*cleanup)(struct flow_block_cb *block_cb))
8910  {
8911  	switch (type) {
8912  	case TC_SETUP_BLOCK:
8913  		return ice_indr_setup_tc_block(netdev, sch, cb_priv, type_data,
8914  					       data, cleanup);
8915  
8916  	default:
8917  		return -EOPNOTSUPP;
8918  	}
8919  }
8920  
8921  /**
8922   * ice_open - Called when a network interface becomes active
8923   * @netdev: network interface device structure
8924   *
8925   * The open entry point is called when a network interface is made
8926   * active by the system (IFF_UP). At this point all resources needed
8927   * for transmit and receive operations are allocated, the interrupt
8928   * handler is registered with the OS, the netdev watchdog is enabled,
8929   * and the stack is notified that the interface is ready.
8930   *
8931   * Returns 0 on success, negative value on failure
8932   */
8933  int ice_open(struct net_device *netdev)
8934  {
8935  	struct ice_netdev_priv *np = netdev_priv(netdev);
8936  	struct ice_pf *pf = np->vsi->back;
8937  
8938  	if (ice_is_reset_in_progress(pf->state)) {
8939  		netdev_err(netdev, "can't open net device while reset is in progress\n");
8940  		return -EBUSY;
8941  	}
8942  
8943  	return ice_open_internal(netdev);
8944  }
8945  
8946  /**
8947   * ice_open_internal - Called when a network interface becomes active
8948   * @netdev: network interface device structure
8949   *
8950   * Internal ice_open implementation. Should not be called directly except from ice_open and the
8951   * reset handling routine.
8952   *
8953   * Returns 0 on success, negative value on failure
8954   */
8955  int ice_open_internal(struct net_device *netdev)
8956  {
8957  	struct ice_netdev_priv *np = netdev_priv(netdev);
8958  	struct ice_vsi *vsi = np->vsi;
8959  	struct ice_pf *pf = vsi->back;
8960  	struct ice_port_info *pi;
8961  	int err;
8962  
8963  	if (test_bit(ICE_NEEDS_RESTART, pf->state)) {
8964  		netdev_err(netdev, "driver needs to be unloaded and reloaded\n");
8965  		return -EIO;
8966  	}
8967  
8968  	netif_carrier_off(netdev);
8969  
8970  	pi = vsi->port_info;
8971  	err = ice_update_link_info(pi);
8972  	if (err) {
8973  		netdev_err(netdev, "Failed to get link info, error %d\n", err);
8974  		return err;
8975  	}
8976  
8977  	ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err);
8978  
8979  	/* Set PHY if there is media, otherwise, turn off PHY */
8980  	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
8981  		clear_bit(ICE_FLAG_NO_MEDIA, pf->flags);
8982  		if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state)) {
8983  			err = ice_init_phy_user_cfg(pi);
8984  			if (err) {
8985  				netdev_err(netdev, "Failed to initialize PHY settings, error %d\n",
8986  					   err);
8987  				return err;
8988  			}
8989  		}
8990  
8991  		err = ice_configure_phy(vsi);
8992  		if (err) {
8993  			netdev_err(netdev, "Failed to set physical link up, error %d\n",
8994  				   err);
8995  			return err;
8996  		}
8997  	} else {
8998  		set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
8999  		ice_set_link(vsi, false);
9000  	}
9001  
9002  	err = ice_vsi_open(vsi);
9003  	if (err)
9004  		netdev_err(netdev, "Failed to open VSI 0x%04X on switch 0x%04X\n",
9005  			   vsi->vsi_num, vsi->vsw->sw_id);
9006  
9007  	/* Update existing tunnels information */
9008  	udp_tunnel_get_rx_info(netdev);
9009  
9010  	return err;
9011  }
9012  
9013  /**
9014   * ice_stop - Disables a network interface
9015   * @netdev: network interface device structure
9016   *
9017   * The stop entry point is called when an interface is de-activated by the OS,
9018   * and the netdevice enters the DOWN state. The hardware is still under the
9019   * driver's control, but the netdev interface is disabled.
9020   *
9021   * Returns success only - not allowed to fail
9022   */
9023  int ice_stop(struct net_device *netdev)
9024  {
9025  	struct ice_netdev_priv *np = netdev_priv(netdev);
9026  	struct ice_vsi *vsi = np->vsi;
9027  	struct ice_pf *pf = vsi->back;
9028  
9029  	if (ice_is_reset_in_progress(pf->state)) {
9030  		netdev_err(netdev, "can't stop net device while reset is in progress\n");
9031  		return -EBUSY;
9032  	}
9033  
9034  	if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) {
9035  		int link_err = ice_force_phys_link_state(vsi, false);
9036  
9037  		if (link_err) {
9038  			netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n",
9039  				   vsi->vsi_num, link_err);
9040  			return -EIO;
9041  		}
9042  	}
9043  
9044  	ice_vsi_close(vsi);
9045  
9046  	return 0;
9047  }
9048  
9049  /**
9050   * ice_features_check - Validate encapsulated packet conforms to limits
9051   * @skb: skb buffer
9052   * @netdev: This port's netdev
9053   * @features: Offload features that the stack believes apply
9054   */
9055  static netdev_features_t
9056  ice_features_check(struct sk_buff *skb,
9057  		   struct net_device __always_unused *netdev,
9058  		   netdev_features_t features)
9059  {
9060  	bool gso = skb_is_gso(skb);
9061  	size_t len;
9062  
9063  	/* No point in doing any of this if neither checksum nor GSO is
9064  	 * being requested for this frame. We can rule out both by just
9065  	 * checking for CHECKSUM_PARTIAL
9066  	 */
9067  	if (skb->ip_summed != CHECKSUM_PARTIAL)
9068  		return features;
9069  
9070  	/* We cannot support GSO if the MSS is going to be less than
9071  	 * 64 bytes (ICE_TXD_CTX_MIN_MSS); if it is, drop GSO support.
9072  	 */
9073  	if (gso && (skb_shinfo(skb)->gso_size < ICE_TXD_CTX_MIN_MSS))
9074  		features &= ~NETIF_F_GSO_MASK;
9075  
9076  	len = skb_network_offset(skb);
9077  	if (len > ICE_TXD_MACLEN_MAX || len & 0x1)
9078  		goto out_rm_features;
9079  
9080  	len = skb_network_header_len(skb);
9081  	if (len > ICE_TXD_IPLEN_MAX || len & 0x1)
9082  		goto out_rm_features;
9083  
9084  	if (skb->encapsulation) {
9085  		/* this must work for VXLAN frames AND IPIP/SIT frames, and in
9086  		 * the case of IPIP frames, the transport header pointer is
9087  		 * after the inner header! So check to make sure that this
9088  		 * is a GRE or UDP_TUNNEL frame before doing that math.
9089  		 */
9090  		if (gso && (skb_shinfo(skb)->gso_type &
9091  			    (SKB_GSO_GRE | SKB_GSO_UDP_TUNNEL))) {
9092  			len = skb_inner_network_header(skb) -
9093  			      skb_transport_header(skb);
9094  			if (len > ICE_TXD_L4LEN_MAX || len & 0x1)
9095  				goto out_rm_features;
9096  		}
9097  
9098  		len = skb_inner_network_header_len(skb);
9099  		if (len > ICE_TXD_IPLEN_MAX || len & 0x1)
9100  			goto out_rm_features;
9101  	}
9102  
9103  	return features;
9104  out_rm_features:
9105  	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
9106  }
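/* Illustrative note: for a CHECKSUM_PARTIAL frame whose MAC or IP header
 * offsets/lengths exceed the limits checked above (or are odd), the
 * function returns with NETIF_F_CSUM_MASK and NETIF_F_GSO_MASK cleared,
 * so the stack falls back to software checksumming and segmentation for
 * that skb.
 */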
9107  
9108  static const struct net_device_ops ice_netdev_safe_mode_ops = {
9109  	.ndo_open = ice_open,
9110  	.ndo_stop = ice_stop,
9111  	.ndo_start_xmit = ice_start_xmit,
9112  	.ndo_set_mac_address = ice_set_mac_address,
9113  	.ndo_validate_addr = eth_validate_addr,
9114  	.ndo_change_mtu = ice_change_mtu,
9115  	.ndo_get_stats64 = ice_get_stats64,
9116  	.ndo_tx_timeout = ice_tx_timeout,
9117  	.ndo_bpf = ice_xdp_safe_mode,
9118  };
9119  
9120  static const struct net_device_ops ice_netdev_ops = {
9121  	.ndo_open = ice_open,
9122  	.ndo_stop = ice_stop,
9123  	.ndo_start_xmit = ice_start_xmit,
9124  	.ndo_select_queue = ice_select_queue,
9125  	.ndo_features_check = ice_features_check,
9126  	.ndo_fix_features = ice_fix_features,
9127  	.ndo_set_rx_mode = ice_set_rx_mode,
9128  	.ndo_set_mac_address = ice_set_mac_address,
9129  	.ndo_validate_addr = eth_validate_addr,
9130  	.ndo_change_mtu = ice_change_mtu,
9131  	.ndo_get_stats64 = ice_get_stats64,
9132  	.ndo_set_tx_maxrate = ice_set_tx_maxrate,
9133  	.ndo_eth_ioctl = ice_eth_ioctl,
9134  	.ndo_set_vf_spoofchk = ice_set_vf_spoofchk,
9135  	.ndo_set_vf_mac = ice_set_vf_mac,
9136  	.ndo_get_vf_config = ice_get_vf_cfg,
9137  	.ndo_set_vf_trust = ice_set_vf_trust,
9138  	.ndo_set_vf_vlan = ice_set_vf_port_vlan,
9139  	.ndo_set_vf_link_state = ice_set_vf_link_state,
9140  	.ndo_get_vf_stats = ice_get_vf_stats,
9141  	.ndo_set_vf_rate = ice_set_vf_bw,
9142  	.ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid,
9143  	.ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid,
9144  	.ndo_setup_tc = ice_setup_tc,
9145  	.ndo_set_features = ice_set_features,
9146  	.ndo_bridge_getlink = ice_bridge_getlink,
9147  	.ndo_bridge_setlink = ice_bridge_setlink,
9148  	.ndo_fdb_add = ice_fdb_add,
9149  	.ndo_fdb_del = ice_fdb_del,
9150  #ifdef CONFIG_RFS_ACCEL
9151  	.ndo_rx_flow_steer = ice_rx_flow_steer,
9152  #endif
9153  	.ndo_tx_timeout = ice_tx_timeout,
9154  	.ndo_bpf = ice_xdp,
9155  	.ndo_xdp_xmit = ice_xdp_xmit,
9156  	.ndo_xsk_wakeup = ice_xsk_wakeup,
9157  };
9158