1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018-2023, Intel Corporation. */
3
4 /* Intel(R) Ethernet Connection E800 Series Linux Driver */
5
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
8 #include <generated/utsrelease.h>
9 #include <linux/crash_dump.h>
10 #include "ice.h"
11 #include "ice_base.h"
12 #include "ice_lib.h"
13 #include "ice_fltr.h"
14 #include "ice_dcb_lib.h"
15 #include "ice_dcb_nl.h"
16 #include "devlink/devlink.h"
17 #include "devlink/port.h"
18 #include "ice_sf_eth.h"
19 #include "ice_hwmon.h"
20 /* Including ice_trace.h with CREATE_TRACE_POINTS defined will generate the
21 * ice tracepoint functions. This must be done exactly once across the
22 * ice driver.
23 */
24 #define CREATE_TRACE_POINTS
25 #include "ice_trace.h"
26 #include "ice_eswitch.h"
27 #include "ice_tc_lib.h"
28 #include "ice_vsi_vlan_ops.h"
29 #include <net/xdp_sock_drv.h>
30
31 #define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver"
32 static const char ice_driver_string[] = DRV_SUMMARY;
33 static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation.";
34
35 /* DDP Package file located in firmware search paths (e.g. /lib/firmware/) */
36 #define ICE_DDP_PKG_PATH "intel/ice/ddp/"
37 #define ICE_DDP_PKG_FILE ICE_DDP_PKG_PATH "ice.pkg"
38
39 MODULE_DESCRIPTION(DRV_SUMMARY);
40 MODULE_IMPORT_NS("LIBETH");
41 MODULE_IMPORT_NS("LIBETH_XDP");
42 MODULE_IMPORT_NS("LIBIE");
43 MODULE_IMPORT_NS("LIBIE_ADMINQ");
44 MODULE_IMPORT_NS("LIBIE_FWLOG");
45 MODULE_LICENSE("GPL v2");
46 MODULE_FIRMWARE(ICE_DDP_PKG_FILE);
47
48 static int debug = -1;
49 module_param(debug, int, 0644);
50 #ifndef CONFIG_DYNAMIC_DEBUG
51 MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXXXXX)");
52 #else
53 MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
54 #endif /* !CONFIG_DYNAMIC_DEBUG */
55
56 DEFINE_STATIC_KEY_FALSE(ice_xdp_locking_key);
57 EXPORT_SYMBOL(ice_xdp_locking_key);
58
59 /**
60 * ice_hw_to_dev - Get device pointer from the hardware structure
61 * @hw: pointer to the device HW structure
62 *
63 * Used to access the device pointer from compilation units which can't easily
64 * include the definition of struct ice_pf without leading to circular header
65 * dependencies.
66 */
ice_hw_to_dev(struct ice_hw * hw)67 struct device *ice_hw_to_dev(struct ice_hw *hw)
68 {
69 struct ice_pf *pf = container_of(hw, struct ice_pf, hw);
70
71 return &pf->pdev->dev;
72 }
73
74 static struct workqueue_struct *ice_wq;
75 struct workqueue_struct *ice_lag_wq;
76 static const struct net_device_ops ice_netdev_safe_mode_ops;
77 static const struct net_device_ops ice_netdev_ops;
78
79 static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type);
80
81 static void ice_vsi_release_all(struct ice_pf *pf);
82
83 static int ice_rebuild_channels(struct ice_pf *pf);
84 static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_adv_fltr);
85
86 static int
87 ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch,
88 void *cb_priv, enum tc_setup_type type, void *type_data,
89 void *data,
90 void (*cleanup)(struct flow_block_cb *block_cb));
91
netif_is_ice(const struct net_device * dev)92 bool netif_is_ice(const struct net_device *dev)
93 {
94 return dev && (dev->netdev_ops == &ice_netdev_ops ||
95 dev->netdev_ops == &ice_netdev_safe_mode_ops);
96 }
97
98 /**
99 * ice_get_tx_pending - returns number of Tx descriptors not processed
100 * @ring: the ring of descriptors
101 */
ice_get_tx_pending(struct ice_tx_ring * ring)102 static u16 ice_get_tx_pending(struct ice_tx_ring *ring)
103 {
104 u16 head, tail;
105
106 head = ring->next_to_clean;
107 tail = ring->next_to_use;
108
109 if (head != tail)
110 return (head < tail) ?
111 tail - head : (tail + ring->count - head);
112 return 0;
113 }
114
115 /**
116 * ice_check_for_hang_subtask - check for and recover hung queues
117 * @pf: pointer to PF struct
118 */
ice_check_for_hang_subtask(struct ice_pf * pf)119 static void ice_check_for_hang_subtask(struct ice_pf *pf)
120 {
121 struct ice_vsi *vsi = NULL;
122 struct ice_hw *hw;
123 unsigned int i;
124 int packets;
125 u32 v;
126
127 ice_for_each_vsi(pf, v)
128 if (pf->vsi[v] && pf->vsi[v]->type == ICE_VSI_PF) {
129 vsi = pf->vsi[v];
130 break;
131 }
132
133 if (!vsi || test_bit(ICE_VSI_DOWN, vsi->state))
134 return;
135
136 if (!(vsi->netdev && netif_carrier_ok(vsi->netdev)))
137 return;
138
139 hw = &vsi->back->hw;
140
141 ice_for_each_txq(vsi, i) {
142 struct ice_tx_ring *tx_ring = vsi->tx_rings[i];
143 struct ice_ring_stats *ring_stats;
144
145 if (!tx_ring)
146 continue;
147 if (ice_ring_ch_enabled(tx_ring))
148 continue;
149
150 ring_stats = tx_ring->ring_stats;
151 if (!ring_stats)
152 continue;
153
154 if (tx_ring->desc) {
155 /* If packet counter has not changed the queue is
156 * likely stalled, so force an interrupt for this
157 * queue.
158 *
159 * prev_pkt would be negative if there was no
160 * pending work.
161 */
162 packets = ice_stats_read(ring_stats, pkts) & INT_MAX;
163 if (ring_stats->tx.prev_pkt == packets) {
164 /* Trigger sw interrupt to revive the queue */
165 ice_trigger_sw_intr(hw, tx_ring->q_vector);
166 continue;
167 }
168
169 /* Memory barrier between read of packet count and call
170 * to ice_get_tx_pending()
171 */
172 smp_rmb();
173 ring_stats->tx.prev_pkt =
174 ice_get_tx_pending(tx_ring) ? packets : -1;
175 }
176 }
177 }
178
179 /**
180 * ice_init_mac_fltr - Set initial MAC filters
181 * @pf: board private structure
182 *
183 * Set initial set of MAC filters for PF VSI; configure filters for permanent
184 * address and broadcast address. If an error is encountered, netdevice will be
185 * unregistered.
186 */
ice_init_mac_fltr(struct ice_pf * pf)187 static int ice_init_mac_fltr(struct ice_pf *pf)
188 {
189 struct ice_vsi *vsi;
190 u8 *perm_addr;
191
192 vsi = ice_get_main_vsi(pf);
193 if (!vsi)
194 return -EINVAL;
195
196 perm_addr = vsi->port_info->mac.perm_addr;
197 return ice_fltr_add_mac_and_broadcast(vsi, perm_addr, ICE_FWD_TO_VSI);
198 }
199
200 /**
201 * ice_add_mac_to_sync_list - creates list of MAC addresses to be synced
202 * @netdev: the net device on which the sync is happening
203 * @addr: MAC address to sync
204 *
205 * This is a callback function which is called by the in kernel device sync
206 * functions (like __dev_uc_sync, __dev_mc_sync, etc). This function only
207 * populates the tmp_sync_list, which is later used by ice_add_mac to add the
208 * MAC filters from the hardware.
209 */
ice_add_mac_to_sync_list(struct net_device * netdev,const u8 * addr)210 static int ice_add_mac_to_sync_list(struct net_device *netdev, const u8 *addr)
211 {
212 struct ice_netdev_priv *np = netdev_priv(netdev);
213 struct ice_vsi *vsi = np->vsi;
214
215 if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_sync_list, addr,
216 ICE_FWD_TO_VSI))
217 return -EINVAL;
218
219 return 0;
220 }
221
222 /**
223 * ice_add_mac_to_unsync_list - creates list of MAC addresses to be unsynced
224 * @netdev: the net device on which the unsync is happening
225 * @addr: MAC address to unsync
226 *
227 * This is a callback function which is called by the in kernel device unsync
228 * functions (like __dev_uc_unsync, __dev_mc_unsync, etc). This function only
229 * populates the tmp_unsync_list, which is later used by ice_remove_mac to
230 * delete the MAC filters from the hardware.
231 */
ice_add_mac_to_unsync_list(struct net_device * netdev,const u8 * addr)232 static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr)
233 {
234 struct ice_netdev_priv *np = netdev_priv(netdev);
235 struct ice_vsi *vsi = np->vsi;
236
237 /* Under some circumstances, we might receive a request to delete our
238 * own device address from our uc list. Because we store the device
239 * address in the VSI's MAC filter list, we need to ignore such
240 * requests and not delete our device address from this list.
241 */
242 if (ether_addr_equal(addr, netdev->dev_addr))
243 return 0;
244
245 if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_unsync_list, addr,
246 ICE_FWD_TO_VSI))
247 return -EINVAL;
248
249 return 0;
250 }
251
252 /**
253 * ice_vsi_fltr_changed - check if filter state changed
254 * @vsi: VSI to be checked
255 *
256 * returns true if filter state has changed, false otherwise.
257 */
ice_vsi_fltr_changed(struct ice_vsi * vsi)258 static bool ice_vsi_fltr_changed(struct ice_vsi *vsi)
259 {
260 return test_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state) ||
261 test_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
262 }
263
264 /**
265 * ice_set_promisc - Enable promiscuous mode for a given PF
266 * @vsi: the VSI being configured
267 * @promisc_m: mask of promiscuous config bits
268 *
269 */
ice_set_promisc(struct ice_vsi * vsi,u8 promisc_m)270 static int ice_set_promisc(struct ice_vsi *vsi, u8 promisc_m)
271 {
272 int status;
273
274 if (vsi->type != ICE_VSI_PF)
275 return 0;
276
277 if (ice_vsi_has_non_zero_vlans(vsi)) {
278 promisc_m |= (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX);
279 status = ice_fltr_set_vlan_vsi_promisc(&vsi->back->hw, vsi,
280 promisc_m);
281 } else {
282 status = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
283 promisc_m, 0);
284 }
285 if (status && status != -EEXIST)
286 return status;
287
288 netdev_dbg(vsi->netdev, "set promisc filter bits for VSI %i: 0x%x\n",
289 vsi->vsi_num, promisc_m);
290 return 0;
291 }
292
293 /**
294 * ice_clear_promisc - Disable promiscuous mode for a given PF
295 * @vsi: the VSI being configured
296 * @promisc_m: mask of promiscuous config bits
297 *
298 */
ice_clear_promisc(struct ice_vsi * vsi,u8 promisc_m)299 static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m)
300 {
301 int status;
302
303 if (vsi->type != ICE_VSI_PF)
304 return 0;
305
306 if (ice_vsi_has_non_zero_vlans(vsi)) {
307 promisc_m |= (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX);
308 status = ice_fltr_clear_vlan_vsi_promisc(&vsi->back->hw, vsi,
309 promisc_m);
310 } else {
311 status = ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
312 promisc_m, 0);
313 }
314
315 netdev_dbg(vsi->netdev, "clear promisc filter bits for VSI %i: 0x%x\n",
316 vsi->vsi_num, promisc_m);
317 return status;
318 }
319
320 /**
321 * ice_vsi_sync_fltr - Update the VSI filter list to the HW
322 * @vsi: ptr to the VSI
323 *
324 * Push any outstanding VSI filter changes through the AdminQ.
325 */
ice_vsi_sync_fltr(struct ice_vsi * vsi)326 static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
327 {
328 struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
329 struct device *dev = ice_pf_to_dev(vsi->back);
330 struct net_device *netdev = vsi->netdev;
331 bool promisc_forced_on = false;
332 struct ice_pf *pf = vsi->back;
333 struct ice_hw *hw = &pf->hw;
334 u32 changed_flags = 0;
335 int err;
336
337 if (!vsi->netdev)
338 return -EINVAL;
339
340 while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
341 usleep_range(1000, 2000);
342
343 changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
344 vsi->current_netdev_flags = vsi->netdev->flags;
345
346 INIT_LIST_HEAD(&vsi->tmp_sync_list);
347 INIT_LIST_HEAD(&vsi->tmp_unsync_list);
348
349 if (ice_vsi_fltr_changed(vsi)) {
350 clear_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
351 clear_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
352
353 /* grab the netdev's addr_list_lock */
354 netif_addr_lock_bh(netdev);
355 __dev_uc_sync(netdev, ice_add_mac_to_sync_list,
356 ice_add_mac_to_unsync_list);
357 __dev_mc_sync(netdev, ice_add_mac_to_sync_list,
358 ice_add_mac_to_unsync_list);
359 /* our temp lists are populated. release lock */
360 netif_addr_unlock_bh(netdev);
361 }
362
363 /* Remove MAC addresses in the unsync list */
364 err = ice_fltr_remove_mac_list(vsi, &vsi->tmp_unsync_list);
365 ice_fltr_free_list(dev, &vsi->tmp_unsync_list);
366 if (err) {
367 netdev_err(netdev, "Failed to delete MAC filters\n");
368 /* if we failed because of alloc failures, just bail */
369 if (err == -ENOMEM)
370 goto out;
371 }
372
373 /* Add MAC addresses in the sync list */
374 err = ice_fltr_add_mac_list(vsi, &vsi->tmp_sync_list);
375 ice_fltr_free_list(dev, &vsi->tmp_sync_list);
376 /* If filter is added successfully or already exists, do not go into
377 * 'if' condition and report it as error. Instead continue processing
378 * rest of the function.
379 */
380 if (err && err != -EEXIST) {
381 netdev_err(netdev, "Failed to add MAC filters\n");
382 /* If there is no more space for new umac filters, VSI
383 * should go into promiscuous mode. There should be some
384 * space reserved for promiscuous filters.
385 */
386 if (hw->adminq.sq_last_status == LIBIE_AQ_RC_ENOSPC &&
387 !test_and_set_bit(ICE_FLTR_OVERFLOW_PROMISC,
388 vsi->state)) {
389 promisc_forced_on = true;
390 netdev_warn(netdev, "Reached MAC filter limit, forcing promisc mode on VSI %d\n",
391 vsi->vsi_num);
392 } else {
393 goto out;
394 }
395 }
396 err = 0;
397 /* check for changes in promiscuous modes */
398 if (changed_flags & IFF_ALLMULTI) {
399 if (vsi->current_netdev_flags & IFF_ALLMULTI) {
400 err = ice_set_promisc(vsi, ICE_MCAST_PROMISC_BITS);
401 if (err) {
402 vsi->current_netdev_flags &= ~IFF_ALLMULTI;
403 goto out_promisc;
404 }
405 } else {
406 /* !(vsi->current_netdev_flags & IFF_ALLMULTI) */
407 err = ice_clear_promisc(vsi, ICE_MCAST_PROMISC_BITS);
408 if (err) {
409 vsi->current_netdev_flags |= IFF_ALLMULTI;
410 goto out_promisc;
411 }
412 }
413 }
414
415 if (((changed_flags & IFF_PROMISC) || promisc_forced_on) ||
416 test_bit(ICE_VSI_PROMISC_CHANGED, vsi->state)) {
417 clear_bit(ICE_VSI_PROMISC_CHANGED, vsi->state);
418 if (vsi->current_netdev_flags & IFF_PROMISC) {
419 /* Apply Rx filter rule to get traffic from wire */
420 if (!ice_is_dflt_vsi_in_use(vsi->port_info)) {
421 err = ice_set_dflt_vsi(vsi);
422 if (err && err != -EEXIST) {
423 netdev_err(netdev, "Error %d setting default VSI %i Rx rule\n",
424 err, vsi->vsi_num);
425 vsi->current_netdev_flags &=
426 ~IFF_PROMISC;
427 goto out_promisc;
428 }
429 err = 0;
430 vlan_ops->dis_rx_filtering(vsi);
431
432 /* promiscuous mode implies allmulticast so
433 * that VSIs that are in promiscuous mode are
434 * subscribed to multicast packets coming to
435 * the port
436 */
437 err = ice_set_promisc(vsi,
438 ICE_MCAST_PROMISC_BITS);
439 if (err)
440 goto out_promisc;
441 }
442 } else {
443 /* Clear Rx filter to remove traffic from wire */
444 if (ice_is_vsi_dflt_vsi(vsi)) {
445 err = ice_clear_dflt_vsi(vsi);
446 if (err) {
447 netdev_err(netdev, "Error %d clearing default VSI %i Rx rule\n",
448 err, vsi->vsi_num);
449 vsi->current_netdev_flags |=
450 IFF_PROMISC;
451 goto out_promisc;
452 }
453 if (vsi->netdev->features &
454 NETIF_F_HW_VLAN_CTAG_FILTER)
455 vlan_ops->ena_rx_filtering(vsi);
456 }
457
458 /* disable allmulti here, but only if allmulti is not
459 * still enabled for the netdev
460 */
461 if (!(vsi->current_netdev_flags & IFF_ALLMULTI)) {
462 err = ice_clear_promisc(vsi,
463 ICE_MCAST_PROMISC_BITS);
464 if (err) {
465 netdev_err(netdev, "Error %d clearing multicast promiscuous on VSI %i\n",
466 err, vsi->vsi_num);
467 }
468 }
469 }
470 }
471 goto exit;
472
473 out_promisc:
474 set_bit(ICE_VSI_PROMISC_CHANGED, vsi->state);
475 goto exit;
476 out:
477 /* if something went wrong then set the changed flag so we try again */
478 set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
479 set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
480 exit:
481 clear_bit(ICE_CFG_BUSY, vsi->state);
482 return err;
483 }
484
485 /**
486 * ice_sync_fltr_subtask - Sync the VSI filter list with HW
487 * @pf: board private structure
488 */
ice_sync_fltr_subtask(struct ice_pf * pf)489 static void ice_sync_fltr_subtask(struct ice_pf *pf)
490 {
491 int v;
492
493 if (!pf || !(test_bit(ICE_FLAG_FLTR_SYNC, pf->flags)))
494 return;
495
496 clear_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
497
498 ice_for_each_vsi(pf, v)
499 if (pf->vsi[v] && ice_vsi_fltr_changed(pf->vsi[v]) &&
500 ice_vsi_sync_fltr(pf->vsi[v])) {
501 /* come back and try again later */
502 set_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
503 break;
504 }
505 }
506
507 /**
508 * ice_pf_dis_all_vsi - Pause all VSIs on a PF
509 * @pf: the PF
510 * @locked: is the rtnl_lock already held
511 */
ice_pf_dis_all_vsi(struct ice_pf * pf,bool locked)512 static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
513 {
514 int node;
515 int v;
516
517 ice_for_each_vsi(pf, v)
518 if (pf->vsi[v])
519 ice_dis_vsi(pf->vsi[v], locked);
520
521 for (node = 0; node < ICE_MAX_PF_AGG_NODES; node++)
522 pf->pf_agg_node[node].num_vsis = 0;
523
524 for (node = 0; node < ICE_MAX_VF_AGG_NODES; node++)
525 pf->vf_agg_node[node].num_vsis = 0;
526 }
527
528 /**
529 * ice_prepare_for_reset - prep for reset
530 * @pf: board private structure
531 * @reset_type: reset type requested
532 *
533 * Inform or close all dependent features in prep for reset.
534 */
535 static void
ice_prepare_for_reset(struct ice_pf * pf,enum ice_reset_req reset_type)536 ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
537 {
538 struct ice_hw *hw = &pf->hw;
539 struct ice_vsi *vsi;
540 struct ice_vf *vf;
541 unsigned int bkt;
542
543 dev_dbg(ice_pf_to_dev(pf), "reset_type=%d\n", reset_type);
544
545 /* already prepared for reset */
546 if (test_bit(ICE_PREPARED_FOR_RESET, pf->state))
547 return;
548
549 synchronize_irq(pf->oicr_irq.virq);
550
551 ice_unplug_aux_dev(pf);
552
553 /* Notify VFs of impending reset */
554 if (ice_check_sq_alive(hw, &hw->mailboxq))
555 ice_vc_notify_reset(pf);
556
557 /* Disable VFs until reset is completed */
558 mutex_lock(&pf->vfs.table_lock);
559 ice_for_each_vf(pf, bkt, vf)
560 ice_set_vf_state_dis(vf);
561 mutex_unlock(&pf->vfs.table_lock);
562
563 if (ice_is_eswitch_mode_switchdev(pf)) {
564 rtnl_lock();
565 ice_eswitch_br_fdb_flush(pf->eswitch.br_offloads->bridge);
566 rtnl_unlock();
567 }
568
569 /* release ADQ specific HW and SW resources */
570 vsi = ice_get_main_vsi(pf);
571 if (!vsi)
572 goto skip;
573
574 /* to be on safe side, reset orig_rss_size so that normal flow
575 * of deciding rss_size can take precedence
576 */
577 vsi->orig_rss_size = 0;
578
579 if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
580 if (reset_type == ICE_RESET_PFR) {
581 vsi->old_ena_tc = vsi->all_enatc;
582 vsi->old_numtc = vsi->all_numtc;
583 } else {
584 ice_remove_q_channels(vsi, true);
585
586 /* for other reset type, do not support channel rebuild
587 * hence reset needed info
588 */
589 vsi->old_ena_tc = 0;
590 vsi->all_enatc = 0;
591 vsi->old_numtc = 0;
592 vsi->all_numtc = 0;
593 vsi->req_txq = 0;
594 vsi->req_rxq = 0;
595 clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
596 memset(&vsi->mqprio_qopt, 0, sizeof(vsi->mqprio_qopt));
597 }
598 }
599
600 if (vsi->netdev)
601 netif_device_detach(vsi->netdev);
602 skip:
603
604 /* clear SW filtering DB */
605 ice_clear_hw_tbls(hw);
606 /* disable the VSIs and their queues that are not already DOWN */
607 set_bit(ICE_VSI_REBUILD_PENDING, ice_get_main_vsi(pf)->state);
608 ice_pf_dis_all_vsi(pf, false);
609
610 if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
611 ice_ptp_prepare_for_reset(pf, reset_type);
612
613 if (ice_is_feature_supported(pf, ICE_F_GNSS))
614 ice_gnss_exit(pf);
615
616 if (hw->port_info)
617 ice_sched_clear_port(hw->port_info);
618
619 ice_shutdown_all_ctrlq(hw, false);
620
621 set_bit(ICE_PREPARED_FOR_RESET, pf->state);
622 }
623
624 /**
625 * ice_do_reset - Initiate one of many types of resets
626 * @pf: board private structure
627 * @reset_type: reset type requested before this function was called.
628 */
ice_do_reset(struct ice_pf * pf,enum ice_reset_req reset_type)629 static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
630 {
631 struct device *dev = ice_pf_to_dev(pf);
632 struct ice_hw *hw = &pf->hw;
633
634 dev_dbg(dev, "reset_type 0x%x requested\n", reset_type);
635
636 if (pf->lag && pf->lag->bonded && reset_type == ICE_RESET_PFR) {
637 dev_dbg(dev, "PFR on a bonded interface, promoting to CORER\n");
638 reset_type = ICE_RESET_CORER;
639 }
640
641 ice_prepare_for_reset(pf, reset_type);
642
643 /* trigger the reset */
644 if (ice_reset(hw, reset_type)) {
645 dev_err(dev, "reset %d failed\n", reset_type);
646 set_bit(ICE_RESET_FAILED, pf->state);
647 clear_bit(ICE_RESET_OICR_RECV, pf->state);
648 clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
649 clear_bit(ICE_PFR_REQ, pf->state);
650 clear_bit(ICE_CORER_REQ, pf->state);
651 clear_bit(ICE_GLOBR_REQ, pf->state);
652 wake_up(&pf->reset_wait_queue);
653 return;
654 }
655
656 /* PFR is a bit of a special case because it doesn't result in an OICR
657 * interrupt. So for PFR, rebuild after the reset and clear the reset-
658 * associated state bits.
659 */
660 if (reset_type == ICE_RESET_PFR) {
661 pf->pfr_count++;
662 ice_rebuild(pf, reset_type);
663 clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
664 clear_bit(ICE_PFR_REQ, pf->state);
665 wake_up(&pf->reset_wait_queue);
666 ice_reset_all_vfs(pf);
667 }
668 }
669
670 /**
671 * ice_reset_subtask - Set up for resetting the device and driver
672 * @pf: board private structure
673 */
ice_reset_subtask(struct ice_pf * pf)674 static void ice_reset_subtask(struct ice_pf *pf)
675 {
676 enum ice_reset_req reset_type = ICE_RESET_INVAL;
677
678 /* When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an
679 * OICR interrupt. The OICR handler (ice_misc_intr) determines what type
680 * of reset is pending and sets bits in pf->state indicating the reset
681 * type and ICE_RESET_OICR_RECV. So, if the latter bit is set
682 * prepare for pending reset if not already (for PF software-initiated
683 * global resets the software should already be prepared for it as
684 * indicated by ICE_PREPARED_FOR_RESET; for global resets initiated
685 * by firmware or software on other PFs, that bit is not set so prepare
686 * for the reset now), poll for reset done, rebuild and return.
687 */
688 if (test_bit(ICE_RESET_OICR_RECV, pf->state)) {
689 /* Perform the largest reset requested */
690 if (test_and_clear_bit(ICE_CORER_RECV, pf->state))
691 reset_type = ICE_RESET_CORER;
692 if (test_and_clear_bit(ICE_GLOBR_RECV, pf->state))
693 reset_type = ICE_RESET_GLOBR;
694 if (test_and_clear_bit(ICE_EMPR_RECV, pf->state))
695 reset_type = ICE_RESET_EMPR;
696 /* return if no valid reset type requested */
697 if (reset_type == ICE_RESET_INVAL)
698 return;
699 ice_prepare_for_reset(pf, reset_type);
700
701 /* make sure we are ready to rebuild */
702 if (ice_check_reset(&pf->hw)) {
703 set_bit(ICE_RESET_FAILED, pf->state);
704 } else {
705 /* done with reset. start rebuild */
706 pf->hw.reset_ongoing = false;
707 ice_rebuild(pf, reset_type);
708 /* clear bit to resume normal operations, but
709 * ICE_NEEDS_RESTART bit is set in case rebuild failed
710 */
711 clear_bit(ICE_RESET_OICR_RECV, pf->state);
712 clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
713 clear_bit(ICE_PFR_REQ, pf->state);
714 clear_bit(ICE_CORER_REQ, pf->state);
715 clear_bit(ICE_GLOBR_REQ, pf->state);
716 wake_up(&pf->reset_wait_queue);
717 ice_reset_all_vfs(pf);
718 }
719
720 return;
721 }
722
723 /* No pending resets to finish processing. Check for new resets */
724 if (test_bit(ICE_PFR_REQ, pf->state)) {
725 reset_type = ICE_RESET_PFR;
726 if (pf->lag && pf->lag->bonded) {
727 dev_dbg(ice_pf_to_dev(pf), "PFR on a bonded interface, promoting to CORER\n");
728 reset_type = ICE_RESET_CORER;
729 }
730 }
731 if (test_bit(ICE_CORER_REQ, pf->state))
732 reset_type = ICE_RESET_CORER;
733 if (test_bit(ICE_GLOBR_REQ, pf->state))
734 reset_type = ICE_RESET_GLOBR;
735 /* If no valid reset type requested just return */
736 if (reset_type == ICE_RESET_INVAL)
737 return;
738
739 /* reset if not already down or busy */
740 if (!test_bit(ICE_DOWN, pf->state) &&
741 !test_bit(ICE_CFG_BUSY, pf->state)) {
742 ice_do_reset(pf, reset_type);
743 }
744 }
745
746 /**
747 * ice_print_topo_conflict - print topology conflict message
748 * @vsi: the VSI whose topology status is being checked
749 */
ice_print_topo_conflict(struct ice_vsi * vsi)750 static void ice_print_topo_conflict(struct ice_vsi *vsi)
751 {
752 switch (vsi->port_info->phy.link_info.topo_media_conflict) {
753 case ICE_AQ_LINK_TOPO_CONFLICT:
754 case ICE_AQ_LINK_MEDIA_CONFLICT:
755 case ICE_AQ_LINK_TOPO_UNREACH_PRT:
756 case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT:
757 case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA:
758 netdev_info(vsi->netdev, "Potential misconfiguration of the Ethernet port detected. If it was not intended, please use the Intel (R) Ethernet Port Configuration Tool to address the issue.\n");
759 break;
760 case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA:
761 if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, vsi->back->flags))
762 netdev_warn(vsi->netdev, "An unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules\n");
763 else
764 netdev_err(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
765 break;
766 default:
767 break;
768 }
769 }
770
771 /**
772 * ice_print_link_msg - print link up or down message
773 * @vsi: the VSI whose link status is being queried
774 * @isup: boolean for if the link is now up or down
775 */
ice_print_link_msg(struct ice_vsi * vsi,bool isup)776 void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
777 {
778 struct ice_aqc_get_phy_caps_data *caps;
779 const char *an_advertised;
780 const char *fec_req;
781 const char *speed;
782 const char *fec;
783 const char *fc;
784 const char *an;
785 int status;
786
787 if (!vsi)
788 return;
789
790 if (vsi->current_isup == isup)
791 return;
792
793 vsi->current_isup = isup;
794
795 if (!isup) {
796 netdev_info(vsi->netdev, "NIC Link is Down\n");
797 return;
798 }
799
800 switch (vsi->port_info->phy.link_info.link_speed) {
801 case ICE_AQ_LINK_SPEED_200GB:
802 speed = "200 G";
803 break;
804 case ICE_AQ_LINK_SPEED_100GB:
805 speed = "100 G";
806 break;
807 case ICE_AQ_LINK_SPEED_50GB:
808 speed = "50 G";
809 break;
810 case ICE_AQ_LINK_SPEED_40GB:
811 speed = "40 G";
812 break;
813 case ICE_AQ_LINK_SPEED_25GB:
814 speed = "25 G";
815 break;
816 case ICE_AQ_LINK_SPEED_20GB:
817 speed = "20 G";
818 break;
819 case ICE_AQ_LINK_SPEED_10GB:
820 speed = "10 G";
821 break;
822 case ICE_AQ_LINK_SPEED_5GB:
823 speed = "5 G";
824 break;
825 case ICE_AQ_LINK_SPEED_2500MB:
826 speed = "2.5 G";
827 break;
828 case ICE_AQ_LINK_SPEED_1000MB:
829 speed = "1 G";
830 break;
831 case ICE_AQ_LINK_SPEED_100MB:
832 speed = "100 M";
833 break;
834 default:
835 speed = "Unknown ";
836 break;
837 }
838
839 switch (vsi->port_info->fc.current_mode) {
840 case ICE_FC_FULL:
841 fc = "Rx/Tx";
842 break;
843 case ICE_FC_TX_PAUSE:
844 fc = "Tx";
845 break;
846 case ICE_FC_RX_PAUSE:
847 fc = "Rx";
848 break;
849 case ICE_FC_NONE:
850 fc = "None";
851 break;
852 default:
853 fc = "Unknown";
854 break;
855 }
856
857 /* Get FEC mode based on negotiated link info */
858 switch (vsi->port_info->phy.link_info.fec_info) {
859 case ICE_AQ_LINK_25G_RS_528_FEC_EN:
860 case ICE_AQ_LINK_25G_RS_544_FEC_EN:
861 fec = "RS-FEC";
862 break;
863 case ICE_AQ_LINK_25G_KR_FEC_EN:
864 fec = "FC-FEC/BASE-R";
865 break;
866 default:
867 fec = "NONE";
868 break;
869 }
870
871 /* check if autoneg completed, might be false due to not supported */
872 if (vsi->port_info->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)
873 an = "True";
874 else
875 an = "False";
876
877 /* Get FEC mode requested based on PHY caps last SW configuration */
878 caps = kzalloc_obj(*caps);
879 if (!caps) {
880 fec_req = "Unknown";
881 an_advertised = "Unknown";
882 goto done;
883 }
884
885 status = ice_aq_get_phy_caps(vsi->port_info, false,
886 ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL);
887 if (status)
888 netdev_info(vsi->netdev, "Get phy capability failed.\n");
889
890 an_advertised = ice_is_phy_caps_an_enabled(caps) ? "On" : "Off";
891
892 if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
893 caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
894 fec_req = "RS-FEC";
895 else if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
896 caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
897 fec_req = "FC-FEC/BASE-R";
898 else
899 fec_req = "NONE";
900
901 kfree(caps);
902
903 done:
904 netdev_info(vsi->netdev, "NIC Link is up %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg Advertised: %s, Autoneg Negotiated: %s, Flow Control: %s\n",
905 speed, fec_req, fec, an_advertised, an, fc);
906 ice_print_topo_conflict(vsi);
907 }
908
909 /**
910 * ice_vsi_link_event - update the VSI's netdev
911 * @vsi: the VSI on which the link event occurred
912 * @link_up: whether or not the VSI needs to be set up or down
913 */
ice_vsi_link_event(struct ice_vsi * vsi,bool link_up)914 static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up)
915 {
916 if (!vsi)
917 return;
918
919 if (test_bit(ICE_VSI_DOWN, vsi->state) || !vsi->netdev)
920 return;
921
922 if (vsi->type == ICE_VSI_PF) {
923 if (link_up == netif_carrier_ok(vsi->netdev))
924 return;
925
926 if (link_up) {
927 netif_carrier_on(vsi->netdev);
928 netif_tx_wake_all_queues(vsi->netdev);
929 } else {
930 netif_carrier_off(vsi->netdev);
931 netif_tx_stop_all_queues(vsi->netdev);
932 }
933 }
934 }
935
936 /**
937 * ice_set_dflt_mib - send a default config MIB to the FW
938 * @pf: private PF struct
939 *
940 * This function sends a default configuration MIB to the FW.
941 *
942 * If this function errors out at any point, the driver is still able to
943 * function. The main impact is that LFC may not operate as expected.
944 * Therefore an error state in this function should be treated with a DBG
945 * message and continue on with driver rebuild/reenable.
946 */
ice_set_dflt_mib(struct ice_pf * pf)947 static void ice_set_dflt_mib(struct ice_pf *pf)
948 {
949 struct device *dev = ice_pf_to_dev(pf);
950 u8 mib_type, *buf, *lldpmib = NULL;
951 u16 len, typelen, offset = 0;
952 struct ice_lldp_org_tlv *tlv;
953 struct ice_hw *hw = &pf->hw;
954 u32 ouisubtype;
955
956 mib_type = SET_LOCAL_MIB_TYPE_LOCAL_MIB;
957 lldpmib = kzalloc(ICE_LLDPDU_SIZE, GFP_KERNEL);
958 if (!lldpmib) {
959 dev_dbg(dev, "%s Failed to allocate MIB memory\n",
960 __func__);
961 return;
962 }
963
964 /* Add ETS CFG TLV */
965 tlv = (struct ice_lldp_org_tlv *)lldpmib;
966 typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
967 ICE_IEEE_ETS_TLV_LEN);
968 tlv->typelen = htons(typelen);
969 ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
970 ICE_IEEE_SUBTYPE_ETS_CFG);
971 tlv->ouisubtype = htonl(ouisubtype);
972
973 buf = tlv->tlvinfo;
974 buf[0] = 0;
975
976 /* ETS CFG all UPs map to TC 0. Next 4 (1 - 4) Octets = 0.
977 * Octets 5 - 12 are BW values, set octet 5 to 100% BW.
978 * Octets 13 - 20 are TSA values - leave as zeros
979 */
980 buf[5] = 0x64;
981 len = FIELD_GET(ICE_LLDP_TLV_LEN_M, typelen);
982 offset += len + 2;
983 tlv = (struct ice_lldp_org_tlv *)
984 ((char *)tlv + sizeof(tlv->typelen) + len);
985
986 /* Add ETS REC TLV */
987 buf = tlv->tlvinfo;
988 tlv->typelen = htons(typelen);
989
990 ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
991 ICE_IEEE_SUBTYPE_ETS_REC);
992 tlv->ouisubtype = htonl(ouisubtype);
993
994 /* First octet of buf is reserved
995 * Octets 1 - 4 map UP to TC - all UPs map to zero
996 * Octets 5 - 12 are BW values - set TC 0 to 100%.
997 * Octets 13 - 20 are TSA value - leave as zeros
998 */
999 buf[5] = 0x64;
1000 offset += len + 2;
1001 tlv = (struct ice_lldp_org_tlv *)
1002 ((char *)tlv + sizeof(tlv->typelen) + len);
1003
1004 /* Add PFC CFG TLV */
1005 typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
1006 ICE_IEEE_PFC_TLV_LEN);
1007 tlv->typelen = htons(typelen);
1008
1009 ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
1010 ICE_IEEE_SUBTYPE_PFC_CFG);
1011 tlv->ouisubtype = htonl(ouisubtype);
1012
1013 /* Octet 1 left as all zeros - PFC disabled */
1014 buf[0] = 0x08;
1015 len = FIELD_GET(ICE_LLDP_TLV_LEN_M, typelen);
1016 offset += len + 2;
1017
1018 if (ice_aq_set_lldp_mib(hw, mib_type, (void *)lldpmib, offset, NULL))
1019 dev_dbg(dev, "%s Failed to set default LLDP MIB\n", __func__);
1020
1021 kfree(lldpmib);
1022 }
1023
1024 /**
1025 * ice_check_phy_fw_load - check if PHY FW load failed
1026 * @pf: pointer to PF struct
1027 * @link_cfg_err: bitmap from the link info structure
1028 *
1029 * check if external PHY FW load failed and print an error message if it did
1030 */
ice_check_phy_fw_load(struct ice_pf * pf,u8 link_cfg_err)1031 static void ice_check_phy_fw_load(struct ice_pf *pf, u8 link_cfg_err)
1032 {
1033 if (!(link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE)) {
1034 clear_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags);
1035 return;
1036 }
1037
1038 if (test_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags))
1039 return;
1040
1041 if (link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE) {
1042 dev_err(ice_pf_to_dev(pf), "Device failed to load the FW for the external PHY. Please download and install the latest NVM for your device and try again\n");
1043 set_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags);
1044 }
1045 }
1046
1047 /**
1048 * ice_check_module_power
1049 * @pf: pointer to PF struct
1050 * @link_cfg_err: bitmap from the link info structure
1051 *
1052 * check module power level returned by a previous call to aq_get_link_info
1053 * and print error messages if module power level is not supported
1054 */
ice_check_module_power(struct ice_pf * pf,u8 link_cfg_err)1055 static void ice_check_module_power(struct ice_pf *pf, u8 link_cfg_err)
1056 {
1057 /* if module power level is supported, clear the flag */
1058 if (!(link_cfg_err & (ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT |
1059 ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED))) {
1060 clear_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
1061 return;
1062 }
1063
1064 /* if ICE_FLAG_MOD_POWER_UNSUPPORTED was previously set and the
1065 * above block didn't clear this bit, there's nothing to do
1066 */
1067 if (test_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags))
1068 return;
1069
1070 if (link_cfg_err & ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT) {
1071 dev_err(ice_pf_to_dev(pf), "The installed module is incompatible with the device's NVM image. Cannot start link\n");
1072 set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
1073 } else if (link_cfg_err & ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED) {
1074 dev_err(ice_pf_to_dev(pf), "The module's power requirements exceed the device's power supply. Cannot start link\n");
1075 set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
1076 }
1077 }
1078
/**
 * ice_check_link_cfg_err - check if link configuration failed
 * @pf: pointer to the PF struct
 * @link_cfg_err: bitmap from the link info structure
 *
 * Run every link configuration error check on the link_cfg_err value taken
 * from the link info structure. Each check prints its own message and
 * maintains its own flag in pf->flags.
 */
static void ice_check_link_cfg_err(struct ice_pf *pf, u8 link_cfg_err)
{
	/* module power level problems */
	ice_check_module_power(pf, link_cfg_err);
	/* external PHY firmware load problems */
	ice_check_phy_fw_load(pf, link_cfg_err);
}
1092
/**
 * ice_link_event - process the link event
 * @pf: PF that the link event is associated with
 * @pi: port_info for the port that the link event is associated with
 * @link_up: true if the physical link is up and false if it is down
 * @link_speed: current link speed received from the link event
 *
 * Refresh the cached link info for @pi, report link configuration errors,
 * and, when the link state or speed differs from the previously cached
 * values, propagate the change (PTP, DCB/default MIB, main VSI, log message,
 * VF notification).
 *
 * Return: 0 on success (including when nothing changed), -EINVAL if the main
 * VSI or its port_info is not available.
 */
static int
ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
	       u16 link_speed)
{
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_phy_info *phy_info;
	struct ice_vsi *vsi;
	u16 old_link_speed;
	bool old_link;
	int status;

	/* snapshot the currently cached link info before it gets refreshed */
	phy_info = &pi->phy;
	phy_info->link_info_old = phy_info->link_info;

	old_link = !!(phy_info->link_info_old.link_info & ICE_AQ_LINK_UP);
	old_link_speed = phy_info->link_info_old.link_speed;

	/* update the link info structures and re-enable link events,
	 * don't bail on failure due to other book keeping needed
	 */
	status = ice_update_link_info(pi);
	if (status)
		dev_dbg(dev, "Failed to update link status on port %d, err %d aq_err %s\n",
			pi->lport, status,
			libie_aq_str(pi->hw->adminq.sq_last_status));

	ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err);

	/* Check if the link state is up after updating link info, and treat
	 * this event as an UP event since the link is actually UP now.
	 */
	if (phy_info->link_info.link_info & ICE_AQ_LINK_UP)
		link_up = true;

	vsi = ice_get_main_vsi(pf);
	if (!vsi || !vsi->port_info)
		return -EINVAL;

	/* turn off PHY if media was removed */
	if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags) &&
	    !(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
		set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
		ice_set_link(vsi, false);
	}

	/* if the old link up/down and speed is the same as the new;
	 * note the speed compared here is the one carried by the event
	 */
	if (link_up == old_link && link_speed == old_link_speed)
		return 0;

	/* count up -> down transitions only */
	if (!link_up && old_link)
		pf->link_down_events++;

	ice_ptp_link_change(pf, link_up);

	/* With DCB active, rebuild it if it is enabled; otherwise program the
	 * default MIB whenever the link comes up.
	 */
	if (ice_is_dcb_active(pf)) {
		if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
			ice_dcb_rebuild(pf);
	} else {
		if (link_up)
			ice_set_dflt_mib(pf);
	}
	ice_vsi_link_event(vsi, link_up);
	ice_print_link_msg(vsi, link_up);

	ice_vc_notify_link_state(pf);

	return 0;
}
1170
1171 /**
1172 * ice_watchdog_subtask - periodic tasks not using event driven scheduling
1173 * @pf: board private structure
1174 */
ice_watchdog_subtask(struct ice_pf * pf)1175 static void ice_watchdog_subtask(struct ice_pf *pf)
1176 {
1177 int i;
1178
1179 /* if interface is down do nothing */
1180 if (test_bit(ICE_DOWN, pf->state) ||
1181 test_bit(ICE_CFG_BUSY, pf->state))
1182 return;
1183
1184 /* make sure we don't do these things too often */
1185 if (time_before(jiffies,
1186 pf->serv_tmr_prev + pf->serv_tmr_period))
1187 return;
1188
1189 pf->serv_tmr_prev = jiffies;
1190
1191 /* Update the stats for active netdevs so the network stack
1192 * can look at updated numbers whenever it cares to
1193 */
1194 ice_update_pf_stats(pf);
1195 ice_for_each_vsi(pf, i)
1196 if (pf->vsi[i] && pf->vsi[i]->netdev)
1197 ice_update_vsi_stats(pf->vsi[i]);
1198 }
1199
1200 /**
1201 * ice_init_link_events - enable/initialize link events
1202 * @pi: pointer to the port_info instance
1203 *
1204 * Returns -EIO on failure, 0 on success
1205 */
ice_init_link_events(struct ice_port_info * pi)1206 static int ice_init_link_events(struct ice_port_info *pi)
1207 {
1208 u16 mask;
1209
1210 mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA |
1211 ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL |
1212 ICE_AQ_LINK_EVENT_PHY_FW_LOAD_FAIL));
1213
1214 if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) {
1215 dev_dbg(ice_hw_to_dev(pi->hw), "Failed to set link event mask for port %d\n",
1216 pi->lport);
1217 return -EIO;
1218 }
1219
1220 if (ice_aq_get_link_info(pi, true, NULL, NULL)) {
1221 dev_dbg(ice_hw_to_dev(pi->hw), "Failed to enable link events for port %d\n",
1222 pi->lport);
1223 return -EIO;
1224 }
1225
1226 return 0;
1227 }
1228
1229 /**
1230 * ice_handle_link_event - handle link event via ARQ
1231 * @pf: PF that the link event is associated with
1232 * @event: event structure containing link status info
1233 */
1234 static int
ice_handle_link_event(struct ice_pf * pf,struct ice_rq_event_info * event)1235 ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event)
1236 {
1237 struct ice_aqc_get_link_status_data *link_data;
1238 struct ice_port_info *port_info;
1239 int status;
1240
1241 link_data = (struct ice_aqc_get_link_status_data *)event->msg_buf;
1242 port_info = pf->hw.port_info;
1243 if (!port_info)
1244 return -EINVAL;
1245
1246 status = ice_link_event(pf, port_info,
1247 !!(link_data->link_info & ICE_AQ_LINK_UP),
1248 le16_to_cpu(link_data->link_speed));
1249 if (status)
1250 dev_dbg(ice_pf_to_dev(pf), "Could not process link event, error %d\n",
1251 status);
1252
1253 return status;
1254 }
1255
1256 /**
1257 * ice_aq_prep_for_event - Prepare to wait for an AdminQ event from firmware
1258 * @pf: pointer to the PF private structure
1259 * @task: intermediate helper storage and identifier for waiting
1260 * @opcode: the opcode to wait for
1261 *
1262 * Prepares to wait for a specific AdminQ completion event on the ARQ for
1263 * a given PF. Actual wait would be done by a call to ice_aq_wait_for_event().
1264 *
1265 * Calls are separated to allow caller registering for event before sending
1266 * the command, which mitigates a race between registering and FW responding.
1267 *
1268 * To obtain only the descriptor contents, pass an task->event with null
1269 * msg_buf. If the complete data buffer is desired, allocate the
1270 * task->event.msg_buf with enough space ahead of time.
1271 */
ice_aq_prep_for_event(struct ice_pf * pf,struct ice_aq_task * task,u16 opcode)1272 void ice_aq_prep_for_event(struct ice_pf *pf, struct ice_aq_task *task,
1273 u16 opcode)
1274 {
1275 INIT_HLIST_NODE(&task->entry);
1276 task->opcode = opcode;
1277 task->state = ICE_AQ_TASK_WAITING;
1278
1279 spin_lock_bh(&pf->aq_wait_lock);
1280 hlist_add_head(&task->entry, &pf->aq_wait_list);
1281 spin_unlock_bh(&pf->aq_wait_lock);
1282 }
1283
/**
 * ice_aq_wait_for_event - Wait for an AdminQ event from firmware
 * @pf: pointer to the PF private structure
 * @task: ptr prepared by ice_aq_prep_for_event()
 * @timeout: how long to wait, in jiffies
 *
 * Waits for a specific AdminQ completion event on the ARQ for a given PF. The
 * current thread will be put to sleep until the specified event occurs or
 * until the given timeout is reached.
 *
 * The task is always removed from the PF's wait list before returning,
 * whatever the outcome.
 *
 * Returns: zero on success, or a negative error code on failure:
 * -ETIMEDOUT if the task is still waiting, -ECANCELED if it was canceled,
 * -EINVAL for an unprepared or unexpected task state, or the negative value
 * returned by the interruptible wait itself.
 */
int ice_aq_wait_for_event(struct ice_pf *pf, struct ice_aq_task *task,
			  unsigned long timeout)
{
	enum ice_aq_task_state *state = &task->state;
	struct device *dev = ice_pf_to_dev(pf);
	unsigned long start = jiffies;
	long ret;
	int err;

	/* sleep until the task leaves the WAITING state or time runs out */
	ret = wait_event_interruptible_timeout(pf->aq_wait_queue,
					       *state != ICE_AQ_TASK_WAITING,
					       timeout);
	/* A negative ret from the wait takes precedence over the
	 * state-derived result in the WAITING, CANCELED and COMPLETE cases.
	 */
	switch (*state) {
	case ICE_AQ_TASK_NOT_PREPARED:
		WARN(1, "call to %s without ice_aq_prep_for_event()", __func__);
		err = -EINVAL;
		break;
	case ICE_AQ_TASK_WAITING:
		/* still waiting after the wait returned */
		err = ret < 0 ? ret : -ETIMEDOUT;
		break;
	case ICE_AQ_TASK_CANCELED:
		err = ret < 0 ? ret : -ECANCELED;
		break;
	case ICE_AQ_TASK_COMPLETE:
		err = ret < 0 ? ret : 0;
		break;
	default:
		WARN(1, "Unexpected AdminQ wait task state %u", *state);
		err = -EINVAL;
		break;
	}

	dev_dbg(dev, "Waited %u msecs (max %u msecs) for firmware response to op 0x%04x\n",
		jiffies_to_msecs(jiffies - start),
		jiffies_to_msecs(timeout),
		task->opcode);

	/* unregister the waiter; pairs with ice_aq_prep_for_event() */
	spin_lock_bh(&pf->aq_wait_lock);
	hlist_del(&task->entry);
	spin_unlock_bh(&pf->aq_wait_lock);

	return err;
}
1339
/**
 * ice_aq_check_events - Check if any thread is waiting for an AdminQ event
 * @pf: pointer to the PF private structure
 * @opcode: the opcode of the event
 * @event: the event to check
 *
 * Loops over the current list of pending threads waiting for an AdminQ event.
 * For each task that is still waiting and whose opcode matches, copy the
 * contents of the event into the task structure and mark it complete. The
 * wait queue is woken up once, after the list lock has been released, if at
 * least one task matched.
 *
 * If multiple threads wait for the same opcode, they will all be woken up.
 *
 * Note that event->msg_buf will only be duplicated if the waiting task's
 * event has a buffer at least as large as the received event's buffer.
 * Otherwise, only the descriptor and message length will be copied.
 *
 * This function does not return a value.
 */
static void ice_aq_check_events(struct ice_pf *pf, u16 opcode,
				struct ice_rq_event_info *event)
{
	struct ice_rq_event_info *task_ev;
	struct ice_aq_task *task;
	bool found = false;

	spin_lock_bh(&pf->aq_wait_lock);
	hlist_for_each_entry(task, &pf->aq_wait_list, entry) {
		/* skip tasks that already completed or were canceled */
		if (task->state != ICE_AQ_TASK_WAITING)
			continue;
		if (task->opcode != opcode)
			continue;

		task_ev = &task->event;
		memcpy(&task_ev->desc, &event->desc, sizeof(event->desc));
		task_ev->msg_len = event->msg_len;

		/* Only copy the data buffer if a destination was set */
		if (task_ev->msg_buf && task_ev->buf_len >= event->buf_len) {
			memcpy(task_ev->msg_buf, event->msg_buf,
			       event->buf_len);
			task_ev->buf_len = event->buf_len;
		}

		task->state = ICE_AQ_TASK_COMPLETE;
		found = true;
	}
	spin_unlock_bh(&pf->aq_wait_lock);

	/* wake the waiters outside of the spinlock */
	if (found)
		wake_up(&pf->aq_wait_queue);
}
1391
/**
 * ice_aq_cancel_waiting_tasks - Immediately cancel all waiting tasks
 * @pf: the PF private structure
 *
 * Set every task on the PF's wait list to ICE_AQ_TASK_CANCELED, and wake up
 * their threads. This will then cause ice_aq_wait_for_event to exit with
 * -ECANCELED.
 */
static void ice_aq_cancel_waiting_tasks(struct ice_pf *pf)
{
	struct ice_aq_task *task;

	/* no state filter here: every task still on the list is canceled */
	spin_lock_bh(&pf->aq_wait_lock);
	hlist_for_each_entry(task, &pf->aq_wait_list, entry)
		task->state = ICE_AQ_TASK_CANCELED;
	spin_unlock_bh(&pf->aq_wait_lock);

	/* wake the waiters outside of the spinlock */
	wake_up(&pf->aq_wait_queue);
}
1410
1411 #define ICE_MBX_OVERFLOW_WATERMARK 64
1412
1413 /**
1414 * __ice_clean_ctrlq - helper function to clean controlq rings
1415 * @pf: ptr to struct ice_pf
1416 * @q_type: specific Control queue type
1417 */
__ice_clean_ctrlq(struct ice_pf * pf,enum ice_ctl_q q_type)1418 static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
1419 {
1420 struct device *dev = ice_pf_to_dev(pf);
1421 struct ice_rq_event_info event;
1422 struct ice_hw *hw = &pf->hw;
1423 struct ice_ctl_q_info *cq;
1424 u16 pending, i = 0;
1425 const char *qtype;
1426 u32 oldval, val;
1427
1428 /* Do not clean control queue if/when PF reset fails */
1429 if (test_bit(ICE_RESET_FAILED, pf->state))
1430 return 0;
1431
1432 switch (q_type) {
1433 case ICE_CTL_Q_ADMIN:
1434 cq = &hw->adminq;
1435 qtype = "Admin";
1436 break;
1437 case ICE_CTL_Q_SB:
1438 cq = &hw->sbq;
1439 qtype = "Sideband";
1440 break;
1441 case ICE_CTL_Q_MAILBOX:
1442 cq = &hw->mailboxq;
1443 qtype = "Mailbox";
1444 /* we are going to try to detect a malicious VF, so set the
1445 * state to begin detection
1446 */
1447 hw->mbx_snapshot.mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
1448 break;
1449 default:
1450 dev_warn(dev, "Unknown control queue type 0x%x\n", q_type);
1451 return 0;
1452 }
1453
1454 /* check for error indications - PF_xx_AxQLEN register layout for
1455 * FW/MBX/SB are identical so just use defines for PF_FW_AxQLEN.
1456 */
1457 val = rd32(hw, cq->rq.len);
1458 if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
1459 PF_FW_ARQLEN_ARQCRIT_M)) {
1460 oldval = val;
1461 if (val & PF_FW_ARQLEN_ARQVFE_M)
1462 dev_dbg(dev, "%s Receive Queue VF Error detected\n",
1463 qtype);
1464 if (val & PF_FW_ARQLEN_ARQOVFL_M) {
1465 dev_dbg(dev, "%s Receive Queue Overflow Error detected\n",
1466 qtype);
1467 }
1468 if (val & PF_FW_ARQLEN_ARQCRIT_M)
1469 dev_dbg(dev, "%s Receive Queue Critical Error detected\n",
1470 qtype);
1471 val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
1472 PF_FW_ARQLEN_ARQCRIT_M);
1473 if (oldval != val)
1474 wr32(hw, cq->rq.len, val);
1475 }
1476
1477 val = rd32(hw, cq->sq.len);
1478 if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
1479 PF_FW_ATQLEN_ATQCRIT_M)) {
1480 oldval = val;
1481 if (val & PF_FW_ATQLEN_ATQVFE_M)
1482 dev_dbg(dev, "%s Send Queue VF Error detected\n",
1483 qtype);
1484 if (val & PF_FW_ATQLEN_ATQOVFL_M) {
1485 dev_dbg(dev, "%s Send Queue Overflow Error detected\n",
1486 qtype);
1487 }
1488 if (val & PF_FW_ATQLEN_ATQCRIT_M)
1489 dev_dbg(dev, "%s Send Queue Critical Error detected\n",
1490 qtype);
1491 val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
1492 PF_FW_ATQLEN_ATQCRIT_M);
1493 if (oldval != val)
1494 wr32(hw, cq->sq.len, val);
1495 }
1496
1497 event.buf_len = cq->rq_buf_size;
1498 event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
1499 if (!event.msg_buf)
1500 return 0;
1501
1502 do {
1503 struct ice_mbx_data data = {};
1504 u16 opcode;
1505 int ret;
1506
1507 ret = ice_clean_rq_elem(hw, cq, &event, &pending);
1508 if (ret == -EALREADY)
1509 break;
1510 if (ret) {
1511 dev_err(dev, "%s Receive Queue event error %d\n", qtype,
1512 ret);
1513 break;
1514 }
1515
1516 opcode = le16_to_cpu(event.desc.opcode);
1517
1518 /* Notify any thread that might be waiting for this event */
1519 ice_aq_check_events(pf, opcode, &event);
1520
1521 switch (opcode) {
1522 case ice_aqc_opc_get_link_status:
1523 if (ice_handle_link_event(pf, &event))
1524 dev_err(dev, "Could not handle link event\n");
1525 break;
1526 case ice_aqc_opc_event_lan_overflow:
1527 ice_vf_lan_overflow_event(pf, &event);
1528 break;
1529 case ice_mbx_opc_send_msg_to_pf:
1530 if (ice_is_feature_supported(pf, ICE_F_MBX_LIMIT)) {
1531 ice_vc_process_vf_msg(pf, &event, NULL);
1532 ice_mbx_vf_dec_trig_e830(hw, &event);
1533 } else {
1534 u16 val = hw->mailboxq.num_rq_entries;
1535
1536 data.max_num_msgs_mbx = val;
1537 val = ICE_MBX_OVERFLOW_WATERMARK;
1538 data.async_watermark_val = val;
1539 data.num_msg_proc = i;
1540 data.num_pending_arq = pending;
1541
1542 ice_vc_process_vf_msg(pf, &event, &data);
1543 }
1544 break;
1545 case ice_aqc_opc_fw_logs_event:
1546 libie_get_fwlog_data(&hw->fwlog, event.msg_buf,
1547 le16_to_cpu(event.desc.datalen));
1548 break;
1549 case ice_aqc_opc_lldp_set_mib_change:
1550 ice_dcb_process_lldp_set_mib_change(pf, &event);
1551 break;
1552 case ice_aqc_opc_get_health_status:
1553 ice_process_health_status_event(pf, &event);
1554 break;
1555 default:
1556 dev_dbg(dev, "%s Receive Queue unknown event 0x%04x ignored\n",
1557 qtype, opcode);
1558 break;
1559 }
1560 } while (pending && (i++ < ICE_DFLT_IRQ_WORK));
1561
1562 kfree(event.msg_buf);
1563
1564 return pending && (i == ICE_DFLT_IRQ_WORK);
1565 }
1566
1567 /**
1568 * ice_ctrlq_pending - check if there is a difference between ntc and ntu
1569 * @hw: pointer to hardware info
1570 * @cq: control queue information
1571 *
1572 * returns true if there are pending messages in a queue, false if there aren't
1573 */
ice_ctrlq_pending(struct ice_hw * hw,struct ice_ctl_q_info * cq)1574 static bool ice_ctrlq_pending(struct ice_hw *hw, struct ice_ctl_q_info *cq)
1575 {
1576 u16 ntu;
1577
1578 ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
1579 return cq->rq.next_to_clean != ntu;
1580 }
1581
1582 /**
1583 * ice_clean_adminq_subtask - clean the AdminQ rings
1584 * @pf: board private structure
1585 */
ice_clean_adminq_subtask(struct ice_pf * pf)1586 static void ice_clean_adminq_subtask(struct ice_pf *pf)
1587 {
1588 struct ice_hw *hw = &pf->hw;
1589
1590 if (!test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state))
1591 return;
1592
1593 if (__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN))
1594 return;
1595
1596 clear_bit(ICE_ADMINQ_EVENT_PENDING, pf->state);
1597
1598 /* There might be a situation where new messages arrive to a control
1599 * queue between processing the last message and clearing the
1600 * EVENT_PENDING bit. So before exiting, check queue head again (using
1601 * ice_ctrlq_pending) and process new messages if any.
1602 */
1603 if (ice_ctrlq_pending(hw, &hw->adminq))
1604 __ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN);
1605
1606 ice_flush(hw);
1607 }
1608
1609 /**
1610 * ice_clean_mailboxq_subtask - clean the MailboxQ rings
1611 * @pf: board private structure
1612 */
ice_clean_mailboxq_subtask(struct ice_pf * pf)1613 static void ice_clean_mailboxq_subtask(struct ice_pf *pf)
1614 {
1615 struct ice_hw *hw = &pf->hw;
1616
1617 if (!test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state))
1618 return;
1619
1620 if (__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX))
1621 return;
1622
1623 clear_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state);
1624
1625 if (ice_ctrlq_pending(hw, &hw->mailboxq))
1626 __ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX);
1627
1628 ice_flush(hw);
1629 }
1630
1631 /**
1632 * ice_clean_sbq_subtask - clean the Sideband Queue rings
1633 * @pf: board private structure
1634 */
ice_clean_sbq_subtask(struct ice_pf * pf)1635 static void ice_clean_sbq_subtask(struct ice_pf *pf)
1636 {
1637 struct ice_hw *hw = &pf->hw;
1638
1639 /* if mac_type is not generic, sideband is not supported
1640 * and there's nothing to do here
1641 */
1642 if (!ice_is_generic_mac(hw)) {
1643 clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);
1644 return;
1645 }
1646
1647 if (!test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state))
1648 return;
1649
1650 if (__ice_clean_ctrlq(pf, ICE_CTL_Q_SB))
1651 return;
1652
1653 clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);
1654
1655 if (ice_ctrlq_pending(hw, &hw->sbq))
1656 __ice_clean_ctrlq(pf, ICE_CTL_Q_SB);
1657
1658 ice_flush(hw);
1659 }
1660
1661 /**
1662 * ice_service_task_schedule - schedule the service task to wake up
1663 * @pf: board private structure
1664 *
1665 * If not already scheduled, this puts the task into the work queue.
1666 */
ice_service_task_schedule(struct ice_pf * pf)1667 void ice_service_task_schedule(struct ice_pf *pf)
1668 {
1669 if (!test_bit(ICE_SERVICE_DIS, pf->state) &&
1670 !test_and_set_bit(ICE_SERVICE_SCHED, pf->state) &&
1671 !test_bit(ICE_NEEDS_RESTART, pf->state))
1672 queue_work(ice_wq, &pf->serv_task);
1673 }
1674
/**
 * ice_service_task_complete - finish up the service task
 * @pf: board private structure
 *
 * Clear ICE_SERVICE_SCHED so that the service task can be scheduled again.
 * Warns if the bit was not set on entry.
 */
static void ice_service_task_complete(struct ice_pf *pf)
{
	WARN_ON(!test_bit(ICE_SERVICE_SCHED, pf->state));

	/* force memory (pf->state) to sync before next service task */
	smp_mb__before_atomic();
	clear_bit(ICE_SERVICE_SCHED, pf->state);
}
1687
1688 /**
1689 * ice_service_task_stop - stop service task and cancel works
1690 * @pf: board private structure
1691 *
1692 * Return 0 if the ICE_SERVICE_DIS bit was not already set,
1693 * 1 otherwise.
1694 */
ice_service_task_stop(struct ice_pf * pf)1695 static int ice_service_task_stop(struct ice_pf *pf)
1696 {
1697 int ret;
1698
1699 ret = test_and_set_bit(ICE_SERVICE_DIS, pf->state);
1700
1701 if (pf->serv_tmr.function)
1702 timer_delete_sync(&pf->serv_tmr);
1703 if (pf->serv_task.func)
1704 cancel_work_sync(&pf->serv_task);
1705
1706 clear_bit(ICE_SERVICE_SCHED, pf->state);
1707 return ret;
1708 }
1709
/**
 * ice_service_task_restart - restart service task and schedule works
 * @pf: board private structure
 *
 * Clear the ICE_SERVICE_DIS bit and schedule the service task again.
 *
 * This function is needed for suspend and resume works (e.g WoL scenario)
 */
static void ice_service_task_restart(struct ice_pf *pf)
{
	/* re-enable first, scheduling is refused while the bit is set */
	clear_bit(ICE_SERVICE_DIS, pf->state);
	ice_service_task_schedule(pf);
}
1721
1722 /**
1723 * ice_service_timer - timer callback to schedule service task
1724 * @t: pointer to timer_list
1725 */
ice_service_timer(struct timer_list * t)1726 static void ice_service_timer(struct timer_list *t)
1727 {
1728 struct ice_pf *pf = timer_container_of(pf, t, serv_tmr);
1729
1730 mod_timer(&pf->serv_tmr, round_jiffies(pf->serv_tmr_period + jiffies));
1731 ice_service_task_schedule(pf);
1732 }
1733
1734 /**
1735 * ice_mdd_maybe_reset_vf - reset VF after MDD event
1736 * @pf: pointer to the PF structure
1737 * @vf: pointer to the VF structure
1738 * @reset_vf_tx: whether Tx MDD has occurred
1739 * @reset_vf_rx: whether Rx MDD has occurred
1740 *
1741 * Since the queue can get stuck on VF MDD events, the PF can be configured to
1742 * automatically reset the VF by enabling the private ethtool flag
1743 * mdd-auto-reset-vf.
1744 */
ice_mdd_maybe_reset_vf(struct ice_pf * pf,struct ice_vf * vf,bool reset_vf_tx,bool reset_vf_rx)1745 static void ice_mdd_maybe_reset_vf(struct ice_pf *pf, struct ice_vf *vf,
1746 bool reset_vf_tx, bool reset_vf_rx)
1747 {
1748 struct device *dev = ice_pf_to_dev(pf);
1749
1750 if (!test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags))
1751 return;
1752
1753 /* VF MDD event counters will be cleared by reset, so print the event
1754 * prior to reset.
1755 */
1756 if (reset_vf_tx)
1757 ice_print_vf_tx_mdd_event(vf);
1758
1759 if (reset_vf_rx)
1760 ice_print_vf_rx_mdd_event(vf);
1761
1762 dev_info(dev, "PF-to-VF reset on PF %d VF %d due to MDD event\n",
1763 pf->hw.pf_id, vf->vf_id);
1764 ice_reset_vf(vf, ICE_VF_RESET_NOTIFY | ICE_VF_RESET_LOCK);
1765 }
1766
/**
 * ice_handle_mdd_event - handle malicious driver detect event
 * @pf: pointer to the PF structure
 *
 * Called from service task. OICR interrupt handler indicates MDD event.
 * VF MDD logging is guarded by net_ratelimit. Additional PF and VF log
 * messages are wrapped by netif_msg_[rx|tx]_err. Since VF Rx MDD events
 * disable the queue, the PF can be configured to reset the VF using ethtool
 * private flag mdd-auto-reset-vf.
 *
 * The function works in three stages: the global GL_MDET_* registers are
 * decoded and reported, the PF_MDET_* registers are checked for events caused
 * by this PF, and finally the per-VF VP_MDET_* registers are checked for each
 * VF. Every register found valid is written back to clear it.
 */
static void ice_handle_mdd_event(struct ice_pf *pf)
{
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_hw *hw = &pf->hw;
	struct ice_vf *vf;
	unsigned int bkt;
	u32 reg;

	if (!test_and_clear_bit(ICE_MDD_EVENT_PENDING, pf->state)) {
		/* Since the VF MDD event logging is rate limited, check if
		 * there are pending MDD events.
		 */
		ice_print_vfs_mdd_events(pf);
		return;
	}

	/* find what triggered an MDD event */
	reg = rd32(hw, GL_MDET_TX_PQM);
	if (reg & GL_MDET_TX_PQM_VALID_M) {
		u8 pf_num = FIELD_GET(GL_MDET_TX_PQM_PF_NUM_M, reg);
		u16 vf_num = FIELD_GET(GL_MDET_TX_PQM_VF_NUM_M, reg);
		u8 event = FIELD_GET(GL_MDET_TX_PQM_MAL_TYPE_M, reg);
		u16 queue = FIELD_GET(GL_MDET_TX_PQM_QNUM_M, reg);

		if (netif_msg_tx_err(pf))
			dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
				 event, queue, pf_num, vf_num);
		ice_report_mdd_event(pf, ICE_MDD_SRC_TX_PQM, pf_num, vf_num,
				     event, queue);
		/* clear the event register */
		wr32(hw, GL_MDET_TX_PQM, 0xffffffff);
	}

	reg = rd32(hw, GL_MDET_TX_TCLAN_BY_MAC(hw));
	if (reg & GL_MDET_TX_TCLAN_VALID_M) {
		u8 pf_num = FIELD_GET(GL_MDET_TX_TCLAN_PF_NUM_M, reg);
		u16 vf_num = FIELD_GET(GL_MDET_TX_TCLAN_VF_NUM_M, reg);
		u8 event = FIELD_GET(GL_MDET_TX_TCLAN_MAL_TYPE_M, reg);
		u16 queue = FIELD_GET(GL_MDET_TX_TCLAN_QNUM_M, reg);

		if (netif_msg_tx_err(pf))
			dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
				 event, queue, pf_num, vf_num);
		ice_report_mdd_event(pf, ICE_MDD_SRC_TX_TCLAN, pf_num, vf_num,
				     event, queue);
		/* clear the event register */
		wr32(hw, GL_MDET_TX_TCLAN_BY_MAC(hw), U32_MAX);
	}

	reg = rd32(hw, GL_MDET_RX);
	if (reg & GL_MDET_RX_VALID_M) {
		u8 pf_num = FIELD_GET(GL_MDET_RX_PF_NUM_M, reg);
		u16 vf_num = FIELD_GET(GL_MDET_RX_VF_NUM_M, reg);
		u8 event = FIELD_GET(GL_MDET_RX_MAL_TYPE_M, reg);
		u16 queue = FIELD_GET(GL_MDET_RX_QNUM_M, reg);

		if (netif_msg_rx_err(pf))
			dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n",
				 event, queue, pf_num, vf_num);
		ice_report_mdd_event(pf, ICE_MDD_SRC_RX, pf_num, vf_num, event,
				     queue);
		/* clear the event register */
		wr32(hw, GL_MDET_RX, 0xffffffff);
	}

	/* check to see if this PF caused an MDD event */
	reg = rd32(hw, PF_MDET_TX_PQM);
	if (reg & PF_MDET_TX_PQM_VALID_M) {
		wr32(hw, PF_MDET_TX_PQM, 0xFFFF);
		if (netif_msg_tx_err(pf))
			dev_info(dev, "Malicious Driver Detection event TX_PQM detected on PF\n");
	}

	reg = rd32(hw, PF_MDET_TX_TCLAN_BY_MAC(hw));
	if (reg & PF_MDET_TX_TCLAN_VALID_M) {
		wr32(hw, PF_MDET_TX_TCLAN_BY_MAC(hw), 0xffff);
		if (netif_msg_tx_err(pf))
			dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on PF\n");
	}

	reg = rd32(hw, PF_MDET_RX);
	if (reg & PF_MDET_RX_VALID_M) {
		wr32(hw, PF_MDET_RX, 0xFFFF);
		if (netif_msg_rx_err(pf))
			dev_info(dev, "Malicious Driver Detection event RX detected on PF\n");
	}

	/* Check to see if one of the VFs caused an MDD event, and then
	 * increment counters and set print pending
	 */
	mutex_lock(&pf->vfs.table_lock);
	ice_for_each_vf(pf, bkt, vf) {
		/* track per VF whether any Tx/Rx source reported an event */
		bool reset_vf_tx = false, reset_vf_rx = false;

		reg = rd32(hw, VP_MDET_TX_PQM(vf->vf_id));
		if (reg & VP_MDET_TX_PQM_VALID_M) {
			wr32(hw, VP_MDET_TX_PQM(vf->vf_id), 0xFFFF);
			vf->mdd_tx_events.count++;
			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
			if (netif_msg_tx_err(pf))
				dev_info(dev, "Malicious Driver Detection event TX_PQM detected on VF %d\n",
					 vf->vf_id);

			reset_vf_tx = true;
		}

		reg = rd32(hw, VP_MDET_TX_TCLAN(vf->vf_id));
		if (reg & VP_MDET_TX_TCLAN_VALID_M) {
			wr32(hw, VP_MDET_TX_TCLAN(vf->vf_id), 0xFFFF);
			vf->mdd_tx_events.count++;
			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
			if (netif_msg_tx_err(pf))
				dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n",
					 vf->vf_id);

			reset_vf_tx = true;
		}

		reg = rd32(hw, VP_MDET_TX_TDPU(vf->vf_id));
		if (reg & VP_MDET_TX_TDPU_VALID_M) {
			wr32(hw, VP_MDET_TX_TDPU(vf->vf_id), 0xFFFF);
			vf->mdd_tx_events.count++;
			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
			if (netif_msg_tx_err(pf))
				dev_info(dev, "Malicious Driver Detection event TX_TDPU detected on VF %d\n",
					 vf->vf_id);

			reset_vf_tx = true;
		}

		reg = rd32(hw, VP_MDET_RX(vf->vf_id));
		if (reg & VP_MDET_RX_VALID_M) {
			wr32(hw, VP_MDET_RX(vf->vf_id), 0xFFFF);
			vf->mdd_rx_events.count++;
			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
			if (netif_msg_rx_err(pf))
				dev_info(dev, "Malicious Driver Detection event RX detected on VF %d\n",
					 vf->vf_id);

			reset_vf_rx = true;
		}

		/* the helper only resets when auto-reset is enabled */
		if (reset_vf_tx || reset_vf_rx)
			ice_mdd_maybe_reset_vf(pf, vf, reset_vf_tx,
					       reset_vf_rx);
	}
	mutex_unlock(&pf->vfs.table_lock);

	ice_print_vfs_mdd_events(pf);
}
1924
1925 /**
1926 * ice_init_nvm_phy_type - Initialize the NVM PHY type
1927 * @pi: port info structure
1928 *
1929 * Initialize nvm_phy_type_[low|high] for link lenient mode support
1930 */
ice_init_nvm_phy_type(struct ice_port_info * pi)1931 static int ice_init_nvm_phy_type(struct ice_port_info *pi)
1932 {
1933 struct ice_aqc_get_phy_caps_data *pcaps;
1934 struct ice_pf *pf = pi->hw->back;
1935 int err;
1936
1937 pcaps = kzalloc_obj(*pcaps);
1938 if (!pcaps)
1939 return -ENOMEM;
1940
1941 err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA,
1942 pcaps, NULL);
1943
1944 if (err) {
1945 dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
1946 goto out;
1947 }
1948
1949 pf->nvm_phy_type_hi = pcaps->phy_type_high;
1950 pf->nvm_phy_type_lo = pcaps->phy_type_low;
1951
1952 out:
1953 kfree(pcaps);
1954 return err;
1955 }
1956
1957 /**
1958 * ice_init_link_dflt_override - Initialize link default override
1959 * @pi: port info structure
1960 *
1961 * Initialize link default override and PHY total port shutdown during probe
1962 */
ice_init_link_dflt_override(struct ice_port_info * pi)1963 static void ice_init_link_dflt_override(struct ice_port_info *pi)
1964 {
1965 struct ice_link_default_override_tlv *ldo;
1966 struct ice_pf *pf = pi->hw->back;
1967
1968 ldo = &pf->link_dflt_override;
1969 if (ice_get_link_default_override(ldo, pi))
1970 return;
1971
1972 if (!(ldo->options & ICE_LINK_OVERRIDE_PORT_DIS))
1973 return;
1974
1975 /* Enable Total Port Shutdown (override/replace link-down-on-close
1976 * ethtool private flag) for ports with Port Disable bit set.
1977 */
1978 set_bit(ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags);
1979 set_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags);
1980 }
1981
1982 /**
1983 * ice_init_phy_cfg_dflt_override - Initialize PHY cfg default override settings
1984 * @pi: port info structure
1985 *
1986 * If default override is enabled, initialize the user PHY cfg speed and FEC
1987 * settings using the default override mask from the NVM.
1988 *
1989 * The PHY should only be configured with the default override settings the
1990 * first time media is available. The ICE_LINK_DEFAULT_OVERRIDE_PENDING state
1991 * is used to indicate that the user PHY cfg default override is initialized
1992 * and the PHY has not been configured with the default override settings. The
1993 * state is set here, and cleared in ice_phy_cfg the first time the PHY is
1994 * configured.
1995 *
1996 * This function should be called only if the FW doesn't support default
1997 * configuration mode, as reported by ice_fw_supports_report_dflt_cfg.
1998 */
ice_init_phy_cfg_dflt_override(struct ice_port_info * pi)1999 static void ice_init_phy_cfg_dflt_override(struct ice_port_info *pi)
2000 {
2001 struct ice_link_default_override_tlv *ldo;
2002 struct ice_aqc_set_phy_cfg_data *cfg;
2003 struct ice_phy_info *phy = &pi->phy;
2004 struct ice_pf *pf = pi->hw->back;
2005
2006 ldo = &pf->link_dflt_override;
2007
2008 /* If link default override is enabled, use to mask NVM PHY capabilities
2009 * for speed and FEC default configuration.
2010 */
2011 cfg = &phy->curr_user_phy_cfg;
2012
2013 if (ldo->phy_type_low || ldo->phy_type_high) {
2014 cfg->phy_type_low = pf->nvm_phy_type_lo &
2015 cpu_to_le64(ldo->phy_type_low);
2016 cfg->phy_type_high = pf->nvm_phy_type_hi &
2017 cpu_to_le64(ldo->phy_type_high);
2018 }
2019 cfg->link_fec_opt = ldo->fec_options;
2020 phy->curr_user_fec_req = ICE_FEC_AUTO;
2021
2022 set_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING, pf->state);
2023 }
2024
2025 /**
2026 * ice_init_phy_user_cfg - Initialize the PHY user configuration
2027 * @pi: port info structure
2028 *
2029 * Initialize the current user PHY configuration, speed, FEC, and FC requested
2030 * mode to default. The PHY defaults are from get PHY capabilities topology
2031 * with media so call when media is first available. An error is returned if
2032 * called when media is not available. The PHY initialization completed state is
2033 * set here.
2034 *
2035 * These configurations are used when setting PHY
2036 * configuration. The user PHY configuration is updated on set PHY
2037 * configuration. Returns 0 on success, negative on failure
2038 */
ice_init_phy_user_cfg(struct ice_port_info * pi)2039 static int ice_init_phy_user_cfg(struct ice_port_info *pi)
2040 {
2041 struct ice_aqc_get_phy_caps_data *pcaps;
2042 struct ice_phy_info *phy = &pi->phy;
2043 struct ice_pf *pf = pi->hw->back;
2044 int err;
2045
2046 if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
2047 return -EIO;
2048
2049 pcaps = kzalloc_obj(*pcaps);
2050 if (!pcaps)
2051 return -ENOMEM;
2052
2053 if (ice_fw_supports_report_dflt_cfg(pi->hw))
2054 err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG,
2055 pcaps, NULL);
2056 else
2057 err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
2058 pcaps, NULL);
2059 if (err) {
2060 dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
2061 goto err_out;
2062 }
2063
2064 ice_copy_phy_caps_to_cfg(pi, pcaps, &pi->phy.curr_user_phy_cfg);
2065
2066 /* check if lenient mode is supported and enabled */
2067 if (ice_fw_supports_link_override(pi->hw) &&
2068 !(pcaps->module_compliance_enforcement &
2069 ICE_AQC_MOD_ENFORCE_STRICT_MODE)) {
2070 set_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags);
2071
2072 /* if the FW supports default PHY configuration mode, then the driver
2073 * does not have to apply link override settings. If not,
2074 * initialize user PHY configuration with link override values
2075 */
2076 if (!ice_fw_supports_report_dflt_cfg(pi->hw) &&
2077 (pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN)) {
2078 ice_init_phy_cfg_dflt_override(pi);
2079 goto out;
2080 }
2081 }
2082
2083 /* if link default override is not enabled, set user flow control and
2084 * FEC settings based on what get_phy_caps returned
2085 */
2086 phy->curr_user_fec_req = ice_caps_to_fec_mode(pcaps->caps,
2087 pcaps->link_fec_options);
2088 phy->curr_user_fc_req = ice_caps_to_fc_mode(pcaps->caps);
2089
2090 out:
2091 phy->curr_user_speed_req = ICE_AQ_LINK_SPEED_M;
2092 set_bit(ICE_PHY_INIT_COMPLETE, pf->state);
2093 err_out:
2094 kfree(pcaps);
2095 return err;
2096 }
2097
2098 /**
2099 * ice_phy_cfg - configure PHY
2100 * @vsi: VSI of PHY
2101 * @link_en: true/false indicates to set link to enable/disable
2102 *
2103 * Set the PHY configuration. If the current PHY configuration is the same as
2104 * the curr_user_phy_cfg and link_en hasn't changed, then do nothing to avoid
2105 * link flap. Otherwise configure the PHY based get PHY capabilities for
2106 * topology with media and link_en.
2107 *
2108 * Return: 0 on success, negative on failure
2109 */
ice_phy_cfg(struct ice_vsi * vsi,bool link_en)2110 static int ice_phy_cfg(struct ice_vsi *vsi, bool link_en)
2111 {
2112 struct device *dev = ice_pf_to_dev(vsi->back);
2113 struct ice_port_info *pi = vsi->port_info;
2114 struct ice_aqc_get_phy_caps_data *pcaps;
2115 struct ice_aqc_set_phy_cfg_data *cfg;
2116 struct ice_phy_info *phy = &pi->phy;
2117 struct ice_pf *pf = vsi->back;
2118 int err;
2119
2120 /* Ensure we have media as we cannot configure a medialess port */
2121 if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
2122 return -ENOMEDIUM;
2123
2124 ice_print_topo_conflict(vsi);
2125
2126 if (!test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags) &&
2127 phy->link_info.topo_media_conflict == ICE_AQ_LINK_TOPO_UNSUPP_MEDIA)
2128 return -EPERM;
2129
2130 pcaps = kzalloc_obj(*pcaps);
2131 if (!pcaps)
2132 return -ENOMEM;
2133
2134 /* Get current PHY config */
2135 err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
2136 NULL);
2137 if (err) {
2138 dev_err(dev, "Failed to get PHY configuration, VSI %d error %d\n",
2139 vsi->vsi_num, err);
2140 goto done;
2141 }
2142
2143 /* Configuration has not changed. There's nothing to do. */
2144 if (link_en == !!(pcaps->caps & ICE_AQC_PHY_EN_LINK) &&
2145 ice_phy_caps_equals_cfg(pcaps, &phy->curr_user_phy_cfg))
2146 goto done;
2147
2148 /* Use PHY topology as baseline for configuration */
2149 memset(pcaps, 0, sizeof(*pcaps));
2150 if (ice_fw_supports_report_dflt_cfg(pi->hw))
2151 err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG,
2152 pcaps, NULL);
2153 else
2154 err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
2155 pcaps, NULL);
2156 if (err) {
2157 dev_err(dev, "Failed to get PHY caps, VSI %d error %d\n",
2158 vsi->vsi_num, err);
2159 goto done;
2160 }
2161
2162 cfg = kzalloc_obj(*cfg);
2163 if (!cfg) {
2164 err = -ENOMEM;
2165 goto done;
2166 }
2167
2168 ice_copy_phy_caps_to_cfg(pi, pcaps, cfg);
2169
2170 /* Speed - If default override pending, use curr_user_phy_cfg set in
2171 * ice_init_phy_user_cfg_ldo.
2172 */
2173 if (test_and_clear_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING,
2174 vsi->back->state)) {
2175 cfg->phy_type_low = phy->curr_user_phy_cfg.phy_type_low;
2176 cfg->phy_type_high = phy->curr_user_phy_cfg.phy_type_high;
2177 } else {
2178 u64 phy_low = 0, phy_high = 0;
2179
2180 ice_update_phy_type(&phy_low, &phy_high,
2181 pi->phy.curr_user_speed_req);
2182 cfg->phy_type_low = pcaps->phy_type_low & cpu_to_le64(phy_low);
2183 cfg->phy_type_high = pcaps->phy_type_high &
2184 cpu_to_le64(phy_high);
2185 }
2186
2187 /* Can't provide what was requested; use PHY capabilities */
2188 if (!cfg->phy_type_low && !cfg->phy_type_high) {
2189 cfg->phy_type_low = pcaps->phy_type_low;
2190 cfg->phy_type_high = pcaps->phy_type_high;
2191 }
2192
2193 /* FEC */
2194 ice_cfg_phy_fec(pi, cfg, phy->curr_user_fec_req);
2195
2196 /* Can't provide what was requested; use PHY capabilities */
2197 if (cfg->link_fec_opt !=
2198 (cfg->link_fec_opt & pcaps->link_fec_options)) {
2199 cfg->caps |= pcaps->caps & ICE_AQC_PHY_EN_AUTO_FEC;
2200 cfg->link_fec_opt = pcaps->link_fec_options;
2201 }
2202
2203 /* Flow Control - always supported; no need to check against
2204 * capabilities
2205 */
2206 ice_cfg_phy_fc(pi, cfg, phy->curr_user_fc_req);
2207
2208 /* Enable/Disable link and link update */
2209 cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
2210 if (link_en)
2211 cfg->caps |= ICE_AQ_PHY_ENA_LINK;
2212 else
2213 cfg->caps &= ~ICE_AQ_PHY_ENA_LINK;
2214
2215 err = ice_aq_set_phy_cfg(&pf->hw, pi, cfg, NULL);
2216 if (err)
2217 dev_err(dev, "Failed to set phy config, VSI %d error %d\n",
2218 vsi->vsi_num, err);
2219
2220 kfree(cfg);
2221 done:
2222 kfree(pcaps);
2223 return err;
2224 }
2225
2226 /**
2227 * ice_check_media_subtask - Check for media
2228 * @pf: pointer to PF struct
2229 *
2230 * If media is available, then initialize PHY user configuration if it is not
2231 * been, and configure the PHY if the interface is up.
2232 */
ice_check_media_subtask(struct ice_pf * pf)2233 static void ice_check_media_subtask(struct ice_pf *pf)
2234 {
2235 struct ice_port_info *pi;
2236 struct ice_vsi *vsi;
2237 int err;
2238
2239 /* No need to check for media if it's already present */
2240 if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags))
2241 return;
2242
2243 vsi = ice_get_main_vsi(pf);
2244 if (!vsi)
2245 return;
2246
2247 /* Refresh link info and check if media is present */
2248 pi = vsi->port_info;
2249 err = ice_update_link_info(pi);
2250 if (err)
2251 return;
2252
2253 ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err);
2254
2255 if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
2256 if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state))
2257 ice_init_phy_user_cfg(pi);
2258
2259 /* PHY settings are reset on media insertion, reconfigure
2260 * PHY to preserve settings.
2261 */
2262 if (test_bit(ICE_VSI_DOWN, vsi->state) &&
2263 test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags))
2264 return;
2265
2266 err = ice_phy_cfg(vsi, true);
2267 if (!err)
2268 clear_bit(ICE_FLAG_NO_MEDIA, pf->flags);
2269
2270 /* A Link Status Event will be generated; the event handler
2271 * will complete bringing the interface up
2272 */
2273 }
2274 }
2275
ice_service_task_recovery_mode(struct work_struct * work)2276 static void ice_service_task_recovery_mode(struct work_struct *work)
2277 {
2278 struct ice_pf *pf = container_of(work, struct ice_pf, serv_task);
2279
2280 set_bit(ICE_ADMINQ_EVENT_PENDING, pf->state);
2281 ice_clean_adminq_subtask(pf);
2282
2283 ice_service_task_complete(pf);
2284
2285 mod_timer(&pf->serv_tmr, jiffies + msecs_to_jiffies(100));
2286 }
2287
2288 /**
2289 * ice_service_task - manage and run subtasks
2290 * @work: pointer to work_struct contained by the PF struct
2291 */
ice_service_task(struct work_struct * work)2292 static void ice_service_task(struct work_struct *work)
2293 {
2294 struct ice_pf *pf = container_of(work, struct ice_pf, serv_task);
2295 unsigned long start_time = jiffies;
2296
2297 if (pf->health_reporters.tx_hang_buf.tx_ring) {
2298 ice_report_tx_hang(pf);
2299 pf->health_reporters.tx_hang_buf.tx_ring = NULL;
2300 }
2301
2302 ice_reset_subtask(pf);
2303
2304 /* bail if a reset/recovery cycle is pending or rebuild failed */
2305 if (ice_is_reset_in_progress(pf->state) ||
2306 test_bit(ICE_SUSPENDED, pf->state) ||
2307 test_bit(ICE_NEEDS_RESTART, pf->state)) {
2308 ice_service_task_complete(pf);
2309 return;
2310 }
2311
2312 if (test_and_clear_bit(ICE_AUX_ERR_PENDING, pf->state)) {
2313 struct iidc_rdma_event *event;
2314
2315 event = kzalloc_obj(*event);
2316 if (event) {
2317 set_bit(IIDC_RDMA_EVENT_CRIT_ERR, event->type);
2318 /* report the entire OICR value to AUX driver */
2319 swap(event->reg, pf->oicr_err_reg);
2320 ice_send_event_to_aux(pf, event);
2321 kfree(event);
2322 }
2323 }
2324
2325 /* unplug aux dev per request, if an unplug request came in
2326 * while processing a plug request, this will handle it
2327 */
2328 if (test_and_clear_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags))
2329 ice_unplug_aux_dev(pf);
2330
2331 /* Plug aux device per request */
2332 if (test_and_clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags))
2333 ice_plug_aux_dev(pf);
2334
2335 if (test_and_clear_bit(ICE_FLAG_MTU_CHANGED, pf->flags)) {
2336 struct iidc_rdma_event *event;
2337
2338 event = kzalloc_obj(*event);
2339 if (event) {
2340 set_bit(IIDC_RDMA_EVENT_AFTER_MTU_CHANGE, event->type);
2341 ice_send_event_to_aux(pf, event);
2342 kfree(event);
2343 }
2344 }
2345
2346 ice_clean_adminq_subtask(pf);
2347 ice_check_media_subtask(pf);
2348 ice_check_for_hang_subtask(pf);
2349 ice_sync_fltr_subtask(pf);
2350 ice_handle_mdd_event(pf);
2351 ice_watchdog_subtask(pf);
2352
2353 if (ice_is_safe_mode(pf)) {
2354 ice_service_task_complete(pf);
2355 return;
2356 }
2357
2358 ice_process_vflr_event(pf);
2359 ice_clean_mailboxq_subtask(pf);
2360 ice_clean_sbq_subtask(pf);
2361 ice_sync_arfs_fltrs(pf);
2362 ice_flush_fdir_ctx(pf);
2363
2364 /* Clear ICE_SERVICE_SCHED flag to allow scheduling next event */
2365 ice_service_task_complete(pf);
2366
2367 /* If the tasks have taken longer than one service timer period
2368 * or there is more work to be done, reset the service timer to
2369 * schedule the service task now.
2370 */
2371 if (time_after(jiffies, (start_time + pf->serv_tmr_period)) ||
2372 test_bit(ICE_MDD_EVENT_PENDING, pf->state) ||
2373 test_bit(ICE_VFLR_EVENT_PENDING, pf->state) ||
2374 test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state) ||
2375 test_bit(ICE_FD_VF_FLUSH_CTX, pf->state) ||
2376 test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state) ||
2377 test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state))
2378 mod_timer(&pf->serv_tmr, jiffies);
2379 }
2380
2381 /**
2382 * ice_set_ctrlq_len - helper function to set controlq length
2383 * @hw: pointer to the HW instance
2384 */
ice_set_ctrlq_len(struct ice_hw * hw)2385 static void ice_set_ctrlq_len(struct ice_hw *hw)
2386 {
2387 hw->adminq.num_rq_entries = ICE_AQ_LEN;
2388 hw->adminq.num_sq_entries = ICE_AQ_LEN;
2389 hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;
2390 hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
2391 hw->mailboxq.num_rq_entries = PF_MBX_ARQLEN_ARQLEN_M;
2392 hw->mailboxq.num_sq_entries = ICE_MBXSQ_LEN;
2393 hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
2394 hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
2395 hw->sbq.num_rq_entries = ICE_SBQ_LEN;
2396 hw->sbq.num_sq_entries = ICE_SBQ_LEN;
2397 hw->sbq.rq_buf_size = ICE_SBQ_MAX_BUF_LEN;
2398 hw->sbq.sq_buf_size = ICE_SBQ_MAX_BUF_LEN;
2399 }
2400
2401 /**
2402 * ice_schedule_reset - schedule a reset
2403 * @pf: board private structure
2404 * @reset: reset being requested
2405 */
ice_schedule_reset(struct ice_pf * pf,enum ice_reset_req reset)2406 int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset)
2407 {
2408 struct device *dev = ice_pf_to_dev(pf);
2409
2410 /* bail out if earlier reset has failed */
2411 if (test_bit(ICE_RESET_FAILED, pf->state)) {
2412 dev_dbg(dev, "earlier reset has failed\n");
2413 return -EIO;
2414 }
2415 /* bail if reset/recovery already in progress */
2416 if (ice_is_reset_in_progress(pf->state)) {
2417 dev_dbg(dev, "Reset already in progress\n");
2418 return -EBUSY;
2419 }
2420
2421 switch (reset) {
2422 case ICE_RESET_PFR:
2423 set_bit(ICE_PFR_REQ, pf->state);
2424 break;
2425 case ICE_RESET_CORER:
2426 set_bit(ICE_CORER_REQ, pf->state);
2427 break;
2428 case ICE_RESET_GLOBR:
2429 set_bit(ICE_GLOBR_REQ, pf->state);
2430 break;
2431 default:
2432 return -EINVAL;
2433 }
2434
2435 ice_service_task_schedule(pf);
2436 return 0;
2437 }
2438
2439 /**
2440 * ice_vsi_ena_irq - Enable IRQ for the given VSI
2441 * @vsi: the VSI being configured
2442 */
ice_vsi_ena_irq(struct ice_vsi * vsi)2443 static int ice_vsi_ena_irq(struct ice_vsi *vsi)
2444 {
2445 struct ice_hw *hw = &vsi->back->hw;
2446 int i;
2447
2448 ice_for_each_q_vector(vsi, i)
2449 ice_irq_dynamic_ena(hw, vsi, vsi->q_vectors[i]);
2450
2451 ice_flush(hw);
2452 return 0;
2453 }
2454
2455 /**
2456 * ice_vsi_req_irq_msix - get MSI-X vectors from the OS for the VSI
2457 * @vsi: the VSI being configured
2458 * @basename: name for the vector
2459 */
ice_vsi_req_irq_msix(struct ice_vsi * vsi,char * basename)2460 static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
2461 {
2462 int q_vectors = vsi->num_q_vectors;
2463 struct ice_pf *pf = vsi->back;
2464 struct device *dev;
2465 int rx_int_idx = 0;
2466 int tx_int_idx = 0;
2467 int vector, err;
2468 int irq_num;
2469
2470 dev = ice_pf_to_dev(pf);
2471 for (vector = 0; vector < q_vectors; vector++) {
2472 struct ice_q_vector *q_vector = vsi->q_vectors[vector];
2473
2474 irq_num = q_vector->irq.virq;
2475
2476 if (q_vector->tx.tx_ring && q_vector->rx.rx_ring) {
2477 snprintf(q_vector->name, sizeof(q_vector->name) - 1,
2478 "%s-%s-%d", basename, "TxRx", rx_int_idx++);
2479 tx_int_idx++;
2480 } else if (q_vector->rx.rx_ring) {
2481 snprintf(q_vector->name, sizeof(q_vector->name) - 1,
2482 "%s-%s-%d", basename, "rx", rx_int_idx++);
2483 } else if (q_vector->tx.tx_ring) {
2484 snprintf(q_vector->name, sizeof(q_vector->name) - 1,
2485 "%s-%s-%d", basename, "tx", tx_int_idx++);
2486 } else {
2487 /* skip this unused q_vector */
2488 continue;
2489 }
2490 if (vsi->type == ICE_VSI_CTRL && vsi->vf)
2491 err = devm_request_irq(dev, irq_num, vsi->irq_handler,
2492 IRQF_SHARED, q_vector->name,
2493 q_vector);
2494 else
2495 err = devm_request_irq(dev, irq_num, vsi->irq_handler,
2496 0, q_vector->name, q_vector);
2497 if (err) {
2498 netdev_err(vsi->netdev, "MSIX request_irq failed, error: %d\n",
2499 err);
2500 goto free_q_irqs;
2501 }
2502 }
2503
2504 err = ice_set_cpu_rx_rmap(vsi);
2505 if (err) {
2506 netdev_err(vsi->netdev, "Failed to setup CPU RMAP on VSI %u: %pe\n",
2507 vsi->vsi_num, ERR_PTR(err));
2508 goto free_q_irqs;
2509 }
2510
2511 vsi->irqs_ready = true;
2512 return 0;
2513
2514 free_q_irqs:
2515 while (vector--) {
2516 irq_num = vsi->q_vectors[vector]->irq.virq;
2517 devm_free_irq(dev, irq_num, &vsi->q_vectors[vector]);
2518 }
2519 return err;
2520 }
2521
2522 /**
2523 * ice_xdp_alloc_setup_rings - Allocate and setup Tx rings for XDP
2524 * @vsi: VSI to setup Tx rings used by XDP
2525 *
2526 * Return 0 on success and negative value on error
2527 */
ice_xdp_alloc_setup_rings(struct ice_vsi * vsi)2528 static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
2529 {
2530 struct device *dev = ice_pf_to_dev(vsi->back);
2531 struct ice_tx_desc *tx_desc;
2532 int i, j;
2533
2534 ice_for_each_xdp_txq(vsi, i) {
2535 u16 xdp_q_idx = vsi->alloc_txq + i;
2536 struct ice_ring_stats *ring_stats;
2537 struct ice_tx_ring *xdp_ring;
2538
2539 xdp_ring = kzalloc_obj(*xdp_ring);
2540 if (!xdp_ring)
2541 goto free_xdp_rings;
2542
2543 ring_stats = kzalloc_obj(*ring_stats);
2544 if (!ring_stats) {
2545 ice_free_tx_ring(xdp_ring);
2546 goto free_xdp_rings;
2547 }
2548
2549 xdp_ring->ring_stats = ring_stats;
2550 xdp_ring->q_index = xdp_q_idx;
2551 xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx];
2552 xdp_ring->vsi = vsi;
2553 xdp_ring->netdev = NULL;
2554 xdp_ring->dev = dev;
2555 xdp_ring->count = vsi->num_tx_desc;
2556 WRITE_ONCE(vsi->xdp_rings[i], xdp_ring);
2557 if (ice_setup_tx_ring(xdp_ring))
2558 goto free_xdp_rings;
2559 ice_set_ring_xdp(xdp_ring);
2560 spin_lock_init(&xdp_ring->tx_lock);
2561 for (j = 0; j < xdp_ring->count; j++) {
2562 tx_desc = ICE_TX_DESC(xdp_ring, j);
2563 tx_desc->cmd_type_offset_bsz = 0;
2564 }
2565 }
2566
2567 return 0;
2568
2569 free_xdp_rings:
2570 for (; i >= 0; i--) {
2571 if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc) {
2572 kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu);
2573 vsi->xdp_rings[i]->ring_stats = NULL;
2574 ice_free_tx_ring(vsi->xdp_rings[i]);
2575 }
2576 }
2577 return -ENOMEM;
2578 }
2579
2580 /**
2581 * ice_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI
2582 * @vsi: VSI to set the bpf prog on
2583 * @prog: the bpf prog pointer
2584 */
ice_vsi_assign_bpf_prog(struct ice_vsi * vsi,struct bpf_prog * prog)2585 static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog)
2586 {
2587 struct bpf_prog *old_prog;
2588 int i;
2589
2590 old_prog = xchg(&vsi->xdp_prog, prog);
2591 ice_for_each_rxq(vsi, i)
2592 WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
2593
2594 if (old_prog)
2595 bpf_prog_put(old_prog);
2596 }
2597
ice_xdp_ring_from_qid(struct ice_vsi * vsi,int qid)2598 static struct ice_tx_ring *ice_xdp_ring_from_qid(struct ice_vsi *vsi, int qid)
2599 {
2600 struct ice_q_vector *q_vector;
2601 struct ice_tx_ring *ring;
2602
2603 if (static_key_enabled(&ice_xdp_locking_key))
2604 return vsi->xdp_rings[qid % vsi->num_xdp_txq];
2605
2606 q_vector = vsi->rx_rings[qid]->q_vector;
2607 ice_for_each_tx_ring(ring, q_vector->tx)
2608 if (ice_ring_is_xdp(ring))
2609 return ring;
2610
2611 return NULL;
2612 }
2613
2614 /**
2615 * ice_map_xdp_rings - Map XDP rings to interrupt vectors
2616 * @vsi: the VSI with XDP rings being configured
2617 *
2618 * Map XDP rings to interrupt vectors and perform the configuration steps
2619 * dependent on the mapping.
2620 */
ice_map_xdp_rings(struct ice_vsi * vsi)2621 void ice_map_xdp_rings(struct ice_vsi *vsi)
2622 {
2623 int xdp_rings_rem = vsi->num_xdp_txq;
2624 int v_idx, q_idx;
2625
2626 /* follow the logic from ice_vsi_map_rings_to_vectors */
2627 ice_for_each_q_vector(vsi, v_idx) {
2628 struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
2629 int xdp_rings_per_v, q_id, q_base;
2630
2631 xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
2632 vsi->num_q_vectors - v_idx);
2633 q_base = vsi->num_xdp_txq - xdp_rings_rem;
2634
2635 for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
2636 struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_id];
2637
2638 xdp_ring->q_vector = q_vector;
2639 xdp_ring->next = q_vector->tx.tx_ring;
2640 q_vector->tx.tx_ring = xdp_ring;
2641 }
2642 xdp_rings_rem -= xdp_rings_per_v;
2643 }
2644
2645 ice_for_each_rxq(vsi, q_idx) {
2646 vsi->rx_rings[q_idx]->xdp_ring = ice_xdp_ring_from_qid(vsi,
2647 q_idx);
2648 ice_tx_xsk_pool(vsi, q_idx);
2649 }
2650 }
2651
2652 /**
2653 * ice_unmap_xdp_rings - Unmap XDP rings from interrupt vectors
2654 * @vsi: the VSI with XDP rings being unmapped
2655 */
ice_unmap_xdp_rings(struct ice_vsi * vsi)2656 static void ice_unmap_xdp_rings(struct ice_vsi *vsi)
2657 {
2658 int v_idx;
2659
2660 ice_for_each_q_vector(vsi, v_idx) {
2661 struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
2662 struct ice_tx_ring *ring;
2663
2664 ice_for_each_tx_ring(ring, q_vector->tx)
2665 if (!ring->tx_buf || !ice_ring_is_xdp(ring))
2666 break;
2667
2668 /* restore the value of last node prior to XDP setup */
2669 q_vector->tx.tx_ring = ring;
2670 }
2671 }
2672
2673 /**
2674 * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
2675 * @vsi: VSI to bring up Tx rings used by XDP
2676 * @prog: bpf program that will be assigned to VSI
2677 * @cfg_type: create from scratch or restore the existing configuration
2678 *
2679 * Return 0 on success and negative value on error
2680 */
ice_prepare_xdp_rings(struct ice_vsi * vsi,struct bpf_prog * prog,enum ice_xdp_cfg cfg_type)2681 int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
2682 enum ice_xdp_cfg cfg_type)
2683 {
2684 u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
2685 struct ice_pf *pf = vsi->back;
2686 struct ice_qs_cfg xdp_qs_cfg = {
2687 .qs_mutex = &pf->avail_q_mutex,
2688 .pf_map = pf->avail_txqs,
2689 .pf_map_size = pf->max_pf_txqs,
2690 .q_count = vsi->num_xdp_txq,
2691 .scatter_count = ICE_MAX_SCATTER_TXQS,
2692 .vsi_map = vsi->txq_map,
2693 .vsi_map_offset = vsi->alloc_txq,
2694 .mapping_mode = ICE_VSI_MAP_CONTIG
2695 };
2696 struct device *dev;
2697 int status, i;
2698
2699 dev = ice_pf_to_dev(pf);
2700 vsi->xdp_rings = devm_kcalloc(dev, vsi->num_xdp_txq,
2701 sizeof(*vsi->xdp_rings), GFP_KERNEL);
2702 if (!vsi->xdp_rings)
2703 return -ENOMEM;
2704
2705 vsi->xdp_mapping_mode = xdp_qs_cfg.mapping_mode;
2706 if (__ice_vsi_get_qs(&xdp_qs_cfg))
2707 goto err_map_xdp;
2708
2709 if (static_key_enabled(&ice_xdp_locking_key))
2710 netdev_warn(vsi->netdev,
2711 "Could not allocate one XDP Tx ring per CPU, XDP_TX/XDP_REDIRECT actions will be slower\n");
2712
2713 if (ice_xdp_alloc_setup_rings(vsi))
2714 goto clear_xdp_rings;
2715
2716 /* omit the scheduler update if in reset path; XDP queues will be
2717 * taken into account at the end of ice_vsi_rebuild, where
2718 * ice_cfg_vsi_lan is being called
2719 */
2720 if (cfg_type == ICE_XDP_CFG_PART)
2721 return 0;
2722
2723 ice_map_xdp_rings(vsi);
2724
2725 /* tell the Tx scheduler that right now we have
2726 * additional queues
2727 */
2728 for (i = 0; i < vsi->tc_cfg.numtc; i++)
2729 max_txqs[i] = vsi->num_txq + vsi->num_xdp_txq;
2730
2731 status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
2732 max_txqs);
2733 if (status) {
2734 dev_err(dev, "Failed VSI LAN queue config for XDP, error: %d\n",
2735 status);
2736 goto unmap_xdp_rings;
2737 }
2738
2739 /* assign the prog only when it's not already present on VSI;
2740 * this flow is a subject of both ethtool -L and ndo_bpf flows;
2741 * VSI rebuild that happens under ethtool -L can expose us to
2742 * the bpf_prog refcount issues as we would be swapping same
2743 * bpf_prog pointers from vsi->xdp_prog and calling bpf_prog_put
2744 * on it as it would be treated as an 'old_prog'; for ndo_bpf
2745 * this is not harmful as dev_xdp_install bumps the refcount
2746 * before calling the op exposed by the driver;
2747 */
2748 if (!ice_is_xdp_ena_vsi(vsi))
2749 ice_vsi_assign_bpf_prog(vsi, prog);
2750
2751 return 0;
2752 unmap_xdp_rings:
2753 ice_unmap_xdp_rings(vsi);
2754 clear_xdp_rings:
2755 ice_for_each_xdp_txq(vsi, i)
2756 if (vsi->xdp_rings[i]) {
2757 kfree_rcu(vsi->xdp_rings[i], rcu);
2758 vsi->xdp_rings[i] = NULL;
2759 }
2760
2761 err_map_xdp:
2762 mutex_lock(&pf->avail_q_mutex);
2763 ice_for_each_xdp_txq(vsi, i) {
2764 clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
2765 vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
2766 }
2767 mutex_unlock(&pf->avail_q_mutex);
2768
2769 devm_kfree(dev, vsi->xdp_rings);
2770 vsi->xdp_rings = NULL;
2771
2772 return -ENOMEM;
2773 }
2774
2775 /**
2776 * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings
2777 * @vsi: VSI to remove XDP rings
2778 * @cfg_type: disable XDP permanently or allow it to be restored later
2779 *
2780 * Detach XDP rings from irq vectors, clean up the PF bitmap and free
2781 * resources
2782 */
ice_destroy_xdp_rings(struct ice_vsi * vsi,enum ice_xdp_cfg cfg_type)2783 int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type)
2784 {
2785 u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
2786 struct ice_pf *pf = vsi->back;
2787 int i;
2788
2789 /* q_vectors are freed in reset path so there's no point in detaching
2790 * rings
2791 */
2792 if (cfg_type == ICE_XDP_CFG_PART)
2793 goto free_qmap;
2794
2795 ice_unmap_xdp_rings(vsi);
2796
2797 free_qmap:
2798 mutex_lock(&pf->avail_q_mutex);
2799 ice_for_each_xdp_txq(vsi, i) {
2800 clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
2801 vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
2802 }
2803 mutex_unlock(&pf->avail_q_mutex);
2804
2805 ice_for_each_xdp_txq(vsi, i)
2806 if (vsi->xdp_rings[i]) {
2807 if (vsi->xdp_rings[i]->desc) {
2808 synchronize_rcu();
2809 ice_free_tx_ring(vsi->xdp_rings[i]);
2810 }
2811 kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu);
2812 vsi->xdp_rings[i]->ring_stats = NULL;
2813 kfree_rcu(vsi->xdp_rings[i], rcu);
2814 vsi->xdp_rings[i] = NULL;
2815 }
2816
2817 devm_kfree(ice_pf_to_dev(pf), vsi->xdp_rings);
2818 vsi->xdp_rings = NULL;
2819
2820 if (static_key_enabled(&ice_xdp_locking_key))
2821 static_branch_dec(&ice_xdp_locking_key);
2822
2823 if (cfg_type == ICE_XDP_CFG_PART)
2824 return 0;
2825
2826 ice_vsi_assign_bpf_prog(vsi, NULL);
2827
2828 /* notify Tx scheduler that we destroyed XDP queues and bring
2829 * back the old number of child nodes
2830 */
2831 for (i = 0; i < vsi->tc_cfg.numtc; i++)
2832 max_txqs[i] = vsi->num_txq;
2833
2834 /* change number of XDP Tx queues to 0 */
2835 vsi->num_xdp_txq = 0;
2836
2837 return ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
2838 max_txqs);
2839 }
2840
2841 /**
2842 * ice_vsi_rx_napi_schedule - Schedule napi on RX queues from VSI
2843 * @vsi: VSI to schedule napi on
2844 */
ice_vsi_rx_napi_schedule(struct ice_vsi * vsi)2845 static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi)
2846 {
2847 int i;
2848
2849 ice_for_each_rxq(vsi, i) {
2850 struct ice_rx_ring *rx_ring = vsi->rx_rings[i];
2851
2852 if (READ_ONCE(rx_ring->xsk_pool))
2853 napi_schedule(&rx_ring->q_vector->napi);
2854 }
2855 }
2856
2857 /**
2858 * ice_vsi_determine_xdp_res - figure out how many Tx qs can XDP have
2859 * @vsi: VSI to determine the count of XDP Tx qs
2860 *
2861 * returns 0 if Tx qs count is higher than at least half of CPU count,
2862 * -ENOMEM otherwise
2863 */
ice_vsi_determine_xdp_res(struct ice_vsi * vsi)2864 int ice_vsi_determine_xdp_res(struct ice_vsi *vsi)
2865 {
2866 u16 avail = ice_get_avail_txq_count(vsi->back);
2867 u16 cpus = num_possible_cpus();
2868
2869 if (avail < cpus / 2)
2870 return -ENOMEM;
2871
2872 if (vsi->type == ICE_VSI_SF)
2873 avail = vsi->alloc_txq;
2874
2875 vsi->num_xdp_txq = min_t(u16, avail, cpus);
2876
2877 if (vsi->num_xdp_txq < cpus)
2878 static_branch_inc(&ice_xdp_locking_key);
2879
2880 return 0;
2881 }
2882
2883 /**
2884 * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
2885 * @vsi: Pointer to VSI structure
2886 */
ice_max_xdp_frame_size(struct ice_vsi * vsi)2887 static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
2888 {
2889 return ICE_RXBUF_3072;
2890 }
2891
2892 /**
2893 * ice_xdp_setup_prog - Add or remove XDP eBPF program
2894 * @vsi: VSI to setup XDP for
2895 * @prog: XDP program
2896 * @extack: netlink extended ack
2897 */
2898 static int
ice_xdp_setup_prog(struct ice_vsi * vsi,struct bpf_prog * prog,struct netlink_ext_ack * extack)2899 ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
2900 struct netlink_ext_ack *extack)
2901 {
2902 unsigned int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
2903 int ret = 0, xdp_ring_err = 0;
2904 bool if_running;
2905
2906 if (prog && !prog->aux->xdp_has_frags) {
2907 if (frame_size > ice_max_xdp_frame_size(vsi)) {
2908 NL_SET_ERR_MSG_MOD(extack,
2909 "MTU is too large for linear frames and XDP prog does not support frags");
2910 return -EOPNOTSUPP;
2911 }
2912 }
2913
2914 /* hot swap progs and avoid toggling link */
2915 if (ice_is_xdp_ena_vsi(vsi) == !!prog ||
2916 test_bit(ICE_VSI_REBUILD_PENDING, vsi->state)) {
2917 ice_vsi_assign_bpf_prog(vsi, prog);
2918 return 0;
2919 }
2920
2921 if_running = netif_running(vsi->netdev) &&
2922 !test_and_set_bit(ICE_VSI_DOWN, vsi->state);
2923
2924 /* need to stop netdev while setting up the program for Rx rings */
2925 if (if_running) {
2926 ret = ice_down(vsi);
2927 if (ret) {
2928 NL_SET_ERR_MSG_MOD(extack, "Preparing device for XDP attach failed");
2929 return ret;
2930 }
2931 }
2932
2933 if (!ice_is_xdp_ena_vsi(vsi) && prog) {
2934 xdp_ring_err = ice_vsi_determine_xdp_res(vsi);
2935 if (xdp_ring_err) {
2936 NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP");
2937 goto resume_if;
2938 } else {
2939 xdp_ring_err = ice_prepare_xdp_rings(vsi, prog,
2940 ICE_XDP_CFG_FULL);
2941 if (xdp_ring_err) {
2942 NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed");
2943 goto resume_if;
2944 }
2945 }
2946 xdp_features_set_redirect_target(vsi->netdev, true);
2947 } else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
2948 xdp_features_clear_redirect_target(vsi->netdev);
2949 xdp_ring_err = ice_destroy_xdp_rings(vsi, ICE_XDP_CFG_FULL);
2950 if (xdp_ring_err)
2951 NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed");
2952 }
2953
2954 resume_if:
2955 if (if_running)
2956 ret = ice_up(vsi);
2957
2958 if (!ret && prog)
2959 ice_vsi_rx_napi_schedule(vsi);
2960
2961 return (ret || xdp_ring_err) ? -ENOMEM : 0;
2962 }
2963
2964 /**
2965 * ice_xdp_safe_mode - XDP handler for safe mode
2966 * @dev: netdevice
2967 * @xdp: XDP command
2968 */
ice_xdp_safe_mode(struct net_device __always_unused * dev,struct netdev_bpf * xdp)2969 static int ice_xdp_safe_mode(struct net_device __always_unused *dev,
2970 struct netdev_bpf *xdp)
2971 {
2972 NL_SET_ERR_MSG_MOD(xdp->extack,
2973 "Please provide working DDP firmware package in order to use XDP\n"
2974 "Refer to Documentation/networking/device_drivers/ethernet/intel/ice.rst");
2975 return -EOPNOTSUPP;
2976 }
2977
2978 /**
2979 * ice_xdp - implements XDP handler
2980 * @dev: netdevice
2981 * @xdp: XDP command
2982 */
ice_xdp(struct net_device * dev,struct netdev_bpf * xdp)2983 int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
2984 {
2985 struct ice_netdev_priv *np = netdev_priv(dev);
2986 struct ice_vsi *vsi = np->vsi;
2987 int ret;
2988
2989 if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_SF) {
2990 NL_SET_ERR_MSG_MOD(xdp->extack, "XDP can be loaded only on PF or SF VSI");
2991 return -EINVAL;
2992 }
2993
2994 mutex_lock(&vsi->xdp_state_lock);
2995
2996 switch (xdp->command) {
2997 case XDP_SETUP_PROG:
2998 ret = ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
2999 break;
3000 case XDP_SETUP_XSK_POOL:
3001 ret = ice_xsk_pool_setup(vsi, xdp->xsk.pool, xdp->xsk.queue_id);
3002 break;
3003 default:
3004 ret = -EINVAL;
3005 }
3006
3007 mutex_unlock(&vsi->xdp_state_lock);
3008 return ret;
3009 }
3010
3011 /**
3012 * ice_ena_misc_vector - enable the non-queue interrupts
3013 * @pf: board private structure
3014 */
ice_ena_misc_vector(struct ice_pf * pf)3015 static void ice_ena_misc_vector(struct ice_pf *pf)
3016 {
3017 struct ice_hw *hw = &pf->hw;
3018 u32 pf_intr_start_offset;
3019 u32 val;
3020
3021 /* Disable anti-spoof detection interrupt to prevent spurious event
3022 * interrupts during a function reset. Anti-spoof functionally is
3023 * still supported.
3024 */
3025 val = rd32(hw, GL_MDCK_TX_TDPU);
3026 val |= GL_MDCK_TX_TDPU_RCU_ANTISPOOF_ITR_DIS_M;
3027 wr32(hw, GL_MDCK_TX_TDPU, val);
3028
3029 /* clear things first */
3030 wr32(hw, PFINT_OICR_ENA, 0); /* disable all */
3031 rd32(hw, PFINT_OICR); /* read to clear */
3032
3033 val = (PFINT_OICR_ECC_ERR_M |
3034 PFINT_OICR_MAL_DETECT_M |
3035 PFINT_OICR_GRST_M |
3036 PFINT_OICR_PCI_EXCEPTION_M |
3037 PFINT_OICR_VFLR_M |
3038 PFINT_OICR_HMC_ERR_M |
3039 PFINT_OICR_PE_PUSH_M |
3040 PFINT_OICR_PE_CRITERR_M);
3041
3042 wr32(hw, PFINT_OICR_ENA, val);
3043
3044 /* SW_ITR_IDX = 0, but don't change INTENA */
3045 wr32(hw, GLINT_DYN_CTL(pf->oicr_irq.index),
3046 GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M);
3047
3048 if (!pf->hw.dev_caps.ts_dev_info.ts_ll_int_read)
3049 return;
3050 pf_intr_start_offset = rd32(hw, PFINT_ALLOC) & PFINT_ALLOC_FIRST;
3051 wr32(hw, GLINT_DYN_CTL(pf->ll_ts_irq.index + pf_intr_start_offset),
3052 GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M);
3053 }
3054
3055 /**
3056 * ice_ll_ts_intr - ll_ts interrupt handler
3057 * @irq: interrupt number
3058 * @data: pointer to a q_vector
3059 */
ice_ll_ts_intr(int __always_unused irq,void * data)3060 static irqreturn_t ice_ll_ts_intr(int __always_unused irq, void *data)
3061 {
3062 struct ice_pf *pf = data;
3063 u32 pf_intr_start_offset;
3064 struct ice_ptp_tx *tx;
3065 unsigned long flags;
3066 struct ice_hw *hw;
3067 u32 val;
3068 u8 idx;
3069
3070 hw = &pf->hw;
3071 tx = &pf->ptp.port.tx;
3072 spin_lock_irqsave(&tx->lock, flags);
3073 if (tx->init) {
3074 ice_ptp_complete_tx_single_tstamp(tx);
3075
3076 idx = find_next_bit_wrap(tx->in_use, tx->len,
3077 tx->last_ll_ts_idx_read + 1);
3078 if (idx != tx->len)
3079 ice_ptp_req_tx_single_tstamp(tx, idx);
3080 }
3081 spin_unlock_irqrestore(&tx->lock, flags);
3082
3083 val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
3084 (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S);
3085 pf_intr_start_offset = rd32(hw, PFINT_ALLOC) & PFINT_ALLOC_FIRST;
3086 wr32(hw, GLINT_DYN_CTL(pf->ll_ts_irq.index + pf_intr_start_offset),
3087 val);
3088
3089 return IRQ_HANDLED;
3090 }
3091
3092 /**
3093 * ice_misc_intr - misc interrupt handler
3094 * @irq: interrupt number
3095 * @data: pointer to a q_vector
3096 */
ice_misc_intr(int __always_unused irq,void * data)3097 static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
3098 {
3099 struct ice_pf *pf = (struct ice_pf *)data;
3100 irqreturn_t ret = IRQ_HANDLED;
3101 struct ice_hw *hw = &pf->hw;
3102 struct device *dev;
3103 u32 oicr, ena_mask;
3104
3105 dev = ice_pf_to_dev(pf);
3106 set_bit(ICE_ADMINQ_EVENT_PENDING, pf->state);
3107 set_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state);
3108 set_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);
3109
3110 oicr = rd32(hw, PFINT_OICR);
3111 ena_mask = rd32(hw, PFINT_OICR_ENA);
3112
3113 if (oicr & PFINT_OICR_SWINT_M) {
3114 ena_mask &= ~PFINT_OICR_SWINT_M;
3115 pf->sw_int_count++;
3116 }
3117
3118 if (oicr & PFINT_OICR_MAL_DETECT_M) {
3119 ena_mask &= ~PFINT_OICR_MAL_DETECT_M;
3120 set_bit(ICE_MDD_EVENT_PENDING, pf->state);
3121 }
3122 if (oicr & PFINT_OICR_VFLR_M) {
3123 /* disable any further VFLR event notifications */
3124 if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) {
3125 u32 reg = rd32(hw, PFINT_OICR_ENA);
3126
3127 reg &= ~PFINT_OICR_VFLR_M;
3128 wr32(hw, PFINT_OICR_ENA, reg);
3129 } else {
3130 ena_mask &= ~PFINT_OICR_VFLR_M;
3131 set_bit(ICE_VFLR_EVENT_PENDING, pf->state);
3132 }
3133 }
3134
3135 if (oicr & PFINT_OICR_GRST_M) {
3136 u32 reset;
3137
3138 /* we have a reset warning */
3139 ena_mask &= ~PFINT_OICR_GRST_M;
3140 reset = FIELD_GET(GLGEN_RSTAT_RESET_TYPE_M,
3141 rd32(hw, GLGEN_RSTAT));
3142
3143 if (reset == ICE_RESET_CORER)
3144 pf->corer_count++;
3145 else if (reset == ICE_RESET_GLOBR)
3146 pf->globr_count++;
3147 else if (reset == ICE_RESET_EMPR)
3148 pf->empr_count++;
3149 else
3150 dev_dbg(dev, "Invalid reset type %d\n", reset);
3151
3152 /* If a reset cycle isn't already in progress, we set a bit in
3153 * pf->state so that the service task can start a reset/rebuild.
3154 */
3155 if (!test_and_set_bit(ICE_RESET_OICR_RECV, pf->state)) {
3156 if (reset == ICE_RESET_CORER)
3157 set_bit(ICE_CORER_RECV, pf->state);
3158 else if (reset == ICE_RESET_GLOBR)
3159 set_bit(ICE_GLOBR_RECV, pf->state);
3160 else
3161 set_bit(ICE_EMPR_RECV, pf->state);
3162
3163 /* There are couple of different bits at play here.
3164 * hw->reset_ongoing indicates whether the hardware is
3165 * in reset. This is set to true when a reset interrupt
3166 * is received and set back to false after the driver
3167 * has determined that the hardware is out of reset.
3168 *
3169 * ICE_RESET_OICR_RECV in pf->state indicates
3170 * that a post reset rebuild is required before the
3171 * driver is operational again. This is set above.
3172 *
3173 * As this is the start of the reset/rebuild cycle, set
3174 * both to indicate that.
3175 */
3176 hw->reset_ongoing = true;
3177 }
3178 }
3179
3180 if (oicr & PFINT_OICR_TSYN_TX_M) {
3181 ena_mask &= ~PFINT_OICR_TSYN_TX_M;
3182
3183 ret = ice_ptp_ts_irq(pf);
3184 }
3185
3186 if (oicr & PFINT_OICR_TSYN_EVNT_M) {
3187 u8 tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
3188 u32 gltsyn_stat = rd32(hw, GLTSYN_STAT(tmr_idx));
3189
3190 ena_mask &= ~PFINT_OICR_TSYN_EVNT_M;
3191
3192 if (ice_pf_src_tmr_owned(pf)) {
3193 /* Save EVENTs from GLTSYN register */
3194 pf->ptp.ext_ts_irq |= gltsyn_stat &
3195 (GLTSYN_STAT_EVENT0_M |
3196 GLTSYN_STAT_EVENT1_M |
3197 GLTSYN_STAT_EVENT2_M);
3198
3199 ice_ptp_extts_event(pf);
3200 }
3201 }
3202
3203 #define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M)
3204 if (oicr & ICE_AUX_CRIT_ERR) {
3205 pf->oicr_err_reg |= oicr;
3206 set_bit(ICE_AUX_ERR_PENDING, pf->state);
3207 ena_mask &= ~ICE_AUX_CRIT_ERR;
3208 }
3209
3210 /* Report any remaining unexpected interrupts */
3211 oicr &= ena_mask;
3212 if (oicr) {
3213 dev_dbg(dev, "unhandled interrupt oicr=0x%08x\n", oicr);
3214 /* If a critical error is pending there is no choice but to
3215 * reset the device.
3216 */
3217 if (oicr & (PFINT_OICR_PCI_EXCEPTION_M |
3218 PFINT_OICR_ECC_ERR_M)) {
3219 set_bit(ICE_PFR_REQ, pf->state);
3220 }
3221 }
3222 ice_service_task_schedule(pf);
3223 if (ret == IRQ_HANDLED)
3224 ice_irq_dynamic_ena(hw, NULL, NULL);
3225
3226 return ret;
3227 }
3228
3229 /**
3230 * ice_misc_intr_thread_fn - misc interrupt thread function
3231 * @irq: interrupt number
3232 * @data: pointer to a q_vector
3233 */
ice_misc_intr_thread_fn(int __always_unused irq,void * data)3234 static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data)
3235 {
3236 struct ice_pf *pf = data;
3237 struct ice_hw *hw;
3238
3239 hw = &pf->hw;
3240
3241 if (ice_is_reset_in_progress(pf->state))
3242 goto skip_irq;
3243
3244 if (test_and_clear_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread))
3245 ice_ptp_process_ts(pf);
3246
3247 skip_irq:
3248 ice_irq_dynamic_ena(hw, NULL, NULL);
3249 ice_flush(hw);
3250
3251 if (ice_ptp_tx_tstamps_pending(pf)) {
3252 /* If any new Tx timestamps happened while in interrupt,
3253 * re-arm the interrupt to trigger it again.
3254 */
3255 wr32(hw, PFINT_OICR, PFINT_OICR_TSYN_TX_M);
3256 ice_flush(hw);
3257 }
3258
3259 return IRQ_HANDLED;
3260 }
3261
3262 /**
3263 * ice_dis_ctrlq_interrupts - disable control queue interrupts
3264 * @hw: pointer to HW structure
3265 */
ice_dis_ctrlq_interrupts(struct ice_hw * hw)3266 static void ice_dis_ctrlq_interrupts(struct ice_hw *hw)
3267 {
3268 /* disable Admin queue Interrupt causes */
3269 wr32(hw, PFINT_FW_CTL,
3270 rd32(hw, PFINT_FW_CTL) & ~PFINT_FW_CTL_CAUSE_ENA_M);
3271
3272 /* disable Mailbox queue Interrupt causes */
3273 wr32(hw, PFINT_MBX_CTL,
3274 rd32(hw, PFINT_MBX_CTL) & ~PFINT_MBX_CTL_CAUSE_ENA_M);
3275
3276 wr32(hw, PFINT_SB_CTL,
3277 rd32(hw, PFINT_SB_CTL) & ~PFINT_SB_CTL_CAUSE_ENA_M);
3278
3279 /* disable Control queue Interrupt causes */
3280 wr32(hw, PFINT_OICR_CTL,
3281 rd32(hw, PFINT_OICR_CTL) & ~PFINT_OICR_CTL_CAUSE_ENA_M);
3282
3283 ice_flush(hw);
3284 }
3285
3286 /**
3287 * ice_free_irq_msix_ll_ts- Unroll ll_ts vector setup
3288 * @pf: board private structure
3289 */
ice_free_irq_msix_ll_ts(struct ice_pf * pf)3290 static void ice_free_irq_msix_ll_ts(struct ice_pf *pf)
3291 {
3292 int irq_num = pf->ll_ts_irq.virq;
3293
3294 synchronize_irq(irq_num);
3295 devm_free_irq(ice_pf_to_dev(pf), irq_num, pf);
3296
3297 ice_free_irq(pf, pf->ll_ts_irq);
3298 }
3299
3300 /**
3301 * ice_free_irq_msix_misc - Unroll misc vector setup
3302 * @pf: board private structure
3303 */
ice_free_irq_msix_misc(struct ice_pf * pf)3304 static void ice_free_irq_msix_misc(struct ice_pf *pf)
3305 {
3306 int misc_irq_num = pf->oicr_irq.virq;
3307 struct ice_hw *hw = &pf->hw;
3308
3309 ice_dis_ctrlq_interrupts(hw);
3310
3311 /* disable OICR interrupt */
3312 wr32(hw, PFINT_OICR_ENA, 0);
3313 ice_flush(hw);
3314
3315 synchronize_irq(misc_irq_num);
3316 devm_free_irq(ice_pf_to_dev(pf), misc_irq_num, pf);
3317
3318 ice_free_irq(pf, pf->oicr_irq);
3319 if (pf->hw.dev_caps.ts_dev_info.ts_ll_int_read)
3320 ice_free_irq_msix_ll_ts(pf);
3321 }
3322
3323 /**
3324 * ice_ena_ctrlq_interrupts - enable control queue interrupts
3325 * @hw: pointer to HW structure
3326 * @reg_idx: HW vector index to associate the control queue interrupts with
3327 */
ice_ena_ctrlq_interrupts(struct ice_hw * hw,u16 reg_idx)3328 static void ice_ena_ctrlq_interrupts(struct ice_hw *hw, u16 reg_idx)
3329 {
3330 u32 val;
3331
3332 val = ((reg_idx & PFINT_OICR_CTL_MSIX_INDX_M) |
3333 PFINT_OICR_CTL_CAUSE_ENA_M);
3334 wr32(hw, PFINT_OICR_CTL, val);
3335
3336 /* enable Admin queue Interrupt causes */
3337 val = ((reg_idx & PFINT_FW_CTL_MSIX_INDX_M) |
3338 PFINT_FW_CTL_CAUSE_ENA_M);
3339 wr32(hw, PFINT_FW_CTL, val);
3340
3341 /* enable Mailbox queue Interrupt causes */
3342 val = ((reg_idx & PFINT_MBX_CTL_MSIX_INDX_M) |
3343 PFINT_MBX_CTL_CAUSE_ENA_M);
3344 wr32(hw, PFINT_MBX_CTL, val);
3345
3346 if (!hw->dev_caps.ts_dev_info.ts_ll_int_read) {
3347 /* enable Sideband queue Interrupt causes */
3348 val = ((reg_idx & PFINT_SB_CTL_MSIX_INDX_M) |
3349 PFINT_SB_CTL_CAUSE_ENA_M);
3350 wr32(hw, PFINT_SB_CTL, val);
3351 }
3352
3353 ice_flush(hw);
3354 }
3355
3356 /**
3357 * ice_req_irq_msix_misc - Setup the misc vector to handle non queue events
3358 * @pf: board private structure
3359 *
3360 * This sets up the handler for MSIX 0, which is used to manage the
3361 * non-queue interrupts, e.g. AdminQ and errors. This is not used
3362 * when in MSI or Legacy interrupt mode.
3363 */
ice_req_irq_msix_misc(struct ice_pf * pf)3364 static int ice_req_irq_msix_misc(struct ice_pf *pf)
3365 {
3366 struct device *dev = ice_pf_to_dev(pf);
3367 struct ice_hw *hw = &pf->hw;
3368 u32 pf_intr_start_offset;
3369 struct msi_map irq;
3370 int err = 0;
3371
3372 if (!pf->int_name[0])
3373 snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc",
3374 dev_driver_string(dev), dev_name(dev));
3375
3376 if (!pf->int_name_ll_ts[0])
3377 snprintf(pf->int_name_ll_ts, sizeof(pf->int_name_ll_ts) - 1,
3378 "%s-%s:ll_ts", dev_driver_string(dev), dev_name(dev));
3379 /* Do not request IRQ but do enable OICR interrupt since settings are
3380 * lost during reset. Note that this function is called only during
3381 * rebuild path and not while reset is in progress.
3382 */
3383 if (ice_is_reset_in_progress(pf->state))
3384 goto skip_req_irq;
3385
3386 /* reserve one vector in irq_tracker for misc interrupts */
3387 irq = ice_alloc_irq(pf, false);
3388 if (irq.index < 0)
3389 return irq.index;
3390
3391 pf->oicr_irq = irq;
3392 err = devm_request_threaded_irq(dev, pf->oicr_irq.virq, ice_misc_intr,
3393 ice_misc_intr_thread_fn, 0,
3394 pf->int_name, pf);
3395 if (err) {
3396 dev_err(dev, "devm_request_threaded_irq for %s failed: %d\n",
3397 pf->int_name, err);
3398 ice_free_irq(pf, pf->oicr_irq);
3399 return err;
3400 }
3401
3402 /* reserve one vector in irq_tracker for ll_ts interrupt */
3403 if (!pf->hw.dev_caps.ts_dev_info.ts_ll_int_read)
3404 goto skip_req_irq;
3405
3406 irq = ice_alloc_irq(pf, false);
3407 if (irq.index < 0)
3408 return irq.index;
3409
3410 pf->ll_ts_irq = irq;
3411 err = devm_request_irq(dev, pf->ll_ts_irq.virq, ice_ll_ts_intr, 0,
3412 pf->int_name_ll_ts, pf);
3413 if (err) {
3414 dev_err(dev, "devm_request_irq for %s failed: %d\n",
3415 pf->int_name_ll_ts, err);
3416 ice_free_irq(pf, pf->ll_ts_irq);
3417 return err;
3418 }
3419
3420 skip_req_irq:
3421 ice_ena_misc_vector(pf);
3422
3423 ice_ena_ctrlq_interrupts(hw, pf->oicr_irq.index);
3424 /* This enables LL TS interrupt */
3425 pf_intr_start_offset = rd32(hw, PFINT_ALLOC) & PFINT_ALLOC_FIRST;
3426 if (pf->hw.dev_caps.ts_dev_info.ts_ll_int_read)
3427 wr32(hw, PFINT_SB_CTL,
3428 ((pf->ll_ts_irq.index + pf_intr_start_offset) &
3429 PFINT_SB_CTL_MSIX_INDX_M) | PFINT_SB_CTL_CAUSE_ENA_M);
3430 wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_irq.index),
3431 ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S);
3432
3433 ice_flush(hw);
3434 ice_irq_dynamic_ena(hw, NULL, NULL);
3435
3436 return 0;
3437 }
3438
3439 /**
3440 * ice_set_ops - set netdev and ethtools ops for the given netdev
3441 * @vsi: the VSI associated with the new netdev
3442 */
ice_set_ops(struct ice_vsi * vsi)3443 static void ice_set_ops(struct ice_vsi *vsi)
3444 {
3445 struct net_device *netdev = vsi->netdev;
3446 struct ice_pf *pf = ice_netdev_to_pf(netdev);
3447
3448 if (ice_is_safe_mode(pf)) {
3449 netdev->netdev_ops = &ice_netdev_safe_mode_ops;
3450 ice_set_ethtool_safe_mode_ops(netdev);
3451 return;
3452 }
3453
3454 netdev->netdev_ops = &ice_netdev_ops;
3455 netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic;
3456 netdev->xdp_metadata_ops = &ice_xdp_md_ops;
3457 ice_set_ethtool_ops(netdev);
3458
3459 if (vsi->type != ICE_VSI_PF)
3460 return;
3461
3462 netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
3463 NETDEV_XDP_ACT_XSK_ZEROCOPY |
3464 NETDEV_XDP_ACT_RX_SG;
3465 netdev->xdp_zc_max_segs = ICE_MAX_BUF_TXD;
3466 }
3467
3468 /**
3469 * ice_set_netdev_features - set features for the given netdev
3470 * @netdev: netdev instance
3471 */
ice_set_netdev_features(struct net_device * netdev)3472 void ice_set_netdev_features(struct net_device *netdev)
3473 {
3474 struct ice_pf *pf = ice_netdev_to_pf(netdev);
3475 bool is_dvm_ena = ice_is_dvm_ena(&pf->hw);
3476 netdev_features_t csumo_features;
3477 netdev_features_t vlano_features;
3478 netdev_features_t dflt_features;
3479 netdev_features_t tso_features;
3480
3481 if (ice_is_safe_mode(pf)) {
3482 /* safe mode */
3483 netdev->features = NETIF_F_SG | NETIF_F_HIGHDMA;
3484 netdev->hw_features = netdev->features;
3485 return;
3486 }
3487
3488 dflt_features = NETIF_F_SG |
3489 NETIF_F_HIGHDMA |
3490 NETIF_F_NTUPLE |
3491 NETIF_F_RXHASH;
3492
3493 csumo_features = NETIF_F_RXCSUM |
3494 NETIF_F_IP_CSUM |
3495 NETIF_F_SCTP_CRC |
3496 NETIF_F_IPV6_CSUM;
3497
3498 vlano_features = NETIF_F_HW_VLAN_CTAG_FILTER |
3499 NETIF_F_HW_VLAN_CTAG_TX |
3500 NETIF_F_HW_VLAN_CTAG_RX;
3501
3502 /* Enable CTAG/STAG filtering by default in Double VLAN Mode (DVM) */
3503 if (is_dvm_ena)
3504 vlano_features |= NETIF_F_HW_VLAN_STAG_FILTER;
3505
3506 tso_features = NETIF_F_TSO |
3507 NETIF_F_TSO_ECN |
3508 NETIF_F_TSO6 |
3509 NETIF_F_GSO_GRE |
3510 NETIF_F_GSO_UDP_TUNNEL |
3511 NETIF_F_GSO_GRE_CSUM |
3512 NETIF_F_GSO_UDP_TUNNEL_CSUM |
3513 NETIF_F_GSO_PARTIAL |
3514 NETIF_F_GSO_IPXIP4 |
3515 NETIF_F_GSO_IPXIP6 |
3516 NETIF_F_GSO_UDP_L4;
3517
3518 netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM |
3519 NETIF_F_GSO_GRE_CSUM;
3520 /* set features that user can change */
3521 netdev->hw_features = dflt_features | csumo_features |
3522 vlano_features | tso_features;
3523
3524 /* add support for HW_CSUM on packets with MPLS header */
3525 netdev->mpls_features = NETIF_F_HW_CSUM |
3526 NETIF_F_TSO |
3527 NETIF_F_TSO6;
3528
3529 /* enable features */
3530 netdev->features |= netdev->hw_features;
3531
3532 netdev->hw_features |= NETIF_F_HW_TC;
3533 netdev->hw_features |= NETIF_F_LOOPBACK;
3534
3535 /* encap and VLAN devices inherit default, csumo and tso features */
3536 netdev->hw_enc_features |= dflt_features | csumo_features |
3537 tso_features;
3538 netdev->vlan_features |= dflt_features | csumo_features |
3539 tso_features;
3540
3541 /* advertise support but don't enable by default since only one type of
3542 * VLAN offload can be enabled at a time (i.e. CTAG or STAG). When one
3543 * type turns on the other has to be turned off. This is enforced by the
3544 * ice_fix_features() ndo callback.
3545 */
3546 if (is_dvm_ena)
3547 netdev->hw_features |= NETIF_F_HW_VLAN_STAG_RX |
3548 NETIF_F_HW_VLAN_STAG_TX;
3549
3550 /* Leave CRC / FCS stripping enabled by default, but allow the value to
3551 * be changed at runtime
3552 */
3553 netdev->hw_features |= NETIF_F_RXFCS;
3554
3555 /* Allow core to manage IRQs affinity */
3556 netif_set_affinity_auto(netdev);
3557
3558 /* Mutual exclusivity for TSO and GCS is enforced by the set features
3559 * ndo callback.
3560 */
3561 if (ice_is_feature_supported(pf, ICE_F_GCS))
3562 netdev->hw_features |= NETIF_F_HW_CSUM;
3563
3564 netif_set_tso_max_size(netdev, ICE_MAX_TSO_SIZE);
3565 }
3566
/**
 * ice_fill_rss_lut - Fill the RSS lookup table with default values
 * @lut: Lookup table
 * @rss_table_size: Lookup table size
 * @rss_size: Range of queue number for hashing
 *
 * Entry i is set to i modulo @rss_size, spreading the entries round-robin
 * over queues 0..@rss_size - 1. An @rss_size of zero is treated as one
 * queue, so every entry becomes 0 instead of dividing by zero.
 */
void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size)
{
	u16 i;

	/* a zero modulus would be a divide-by-zero */
	if (!rss_size)
		rss_size = 1;

	for (i = 0; i < rss_table_size; i++)
		lut[i] = i % rss_size;
}
3580
3581 /**
3582 * ice_pf_vsi_setup - Set up a PF VSI
3583 * @pf: board private structure
3584 * @pi: pointer to the port_info instance
3585 *
3586 * Returns pointer to the successfully allocated VSI software struct
3587 * on success, otherwise returns NULL on failure.
3588 */
3589 static struct ice_vsi *
ice_pf_vsi_setup(struct ice_pf * pf,struct ice_port_info * pi)3590 ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
3591 {
3592 struct ice_vsi_cfg_params params = {};
3593
3594 params.type = ICE_VSI_PF;
3595 params.port_info = pi;
3596 params.flags = ICE_VSI_FLAG_INIT;
3597
3598 return ice_vsi_setup(pf, ¶ms);
3599 }
3600
3601 static struct ice_vsi *
ice_chnl_vsi_setup(struct ice_pf * pf,struct ice_port_info * pi,struct ice_channel * ch)3602 ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
3603 struct ice_channel *ch)
3604 {
3605 struct ice_vsi_cfg_params params = {};
3606
3607 params.type = ICE_VSI_CHNL;
3608 params.port_info = pi;
3609 params.ch = ch;
3610 params.flags = ICE_VSI_FLAG_INIT;
3611
3612 return ice_vsi_setup(pf, ¶ms);
3613 }
3614
3615 /**
3616 * ice_ctrl_vsi_setup - Set up a control VSI
3617 * @pf: board private structure
3618 * @pi: pointer to the port_info instance
3619 *
3620 * Returns pointer to the successfully allocated VSI software struct
3621 * on success, otherwise returns NULL on failure.
3622 */
3623 static struct ice_vsi *
ice_ctrl_vsi_setup(struct ice_pf * pf,struct ice_port_info * pi)3624 ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
3625 {
3626 struct ice_vsi_cfg_params params = {};
3627
3628 params.type = ICE_VSI_CTRL;
3629 params.port_info = pi;
3630 params.flags = ICE_VSI_FLAG_INIT;
3631
3632 return ice_vsi_setup(pf, ¶ms);
3633 }
3634
3635 /**
3636 * ice_lb_vsi_setup - Set up a loopback VSI
3637 * @pf: board private structure
3638 * @pi: pointer to the port_info instance
3639 *
3640 * Returns pointer to the successfully allocated VSI software struct
3641 * on success, otherwise returns NULL on failure.
3642 */
3643 struct ice_vsi *
ice_lb_vsi_setup(struct ice_pf * pf,struct ice_port_info * pi)3644 ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
3645 {
3646 struct ice_vsi_cfg_params params = {};
3647
3648 params.type = ICE_VSI_LB;
3649 params.port_info = pi;
3650 params.flags = ICE_VSI_FLAG_INIT;
3651
3652 return ice_vsi_setup(pf, ¶ms);
3653 }
3654
3655 /**
3656 * ice_vlan_rx_add_vid - Add a VLAN ID filter to HW offload
3657 * @netdev: network interface to be adjusted
3658 * @proto: VLAN TPID
3659 * @vid: VLAN ID to be added
3660 *
3661 * net_device_ops implementation for adding VLAN IDs
3662 */
ice_vlan_rx_add_vid(struct net_device * netdev,__be16 proto,u16 vid)3663 int ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
3664 {
3665 struct ice_netdev_priv *np = netdev_priv(netdev);
3666 struct ice_vsi_vlan_ops *vlan_ops;
3667 struct ice_vsi *vsi = np->vsi;
3668 struct ice_vlan vlan;
3669 int ret;
3670
3671 /* VLAN 0 is added by default during load/reset */
3672 if (!vid)
3673 return 0;
3674
3675 while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
3676 usleep_range(1000, 2000);
3677
3678 /* Add multicast promisc rule for the VLAN ID to be added if
3679 * all-multicast is currently enabled.
3680 */
3681 if (vsi->current_netdev_flags & IFF_ALLMULTI) {
3682 ret = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
3683 ICE_MCAST_VLAN_PROMISC_BITS,
3684 vid);
3685 if (ret && ret != -EEXIST)
3686 goto finish;
3687 }
3688
3689 vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
3690
3691 /* Add a switch rule for this VLAN ID so its corresponding VLAN tagged
3692 * packets aren't pruned by the device's internal switch on Rx
3693 */
3694 vlan = ICE_VLAN(be16_to_cpu(proto), vid, 0);
3695 ret = vlan_ops->add_vlan(vsi, &vlan);
3696 if (ret)
3697 goto finish;
3698
3699 /* If all-multicast is currently enabled and this VLAN ID is only one
3700 * besides VLAN-0 we have to update look-up type of multicast promisc
3701 * rule for VLAN-0 from ICE_SW_LKUP_PROMISC to ICE_SW_LKUP_PROMISC_VLAN.
3702 */
3703 if ((vsi->current_netdev_flags & IFF_ALLMULTI) &&
3704 ice_vsi_num_non_zero_vlans(vsi) == 1) {
3705 ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
3706 ICE_MCAST_PROMISC_BITS, 0);
3707 ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
3708 ICE_MCAST_VLAN_PROMISC_BITS, 0);
3709 }
3710
3711 finish:
3712 clear_bit(ICE_CFG_BUSY, vsi->state);
3713
3714 return ret;
3715 }
3716
3717 /**
3718 * ice_vlan_rx_kill_vid - Remove a VLAN ID filter from HW offload
3719 * @netdev: network interface to be adjusted
3720 * @proto: VLAN TPID
3721 * @vid: VLAN ID to be removed
3722 *
3723 * net_device_ops implementation for removing VLAN IDs
3724 */
ice_vlan_rx_kill_vid(struct net_device * netdev,__be16 proto,u16 vid)3725 int ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
3726 {
3727 struct ice_netdev_priv *np = netdev_priv(netdev);
3728 struct ice_vsi_vlan_ops *vlan_ops;
3729 struct ice_vsi *vsi = np->vsi;
3730 struct ice_vlan vlan;
3731 int ret;
3732
3733 /* don't allow removal of VLAN 0 */
3734 if (!vid)
3735 return 0;
3736
3737 while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
3738 usleep_range(1000, 2000);
3739
3740 ret = ice_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
3741 ICE_MCAST_VLAN_PROMISC_BITS, vid);
3742 if (ret) {
3743 netdev_err(netdev, "Error clearing multicast promiscuous mode on VSI %i\n",
3744 vsi->vsi_num);
3745 vsi->current_netdev_flags |= IFF_ALLMULTI;
3746 }
3747
3748 vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
3749
3750 /* Make sure VLAN delete is successful before updating VLAN
3751 * information
3752 */
3753 vlan = ICE_VLAN(be16_to_cpu(proto), vid, 0);
3754 ret = vlan_ops->del_vlan(vsi, &vlan);
3755 if (ret)
3756 goto finish;
3757
3758 /* Remove multicast promisc rule for the removed VLAN ID if
3759 * all-multicast is enabled.
3760 */
3761 if (vsi->current_netdev_flags & IFF_ALLMULTI)
3762 ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
3763 ICE_MCAST_VLAN_PROMISC_BITS, vid);
3764
3765 if (!ice_vsi_has_non_zero_vlans(vsi)) {
3766 /* Update look-up type of multicast promisc rule for VLAN 0
3767 * from ICE_SW_LKUP_PROMISC_VLAN to ICE_SW_LKUP_PROMISC when
3768 * all-multicast is enabled and VLAN 0 is the only VLAN rule.
3769 */
3770 if (vsi->current_netdev_flags & IFF_ALLMULTI) {
3771 ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
3772 ICE_MCAST_VLAN_PROMISC_BITS,
3773 0);
3774 ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
3775 ICE_MCAST_PROMISC_BITS, 0);
3776 }
3777 }
3778
3779 finish:
3780 clear_bit(ICE_CFG_BUSY, vsi->state);
3781
3782 return ret;
3783 }
3784
3785 /**
3786 * ice_rep_indr_tc_block_unbind
3787 * @cb_priv: indirection block private data
3788 */
ice_rep_indr_tc_block_unbind(void * cb_priv)3789 static void ice_rep_indr_tc_block_unbind(void *cb_priv)
3790 {
3791 struct ice_indr_block_priv *indr_priv = cb_priv;
3792
3793 list_del(&indr_priv->list);
3794 kfree(indr_priv);
3795 }
3796
3797 /**
3798 * ice_tc_indir_block_unregister - Unregister TC indirect block notifications
3799 * @vsi: VSI struct which has the netdev
3800 */
ice_tc_indir_block_unregister(struct ice_vsi * vsi)3801 static void ice_tc_indir_block_unregister(struct ice_vsi *vsi)
3802 {
3803 struct ice_netdev_priv *np = netdev_priv(vsi->netdev);
3804
3805 flow_indr_dev_unregister(ice_indr_setup_tc_cb, np,
3806 ice_rep_indr_tc_block_unbind);
3807 }
3808
3809 /**
3810 * ice_tc_indir_block_register - Register TC indirect block notifications
3811 * @vsi: VSI struct which has the netdev
3812 *
3813 * Returns 0 on success, negative value on failure
3814 */
ice_tc_indir_block_register(struct ice_vsi * vsi)3815 static int ice_tc_indir_block_register(struct ice_vsi *vsi)
3816 {
3817 struct ice_netdev_priv *np;
3818
3819 if (!vsi || !vsi->netdev)
3820 return -EINVAL;
3821
3822 np = netdev_priv(vsi->netdev);
3823
3824 INIT_LIST_HEAD(&np->tc_indr_block_priv_list);
3825 return flow_indr_dev_register(ice_indr_setup_tc_cb, np);
3826 }
3827
3828 /**
3829 * ice_get_avail_q_count - Get count of queues in use
3830 * @pf_qmap: bitmap to get queue use count from
3831 * @lock: pointer to a mutex that protects access to pf_qmap
3832 * @size: size of the bitmap
3833 */
3834 static u16
ice_get_avail_q_count(unsigned long * pf_qmap,struct mutex * lock,u16 size)3835 ice_get_avail_q_count(unsigned long *pf_qmap, struct mutex *lock, u16 size)
3836 {
3837 unsigned long bit;
3838 u16 count = 0;
3839
3840 mutex_lock(lock);
3841 for_each_clear_bit(bit, pf_qmap, size)
3842 count++;
3843 mutex_unlock(lock);
3844
3845 return count;
3846 }
3847
3848 /**
3849 * ice_get_avail_txq_count - Get count of Tx queues in use
3850 * @pf: pointer to an ice_pf instance
3851 */
ice_get_avail_txq_count(struct ice_pf * pf)3852 u16 ice_get_avail_txq_count(struct ice_pf *pf)
3853 {
3854 return ice_get_avail_q_count(pf->avail_txqs, &pf->avail_q_mutex,
3855 pf->max_pf_txqs);
3856 }
3857
3858 /**
3859 * ice_get_avail_rxq_count - Get count of Rx queues in use
3860 * @pf: pointer to an ice_pf instance
3861 */
ice_get_avail_rxq_count(struct ice_pf * pf)3862 u16 ice_get_avail_rxq_count(struct ice_pf *pf)
3863 {
3864 return ice_get_avail_q_count(pf->avail_rxqs, &pf->avail_q_mutex,
3865 pf->max_pf_rxqs);
3866 }
3867
3868 /**
3869 * ice_deinit_pf - Unrolls initialziations done by ice_init_pf
3870 * @pf: board private structure to initialize
3871 */
ice_deinit_pf(struct ice_pf * pf)3872 void ice_deinit_pf(struct ice_pf *pf)
3873 {
3874 /* note that we unroll also on ice_init_pf() failure here */
3875
3876 mutex_destroy(&pf->lag_mutex);
3877 mutex_destroy(&pf->adev_mutex);
3878 mutex_destroy(&pf->sw_mutex);
3879 mutex_destroy(&pf->tc_mutex);
3880 mutex_destroy(&pf->avail_q_mutex);
3881 mutex_destroy(&pf->vfs.table_lock);
3882
3883 if (pf->avail_txqs) {
3884 bitmap_free(pf->avail_txqs);
3885 pf->avail_txqs = NULL;
3886 }
3887
3888 if (pf->avail_rxqs) {
3889 bitmap_free(pf->avail_rxqs);
3890 pf->avail_rxqs = NULL;
3891 }
3892
3893 if (pf->txtime_txqs) {
3894 bitmap_free(pf->txtime_txqs);
3895 pf->txtime_txqs = NULL;
3896 }
3897
3898 if (pf->ptp.clock)
3899 ptp_clock_unregister(pf->ptp.clock);
3900
3901 if (!xa_empty(&pf->irq_tracker.entries))
3902 ice_free_irq_msix_misc(pf);
3903
3904 xa_destroy(&pf->dyn_ports);
3905 xa_destroy(&pf->sf_nums);
3906 }
3907
3908 /**
3909 * ice_set_pf_caps - set PFs capability flags
3910 * @pf: pointer to the PF instance
3911 */
ice_set_pf_caps(struct ice_pf * pf)3912 static void ice_set_pf_caps(struct ice_pf *pf)
3913 {
3914 struct ice_hw_func_caps *func_caps = &pf->hw.func_caps;
3915
3916 clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
3917 if (func_caps->common_cap.rdma)
3918 set_bit(ICE_FLAG_RDMA_ENA, pf->flags);
3919 clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
3920 if (func_caps->common_cap.dcb)
3921 set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
3922 clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
3923 if (func_caps->common_cap.sr_iov_1_1) {
3924 set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
3925 pf->vfs.num_supported = min_t(int, func_caps->num_allocd_vfs,
3926 ICE_MAX_SRIOV_VFS);
3927 }
3928 clear_bit(ICE_FLAG_RSS_ENA, pf->flags);
3929 if (func_caps->common_cap.rss_table_size)
3930 set_bit(ICE_FLAG_RSS_ENA, pf->flags);
3931
3932 clear_bit(ICE_FLAG_FD_ENA, pf->flags);
3933 if (func_caps->fd_fltr_guar > 0 || func_caps->fd_fltr_best_effort > 0) {
3934 u16 unused;
3935
3936 /* ctrl_vsi_idx will be set to a valid value when flow director
3937 * is setup by ice_init_fdir
3938 */
3939 pf->ctrl_vsi_idx = ICE_NO_VSI;
3940 set_bit(ICE_FLAG_FD_ENA, pf->flags);
3941 /* force guaranteed filter pool for PF */
3942 ice_alloc_fd_guar_item(&pf->hw, &unused,
3943 func_caps->fd_fltr_guar);
3944 /* force shared filter pool for PF */
3945 ice_alloc_fd_shrd_item(&pf->hw, &unused,
3946 func_caps->fd_fltr_best_effort);
3947 }
3948
3949 clear_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags);
3950 if (func_caps->common_cap.ieee_1588)
3951 set_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags);
3952
3953 pf->max_pf_txqs = func_caps->common_cap.num_txq;
3954 pf->max_pf_rxqs = func_caps->common_cap.num_rxq;
3955 }
3956
ice_start_service_task(struct ice_pf * pf)3957 void ice_start_service_task(struct ice_pf *pf)
3958 {
3959 timer_setup(&pf->serv_tmr, ice_service_timer, 0);
3960 pf->serv_tmr_period = HZ;
3961 INIT_WORK(&pf->serv_task, ice_service_task);
3962 clear_bit(ICE_SERVICE_SCHED, pf->state);
3963 }
3964
3965 /**
3966 * ice_init_pf - Initialize general software structures (struct ice_pf)
3967 * @pf: board private structure to initialize
3968 * Return: 0 on success, negative errno otherwise.
3969 */
ice_init_pf(struct ice_pf * pf)3970 int ice_init_pf(struct ice_pf *pf)
3971 {
3972 struct udp_tunnel_nic_info *udp_tunnel_nic = &pf->hw.udp_tunnel_nic;
3973 struct device *dev = ice_pf_to_dev(pf);
3974 struct ice_hw *hw = &pf->hw;
3975 int err = -ENOMEM;
3976
3977 mutex_init(&pf->sw_mutex);
3978 mutex_init(&pf->tc_mutex);
3979 mutex_init(&pf->adev_mutex);
3980 mutex_init(&pf->lag_mutex);
3981
3982 INIT_HLIST_HEAD(&pf->aq_wait_list);
3983 spin_lock_init(&pf->aq_wait_lock);
3984 init_waitqueue_head(&pf->aq_wait_queue);
3985
3986 init_waitqueue_head(&pf->reset_wait_queue);
3987
3988 mutex_init(&pf->avail_q_mutex);
3989
3990 mutex_init(&pf->vfs.table_lock);
3991 hash_init(pf->vfs.table);
3992 if (ice_is_feature_supported(pf, ICE_F_MBX_LIMIT))
3993 wr32(&pf->hw, E830_MBX_PF_IN_FLIGHT_VF_MSGS_THRESH,
3994 ICE_MBX_OVERFLOW_WATERMARK);
3995 else
3996 ice_mbx_init_snapshot(&pf->hw);
3997
3998 xa_init(&pf->dyn_ports);
3999 xa_init(&pf->sf_nums);
4000
4001 pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL);
4002 pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL);
4003 pf->txtime_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL);
4004 if (!pf->avail_txqs || !pf->avail_rxqs || !pf->txtime_txqs)
4005 goto undo_init;
4006
4007 udp_tunnel_nic->set_port = ice_udp_tunnel_set_port;
4008 udp_tunnel_nic->unset_port = ice_udp_tunnel_unset_port;
4009 udp_tunnel_nic->shared = &hw->udp_tunnel_shared;
4010 udp_tunnel_nic->tables[0].n_entries = hw->tnl.valid_count[TNL_VXLAN];
4011 udp_tunnel_nic->tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN;
4012 udp_tunnel_nic->tables[1].n_entries = hw->tnl.valid_count[TNL_GENEVE];
4013 udp_tunnel_nic->tables[1].tunnel_types = UDP_TUNNEL_TYPE_GENEVE;
4014
4015 /* In case of MSIX we are going to setup the misc vector right here
4016 * to handle admin queue events etc. In case of legacy and MSI
4017 * the misc functionality and queue processing is combined in
4018 * the same vector and that gets setup at open.
4019 */
4020 err = ice_req_irq_msix_misc(pf);
4021 if (err) {
4022 dev_err(dev, "setup of misc vector failed: %d\n", err);
4023 goto undo_init;
4024 }
4025
4026 return 0;
4027 undo_init:
4028 /* deinit handles half-initialized pf just fine */
4029 ice_deinit_pf(pf);
4030 return err;
4031 }
4032
4033 /**
4034 * ice_is_wol_supported - check if WoL is supported
4035 * @hw: pointer to hardware info
4036 *
4037 * Check if WoL is supported based on the HW configuration.
4038 * Returns true if NVM supports and enables WoL for this port, false otherwise
4039 */
ice_is_wol_supported(struct ice_hw * hw)4040 bool ice_is_wol_supported(struct ice_hw *hw)
4041 {
4042 u16 wol_ctrl;
4043
4044 /* A bit set to 1 in the NVM Software Reserved Word 2 (WoL control
4045 * word) indicates WoL is not supported on the corresponding PF ID.
4046 */
4047 if (ice_read_sr_word(hw, ICE_SR_NVM_WOL_CFG, &wol_ctrl))
4048 return false;
4049
4050 return !(BIT(hw->port_info->lport) & wol_ctrl);
4051 }
4052
4053 /**
4054 * ice_vsi_recfg_qs - Change the number of queues on a VSI
4055 * @vsi: VSI being changed
4056 * @new_rx: new number of Rx queues
4057 * @new_tx: new number of Tx queues
4058 * @locked: is adev device_lock held
4059 *
4060 * Only change the number of queues if new_tx, or new_rx is non-0.
4061 *
4062 * Returns 0 on success.
4063 */
ice_vsi_recfg_qs(struct ice_vsi * vsi,int new_rx,int new_tx,bool locked)4064 int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked)
4065 {
4066 struct ice_pf *pf = vsi->back;
4067 int i, err = 0, timeout = 50;
4068
4069 if (!new_rx && !new_tx)
4070 return -EINVAL;
4071
4072 while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) {
4073 timeout--;
4074 if (!timeout)
4075 return -EBUSY;
4076 usleep_range(1000, 2000);
4077 }
4078
4079 if (new_tx)
4080 vsi->req_txq = (u16)new_tx;
4081 if (new_rx)
4082 vsi->req_rxq = (u16)new_rx;
4083
4084 /* set for the next time the netdev is started */
4085 if (!netif_running(vsi->netdev)) {
4086 err = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
4087 if (err)
4088 goto rebuild_err;
4089 dev_dbg(ice_pf_to_dev(pf), "Link is down, queue count change happens when link is brought up\n");
4090 goto done;
4091 }
4092
4093 ice_vsi_close(vsi);
4094 err = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
4095 if (err)
4096 goto rebuild_err;
4097
4098 ice_for_each_traffic_class(i) {
4099 if (vsi->tc_cfg.ena_tc & BIT(i))
4100 netdev_set_tc_queue(vsi->netdev,
4101 vsi->tc_cfg.tc_info[i].netdev_tc,
4102 vsi->tc_cfg.tc_info[i].qcount_tx,
4103 vsi->tc_cfg.tc_info[i].qoffset);
4104 }
4105 ice_pf_dcb_recfg(pf, locked);
4106 ice_vsi_open(vsi);
4107 /* Rx rings are reallocated during VSI rebuild and lose their ptp_rx
4108 * flag. Restore timestamp mode so newly allocated rings are set up
4109 * for hardware Rx timestamping.
4110 */
4111 if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
4112 ice_ptp_restore_timestamp_mode(pf);
4113 goto done;
4114
4115 rebuild_err:
4116 dev_err(ice_pf_to_dev(pf), "Error during VSI rebuild: %d. Unload and reload the driver.\n",
4117 err);
4118 done:
4119 clear_bit(ICE_CFG_BUSY, pf->state);
4120 return err;
4121 }
4122
4123 /**
4124 * ice_set_safe_mode_vlan_cfg - configure PF VSI to allow all VLANs in safe mode
4125 * @pf: PF to configure
4126 *
4127 * No VLAN offloads/filtering are advertised in safe mode so make sure the PF
4128 * VSI can still Tx/Rx VLAN tagged packets.
4129 */
ice_set_safe_mode_vlan_cfg(struct ice_pf * pf)4130 static void ice_set_safe_mode_vlan_cfg(struct ice_pf *pf)
4131 {
4132 struct ice_vsi *vsi = ice_get_main_vsi(pf);
4133 struct ice_vsi_ctx *ctxt;
4134 struct ice_hw *hw;
4135 int status;
4136
4137 if (!vsi)
4138 return;
4139
4140 ctxt = kzalloc_obj(*ctxt);
4141 if (!ctxt)
4142 return;
4143
4144 hw = &pf->hw;
4145 ctxt->info = vsi->info;
4146
4147 ctxt->info.valid_sections =
4148 cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID |
4149 ICE_AQ_VSI_PROP_SECURITY_VALID |
4150 ICE_AQ_VSI_PROP_SW_VALID);
4151
4152 /* disable VLAN anti-spoof */
4153 ctxt->info.sec_flags &= ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
4154 ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
4155
4156 /* disable VLAN pruning and keep all other settings */
4157 ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
4158
4159 /* allow all VLANs on Tx and don't strip on Rx */
4160 ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL |
4161 ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING;
4162
4163 status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
4164 if (status) {
4165 dev_err(ice_pf_to_dev(vsi->back), "Failed to update VSI for safe mode VLANs, err %d aq_err %s\n",
4166 status, libie_aq_str(hw->adminq.sq_last_status));
4167 } else {
4168 vsi->info.sec_flags = ctxt->info.sec_flags;
4169 vsi->info.sw_flags2 = ctxt->info.sw_flags2;
4170 vsi->info.inner_vlan_flags = ctxt->info.inner_vlan_flags;
4171 }
4172
4173 kfree(ctxt);
4174 }
4175
4176 /**
4177 * ice_log_pkg_init - log result of DDP package load
4178 * @hw: pointer to hardware info
4179 * @state: state of package load
4180 */
ice_log_pkg_init(struct ice_hw * hw,enum ice_ddp_state state)4181 static void ice_log_pkg_init(struct ice_hw *hw, enum ice_ddp_state state)
4182 {
4183 struct ice_pf *pf = hw->back;
4184 struct device *dev;
4185
4186 dev = ice_pf_to_dev(pf);
4187
4188 switch (state) {
4189 case ICE_DDP_PKG_SUCCESS:
4190 dev_info(dev, "The DDP package was successfully loaded: %s version %d.%d.%d.%d\n",
4191 hw->active_pkg_name,
4192 hw->active_pkg_ver.major,
4193 hw->active_pkg_ver.minor,
4194 hw->active_pkg_ver.update,
4195 hw->active_pkg_ver.draft);
4196 break;
4197 case ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED:
4198 dev_info(dev, "DDP package already present on device: %s version %d.%d.%d.%d\n",
4199 hw->active_pkg_name,
4200 hw->active_pkg_ver.major,
4201 hw->active_pkg_ver.minor,
4202 hw->active_pkg_ver.update,
4203 hw->active_pkg_ver.draft);
4204 break;
4205 case ICE_DDP_PKG_ALREADY_LOADED_NOT_SUPPORTED:
4206 dev_err(dev, "The device has a DDP package that is not supported by the driver. The device has package '%s' version %d.%d.x.x. The driver requires version %d.%d.x.x. Entering Safe Mode.\n",
4207 hw->active_pkg_name,
4208 hw->active_pkg_ver.major,
4209 hw->active_pkg_ver.minor,
4210 ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
4211 break;
4212 case ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED:
4213 dev_info(dev, "The driver could not load the DDP package file because a compatible DDP package is already present on the device. The device has package '%s' version %d.%d.%d.%d. The package file found by the driver: '%s' version %d.%d.%d.%d.\n",
4214 hw->active_pkg_name,
4215 hw->active_pkg_ver.major,
4216 hw->active_pkg_ver.minor,
4217 hw->active_pkg_ver.update,
4218 hw->active_pkg_ver.draft,
4219 hw->pkg_name,
4220 hw->pkg_ver.major,
4221 hw->pkg_ver.minor,
4222 hw->pkg_ver.update,
4223 hw->pkg_ver.draft);
4224 break;
4225 case ICE_DDP_PKG_FW_MISMATCH:
4226 dev_err(dev, "The firmware loaded on the device is not compatible with the DDP package. Please update the device's NVM. Entering safe mode.\n");
4227 break;
4228 case ICE_DDP_PKG_INVALID_FILE:
4229 dev_err(dev, "The DDP package file is invalid. Entering Safe Mode.\n");
4230 break;
4231 case ICE_DDP_PKG_FILE_VERSION_TOO_HIGH:
4232 dev_err(dev, "The DDP package file version is higher than the driver supports. Please use an updated driver. Entering Safe Mode.\n");
4233 break;
4234 case ICE_DDP_PKG_FILE_VERSION_TOO_LOW:
4235 dev_err(dev, "The DDP package file version is lower than the driver supports. The driver requires version %d.%d.x.x. Please use an updated DDP Package file. Entering Safe Mode.\n",
4236 ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
4237 break;
4238 case ICE_DDP_PKG_FILE_SIGNATURE_INVALID:
4239 dev_err(dev, "The DDP package could not be loaded because its signature is not valid. Please use a valid DDP Package. Entering Safe Mode.\n");
4240 break;
4241 case ICE_DDP_PKG_FILE_REVISION_TOO_LOW:
4242 dev_err(dev, "The DDP Package could not be loaded because its security revision is too low. Please use an updated DDP Package. Entering Safe Mode.\n");
4243 break;
4244 case ICE_DDP_PKG_LOAD_ERROR:
4245 dev_err(dev, "An error occurred on the device while loading the DDP package. The device will be reset.\n");
4246 /* poll for reset to complete */
4247 if (ice_check_reset(hw))
4248 dev_err(dev, "Error resetting device. Please reload the driver\n");
4249 break;
4250 case ICE_DDP_PKG_ERR:
4251 default:
4252 dev_err(dev, "An unknown error occurred when loading the DDP package. Entering Safe Mode.\n");
4253 break;
4254 }
4255 }
4256
4257 /**
4258 * ice_load_pkg - load/reload the DDP Package file
4259 * @firmware: firmware structure when firmware requested or NULL for reload
4260 * @pf: pointer to the PF instance
4261 *
4262 * Called on probe and post CORER/GLOBR rebuild to load DDP Package and
4263 * initialize HW tables.
4264 */
4265 static void
ice_load_pkg(const struct firmware * firmware,struct ice_pf * pf)4266 ice_load_pkg(const struct firmware *firmware, struct ice_pf *pf)
4267 {
4268 enum ice_ddp_state state = ICE_DDP_PKG_ERR;
4269 struct device *dev = ice_pf_to_dev(pf);
4270 struct ice_hw *hw = &pf->hw;
4271
4272 /* Load DDP Package */
4273 if (firmware && !hw->pkg_copy) {
4274 state = ice_copy_and_init_pkg(hw, firmware->data,
4275 firmware->size);
4276 ice_log_pkg_init(hw, state);
4277 } else if (!firmware && hw->pkg_copy) {
4278 /* Reload package during rebuild after CORER/GLOBR reset */
4279 state = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
4280 ice_log_pkg_init(hw, state);
4281 } else {
4282 dev_err(dev, "The DDP package file failed to load. Entering Safe Mode.\n");
4283 }
4284
4285 if (!ice_is_init_pkg_successful(state)) {
4286 /* Safe Mode */
4287 clear_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
4288 return;
4289 }
4290
4291 /* Successful download package is the precondition for advanced
4292 * features, hence setting the ICE_FLAG_ADV_FEATURES flag
4293 */
4294 set_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
4295 }
4296
4297 /**
4298 * ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines
4299 * @pf: pointer to the PF structure
4300 *
4301 * There is no error returned here because the driver should be able to handle
4302 * 128 Byte cache lines, so we only print a warning in case issues are seen,
4303 * specifically with Tx.
4304 */
ice_verify_cacheline_size(struct ice_pf * pf)4305 static void ice_verify_cacheline_size(struct ice_pf *pf)
4306 {
4307 if (rd32(&pf->hw, GLPCI_CNF2) & GLPCI_CNF2_CACHELINE_SIZE_M)
4308 dev_warn(ice_pf_to_dev(pf), "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n",
4309 ICE_CACHE_LINE_BYTES);
4310 }
4311
4312 /**
4313 * ice_send_version - update firmware with driver version
4314 * @pf: PF struct
4315 *
4316 * Returns 0 on success, else error code
4317 */
ice_send_version(struct ice_pf * pf)4318 static int ice_send_version(struct ice_pf *pf)
4319 {
4320 struct ice_driver_ver dv;
4321
4322 dv.major_ver = 0xff;
4323 dv.minor_ver = 0xff;
4324 dv.build_ver = 0xff;
4325 dv.subbuild_ver = 0;
4326 strscpy((char *)dv.driver_string, UTS_RELEASE,
4327 sizeof(dv.driver_string));
4328 return ice_aq_send_driver_ver(&pf->hw, &dv, NULL);
4329 }
4330
4331 /**
4332 * ice_init_fdir - Initialize flow director VSI and configuration
4333 * @pf: pointer to the PF instance
4334 *
4335 * returns 0 on success, negative on error
4336 */
ice_init_fdir(struct ice_pf * pf)4337 static int ice_init_fdir(struct ice_pf *pf)
4338 {
4339 struct device *dev = ice_pf_to_dev(pf);
4340 struct ice_vsi *ctrl_vsi;
4341 int err;
4342
4343 /* Side Band Flow Director needs to have a control VSI.
4344 * Allocate it and store it in the PF.
4345 */
4346 ctrl_vsi = ice_ctrl_vsi_setup(pf, pf->hw.port_info);
4347 if (!ctrl_vsi) {
4348 dev_dbg(dev, "could not create control VSI\n");
4349 return -ENOMEM;
4350 }
4351
4352 err = ice_vsi_open_ctrl(ctrl_vsi);
4353 if (err) {
4354 dev_dbg(dev, "could not open control VSI\n");
4355 goto err_vsi_open;
4356 }
4357
4358 mutex_init(&pf->hw.fdir_fltr_lock);
4359
4360 err = ice_fdir_create_dflt_rules(pf);
4361 if (err)
4362 goto err_fdir_rule;
4363
4364 return 0;
4365
4366 err_fdir_rule:
4367 ice_fdir_release_flows(&pf->hw);
4368 ice_vsi_close(ctrl_vsi);
4369 err_vsi_open:
4370 ice_vsi_release(ctrl_vsi);
4371 if (pf->ctrl_vsi_idx != ICE_NO_VSI) {
4372 pf->vsi[pf->ctrl_vsi_idx] = NULL;
4373 pf->ctrl_vsi_idx = ICE_NO_VSI;
4374 }
4375 return err;
4376 }
4377
ice_deinit_fdir(struct ice_pf * pf)4378 static void ice_deinit_fdir(struct ice_pf *pf)
4379 {
4380 struct ice_vsi *vsi = ice_get_ctrl_vsi(pf);
4381
4382 if (!vsi)
4383 return;
4384
4385 ice_vsi_manage_fdir(vsi, false);
4386 ice_vsi_release(vsi);
4387 if (pf->ctrl_vsi_idx != ICE_NO_VSI) {
4388 pf->vsi[pf->ctrl_vsi_idx] = NULL;
4389 pf->ctrl_vsi_idx = ICE_NO_VSI;
4390 }
4391
4392 mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
4393 }
4394
4395 /**
4396 * ice_get_opt_fw_name - return optional firmware file name or NULL
4397 * @pf: pointer to the PF instance
4398 */
ice_get_opt_fw_name(struct ice_pf * pf)4399 static char *ice_get_opt_fw_name(struct ice_pf *pf)
4400 {
4401 /* Optional firmware name same as default with additional dash
4402 * followed by a EUI-64 identifier (PCIe Device Serial Number)
4403 */
4404 struct pci_dev *pdev = pf->pdev;
4405 char *opt_fw_filename;
4406 u64 dsn;
4407
4408 /* Determine the name of the optional file using the DSN (two
4409 * dwords following the start of the DSN Capability).
4410 */
4411 dsn = pci_get_dsn(pdev);
4412 if (!dsn)
4413 return NULL;
4414
4415 opt_fw_filename = kzalloc(NAME_MAX, GFP_KERNEL);
4416 if (!opt_fw_filename)
4417 return NULL;
4418
4419 snprintf(opt_fw_filename, NAME_MAX, "%sice-%016llx.pkg",
4420 ICE_DDP_PKG_PATH, dsn);
4421
4422 return opt_fw_filename;
4423 }
4424
4425 /**
4426 * ice_request_fw - Device initialization routine
4427 * @pf: pointer to the PF instance
4428 * @firmware: double pointer to firmware struct
4429 *
4430 * Return: zero when successful, negative values otherwise.
4431 */
ice_request_fw(struct ice_pf * pf,const struct firmware ** firmware)4432 static int ice_request_fw(struct ice_pf *pf, const struct firmware **firmware)
4433 {
4434 char *opt_fw_filename = ice_get_opt_fw_name(pf);
4435 struct device *dev = ice_pf_to_dev(pf);
4436 int err = 0;
4437
4438 /* optional device-specific DDP (if present) overrides the default DDP
4439 * package file. kernel logs a debug message if the file doesn't exist,
4440 * and warning messages for other errors.
4441 */
4442 if (opt_fw_filename) {
4443 err = firmware_request_nowarn(firmware, opt_fw_filename, dev);
4444 kfree(opt_fw_filename);
4445 if (!err)
4446 return err;
4447 }
4448 err = request_firmware(firmware, ICE_DDP_PKG_FILE, dev);
4449 if (err)
4450 dev_err(dev, "The DDP package file was not found or could not be read. Entering Safe Mode\n");
4451
4452 return err;
4453 }
4454
4455 /**
4456 * ice_init_tx_topology - performs Tx topology initialization
4457 * @hw: pointer to the hardware structure
4458 * @firmware: pointer to firmware structure
4459 *
4460 * Return: zero when init was successful, negative values otherwise.
4461 */
4462 static int
ice_init_tx_topology(struct ice_hw * hw,const struct firmware * firmware)4463 ice_init_tx_topology(struct ice_hw *hw, const struct firmware *firmware)
4464 {
4465 u8 num_tx_sched_layers = hw->num_tx_sched_layers;
4466 struct ice_pf *pf = hw->back;
4467 struct device *dev;
4468 int err;
4469
4470 dev = ice_pf_to_dev(pf);
4471 err = ice_cfg_tx_topo(hw, firmware->data, firmware->size);
4472 if (!err) {
4473 if (hw->num_tx_sched_layers > num_tx_sched_layers)
4474 dev_info(dev, "Tx scheduling layers switching feature disabled\n");
4475 else
4476 dev_info(dev, "Tx scheduling layers switching feature enabled\n");
4477 return 0;
4478 } else if (err == -ENODEV) {
4479 /* If we failed to re-initialize the device, we can no longer
4480 * continue loading.
4481 */
4482 dev_warn(dev, "Failed to initialize hardware after applying Tx scheduling configuration.\n");
4483 return err;
4484 } else if (err == -EIO) {
4485 dev_info(dev, "DDP package does not support Tx scheduling layers switching feature - please update to the latest DDP package and try again\n");
4486 return 0;
4487 } else if (err == -EEXIST) {
4488 return 0;
4489 }
4490
4491 /* Do not treat this as a fatal error. */
4492 dev_info(dev, "Failed to apply Tx scheduling configuration, err %pe\n",
4493 ERR_PTR(err));
4494 return 0;
4495 }
4496
4497 /**
4498 * ice_init_supported_rxdids - Initialize supported Rx descriptor IDs
4499 * @hw: pointer to the hardware structure
4500 * @pf: pointer to pf structure
4501 *
4502 * The pf->supported_rxdids bitmap is used to indicate to VFs which descriptor
4503 * formats the PF hardware supports. The exact list of supported RXDIDs
4504 * depends on the loaded DDP package. The IDs can be determined by reading the
4505 * GLFLXP_RXDID_FLAGS register after the DDP package is loaded.
4506 *
4507 * Note that the legacy 32-byte RXDID 0 is always supported but is not listed
4508 * in the DDP package. The 16-byte legacy descriptor is never supported by
4509 * VFs.
4510 */
ice_init_supported_rxdids(struct ice_hw * hw,struct ice_pf * pf)4511 static void ice_init_supported_rxdids(struct ice_hw *hw, struct ice_pf *pf)
4512 {
4513 pf->supported_rxdids = BIT(ICE_RXDID_LEGACY_1);
4514
4515 for (int i = ICE_RXDID_FLEX_NIC; i < ICE_FLEX_DESC_RXDID_MAX_NUM; i++) {
4516 u32 regval;
4517
4518 regval = rd32(hw, GLFLXP_RXDID_FLAGS(i, 0));
4519 if ((regval >> GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S)
4520 & GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M)
4521 pf->supported_rxdids |= BIT(i);
4522 }
4523 }
4524
4525 /**
4526 * ice_init_ddp_config - DDP related configuration
4527 * @hw: pointer to the hardware structure
4528 * @pf: pointer to pf structure
4529 *
4530 * This function loads DDP file from the disk, then initializes Tx
4531 * topology. At the end DDP package is loaded on the card.
4532 *
4533 * Return: zero when init was successful, negative values otherwise.
4534 */
ice_init_ddp_config(struct ice_hw * hw,struct ice_pf * pf)4535 static int ice_init_ddp_config(struct ice_hw *hw, struct ice_pf *pf)
4536 {
4537 struct device *dev = ice_pf_to_dev(pf);
4538 const struct firmware *firmware = NULL;
4539 int err;
4540
4541 err = ice_request_fw(pf, &firmware);
4542 if (err) {
4543 dev_err(dev, "Fail during requesting FW: %d\n", err);
4544 return err;
4545 }
4546
4547 err = ice_init_tx_topology(hw, firmware);
4548 if (err) {
4549 dev_err(dev, "Fail during initialization of Tx topology: %d\n",
4550 err);
4551 release_firmware(firmware);
4552 return err;
4553 }
4554
4555 /* Download firmware to device */
4556 ice_load_pkg(firmware, pf);
4557 release_firmware(firmware);
4558
4559 /* Initialize the supported Rx descriptor IDs after loading DDP */
4560 ice_init_supported_rxdids(hw, pf);
4561
4562 return 0;
4563 }
4564
4565 /**
4566 * ice_print_wake_reason - show the wake up cause in the log
4567 * @pf: pointer to the PF struct
4568 */
ice_print_wake_reason(struct ice_pf * pf)4569 static void ice_print_wake_reason(struct ice_pf *pf)
4570 {
4571 u32 wus = pf->wakeup_reason;
4572 const char *wake_str;
4573
4574 /* if no wake event, nothing to print */
4575 if (!wus)
4576 return;
4577
4578 if (wus & PFPM_WUS_LNKC_M)
4579 wake_str = "Link\n";
4580 else if (wus & PFPM_WUS_MAG_M)
4581 wake_str = "Magic Packet\n";
4582 else if (wus & PFPM_WUS_MNG_M)
4583 wake_str = "Management\n";
4584 else if (wus & PFPM_WUS_FW_RST_WK_M)
4585 wake_str = "Firmware Reset\n";
4586 else
4587 wake_str = "Unknown\n";
4588
4589 dev_info(ice_pf_to_dev(pf), "Wake reason: %s", wake_str);
4590 }
4591
4592 /**
4593 * ice_register_netdev - register netdev
4594 * @vsi: pointer to the VSI struct
4595 */
ice_register_netdev(struct ice_vsi * vsi)4596 static int ice_register_netdev(struct ice_vsi *vsi)
4597 {
4598 int err;
4599
4600 if (!vsi || !vsi->netdev)
4601 return -EIO;
4602
4603 err = register_netdev(vsi->netdev);
4604 if (err)
4605 return err;
4606
4607 set_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
4608 netif_carrier_off(vsi->netdev);
4609 netif_tx_stop_all_queues(vsi->netdev);
4610
4611 return 0;
4612 }
4613
ice_unregister_netdev(struct ice_vsi * vsi)4614 static void ice_unregister_netdev(struct ice_vsi *vsi)
4615 {
4616 if (!vsi || !vsi->netdev)
4617 return;
4618
4619 unregister_netdev(vsi->netdev);
4620 clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
4621 }
4622
4623 /**
4624 * ice_cfg_netdev - Allocate, configure and register a netdev
4625 * @vsi: the VSI associated with the new netdev
4626 *
4627 * Returns 0 on success, negative value on failure
4628 */
ice_cfg_netdev(struct ice_vsi * vsi)4629 static int ice_cfg_netdev(struct ice_vsi *vsi)
4630 {
4631 struct ice_netdev_priv *np;
4632 struct net_device *netdev;
4633 u8 mac_addr[ETH_ALEN];
4634
4635 netdev = alloc_etherdev_mqs(sizeof(*np), ice_get_max_txq(vsi->back),
4636 ice_get_max_rxq(vsi->back));
4637 if (!netdev)
4638 return -ENOMEM;
4639
4640 set_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
4641 vsi->netdev = netdev;
4642 np = netdev_priv(netdev);
4643 np->vsi = vsi;
4644
4645 ice_set_netdev_features(netdev);
4646 ice_set_ops(vsi);
4647
4648 if (vsi->type == ICE_VSI_PF) {
4649 SET_NETDEV_DEV(netdev, ice_pf_to_dev(vsi->back));
4650 ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
4651 eth_hw_addr_set(netdev, mac_addr);
4652 }
4653
4654 netdev->priv_flags |= IFF_UNICAST_FLT;
4655
4656 /* Setup netdev TC information */
4657 ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc);
4658
4659 netdev->max_mtu = ICE_MAX_MTU;
4660
4661 return 0;
4662 }
4663
ice_decfg_netdev(struct ice_vsi * vsi)4664 static void ice_decfg_netdev(struct ice_vsi *vsi)
4665 {
4666 clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
4667 free_netdev(vsi->netdev);
4668 vsi->netdev = NULL;
4669 }
4670
ice_init_dev_hw(struct ice_pf * pf)4671 void ice_init_dev_hw(struct ice_pf *pf)
4672 {
4673 struct ice_hw *hw = &pf->hw;
4674 int err;
4675
4676 ice_init_feature_support(pf);
4677
4678 err = ice_init_ddp_config(hw, pf);
4679
4680 /* if ice_init_ddp_config fails, ICE_FLAG_ADV_FEATURES bit won't be
4681 * set in pf->state, which will cause ice_is_safe_mode to return
4682 * true
4683 */
4684 if (err || ice_is_safe_mode(pf)) {
4685 /* we already got function/device capabilities but these don't
4686 * reflect what the driver needs to do in safe mode. Instead of
4687 * adding conditional logic everywhere to ignore these
4688 * device/function capabilities, override them.
4689 */
4690 ice_set_safe_mode_caps(hw);
4691 }
4692 }
4693
ice_init_dev(struct ice_pf * pf)4694 int ice_init_dev(struct ice_pf *pf)
4695 {
4696 struct device *dev = ice_pf_to_dev(pf);
4697 int err;
4698
4699 ice_set_pf_caps(pf);
4700 err = ice_init_interrupt_scheme(pf);
4701 if (err) {
4702 dev_err(dev, "ice_init_interrupt_scheme failed: %d\n", err);
4703 return -EIO;
4704 }
4705
4706 ice_start_service_task(pf);
4707
4708 return 0;
4709 }
4710
ice_deinit_dev(struct ice_pf * pf)4711 void ice_deinit_dev(struct ice_pf *pf)
4712 {
4713 ice_service_task_stop(pf);
4714
4715 /* Service task is already stopped, so call reset directly. */
4716 ice_reset(&pf->hw, ICE_RESET_PFR);
4717 pci_wait_for_pending_transaction(pf->pdev);
4718 ice_clear_interrupt_scheme(pf);
4719 }
4720
ice_init_features(struct ice_pf * pf)4721 static void ice_init_features(struct ice_pf *pf)
4722 {
4723 struct device *dev = ice_pf_to_dev(pf);
4724
4725 if (ice_is_safe_mode(pf))
4726 return;
4727
4728 /* initialize DDP driven features */
4729 if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
4730 ice_ptp_init(pf);
4731
4732 if (ice_is_feature_supported(pf, ICE_F_GNSS))
4733 ice_gnss_init(pf);
4734
4735 if (ice_is_feature_supported(pf, ICE_F_CGU) ||
4736 ice_is_feature_supported(pf, ICE_F_PHY_RCLK))
4737 ice_dpll_init(pf);
4738
4739 /* Note: Flow director init failure is non-fatal to load */
4740 if (ice_init_fdir(pf))
4741 dev_err(dev, "could not initialize flow director\n");
4742
4743 /* Note: DCB init failure is non-fatal to load */
4744 if (ice_init_pf_dcb(pf, false)) {
4745 clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
4746 clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
4747 } else {
4748 ice_cfg_lldp_mib_change(&pf->hw, true);
4749 }
4750
4751 if (ice_init_lag(pf))
4752 dev_warn(dev, "Failed to init link aggregation support\n");
4753
4754 ice_hwmon_init(pf);
4755 }
4756
ice_deinit_features(struct ice_pf * pf)4757 static void ice_deinit_features(struct ice_pf *pf)
4758 {
4759 if (ice_is_safe_mode(pf))
4760 return;
4761
4762 ice_deinit_lag(pf);
4763 if (test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))
4764 ice_cfg_lldp_mib_change(&pf->hw, false);
4765 ice_deinit_fdir(pf);
4766 if (ice_is_feature_supported(pf, ICE_F_GNSS))
4767 ice_gnss_exit(pf);
4768 if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
4769 ice_ptp_release(pf);
4770 if (test_bit(ICE_FLAG_DPLL, pf->flags))
4771 ice_dpll_deinit(pf);
4772 if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
4773 xa_destroy(&pf->eswitch.reprs);
4774 ice_hwmon_exit(pf);
4775 }
4776
ice_init_wakeup(struct ice_pf * pf)4777 static void ice_init_wakeup(struct ice_pf *pf)
4778 {
4779 /* Save wakeup reason register for later use */
4780 pf->wakeup_reason = rd32(&pf->hw, PFPM_WUS);
4781
4782 /* check for a power management event */
4783 ice_print_wake_reason(pf);
4784
4785 /* clear wake status, all bits */
4786 wr32(&pf->hw, PFPM_WUS, U32_MAX);
4787
4788 /* Disable WoL at init, wait for user to enable */
4789 device_set_wakeup_enable(ice_pf_to_dev(pf), false);
4790 }
4791
ice_init_link(struct ice_pf * pf)4792 static int ice_init_link(struct ice_pf *pf)
4793 {
4794 struct device *dev = ice_pf_to_dev(pf);
4795 int err;
4796
4797 err = ice_init_link_events(pf->hw.port_info);
4798 if (err) {
4799 dev_err(dev, "ice_init_link_events failed: %d\n", err);
4800 return err;
4801 }
4802
4803 /* not a fatal error if this fails */
4804 err = ice_init_nvm_phy_type(pf->hw.port_info);
4805 if (err)
4806 dev_err(dev, "ice_init_nvm_phy_type failed: %d\n", err);
4807
4808 /* not a fatal error if this fails */
4809 err = ice_update_link_info(pf->hw.port_info);
4810 if (err)
4811 dev_err(dev, "ice_update_link_info failed: %d\n", err);
4812
4813 ice_init_link_dflt_override(pf->hw.port_info);
4814
4815 ice_check_link_cfg_err(pf,
4816 pf->hw.port_info->phy.link_info.link_cfg_err);
4817
4818 /* if media available, initialize PHY settings */
4819 if (pf->hw.port_info->phy.link_info.link_info &
4820 ICE_AQ_MEDIA_AVAILABLE) {
4821 /* not a fatal error if this fails */
4822 err = ice_init_phy_user_cfg(pf->hw.port_info);
4823 if (err)
4824 dev_err(dev, "ice_init_phy_user_cfg failed: %d\n", err);
4825
4826 if (!test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) {
4827 struct ice_vsi *vsi = ice_get_main_vsi(pf);
4828 struct ice_link_default_override_tlv *ldo;
4829 bool link_en;
4830
4831 ldo = &pf->link_dflt_override;
4832 link_en = !(ldo->options &
4833 ICE_LINK_OVERRIDE_AUTO_LINK_DIS);
4834
4835 if (vsi)
4836 ice_phy_cfg(vsi, link_en);
4837 }
4838 } else {
4839 set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
4840 }
4841
4842 return err;
4843 }
4844
ice_init_pf_sw(struct ice_pf * pf)4845 static int ice_init_pf_sw(struct ice_pf *pf)
4846 {
4847 bool dvm = ice_is_dvm_ena(&pf->hw);
4848 struct ice_vsi *vsi;
4849 int err;
4850
4851 /* create switch struct for the switch element created by FW on boot */
4852 pf->first_sw = kzalloc_obj(*pf->first_sw);
4853 if (!pf->first_sw)
4854 return -ENOMEM;
4855
4856 if (pf->hw.evb_veb)
4857 pf->first_sw->bridge_mode = BRIDGE_MODE_VEB;
4858 else
4859 pf->first_sw->bridge_mode = BRIDGE_MODE_VEPA;
4860
4861 pf->first_sw->pf = pf;
4862
4863 /* record the sw_id available for later use */
4864 pf->first_sw->sw_id = pf->hw.port_info->sw_id;
4865
4866 err = ice_aq_set_port_params(pf->hw.port_info, dvm, NULL);
4867 if (err)
4868 goto err_aq_set_port_params;
4869
4870 vsi = ice_pf_vsi_setup(pf, pf->hw.port_info);
4871 if (!vsi) {
4872 err = -ENOMEM;
4873 goto err_pf_vsi_setup;
4874 }
4875
4876 return 0;
4877
4878 err_pf_vsi_setup:
4879 err_aq_set_port_params:
4880 kfree(pf->first_sw);
4881 return err;
4882 }
4883
ice_deinit_pf_sw(struct ice_pf * pf)4884 static void ice_deinit_pf_sw(struct ice_pf *pf)
4885 {
4886 struct ice_vsi *vsi = ice_get_main_vsi(pf);
4887
4888 if (!vsi)
4889 return;
4890
4891 ice_vsi_release(vsi);
4892 kfree(pf->first_sw);
4893 }
4894
ice_alloc_vsis(struct ice_pf * pf)4895 static int ice_alloc_vsis(struct ice_pf *pf)
4896 {
4897 struct device *dev = ice_pf_to_dev(pf);
4898
4899 pf->num_alloc_vsi = pf->hw.func_caps.guar_num_vsi;
4900 if (!pf->num_alloc_vsi)
4901 return -EIO;
4902
4903 if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) {
4904 dev_warn(dev,
4905 "limiting the VSI count due to UDP tunnel limitation %d > %d\n",
4906 pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES);
4907 pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES;
4908 }
4909
4910 pf->vsi = devm_kcalloc(dev, pf->num_alloc_vsi, sizeof(*pf->vsi),
4911 GFP_KERNEL);
4912 if (!pf->vsi)
4913 return -ENOMEM;
4914
4915 pf->vsi_stats = devm_kcalloc(dev, pf->num_alloc_vsi,
4916 sizeof(*pf->vsi_stats), GFP_KERNEL);
4917 if (!pf->vsi_stats) {
4918 devm_kfree(dev, pf->vsi);
4919 return -ENOMEM;
4920 }
4921
4922 return 0;
4923 }
4924
ice_dealloc_vsis(struct ice_pf * pf)4925 static void ice_dealloc_vsis(struct ice_pf *pf)
4926 {
4927 devm_kfree(ice_pf_to_dev(pf), pf->vsi_stats);
4928 pf->vsi_stats = NULL;
4929
4930 pf->num_alloc_vsi = 0;
4931 devm_kfree(ice_pf_to_dev(pf), pf->vsi);
4932 pf->vsi = NULL;
4933 }
4934
/**
 * ice_init_devlink - register the devlink instance and its parts
 * @pf: board private structure
 *
 * Registers the devlink parameters first; only when that worked are the
 * regions initialized, devlink registered and health reporting set up.
 *
 * Return: 0 on success, or the error from ice_devlink_register_params().
 */
static int ice_init_devlink(struct ice_pf *pf)
{
	int err = ice_devlink_register_params(pf);

	if (err)
		return err;

	ice_devlink_init_regions(pf);
	ice_devlink_register(pf);
	ice_health_init(pf);

	return 0;
}
4949
/**
 * ice_deinit_devlink - tear down the devlink side of the PF
 * @pf: board private structure
 *
 * Calls the counterparts of the steps taken in ice_init_devlink(), in the
 * opposite order.
 */
static void ice_deinit_devlink(struct ice_pf *pf)
{
	ice_health_deinit(pf);
	ice_devlink_unregister(pf);
	ice_devlink_destroy_regions(pf);
	ice_devlink_unregister_params(pf);
}
4957
ice_init(struct ice_pf * pf)4958 static int ice_init(struct ice_pf *pf)
4959 {
4960 struct device *dev = ice_pf_to_dev(pf);
4961 int err;
4962
4963 err = ice_init_pf(pf);
4964 if (err) {
4965 dev_err(dev, "ice_init_pf failed: %d\n", err);
4966 return err;
4967 }
4968
4969 if (pf->hw.mac_type == ICE_MAC_E830) {
4970 err = pci_enable_ptm(pf->pdev);
4971 if (err)
4972 dev_dbg(dev, "PCIe PTM not supported by PCIe bus/controller\n");
4973 }
4974
4975 err = ice_alloc_vsis(pf);
4976 if (err)
4977 goto unroll_pf_init;
4978
4979 err = ice_init_pf_sw(pf);
4980 if (err)
4981 goto err_init_pf_sw;
4982
4983 ice_init_wakeup(pf);
4984
4985 err = ice_init_link(pf);
4986 if (err)
4987 goto err_init_link;
4988
4989 err = ice_send_version(pf);
4990 if (err)
4991 goto err_init_link;
4992
4993 ice_verify_cacheline_size(pf);
4994
4995 if (ice_is_safe_mode(pf))
4996 ice_set_safe_mode_vlan_cfg(pf);
4997 else
4998 /* print PCI link speed and width */
4999 pcie_print_link_status(pf->pdev);
5000
5001 /* ready to go, so clear down state bit */
5002 clear_bit(ICE_DOWN, pf->state);
5003 clear_bit(ICE_SERVICE_DIS, pf->state);
5004
5005 /* since everything is good, start the service timer */
5006 mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
5007
5008 return 0;
5009
5010 err_init_link:
5011 ice_deinit_pf_sw(pf);
5012 err_init_pf_sw:
5013 ice_dealloc_vsis(pf);
5014 unroll_pf_init:
5015 ice_deinit_pf(pf);
5016 return err;
5017 }
5018
/**
 * ice_deinit - counterpart of ice_init()
 * @pf: board private structure
 *
 * Sets the ICE_SERVICE_DIS and ICE_DOWN state bits (the ones ice_init()
 * clears on success) and then runs the same teardown sequence ice_init()
 * uses on its error path.
 */
static void ice_deinit(struct ice_pf *pf)
{
	set_bit(ICE_SERVICE_DIS, pf->state);
	set_bit(ICE_DOWN, pf->state);

	ice_deinit_pf_sw(pf);
	ice_dealloc_vsis(pf);
	ice_deinit_pf(pf);
}
5028
5029 /**
5030 * ice_load - load pf by init hw and starting VSI
5031 * @pf: pointer to the pf instance
5032 *
5033 * This function has to be called under devl_lock.
5034 */
ice_load(struct ice_pf * pf)5035 int ice_load(struct ice_pf *pf)
5036 {
5037 struct ice_vsi *vsi;
5038 int err;
5039
5040 devl_assert_locked(priv_to_devlink(pf));
5041
5042 vsi = ice_get_main_vsi(pf);
5043
5044 /* init channel list */
5045 INIT_LIST_HEAD(&vsi->ch_list);
5046
5047 err = ice_cfg_netdev(vsi);
5048 if (err)
5049 return err;
5050
5051 /* Setup DCB netlink interface */
5052 ice_dcbnl_setup(vsi);
5053
5054 err = ice_init_mac_fltr(pf);
5055 if (err)
5056 goto err_init_mac_fltr;
5057
5058 err = ice_devlink_create_pf_port(pf);
5059 if (err)
5060 goto err_devlink_create_pf_port;
5061
5062 SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port);
5063
5064 err = ice_register_netdev(vsi);
5065 if (err)
5066 goto err_register_netdev;
5067
5068 err = ice_tc_indir_block_register(vsi);
5069 if (err)
5070 goto err_tc_indir_block_register;
5071
5072 ice_napi_add(vsi);
5073
5074 ice_init_features(pf);
5075
5076 err = ice_init_rdma(pf);
5077 if (err)
5078 goto err_init_rdma;
5079
5080 /* Finalize RDMA: VSI already created, assign info and plug device */
5081 ice_rdma_finalize_setup(pf);
5082
5083 ice_service_task_restart(pf);
5084
5085 clear_bit(ICE_DOWN, pf->state);
5086
5087 return 0;
5088
5089 err_init_rdma:
5090 ice_deinit_features(pf);
5091 ice_tc_indir_block_unregister(vsi);
5092 err_tc_indir_block_register:
5093 ice_unregister_netdev(vsi);
5094 err_register_netdev:
5095 ice_devlink_destroy_pf_port(pf);
5096 err_devlink_create_pf_port:
5097 err_init_mac_fltr:
5098 ice_decfg_netdev(vsi);
5099 return err;
5100 }
5101
/**
 * ice_unload - tear down what ice_load() set up
 * @pf: pointer to the pf instance
 *
 * Must be called with the devlink instance lock (devl_lock) held.
 */
void ice_unload(struct ice_pf *pf)
{
	struct ice_vsi *main_vsi;

	main_vsi = ice_get_main_vsi(pf);

	devl_assert_locked(priv_to_devlink(pf));

	/* RDMA first, then features, netdev and finally the devlink port */
	ice_unplug_aux_dev(pf);
	ice_deinit_rdma(pf);
	ice_deinit_features(pf);
	ice_tc_indir_block_unregister(main_vsi);
	ice_unregister_netdev(main_vsi);
	ice_devlink_destroy_pf_port(pf);
	ice_decfg_netdev(main_vsi);
}
5122
ice_probe_recovery_mode(struct ice_pf * pf)5123 static int ice_probe_recovery_mode(struct ice_pf *pf)
5124 {
5125 struct device *dev = ice_pf_to_dev(pf);
5126 int err;
5127
5128 dev_err(dev, "Firmware recovery mode detected. Limiting functionality. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode\n");
5129
5130 INIT_HLIST_HEAD(&pf->aq_wait_list);
5131 spin_lock_init(&pf->aq_wait_lock);
5132 init_waitqueue_head(&pf->aq_wait_queue);
5133
5134 timer_setup(&pf->serv_tmr, ice_service_timer, 0);
5135 pf->serv_tmr_period = HZ;
5136 INIT_WORK(&pf->serv_task, ice_service_task_recovery_mode);
5137 clear_bit(ICE_SERVICE_SCHED, pf->state);
5138 err = ice_create_all_ctrlq(&pf->hw);
5139 if (err)
5140 return err;
5141
5142 scoped_guard(devl, priv_to_devlink(pf)) {
5143 err = ice_init_devlink(pf);
5144 if (err)
5145 return err;
5146 }
5147
5148 ice_service_task_restart(pf);
5149
5150 return 0;
5151 }
5152
/**
 * ice_probe - Device initialization routine
 * @pdev: PCI device information struct
 * @ent: entry in ice_pci_tbl
 *
 * Enables the PCI device, maps BAR0, allocates the PF structure and fills in
 * the basic hardware identification. A device found in firmware recovery
 * mode is handed over to ice_probe_recovery_mode(). Otherwise the hardware,
 * the shared adapter structure, the device level state, the PF software
 * state, the netdev (ice_load()) and devlink are initialized in that order;
 * any failure unwinds the steps already taken.
 *
 * Returns 0 on success, negative on failure
 */
static int
ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
{
	struct device *dev = &pdev->dev;
	/* set on the unwind path when ice_init_dev() had already succeeded */
	bool need_dev_deinit = false;
	struct ice_adapter *adapter;
	struct ice_pf *pf;
	struct ice_hw *hw;
	int err;

	if (pdev->is_virtfn) {
		dev_err(dev, "can't probe a virtual function\n");
		return -EINVAL;
	}

	/* when under a kdump kernel initiate a reset before enabling the
	 * device in order to clear out any pending DMA transactions. These
	 * transactions can cause some systems to machine check when doing
	 * the pcim_enable_device() below.
	 */
	if (is_kdump_kernel()) {
		pci_save_state(pdev);
		pci_clear_master(pdev);
		err = pcie_flr(pdev);
		if (err)
			return err;
		pci_restore_state(pdev);
	}

	/* this driver uses devres, see
	 * Documentation/driver-api/driver-model/devres.rst
	 */
	err = pcim_enable_device(pdev);
	if (err)
		return err;

	err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), dev_driver_string(dev));
	if (err) {
		dev_err(dev, "BAR0 I/O map error %d\n", err);
		return err;
	}

	pf = ice_allocate_pf(dev);
	if (!pf)
		return -ENOMEM;

	/* initialize Auxiliary index to invalid value */
	pf->aux_idx = -1;

	/* set up for high or low DMA */
	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(dev, "DMA configuration failed: 0x%x\n", err);
		return err;
	}

	pci_set_master(pdev);
	pf->pdev = pdev;
	pci_set_drvdata(pdev, pf);
	set_bit(ICE_DOWN, pf->state);
	/* Disable service task until DOWN bit is cleared */
	set_bit(ICE_SERVICE_DIS, pf->state);

	hw = &pf->hw;
	hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0];
	pci_save_state(pdev);

	/* record the PCI identity of the device in the hw structure */
	hw->back = pf;
	hw->port_info = NULL;
	hw->vendor_id = pdev->vendor;
	hw->device_id = pdev->device;
	pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
	hw->subsystem_vendor_id = pdev->subsystem_vendor;
	hw->subsystem_device_id = pdev->subsystem_device;
	hw->bus.device = PCI_SLOT(pdev->devfn);
	hw->bus.func = PCI_FUNC(pdev->devfn);
	ice_set_ctrlq_len(hw);

	pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M);

#ifndef CONFIG_DYNAMIC_DEBUG
	/* values below -1 of the "debug" module parameter carry a hw
	 * debug mask (see its MODULE_PARM_DESC)
	 */
	if (debug < -1)
		hw->debug_mask = debug;
#endif

	/* recovery mode gets its own, much shorter, probe path */
	if (ice_is_recovery_mode(hw))
		return ice_probe_recovery_mode(pf);

	err = ice_init_hw(hw);
	if (err) {
		dev_err(dev, "ice_init_hw failed: %d\n", err);
		return err;
	}

	ice_init_dev_hw(pf);

	adapter = ice_adapter_get(pdev);
	if (IS_ERR(adapter)) {
		err = PTR_ERR(adapter);
		goto unroll_hw_init;
	}
	pf->adapter = adapter;

	err = ice_init_dev(pf);
	if (err)
		goto unroll_adapter;

	err = ice_init(pf);
	if (err)
		goto unroll_dev_init;

	/* ice_load() must run under the devlink lock; devlink init runs
	 * under it here as well
	 */
	devl_lock(priv_to_devlink(pf));
	err = ice_load(pf);
	if (err)
		goto unroll_init;

	err = ice_init_devlink(pf);
	if (err)
		goto unroll_load;
	devl_unlock(priv_to_devlink(pf));

	return 0;

unroll_load:
	ice_unload(pf);
unroll_init:
	devl_unlock(priv_to_devlink(pf));
	ice_deinit(pf);
unroll_dev_init:
	/* ice_deinit_dev() is called only after ice_deinit_hw() below */
	need_dev_deinit = true;
unroll_adapter:
	ice_adapter_put(pdev);
unroll_hw_init:
	ice_deinit_hw(hw);
	if (need_dev_deinit)
		ice_deinit_dev(pf);
	return err;
}
5298
5299 /**
5300 * ice_set_wake - enable or disable Wake on LAN
5301 * @pf: pointer to the PF struct
5302 *
5303 * Simple helper for WoL control
5304 */
ice_set_wake(struct ice_pf * pf)5305 static void ice_set_wake(struct ice_pf *pf)
5306 {
5307 struct ice_hw *hw = &pf->hw;
5308 bool wol = pf->wol_ena;
5309
5310 /* clear wake state, otherwise new wake events won't fire */
5311 wr32(hw, PFPM_WUS, U32_MAX);
5312
5313 /* enable / disable APM wake up, no RMW needed */
5314 wr32(hw, PFPM_APM, wol ? PFPM_APM_APME_M : 0);
5315
5316 /* set magic packet filter enabled */
5317 wr32(hw, PFPM_WUFC, wol ? PFPM_WUFC_MAG_M : 0);
5318 }
5319
5320 /**
5321 * ice_setup_mc_magic_wake - setup device to wake on multicast magic packet
5322 * @pf: pointer to the PF struct
5323 *
5324 * Issue firmware command to enable multicast magic wake, making
5325 * sure that any locally administered address (LAA) is used for
5326 * wake, and that PF reset doesn't undo the LAA.
5327 */
ice_setup_mc_magic_wake(struct ice_pf * pf)5328 static void ice_setup_mc_magic_wake(struct ice_pf *pf)
5329 {
5330 struct device *dev = ice_pf_to_dev(pf);
5331 struct ice_hw *hw = &pf->hw;
5332 u8 mac_addr[ETH_ALEN];
5333 struct ice_vsi *vsi;
5334 int status;
5335 u8 flags;
5336
5337 if (!pf->wol_ena)
5338 return;
5339
5340 vsi = ice_get_main_vsi(pf);
5341 if (!vsi)
5342 return;
5343
5344 /* Get current MAC address in case it's an LAA */
5345 if (vsi->netdev)
5346 ether_addr_copy(mac_addr, vsi->netdev->dev_addr);
5347 else
5348 ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
5349
5350 flags = ICE_AQC_MAN_MAC_WR_MC_MAG_EN |
5351 ICE_AQC_MAN_MAC_UPDATE_LAA_WOL |
5352 ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP;
5353
5354 status = ice_aq_manage_mac_write(hw, mac_addr, flags, NULL);
5355 if (status)
5356 dev_err(dev, "Failed to enable Multicast Magic Packet wake, err %d aq_err %s\n",
5357 status, libie_aq_str(hw->adminq.sq_last_status));
5358 }
5359
5360 /**
5361 * ice_remove - Device removal routine
5362 * @pdev: PCI device information struct
5363 */
ice_remove(struct pci_dev * pdev)5364 static void ice_remove(struct pci_dev *pdev)
5365 {
5366 struct ice_pf *pf = pci_get_drvdata(pdev);
5367 int i;
5368
5369 for (i = 0; i < ICE_MAX_RESET_WAIT; i++) {
5370 if (!ice_is_reset_in_progress(pf->state))
5371 break;
5372 msleep(100);
5373 }
5374
5375 if (ice_is_recovery_mode(&pf->hw)) {
5376 ice_service_task_stop(pf);
5377 scoped_guard(devl, priv_to_devlink(pf)) {
5378 ice_deinit_devlink(pf);
5379 }
5380 return;
5381 }
5382
5383 if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) {
5384 set_bit(ICE_VF_RESETS_DISABLED, pf->state);
5385 ice_free_vfs(pf);
5386 }
5387
5388 if (!ice_is_safe_mode(pf))
5389 ice_remove_arfs(pf);
5390
5391 devl_lock(priv_to_devlink(pf));
5392 ice_dealloc_all_dynamic_ports(pf);
5393 ice_deinit_devlink(pf);
5394
5395 ice_unload(pf);
5396 devl_unlock(priv_to_devlink(pf));
5397
5398 ice_deinit(pf);
5399 ice_vsi_release_all(pf);
5400
5401 ice_setup_mc_magic_wake(pf);
5402 ice_set_wake(pf);
5403
5404 ice_adapter_put(pdev);
5405 ice_deinit_hw(&pf->hw);
5406
5407 ice_deinit_dev(pf);
5408 ice_aq_cancel_waiting_tasks(pf);
5409 set_bit(ICE_DOWN, pf->state);
5410 }
5411
5412 /**
5413 * ice_shutdown - PCI callback for shutting down device
5414 * @pdev: PCI device information struct
5415 */
ice_shutdown(struct pci_dev * pdev)5416 static void ice_shutdown(struct pci_dev *pdev)
5417 {
5418 struct ice_pf *pf = pci_get_drvdata(pdev);
5419
5420 ice_remove(pdev);
5421
5422 if (system_state == SYSTEM_POWER_OFF) {
5423 pci_wake_from_d3(pdev, pf->wol_ena);
5424 pci_set_power_state(pdev, PCI_D3hot);
5425 }
5426 }
5427
5428 /**
5429 * ice_prepare_for_shutdown - prep for PCI shutdown
5430 * @pf: board private structure
5431 *
5432 * Inform or close all dependent features in prep for PCI device shutdown
5433 */
ice_prepare_for_shutdown(struct ice_pf * pf)5434 static void ice_prepare_for_shutdown(struct ice_pf *pf)
5435 {
5436 struct ice_hw *hw = &pf->hw;
5437 u32 v;
5438
5439 /* Notify VFs of impending reset */
5440 if (ice_check_sq_alive(hw, &hw->mailboxq))
5441 ice_vc_notify_reset(pf);
5442
5443 dev_dbg(ice_pf_to_dev(pf), "Tearing down internal switch for shutdown\n");
5444
5445 /* disable the VSIs and their queues that are not already DOWN */
5446 ice_pf_dis_all_vsi(pf, false);
5447
5448 ice_for_each_vsi(pf, v)
5449 if (pf->vsi[v])
5450 pf->vsi[v]->vsi_num = 0;
5451
5452 ice_shutdown_all_ctrlq(hw, true);
5453 }
5454
5455 /**
5456 * ice_reinit_interrupt_scheme - Reinitialize interrupt scheme
5457 * @pf: board private structure to reinitialize
5458 *
5459 * This routine reinitialize interrupt scheme that was cleared during
5460 * power management suspend callback.
5461 *
5462 * This should be called during resume routine to re-allocate the q_vectors
5463 * and reacquire interrupts.
5464 */
ice_reinit_interrupt_scheme(struct ice_pf * pf)5465 static int ice_reinit_interrupt_scheme(struct ice_pf *pf)
5466 {
5467 struct device *dev = ice_pf_to_dev(pf);
5468 int ret, v;
5469
5470 /* Since we clear MSIX flag during suspend, we need to
5471 * set it back during resume...
5472 */
5473
5474 ret = ice_init_interrupt_scheme(pf);
5475 if (ret) {
5476 dev_err(dev, "Failed to re-initialize interrupt %d\n", ret);
5477 return ret;
5478 }
5479
5480 /* Remap vectors and rings, after successful re-init interrupts */
5481 ice_for_each_vsi(pf, v) {
5482 if (!pf->vsi[v])
5483 continue;
5484
5485 ret = ice_vsi_alloc_q_vectors(pf->vsi[v]);
5486 if (ret)
5487 goto err_reinit;
5488 ice_vsi_map_rings_to_vectors(pf->vsi[v]);
5489 rtnl_lock();
5490 ice_vsi_set_napi_queues(pf->vsi[v]);
5491 rtnl_unlock();
5492 }
5493
5494 ret = ice_req_irq_msix_misc(pf);
5495 if (ret) {
5496 dev_err(dev, "Setting up misc vector failed after device suspend %d\n",
5497 ret);
5498 goto err_reinit;
5499 }
5500
5501 return 0;
5502
5503 err_reinit:
5504 while (v--)
5505 if (pf->vsi[v]) {
5506 rtnl_lock();
5507 ice_vsi_clear_napi_queues(pf->vsi[v]);
5508 rtnl_unlock();
5509 ice_vsi_free_q_vectors(pf->vsi[v]);
5510 }
5511
5512 return ret;
5513 }
5514
5515 /**
5516 * ice_suspend
5517 * @dev: generic device information structure
5518 *
5519 * Power Management callback to quiesce the device and prepare
5520 * for D3 transition.
5521 */
ice_suspend(struct device * dev)5522 static int ice_suspend(struct device *dev)
5523 {
5524 struct pci_dev *pdev = to_pci_dev(dev);
5525 struct ice_pf *pf;
5526 int disabled, v;
5527
5528 pf = pci_get_drvdata(pdev);
5529
5530 if (!ice_pf_state_is_nominal(pf)) {
5531 dev_err(dev, "Device is not ready, no need to suspend it\n");
5532 return -EBUSY;
5533 }
5534
5535 /* Stop watchdog tasks until resume completion.
5536 * Even though it is most likely that the service task is
5537 * disabled if the device is suspended or down, the service task's
5538 * state is controlled by a different state bit, and we should
5539 * store and honor whatever state that bit is in at this point.
5540 */
5541 disabled = ice_service_task_stop(pf);
5542
5543 ice_unplug_aux_dev(pf);
5544 ice_deinit_rdma(pf);
5545
5546 /* Already suspended?, then there is nothing to do */
5547 if (test_and_set_bit(ICE_SUSPENDED, pf->state)) {
5548 if (!disabled)
5549 ice_service_task_restart(pf);
5550 return 0;
5551 }
5552
5553 if (test_bit(ICE_DOWN, pf->state) ||
5554 ice_is_reset_in_progress(pf->state)) {
5555 dev_err(dev, "can't suspend device in reset or already down\n");
5556 if (!disabled)
5557 ice_service_task_restart(pf);
5558 return 0;
5559 }
5560
5561 ice_setup_mc_magic_wake(pf);
5562
5563 ice_prepare_for_shutdown(pf);
5564
5565 ice_set_wake(pf);
5566
5567 /* Free vectors, clear the interrupt scheme and release IRQs
5568 * for proper hibernation, especially with large number of CPUs.
5569 * Otherwise hibernation might fail when mapping all the vectors back
5570 * to CPU0.
5571 */
5572 ice_free_irq_msix_misc(pf);
5573 ice_for_each_vsi(pf, v) {
5574 if (!pf->vsi[v])
5575 continue;
5576 rtnl_lock();
5577 ice_vsi_clear_napi_queues(pf->vsi[v]);
5578 rtnl_unlock();
5579 ice_vsi_free_q_vectors(pf->vsi[v]);
5580 }
5581 ice_clear_interrupt_scheme(pf);
5582
5583 pci_save_state(pdev);
5584 pci_wake_from_d3(pdev, pf->wol_ena);
5585 pci_set_power_state(pdev, PCI_D3hot);
5586 return 0;
5587 }
5588
5589 /**
5590 * ice_resume - PM callback for waking up from D3
5591 * @dev: generic device information structure
5592 */
ice_resume(struct device * dev)5593 static int ice_resume(struct device *dev)
5594 {
5595 struct pci_dev *pdev = to_pci_dev(dev);
5596 enum ice_reset_req reset_type;
5597 struct ice_pf *pf;
5598 struct ice_hw *hw;
5599 int ret;
5600
5601 pci_set_power_state(pdev, PCI_D0);
5602 pci_restore_state(pdev);
5603
5604 if (!pci_device_is_present(pdev))
5605 return -ENODEV;
5606
5607 ret = pci_enable_device_mem(pdev);
5608 if (ret) {
5609 dev_err(dev, "Cannot enable device after suspend\n");
5610 return ret;
5611 }
5612
5613 pf = pci_get_drvdata(pdev);
5614 hw = &pf->hw;
5615
5616 pf->wakeup_reason = rd32(hw, PFPM_WUS);
5617 ice_print_wake_reason(pf);
5618
5619 /* We cleared the interrupt scheme when we suspended, so we need to
5620 * restore it now to resume device functionality.
5621 */
5622 ret = ice_reinit_interrupt_scheme(pf);
5623 if (ret)
5624 dev_err(dev, "Cannot restore interrupt scheme: %d\n", ret);
5625
5626 ret = ice_init_rdma(pf);
5627 if (ret)
5628 dev_err(dev, "Reinitialize RDMA during resume failed: %d\n",
5629 ret);
5630
5631 clear_bit(ICE_DOWN, pf->state);
5632 /* Now perform PF reset and rebuild */
5633 reset_type = ICE_RESET_PFR;
5634 /* re-enable service task for reset, but allow reset to schedule it */
5635 clear_bit(ICE_SERVICE_DIS, pf->state);
5636
5637 if (ice_schedule_reset(pf, reset_type))
5638 dev_err(dev, "Reset during resume failed.\n");
5639
5640 clear_bit(ICE_SUSPENDED, pf->state);
5641 ice_service_task_restart(pf);
5642
5643 /* Restart the service task */
5644 mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
5645
5646 return 0;
5647 }
5648
5649 /**
5650 * ice_pci_err_detected - warning that PCI error has been detected
5651 * @pdev: PCI device information struct
5652 * @err: the type of PCI error
5653 *
5654 * Called to warn that something happened on the PCI bus and the error handling
5655 * is in progress. Allows the driver to gracefully prepare/handle PCI errors.
5656 */
5657 static pci_ers_result_t
ice_pci_err_detected(struct pci_dev * pdev,pci_channel_state_t err)5658 ice_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t err)
5659 {
5660 struct ice_pf *pf = pci_get_drvdata(pdev);
5661
5662 if (!pf) {
5663 dev_err(&pdev->dev, "%s: unrecoverable device error %d\n",
5664 __func__, err);
5665 return PCI_ERS_RESULT_DISCONNECT;
5666 }
5667
5668 if (!test_bit(ICE_SUSPENDED, pf->state)) {
5669 ice_service_task_stop(pf);
5670
5671 if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) {
5672 set_bit(ICE_PFR_REQ, pf->state);
5673 ice_prepare_for_reset(pf, ICE_RESET_PFR);
5674 }
5675 }
5676
5677 return PCI_ERS_RESULT_NEED_RESET;
5678 }
5679
5680 /**
5681 * ice_pci_err_slot_reset - a PCI slot reset has just happened
5682 * @pdev: PCI device information struct
5683 *
5684 * Called to determine if the driver can recover from the PCI slot reset by
5685 * using a register read to determine if the device is recoverable.
5686 */
ice_pci_err_slot_reset(struct pci_dev * pdev)5687 static pci_ers_result_t ice_pci_err_slot_reset(struct pci_dev *pdev)
5688 {
5689 struct ice_pf *pf = pci_get_drvdata(pdev);
5690 pci_ers_result_t result;
5691 int err;
5692 u32 reg;
5693
5694 err = pci_enable_device_mem(pdev);
5695 if (err) {
5696 dev_err(&pdev->dev, "Cannot re-enable PCI device after reset, error %d\n",
5697 err);
5698 result = PCI_ERS_RESULT_DISCONNECT;
5699 } else {
5700 pci_set_master(pdev);
5701 pci_restore_state(pdev);
5702 pci_wake_from_d3(pdev, false);
5703
5704 /* Check for life */
5705 reg = rd32(&pf->hw, GLGEN_RTRIG);
5706 if (!reg)
5707 result = PCI_ERS_RESULT_RECOVERED;
5708 else
5709 result = PCI_ERS_RESULT_DISCONNECT;
5710 }
5711
5712 return result;
5713 }
5714
5715 /**
5716 * ice_pci_err_resume - restart operations after PCI error recovery
5717 * @pdev: PCI device information struct
5718 *
5719 * Called to allow the driver to bring things back up after PCI error and/or
5720 * reset recovery have finished
5721 */
ice_pci_err_resume(struct pci_dev * pdev)5722 static void ice_pci_err_resume(struct pci_dev *pdev)
5723 {
5724 struct ice_pf *pf = pci_get_drvdata(pdev);
5725
5726 if (!pf) {
5727 dev_err(&pdev->dev, "%s failed, device is unrecoverable\n",
5728 __func__);
5729 return;
5730 }
5731
5732 if (test_bit(ICE_SUSPENDED, pf->state)) {
5733 dev_dbg(&pdev->dev, "%s failed to resume normal operations!\n",
5734 __func__);
5735 return;
5736 }
5737
5738 ice_restore_all_vfs_msi_state(pf);
5739
5740 ice_do_reset(pf, ICE_RESET_PFR);
5741 ice_service_task_restart(pf);
5742 mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
5743 }
5744
5745 /**
5746 * ice_pci_err_reset_prepare - prepare device driver for PCI reset
5747 * @pdev: PCI device information struct
5748 */
ice_pci_err_reset_prepare(struct pci_dev * pdev)5749 static void ice_pci_err_reset_prepare(struct pci_dev *pdev)
5750 {
5751 struct ice_pf *pf = pci_get_drvdata(pdev);
5752
5753 if (!test_bit(ICE_SUSPENDED, pf->state)) {
5754 ice_service_task_stop(pf);
5755
5756 if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) {
5757 set_bit(ICE_PFR_REQ, pf->state);
5758 ice_prepare_for_reset(pf, ICE_RESET_PFR);
5759 }
5760 }
5761 }
5762
/**
 * ice_pci_err_reset_done - PCI reset done, device driver reset can begin
 * @pdev: PCI device information struct
 *
 * Shares the implementation of the error-recovery resume callback.
 */
static void ice_pci_err_reset_done(struct pci_dev *pdev)
{
	ice_pci_err_resume(pdev);
}
5771
5772 /* ice_pci_tbl - PCI Device ID Table
5773 *
5774 * Wildcard entries (PCI_ANY_ID) should come last
5775 * Last entry must be all 0s
5776 *
5777 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
5778 * Class, Class Mask, private data (not used) }
5779 */
5780 static const struct pci_device_id ice_pci_tbl[] = {
5781 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE) },
5782 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP) },
5783 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP) },
5784 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_BACKPLANE) },
5785 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_QSFP) },
5786 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_SFP) },
5787 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_BACKPLANE) },
5788 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_QSFP) },
5789 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SFP) },
5790 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_10G_BASE_T) },
5791 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SGMII) },
5792 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_BACKPLANE) },
5793 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_QSFP) },
5794 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SFP) },
5795 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_10G_BASE_T) },
5796 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SGMII) },
5797 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_BACKPLANE) },
5798 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SFP) },
5799 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_10G_BASE_T) },
5800 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SGMII) },
5801 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_BACKPLANE) },
5802 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_SFP) },
5803 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_10G_BASE_T) },
5804 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_1GBE) },
5805 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_QSFP) },
5806 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822_SI_DFLT) },
5807 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_BACKPLANE), },
5808 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_QSFP), },
5809 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_SFP), },
5810 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_SGMII), },
5811 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830CC_BACKPLANE) },
5812 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830CC_QSFP56) },
5813 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830CC_SFP) },
5814 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830CC_SFP_DD) },
5815 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830C_BACKPLANE), },
5816 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_XXV_BACKPLANE), },
5817 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830C_QSFP), },
5818 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_XXV_QSFP), },
5819 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830C_SFP), },
5820 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_XXV_SFP), },
5821 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E835CC_BACKPLANE), },
5822 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E835CC_QSFP56), },
5823 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E835CC_SFP), },
5824 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E835C_BACKPLANE), },
5825 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E835C_QSFP), },
5826 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E835C_SFP), },
5827 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E835_L_BACKPLANE), },
5828 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E835_L_QSFP), },
5829 { PCI_VDEVICE(INTEL, ICE_DEV_ID_E835_L_SFP), },
5830 /* required last entry */
5831 {}
5832 };
5833 MODULE_DEVICE_TABLE(pci, ice_pci_tbl);
5834
/* system sleep callbacks: ice_suspend() on the way down, ice_resume() back */
static DEFINE_SIMPLE_DEV_PM_OPS(ice_pm_ops, ice_suspend, ice_resume);
5836
/* PCI error recovery callbacks; reset_done is a thin wrapper around the
 * resume handler
 */
static const struct pci_error_handlers ice_pci_err_handler = {
	.error_detected = ice_pci_err_detected,
	.slot_reset = ice_pci_err_slot_reset,
	.reset_prepare = ice_pci_err_reset_prepare,
	.reset_done = ice_pci_err_reset_done,
	.resume = ice_pci_err_resume
};
5844
/* PCI driver descriptor registered from ice_module_init(); ties the device
 * ID table to the probe/remove, power management, SR-IOV and error recovery
 * entry points
 */
static struct pci_driver ice_driver = {
	.name = KBUILD_MODNAME,
	.id_table = ice_pci_tbl,
	.probe = ice_probe,
	.remove = ice_remove,
	.driver.pm = pm_sleep_ptr(&ice_pm_ops),
	.shutdown = ice_shutdown,
	.sriov_configure = ice_sriov_configure,
	.sriov_get_vf_total_msix = ice_sriov_get_vf_total_msix,
	.sriov_set_msix_vec_count = ice_sriov_set_msix_vec_count,
	.err_handler = &ice_pci_err_handler
};
5857
5858 /**
5859 * ice_module_init - Driver registration routine
5860 *
5861 * ice_module_init is the first routine called when the driver is
5862 * loaded. All it does is register with the PCI subsystem.
5863 */
ice_module_init(void)5864 static int __init ice_module_init(void)
5865 {
5866 int status = -ENOMEM;
5867
5868 pr_info("%s\n", ice_driver_string);
5869 pr_info("%s\n", ice_copyright);
5870
5871 ice_adv_lnk_speed_maps_init();
5872
5873 ice_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, KBUILD_MODNAME);
5874 if (!ice_wq) {
5875 pr_err("Failed to create workqueue\n");
5876 return status;
5877 }
5878
5879 ice_lag_wq = alloc_ordered_workqueue("ice_lag_wq", 0);
5880 if (!ice_lag_wq) {
5881 pr_err("Failed to create LAG workqueue\n");
5882 goto err_dest_wq;
5883 }
5884
5885 ice_debugfs_init();
5886
5887 status = pci_register_driver(&ice_driver);
5888 if (status) {
5889 pr_err("failed to register PCI driver, err %d\n", status);
5890 goto err_dest_lag_wq;
5891 }
5892
5893 status = ice_sf_driver_register();
5894 if (status) {
5895 pr_err("Failed to register SF driver, err %d\n", status);
5896 goto err_sf_driver;
5897 }
5898
5899 return 0;
5900
5901 err_sf_driver:
5902 pci_unregister_driver(&ice_driver);
5903 err_dest_lag_wq:
5904 destroy_workqueue(ice_lag_wq);
5905 ice_debugfs_exit();
5906 err_dest_wq:
5907 destroy_workqueue(ice_wq);
5908 return status;
5909 }
5910 module_init(ice_module_init);
5911
/**
 * ice_module_exit - Driver exit cleanup routine
 *
 * ice_module_exit is called just before the driver is removed
 * from memory. It unregisters the SF and PCI drivers, removes debugfs and
 * destroys both workqueues created in ice_module_init().
 */
static void __exit ice_module_exit(void)
{
	ice_sf_driver_unregister();
	pci_unregister_driver(&ice_driver);
	ice_debugfs_exit();
	destroy_workqueue(ice_wq);
	destroy_workqueue(ice_lag_wq);
	pr_info("module unloaded\n");
}
module_exit(ice_module_exit);
5928
5929 /**
5930 * ice_set_mac_address - NDO callback to set MAC address
5931 * @netdev: network interface device structure
5932 * @pi: pointer to an address structure
5933 *
5934 * Returns 0 on success, negative on failure
5935 */
ice_set_mac_address(struct net_device * netdev,void * pi)5936 static int ice_set_mac_address(struct net_device *netdev, void *pi)
5937 {
5938 struct ice_netdev_priv *np = netdev_priv(netdev);
5939 struct ice_vsi *vsi = np->vsi;
5940 struct ice_pf *pf = vsi->back;
5941 struct ice_hw *hw = &pf->hw;
5942 struct sockaddr *addr = pi;
5943 u8 old_mac[ETH_ALEN];
5944 u8 flags = 0;
5945 u8 *mac;
5946 int err;
5947
5948 mac = (u8 *)addr->sa_data;
5949
5950 if (!is_valid_ether_addr(mac))
5951 return -EADDRNOTAVAIL;
5952
5953 if (test_bit(ICE_DOWN, pf->state) ||
5954 ice_is_reset_in_progress(pf->state)) {
5955 netdev_err(netdev, "can't set mac %pM. device not ready\n",
5956 mac);
5957 return -EBUSY;
5958 }
5959
5960 if (ice_chnl_dmac_fltr_cnt(pf)) {
5961 netdev_err(netdev, "can't set mac %pM. Device has tc-flower filters, delete all of them and try again\n",
5962 mac);
5963 return -EAGAIN;
5964 }
5965
5966 netif_addr_lock_bh(netdev);
5967 ether_addr_copy(old_mac, netdev->dev_addr);
5968 /* change the netdev's MAC address */
5969 eth_hw_addr_set(netdev, mac);
5970 netif_addr_unlock_bh(netdev);
5971
5972 /* Clean up old MAC filter. Not an error if old filter doesn't exist */
5973 err = ice_fltr_remove_mac(vsi, old_mac, ICE_FWD_TO_VSI);
5974 if (err && err != -ENOENT) {
5975 err = -EADDRNOTAVAIL;
5976 goto err_update_filters;
5977 }
5978
5979 /* Add filter for new MAC. If filter exists, return success */
5980 err = ice_fltr_add_mac(vsi, mac, ICE_FWD_TO_VSI);
5981 if (err == -EEXIST) {
5982 /* Although this MAC filter is already present in hardware it's
5983 * possible in some cases (e.g. bonding) that dev_addr was
5984 * modified outside of the driver and needs to be restored back
5985 * to this value.
5986 */
5987 netdev_dbg(netdev, "filter for MAC %pM already exists\n", mac);
5988
5989 return 0;
5990 } else if (err) {
5991 /* error if the new filter addition failed */
5992 err = -EADDRNOTAVAIL;
5993 }
5994
5995 err_update_filters:
5996 if (err) {
5997 netdev_err(netdev, "can't set MAC %pM. filter update failed\n",
5998 mac);
5999 netif_addr_lock_bh(netdev);
6000 eth_hw_addr_set(netdev, old_mac);
6001 netif_addr_unlock_bh(netdev);
6002 return err;
6003 }
6004
6005 netdev_dbg(vsi->netdev, "updated MAC address to %pM\n",
6006 netdev->dev_addr);
6007
6008 /* write new MAC address to the firmware */
6009 flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL;
6010 err = ice_aq_manage_mac_write(hw, mac, flags, NULL);
6011 if (err) {
6012 netdev_err(netdev, "can't set MAC %pM. write to firmware failed error %d\n",
6013 mac, err);
6014 }
6015 return 0;
6016 }
6017
6018 /**
6019 * ice_set_rx_mode - NDO callback to set the netdev filters
6020 * @netdev: network interface device structure
6021 */
ice_set_rx_mode(struct net_device * netdev)6022 static void ice_set_rx_mode(struct net_device *netdev)
6023 {
6024 struct ice_netdev_priv *np = netdev_priv(netdev);
6025 struct ice_vsi *vsi = np->vsi;
6026
6027 if (!vsi || ice_is_switchdev_running(vsi->back))
6028 return;
6029
6030 /* Set the flags to synchronize filters
6031 * ndo_set_rx_mode may be triggered even without a change in netdev
6032 * flags
6033 */
6034 set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
6035 set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
6036 set_bit(ICE_FLAG_FLTR_SYNC, vsi->back->flags);
6037
6038 /* schedule our worker thread which will take care of
6039 * applying the new filter changes
6040 */
6041 ice_service_task_schedule(vsi->back);
6042 }
6043
6044 /**
6045 * ice_set_tx_maxrate - NDO callback to set the maximum per-queue bitrate
6046 * @netdev: network interface device structure
6047 * @queue_index: Queue ID
6048 * @maxrate: maximum bandwidth in Mbps
6049 */
6050 static int
ice_set_tx_maxrate(struct net_device * netdev,int queue_index,u32 maxrate)6051 ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate)
6052 {
6053 struct ice_netdev_priv *np = netdev_priv(netdev);
6054 struct ice_vsi *vsi = np->vsi;
6055 u16 q_handle;
6056 int status;
6057 u8 tc;
6058
6059 /* Validate maxrate requested is within permitted range */
6060 if (maxrate && (maxrate > (ICE_SCHED_MAX_BW / 1000))) {
6061 netdev_err(netdev, "Invalid max rate %d specified for the queue %d\n",
6062 maxrate, queue_index);
6063 return -EINVAL;
6064 }
6065
6066 q_handle = vsi->tx_rings[queue_index]->q_handle;
6067 tc = ice_dcb_get_tc(vsi, queue_index);
6068
6069 vsi = ice_locate_vsi_using_queue(vsi, queue_index);
6070 if (!vsi) {
6071 netdev_err(netdev, "Invalid VSI for given queue %d\n",
6072 queue_index);
6073 return -EINVAL;
6074 }
6075
6076 /* Set BW back to default, when user set maxrate to 0 */
6077 if (!maxrate)
6078 status = ice_cfg_q_bw_dflt_lmt(vsi->port_info, vsi->idx, tc,
6079 q_handle, ICE_MAX_BW);
6080 else
6081 status = ice_cfg_q_bw_lmt(vsi->port_info, vsi->idx, tc,
6082 q_handle, ICE_MAX_BW, maxrate * 1000);
6083 if (status)
6084 netdev_err(netdev, "Unable to set Tx max rate, error %d\n",
6085 status);
6086
6087 return status;
6088 }
6089
6090 /**
6091 * ice_fdb_add - add an entry to the hardware database
6092 * @ndm: the input from the stack
6093 * @tb: pointer to array of nladdr (unused)
6094 * @dev: the net device pointer
6095 * @addr: the MAC address entry being added
6096 * @vid: VLAN ID
6097 * @flags: instructions from stack about fdb operation
6098 * @notified: whether notification was emitted
6099 * @extack: netlink extended ack
6100 */
6101 static int
ice_fdb_add(struct ndmsg * ndm,struct nlattr __always_unused * tb[],struct net_device * dev,const unsigned char * addr,u16 vid,u16 flags,bool * notified,struct netlink_ext_ack __always_unused * extack)6102 ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[],
6103 struct net_device *dev, const unsigned char *addr, u16 vid,
6104 u16 flags, bool *notified,
6105 struct netlink_ext_ack __always_unused *extack)
6106 {
6107 int err;
6108
6109 if (vid) {
6110 netdev_err(dev, "VLANs aren't supported yet for dev_uc|mc_add()\n");
6111 return -EINVAL;
6112 }
6113 if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
6114 netdev_err(dev, "FDB only supports static addresses\n");
6115 return -EINVAL;
6116 }
6117
6118 if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
6119 err = dev_uc_add_excl(dev, addr);
6120 else if (is_multicast_ether_addr(addr))
6121 err = dev_mc_add_excl(dev, addr);
6122 else
6123 err = -EINVAL;
6124
6125 /* Only return duplicate errors if NLM_F_EXCL is set */
6126 if (err == -EEXIST && !(flags & NLM_F_EXCL))
6127 err = 0;
6128
6129 return err;
6130 }
6131
6132 /**
6133 * ice_fdb_del - delete an entry from the hardware database
6134 * @ndm: the input from the stack
6135 * @tb: pointer to array of nladdr (unused)
6136 * @dev: the net device pointer
6137 * @addr: the MAC address entry being added
6138 * @vid: VLAN ID
6139 * @notified: whether notification was emitted
6140 * @extack: netlink extended ack
6141 */
6142 static int
ice_fdb_del(struct ndmsg * ndm,__always_unused struct nlattr * tb[],struct net_device * dev,const unsigned char * addr,__always_unused u16 vid,bool * notified,struct netlink_ext_ack * extack)6143 ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[],
6144 struct net_device *dev, const unsigned char *addr,
6145 __always_unused u16 vid, bool *notified,
6146 struct netlink_ext_ack *extack)
6147 {
6148 int err;
6149
6150 if (ndm->ndm_state & NUD_PERMANENT) {
6151 netdev_err(dev, "FDB only supports static addresses\n");
6152 return -EINVAL;
6153 }
6154
6155 if (is_unicast_ether_addr(addr))
6156 err = dev_uc_del(dev, addr);
6157 else if (is_multicast_ether_addr(addr))
6158 err = dev_mc_del(dev, addr);
6159 else
6160 err = -EINVAL;
6161
6162 return err;
6163 }
6164
/* Every VLAN tag offload bit handled by this file: Rx (stripping) and Tx
 * (insertion), for both the CTAG and the STAG variants.
 */
#define NETIF_VLAN_OFFLOAD_FEATURES	(NETIF_F_HW_VLAN_CTAG_RX | \
					 NETIF_F_HW_VLAN_CTAG_TX | \
					 NETIF_F_HW_VLAN_STAG_RX | \
					 NETIF_F_HW_VLAN_STAG_TX)

/* Rx-side (stripping) VLAN offload bits only, CTAG and STAG */
#define NETIF_VLAN_STRIPPING_FEATURES	(NETIF_F_HW_VLAN_CTAG_RX | \
					 NETIF_F_HW_VLAN_STAG_RX)

/* VLAN filtering bits, CTAG and STAG */
#define NETIF_VLAN_FILTERING_FEATURES	(NETIF_F_HW_VLAN_CTAG_FILTER | \
					 NETIF_F_HW_VLAN_STAG_FILTER)
6175
6176 /**
6177 * ice_fix_features - fix the netdev features flags based on device limitations
6178 * @netdev: ptr to the netdev that flags are being fixed on
6179 * @features: features that need to be checked and possibly fixed
6180 *
6181 * Make sure any fixups are made to features in this callback. This enables the
6182 * driver to not have to check unsupported configurations throughout the driver
6183 * because that's the responsiblity of this callback.
6184 *
6185 * Single VLAN Mode (SVM) Supported Features:
6186 * NETIF_F_HW_VLAN_CTAG_FILTER
6187 * NETIF_F_HW_VLAN_CTAG_RX
6188 * NETIF_F_HW_VLAN_CTAG_TX
6189 *
6190 * Double VLAN Mode (DVM) Supported Features:
6191 * NETIF_F_HW_VLAN_CTAG_FILTER
6192 * NETIF_F_HW_VLAN_CTAG_RX
6193 * NETIF_F_HW_VLAN_CTAG_TX
6194 *
6195 * NETIF_F_HW_VLAN_STAG_FILTER
6196 * NETIF_HW_VLAN_STAG_RX
6197 * NETIF_HW_VLAN_STAG_TX
6198 *
6199 * Features that need fixing:
6200 * Cannot simultaneously enable CTAG and STAG stripping and/or insertion.
6201 * These are mutually exlusive as the VSI context cannot support multiple
6202 * VLAN ethertypes simultaneously for stripping and/or insertion. If this
6203 * is not done, then default to clearing the requested STAG offload
6204 * settings.
6205 *
6206 * All supported filtering has to be enabled or disabled together. For
6207 * example, in DVM, CTAG and STAG filtering have to be enabled and disabled
6208 * together. If this is not done, then default to VLAN filtering disabled.
6209 * These are mutually exclusive as there is currently no way to
6210 * enable/disable VLAN filtering based on VLAN ethertype when using VLAN
6211 * prune rules.
6212 */
6213 static netdev_features_t
ice_fix_features(struct net_device * netdev,netdev_features_t features)6214 ice_fix_features(struct net_device *netdev, netdev_features_t features)
6215 {
6216 struct ice_netdev_priv *np = netdev_priv(netdev);
6217 netdev_features_t req_vlan_fltr, cur_vlan_fltr;
6218 bool cur_ctag, cur_stag, req_ctag, req_stag;
6219
6220 cur_vlan_fltr = netdev->features & NETIF_VLAN_FILTERING_FEATURES;
6221 cur_ctag = cur_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;
6222 cur_stag = cur_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;
6223
6224 req_vlan_fltr = features & NETIF_VLAN_FILTERING_FEATURES;
6225 req_ctag = req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;
6226 req_stag = req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;
6227
6228 if (req_vlan_fltr != cur_vlan_fltr) {
6229 if (ice_is_dvm_ena(&np->vsi->back->hw)) {
6230 if (req_ctag && req_stag) {
6231 features |= NETIF_VLAN_FILTERING_FEATURES;
6232 } else if (!req_ctag && !req_stag) {
6233 features &= ~NETIF_VLAN_FILTERING_FEATURES;
6234 } else if ((!cur_ctag && req_ctag && !cur_stag) ||
6235 (!cur_stag && req_stag && !cur_ctag)) {
6236 features |= NETIF_VLAN_FILTERING_FEATURES;
6237 netdev_warn(netdev, "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been enabled for both types.\n");
6238 } else if ((cur_ctag && !req_ctag && cur_stag) ||
6239 (cur_stag && !req_stag && cur_ctag)) {
6240 features &= ~NETIF_VLAN_FILTERING_FEATURES;
6241 netdev_warn(netdev, "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been disabled for both types.\n");
6242 }
6243 } else {
6244 if (req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER)
6245 netdev_warn(netdev, "cannot support requested 802.1ad filtering setting in SVM mode\n");
6246
6247 if (req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER)
6248 features |= NETIF_F_HW_VLAN_CTAG_FILTER;
6249 }
6250 }
6251
6252 if ((features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX)) &&
6253 (features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX))) {
6254 netdev_warn(netdev, "cannot support CTAG and STAG VLAN stripping and/or insertion simultaneously since CTAG and STAG offloads are mutually exclusive, clearing STAG offload settings\n");
6255 features &= ~(NETIF_F_HW_VLAN_STAG_RX |
6256 NETIF_F_HW_VLAN_STAG_TX);
6257 }
6258
6259 if (!(netdev->features & NETIF_F_RXFCS) &&
6260 (features & NETIF_F_RXFCS) &&
6261 (features & NETIF_VLAN_STRIPPING_FEATURES) &&
6262 !ice_vsi_has_non_zero_vlans(np->vsi)) {
6263 netdev_warn(netdev, "Disabling VLAN stripping as FCS/CRC stripping is also disabled and there is no VLAN configured\n");
6264 features &= ~NETIF_VLAN_STRIPPING_FEATURES;
6265 }
6266
6267 return features;
6268 }
6269
6270 /**
6271 * ice_set_rx_rings_vlan_proto - update rings with new stripped VLAN proto
6272 * @vsi: PF's VSI
6273 * @vlan_ethertype: VLAN ethertype (802.1Q or 802.1ad) in network byte order
6274 *
6275 * Store current stripped VLAN proto in ring packet context,
6276 * so it can be accessed more efficiently by packet processing code.
6277 */
6278 static void
ice_set_rx_rings_vlan_proto(struct ice_vsi * vsi,__be16 vlan_ethertype)6279 ice_set_rx_rings_vlan_proto(struct ice_vsi *vsi, __be16 vlan_ethertype)
6280 {
6281 u16 i;
6282
6283 ice_for_each_alloc_rxq(vsi, i)
6284 vsi->rx_rings[i]->pkt_ctx.vlan_proto = vlan_ethertype;
6285 }
6286
6287 /**
6288 * ice_set_vlan_offload_features - set VLAN offload features for the PF VSI
6289 * @vsi: PF's VSI
6290 * @features: features used to determine VLAN offload settings
6291 *
6292 * First, determine the vlan_ethertype based on the VLAN offload bits in
6293 * features. Then determine if stripping and insertion should be enabled or
6294 * disabled. Finally enable or disable VLAN stripping and insertion.
6295 */
6296 static int
ice_set_vlan_offload_features(struct ice_vsi * vsi,netdev_features_t features)6297 ice_set_vlan_offload_features(struct ice_vsi *vsi, netdev_features_t features)
6298 {
6299 bool enable_stripping = true, enable_insertion = true;
6300 struct ice_vsi_vlan_ops *vlan_ops;
6301 int strip_err = 0, insert_err = 0;
6302 u16 vlan_ethertype = 0;
6303
6304 vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
6305
6306 if (features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX))
6307 vlan_ethertype = ETH_P_8021AD;
6308 else if (features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX))
6309 vlan_ethertype = ETH_P_8021Q;
6310
6311 if (!(features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_CTAG_RX)))
6312 enable_stripping = false;
6313 if (!(features & (NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_CTAG_TX)))
6314 enable_insertion = false;
6315
6316 if (enable_stripping)
6317 strip_err = vlan_ops->ena_stripping(vsi, vlan_ethertype);
6318 else
6319 strip_err = vlan_ops->dis_stripping(vsi);
6320
6321 if (enable_insertion)
6322 insert_err = vlan_ops->ena_insertion(vsi, vlan_ethertype);
6323 else
6324 insert_err = vlan_ops->dis_insertion(vsi);
6325
6326 if (strip_err || insert_err)
6327 return -EIO;
6328
6329 ice_set_rx_rings_vlan_proto(vsi, enable_stripping ?
6330 htons(vlan_ethertype) : 0);
6331
6332 return 0;
6333 }
6334
6335 /**
6336 * ice_set_vlan_filtering_features - set VLAN filtering features for the PF VSI
6337 * @vsi: PF's VSI
6338 * @features: features used to determine VLAN filtering settings
6339 *
6340 * Enable or disable Rx VLAN filtering based on the VLAN filtering bits in the
6341 * features.
6342 */
6343 static int
ice_set_vlan_filtering_features(struct ice_vsi * vsi,netdev_features_t features)6344 ice_set_vlan_filtering_features(struct ice_vsi *vsi, netdev_features_t features)
6345 {
6346 struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
6347 int err = 0;
6348
6349 /* support Single VLAN Mode (SVM) and Double VLAN Mode (DVM) by checking
6350 * if either bit is set. In switchdev mode Rx filtering should never be
6351 * enabled.
6352 */
6353 if ((features &
6354 (NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)) &&
6355 !ice_is_eswitch_mode_switchdev(vsi->back))
6356 err = vlan_ops->ena_rx_filtering(vsi);
6357 else
6358 err = vlan_ops->dis_rx_filtering(vsi);
6359
6360 return err;
6361 }
6362
6363 /**
6364 * ice_set_vlan_features - set VLAN settings based on suggested feature set
6365 * @netdev: ptr to the netdev being adjusted
6366 * @features: the feature set that the stack is suggesting
6367 *
6368 * Only update VLAN settings if the requested_vlan_features are different than
6369 * the current_vlan_features.
6370 */
6371 static int
ice_set_vlan_features(struct net_device * netdev,netdev_features_t features)6372 ice_set_vlan_features(struct net_device *netdev, netdev_features_t features)
6373 {
6374 netdev_features_t current_vlan_features, requested_vlan_features;
6375 struct ice_netdev_priv *np = netdev_priv(netdev);
6376 struct ice_vsi *vsi = np->vsi;
6377 int err;
6378
6379 current_vlan_features = netdev->features & NETIF_VLAN_OFFLOAD_FEATURES;
6380 requested_vlan_features = features & NETIF_VLAN_OFFLOAD_FEATURES;
6381 if (current_vlan_features ^ requested_vlan_features) {
6382 if ((features & NETIF_F_RXFCS) &&
6383 (features & NETIF_VLAN_STRIPPING_FEATURES)) {
6384 dev_err(ice_pf_to_dev(vsi->back),
6385 "To enable VLAN stripping, you must first enable FCS/CRC stripping\n");
6386 return -EIO;
6387 }
6388
6389 err = ice_set_vlan_offload_features(vsi, features);
6390 if (err)
6391 return err;
6392 }
6393
6394 current_vlan_features = netdev->features &
6395 NETIF_VLAN_FILTERING_FEATURES;
6396 requested_vlan_features = features & NETIF_VLAN_FILTERING_FEATURES;
6397 if (current_vlan_features ^ requested_vlan_features) {
6398 err = ice_set_vlan_filtering_features(vsi, features);
6399 if (err)
6400 return err;
6401 }
6402
6403 return 0;
6404 }
6405
6406 /**
6407 * ice_set_loopback - turn on/off loopback mode on underlying PF
6408 * @vsi: ptr to VSI
6409 * @ena: flag to indicate the on/off setting
6410 */
ice_set_loopback(struct ice_vsi * vsi,bool ena)6411 static int ice_set_loopback(struct ice_vsi *vsi, bool ena)
6412 {
6413 bool if_running = netif_running(vsi->netdev);
6414 int ret;
6415
6416 if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
6417 ret = ice_down(vsi);
6418 if (ret) {
6419 netdev_err(vsi->netdev, "Preparing device to toggle loopback failed\n");
6420 return ret;
6421 }
6422 }
6423 ret = ice_aq_set_mac_loopback(&vsi->back->hw, ena, NULL);
6424 if (ret)
6425 netdev_err(vsi->netdev, "Failed to toggle loopback state\n");
6426 if (if_running)
6427 ret = ice_up(vsi);
6428
6429 return ret;
6430 }
6431
6432 /**
6433 * ice_set_features - set the netdev feature flags
6434 * @netdev: ptr to the netdev being adjusted
6435 * @features: the feature set that the stack is suggesting
6436 */
6437 static int
ice_set_features(struct net_device * netdev,netdev_features_t features)6438 ice_set_features(struct net_device *netdev, netdev_features_t features)
6439 {
6440 netdev_features_t changed = netdev->features ^ features;
6441 struct ice_netdev_priv *np = netdev_priv(netdev);
6442 struct ice_vsi *vsi = np->vsi;
6443 struct ice_pf *pf = vsi->back;
6444 int ret = 0;
6445
6446 /* Don't set any netdev advanced features with device in Safe Mode */
6447 if (ice_is_safe_mode(pf)) {
6448 dev_err(ice_pf_to_dev(pf),
6449 "Device is in Safe Mode - not enabling advanced netdev features\n");
6450 return ret;
6451 }
6452
6453 /* Do not change setting during reset */
6454 if (ice_is_reset_in_progress(pf->state)) {
6455 dev_err(ice_pf_to_dev(pf),
6456 "Device is resetting, changing advanced netdev features temporarily unavailable.\n");
6457 return -EBUSY;
6458 }
6459
6460 /* Multiple features can be changed in one call so keep features in
6461 * separate if/else statements to guarantee each feature is checked
6462 */
6463 if (changed & NETIF_F_RXHASH)
6464 ice_vsi_manage_rss_lut(vsi, !!(features & NETIF_F_RXHASH));
6465
6466 ret = ice_set_vlan_features(netdev, features);
6467 if (ret)
6468 return ret;
6469
6470 /* Turn on receive of FCS aka CRC, and after setting this
6471 * flag the packet data will have the 4 byte CRC appended
6472 */
6473 if (changed & NETIF_F_RXFCS) {
6474 if ((features & NETIF_F_RXFCS) &&
6475 (features & NETIF_VLAN_STRIPPING_FEATURES)) {
6476 dev_err(ice_pf_to_dev(vsi->back),
6477 "To disable FCS/CRC stripping, you must first disable VLAN stripping\n");
6478 return -EIO;
6479 }
6480
6481 ice_vsi_cfg_crc_strip(vsi, !!(features & NETIF_F_RXFCS));
6482 ret = ice_down_up(vsi);
6483 if (ret)
6484 return ret;
6485 }
6486
6487 if (changed & NETIF_F_NTUPLE) {
6488 bool ena = !!(features & NETIF_F_NTUPLE);
6489
6490 ice_vsi_manage_fdir(vsi, ena);
6491 ena ? ice_init_arfs(vsi) : ice_clear_arfs(vsi);
6492 }
6493
6494 /* don't turn off hw_tc_offload when ADQ is already enabled */
6495 if (!(features & NETIF_F_HW_TC) && ice_is_adq_active(pf)) {
6496 dev_err(ice_pf_to_dev(pf), "ADQ is active, can't turn hw_tc_offload off\n");
6497 return -EACCES;
6498 }
6499
6500 if (changed & NETIF_F_HW_TC) {
6501 bool ena = !!(features & NETIF_F_HW_TC);
6502
6503 assign_bit(ICE_FLAG_CLS_FLOWER, pf->flags, ena);
6504 }
6505
6506 if (changed & NETIF_F_LOOPBACK)
6507 ret = ice_set_loopback(vsi, !!(features & NETIF_F_LOOPBACK));
6508
6509 /* Due to E830 hardware limitations, TSO (NETIF_F_ALL_TSO) with GCS
6510 * (NETIF_F_HW_CSUM) is not supported.
6511 */
6512 if (ice_is_feature_supported(pf, ICE_F_GCS) &&
6513 ((features & NETIF_F_HW_CSUM) && (features & NETIF_F_ALL_TSO))) {
6514 if (netdev->features & NETIF_F_HW_CSUM)
6515 dev_err(ice_pf_to_dev(pf), "To enable TSO, you must first disable HW checksum.\n");
6516 else
6517 dev_err(ice_pf_to_dev(pf), "To enable HW checksum, you must first disable TSO.\n");
6518 return -EIO;
6519 }
6520
6521 return ret;
6522 }
6523
6524 /**
6525 * ice_vsi_vlan_setup - Setup VLAN offload properties on a PF VSI
6526 * @vsi: VSI to setup VLAN properties for
6527 */
ice_vsi_vlan_setup(struct ice_vsi * vsi)6528 static int ice_vsi_vlan_setup(struct ice_vsi *vsi)
6529 {
6530 int err;
6531
6532 err = ice_set_vlan_offload_features(vsi, vsi->netdev->features);
6533 if (err)
6534 return err;
6535
6536 err = ice_set_vlan_filtering_features(vsi, vsi->netdev->features);
6537 if (err)
6538 return err;
6539
6540 return ice_vsi_add_vlan_zero(vsi);
6541 }
6542
6543 /**
6544 * ice_vsi_cfg_lan - Setup the VSI lan related config
6545 * @vsi: the VSI being configured
6546 *
6547 * Return 0 on success and negative value on error
6548 */
ice_vsi_cfg_lan(struct ice_vsi * vsi)6549 int ice_vsi_cfg_lan(struct ice_vsi *vsi)
6550 {
6551 int err;
6552
6553 if (vsi->netdev && vsi->type == ICE_VSI_PF) {
6554 ice_set_rx_mode(vsi->netdev);
6555
6556 err = ice_vsi_vlan_setup(vsi);
6557 if (err)
6558 return err;
6559 }
6560 ice_vsi_cfg_dcb_rings(vsi);
6561
6562 err = ice_vsi_cfg_lan_txqs(vsi);
6563 if (!err && ice_is_xdp_ena_vsi(vsi))
6564 err = ice_vsi_cfg_xdp_txqs(vsi);
6565 if (!err)
6566 err = ice_vsi_cfg_rxqs(vsi);
6567
6568 return err;
6569 }
6570
/* THEORY OF MODERATION:
 * The ice driver hardware works differently than the hardware that DIMLIB was
 * originally made for. ice hardware doesn't have packet count limits that
 * can trigger an interrupt, but it *does* have interrupt rate limit support,
 * which is hard-coded to a limit of 250,000 ints/second.
 * If not using dynamic moderation, the INTRL value can be modified
 * by ethtool rx-usecs-high.
 *
 * struct ice_dim is one entry of the per-direction moderation profile tables
 * (rx_profile[] and tx_profile[]) that follow.
 */
struct ice_dim {
	/* the throttle rate for interrupts, basically worst case delay before
	 * an initial interrupt fires, value is stored in microseconds.
	 */
	u16 itr;
};
6585
/* Make a different profile for Rx that doesn't allow quite so aggressive
 * moderation at the high end (it maxes out at 126us or about 8k interrupts a
 * second).
 */
static const struct ice_dim rx_profile[] = {
	{2},    /* 500,000 ints/s, capped at 250K by INTRL */
	{8},    /* 125,000 ints/s */
	{16},   /*  62,500 ints/s */
	{62},   /*  16,129 ints/s */
	{126}   /*   7,936 ints/s */
};
6597
/* The transmit profile, which has the same sorts of values
 * as the previous struct. Each rate below is 1,000,000 us / itr.
 */
static const struct ice_dim tx_profile[] = {
	{2},    /* 500,000 ints/s, capped at 250K by INTRL */
	{8},    /* 125,000 ints/s */
	{40},   /*  25,000 ints/s */
	{128},  /*   7,812 ints/s */
	{256}   /*   3,906 ints/s */
};
6608
ice_tx_dim_work(struct work_struct * work)6609 static void ice_tx_dim_work(struct work_struct *work)
6610 {
6611 struct ice_ring_container *rc;
6612 struct dim *dim;
6613 u16 itr;
6614
6615 dim = container_of(work, struct dim, work);
6616 rc = dim->priv;
6617
6618 WARN_ON(dim->profile_ix >= ARRAY_SIZE(tx_profile));
6619
6620 /* look up the values in our local table */
6621 itr = tx_profile[dim->profile_ix].itr;
6622
6623 ice_trace(tx_dim_work, container_of(rc, struct ice_q_vector, tx), dim);
6624 ice_write_itr(rc, itr);
6625
6626 dim->state = DIM_START_MEASURE;
6627 }
6628
ice_rx_dim_work(struct work_struct * work)6629 static void ice_rx_dim_work(struct work_struct *work)
6630 {
6631 struct ice_ring_container *rc;
6632 struct dim *dim;
6633 u16 itr;
6634
6635 dim = container_of(work, struct dim, work);
6636 rc = dim->priv;
6637
6638 WARN_ON(dim->profile_ix >= ARRAY_SIZE(rx_profile));
6639
6640 /* look up the values in our local table */
6641 itr = rx_profile[dim->profile_ix].itr;
6642
6643 ice_trace(rx_dim_work, container_of(rc, struct ice_q_vector, rx), dim);
6644 ice_write_itr(rc, itr);
6645
6646 dim->state = DIM_START_MEASURE;
6647 }
6648
/* Initial index into rx_profile[]/tx_profile[] used by ice_init_moderation() */
#define ICE_DIM_DEFAULT_PROFILE_IX 1
6650
6651 /**
6652 * ice_init_moderation - set up interrupt moderation
6653 * @q_vector: the vector containing rings to be configured
6654 *
6655 * Set up interrupt moderation registers, with the intent to do the right thing
6656 * when called from reset or from probe, and whether or not dynamic moderation
6657 * is enabled or not. Take special care to write all the registers in both
6658 * dynamic moderation mode or not in order to make sure hardware is in a known
6659 * state.
6660 */
ice_init_moderation(struct ice_q_vector * q_vector)6661 static void ice_init_moderation(struct ice_q_vector *q_vector)
6662 {
6663 struct ice_ring_container *rc;
6664 bool tx_dynamic, rx_dynamic;
6665
6666 rc = &q_vector->tx;
6667 INIT_WORK(&rc->dim.work, ice_tx_dim_work);
6668 rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
6669 rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX;
6670 rc->dim.priv = rc;
6671 tx_dynamic = ITR_IS_DYNAMIC(rc);
6672
6673 /* set the initial TX ITR to match the above */
6674 ice_write_itr(rc, tx_dynamic ?
6675 tx_profile[rc->dim.profile_ix].itr : rc->itr_setting);
6676
6677 rc = &q_vector->rx;
6678 INIT_WORK(&rc->dim.work, ice_rx_dim_work);
6679 rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
6680 rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX;
6681 rc->dim.priv = rc;
6682 rx_dynamic = ITR_IS_DYNAMIC(rc);
6683
6684 /* set the initial RX ITR to match the above */
6685 ice_write_itr(rc, rx_dynamic ? rx_profile[rc->dim.profile_ix].itr :
6686 rc->itr_setting);
6687
6688 ice_set_q_vector_intrl(q_vector);
6689 }
6690
6691 /**
6692 * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI
6693 * @vsi: the VSI being configured
6694 */
ice_napi_enable_all(struct ice_vsi * vsi)6695 static void ice_napi_enable_all(struct ice_vsi *vsi)
6696 {
6697 int q_idx;
6698
6699 if (!vsi->netdev)
6700 return;
6701
6702 ice_for_each_q_vector(vsi, q_idx) {
6703 struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
6704
6705 ice_init_moderation(q_vector);
6706
6707 if (q_vector->rx.rx_ring || q_vector->tx.tx_ring)
6708 napi_enable(&q_vector->napi);
6709 }
6710 }
6711
6712 /**
6713 * ice_up_complete - Finish the last steps of bringing up a connection
6714 * @vsi: The VSI being configured
6715 *
6716 * Return 0 on success and negative value on error
6717 */
ice_up_complete(struct ice_vsi * vsi)6718 static int ice_up_complete(struct ice_vsi *vsi)
6719 {
6720 struct ice_pf *pf = vsi->back;
6721 int err;
6722
6723 ice_vsi_cfg_msix(vsi);
6724
6725 /* Enable only Rx rings, Tx rings were enabled by the FW when the
6726 * Tx queue group list was configured and the context bits were
6727 * programmed using ice_vsi_cfg_txqs
6728 */
6729 err = ice_vsi_start_all_rx_rings(vsi);
6730 if (err)
6731 return err;
6732
6733 clear_bit(ICE_VSI_DOWN, vsi->state);
6734 ice_napi_enable_all(vsi);
6735 ice_vsi_ena_irq(vsi);
6736
6737 if (vsi->port_info &&
6738 (vsi->port_info->phy.link_info.link_info & ICE_AQ_LINK_UP) &&
6739 ((vsi->netdev && (vsi->type == ICE_VSI_PF ||
6740 vsi->type == ICE_VSI_SF)))) {
6741 ice_print_link_msg(vsi, true);
6742 netif_tx_start_all_queues(vsi->netdev);
6743 netif_carrier_on(vsi->netdev);
6744 ice_ptp_link_change(pf, true);
6745 }
6746
6747 /* Perform an initial read of the statistics registers now to
6748 * set the baseline so counters are ready when interface is up
6749 */
6750 ice_update_eth_stats(vsi);
6751
6752 if (vsi->type == ICE_VSI_PF)
6753 ice_service_task_schedule(pf);
6754
6755 return 0;
6756 }
6757
/**
 * ice_up - Bring the connection back up after being down
 * @vsi: VSI being configured
 *
 * Return: 0 on success, otherwise the error from the LAN configuration step
 * or, if that succeeded, from ice_up_complete().
 */
int ice_up(struct ice_vsi *vsi)
{
	int ret = ice_vsi_cfg_lan(vsi);

	if (ret)
		return ret;

	return ice_up_complete(vsi);
}
6772
/* Tx counters of a VSI. Used both as the snapshot of a single ring's
 * ring_stats and as the running sum over all of a VSI's Tx rings; each field
 * mirrors the ring_stats counter of the same name.
 */
struct ice_vsi_tx_stats {
	u64 pkts;
	u64 bytes;
	u64 tx_restart_q;
	u64 tx_busy;
	u64 tx_linearize;
};
6780
/* Rx counters of a VSI. Used both as the snapshot of a single ring's
 * ring_stats and as the running sum over all of a VSI's Rx rings; each field
 * mirrors the ring_stats counter of the same name.
 */
struct ice_vsi_rx_stats {
	u64 pkts;
	u64 bytes;
	u64 rx_non_eop_descs;
	u64 rx_page_failed;
	u64 rx_buf_failed;
};
6788
6789 /**
6790 * ice_fetch_u64_tx_stats - get Tx stats from a ring
6791 * @ring: the Tx ring to copy stats from
6792 * @copy: temporary storage for the ring statistics
6793 *
6794 * Fetch the u64 stats from the ring using u64_stats_fetch. This ensures each
6795 * stat value is self-consistent, though not necessarily consistent w.r.t
6796 * other stats.
6797 */
ice_fetch_u64_tx_stats(struct ice_tx_ring * ring,struct ice_vsi_tx_stats * copy)6798 static void ice_fetch_u64_tx_stats(struct ice_tx_ring *ring,
6799 struct ice_vsi_tx_stats *copy)
6800 {
6801 struct ice_ring_stats *stats = ring->ring_stats;
6802 unsigned int start;
6803
6804 do {
6805 start = u64_stats_fetch_begin(&stats->syncp);
6806 copy->pkts = u64_stats_read(&stats->pkts);
6807 copy->bytes = u64_stats_read(&stats->bytes);
6808 copy->tx_restart_q = u64_stats_read(&stats->tx_restart_q);
6809 copy->tx_busy = u64_stats_read(&stats->tx_busy);
6810 copy->tx_linearize = u64_stats_read(&stats->tx_linearize);
6811 } while (u64_stats_fetch_retry(&stats->syncp, start));
6812 }
6813
6814 /**
6815 * ice_fetch_u64_rx_stats - get Rx stats from a ring
6816 * @ring: the Rx ring to copy stats from
6817 * @copy: temporary storage for the ring statistics
6818 *
6819 * Fetch the u64 stats from the ring using u64_stats_fetch. This ensures each
6820 * stat value is self-consistent, though not necessarily consistent w.r.t
6821 * other stats.
6822 */
ice_fetch_u64_rx_stats(struct ice_rx_ring * ring,struct ice_vsi_rx_stats * copy)6823 static void ice_fetch_u64_rx_stats(struct ice_rx_ring *ring,
6824 struct ice_vsi_rx_stats *copy)
6825 {
6826 struct ice_ring_stats *stats = ring->ring_stats;
6827 unsigned int start;
6828
6829 do {
6830 start = u64_stats_fetch_begin(&stats->syncp);
6831 copy->pkts = u64_stats_read(&stats->pkts);
6832 copy->bytes = u64_stats_read(&stats->bytes);
6833 copy->rx_non_eop_descs =
6834 u64_stats_read(&stats->rx_non_eop_descs);
6835 copy->rx_page_failed = u64_stats_read(&stats->rx_page_failed);
6836 copy->rx_buf_failed = u64_stats_read(&stats->rx_buf_failed);
6837 } while (u64_stats_fetch_retry(&stats->syncp, start));
6838 }
6839
6840 /**
6841 * ice_update_vsi_tx_ring_stats - Update VSI Tx ring stats counters
6842 * @vsi: the VSI to be updated
6843 * @vsi_stats: accumulated stats for this VSI
6844 * @rings: rings to work on
6845 * @count: number of rings
6846 */
ice_update_vsi_tx_ring_stats(struct ice_vsi * vsi,struct ice_vsi_tx_stats * vsi_stats,struct ice_tx_ring ** rings,u16 count)6847 static void ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi,
6848 struct ice_vsi_tx_stats *vsi_stats,
6849 struct ice_tx_ring **rings, u16 count)
6850 {
6851 struct ice_vsi_tx_stats copy = {};
6852 u16 i;
6853
6854 for (i = 0; i < count; i++) {
6855 struct ice_tx_ring *ring;
6856
6857 ring = READ_ONCE(rings[i]);
6858 if (!ring || !ring->ring_stats)
6859 continue;
6860
6861 ice_fetch_u64_tx_stats(ring, ©);
6862
6863 vsi_stats->pkts += copy.pkts;
6864 vsi_stats->bytes += copy.bytes;
6865 vsi_stats->tx_restart_q += copy.tx_restart_q;
6866 vsi_stats->tx_busy += copy.tx_busy;
6867 vsi_stats->tx_linearize += copy.tx_linearize;
6868 }
6869 }
6870
6871 /**
6872 * ice_update_vsi_rx_ring_stats - Update VSI Rx ring stats counters
6873 * @vsi: the VSI to be updated
6874 * @vsi_stats: accumulated stats for this VSI
6875 * @rings: rings to work on
6876 * @count: number of rings
6877 */
ice_update_vsi_rx_ring_stats(struct ice_vsi * vsi,struct ice_vsi_rx_stats * vsi_stats,struct ice_rx_ring ** rings,u16 count)6878 static void ice_update_vsi_rx_ring_stats(struct ice_vsi *vsi,
6879 struct ice_vsi_rx_stats *vsi_stats,
6880 struct ice_rx_ring **rings, u16 count)
6881 {
6882 struct ice_vsi_rx_stats copy = {};
6883 u16 i;
6884
6885 for (i = 0; i < count; i++) {
6886 struct ice_rx_ring *ring;
6887
6888 ring = READ_ONCE(rings[i]);
6889 if (!ring || !ring->ring_stats)
6890 continue;
6891
6892 ice_fetch_u64_rx_stats(ring, ©);
6893
6894 vsi_stats->pkts += copy.pkts;
6895 vsi_stats->bytes += copy.bytes;
6896 vsi_stats->rx_non_eop_descs += copy.rx_non_eop_descs;
6897 vsi_stats->rx_page_failed += copy.rx_page_failed;
6898 vsi_stats->rx_buf_failed += copy.rx_buf_failed;
6899 }
6900 }
6901
6902 /**
6903 * ice_update_vsi_ring_stats - Update VSI stats counters
6904 * @vsi: the VSI to be updated
6905 */
ice_update_vsi_ring_stats(struct ice_vsi * vsi)6906 static void ice_update_vsi_ring_stats(struct ice_vsi *vsi)
6907 {
6908 struct rtnl_link_stats64 *net_stats, *stats_prev;
6909 struct ice_vsi_tx_stats tx_stats = {};
6910 struct ice_vsi_rx_stats rx_stats = {};
6911 struct ice_pf *pf = vsi->back;
6912
6913 rcu_read_lock();
6914
6915 /* update Tx rings counters */
6916 ice_update_vsi_tx_ring_stats(vsi, &tx_stats, vsi->tx_rings,
6917 vsi->num_txq);
6918
6919 /* update Rx rings counters */
6920 ice_update_vsi_rx_ring_stats(vsi, &rx_stats, vsi->rx_rings,
6921 vsi->num_rxq);
6922
6923 /* update XDP Tx rings counters */
6924 if (ice_is_xdp_ena_vsi(vsi))
6925 ice_update_vsi_tx_ring_stats(vsi, &tx_stats, vsi->xdp_rings,
6926 vsi->num_xdp_txq);
6927
6928 rcu_read_unlock();
6929
6930 /* Save non-netdev (extended) stats */
6931 vsi->tx_restart = tx_stats.tx_restart_q;
6932 vsi->tx_busy = tx_stats.tx_busy;
6933 vsi->tx_linearize = tx_stats.tx_linearize;
6934 vsi->rx_buf_failed = rx_stats.rx_buf_failed;
6935 vsi->rx_page_failed = rx_stats.rx_page_failed;
6936
6937 net_stats = &vsi->net_stats;
6938 stats_prev = &vsi->net_stats_prev;
6939
6940 /* Update netdev counters, but keep in mind that values could start at
6941 * random value after PF reset. And as we increase the reported stat by
6942 * diff of Prev-Cur, we need to be sure that Prev is valid. If it's not,
6943 * let's skip this round.
6944 */
6945 if (likely(pf->stat_prev_loaded)) {
6946 net_stats->tx_packets += tx_stats.pkts - stats_prev->tx_packets;
6947 net_stats->tx_bytes += tx_stats.bytes - stats_prev->tx_bytes;
6948 net_stats->rx_packets += rx_stats.pkts - stats_prev->rx_packets;
6949 net_stats->rx_bytes += rx_stats.bytes - stats_prev->rx_bytes;
6950 }
6951
6952 stats_prev->tx_packets = tx_stats.pkts;
6953 stats_prev->tx_bytes = tx_stats.bytes;
6954 stats_prev->rx_packets = rx_stats.pkts;
6955 stats_prev->rx_bytes = rx_stats.bytes;
6956 }
6957
6958 /**
6959 * ice_update_vsi_stats - Update VSI stats counters
6960 * @vsi: the VSI to be updated
6961 */
ice_update_vsi_stats(struct ice_vsi * vsi)6962 void ice_update_vsi_stats(struct ice_vsi *vsi)
6963 {
6964 struct rtnl_link_stats64 *cur_ns = &vsi->net_stats;
6965 struct ice_eth_stats *cur_es = &vsi->eth_stats;
6966 struct ice_pf *pf = vsi->back;
6967
6968 if (test_bit(ICE_VSI_DOWN, vsi->state) ||
6969 test_bit(ICE_CFG_BUSY, pf->state))
6970 return;
6971
6972 /* get stats as recorded by Tx/Rx rings */
6973 ice_update_vsi_ring_stats(vsi);
6974
6975 /* get VSI stats as recorded by the hardware */
6976 ice_update_eth_stats(vsi);
6977
6978 cur_ns->tx_errors = cur_es->tx_errors;
6979 cur_ns->rx_dropped = cur_es->rx_discards;
6980 cur_ns->tx_dropped = cur_es->tx_discards;
6981 cur_ns->multicast = cur_es->rx_multicast;
6982
6983 /* update some more netdev stats if this is main VSI */
6984 if (vsi->type == ICE_VSI_PF) {
6985 cur_ns->rx_crc_errors = pf->stats.crc_errors;
6986 cur_ns->rx_errors = pf->stats.crc_errors +
6987 pf->stats.illegal_bytes +
6988 pf->stats.rx_undersize +
6989 pf->stats.rx_jabber +
6990 pf->stats.rx_fragments +
6991 pf->stats.rx_oversize;
6992 /* record drops from the port level */
6993 cur_ns->rx_missed_errors = pf->stats.eth.rx_discards;
6994 }
6995 }
6996
/**
 * ice_update_pf_stats - Update PF port stats counters
 * @pf: PF whose stats needs to be updated
 *
 * Walks the port-level statistics registers of the PF's logical port and
 * updates pf->stats (current values) together with pf->stats_prev (previous
 * snapshot). Once all registers have been processed, pf->stat_prev_loaded is
 * set so that later callers can treat the snapshot as valid.
 *
 * NOTE(review): ice_stat_update40()/ice_stat_update32() are defined outside
 * this chunk; presumably they read the given register and accumulate the
 * delta against the previous value into the current one - confirm there.
 */
void ice_update_pf_stats(struct ice_pf *pf)
{
	struct ice_hw_port_stats *prev_ps, *cur_ps;
	struct ice_hw *hw = &pf->hw;
	u16 fd_ctr_base;
	u8 port;

	port = hw->port_info->lport;
	prev_ps = &pf->stats_prev;
	cur_ps = &pf->stats;

	/* while a reset is in progress the previous snapshot is not trusted */
	if (ice_is_reset_in_progress(pf->state))
		pf->stat_prev_loaded = false;

	/* Rx byte and unicast/multicast/broadcast packet counters */
	ice_stat_update40(hw, GLPRT_GORCL(port), pf->stat_prev_loaded,
			  &prev_ps->eth.rx_bytes,
			  &cur_ps->eth.rx_bytes);

	ice_stat_update40(hw, GLPRT_UPRCL(port), pf->stat_prev_loaded,
			  &prev_ps->eth.rx_unicast,
			  &cur_ps->eth.rx_unicast);

	ice_stat_update40(hw, GLPRT_MPRCL(port), pf->stat_prev_loaded,
			  &prev_ps->eth.rx_multicast,
			  &cur_ps->eth.rx_multicast);

	ice_stat_update40(hw, GLPRT_BPRCL(port), pf->stat_prev_loaded,
			  &prev_ps->eth.rx_broadcast,
			  &cur_ps->eth.rx_broadcast);

	/* Rx discards; unlike the others this register takes no port index */
	ice_stat_update32(hw, PRTRPB_RDPC, pf->stat_prev_loaded,
			  &prev_ps->eth.rx_discards,
			  &cur_ps->eth.rx_discards);

	/* Tx byte and unicast/multicast/broadcast packet counters */
	ice_stat_update40(hw, GLPRT_GOTCL(port), pf->stat_prev_loaded,
			  &prev_ps->eth.tx_bytes,
			  &cur_ps->eth.tx_bytes);

	ice_stat_update40(hw, GLPRT_UPTCL(port), pf->stat_prev_loaded,
			  &prev_ps->eth.tx_unicast,
			  &cur_ps->eth.tx_unicast);

	ice_stat_update40(hw, GLPRT_MPTCL(port), pf->stat_prev_loaded,
			  &prev_ps->eth.tx_multicast,
			  &cur_ps->eth.tx_multicast);

	ice_stat_update40(hw, GLPRT_BPTCL(port), pf->stat_prev_loaded,
			  &prev_ps->eth.tx_broadcast,
			  &cur_ps->eth.tx_broadcast);

	ice_stat_update32(hw, GLPRT_TDOLD(port), pf->stat_prev_loaded,
			  &prev_ps->tx_dropped_link_down,
			  &cur_ps->tx_dropped_link_down);

	/* Rx packet counters bucketed by frame size */
	ice_stat_update40(hw, GLPRT_PRC64L(port), pf->stat_prev_loaded,
			  &prev_ps->rx_size_64, &cur_ps->rx_size_64);

	ice_stat_update40(hw, GLPRT_PRC127L(port), pf->stat_prev_loaded,
			  &prev_ps->rx_size_127, &cur_ps->rx_size_127);

	ice_stat_update40(hw, GLPRT_PRC255L(port), pf->stat_prev_loaded,
			  &prev_ps->rx_size_255, &cur_ps->rx_size_255);

	ice_stat_update40(hw, GLPRT_PRC511L(port), pf->stat_prev_loaded,
			  &prev_ps->rx_size_511, &cur_ps->rx_size_511);

	ice_stat_update40(hw, GLPRT_PRC1023L(port), pf->stat_prev_loaded,
			  &prev_ps->rx_size_1023, &cur_ps->rx_size_1023);

	ice_stat_update40(hw, GLPRT_PRC1522L(port), pf->stat_prev_loaded,
			  &prev_ps->rx_size_1522, &cur_ps->rx_size_1522);

	ice_stat_update40(hw, GLPRT_PRC9522L(port), pf->stat_prev_loaded,
			  &prev_ps->rx_size_big, &cur_ps->rx_size_big);

	/* Tx packet counters bucketed by frame size */
	ice_stat_update40(hw, GLPRT_PTC64L(port), pf->stat_prev_loaded,
			  &prev_ps->tx_size_64, &cur_ps->tx_size_64);

	ice_stat_update40(hw, GLPRT_PTC127L(port), pf->stat_prev_loaded,
			  &prev_ps->tx_size_127, &cur_ps->tx_size_127);

	ice_stat_update40(hw, GLPRT_PTC255L(port), pf->stat_prev_loaded,
			  &prev_ps->tx_size_255, &cur_ps->tx_size_255);

	ice_stat_update40(hw, GLPRT_PTC511L(port), pf->stat_prev_loaded,
			  &prev_ps->tx_size_511, &cur_ps->tx_size_511);

	ice_stat_update40(hw, GLPRT_PTC1023L(port), pf->stat_prev_loaded,
			  &prev_ps->tx_size_1023, &cur_ps->tx_size_1023);

	ice_stat_update40(hw, GLPRT_PTC1522L(port), pf->stat_prev_loaded,
			  &prev_ps->tx_size_1522, &cur_ps->tx_size_1522);

	ice_stat_update40(hw, GLPRT_PTC9522L(port), pf->stat_prev_loaded,
			  &prev_ps->tx_size_big, &cur_ps->tx_size_big);

	/* the Flow Director match counter is indexed from hw->fd_ctr_base */
	fd_ctr_base = hw->fd_ctr_base;

	ice_stat_update40(hw,
			  GLSTAT_FD_CNT0L(ICE_FD_SB_STAT_IDX(fd_ctr_base)),
			  pf->stat_prev_loaded, &prev_ps->fd_sb_match,
			  &cur_ps->fd_sb_match);
	/* link-level XON/XOFF counters, Rx then Tx */
	ice_stat_update32(hw, GLPRT_LXONRXC(port), pf->stat_prev_loaded,
			  &prev_ps->link_xon_rx, &cur_ps->link_xon_rx);

	ice_stat_update32(hw, GLPRT_LXOFFRXC(port), pf->stat_prev_loaded,
			  &prev_ps->link_xoff_rx, &cur_ps->link_xoff_rx);

	ice_stat_update32(hw, GLPRT_LXONTXC(port), pf->stat_prev_loaded,
			  &prev_ps->link_xon_tx, &cur_ps->link_xon_tx);

	ice_stat_update32(hw, GLPRT_LXOFFTXC(port), pf->stat_prev_loaded,
			  &prev_ps->link_xoff_tx, &cur_ps->link_xoff_tx);

	/* DCB counters are handled by a separate helper */
	ice_update_dcb_stats(pf);

	/* error, fault and malformed-frame counters */
	ice_stat_update32(hw, GLPRT_CRCERRS(port), pf->stat_prev_loaded,
			  &prev_ps->crc_errors, &cur_ps->crc_errors);

	ice_stat_update32(hw, GLPRT_ILLERRC(port), pf->stat_prev_loaded,
			  &prev_ps->illegal_bytes, &cur_ps->illegal_bytes);

	ice_stat_update32(hw, GLPRT_MLFC(port), pf->stat_prev_loaded,
			  &prev_ps->mac_local_faults,
			  &cur_ps->mac_local_faults);

	ice_stat_update32(hw, GLPRT_MRFC(port), pf->stat_prev_loaded,
			  &prev_ps->mac_remote_faults,
			  &cur_ps->mac_remote_faults);

	ice_stat_update32(hw, GLPRT_RLEC(port), pf->stat_prev_loaded,
			  &prev_ps->rx_len_errors, &cur_ps->rx_len_errors);

	ice_stat_update32(hw, GLPRT_RUC(port), pf->stat_prev_loaded,
			  &prev_ps->rx_undersize, &cur_ps->rx_undersize);

	ice_stat_update32(hw, GLPRT_RFC(port), pf->stat_prev_loaded,
			  &prev_ps->rx_fragments, &cur_ps->rx_fragments);

	ice_stat_update32(hw, GLPRT_ROC(port), pf->stat_prev_loaded,
			  &prev_ps->rx_oversize, &cur_ps->rx_oversize);

	ice_stat_update32(hw, GLPRT_RJC(port), pf->stat_prev_loaded,
			  &prev_ps->rx_jabber, &cur_ps->rx_jabber);

	/* report whether ICE_FLAG_FD_ENA is currently set, as 1 or 0 */
	cur_ps->fd_sb_status = test_bit(ICE_FLAG_FD_ENA, pf->flags) ? 1 : 0;

	/* from here on the previous snapshot is considered valid */
	pf->stat_prev_loaded = true;
}
7150
7151 /**
7152 * ice_get_stats64 - get statistics for network device structure
7153 * @netdev: network interface device structure
7154 * @stats: main device statistics structure
7155 */
ice_get_stats64(struct net_device * netdev,struct rtnl_link_stats64 * stats)7156 void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
7157 {
7158 struct ice_netdev_priv *np = netdev_priv(netdev);
7159 struct rtnl_link_stats64 *vsi_stats;
7160 struct ice_vsi *vsi = np->vsi;
7161
7162 vsi_stats = &vsi->net_stats;
7163
7164 if (!vsi->num_txq || !vsi->num_rxq)
7165 return;
7166
7167 /* netdev packet/byte stats come from ring counter. These are obtained
7168 * by summing up ring counters (done by ice_update_vsi_ring_stats).
7169 * But, only call the update routine and read the registers if VSI is
7170 * not down.
7171 */
7172 if (!test_bit(ICE_VSI_DOWN, vsi->state))
7173 ice_update_vsi_ring_stats(vsi);
7174 stats->tx_packets = vsi_stats->tx_packets;
7175 stats->tx_bytes = vsi_stats->tx_bytes;
7176 stats->rx_packets = vsi_stats->rx_packets;
7177 stats->rx_bytes = vsi_stats->rx_bytes;
7178
7179 /* The rest of the stats can be read from the hardware but instead we
7180 * just return values that the watchdog task has already obtained from
7181 * the hardware.
7182 */
7183 stats->multicast = vsi_stats->multicast;
7184 stats->tx_errors = vsi_stats->tx_errors;
7185 stats->tx_dropped = vsi_stats->tx_dropped;
7186 stats->rx_errors = vsi_stats->rx_errors;
7187 stats->rx_dropped = vsi_stats->rx_dropped;
7188 stats->rx_crc_errors = vsi_stats->rx_crc_errors;
7189 stats->rx_length_errors = vsi_stats->rx_length_errors;
7190 }
7191
7192 /**
7193 * ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI
7194 * @vsi: VSI having NAPI disabled
7195 */
ice_napi_disable_all(struct ice_vsi * vsi)7196 static void ice_napi_disable_all(struct ice_vsi *vsi)
7197 {
7198 int q_idx;
7199
7200 if (!vsi->netdev)
7201 return;
7202
7203 ice_for_each_q_vector(vsi, q_idx) {
7204 struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
7205
7206 if (q_vector->rx.rx_ring || q_vector->tx.tx_ring)
7207 napi_disable(&q_vector->napi);
7208
7209 cancel_work_sync(&q_vector->tx.dim.work);
7210 cancel_work_sync(&q_vector->rx.dim.work);
7211 }
7212 }
7213
7214 /**
7215 * ice_vsi_dis_irq - Mask off queue interrupt generation on the VSI
7216 * @vsi: the VSI being un-configured
7217 */
ice_vsi_dis_irq(struct ice_vsi * vsi)7218 static void ice_vsi_dis_irq(struct ice_vsi *vsi)
7219 {
7220 struct ice_pf *pf = vsi->back;
7221 struct ice_hw *hw = &pf->hw;
7222 u32 val;
7223 int i;
7224
7225 /* disable interrupt causation from each Rx queue; Tx queues are
7226 * handled in ice_vsi_stop_tx_ring()
7227 */
7228 if (vsi->rx_rings) {
7229 ice_for_each_rxq(vsi, i) {
7230 if (vsi->rx_rings[i]) {
7231 u16 reg;
7232
7233 reg = vsi->rx_rings[i]->reg_idx;
7234 val = rd32(hw, QINT_RQCTL(reg));
7235 val &= ~QINT_RQCTL_CAUSE_ENA_M;
7236 wr32(hw, QINT_RQCTL(reg), val);
7237 }
7238 }
7239 }
7240
7241 /* disable each interrupt */
7242 ice_for_each_q_vector(vsi, i) {
7243 if (!vsi->q_vectors[i])
7244 continue;
7245 wr32(hw, GLINT_DYN_CTL(vsi->q_vectors[i]->reg_idx), 0);
7246 }
7247
7248 ice_flush(hw);
7249
7250 /* don't call synchronize_irq() for VF's from the host */
7251 if (vsi->type == ICE_VSI_VF)
7252 return;
7253
7254 ice_for_each_q_vector(vsi, i)
7255 synchronize_irq(vsi->q_vectors[i]->irq.virq);
7256 }
7257
7258 /**
7259 * ice_down - Shutdown the connection
7260 * @vsi: The VSI being stopped
7261 *
7262 * Caller of this function is expected to set the vsi->state ICE_DOWN bit
7263 */
ice_down(struct ice_vsi * vsi)7264 int ice_down(struct ice_vsi *vsi)
7265 {
7266 int i, tx_err, rx_err, vlan_err = 0;
7267
7268 WARN_ON(!test_bit(ICE_VSI_DOWN, vsi->state));
7269
7270 if (vsi->netdev) {
7271 vlan_err = ice_vsi_del_vlan_zero(vsi);
7272 ice_ptp_link_change(vsi->back, false);
7273 netif_carrier_off(vsi->netdev);
7274 netif_tx_disable(vsi->netdev);
7275 }
7276
7277 ice_vsi_dis_irq(vsi);
7278
7279 tx_err = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
7280 if (tx_err)
7281 netdev_err(vsi->netdev, "Failed stop Tx rings, VSI %d error %d\n",
7282 vsi->vsi_num, tx_err);
7283 if (!tx_err && vsi->xdp_rings) {
7284 tx_err = ice_vsi_stop_xdp_tx_rings(vsi);
7285 if (tx_err)
7286 netdev_err(vsi->netdev, "Failed stop XDP rings, VSI %d error %d\n",
7287 vsi->vsi_num, tx_err);
7288 }
7289
7290 rx_err = ice_vsi_stop_all_rx_rings(vsi);
7291 if (rx_err)
7292 netdev_err(vsi->netdev, "Failed stop Rx rings, VSI %d error %d\n",
7293 vsi->vsi_num, rx_err);
7294
7295 ice_napi_disable_all(vsi);
7296
7297 ice_for_each_txq(vsi, i)
7298 ice_clean_tx_ring(vsi->tx_rings[i]);
7299
7300 if (vsi->xdp_rings)
7301 ice_for_each_xdp_txq(vsi, i)
7302 ice_clean_tx_ring(vsi->xdp_rings[i]);
7303
7304 ice_for_each_rxq(vsi, i)
7305 ice_clean_rx_ring(vsi->rx_rings[i]);
7306
7307 if (tx_err || rx_err || vlan_err) {
7308 netdev_err(vsi->netdev, "Failed to close VSI 0x%04X on switch 0x%04X\n",
7309 vsi->vsi_num, vsi->vsw->sw_id);
7310 return -EIO;
7311 }
7312
7313 return 0;
7314 }
7315
7316 /**
7317 * ice_down_up - shutdown the VSI connection and bring it up
7318 * @vsi: the VSI to be reconnected
7319 */
ice_down_up(struct ice_vsi * vsi)7320 int ice_down_up(struct ice_vsi *vsi)
7321 {
7322 int ret;
7323
7324 /* if DOWN already set, nothing to do */
7325 if (test_and_set_bit(ICE_VSI_DOWN, vsi->state))
7326 return 0;
7327
7328 ret = ice_down(vsi);
7329 if (ret)
7330 return ret;
7331
7332 ret = ice_up(vsi);
7333 if (ret) {
7334 netdev_err(vsi->netdev, "reallocating resources failed during netdev features change, may need to reload driver\n");
7335 return ret;
7336 }
7337
7338 return 0;
7339 }
7340
7341 /**
7342 * ice_vsi_setup_tx_rings - Allocate VSI Tx queue resources
7343 * @vsi: VSI having resources allocated
7344 *
7345 * Return 0 on success, negative on failure
7346 */
ice_vsi_setup_tx_rings(struct ice_vsi * vsi)7347 int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
7348 {
7349 int i, err = 0;
7350
7351 if (!vsi->num_txq) {
7352 dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Tx queues\n",
7353 vsi->vsi_num);
7354 return -EINVAL;
7355 }
7356
7357 ice_for_each_txq(vsi, i) {
7358 struct ice_tx_ring *ring = vsi->tx_rings[i];
7359
7360 if (!ring)
7361 return -EINVAL;
7362
7363 if (vsi->netdev)
7364 ring->netdev = vsi->netdev;
7365 err = ice_setup_tx_ring(ring);
7366 if (err)
7367 break;
7368 }
7369
7370 return err;
7371 }
7372
7373 /**
7374 * ice_vsi_setup_rx_rings - Allocate VSI Rx queue resources
7375 * @vsi: VSI having resources allocated
7376 *
7377 * Return 0 on success, negative on failure
7378 */
ice_vsi_setup_rx_rings(struct ice_vsi * vsi)7379 int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
7380 {
7381 int i, err = 0;
7382
7383 if (!vsi->num_rxq) {
7384 dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Rx queues\n",
7385 vsi->vsi_num);
7386 return -EINVAL;
7387 }
7388
7389 ice_for_each_rxq(vsi, i) {
7390 struct ice_rx_ring *ring = vsi->rx_rings[i];
7391
7392 if (!ring)
7393 return -EINVAL;
7394
7395 if (vsi->netdev)
7396 ring->netdev = vsi->netdev;
7397 err = ice_setup_rx_ring(ring);
7398 if (err)
7399 break;
7400 }
7401
7402 return err;
7403 }
7404
7405 /**
7406 * ice_vsi_open_ctrl - open control VSI for use
7407 * @vsi: the VSI to open
7408 *
7409 * Initialization of the Control VSI
7410 *
7411 * Returns 0 on success, negative value on error
7412 */
ice_vsi_open_ctrl(struct ice_vsi * vsi)7413 int ice_vsi_open_ctrl(struct ice_vsi *vsi)
7414 {
7415 char int_name[ICE_INT_NAME_STR_LEN];
7416 struct ice_pf *pf = vsi->back;
7417 struct device *dev;
7418 int err;
7419
7420 dev = ice_pf_to_dev(pf);
7421 /* allocate descriptors */
7422 err = ice_vsi_setup_tx_rings(vsi);
7423 if (err)
7424 goto err_setup_tx;
7425
7426 err = ice_vsi_setup_rx_rings(vsi);
7427 if (err)
7428 goto err_setup_rx;
7429
7430 err = ice_vsi_cfg_lan(vsi);
7431 if (err)
7432 goto err_setup_rx;
7433
7434 snprintf(int_name, sizeof(int_name) - 1, "%s-%s:ctrl",
7435 dev_driver_string(dev), dev_name(dev));
7436 err = ice_vsi_req_irq_msix(vsi, int_name);
7437 if (err)
7438 goto err_setup_rx;
7439
7440 ice_vsi_cfg_msix(vsi);
7441
7442 err = ice_vsi_start_all_rx_rings(vsi);
7443 if (err)
7444 goto err_up_complete;
7445
7446 clear_bit(ICE_VSI_DOWN, vsi->state);
7447 ice_vsi_ena_irq(vsi);
7448
7449 return 0;
7450
7451 err_up_complete:
7452 ice_down(vsi);
7453 err_setup_rx:
7454 ice_vsi_free_rx_rings(vsi);
7455 err_setup_tx:
7456 ice_vsi_free_tx_rings(vsi);
7457
7458 return err;
7459 }
7460
7461 /**
7462 * ice_vsi_open - Called when a network interface is made active
7463 * @vsi: the VSI to open
7464 *
7465 * Initialization of the VSI
7466 *
7467 * Returns 0 on success, negative value on error
7468 */
ice_vsi_open(struct ice_vsi * vsi)7469 int ice_vsi_open(struct ice_vsi *vsi)
7470 {
7471 char int_name[ICE_INT_NAME_STR_LEN];
7472 struct ice_pf *pf = vsi->back;
7473 int err;
7474
7475 /* allocate descriptors */
7476 err = ice_vsi_setup_tx_rings(vsi);
7477 if (err)
7478 goto err_setup_tx;
7479
7480 err = ice_vsi_setup_rx_rings(vsi);
7481 if (err)
7482 goto err_setup_rx;
7483
7484 err = ice_vsi_cfg_lan(vsi);
7485 if (err)
7486 goto err_setup_rx;
7487
7488 snprintf(int_name, sizeof(int_name) - 1, "%s-%s",
7489 dev_driver_string(ice_pf_to_dev(pf)), vsi->netdev->name);
7490 err = ice_vsi_req_irq_msix(vsi, int_name);
7491 if (err)
7492 goto err_setup_rx;
7493
7494 if (bitmap_empty(pf->txtime_txqs, pf->max_pf_txqs))
7495 ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc);
7496
7497 if (vsi->type == ICE_VSI_PF || vsi->type == ICE_VSI_SF) {
7498 /* Notify the stack of the actual queue counts. */
7499 err = netif_set_real_num_tx_queues(vsi->netdev, vsi->num_txq);
7500 if (err)
7501 goto err_set_qs;
7502
7503 err = netif_set_real_num_rx_queues(vsi->netdev, vsi->num_rxq);
7504 if (err)
7505 goto err_set_qs;
7506
7507 ice_vsi_set_napi_queues(vsi);
7508 }
7509
7510 err = ice_up_complete(vsi);
7511 if (err)
7512 goto err_up_complete;
7513
7514 return 0;
7515
7516 err_up_complete:
7517 ice_down(vsi);
7518 err_set_qs:
7519 ice_vsi_free_irq(vsi);
7520 err_setup_rx:
7521 ice_vsi_free_rx_rings(vsi);
7522 err_setup_tx:
7523 ice_vsi_free_tx_rings(vsi);
7524
7525 return err;
7526 }
7527
7528 /**
7529 * ice_vsi_release_all - Delete all VSIs
7530 * @pf: PF from which all VSIs are being removed
7531 */
ice_vsi_release_all(struct ice_pf * pf)7532 static void ice_vsi_release_all(struct ice_pf *pf)
7533 {
7534 int err, i;
7535
7536 if (!pf->vsi)
7537 return;
7538
7539 ice_for_each_vsi(pf, i) {
7540 if (!pf->vsi[i])
7541 continue;
7542
7543 if (pf->vsi[i]->type == ICE_VSI_CHNL)
7544 continue;
7545
7546 err = ice_vsi_release(pf->vsi[i]);
7547 if (err)
7548 dev_dbg(ice_pf_to_dev(pf), "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n",
7549 i, err, pf->vsi[i]->vsi_num);
7550 }
7551 }
7552
7553 /**
7554 * ice_vsi_rebuild_by_type - Rebuild VSI of a given type
7555 * @pf: pointer to the PF instance
7556 * @type: VSI type to rebuild
7557 *
7558 * Iterates through the pf->vsi array and rebuilds VSIs of the requested type
7559 */
ice_vsi_rebuild_by_type(struct ice_pf * pf,enum ice_vsi_type type)7560 static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type)
7561 {
7562 struct device *dev = ice_pf_to_dev(pf);
7563 int i, err;
7564
7565 ice_for_each_vsi(pf, i) {
7566 struct ice_vsi *vsi = pf->vsi[i];
7567
7568 if (!vsi || vsi->type != type)
7569 continue;
7570
7571 /* rebuild the VSI */
7572 err = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_INIT);
7573 if (err) {
7574 dev_err(dev, "rebuild VSI failed, err %d, VSI index %d, type %s\n",
7575 err, vsi->idx, ice_vsi_type_str(type));
7576 return err;
7577 }
7578
7579 /* replay filters for the VSI */
7580 err = ice_replay_vsi(&pf->hw, vsi->idx);
7581 if (err) {
7582 dev_err(dev, "replay VSI failed, error %d, VSI index %d, type %s\n",
7583 err, vsi->idx, ice_vsi_type_str(type));
7584 return err;
7585 }
7586
7587 /* Re-map HW VSI number, using VSI handle that has been
7588 * previously validated in ice_replay_vsi() call above
7589 */
7590 vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx);
7591
7592 /* enable the VSI */
7593 err = ice_ena_vsi(vsi, false);
7594 if (err) {
7595 dev_err(dev, "enable VSI failed, err %d, VSI index %d, type %s\n",
7596 err, vsi->idx, ice_vsi_type_str(type));
7597 return err;
7598 }
7599
7600 dev_info(dev, "VSI rebuilt. VSI index %d, type %s\n", vsi->idx,
7601 ice_vsi_type_str(type));
7602 }
7603
7604 return 0;
7605 }
7606
7607 /**
7608 * ice_update_pf_netdev_link - Update PF netdev link status
7609 * @pf: pointer to the PF instance
7610 */
ice_update_pf_netdev_link(struct ice_pf * pf)7611 static void ice_update_pf_netdev_link(struct ice_pf *pf)
7612 {
7613 bool link_up;
7614 int i;
7615
7616 ice_for_each_vsi(pf, i) {
7617 struct ice_vsi *vsi = pf->vsi[i];
7618
7619 if (!vsi || vsi->type != ICE_VSI_PF)
7620 return;
7621
7622 ice_get_link_status(pf->vsi[i]->port_info, &link_up);
7623 if (link_up) {
7624 netif_carrier_on(pf->vsi[i]->netdev);
7625 netif_tx_wake_all_queues(pf->vsi[i]->netdev);
7626 } else {
7627 netif_carrier_off(pf->vsi[i]->netdev);
7628 netif_tx_stop_all_queues(pf->vsi[i]->netdev);
7629 }
7630 }
7631 }
7632
/**
 * ice_rebuild - rebuild after reset
 * @pf: PF to rebuild
 * @reset_type: type of reset
 *
 * Do not rebuild VF VSI in this flow because that is already handled via
 * ice_reset_all_vfs(). This is because requirements for resetting a VF after a
 * PFR/CORER/GLOBER/etc. are different than the normal flow. Also, we don't want
 * to reset/rebuild all the VF VSI twice.
 *
 * On success ICE_RESET_FAILED is cleared in pf->state. On any failure the
 * function unwinds through the error labels at the bottom, which end by
 * setting ICE_NEEDS_RESTART and logging that the driver must be reloaded.
 */
static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
{
	struct ice_vsi *vsi = ice_get_main_vsi(pf);
	struct device *dev = ice_pf_to_dev(pf);
	struct ice_hw *hw = &pf->hw;
	bool dvm;
	int err;

	/* a PF marked ICE_DOWN is not rebuilt; only the restart flag is set */
	if (test_bit(ICE_DOWN, pf->state))
		goto clear_recovery;

	dev_dbg(dev, "rebuilding PF after reset_type=%d\n", reset_type);

#define ICE_EMP_RESET_SLEEP_MS 5000
	if (reset_type == ICE_RESET_EMPR) {
		/* If an EMP reset has occurred, any previously pending flash
		 * update will have completed. We no longer know whether or
		 * not the NVM update EMP reset is restricted.
		 */
		pf->fw_emp_reset_disabled = false;

		msleep(ICE_EMP_RESET_SLEEP_MS);
	}

	/* control queues come first; the steps below depend on them */
	err = ice_init_all_ctrlq(hw);
	if (err) {
		dev_err(dev, "control queues init failed %d\n", err);
		goto err_init_ctrlq;
	}

	/* if DDP was previously loaded successfully */
	if (!ice_is_safe_mode(pf)) {
		/* reload the SW DB of filter tables */
		if (reset_type == ICE_RESET_PFR)
			ice_fill_blk_tbls(hw);
		else
			/* Reload DDP Package after CORER/GLOBR reset */
			ice_load_pkg(NULL, pf);
	}

	err = ice_clear_pf_cfg(hw);
	if (err) {
		dev_err(dev, "clear PF configuration failed %d\n", err);
		goto err_init_ctrlq;
	}

	ice_clear_pxe_mode(hw);

	err = ice_init_nvm(hw);
	if (err) {
		dev_err(dev, "ice_init_nvm failed %d\n", err);
		goto err_init_ctrlq;
	}

	err = ice_get_caps(hw);
	if (err) {
		dev_err(dev, "ice_get_caps failed %d\n", err);
		goto err_init_ctrlq;
	}

	err = ice_aq_set_mac_cfg(hw, ICE_AQ_SET_MAC_FRAME_SIZE_MAX, NULL);
	if (err) {
		dev_err(dev, "set_mac_cfg failed %d\n", err);
		goto err_init_ctrlq;
	}

	/* port parameters are programmed with the current DVM setting */
	dvm = ice_is_dvm_ena(hw);

	/* note: this failure is not logged before unwinding */
	err = ice_aq_set_port_params(pf->hw.port_info, dvm, NULL);
	if (err)
		goto err_init_ctrlq;

	err = ice_sched_init_port(hw->port_info);
	if (err)
		goto err_sched_init_port;

	/* start misc vector */
	err = ice_req_irq_msix_misc(pf);
	if (err) {
		dev_err(dev, "misc vector setup failed: %d\n", err);
		goto err_sched_init_port;
	}

	if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
		wr32(hw, PFQF_FD_ENA, PFQF_FD_ENA_FD_ENA_M);
		/* filter pools are only allocated when PFQF_FD_SIZE reads 0 */
		if (!rd32(hw, PFQF_FD_SIZE)) {
			u16 unused, guar, b_effort;

			guar = hw->func_caps.fd_fltr_guar;
			b_effort = hw->func_caps.fd_fltr_best_effort;

			/* force guaranteed filter pool for PF */
			ice_alloc_fd_guar_item(hw, &unused, guar);
			/* force shared filter pool for PF */
			ice_alloc_fd_shrd_item(hw, &unused, b_effort);
		}
	}

	if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
		ice_dcb_rebuild(pf);

	/* If the PF previously had enabled PTP, PTP init needs to happen before
	 * the VSI rebuild. If not, this causes the PTP link status events to
	 * fail.
	 */
	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
		ice_ptp_rebuild(pf, reset_type);

	if (ice_is_feature_supported(pf, ICE_F_GNSS))
		ice_gnss_init(pf);

	/* rebuild PF VSI */
	err = ice_vsi_rebuild_by_type(pf, ICE_VSI_PF);
	if (err) {
		dev_err(dev, "PF VSI rebuild failed: %d\n", err);
		goto err_vsi_rebuild;
	}

	/* channel (ADQ) VSIs are only rebuilt here after a PF reset */
	if (reset_type == ICE_RESET_PFR) {
		err = ice_rebuild_channels(pf);
		if (err) {
			dev_err(dev, "failed to rebuild and replay ADQ VSIs, err %d\n",
				err);
			goto err_vsi_rebuild;
		}
	}

	/* If Flow Director is active */
	if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
		err = ice_vsi_rebuild_by_type(pf, ICE_VSI_CTRL);
		if (err) {
			dev_err(dev, "control VSI rebuild failed: %d\n", err);
			goto err_vsi_rebuild;
		}

		/* replay HW Flow Director recipes */
		if (hw->fdir_prof)
			ice_fdir_replay_flows(hw);

		/* replay Flow Director filters */
		ice_fdir_replay_fltrs(pf);

		ice_rebuild_arfs(pf);
	}

	/* re-attach the main VSI's netdev, if there is one */
	if (vsi && vsi->netdev)
		netif_device_attach(vsi->netdev);

	ice_update_pf_netdev_link(pf);

	/* tell the firmware we are up */
	err = ice_send_version(pf);
	if (err) {
		dev_err(dev, "Rebuild failed due to error sending driver version: %d\n",
			err);
		goto err_vsi_rebuild;
	}

	ice_replay_post(hw);

	/* if we get here, reset flow is successful */
	clear_bit(ICE_RESET_FAILED, pf->state);

	ice_health_clear(pf);

	ice_rdma_finalize_setup(pf);
	if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG))
		ice_lag_rebuild(pf);

	/* Restore timestamp mode settings after VSI rebuild */
	ice_ptp_restore_timestamp_mode(pf);

	/* Start PTP periodic work after VSI is fully rebuilt */
	ice_ptp_queue_work(pf);
	return;

	/* Error unwind: each label falls through to the ones below it. Both
	 * err_vsi_rebuild and err_sched_init_port share the same cleanup.
	 */
err_vsi_rebuild:
err_sched_init_port:
	ice_sched_cleanup_all(hw);
err_init_ctrlq:
	ice_shutdown_all_ctrlq(hw, false);
	set_bit(ICE_RESET_FAILED, pf->state);
clear_recovery:
	/* set this bit in PF state to control service task scheduling */
	set_bit(ICE_NEEDS_RESTART, pf->state);
	dev_err(dev, "Rebuild failed, unload and reload driver\n");
}
7830
7831 /**
7832 * ice_change_mtu - NDO callback to change the MTU
7833 * @netdev: network interface device structure
7834 * @new_mtu: new value for maximum frame size
7835 *
7836 * Returns 0 on success, negative on failure
7837 */
ice_change_mtu(struct net_device * netdev,int new_mtu)7838 int ice_change_mtu(struct net_device *netdev, int new_mtu)
7839 {
7840 struct ice_netdev_priv *np = netdev_priv(netdev);
7841 struct ice_vsi *vsi = np->vsi;
7842 struct ice_pf *pf = vsi->back;
7843 struct bpf_prog *prog;
7844 u8 count = 0;
7845 int err = 0;
7846
7847 if (new_mtu == (int)netdev->mtu) {
7848 netdev_warn(netdev, "MTU is already %u\n", netdev->mtu);
7849 return 0;
7850 }
7851
7852 prog = vsi->xdp_prog;
7853 if (prog && !prog->aux->xdp_has_frags) {
7854 int frame_size = ice_max_xdp_frame_size(vsi);
7855
7856 if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) {
7857 netdev_err(netdev, "max MTU for XDP usage is %d\n",
7858 frame_size - ICE_ETH_PKT_HDR_PAD);
7859 return -EINVAL;
7860 }
7861 }
7862
7863 /* if a reset is in progress, wait for some time for it to complete */
7864 do {
7865 if (ice_is_reset_in_progress(pf->state)) {
7866 count++;
7867 usleep_range(1000, 2000);
7868 } else {
7869 break;
7870 }
7871
7872 } while (count < 100);
7873
7874 if (count == 100) {
7875 netdev_err(netdev, "can't change MTU. Device is busy\n");
7876 return -EBUSY;
7877 }
7878
7879 WRITE_ONCE(netdev->mtu, (unsigned int)new_mtu);
7880 err = ice_down_up(vsi);
7881 if (err)
7882 return err;
7883
7884 netdev_dbg(netdev, "changed MTU to %d\n", new_mtu);
7885 set_bit(ICE_FLAG_MTU_CHANGED, pf->flags);
7886
7887 return err;
7888 }
7889
7890 /**
7891 * ice_set_rss_lut - Set RSS LUT
7892 * @vsi: Pointer to VSI structure
7893 * @lut: Lookup table
7894 * @lut_size: Lookup table size
7895 *
7896 * Returns 0 on success, negative on failure
7897 */
ice_set_rss_lut(struct ice_vsi * vsi,u8 * lut,u16 lut_size)7898 int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size)
7899 {
7900 struct ice_aq_get_set_rss_lut_params params = {};
7901 struct ice_hw *hw = &vsi->back->hw;
7902 int status;
7903
7904 if (!lut)
7905 return -EINVAL;
7906
7907 params.vsi_handle = vsi->idx;
7908 params.lut_size = lut_size;
7909 params.lut_type = vsi->rss_lut_type;
7910 params.lut = lut;
7911
7912 status = ice_aq_set_rss_lut(hw, ¶ms);
7913 if (status)
7914 dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS lut, err %d aq_err %s\n",
7915 status, libie_aq_str(hw->adminq.sq_last_status));
7916
7917 return status;
7918 }
7919
7920 /**
7921 * ice_set_rss_key - Set RSS key
7922 * @vsi: Pointer to the VSI structure
7923 * @seed: RSS hash seed
7924 *
7925 * Returns 0 on success, negative on failure
7926 */
ice_set_rss_key(struct ice_vsi * vsi,u8 * seed)7927 int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed)
7928 {
7929 struct ice_hw *hw = &vsi->back->hw;
7930 int status;
7931
7932 if (!seed)
7933 return -EINVAL;
7934
7935 status = ice_aq_set_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed);
7936 if (status)
7937 dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS key, err %d aq_err %s\n",
7938 status, libie_aq_str(hw->adminq.sq_last_status));
7939
7940 return status;
7941 }
7942
7943 /**
7944 * ice_get_rss_lut - Get RSS LUT
7945 * @vsi: Pointer to VSI structure
7946 * @lut: Buffer to store the lookup table entries
7947 * @lut_size: Size of buffer to store the lookup table entries
7948 *
7949 * Returns 0 on success, negative on failure
7950 */
ice_get_rss_lut(struct ice_vsi * vsi,u8 * lut,u16 lut_size)7951 int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size)
7952 {
7953 struct ice_aq_get_set_rss_lut_params params = {};
7954 struct ice_hw *hw = &vsi->back->hw;
7955 int status;
7956
7957 if (!lut)
7958 return -EINVAL;
7959
7960 params.vsi_handle = vsi->idx;
7961 params.lut_size = lut_size;
7962 params.lut_type = vsi->rss_lut_type;
7963 params.lut = lut;
7964
7965 status = ice_aq_get_rss_lut(hw, ¶ms);
7966 if (status)
7967 dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS lut, err %d aq_err %s\n",
7968 status, libie_aq_str(hw->adminq.sq_last_status));
7969
7970 return status;
7971 }
7972
7973 /**
7974 * ice_get_rss_key - Get RSS key
7975 * @vsi: Pointer to VSI structure
7976 * @seed: Buffer to store the key in
7977 *
7978 * Returns 0 on success, negative on failure
7979 */
ice_get_rss_key(struct ice_vsi * vsi,u8 * seed)7980 int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed)
7981 {
7982 struct ice_hw *hw = &vsi->back->hw;
7983 int status;
7984
7985 if (!seed)
7986 return -EINVAL;
7987
7988 status = ice_aq_get_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed);
7989 if (status)
7990 dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS key, err %d aq_err %s\n",
7991 status, libie_aq_str(hw->adminq.sq_last_status));
7992
7993 return status;
7994 }
7995
7996 /**
7997 * ice_get_rss - Get RSS LUT and/or key
7998 * @vsi: Pointer to VSI structure
7999 * @seed: Buffer to store the key in
8000 * @lut: Buffer to store the lookup table entries
8001 * @lut_size: Size of buffer to store the lookup table entries
8002 *
8003 * Return: 0 on success, negative on failure
8004 */
ice_get_rss(struct ice_vsi * vsi,u8 * seed,u8 * lut,u16 lut_size)8005 int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
8006 {
8007 int err;
8008
8009 if (seed) {
8010 err = ice_get_rss_key(vsi, seed);
8011 if (err)
8012 return err;
8013 }
8014
8015 if (lut) {
8016 err = ice_get_rss_lut(vsi, lut, lut_size);
8017 if (err)
8018 return err;
8019 }
8020
8021 return 0;
8022 }
8023
8024 /**
8025 * ice_set_rss_hfunc - Set RSS HASH function
8026 * @vsi: Pointer to VSI structure
8027 * @hfunc: hash function (ICE_AQ_VSI_Q_OPT_RSS_*)
8028 *
8029 * Returns 0 on success, negative on failure
8030 */
ice_set_rss_hfunc(struct ice_vsi * vsi,u8 hfunc)8031 int ice_set_rss_hfunc(struct ice_vsi *vsi, u8 hfunc)
8032 {
8033 struct ice_hw *hw = &vsi->back->hw;
8034 struct ice_vsi_ctx *ctx;
8035 bool symm;
8036 int err;
8037
8038 if (hfunc == vsi->rss_hfunc)
8039 return 0;
8040
8041 if (hfunc != ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ &&
8042 hfunc != ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ)
8043 return -EOPNOTSUPP;
8044
8045 ctx = kzalloc_obj(*ctx);
8046 if (!ctx)
8047 return -ENOMEM;
8048
8049 ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID);
8050 ctx->info.q_opt_rss = vsi->info.q_opt_rss;
8051 ctx->info.q_opt_rss &= ~ICE_AQ_VSI_Q_OPT_RSS_HASH_M;
8052 ctx->info.q_opt_rss |=
8053 FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_HASH_M, hfunc);
8054 ctx->info.q_opt_tc = vsi->info.q_opt_tc;
8055 ctx->info.q_opt_flags = vsi->info.q_opt_flags;
8056
8057 err = ice_update_vsi(hw, vsi->idx, ctx, NULL);
8058 if (err) {
8059 dev_err(ice_pf_to_dev(vsi->back), "Failed to configure RSS hash for VSI %d, error %d\n",
8060 vsi->vsi_num, err);
8061 } else {
8062 vsi->info.q_opt_rss = ctx->info.q_opt_rss;
8063 vsi->rss_hfunc = hfunc;
8064 netdev_info(vsi->netdev, "Hash function set to: %sToeplitz\n",
8065 hfunc == ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ ?
8066 "Symmetric " : "");
8067 }
8068 kfree(ctx);
8069 if (err)
8070 return err;
8071
8072 /* Fix the symmetry setting for all existing RSS configurations */
8073 symm = !!(hfunc == ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ);
8074 return ice_set_rss_cfg_symm(hw, vsi, symm);
8075 }
8076
8077 /**
8078 * ice_bridge_getlink - Get the hardware bridge mode
8079 * @skb: skb buff
8080 * @pid: process ID
8081 * @seq: RTNL message seq
8082 * @dev: the netdev being configured
8083 * @filter_mask: filter mask passed in
8084 * @nlflags: netlink flags passed in
8085 *
8086 * Return the bridge mode (VEB/VEPA)
8087 */
8088 static int
ice_bridge_getlink(struct sk_buff * skb,u32 pid,u32 seq,struct net_device * dev,u32 filter_mask,int nlflags)8089 ice_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
8090 struct net_device *dev, u32 filter_mask, int nlflags)
8091 {
8092 struct ice_pf *pf = ice_netdev_to_pf(dev);
8093 u16 bmode;
8094
8095 bmode = pf->first_sw->bridge_mode;
8096
8097 return ndo_dflt_bridge_getlink(skb, pid, seq, dev, bmode, 0, 0, nlflags,
8098 filter_mask, NULL);
8099 }
8100
8101 /**
8102 * ice_vsi_update_bridge_mode - Update VSI for switching bridge mode (VEB/VEPA)
8103 * @vsi: Pointer to VSI structure
8104 * @bmode: Hardware bridge mode (VEB/VEPA)
8105 *
8106 * Returns 0 on success, negative on failure
8107 */
ice_vsi_update_bridge_mode(struct ice_vsi * vsi,u16 bmode)8108 static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode)
8109 {
8110 struct ice_aqc_vsi_props *vsi_props;
8111 struct ice_hw *hw = &vsi->back->hw;
8112 struct ice_vsi_ctx *ctxt;
8113 int ret;
8114
8115 vsi_props = &vsi->info;
8116
8117 ctxt = kzalloc_obj(*ctxt);
8118 if (!ctxt)
8119 return -ENOMEM;
8120
8121 ctxt->info = vsi->info;
8122
8123 if (bmode == BRIDGE_MODE_VEB)
8124 /* change from VEPA to VEB mode */
8125 ctxt->info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
8126 else
8127 /* change from VEB to VEPA mode */
8128 ctxt->info.sw_flags &= ~ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
8129 ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID);
8130
8131 ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
8132 if (ret) {
8133 dev_err(ice_pf_to_dev(vsi->back), "update VSI for bridge mode failed, bmode = %d err %d aq_err %s\n",
8134 bmode, ret, libie_aq_str(hw->adminq.sq_last_status));
8135 goto out;
8136 }
8137 /* Update sw flags for book keeping */
8138 vsi_props->sw_flags = ctxt->info.sw_flags;
8139
8140 out:
8141 kfree(ctxt);
8142 return ret;
8143 }
8144
8145 /**
8146 * ice_bridge_setlink - Set the hardware bridge mode
8147 * @dev: the netdev being configured
8148 * @nlh: RTNL message
8149 * @flags: bridge setlink flags
8150 * @extack: netlink extended ack
8151 *
8152 * Sets the bridge mode (VEB/VEPA) of the switch to which the netdev (VSI) is
8153 * hooked up to. Iterates through the PF VSI list and sets the loopback mode (if
8154 * not already set for all VSIs connected to this switch. And also update the
8155 * unicast switch filter rules for the corresponding switch of the netdev.
8156 */
8157 static int
ice_bridge_setlink(struct net_device * dev,struct nlmsghdr * nlh,u16 __always_unused flags,struct netlink_ext_ack __always_unused * extack)8158 ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
8159 u16 __always_unused flags,
8160 struct netlink_ext_ack __always_unused *extack)
8161 {
8162 struct ice_pf *pf = ice_netdev_to_pf(dev);
8163 struct nlattr *attr, *br_spec;
8164 struct ice_hw *hw = &pf->hw;
8165 struct ice_sw *pf_sw;
8166 int rem, v, err = 0;
8167
8168 pf_sw = pf->first_sw;
8169 /* find the attribute in the netlink message */
8170 br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
8171 if (!br_spec)
8172 return -EINVAL;
8173
8174 nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) {
8175 __u16 mode = nla_get_u16(attr);
8176
8177 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
8178 return -EINVAL;
8179 /* Continue if bridge mode is not being flipped */
8180 if (mode == pf_sw->bridge_mode)
8181 continue;
8182 /* Iterates through the PF VSI list and update the loopback
8183 * mode of the VSI
8184 */
8185 ice_for_each_vsi(pf, v) {
8186 if (!pf->vsi[v])
8187 continue;
8188 err = ice_vsi_update_bridge_mode(pf->vsi[v], mode);
8189 if (err)
8190 return err;
8191 }
8192
8193 hw->evb_veb = (mode == BRIDGE_MODE_VEB);
8194 /* Update the unicast switch filter rules for the corresponding
8195 * switch of the netdev
8196 */
8197 err = ice_update_sw_rule_bridge_mode(hw);
8198 if (err) {
8199 netdev_err(dev, "switch rule update failed, mode = %d err %d aq_err %s\n",
8200 mode, err,
8201 libie_aq_str(hw->adminq.sq_last_status));
8202 /* revert hw->evb_veb */
8203 hw->evb_veb = (pf_sw->bridge_mode == BRIDGE_MODE_VEB);
8204 return err;
8205 }
8206
8207 pf_sw->bridge_mode = mode;
8208 }
8209
8210 return 0;
8211 }
8212
8213 /**
8214 * ice_tx_timeout - Respond to a Tx Hang
8215 * @netdev: network interface device structure
8216 * @txqueue: Tx queue
8217 */
ice_tx_timeout(struct net_device * netdev,unsigned int txqueue)8218 void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue)
8219 {
8220 struct ice_netdev_priv *np = netdev_priv(netdev);
8221 struct ice_tx_ring *tx_ring = NULL;
8222 struct ice_vsi *vsi = np->vsi;
8223 struct ice_pf *pf = vsi->back;
8224 u32 i;
8225
8226 pf->tx_timeout_count++;
8227
8228 /* Check if PFC is enabled for the TC to which the queue belongs
8229 * to. If yes then Tx timeout is not caused by a hung queue, no
8230 * need to reset and rebuild
8231 */
8232 if (ice_is_pfc_causing_hung_q(pf, txqueue)) {
8233 dev_info(ice_pf_to_dev(pf), "Fake Tx hang detected on queue %u, timeout caused by PFC storm\n",
8234 txqueue);
8235 return;
8236 }
8237
8238 /* now that we have an index, find the tx_ring struct */
8239 ice_for_each_txq(vsi, i)
8240 if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
8241 if (txqueue == vsi->tx_rings[i]->q_index) {
8242 tx_ring = vsi->tx_rings[i];
8243 break;
8244 }
8245
8246 /* Reset recovery level if enough time has elapsed after last timeout.
8247 * Also ensure no new reset action happens before next timeout period.
8248 */
8249 if (time_after(jiffies, (pf->tx_timeout_last_recovery + HZ * 20)))
8250 pf->tx_timeout_recovery_level = 1;
8251 else if (time_before(jiffies, (pf->tx_timeout_last_recovery +
8252 netdev->watchdog_timeo)))
8253 return;
8254
8255 if (tx_ring) {
8256 struct ice_hw *hw = &pf->hw;
8257 u32 head, intr = 0;
8258
8259 head = FIELD_GET(QTX_COMM_HEAD_HEAD_M,
8260 rd32(hw, QTX_COMM_HEAD(vsi->txq_map[txqueue])));
8261 /* Read interrupt register */
8262 intr = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx));
8263
8264 netdev_info(netdev, "tx_timeout: VSI_num: %d, Q %u, NTC: 0x%x, HW_HEAD: 0x%x, NTU: 0x%x, INT: 0x%x\n",
8265 vsi->vsi_num, txqueue, tx_ring->next_to_clean,
8266 head, tx_ring->next_to_use, intr);
8267
8268 ice_prep_tx_hang_report(pf, tx_ring, vsi->vsi_num, head, intr);
8269 }
8270
8271 pf->tx_timeout_last_recovery = jiffies;
8272 netdev_info(netdev, "tx_timeout recovery level %d, txqueue %u\n",
8273 pf->tx_timeout_recovery_level, txqueue);
8274
8275 switch (pf->tx_timeout_recovery_level) {
8276 case 1:
8277 set_bit(ICE_PFR_REQ, pf->state);
8278 break;
8279 case 2:
8280 set_bit(ICE_CORER_REQ, pf->state);
8281 break;
8282 case 3:
8283 set_bit(ICE_GLOBR_REQ, pf->state);
8284 break;
8285 default:
8286 netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n");
8287 set_bit(ICE_DOWN, pf->state);
8288 set_bit(ICE_VSI_NEEDS_RESTART, vsi->state);
8289 set_bit(ICE_SERVICE_DIS, pf->state);
8290 break;
8291 }
8292
8293 ice_service_task_schedule(pf);
8294 pf->tx_timeout_recovery_level++;
8295 }
8296
8297 /**
8298 * ice_setup_tc_cls_flower - flower classifier offloads
8299 * @np: net device to configure
8300 * @filter_dev: device on which filter is added
8301 * @cls_flower: offload data
8302 * @ingress: if the rule is added to an ingress block
8303 *
8304 * Return: 0 if the flower was successfully added or deleted,
8305 * negative error code otherwise.
8306 */
8307 static int
ice_setup_tc_cls_flower(struct ice_netdev_priv * np,struct net_device * filter_dev,struct flow_cls_offload * cls_flower,bool ingress)8308 ice_setup_tc_cls_flower(struct ice_netdev_priv *np,
8309 struct net_device *filter_dev,
8310 struct flow_cls_offload *cls_flower,
8311 bool ingress)
8312 {
8313 struct ice_vsi *vsi = np->vsi;
8314
8315 if (cls_flower->common.chain_index)
8316 return -EOPNOTSUPP;
8317
8318 switch (cls_flower->command) {
8319 case FLOW_CLS_REPLACE:
8320 return ice_add_cls_flower(filter_dev, vsi, cls_flower, ingress);
8321 case FLOW_CLS_DESTROY:
8322 return ice_del_cls_flower(vsi, cls_flower);
8323 default:
8324 return -EINVAL;
8325 }
8326 }
8327
8328 /**
8329 * ice_setup_tc_block_cb_ingress - callback handler for ingress TC block
8330 * @type: TC SETUP type
8331 * @type_data: TC flower offload data that contains user input
8332 * @cb_priv: netdev private data
8333 *
8334 * Return: 0 if the setup was successful, negative error code otherwise.
8335 */
8336 static int
ice_setup_tc_block_cb_ingress(enum tc_setup_type type,void * type_data,void * cb_priv)8337 ice_setup_tc_block_cb_ingress(enum tc_setup_type type, void *type_data,
8338 void *cb_priv)
8339 {
8340 struct ice_netdev_priv *np = cb_priv;
8341
8342 switch (type) {
8343 case TC_SETUP_CLSFLOWER:
8344 return ice_setup_tc_cls_flower(np, np->vsi->netdev,
8345 type_data, true);
8346 default:
8347 return -EOPNOTSUPP;
8348 }
8349 }
8350
8351 /**
8352 * ice_setup_tc_block_cb_egress - callback handler for egress TC block
8353 * @type: TC SETUP type
8354 * @type_data: TC flower offload data that contains user input
8355 * @cb_priv: netdev private data
8356 *
8357 * Return: 0 if the setup was successful, negative error code otherwise.
8358 */
8359 static int
ice_setup_tc_block_cb_egress(enum tc_setup_type type,void * type_data,void * cb_priv)8360 ice_setup_tc_block_cb_egress(enum tc_setup_type type, void *type_data,
8361 void *cb_priv)
8362 {
8363 struct ice_netdev_priv *np = cb_priv;
8364
8365 switch (type) {
8366 case TC_SETUP_CLSFLOWER:
8367 return ice_setup_tc_cls_flower(np, np->vsi->netdev,
8368 type_data, false);
8369 default:
8370 return -EOPNOTSUPP;
8371 }
8372 }
8373
8374 /**
8375 * ice_validate_mqprio_qopt - Validate TCF input parameters
8376 * @vsi: Pointer to VSI
8377 * @mqprio_qopt: input parameters for mqprio queue configuration
8378 *
8379 * This function validates MQPRIO params, such as qcount (power of 2 wherever
8380 * needed), and make sure user doesn't specify qcount and BW rate limit
8381 * for TCs, which are more than "num_tc"
8382 */
8383 static int
ice_validate_mqprio_qopt(struct ice_vsi * vsi,struct tc_mqprio_qopt_offload * mqprio_qopt)8384 ice_validate_mqprio_qopt(struct ice_vsi *vsi,
8385 struct tc_mqprio_qopt_offload *mqprio_qopt)
8386 {
8387 int non_power_of_2_qcount = 0;
8388 struct ice_pf *pf = vsi->back;
8389 int max_rss_q_cnt = 0;
8390 u64 sum_min_rate = 0;
8391 struct device *dev;
8392 int i, speed;
8393 u8 num_tc;
8394
8395 if (vsi->type != ICE_VSI_PF)
8396 return -EINVAL;
8397
8398 if (mqprio_qopt->qopt.offset[0] != 0 ||
8399 mqprio_qopt->qopt.num_tc < 1 ||
8400 mqprio_qopt->qopt.num_tc > ICE_CHNL_MAX_TC)
8401 return -EINVAL;
8402
8403 dev = ice_pf_to_dev(pf);
8404 vsi->ch_rss_size = 0;
8405 num_tc = mqprio_qopt->qopt.num_tc;
8406 speed = ice_get_link_speed_kbps(vsi);
8407
8408 for (i = 0; num_tc; i++) {
8409 int qcount = mqprio_qopt->qopt.count[i];
8410 u64 max_rate, min_rate, rem;
8411
8412 if (!qcount)
8413 return -EINVAL;
8414
8415 if (is_power_of_2(qcount)) {
8416 if (non_power_of_2_qcount &&
8417 qcount > non_power_of_2_qcount) {
8418 dev_err(dev, "qcount[%d] cannot be greater than non power of 2 qcount[%d]\n",
8419 qcount, non_power_of_2_qcount);
8420 return -EINVAL;
8421 }
8422 if (qcount > max_rss_q_cnt)
8423 max_rss_q_cnt = qcount;
8424 } else {
8425 if (non_power_of_2_qcount &&
8426 qcount != non_power_of_2_qcount) {
8427 dev_err(dev, "Only one non power of 2 qcount allowed[%d,%d]\n",
8428 qcount, non_power_of_2_qcount);
8429 return -EINVAL;
8430 }
8431 if (qcount < max_rss_q_cnt) {
8432 dev_err(dev, "non power of 2 qcount[%d] cannot be less than other qcount[%d]\n",
8433 qcount, max_rss_q_cnt);
8434 return -EINVAL;
8435 }
8436 max_rss_q_cnt = qcount;
8437 non_power_of_2_qcount = qcount;
8438 }
8439
8440 /* TC command takes input in K/N/Gbps or K/M/Gbit etc but
8441 * converts the bandwidth rate limit into Bytes/s when
8442 * passing it down to the driver. So convert input bandwidth
8443 * from Bytes/s to Kbps
8444 */
8445 max_rate = mqprio_qopt->max_rate[i];
8446 max_rate = div_u64(max_rate, ICE_BW_KBPS_DIVISOR);
8447
8448 /* min_rate is minimum guaranteed rate and it can't be zero */
8449 min_rate = mqprio_qopt->min_rate[i];
8450 min_rate = div_u64(min_rate, ICE_BW_KBPS_DIVISOR);
8451 sum_min_rate += min_rate;
8452
8453 if (min_rate && min_rate < ICE_MIN_BW_LIMIT) {
8454 dev_err(dev, "TC%d: min_rate(%llu Kbps) < %u Kbps\n", i,
8455 min_rate, ICE_MIN_BW_LIMIT);
8456 return -EINVAL;
8457 }
8458
8459 if (max_rate && max_rate > speed) {
8460 dev_err(dev, "TC%d: max_rate(%llu Kbps) > link speed of %u Kbps\n",
8461 i, max_rate, speed);
8462 return -EINVAL;
8463 }
8464
8465 iter_div_u64_rem(min_rate, ICE_MIN_BW_LIMIT, &rem);
8466 if (rem) {
8467 dev_err(dev, "TC%d: Min Rate not multiple of %u Kbps",
8468 i, ICE_MIN_BW_LIMIT);
8469 return -EINVAL;
8470 }
8471
8472 iter_div_u64_rem(max_rate, ICE_MIN_BW_LIMIT, &rem);
8473 if (rem) {
8474 dev_err(dev, "TC%d: Max Rate not multiple of %u Kbps",
8475 i, ICE_MIN_BW_LIMIT);
8476 return -EINVAL;
8477 }
8478
8479 /* min_rate can't be more than max_rate, except when max_rate
8480 * is zero (implies max_rate sought is max line rate). In such
8481 * a case min_rate can be more than max.
8482 */
8483 if (max_rate && min_rate > max_rate) {
8484 dev_err(dev, "min_rate %llu Kbps can't be more than max_rate %llu Kbps\n",
8485 min_rate, max_rate);
8486 return -EINVAL;
8487 }
8488
8489 if (i >= mqprio_qopt->qopt.num_tc - 1)
8490 break;
8491 if (mqprio_qopt->qopt.offset[i + 1] !=
8492 (mqprio_qopt->qopt.offset[i] + qcount))
8493 return -EINVAL;
8494 }
8495 if (vsi->num_rxq <
8496 (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
8497 return -EINVAL;
8498 if (vsi->num_txq <
8499 (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
8500 return -EINVAL;
8501
8502 if (sum_min_rate && sum_min_rate > (u64)speed) {
8503 dev_err(dev, "Invalid min Tx rate(%llu) Kbps > speed (%u) Kbps specified\n",
8504 sum_min_rate, speed);
8505 return -EINVAL;
8506 }
8507
8508 /* make sure vsi->ch_rss_size is set correctly based on TC's qcount */
8509 vsi->ch_rss_size = max_rss_q_cnt;
8510
8511 return 0;
8512 }
8513
8514 /**
8515 * ice_add_vsi_to_fdir - add a VSI to the flow director group for PF
8516 * @pf: ptr to PF device
8517 * @vsi: ptr to VSI
8518 */
ice_add_vsi_to_fdir(struct ice_pf * pf,struct ice_vsi * vsi)8519 static int ice_add_vsi_to_fdir(struct ice_pf *pf, struct ice_vsi *vsi)
8520 {
8521 struct device *dev = ice_pf_to_dev(pf);
8522 bool added = false;
8523 struct ice_hw *hw;
8524 int flow;
8525
8526 if (!(vsi->num_gfltr || vsi->num_bfltr))
8527 return -EINVAL;
8528
8529 hw = &pf->hw;
8530 for (flow = 0; flow < ICE_FLTR_PTYPE_MAX; flow++) {
8531 struct ice_fd_hw_prof *prof;
8532 int tun, status;
8533 u64 entry_h;
8534
8535 if (!(hw->fdir_prof && hw->fdir_prof[flow] &&
8536 hw->fdir_prof[flow]->cnt))
8537 continue;
8538
8539 for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
8540 enum ice_flow_priority prio;
8541
8542 /* add this VSI to FDir profile for this flow */
8543 prio = ICE_FLOW_PRIO_NORMAL;
8544 prof = hw->fdir_prof[flow];
8545 status = ice_flow_add_entry(hw, ICE_BLK_FD,
8546 prof->prof_id[tun],
8547 prof->vsi_h[0], vsi->idx,
8548 prio, prof->fdir_seg[tun],
8549 &entry_h);
8550 if (status) {
8551 dev_err(dev, "channel VSI idx %d, not able to add to group %d\n",
8552 vsi->idx, flow);
8553 continue;
8554 }
8555
8556 prof->entry_h[prof->cnt][tun] = entry_h;
8557 }
8558
8559 /* store VSI for filter replay and delete */
8560 prof->vsi_h[prof->cnt] = vsi->idx;
8561 prof->cnt++;
8562
8563 added = true;
8564 dev_dbg(dev, "VSI idx %d added to fdir group %d\n", vsi->idx,
8565 flow);
8566 }
8567
8568 if (!added)
8569 dev_dbg(dev, "VSI idx %d not added to fdir groups\n", vsi->idx);
8570
8571 return 0;
8572 }
8573
8574 /**
8575 * ice_add_channel - add a channel by adding VSI
8576 * @pf: ptr to PF device
8577 * @sw_id: underlying HW switching element ID
8578 * @ch: ptr to channel structure
8579 *
8580 * Add a channel (VSI) using add_vsi and queue_map
8581 */
ice_add_channel(struct ice_pf * pf,u16 sw_id,struct ice_channel * ch)8582 static int ice_add_channel(struct ice_pf *pf, u16 sw_id, struct ice_channel *ch)
8583 {
8584 struct device *dev = ice_pf_to_dev(pf);
8585 struct ice_vsi *vsi;
8586
8587 if (ch->type != ICE_VSI_CHNL) {
8588 dev_err(dev, "add new VSI failed, ch->type %d\n", ch->type);
8589 return -EINVAL;
8590 }
8591
8592 vsi = ice_chnl_vsi_setup(pf, pf->hw.port_info, ch);
8593 if (!vsi || vsi->type != ICE_VSI_CHNL) {
8594 dev_err(dev, "create chnl VSI failure\n");
8595 return -EINVAL;
8596 }
8597
8598 ice_add_vsi_to_fdir(pf, vsi);
8599
8600 ch->sw_id = sw_id;
8601 ch->vsi_num = vsi->vsi_num;
8602 ch->info.mapping_flags = vsi->info.mapping_flags;
8603 ch->ch_vsi = vsi;
8604 /* set the back pointer of channel for newly created VSI */
8605 vsi->ch = ch;
8606
8607 memcpy(&ch->info.q_mapping, &vsi->info.q_mapping,
8608 sizeof(vsi->info.q_mapping));
8609 memcpy(&ch->info.tc_mapping, vsi->info.tc_mapping,
8610 sizeof(vsi->info.tc_mapping));
8611
8612 return 0;
8613 }
8614
8615 /**
8616 * ice_chnl_cfg_res
8617 * @vsi: the VSI being setup
8618 * @ch: ptr to channel structure
8619 *
8620 * Configure channel specific resources such as rings, vector.
8621 */
ice_chnl_cfg_res(struct ice_vsi * vsi,struct ice_channel * ch)8622 static void ice_chnl_cfg_res(struct ice_vsi *vsi, struct ice_channel *ch)
8623 {
8624 int i;
8625
8626 for (i = 0; i < ch->num_txq; i++) {
8627 struct ice_q_vector *tx_q_vector, *rx_q_vector;
8628 struct ice_ring_container *rc;
8629 struct ice_tx_ring *tx_ring;
8630 struct ice_rx_ring *rx_ring;
8631
8632 tx_ring = vsi->tx_rings[ch->base_q + i];
8633 rx_ring = vsi->rx_rings[ch->base_q + i];
8634 if (!tx_ring || !rx_ring)
8635 continue;
8636
8637 /* setup ring being channel enabled */
8638 tx_ring->ch = ch;
8639 rx_ring->ch = ch;
8640
8641 /* following code block sets up vector specific attributes */
8642 tx_q_vector = tx_ring->q_vector;
8643 rx_q_vector = rx_ring->q_vector;
8644 if (!tx_q_vector && !rx_q_vector)
8645 continue;
8646
8647 if (tx_q_vector) {
8648 tx_q_vector->ch = ch;
8649 /* setup Tx and Rx ITR setting if DIM is off */
8650 rc = &tx_q_vector->tx;
8651 if (!ITR_IS_DYNAMIC(rc))
8652 ice_write_itr(rc, rc->itr_setting);
8653 }
8654 if (rx_q_vector) {
8655 rx_q_vector->ch = ch;
8656 /* setup Tx and Rx ITR setting if DIM is off */
8657 rc = &rx_q_vector->rx;
8658 if (!ITR_IS_DYNAMIC(rc))
8659 ice_write_itr(rc, rc->itr_setting);
8660 }
8661 }
8662
8663 /* it is safe to assume that, if channel has non-zero num_t[r]xq, then
8664 * GLINT_ITR register would have written to perform in-context
8665 * update, hence perform flush
8666 */
8667 if (ch->num_txq || ch->num_rxq)
8668 ice_flush(&vsi->back->hw);
8669 }
8670
/**
 * ice_cfg_chnl_all_res - configure channel resources
 * @vsi: ptr to main_vsi
 * @ch: ptr to channel structure
 *
 * Entry point for configuring the resources of a channel (aka ADQ). At
 * present this only delegates to ice_chnl_cfg_res(), which sets up the
 * queues, vectors and ITR settings of the channel.
 */
static void
ice_cfg_chnl_all_res(struct ice_vsi *vsi, struct ice_channel *ch)
{
	/* queues, vectors and ITR settings of the channel specific vectors */
	ice_chnl_cfg_res(vsi, ch);
}
8687
8688 /**
8689 * ice_setup_hw_channel - setup new channel
8690 * @pf: ptr to PF device
8691 * @vsi: the VSI being setup
8692 * @ch: ptr to channel structure
8693 * @sw_id: underlying HW switching element ID
8694 * @type: type of channel to be created (VMDq2/VF)
8695 *
8696 * Setup new channel (VSI) based on specified type (VMDq2/VF)
8697 * and configures Tx rings accordingly
8698 */
8699 static int
ice_setup_hw_channel(struct ice_pf * pf,struct ice_vsi * vsi,struct ice_channel * ch,u16 sw_id,u8 type)8700 ice_setup_hw_channel(struct ice_pf *pf, struct ice_vsi *vsi,
8701 struct ice_channel *ch, u16 sw_id, u8 type)
8702 {
8703 struct device *dev = ice_pf_to_dev(pf);
8704 int ret;
8705
8706 ch->base_q = vsi->next_base_q;
8707 ch->type = type;
8708
8709 ret = ice_add_channel(pf, sw_id, ch);
8710 if (ret) {
8711 dev_err(dev, "failed to add_channel using sw_id %u\n", sw_id);
8712 return ret;
8713 }
8714
8715 /* configure/setup ADQ specific resources */
8716 ice_cfg_chnl_all_res(vsi, ch);
8717
8718 /* make sure to update the next_base_q so that subsequent channel's
8719 * (aka ADQ) VSI queue map is correct
8720 */
8721 vsi->next_base_q = vsi->next_base_q + ch->num_rxq;
8722 dev_dbg(dev, "added channel: vsi_num %u, num_rxq %u\n", ch->vsi_num,
8723 ch->num_rxq);
8724
8725 return 0;
8726 }
8727
8728 /**
8729 * ice_setup_channel - setup new channel using uplink element
8730 * @pf: ptr to PF device
8731 * @vsi: the VSI being setup
8732 * @ch: ptr to channel structure
8733 *
8734 * Setup new channel (VSI) based on specified type (VMDq2/VF)
8735 * and uplink switching element
8736 */
8737 static bool
ice_setup_channel(struct ice_pf * pf,struct ice_vsi * vsi,struct ice_channel * ch)8738 ice_setup_channel(struct ice_pf *pf, struct ice_vsi *vsi,
8739 struct ice_channel *ch)
8740 {
8741 struct device *dev = ice_pf_to_dev(pf);
8742 u16 sw_id;
8743 int ret;
8744
8745 if (vsi->type != ICE_VSI_PF) {
8746 dev_err(dev, "unsupported parent VSI type(%d)\n", vsi->type);
8747 return false;
8748 }
8749
8750 sw_id = pf->first_sw->sw_id;
8751
8752 /* create channel (VSI) */
8753 ret = ice_setup_hw_channel(pf, vsi, ch, sw_id, ICE_VSI_CHNL);
8754 if (ret) {
8755 dev_err(dev, "failed to setup hw_channel\n");
8756 return false;
8757 }
8758 dev_dbg(dev, "successfully created channel()\n");
8759
8760 return ch->ch_vsi ? true : false;
8761 }
8762
8763 /**
8764 * ice_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate
8765 * @vsi: VSI to be configured
8766 * @max_tx_rate: max Tx rate in Kbps to be configured as maximum BW limit
8767 * @min_tx_rate: min Tx rate in Kbps to be configured as minimum BW limit
8768 */
8769 static int
ice_set_bw_limit(struct ice_vsi * vsi,u64 max_tx_rate,u64 min_tx_rate)8770 ice_set_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate, u64 min_tx_rate)
8771 {
8772 int err;
8773
8774 err = ice_set_min_bw_limit(vsi, min_tx_rate);
8775 if (err)
8776 return err;
8777
8778 return ice_set_max_bw_limit(vsi, max_tx_rate);
8779 }
8780
8781 /**
8782 * ice_create_q_channel - function to create channel
8783 * @vsi: VSI to be configured
8784 * @ch: ptr to channel (it contains channel specific params)
8785 *
8786 * This function creates channel (VSI) using num_queues specified by user,
8787 * reconfigs RSS if needed.
8788 */
ice_create_q_channel(struct ice_vsi * vsi,struct ice_channel * ch)8789 static int ice_create_q_channel(struct ice_vsi *vsi, struct ice_channel *ch)
8790 {
8791 struct ice_pf *pf = vsi->back;
8792 struct device *dev;
8793
8794 if (!ch)
8795 return -EINVAL;
8796
8797 dev = ice_pf_to_dev(pf);
8798 if (!ch->num_txq || !ch->num_rxq) {
8799 dev_err(dev, "Invalid num_queues requested: %d\n", ch->num_rxq);
8800 return -EINVAL;
8801 }
8802
8803 if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_txq) {
8804 dev_err(dev, "cnt_q_avail (%u) less than num_queues %d\n",
8805 vsi->cnt_q_avail, ch->num_txq);
8806 return -EINVAL;
8807 }
8808
8809 if (!ice_setup_channel(pf, vsi, ch)) {
8810 dev_info(dev, "Failed to setup channel\n");
8811 return -EINVAL;
8812 }
8813 /* configure BW rate limit */
8814 if (ch->ch_vsi && (ch->max_tx_rate || ch->min_tx_rate)) {
8815 int ret;
8816
8817 ret = ice_set_bw_limit(ch->ch_vsi, ch->max_tx_rate,
8818 ch->min_tx_rate);
8819 if (ret)
8820 dev_err(dev, "failed to set Tx rate of %llu Kbps for VSI(%u)\n",
8821 ch->max_tx_rate, ch->ch_vsi->vsi_num);
8822 else
8823 dev_dbg(dev, "set Tx rate of %llu Kbps for VSI(%u)\n",
8824 ch->max_tx_rate, ch->ch_vsi->vsi_num);
8825 }
8826
8827 vsi->cnt_q_avail -= ch->num_txq;
8828
8829 return 0;
8830 }
8831
8832 /**
8833 * ice_rem_all_chnl_fltrs - removes all channel filters
8834 * @pf: ptr to PF, TC-flower based filter are tracked at PF level
8835 *
8836 * Remove all advanced switch filters only if they are channel specific
8837 * tc-flower based filter
8838 */
ice_rem_all_chnl_fltrs(struct ice_pf * pf)8839 static void ice_rem_all_chnl_fltrs(struct ice_pf *pf)
8840 {
8841 struct ice_tc_flower_fltr *fltr;
8842 struct hlist_node *node;
8843
8844 /* to remove all channel filters, iterate an ordered list of filters */
8845 hlist_for_each_entry_safe(fltr, node,
8846 &pf->tc_flower_fltr_list,
8847 tc_flower_node) {
8848 struct ice_rule_query_data rule;
8849 int status;
8850
8851 /* for now process only channel specific filters */
8852 if (!ice_is_chnl_fltr(fltr))
8853 continue;
8854
8855 rule.rid = fltr->rid;
8856 rule.rule_id = fltr->rule_id;
8857 rule.vsi_handle = fltr->dest_vsi_handle;
8858 status = ice_rem_adv_rule_by_id(&pf->hw, &rule);
8859 if (status) {
8860 if (status == -ENOENT)
8861 dev_dbg(ice_pf_to_dev(pf), "TC flower filter (rule_id %u) does not exist\n",
8862 rule.rule_id);
8863 else
8864 dev_err(ice_pf_to_dev(pf), "failed to delete TC flower filter, status %d\n",
8865 status);
8866 } else if (fltr->dest_vsi) {
8867 /* update advanced switch filter count */
8868 if (fltr->dest_vsi->type == ICE_VSI_CHNL) {
8869 u32 flags = fltr->flags;
8870
8871 fltr->dest_vsi->num_chnl_fltr--;
8872 if (flags & (ICE_TC_FLWR_FIELD_DST_MAC |
8873 ICE_TC_FLWR_FIELD_ENC_DST_MAC))
8874 pf->num_dmac_chnl_fltrs--;
8875 }
8876 }
8877
8878 hlist_del(&fltr->tc_flower_node);
8879 kfree(fltr);
8880 }
8881 }
8882
8883 /**
8884 * ice_remove_q_channels - Remove queue channels for the TCs
8885 * @vsi: VSI to be configured
8886 * @rem_fltr: delete advanced switch filter or not
8887 *
8888 * Remove queue channels for the TCs
8889 */
ice_remove_q_channels(struct ice_vsi * vsi,bool rem_fltr)8890 static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_fltr)
8891 {
8892 struct ice_channel *ch, *ch_tmp;
8893 struct ice_pf *pf = vsi->back;
8894 int i;
8895
8896 /* remove all tc-flower based filter if they are channel filters only */
8897 if (rem_fltr)
8898 ice_rem_all_chnl_fltrs(pf);
8899
8900 /* remove ntuple filters since queue configuration is being changed */
8901 if (vsi->netdev->features & NETIF_F_NTUPLE) {
8902 struct ice_hw *hw = &pf->hw;
8903
8904 mutex_lock(&hw->fdir_fltr_lock);
8905 ice_fdir_del_all_fltrs(vsi);
8906 mutex_unlock(&hw->fdir_fltr_lock);
8907 }
8908
8909 /* perform cleanup for channels if they exist */
8910 list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
8911 struct ice_vsi *ch_vsi;
8912
8913 list_del(&ch->list);
8914 ch_vsi = ch->ch_vsi;
8915 if (!ch_vsi) {
8916 kfree(ch);
8917 continue;
8918 }
8919
8920 /* Reset queue contexts */
8921 for (i = 0; i < ch->num_rxq; i++) {
8922 struct ice_tx_ring *tx_ring;
8923 struct ice_rx_ring *rx_ring;
8924
8925 tx_ring = vsi->tx_rings[ch->base_q + i];
8926 rx_ring = vsi->rx_rings[ch->base_q + i];
8927 if (tx_ring) {
8928 tx_ring->ch = NULL;
8929 if (tx_ring->q_vector)
8930 tx_ring->q_vector->ch = NULL;
8931 }
8932 if (rx_ring) {
8933 rx_ring->ch = NULL;
8934 if (rx_ring->q_vector)
8935 rx_ring->q_vector->ch = NULL;
8936 }
8937 }
8938
8939 /* Release FD resources for the channel VSI */
8940 ice_fdir_rem_adq_chnl(&pf->hw, ch->ch_vsi->idx);
8941
8942 /* clear the VSI from scheduler tree */
8943 ice_rm_vsi_lan_cfg(ch->ch_vsi->port_info, ch->ch_vsi->idx);
8944
8945 /* Delete VSI from FW, PF and HW VSI arrays */
8946 ice_vsi_delete(ch->ch_vsi);
8947
8948 /* free the channel */
8949 kfree(ch);
8950 }
8951
8952 /* clear the channel VSI map which is stored in main VSI */
8953 ice_for_each_chnl_tc(i)
8954 vsi->tc_map_vsi[i] = NULL;
8955
8956 /* reset main VSI's all TC information */
8957 vsi->all_enatc = 0;
8958 vsi->all_numtc = 0;
8959 }
8960
8961 /**
8962 * ice_rebuild_channels - rebuild channel
8963 * @pf: ptr to PF
8964 *
8965 * Recreate channel VSIs and replay filters
8966 */
ice_rebuild_channels(struct ice_pf * pf)8967 static int ice_rebuild_channels(struct ice_pf *pf)
8968 {
8969 struct device *dev = ice_pf_to_dev(pf);
8970 struct ice_vsi *main_vsi;
8971 bool rem_adv_fltr = true;
8972 struct ice_channel *ch;
8973 struct ice_vsi *vsi;
8974 int tc_idx = 1;
8975 int i, err;
8976
8977 main_vsi = ice_get_main_vsi(pf);
8978 if (!main_vsi)
8979 return 0;
8980
8981 if (!test_bit(ICE_FLAG_TC_MQPRIO, pf->flags) ||
8982 main_vsi->old_numtc == 1)
8983 return 0; /* nothing to be done */
8984
8985 /* reconfigure main VSI based on old value of TC and cached values
8986 * for MQPRIO opts
8987 */
8988 err = ice_vsi_cfg_tc(main_vsi, main_vsi->old_ena_tc);
8989 if (err) {
8990 dev_err(dev, "failed configuring TC(ena_tc:0x%02x) for HW VSI=%u\n",
8991 main_vsi->old_ena_tc, main_vsi->vsi_num);
8992 return err;
8993 }
8994
8995 /* rebuild ADQ VSIs */
8996 ice_for_each_vsi(pf, i) {
8997 enum ice_vsi_type type;
8998
8999 vsi = pf->vsi[i];
9000 if (!vsi || vsi->type != ICE_VSI_CHNL)
9001 continue;
9002
9003 type = vsi->type;
9004
9005 /* rebuild ADQ VSI */
9006 err = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_INIT);
9007 if (err) {
9008 dev_err(dev, "VSI (type:%s) at index %d rebuild failed, err %d\n",
9009 ice_vsi_type_str(type), vsi->idx, err);
9010 goto cleanup;
9011 }
9012
9013 /* Re-map HW VSI number, using VSI handle that has been
9014 * previously validated in ice_replay_vsi() call above
9015 */
9016 vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx);
9017
9018 /* replay filters for the VSI */
9019 err = ice_replay_vsi(&pf->hw, vsi->idx);
9020 if (err) {
9021 dev_err(dev, "VSI (type:%s) replay failed, err %d, VSI index %d\n",
9022 ice_vsi_type_str(type), err, vsi->idx);
9023 rem_adv_fltr = false;
9024 goto cleanup;
9025 }
9026 dev_info(dev, "VSI (type:%s) at index %d rebuilt successfully\n",
9027 ice_vsi_type_str(type), vsi->idx);
9028
9029 /* store ADQ VSI at correct TC index in main VSI's
9030 * map of TC to VSI
9031 */
9032 main_vsi->tc_map_vsi[tc_idx++] = vsi;
9033 }
9034
9035 /* ADQ VSI(s) has been rebuilt successfully, so setup
9036 * channel for main VSI's Tx and Rx rings
9037 */
9038 list_for_each_entry(ch, &main_vsi->ch_list, list) {
9039 struct ice_vsi *ch_vsi;
9040
9041 ch_vsi = ch->ch_vsi;
9042 if (!ch_vsi)
9043 continue;
9044
9045 /* reconfig channel resources */
9046 ice_cfg_chnl_all_res(main_vsi, ch);
9047
9048 /* replay BW rate limit if it is non-zero */
9049 if (!ch->max_tx_rate && !ch->min_tx_rate)
9050 continue;
9051
9052 err = ice_set_bw_limit(ch_vsi, ch->max_tx_rate,
9053 ch->min_tx_rate);
9054 if (err)
9055 dev_err(dev, "failed (err:%d) to rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n",
9056 err, ch->max_tx_rate, ch->min_tx_rate,
9057 ch_vsi->vsi_num);
9058 else
9059 dev_dbg(dev, "successfully rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n",
9060 ch->max_tx_rate, ch->min_tx_rate,
9061 ch_vsi->vsi_num);
9062 }
9063
9064 /* reconfig RSS for main VSI */
9065 if (main_vsi->ch_rss_size)
9066 ice_vsi_cfg_rss_lut_key(main_vsi);
9067
9068 return 0;
9069
9070 cleanup:
9071 ice_remove_q_channels(main_vsi, rem_adv_fltr);
9072 return err;
9073 }
9074
9075 /**
9076 * ice_create_q_channels - Add queue channel for the given TCs
9077 * @vsi: VSI to be configured
9078 *
9079 * Configures queue channel mapping to the given TCs
9080 */
ice_create_q_channels(struct ice_vsi * vsi)9081 static int ice_create_q_channels(struct ice_vsi *vsi)
9082 {
9083 struct ice_pf *pf = vsi->back;
9084 struct ice_channel *ch;
9085 int ret = 0, i;
9086
9087 ice_for_each_chnl_tc(i) {
9088 if (!(vsi->all_enatc & BIT(i)))
9089 continue;
9090
9091 ch = kzalloc_obj(*ch);
9092 if (!ch) {
9093 ret = -ENOMEM;
9094 goto err_free;
9095 }
9096 INIT_LIST_HEAD(&ch->list);
9097 ch->num_rxq = vsi->mqprio_qopt.qopt.count[i];
9098 ch->num_txq = vsi->mqprio_qopt.qopt.count[i];
9099 ch->base_q = vsi->mqprio_qopt.qopt.offset[i];
9100 ch->max_tx_rate = vsi->mqprio_qopt.max_rate[i];
9101 ch->min_tx_rate = vsi->mqprio_qopt.min_rate[i];
9102
9103 /* convert to Kbits/s */
9104 if (ch->max_tx_rate)
9105 ch->max_tx_rate = div_u64(ch->max_tx_rate,
9106 ICE_BW_KBPS_DIVISOR);
9107 if (ch->min_tx_rate)
9108 ch->min_tx_rate = div_u64(ch->min_tx_rate,
9109 ICE_BW_KBPS_DIVISOR);
9110
9111 ret = ice_create_q_channel(vsi, ch);
9112 if (ret) {
9113 dev_err(ice_pf_to_dev(pf),
9114 "failed creating channel TC:%d\n", i);
9115 kfree(ch);
9116 goto err_free;
9117 }
9118 list_add_tail(&ch->list, &vsi->ch_list);
9119 vsi->tc_map_vsi[i] = ch->ch_vsi;
9120 dev_dbg(ice_pf_to_dev(pf),
9121 "successfully created channel: VSI %p\n", ch->ch_vsi);
9122 }
9123 return 0;
9124
9125 err_free:
9126 ice_remove_q_channels(vsi, false);
9127
9128 return ret;
9129 }
9130
9131 /**
9132 * ice_setup_tc_mqprio_qdisc - configure multiple traffic classes
9133 * @netdev: net device to configure
9134 * @type_data: TC offload data
9135 */
ice_setup_tc_mqprio_qdisc(struct net_device * netdev,void * type_data)9136 static int ice_setup_tc_mqprio_qdisc(struct net_device *netdev, void *type_data)
9137 {
9138 struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
9139 struct ice_netdev_priv *np = netdev_priv(netdev);
9140 struct ice_vsi *vsi = np->vsi;
9141 struct ice_pf *pf = vsi->back;
9142 u16 mode, ena_tc_qdisc = 0;
9143 int cur_txq, cur_rxq;
9144 u8 hw = 0, num_tcf;
9145 struct device *dev;
9146 int ret, i;
9147
9148 dev = ice_pf_to_dev(pf);
9149 num_tcf = mqprio_qopt->qopt.num_tc;
9150 hw = mqprio_qopt->qopt.hw;
9151 mode = mqprio_qopt->mode;
9152 if (!hw) {
9153 clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
9154 vsi->ch_rss_size = 0;
9155 memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
9156 goto config_tcf;
9157 }
9158
9159 /* Generate queue region map for number of TCF requested */
9160 for (i = 0; i < num_tcf; i++)
9161 ena_tc_qdisc |= BIT(i);
9162
9163 switch (mode) {
9164 case TC_MQPRIO_MODE_CHANNEL:
9165
9166 if (pf->hw.port_info->is_custom_tx_enabled) {
9167 dev_err(dev, "Custom Tx scheduler feature enabled, can't configure ADQ\n");
9168 return -EBUSY;
9169 }
9170 ice_tear_down_devlink_rate_tree(pf);
9171
9172 ret = ice_validate_mqprio_qopt(vsi, mqprio_qopt);
9173 if (ret) {
9174 netdev_err(netdev, "failed to validate_mqprio_qopt(), ret %d\n",
9175 ret);
9176 return ret;
9177 }
9178 memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
9179 set_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
9180 /* don't assume state of hw_tc_offload during driver load
9181 * and set the flag for TC flower filter if hw_tc_offload
9182 * already ON
9183 */
9184 if (vsi->netdev->features & NETIF_F_HW_TC)
9185 set_bit(ICE_FLAG_CLS_FLOWER, pf->flags);
9186 break;
9187 default:
9188 return -EINVAL;
9189 }
9190
9191 config_tcf:
9192
9193 /* Requesting same TCF configuration as already enabled */
9194 if (ena_tc_qdisc == vsi->tc_cfg.ena_tc &&
9195 mode != TC_MQPRIO_MODE_CHANNEL)
9196 return 0;
9197
9198 /* Pause VSI queues */
9199 ice_dis_vsi(vsi, true);
9200
9201 if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
9202 ice_remove_q_channels(vsi, true);
9203
9204 if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
9205 vsi->req_txq = min_t(int, ice_get_avail_txq_count(pf),
9206 num_online_cpus());
9207 vsi->req_rxq = min_t(int, ice_get_avail_rxq_count(pf),
9208 num_online_cpus());
9209 } else {
9210 /* logic to rebuild VSI, same like ethtool -L */
9211 u16 offset = 0, qcount_tx = 0, qcount_rx = 0;
9212
9213 for (i = 0; i < num_tcf; i++) {
9214 if (!(ena_tc_qdisc & BIT(i)))
9215 continue;
9216
9217 offset = vsi->mqprio_qopt.qopt.offset[i];
9218 qcount_rx = vsi->mqprio_qopt.qopt.count[i];
9219 qcount_tx = vsi->mqprio_qopt.qopt.count[i];
9220 }
9221 vsi->req_txq = offset + qcount_tx;
9222 vsi->req_rxq = offset + qcount_rx;
9223
9224 /* store away original rss_size info, so that it gets reused
9225 * form ice_vsi_rebuild during tc-qdisc delete stage - to
9226 * determine, what should be the rss_sizefor main VSI
9227 */
9228 vsi->orig_rss_size = vsi->rss_size;
9229 }
9230
9231 /* save current values of Tx and Rx queues before calling VSI rebuild
9232 * for fallback option
9233 */
9234 cur_txq = vsi->num_txq;
9235 cur_rxq = vsi->num_rxq;
9236
9237 /* proceed with rebuild main VSI using correct number of queues */
9238 ret = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
9239 if (ret) {
9240 /* fallback to current number of queues */
9241 dev_info(dev, "Rebuild failed with new queues, try with current number of queues\n");
9242 vsi->req_txq = cur_txq;
9243 vsi->req_rxq = cur_rxq;
9244 clear_bit(ICE_RESET_FAILED, pf->state);
9245 if (ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT)) {
9246 dev_err(dev, "Rebuild of main VSI failed again\n");
9247 return ret;
9248 }
9249 }
9250
9251 vsi->all_numtc = num_tcf;
9252 vsi->all_enatc = ena_tc_qdisc;
9253 ret = ice_vsi_cfg_tc(vsi, ena_tc_qdisc);
9254 if (ret) {
9255 netdev_err(netdev, "failed configuring TC for VSI id=%d\n",
9256 vsi->vsi_num);
9257 goto exit;
9258 }
9259
9260 if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
9261 u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
9262 u64 min_tx_rate = vsi->mqprio_qopt.min_rate[0];
9263
9264 /* set TC0 rate limit if specified */
9265 if (max_tx_rate || min_tx_rate) {
9266 /* convert to Kbits/s */
9267 if (max_tx_rate)
9268 max_tx_rate = div_u64(max_tx_rate, ICE_BW_KBPS_DIVISOR);
9269 if (min_tx_rate)
9270 min_tx_rate = div_u64(min_tx_rate, ICE_BW_KBPS_DIVISOR);
9271
9272 ret = ice_set_bw_limit(vsi, max_tx_rate, min_tx_rate);
9273 if (!ret) {
9274 dev_dbg(dev, "set Tx rate max %llu min %llu for VSI(%u)\n",
9275 max_tx_rate, min_tx_rate, vsi->vsi_num);
9276 } else {
9277 dev_err(dev, "failed to set Tx rate max %llu min %llu for VSI(%u)\n",
9278 max_tx_rate, min_tx_rate, vsi->vsi_num);
9279 goto exit;
9280 }
9281 }
9282 ret = ice_create_q_channels(vsi);
9283 if (ret) {
9284 netdev_err(netdev, "failed configuring queue channels\n");
9285 goto exit;
9286 } else {
9287 netdev_dbg(netdev, "successfully configured channels\n");
9288 }
9289 }
9290
9291 if (vsi->ch_rss_size)
9292 ice_vsi_cfg_rss_lut_key(vsi);
9293
9294 exit:
9295 /* if error, reset the all_numtc and all_enatc */
9296 if (ret) {
9297 vsi->all_numtc = 0;
9298 vsi->all_enatc = 0;
9299 }
9300 /* resume VSI */
9301 ice_ena_vsi(vsi, true);
9302
9303 return ret;
9304 }
9305
9306 /**
9307 * ice_cfg_txtime - configure Tx Time for the Tx ring
9308 * @tx_ring: pointer to the Tx ring structure
9309 *
9310 * Return: 0 on success, negative value on failure.
9311 */
ice_cfg_txtime(struct ice_tx_ring * tx_ring)9312 static int ice_cfg_txtime(struct ice_tx_ring *tx_ring)
9313 {
9314 int err, timeout = 50;
9315 struct ice_vsi *vsi;
9316 struct device *dev;
9317 struct ice_pf *pf;
9318 u32 queue;
9319
9320 if (!tx_ring)
9321 return -EINVAL;
9322
9323 vsi = tx_ring->vsi;
9324 pf = vsi->back;
9325 while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) {
9326 timeout--;
9327 if (!timeout)
9328 return -EBUSY;
9329 usleep_range(1000, 2000);
9330 }
9331
9332 queue = tx_ring->q_index;
9333 dev = ice_pf_to_dev(pf);
9334
9335 /* Ignore return value, and always attempt to enable queue. */
9336 ice_qp_dis(vsi, queue);
9337
9338 err = ice_qp_ena(vsi, queue);
9339 if (err)
9340 dev_err(dev, "Failed to enable Tx queue %d for TxTime configuration\n",
9341 queue);
9342
9343 clear_bit(ICE_CFG_BUSY, pf->state);
9344 return err;
9345 }
9346
9347 /**
9348 * ice_offload_txtime - set earliest TxTime first
9349 * @netdev: network interface device structure
9350 * @qopt_off: etf queue option offload from the skb to set
9351 *
9352 * Return: 0 on success, negative value on failure.
9353 */
ice_offload_txtime(struct net_device * netdev,void * qopt_off)9354 static int ice_offload_txtime(struct net_device *netdev,
9355 void *qopt_off)
9356 {
9357 struct ice_netdev_priv *np = netdev_priv(netdev);
9358 struct ice_pf *pf = np->vsi->back;
9359 struct tc_etf_qopt_offload *qopt;
9360 struct ice_vsi *vsi = np->vsi;
9361 struct ice_tx_ring *tx_ring;
9362 int ret = 0;
9363
9364 if (!ice_is_feature_supported(pf, ICE_F_TXTIME))
9365 return -EOPNOTSUPP;
9366
9367 qopt = qopt_off;
9368 if (!qopt_off || qopt->queue < 0 || qopt->queue >= vsi->num_txq)
9369 return -EINVAL;
9370
9371 if (qopt->enable)
9372 set_bit(qopt->queue, pf->txtime_txqs);
9373 else
9374 clear_bit(qopt->queue, pf->txtime_txqs);
9375
9376 if (netif_running(vsi->netdev)) {
9377 tx_ring = vsi->tx_rings[qopt->queue];
9378 ret = ice_cfg_txtime(tx_ring);
9379 if (ret)
9380 goto err;
9381 }
9382
9383 netdev_info(netdev, "%s TxTime on queue: %i\n",
9384 str_enable_disable(qopt->enable), qopt->queue);
9385 return 0;
9386
9387 err:
9388 netdev_err(netdev, "Failed to %s TxTime on queue: %i\n",
9389 str_enable_disable(qopt->enable), qopt->queue);
9390
9391 if (qopt->enable)
9392 clear_bit(qopt->queue, pf->txtime_txqs);
9393 return ret;
9394 }
9395
9396 static LIST_HEAD(ice_block_cb_list);
9397
9398 static int
ice_setup_tc(struct net_device * netdev,enum tc_setup_type type,void * type_data)9399 ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
9400 void *type_data)
9401 {
9402 struct ice_netdev_priv *np = netdev_priv(netdev);
9403 enum flow_block_binder_type binder_type;
9404 struct iidc_rdma_core_dev_info *cdev;
9405 struct ice_pf *pf = np->vsi->back;
9406 flow_setup_cb_t *flower_handler;
9407 bool locked = false;
9408 int err;
9409
9410 switch (type) {
9411 case TC_SETUP_BLOCK:
9412 binder_type =
9413 ((struct flow_block_offload *)type_data)->binder_type;
9414
9415 switch (binder_type) {
9416 case FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS:
9417 flower_handler = ice_setup_tc_block_cb_ingress;
9418 break;
9419 case FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS:
9420 flower_handler = ice_setup_tc_block_cb_egress;
9421 break;
9422 default:
9423 return -EOPNOTSUPP;
9424 }
9425
9426 return flow_block_cb_setup_simple(type_data,
9427 &ice_block_cb_list,
9428 flower_handler,
9429 np, np, false);
9430 case TC_SETUP_QDISC_MQPRIO:
9431 if (ice_is_eswitch_mode_switchdev(pf)) {
9432 netdev_err(netdev, "TC MQPRIO offload not supported, switchdev is enabled\n");
9433 return -EOPNOTSUPP;
9434 }
9435
9436 cdev = pf->cdev_info;
9437 if (cdev && cdev->adev) {
9438 mutex_lock(&pf->adev_mutex);
9439 device_lock(&cdev->adev->dev);
9440 locked = true;
9441 if (cdev->adev->dev.driver) {
9442 netdev_err(netdev, "Cannot change qdisc when RDMA is active\n");
9443 err = -EBUSY;
9444 goto adev_unlock;
9445 }
9446 }
9447
9448 /* setup traffic classifier for receive side */
9449 mutex_lock(&pf->tc_mutex);
9450 err = ice_setup_tc_mqprio_qdisc(netdev, type_data);
9451 mutex_unlock(&pf->tc_mutex);
9452
9453 adev_unlock:
9454 if (locked) {
9455 device_unlock(&cdev->adev->dev);
9456 mutex_unlock(&pf->adev_mutex);
9457 }
9458 return err;
9459 case TC_SETUP_QDISC_ETF:
9460 return ice_offload_txtime(netdev, type_data);
9461 default:
9462 return -EOPNOTSUPP;
9463 }
9464 return -EOPNOTSUPP;
9465 }
9466
9467 static struct ice_indr_block_priv *
ice_indr_block_priv_lookup(struct ice_netdev_priv * np,struct net_device * netdev)9468 ice_indr_block_priv_lookup(struct ice_netdev_priv *np,
9469 struct net_device *netdev)
9470 {
9471 struct ice_indr_block_priv *cb_priv;
9472
9473 list_for_each_entry(cb_priv, &np->tc_indr_block_priv_list, list) {
9474 if (!cb_priv->netdev)
9475 return NULL;
9476 if (cb_priv->netdev == netdev)
9477 return cb_priv;
9478 }
9479 return NULL;
9480 }
9481
9482 static int
ice_indr_setup_block_cb(enum tc_setup_type type,void * type_data,void * indr_priv)9483 ice_indr_setup_block_cb(enum tc_setup_type type, void *type_data,
9484 void *indr_priv)
9485 {
9486 struct ice_indr_block_priv *priv = indr_priv;
9487 struct ice_netdev_priv *np = priv->np;
9488
9489 switch (type) {
9490 case TC_SETUP_CLSFLOWER:
9491 return ice_setup_tc_cls_flower(np, priv->netdev,
9492 (struct flow_cls_offload *)
9493 type_data, false);
9494 default:
9495 return -EOPNOTSUPP;
9496 }
9497 }
9498
9499 static int
ice_indr_setup_tc_block(struct net_device * netdev,struct Qdisc * sch,struct ice_netdev_priv * np,struct flow_block_offload * f,void * data,void (* cleanup)(struct flow_block_cb * block_cb))9500 ice_indr_setup_tc_block(struct net_device *netdev, struct Qdisc *sch,
9501 struct ice_netdev_priv *np,
9502 struct flow_block_offload *f, void *data,
9503 void (*cleanup)(struct flow_block_cb *block_cb))
9504 {
9505 struct ice_indr_block_priv *indr_priv;
9506 struct flow_block_cb *block_cb;
9507
9508 if (!ice_is_tunnel_supported(netdev) &&
9509 !(is_vlan_dev(netdev) &&
9510 vlan_dev_real_dev(netdev) == np->vsi->netdev))
9511 return -EOPNOTSUPP;
9512
9513 if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
9514 return -EOPNOTSUPP;
9515
9516 switch (f->command) {
9517 case FLOW_BLOCK_BIND:
9518 indr_priv = ice_indr_block_priv_lookup(np, netdev);
9519 if (indr_priv)
9520 return -EEXIST;
9521
9522 indr_priv = kzalloc_obj(*indr_priv);
9523 if (!indr_priv)
9524 return -ENOMEM;
9525
9526 indr_priv->netdev = netdev;
9527 indr_priv->np = np;
9528 list_add(&indr_priv->list, &np->tc_indr_block_priv_list);
9529
9530 block_cb =
9531 flow_indr_block_cb_alloc(ice_indr_setup_block_cb,
9532 indr_priv, indr_priv,
9533 ice_rep_indr_tc_block_unbind,
9534 f, netdev, sch, data, np,
9535 cleanup);
9536
9537 if (IS_ERR(block_cb)) {
9538 list_del(&indr_priv->list);
9539 kfree(indr_priv);
9540 return PTR_ERR(block_cb);
9541 }
9542 flow_block_cb_add(block_cb, f);
9543 list_add_tail(&block_cb->driver_list, &ice_block_cb_list);
9544 break;
9545 case FLOW_BLOCK_UNBIND:
9546 indr_priv = ice_indr_block_priv_lookup(np, netdev);
9547 if (!indr_priv)
9548 return -ENOENT;
9549
9550 block_cb = flow_block_cb_lookup(f->block,
9551 ice_indr_setup_block_cb,
9552 indr_priv);
9553 if (!block_cb)
9554 return -ENOENT;
9555
9556 flow_indr_block_cb_remove(block_cb, f);
9557
9558 list_del(&block_cb->driver_list);
9559 break;
9560 default:
9561 return -EOPNOTSUPP;
9562 }
9563 return 0;
9564 }
9565
9566 static int
ice_indr_setup_tc_cb(struct net_device * netdev,struct Qdisc * sch,void * cb_priv,enum tc_setup_type type,void * type_data,void * data,void (* cleanup)(struct flow_block_cb * block_cb))9567 ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch,
9568 void *cb_priv, enum tc_setup_type type, void *type_data,
9569 void *data,
9570 void (*cleanup)(struct flow_block_cb *block_cb))
9571 {
9572 switch (type) {
9573 case TC_SETUP_BLOCK:
9574 return ice_indr_setup_tc_block(netdev, sch, cb_priv, type_data,
9575 data, cleanup);
9576
9577 default:
9578 return -EOPNOTSUPP;
9579 }
9580 }
9581
9582 /**
9583 * ice_open - Called when a network interface becomes active
9584 * @netdev: network interface device structure
9585 *
9586 * The open entry point is called when a network interface is made
9587 * active by the system (IFF_UP). At this point all resources needed
9588 * for transmit and receive operations are allocated, the interrupt
9589 * handler is registered with the OS, the netdev watchdog is enabled,
9590 * and the stack is notified that the interface is ready.
9591 *
9592 * Returns 0 on success, negative value on failure
9593 */
ice_open(struct net_device * netdev)9594 int ice_open(struct net_device *netdev)
9595 {
9596 struct ice_pf *pf = ice_netdev_to_pf(netdev);
9597
9598 if (ice_is_reset_in_progress(pf->state)) {
9599 netdev_err(netdev, "can't open net device while reset is in progress");
9600 return -EBUSY;
9601 }
9602
9603 return ice_open_internal(netdev);
9604 }
9605
9606 /**
9607 * ice_open_internal - Called when a network interface becomes active
9608 * @netdev: network interface device structure
9609 *
9610 * Internal ice_open implementation. Should not be used directly except for ice_open and reset
9611 * handling routine
9612 *
9613 * Returns 0 on success, negative value on failure
9614 */
ice_open_internal(struct net_device * netdev)9615 int ice_open_internal(struct net_device *netdev)
9616 {
9617 struct ice_netdev_priv *np = netdev_priv(netdev);
9618 struct ice_vsi *vsi = np->vsi;
9619 struct ice_pf *pf = vsi->back;
9620 struct ice_port_info *pi;
9621 int err;
9622
9623 if (test_bit(ICE_NEEDS_RESTART, pf->state)) {
9624 netdev_err(netdev, "driver needs to be unloaded and reloaded\n");
9625 return -EIO;
9626 }
9627
9628 netif_carrier_off(netdev);
9629
9630 pi = vsi->port_info;
9631 err = ice_update_link_info(pi);
9632 if (err) {
9633 netdev_err(netdev, "Failed to get link info, error %d\n", err);
9634 return err;
9635 }
9636
9637 ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err);
9638
9639 /* Set PHY if there is media, otherwise, turn off PHY */
9640 if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
9641 clear_bit(ICE_FLAG_NO_MEDIA, pf->flags);
9642 if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state)) {
9643 err = ice_init_phy_user_cfg(pi);
9644 if (err) {
9645 netdev_err(netdev, "Failed to initialize PHY settings, error %d\n",
9646 err);
9647 return err;
9648 }
9649 }
9650
9651 err = ice_phy_cfg(vsi, true);
9652 if (err) {
9653 netdev_err(netdev, "Failed to set physical link up, error %d\n",
9654 err);
9655 return err;
9656 }
9657 } else {
9658 set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
9659 ice_set_link(vsi, false);
9660 }
9661
9662 err = ice_vsi_open(vsi);
9663 if (err)
9664 netdev_err(netdev, "Failed to open VSI 0x%04X on switch 0x%04X\n",
9665 vsi->vsi_num, vsi->vsw->sw_id);
9666
9667 return err;
9668 }
9669
9670 /**
9671 * ice_stop - Disables a network interface
9672 * @netdev: network interface device structure
9673 *
9674 * The stop entry point is called when an interface is de-activated by the OS,
9675 * and the netdevice enters the DOWN state. The hardware is still under the
9676 * driver's control, but the netdev interface is disabled.
9677 *
9678 * Returns success only - not allowed to fail
9679 */
ice_stop(struct net_device * netdev)9680 int ice_stop(struct net_device *netdev)
9681 {
9682 struct ice_netdev_priv *np = netdev_priv(netdev);
9683 struct ice_vsi *vsi = np->vsi;
9684 struct ice_pf *pf = vsi->back;
9685
9686 if (ice_is_reset_in_progress(pf->state)) {
9687 netdev_err(netdev, "can't stop net device while reset is in progress");
9688 return -EBUSY;
9689 }
9690
9691 if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) {
9692 int link_err = ice_phy_cfg(vsi, false);
9693
9694 if (link_err) {
9695 if (link_err == -ENOMEDIUM)
9696 netdev_info(vsi->netdev, "Skipping link reconfig - no media attached, VSI %d\n",
9697 vsi->vsi_num);
9698 else
9699 netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n",
9700 vsi->vsi_num, link_err);
9701
9702 ice_vsi_close(vsi);
9703 return -EIO;
9704 }
9705 }
9706
9707 ice_vsi_close(vsi);
9708
9709 return 0;
9710 }
9711
9712 /**
9713 * ice_features_check - Validate encapsulated packet conforms to limits
9714 * @skb: skb buffer
9715 * @netdev: This port's netdev
9716 * @features: Offload features that the stack believes apply
9717 */
9718 static netdev_features_t
ice_features_check(struct sk_buff * skb,struct net_device __always_unused * netdev,netdev_features_t features)9719 ice_features_check(struct sk_buff *skb,
9720 struct net_device __always_unused *netdev,
9721 netdev_features_t features)
9722 {
9723 bool gso = skb_is_gso(skb);
9724 size_t len;
9725
9726 /* No point in doing any of this if neither checksum nor GSO are
9727 * being requested for this frame. We can rule out both by just
9728 * checking for CHECKSUM_PARTIAL
9729 */
9730 if (skb->ip_summed != CHECKSUM_PARTIAL)
9731 return features;
9732
9733 /* We cannot support GSO if the MSS is going to be less than
9734 * 64 bytes. If it is then we need to drop support for GSO.
9735 */
9736 if (gso && (skb_shinfo(skb)->gso_size < ICE_TXD_CTX_MIN_MSS))
9737 features &= ~NETIF_F_GSO_MASK;
9738
9739 len = skb_network_offset(skb);
9740 if (len > ICE_TXD_MACLEN_MAX || len & 0x1)
9741 goto out_rm_features;
9742
9743 len = skb_network_header_len(skb);
9744 if (len > ICE_TXD_IPLEN_MAX || len & 0x1)
9745 goto out_rm_features;
9746
9747 if (skb->encapsulation) {
9748 /* this must work for VXLAN frames AND IPIP/SIT frames, and in
9749 * the case of IPIP frames, the transport header pointer is
9750 * after the inner header! So check to make sure that this
9751 * is a GRE or UDP_TUNNEL frame before doing that math.
9752 */
9753 if (gso && (skb_shinfo(skb)->gso_type &
9754 (SKB_GSO_GRE | SKB_GSO_UDP_TUNNEL))) {
9755 len = skb_inner_network_header(skb) -
9756 skb_transport_header(skb);
9757 if (len > ICE_TXD_L4LEN_MAX || len & 0x1)
9758 goto out_rm_features;
9759 }
9760
9761 len = skb_inner_network_header_len(skb);
9762 if (len > ICE_TXD_IPLEN_MAX || len & 0x1)
9763 goto out_rm_features;
9764 }
9765
9766 return features;
9767 out_rm_features:
9768 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
9769 }
9770
9771 static const struct net_device_ops ice_netdev_safe_mode_ops = {
9772 .ndo_open = ice_open,
9773 .ndo_stop = ice_stop,
9774 .ndo_start_xmit = ice_start_xmit,
9775 .ndo_set_mac_address = ice_set_mac_address,
9776 .ndo_validate_addr = eth_validate_addr,
9777 .ndo_change_mtu = ice_change_mtu,
9778 .ndo_get_stats64 = ice_get_stats64,
9779 .ndo_tx_timeout = ice_tx_timeout,
9780 .ndo_bpf = ice_xdp_safe_mode,
9781 };
9782
9783 static const struct net_device_ops ice_netdev_ops = {
9784 .ndo_open = ice_open,
9785 .ndo_stop = ice_stop,
9786 .ndo_start_xmit = ice_start_xmit,
9787 .ndo_select_queue = ice_select_queue,
9788 .ndo_features_check = ice_features_check,
9789 .ndo_fix_features = ice_fix_features,
9790 .ndo_set_rx_mode = ice_set_rx_mode,
9791 .ndo_set_mac_address = ice_set_mac_address,
9792 .ndo_validate_addr = eth_validate_addr,
9793 .ndo_change_mtu = ice_change_mtu,
9794 .ndo_get_stats64 = ice_get_stats64,
9795 .ndo_set_tx_maxrate = ice_set_tx_maxrate,
9796 .ndo_set_vf_spoofchk = ice_set_vf_spoofchk,
9797 .ndo_set_vf_mac = ice_set_vf_mac,
9798 .ndo_get_vf_config = ice_get_vf_cfg,
9799 .ndo_set_vf_trust = ice_set_vf_trust,
9800 .ndo_set_vf_vlan = ice_set_vf_port_vlan,
9801 .ndo_set_vf_link_state = ice_set_vf_link_state,
9802 .ndo_get_vf_stats = ice_get_vf_stats,
9803 .ndo_set_vf_rate = ice_set_vf_bw,
9804 .ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid,
9805 .ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid,
9806 .ndo_setup_tc = ice_setup_tc,
9807 .ndo_set_features = ice_set_features,
9808 .ndo_bridge_getlink = ice_bridge_getlink,
9809 .ndo_bridge_setlink = ice_bridge_setlink,
9810 .ndo_fdb_add = ice_fdb_add,
9811 .ndo_fdb_del = ice_fdb_del,
9812 #ifdef CONFIG_RFS_ACCEL
9813 .ndo_rx_flow_steer = ice_rx_flow_steer,
9814 #endif
9815 .ndo_tx_timeout = ice_tx_timeout,
9816 .ndo_bpf = ice_xdp,
9817 .ndo_xdp_xmit = ice_xdp_xmit,
9818 .ndo_xsk_wakeup = ice_xsk_wakeup,
9819 .ndo_hwtstamp_get = ice_ptp_hwtstamp_get,
9820 .ndo_hwtstamp_set = ice_ptp_hwtstamp_set,
9821 };
9822