// SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2013 - 2021 Intel Corporation. */ #include #include #include #include #include #include #include /* Local includes */ #include "i40e.h" #include "i40e_devids.h" #include "i40e_diag.h" #include "i40e_lan_hmc.h" #include "i40e_virtchnl_pf.h" #include "i40e_xsk.h" /* All i40e tracepoints are defined by the include below, which * must be included exactly once across the whole kernel with * CREATE_TRACE_POINTS defined */ #define CREATE_TRACE_POINTS #include "i40e_trace.h" const char i40e_driver_name[] = "i40e"; static const char i40e_driver_string[] = "Intel(R) Ethernet Connection XL710 Network Driver"; static const char i40e_copyright[] = "Copyright (c) 2013 - 2019 Intel Corporation."; /* a bit of forward declarations */ static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi); static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired); static int i40e_add_vsi(struct i40e_vsi *vsi); static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi); static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acquired); static int i40e_setup_misc_vector(struct i40e_pf *pf); static void i40e_determine_queue_usage(struct i40e_pf *pf); static int i40e_setup_pf_filter_control(struct i40e_pf *pf); static void i40e_prep_for_reset(struct i40e_pf *pf); static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired); static int i40e_reset(struct i40e_pf *pf); static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired); static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf); static int i40e_restore_interrupt_scheme(struct i40e_pf *pf); static bool i40e_check_recovery_mode(struct i40e_pf *pf); static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw); static void i40e_fdir_sb_setup(struct i40e_pf *pf); static int i40e_veb_get_bw_info(struct i40e_veb *veb); static int i40e_get_capabilities(struct i40e_pf *pf, enum i40e_admin_queue_opc list_type); static bool i40e_is_total_port_shutdown_enabled(struct i40e_pf *pf); /* i40e_pci_tbl - PCI Device ID Table * * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, * Class, Class Mask, private data (not used) } */ static const struct pci_device_id i40e_pci_tbl[] = { {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_XL710), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_QEMU), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_B), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_C), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_A), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_B), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_C), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_1G_BASE_T_BC), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T4), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_BC), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_SFP), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_B), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_1G_BASE_T_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_I_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722_A), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2_A), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_X710_N3000), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_XXV710_N3000), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_25G_B), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_25G_SFP28), 0}, /* required last entry */ {0, } }; MODULE_DEVICE_TABLE(pci, i40e_pci_tbl); #define I40E_MAX_VF_COUNT 128 static int debug = -1; module_param(debug, uint, 0); MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all), Debug mask (0x8XXXXXXX)"); MODULE_DESCRIPTION("Intel(R) Ethernet Connection XL710 Network Driver"); MODULE_IMPORT_NS(LIBIE); MODULE_LICENSE("GPL v2"); static struct workqueue_struct *i40e_wq; static void netdev_hw_addr_refcnt(struct i40e_mac_filter *f, struct net_device *netdev, int delta) { struct netdev_hw_addr_list *ha_list; struct netdev_hw_addr *ha; if (!f || !netdev) return; if (is_unicast_ether_addr(f->macaddr) || is_link_local_ether_addr(f->macaddr)) ha_list = &netdev->uc; else ha_list = &netdev->mc; netdev_hw_addr_list_for_each(ha, ha_list) { if (ether_addr_equal(ha->addr, f->macaddr)) { ha->refcount += delta; if (ha->refcount <= 0) ha->refcount = 1; break; } } } /** * i40e_hw_to_dev - get device pointer from the hardware structure * @hw: pointer to the device HW structure **/ struct device *i40e_hw_to_dev(struct i40e_hw *hw) { struct i40e_pf *pf = i40e_hw_to_pf(hw); return &pf->pdev->dev; } /** * i40e_allocate_dma_mem - OS specific memory alloc for shared code * @hw: pointer to the HW structure * @mem: ptr to mem struct to fill out * @size: size of memory requested * @alignment: what to align the allocation to **/ int i40e_allocate_dma_mem(struct i40e_hw *hw, struct i40e_dma_mem *mem, u64 size, u32 alignment) { struct i40e_pf *pf = i40e_hw_to_pf(hw); mem->size = ALIGN(size, alignment); mem->va = dma_alloc_coherent(&pf->pdev->dev, mem->size, &mem->pa, GFP_KERNEL); if (!mem->va) return -ENOMEM; return 0; } /** * i40e_free_dma_mem - OS specific memory free for shared code * @hw: pointer to the HW structure * @mem: ptr to mem struct to free **/ int i40e_free_dma_mem(struct i40e_hw *hw, struct i40e_dma_mem *mem) { struct i40e_pf *pf = i40e_hw_to_pf(hw); dma_free_coherent(&pf->pdev->dev, mem->size, mem->va, mem->pa); mem->va = NULL; mem->pa = 0; mem->size = 0; return 0; } /** * i40e_allocate_virt_mem - OS specific memory alloc for shared code * @hw: pointer to the HW structure * @mem: ptr to mem struct to fill out * @size: size of memory requested **/ int i40e_allocate_virt_mem(struct i40e_hw *hw, struct i40e_virt_mem *mem, u32 size) { mem->size = size; mem->va = kzalloc(size, GFP_KERNEL); if (!mem->va) return -ENOMEM; return 0; } /** * i40e_free_virt_mem - OS specific memory free for shared code * @hw: pointer to the HW structure * @mem: ptr to mem struct to free **/ int i40e_free_virt_mem(struct i40e_hw *hw, struct i40e_virt_mem *mem) { /* it's ok to kfree a NULL pointer */ kfree(mem->va); mem->va = NULL; mem->size = 0; return 0; } /** * i40e_get_lump - find a lump of free generic resource * @pf: board private structure * @pile: the pile of resource to search * @needed: the number of items needed * @id: an owner id to stick on the items assigned * * Returns the base item index of the lump, or negative for error **/ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, u16 needed, u16 id) { int ret = -ENOMEM; int i, j; if (!pile || needed == 0 || id >= I40E_PILE_VALID_BIT) { dev_info(&pf->pdev->dev, "param err: pile=%s needed=%d id=0x%04x\n", pile ? "" : "", needed, id); return -EINVAL; } /* Allocate last queue in the pile for FDIR VSI queue * so it doesn't fragment the qp_pile */ if (pile == pf->qp_pile && pf->vsi[id]->type == I40E_VSI_FDIR) { if (pile->list[pile->num_entries - 1] & I40E_PILE_VALID_BIT) { dev_err(&pf->pdev->dev, "Cannot allocate queue %d for I40E_VSI_FDIR\n", pile->num_entries - 1); return -ENOMEM; } pile->list[pile->num_entries - 1] = id | I40E_PILE_VALID_BIT; return pile->num_entries - 1; } i = 0; while (i < pile->num_entries) { /* skip already allocated entries */ if (pile->list[i] & I40E_PILE_VALID_BIT) { i++; continue; } /* do we have enough in this lump? */ for (j = 0; (j < needed) && ((i+j) < pile->num_entries); j++) { if (pile->list[i+j] & I40E_PILE_VALID_BIT) break; } if (j == needed) { /* there was enough, so assign it to the requestor */ for (j = 0; j < needed; j++) pile->list[i+j] = id | I40E_PILE_VALID_BIT; ret = i; break; } /* not enough, so skip over it and continue looking */ i += j; } return ret; } /** * i40e_put_lump - return a lump of generic resource * @pile: the pile of resource to search * @index: the base item index * @id: the owner id of the items assigned * * Returns the count of items in the lump **/ static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id) { int valid_id = (id | I40E_PILE_VALID_BIT); int count = 0; u16 i; if (!pile || index >= pile->num_entries) return -EINVAL; for (i = index; i < pile->num_entries && pile->list[i] == valid_id; i++) { pile->list[i] = 0; count++; } return count; } /** * i40e_find_vsi_from_id - searches for the vsi with the given id * @pf: the pf structure to search for the vsi * @id: id of the vsi it is searching for **/ struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id) { struct i40e_vsi *vsi; int i; i40e_pf_for_each_vsi(pf, i, vsi) if (vsi->id == id) return vsi; return NULL; } /** * i40e_service_event_schedule - Schedule the service task to wake up * @pf: board private structure * * If not already scheduled, this puts the task into the work queue **/ void i40e_service_event_schedule(struct i40e_pf *pf) { if ((!test_bit(__I40E_DOWN, pf->state) && !test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) || test_bit(__I40E_RECOVERY_MODE, pf->state)) queue_work(i40e_wq, &pf->service_task); } /** * i40e_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure * @txqueue: queue number timing out * * If any port has noticed a Tx timeout, it is likely that the whole * device is munged, not just the one netdev port, so go for the full * reset. **/ static void i40e_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; struct i40e_ring *tx_ring = NULL; unsigned int i; u32 head, val; pf->tx_timeout_count++; /* with txqueue index, find the tx_ring struct */ for (i = 0; i < vsi->num_queue_pairs; i++) { if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) { if (txqueue == vsi->tx_rings[i]->queue_index) { tx_ring = vsi->tx_rings[i]; break; } } } if (time_after(jiffies, (pf->tx_timeout_last_recovery + HZ*20))) pf->tx_timeout_recovery_level = 1; /* reset after some time */ else if (time_before(jiffies, (pf->tx_timeout_last_recovery + netdev->watchdog_timeo))) return; /* don't do any new action before the next timeout */ /* don't kick off another recovery if one is already pending */ if (test_and_set_bit(__I40E_TIMEOUT_RECOVERY_PENDING, pf->state)) return; if (tx_ring) { head = i40e_get_head(tx_ring); /* Read interrupt register */ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) val = rd32(&pf->hw, I40E_PFINT_DYN_CTLN(tx_ring->q_vector->v_idx + tx_ring->vsi->base_vector - 1)); else val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0); netdev_info(netdev, "tx_timeout: VSI_seid: %d, Q %d, NTC: 0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x, INT: 0x%x\n", vsi->seid, txqueue, tx_ring->next_to_clean, head, tx_ring->next_to_use, readl(tx_ring->tail), val); } pf->tx_timeout_last_recovery = jiffies; netdev_info(netdev, "tx_timeout recovery level %d, txqueue %d\n", pf->tx_timeout_recovery_level, txqueue); switch (pf->tx_timeout_recovery_level) { case 1: set_bit(__I40E_PF_RESET_REQUESTED, pf->state); break; case 2: set_bit(__I40E_CORE_RESET_REQUESTED, pf->state); break; case 3: set_bit(__I40E_GLOBAL_RESET_REQUESTED, pf->state); break; default: netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in non-recoverable state.\n"); set_bit(__I40E_DOWN_REQUESTED, pf->state); set_bit(__I40E_VSI_DOWN_REQUESTED, vsi->state); break; } i40e_service_event_schedule(pf); pf->tx_timeout_recovery_level++; } /** * i40e_get_vsi_stats_struct - Get System Network Statistics * @vsi: the VSI we care about * * Returns the address of the device statistics structure. * The statistics are actually updated from the service task. **/ struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi) { return &vsi->net_stats; } /** * i40e_get_netdev_stats_struct_tx - populate stats from a Tx ring * @ring: Tx ring to get statistics from * @stats: statistics entry to be updated **/ static void i40e_get_netdev_stats_struct_tx(struct i40e_ring *ring, struct rtnl_link_stats64 *stats) { u64 bytes, packets; unsigned int start; do { start = u64_stats_fetch_begin(&ring->syncp); packets = ring->stats.packets; bytes = ring->stats.bytes; } while (u64_stats_fetch_retry(&ring->syncp, start)); stats->tx_packets += packets; stats->tx_bytes += bytes; } /** * i40e_get_netdev_stats_struct - Get statistics for netdev interface * @netdev: network interface device structure * @stats: data structure to store statistics * * Returns the address of the device statistics structure. * The statistics are actually updated from the service task. **/ static void i40e_get_netdev_stats_struct(struct net_device *netdev, struct rtnl_link_stats64 *stats) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct rtnl_link_stats64 *vsi_stats = i40e_get_vsi_stats_struct(vsi); struct i40e_ring *ring; int i; if (test_bit(__I40E_VSI_DOWN, vsi->state)) return; if (!vsi->tx_rings) return; rcu_read_lock(); for (i = 0; i < vsi->num_queue_pairs; i++) { u64 bytes, packets; unsigned int start; ring = READ_ONCE(vsi->tx_rings[i]); if (!ring) continue; i40e_get_netdev_stats_struct_tx(ring, stats); if (i40e_enabled_xdp_vsi(vsi)) { ring = READ_ONCE(vsi->xdp_rings[i]); if (!ring) continue; i40e_get_netdev_stats_struct_tx(ring, stats); } ring = READ_ONCE(vsi->rx_rings[i]); if (!ring) continue; do { start = u64_stats_fetch_begin(&ring->syncp); packets = ring->stats.packets; bytes = ring->stats.bytes; } while (u64_stats_fetch_retry(&ring->syncp, start)); stats->rx_packets += packets; stats->rx_bytes += bytes; } rcu_read_unlock(); /* following stats updated by i40e_watchdog_subtask() */ stats->multicast = vsi_stats->multicast; stats->tx_errors = vsi_stats->tx_errors; stats->tx_dropped = vsi_stats->tx_dropped; stats->rx_errors = vsi_stats->rx_errors; stats->rx_dropped = vsi_stats->rx_dropped; stats->rx_missed_errors = vsi_stats->rx_missed_errors; stats->rx_crc_errors = vsi_stats->rx_crc_errors; stats->rx_length_errors = vsi_stats->rx_length_errors; } /** * i40e_vsi_reset_stats - Resets all stats of the given vsi * @vsi: the VSI to have its stats reset **/ void i40e_vsi_reset_stats(struct i40e_vsi *vsi) { struct rtnl_link_stats64 *ns; int i; if (!vsi) return; ns = i40e_get_vsi_stats_struct(vsi); memset(ns, 0, sizeof(*ns)); memset(&vsi->net_stats_offsets, 0, sizeof(vsi->net_stats_offsets)); memset(&vsi->eth_stats, 0, sizeof(vsi->eth_stats)); memset(&vsi->eth_stats_offsets, 0, sizeof(vsi->eth_stats_offsets)); if (vsi->rx_rings && vsi->rx_rings[0]) { for (i = 0; i < vsi->num_queue_pairs; i++) { memset(&vsi->rx_rings[i]->stats, 0, sizeof(vsi->rx_rings[i]->stats)); memset(&vsi->rx_rings[i]->rx_stats, 0, sizeof(vsi->rx_rings[i]->rx_stats)); memset(&vsi->tx_rings[i]->stats, 0, sizeof(vsi->tx_rings[i]->stats)); memset(&vsi->tx_rings[i]->tx_stats, 0, sizeof(vsi->tx_rings[i]->tx_stats)); } } vsi->stat_offsets_loaded = false; } /** * i40e_pf_reset_stats - Reset all of the stats for the given PF * @pf: the PF to be reset **/ void i40e_pf_reset_stats(struct i40e_pf *pf) { struct i40e_veb *veb; int i; memset(&pf->stats, 0, sizeof(pf->stats)); memset(&pf->stats_offsets, 0, sizeof(pf->stats_offsets)); pf->stat_offsets_loaded = false; i40e_pf_for_each_veb(pf, i, veb) { memset(&veb->stats, 0, sizeof(veb->stats)); memset(&veb->stats_offsets, 0, sizeof(veb->stats_offsets)); memset(&veb->tc_stats, 0, sizeof(veb->tc_stats)); memset(&veb->tc_stats_offsets, 0, sizeof(veb->tc_stats_offsets)); veb->stat_offsets_loaded = false; } pf->hw_csum_rx_error = 0; } /** * i40e_compute_pci_to_hw_id - compute index form PCI function. * @vsi: ptr to the VSI to read from. * @hw: ptr to the hardware info. **/ static u32 i40e_compute_pci_to_hw_id(struct i40e_vsi *vsi, struct i40e_hw *hw) { int pf_count = i40e_get_pf_count(hw); if (vsi->type == I40E_VSI_SRIOV) return (hw->port * BIT(7)) / pf_count + vsi->vf_id; return hw->port + BIT(7); } /** * i40e_stat_update64 - read and update a 64 bit stat from the chip. * @hw: ptr to the hardware info. * @hireg: the high 32 bit reg to read. * @loreg: the low 32 bit reg to read. * @offset_loaded: has the initial offset been loaded yet. * @offset: ptr to current offset value. * @stat: ptr to the stat. * * Since the device stats are not reset at PFReset, they will not * be zeroed when the driver starts. We'll save the first values read * and use them as offsets to be subtracted from the raw values in order * to report stats that count from zero. **/ static void i40e_stat_update64(struct i40e_hw *hw, u32 hireg, u32 loreg, bool offset_loaded, u64 *offset, u64 *stat) { u64 new_data; new_data = rd64(hw, loreg); if (!offset_loaded || new_data < *offset) *offset = new_data; *stat = new_data - *offset; } /** * i40e_stat_update48 - read and update a 48 bit stat from the chip * @hw: ptr to the hardware info * @hireg: the high 32 bit reg to read * @loreg: the low 32 bit reg to read * @offset_loaded: has the initial offset been loaded yet * @offset: ptr to current offset value * @stat: ptr to the stat * * Since the device stats are not reset at PFReset, they likely will not * be zeroed when the driver starts. We'll save the first values read * and use them as offsets to be subtracted from the raw values in order * to report stats that count from zero. In the process, we also manage * the potential roll-over. **/ static void i40e_stat_update48(struct i40e_hw *hw, u32 hireg, u32 loreg, bool offset_loaded, u64 *offset, u64 *stat) { u64 new_data; if (hw->device_id == I40E_DEV_ID_QEMU) { new_data = rd32(hw, loreg); new_data |= ((u64)(rd32(hw, hireg) & 0xFFFF)) << 32; } else { new_data = rd64(hw, loreg); } if (!offset_loaded) *offset = new_data; if (likely(new_data >= *offset)) *stat = new_data - *offset; else *stat = (new_data + BIT_ULL(48)) - *offset; *stat &= 0xFFFFFFFFFFFFULL; } /** * i40e_stat_update32 - read and update a 32 bit stat from the chip * @hw: ptr to the hardware info * @reg: the hw reg to read * @offset_loaded: has the initial offset been loaded yet * @offset: ptr to current offset value * @stat: ptr to the stat **/ static void i40e_stat_update32(struct i40e_hw *hw, u32 reg, bool offset_loaded, u64 *offset, u64 *stat) { u32 new_data; new_data = rd32(hw, reg); if (!offset_loaded) *offset = new_data; if (likely(new_data >= *offset)) *stat = (u32)(new_data - *offset); else *stat = (u32)((new_data + BIT_ULL(32)) - *offset); } /** * i40e_stat_update_and_clear32 - read and clear hw reg, update a 32 bit stat * @hw: ptr to the hardware info * @reg: the hw reg to read and clear * @stat: ptr to the stat **/ static void i40e_stat_update_and_clear32(struct i40e_hw *hw, u32 reg, u64 *stat) { u32 new_data = rd32(hw, reg); wr32(hw, reg, 1); /* must write a nonzero value to clear register */ *stat += new_data; } /** * i40e_stats_update_rx_discards - update rx_discards. * @vsi: ptr to the VSI to be updated. * @hw: ptr to the hardware info. * @stat_idx: VSI's stat_counter_idx. * @offset_loaded: ptr to the VSI's stat_offsets_loaded. * @stat_offset: ptr to stat_offset to store first read of specific register. * @stat: ptr to VSI's stat to be updated. **/ static void i40e_stats_update_rx_discards(struct i40e_vsi *vsi, struct i40e_hw *hw, int stat_idx, bool offset_loaded, struct i40e_eth_stats *stat_offset, struct i40e_eth_stats *stat) { i40e_stat_update32(hw, I40E_GLV_RDPC(stat_idx), offset_loaded, &stat_offset->rx_discards, &stat->rx_discards); i40e_stat_update64(hw, I40E_GL_RXERR1H(i40e_compute_pci_to_hw_id(vsi, hw)), I40E_GL_RXERR1L(i40e_compute_pci_to_hw_id(vsi, hw)), offset_loaded, &stat_offset->rx_discards_other, &stat->rx_discards_other); } /** * i40e_update_eth_stats - Update VSI-specific ethernet statistics counters. * @vsi: the VSI to be updated **/ void i40e_update_eth_stats(struct i40e_vsi *vsi) { int stat_idx = le16_to_cpu(vsi->info.stat_counter_idx); struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_eth_stats *oes; struct i40e_eth_stats *es; /* device's eth stats */ es = &vsi->eth_stats; oes = &vsi->eth_stats_offsets; /* Gather up the stats that the hw collects */ i40e_stat_update32(hw, I40E_GLV_TEPC(stat_idx), vsi->stat_offsets_loaded, &oes->tx_errors, &es->tx_errors); i40e_stat_update32(hw, I40E_GLV_RUPP(stat_idx), vsi->stat_offsets_loaded, &oes->rx_unknown_protocol, &es->rx_unknown_protocol); i40e_stat_update48(hw, I40E_GLV_GORCH(stat_idx), I40E_GLV_GORCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_bytes, &es->rx_bytes); i40e_stat_update48(hw, I40E_GLV_UPRCH(stat_idx), I40E_GLV_UPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_unicast, &es->rx_unicast); i40e_stat_update48(hw, I40E_GLV_MPRCH(stat_idx), I40E_GLV_MPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_multicast, &es->rx_multicast); i40e_stat_update48(hw, I40E_GLV_BPRCH(stat_idx), I40E_GLV_BPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_broadcast, &es->rx_broadcast); i40e_stat_update48(hw, I40E_GLV_GOTCH(stat_idx), I40E_GLV_GOTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_bytes, &es->tx_bytes); i40e_stat_update48(hw, I40E_GLV_UPTCH(stat_idx), I40E_GLV_UPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_unicast, &es->tx_unicast); i40e_stat_update48(hw, I40E_GLV_MPTCH(stat_idx), I40E_GLV_MPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_multicast, &es->tx_multicast); i40e_stat_update48(hw, I40E_GLV_BPTCH(stat_idx), I40E_GLV_BPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_broadcast, &es->tx_broadcast); i40e_stats_update_rx_discards(vsi, hw, stat_idx, vsi->stat_offsets_loaded, oes, es); vsi->stat_offsets_loaded = true; } /** * i40e_update_veb_stats - Update Switch component statistics * @veb: the VEB being updated **/ void i40e_update_veb_stats(struct i40e_veb *veb) { struct i40e_pf *pf = veb->pf; struct i40e_hw *hw = &pf->hw; struct i40e_eth_stats *oes; struct i40e_eth_stats *es; /* device's eth stats */ struct i40e_veb_tc_stats *veb_oes; struct i40e_veb_tc_stats *veb_es; int i, idx = 0; idx = veb->stats_idx; es = &veb->stats; oes = &veb->stats_offsets; veb_es = &veb->tc_stats; veb_oes = &veb->tc_stats_offsets; /* Gather up the stats that the hw collects */ i40e_stat_update32(hw, I40E_GLSW_TDPC(idx), veb->stat_offsets_loaded, &oes->tx_discards, &es->tx_discards); if (hw->revision_id > 0) i40e_stat_update32(hw, I40E_GLSW_RUPP(idx), veb->stat_offsets_loaded, &oes->rx_unknown_protocol, &es->rx_unknown_protocol); i40e_stat_update48(hw, I40E_GLSW_GORCH(idx), I40E_GLSW_GORCL(idx), veb->stat_offsets_loaded, &oes->rx_bytes, &es->rx_bytes); i40e_stat_update48(hw, I40E_GLSW_UPRCH(idx), I40E_GLSW_UPRCL(idx), veb->stat_offsets_loaded, &oes->rx_unicast, &es->rx_unicast); i40e_stat_update48(hw, I40E_GLSW_MPRCH(idx), I40E_GLSW_MPRCL(idx), veb->stat_offsets_loaded, &oes->rx_multicast, &es->rx_multicast); i40e_stat_update48(hw, I40E_GLSW_BPRCH(idx), I40E_GLSW_BPRCL(idx), veb->stat_offsets_loaded, &oes->rx_broadcast, &es->rx_broadcast); i40e_stat_update48(hw, I40E_GLSW_GOTCH(idx), I40E_GLSW_GOTCL(idx), veb->stat_offsets_loaded, &oes->tx_bytes, &es->tx_bytes); i40e_stat_update48(hw, I40E_GLSW_UPTCH(idx), I40E_GLSW_UPTCL(idx), veb->stat_offsets_loaded, &oes->tx_unicast, &es->tx_unicast); i40e_stat_update48(hw, I40E_GLSW_MPTCH(idx), I40E_GLSW_MPTCL(idx), veb->stat_offsets_loaded, &oes->tx_multicast, &es->tx_multicast); i40e_stat_update48(hw, I40E_GLSW_BPTCH(idx), I40E_GLSW_BPTCL(idx), veb->stat_offsets_loaded, &oes->tx_broadcast, &es->tx_broadcast); for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { i40e_stat_update48(hw, I40E_GLVEBTC_RPCH(i, idx), I40E_GLVEBTC_RPCL(i, idx), veb->stat_offsets_loaded, &veb_oes->tc_rx_packets[i], &veb_es->tc_rx_packets[i]); i40e_stat_update48(hw, I40E_GLVEBTC_RBCH(i, idx), I40E_GLVEBTC_RBCL(i, idx), veb->stat_offsets_loaded, &veb_oes->tc_rx_bytes[i], &veb_es->tc_rx_bytes[i]); i40e_stat_update48(hw, I40E_GLVEBTC_TPCH(i, idx), I40E_GLVEBTC_TPCL(i, idx), veb->stat_offsets_loaded, &veb_oes->tc_tx_packets[i], &veb_es->tc_tx_packets[i]); i40e_stat_update48(hw, I40E_GLVEBTC_TBCH(i, idx), I40E_GLVEBTC_TBCL(i, idx), veb->stat_offsets_loaded, &veb_oes->tc_tx_bytes[i], &veb_es->tc_tx_bytes[i]); } veb->stat_offsets_loaded = true; } /** * i40e_update_vsi_stats - Update the vsi statistics counters. * @vsi: the VSI to be updated * * There are a few instances where we store the same stat in a * couple of different structs. This is partly because we have * the netdev stats that need to be filled out, which is slightly * different from the "eth_stats" defined by the chip and used in * VF communications. We sort it out here. **/ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) { u64 rx_page, rx_buf, rx_reuse, rx_alloc, rx_waive, rx_busy; struct i40e_pf *pf = vsi->back; struct rtnl_link_stats64 *ons; struct rtnl_link_stats64 *ns; /* netdev stats */ struct i40e_eth_stats *oes; struct i40e_eth_stats *es; /* device's eth stats */ u64 tx_restart, tx_busy; struct i40e_ring *p; u64 bytes, packets; unsigned int start; u64 tx_linearize; u64 tx_force_wb; u64 tx_stopped; u64 rx_p, rx_b; u64 tx_p, tx_b; u16 q; if (test_bit(__I40E_VSI_DOWN, vsi->state) || test_bit(__I40E_CONFIG_BUSY, pf->state)) return; ns = i40e_get_vsi_stats_struct(vsi); ons = &vsi->net_stats_offsets; es = &vsi->eth_stats; oes = &vsi->eth_stats_offsets; /* Gather up the netdev and vsi stats that the driver collects * on the fly during packet processing */ rx_b = rx_p = 0; tx_b = tx_p = 0; tx_restart = tx_busy = tx_linearize = tx_force_wb = 0; tx_stopped = 0; rx_page = 0; rx_buf = 0; rx_reuse = 0; rx_alloc = 0; rx_waive = 0; rx_busy = 0; rcu_read_lock(); for (q = 0; q < vsi->num_queue_pairs; q++) { /* locate Tx ring */ p = READ_ONCE(vsi->tx_rings[q]); if (!p) continue; do { start = u64_stats_fetch_begin(&p->syncp); packets = p->stats.packets; bytes = p->stats.bytes; } while (u64_stats_fetch_retry(&p->syncp, start)); tx_b += bytes; tx_p += packets; tx_restart += p->tx_stats.restart_queue; tx_busy += p->tx_stats.tx_busy; tx_linearize += p->tx_stats.tx_linearize; tx_force_wb += p->tx_stats.tx_force_wb; tx_stopped += p->tx_stats.tx_stopped; /* locate Rx ring */ p = READ_ONCE(vsi->rx_rings[q]); if (!p) continue; do { start = u64_stats_fetch_begin(&p->syncp); packets = p->stats.packets; bytes = p->stats.bytes; } while (u64_stats_fetch_retry(&p->syncp, start)); rx_b += bytes; rx_p += packets; rx_buf += p->rx_stats.alloc_buff_failed; rx_page += p->rx_stats.alloc_page_failed; rx_reuse += p->rx_stats.page_reuse_count; rx_alloc += p->rx_stats.page_alloc_count; rx_waive += p->rx_stats.page_waive_count; rx_busy += p->rx_stats.page_busy_count; if (i40e_enabled_xdp_vsi(vsi)) { /* locate XDP ring */ p = READ_ONCE(vsi->xdp_rings[q]); if (!p) continue; do { start = u64_stats_fetch_begin(&p->syncp); packets = p->stats.packets; bytes = p->stats.bytes; } while (u64_stats_fetch_retry(&p->syncp, start)); tx_b += bytes; tx_p += packets; tx_restart += p->tx_stats.restart_queue; tx_busy += p->tx_stats.tx_busy; tx_linearize += p->tx_stats.tx_linearize; tx_force_wb += p->tx_stats.tx_force_wb; } } rcu_read_unlock(); vsi->tx_restart = tx_restart; vsi->tx_busy = tx_busy; vsi->tx_linearize = tx_linearize; vsi->tx_force_wb = tx_force_wb; vsi->tx_stopped = tx_stopped; vsi->rx_page_failed = rx_page; vsi->rx_buf_failed = rx_buf; vsi->rx_page_reuse = rx_reuse; vsi->rx_page_alloc = rx_alloc; vsi->rx_page_waive = rx_waive; vsi->rx_page_busy = rx_busy; ns->rx_packets = rx_p; ns->rx_bytes = rx_b; ns->tx_packets = tx_p; ns->tx_bytes = tx_b; /* update netdev stats from eth stats */ i40e_update_eth_stats(vsi); ons->tx_errors = oes->tx_errors; ns->tx_errors = es->tx_errors; ons->multicast = oes->rx_multicast; ns->multicast = es->rx_multicast; ons->rx_dropped = oes->rx_discards_other; ns->rx_dropped = es->rx_discards_other; ons->rx_missed_errors = oes->rx_discards; ns->rx_missed_errors = es->rx_discards; ons->tx_dropped = oes->tx_discards; ns->tx_dropped = es->tx_discards; /* pull in a couple PF stats if this is the main vsi */ if (vsi->type == I40E_VSI_MAIN) { ns->rx_crc_errors = pf->stats.crc_errors; ns->rx_errors = pf->stats.crc_errors + pf->stats.illegal_bytes; ns->rx_length_errors = pf->stats.rx_length_errors; } } /** * i40e_update_pf_stats - Update the PF statistics counters. * @pf: the PF to be updated **/ static void i40e_update_pf_stats(struct i40e_pf *pf) { struct i40e_hw_port_stats *osd = &pf->stats_offsets; struct i40e_hw_port_stats *nsd = &pf->stats; struct i40e_hw *hw = &pf->hw; u32 val; int i; i40e_stat_update48(hw, I40E_GLPRT_GORCH(hw->port), I40E_GLPRT_GORCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_bytes, &nsd->eth.rx_bytes); i40e_stat_update48(hw, I40E_GLPRT_GOTCH(hw->port), I40E_GLPRT_GOTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_bytes, &nsd->eth.tx_bytes); i40e_stat_update32(hw, I40E_GLPRT_RDPC(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_discards, &nsd->eth.rx_discards); i40e_stat_update48(hw, I40E_GLPRT_UPRCH(hw->port), I40E_GLPRT_UPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_unicast, &nsd->eth.rx_unicast); i40e_stat_update48(hw, I40E_GLPRT_MPRCH(hw->port), I40E_GLPRT_MPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_multicast, &nsd->eth.rx_multicast); i40e_stat_update48(hw, I40E_GLPRT_BPRCH(hw->port), I40E_GLPRT_BPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_broadcast, &nsd->eth.rx_broadcast); i40e_stat_update48(hw, I40E_GLPRT_UPTCH(hw->port), I40E_GLPRT_UPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_unicast, &nsd->eth.tx_unicast); i40e_stat_update48(hw, I40E_GLPRT_MPTCH(hw->port), I40E_GLPRT_MPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_multicast, &nsd->eth.tx_multicast); i40e_stat_update48(hw, I40E_GLPRT_BPTCH(hw->port), I40E_GLPRT_BPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_broadcast, &nsd->eth.tx_broadcast); i40e_stat_update32(hw, I40E_GLPRT_TDOLD(hw->port), pf->stat_offsets_loaded, &osd->tx_dropped_link_down, &nsd->tx_dropped_link_down); i40e_stat_update32(hw, I40E_GLPRT_CRCERRS(hw->port), pf->stat_offsets_loaded, &osd->crc_errors, &nsd->crc_errors); i40e_stat_update32(hw, I40E_GLPRT_ILLERRC(hw->port), pf->stat_offsets_loaded, &osd->illegal_bytes, &nsd->illegal_bytes); i40e_stat_update32(hw, I40E_GLPRT_MLFC(hw->port), pf->stat_offsets_loaded, &osd->mac_local_faults, &nsd->mac_local_faults); i40e_stat_update32(hw, I40E_GLPRT_MRFC(hw->port), pf->stat_offsets_loaded, &osd->mac_remote_faults, &nsd->mac_remote_faults); i40e_stat_update32(hw, I40E_GLPRT_RLEC(hw->port), pf->stat_offsets_loaded, &osd->rx_length_errors, &nsd->rx_length_errors); i40e_stat_update32(hw, I40E_GLPRT_LXONRXC(hw->port), pf->stat_offsets_loaded, &osd->link_xon_rx, &nsd->link_xon_rx); i40e_stat_update32(hw, I40E_GLPRT_LXONTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xon_tx, &nsd->link_xon_tx); i40e_stat_update32(hw, I40E_GLPRT_LXOFFRXC(hw->port), pf->stat_offsets_loaded, &osd->link_xoff_rx, &nsd->link_xoff_rx); i40e_stat_update32(hw, I40E_GLPRT_LXOFFTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xoff_tx, &nsd->link_xoff_tx); for (i = 0; i < 8; i++) { i40e_stat_update32(hw, I40E_GLPRT_PXOFFRXC(hw->port, i), pf->stat_offsets_loaded, &osd->priority_xoff_rx[i], &nsd->priority_xoff_rx[i]); i40e_stat_update32(hw, I40E_GLPRT_PXONRXC(hw->port, i), pf->stat_offsets_loaded, &osd->priority_xon_rx[i], &nsd->priority_xon_rx[i]); i40e_stat_update32(hw, I40E_GLPRT_PXONTXC(hw->port, i), pf->stat_offsets_loaded, &osd->priority_xon_tx[i], &nsd->priority_xon_tx[i]); i40e_stat_update32(hw, I40E_GLPRT_PXOFFTXC(hw->port, i), pf->stat_offsets_loaded, &osd->priority_xoff_tx[i], &nsd->priority_xoff_tx[i]); i40e_stat_update32(hw, I40E_GLPRT_RXON2OFFCNT(hw->port, i), pf->stat_offsets_loaded, &osd->priority_xon_2_xoff[i], &nsd->priority_xon_2_xoff[i]); } i40e_stat_update48(hw, I40E_GLPRT_PRC64H(hw->port), I40E_GLPRT_PRC64L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_64, &nsd->rx_size_64); i40e_stat_update48(hw, I40E_GLPRT_PRC127H(hw->port), I40E_GLPRT_PRC127L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_127, &nsd->rx_size_127); i40e_stat_update48(hw, I40E_GLPRT_PRC255H(hw->port), I40E_GLPRT_PRC255L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_255, &nsd->rx_size_255); i40e_stat_update48(hw, I40E_GLPRT_PRC511H(hw->port), I40E_GLPRT_PRC511L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_511, &nsd->rx_size_511); i40e_stat_update48(hw, I40E_GLPRT_PRC1023H(hw->port), I40E_GLPRT_PRC1023L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_1023, &nsd->rx_size_1023); i40e_stat_update48(hw, I40E_GLPRT_PRC1522H(hw->port), I40E_GLPRT_PRC1522L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_1522, &nsd->rx_size_1522); i40e_stat_update48(hw, I40E_GLPRT_PRC9522H(hw->port), I40E_GLPRT_PRC9522L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_big, &nsd->rx_size_big); i40e_stat_update48(hw, I40E_GLPRT_PTC64H(hw->port), I40E_GLPRT_PTC64L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_64, &nsd->tx_size_64); i40e_stat_update48(hw, I40E_GLPRT_PTC127H(hw->port), I40E_GLPRT_PTC127L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_127, &nsd->tx_size_127); i40e_stat_update48(hw, I40E_GLPRT_PTC255H(hw->port), I40E_GLPRT_PTC255L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_255, &nsd->tx_size_255); i40e_stat_update48(hw, I40E_GLPRT_PTC511H(hw->port), I40E_GLPRT_PTC511L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_511, &nsd->tx_size_511); i40e_stat_update48(hw, I40E_GLPRT_PTC1023H(hw->port), I40E_GLPRT_PTC1023L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_1023, &nsd->tx_size_1023); i40e_stat_update48(hw, I40E_GLPRT_PTC1522H(hw->port), I40E_GLPRT_PTC1522L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_1522, &nsd->tx_size_1522); i40e_stat_update48(hw, I40E_GLPRT_PTC9522H(hw->port), I40E_GLPRT_PTC9522L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_big, &nsd->tx_size_big); i40e_stat_update32(hw, I40E_GLPRT_RUC(hw->port), pf->stat_offsets_loaded, &osd->rx_undersize, &nsd->rx_undersize); i40e_stat_update32(hw, I40E_GLPRT_RFC(hw->port), pf->stat_offsets_loaded, &osd->rx_fragments, &nsd->rx_fragments); i40e_stat_update32(hw, I40E_GLPRT_ROC(hw->port), pf->stat_offsets_loaded, &osd->rx_oversize, &nsd->rx_oversize); i40e_stat_update32(hw, I40E_GLPRT_RJC(hw->port), pf->stat_offsets_loaded, &osd->rx_jabber, &nsd->rx_jabber); /* FDIR stats */ i40e_stat_update_and_clear32(hw, I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(hw->pf_id)), &nsd->fd_atr_match); i40e_stat_update_and_clear32(hw, I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(hw->pf_id)), &nsd->fd_sb_match); i40e_stat_update_and_clear32(hw, I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(hw->pf_id)), &nsd->fd_atr_tunnel_match); val = rd32(hw, I40E_PRTPM_EEE_STAT); nsd->tx_lpi_status = FIELD_GET(I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_MASK, val); nsd->rx_lpi_status = FIELD_GET(I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_MASK, val); i40e_stat_update32(hw, I40E_PRTPM_TLPIC, pf->stat_offsets_loaded, &osd->tx_lpi_count, &nsd->tx_lpi_count); i40e_stat_update32(hw, I40E_PRTPM_RLPIC, pf->stat_offsets_loaded, &osd->rx_lpi_count, &nsd->rx_lpi_count); if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) && !test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) nsd->fd_sb_status = true; else nsd->fd_sb_status = false; if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) && !test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state)) nsd->fd_atr_status = true; else nsd->fd_atr_status = false; pf->stat_offsets_loaded = true; } /** * i40e_update_stats - Update the various statistics counters. * @vsi: the VSI to be updated * * Update the various stats for this VSI and its related entities. **/ void i40e_update_stats(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; if (vsi->type == I40E_VSI_MAIN) i40e_update_pf_stats(pf); i40e_update_vsi_stats(vsi); } /** * i40e_count_filters - counts VSI mac filters * @vsi: the VSI to be searched * * Returns count of mac filters **/ int i40e_count_filters(struct i40e_vsi *vsi) { struct i40e_mac_filter *f; struct hlist_node *h; int bkt; int cnt = 0; hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { if (f->state == I40E_FILTER_NEW || f->state == I40E_FILTER_ACTIVE) ++cnt; } return cnt; } /** * i40e_find_filter - Search VSI filter list for specific mac/vlan filter * @vsi: the VSI to be searched * @macaddr: the MAC address * @vlan: the vlan * * Returns ptr to the filter object or NULL **/ static struct i40e_mac_filter *i40e_find_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan) { struct i40e_mac_filter *f; u64 key; if (!vsi || !macaddr) return NULL; key = i40e_addr_to_hkey(macaddr); hash_for_each_possible(vsi->mac_filter_hash, f, hlist, key) { if ((ether_addr_equal(macaddr, f->macaddr)) && (vlan == f->vlan)) return f; } return NULL; } /** * i40e_find_mac - Find a mac addr in the macvlan filters list * @vsi: the VSI to be searched * @macaddr: the MAC address we are searching for * * Returns the first filter with the provided MAC address or NULL if * MAC address was not found **/ struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr) { struct i40e_mac_filter *f; u64 key; if (!vsi || !macaddr) return NULL; key = i40e_addr_to_hkey(macaddr); hash_for_each_possible(vsi->mac_filter_hash, f, hlist, key) { if ((ether_addr_equal(macaddr, f->macaddr))) return f; } return NULL; } /** * i40e_is_vsi_in_vlan - Check if VSI is in vlan mode * @vsi: the VSI to be searched * * Returns true if VSI is in vlan mode or false otherwise **/ bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi) { /* If we have a PVID, always operate in VLAN mode */ if (vsi->info.pvid) return true; /* We need to operate in VLAN mode whenever we have any filters with * a VLAN other than I40E_VLAN_ALL. We could check the table each * time, incurring search cost repeatedly. However, we can notice two * things: * * 1) the only place where we can gain a VLAN filter is in * i40e_add_filter. * * 2) the only place where filters are actually removed is in * i40e_sync_filters_subtask. * * Thus, we can simply use a boolean value, has_vlan_filters which we * will set to true when we add a VLAN filter in i40e_add_filter. Then * we have to perform the full search after deleting filters in * i40e_sync_filters_subtask, but we already have to search * filters here and can perform the check at the same time. This * results in avoiding embedding a loop for VLAN mode inside another * loop over all the filters, and should maintain correctness as noted * above. */ return vsi->has_vlan_filter; } /** * i40e_correct_mac_vlan_filters - Correct non-VLAN filters if necessary * @vsi: the VSI to configure * @tmp_add_list: list of filters ready to be added * @tmp_del_list: list of filters ready to be deleted * @vlan_filters: the number of active VLAN filters * * Update VLAN=0 and VLAN=-1 (I40E_VLAN_ANY) filters properly so that they * behave as expected. If we have any active VLAN filters remaining or about * to be added then we need to update non-VLAN filters to be marked as VLAN=0 * so that they only match against untagged traffic. If we no longer have any * active VLAN filters, we need to make all non-VLAN filters marked as VLAN=-1 * so that they match against both tagged and untagged traffic. In this way, * we ensure that we correctly receive the desired traffic. This ensures that * when we have an active VLAN we will receive only untagged traffic and * traffic matching active VLANs. If we have no active VLANs then we will * operate in non-VLAN mode and receive all traffic, tagged or untagged. * * Finally, in a similar fashion, this function also corrects filters when * there is an active PVID assigned to this VSI. * * In case of memory allocation failure return -ENOMEM. Otherwise, return 0. * * This function is only expected to be called from within * i40e_sync_vsi_filters. * * NOTE: This function expects to be called while under the * mac_filter_hash_lock */ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, struct hlist_head *tmp_add_list, struct hlist_head *tmp_del_list, int vlan_filters) { s16 pvid = le16_to_cpu(vsi->info.pvid); struct i40e_mac_filter *f, *add_head; struct i40e_new_mac_filter *new; struct hlist_node *h; int bkt, new_vlan; /* To determine if a particular filter needs to be replaced we * have the three following conditions: * * a) if we have a PVID assigned, then all filters which are * not marked as VLAN=PVID must be replaced with filters that * are. * b) otherwise, if we have any active VLANS, all filters * which are marked as VLAN=-1 must be replaced with * filters marked as VLAN=0 * c) finally, if we do not have any active VLANS, all filters * which are marked as VLAN=0 must be replaced with filters * marked as VLAN=-1 */ /* Update the filters about to be added in place */ hlist_for_each_entry(new, tmp_add_list, hlist) { if (pvid && new->f->vlan != pvid) new->f->vlan = pvid; else if (vlan_filters && new->f->vlan == I40E_VLAN_ANY) new->f->vlan = 0; else if (!vlan_filters && new->f->vlan == 0) new->f->vlan = I40E_VLAN_ANY; } /* Update the remaining active filters */ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { /* Combine the checks for whether a filter needs to be changed * and then determine the new VLAN inside the if block, in * order to avoid duplicating code for adding the new filter * then deleting the old filter. */ if ((pvid && f->vlan != pvid) || (vlan_filters && f->vlan == I40E_VLAN_ANY) || (!vlan_filters && f->vlan == 0)) { /* Determine the new vlan we will be adding */ if (pvid) new_vlan = pvid; else if (vlan_filters) new_vlan = 0; else new_vlan = I40E_VLAN_ANY; /* Create the new filter */ add_head = i40e_add_filter(vsi, f->macaddr, new_vlan); if (!add_head) return -ENOMEM; /* Create a temporary i40e_new_mac_filter */ new = kzalloc(sizeof(*new), GFP_ATOMIC); if (!new) return -ENOMEM; new->f = add_head; new->state = add_head->state; /* Add the new filter to the tmp list */ hlist_add_head(&new->hlist, tmp_add_list); /* Put the original filter into the delete list */ f->state = I40E_FILTER_REMOVE; hash_del(&f->hlist); hlist_add_head(&f->hlist, tmp_del_list); } } vsi->has_vlan_filter = !!vlan_filters; return 0; } /** * i40e_get_vf_new_vlan - Get new vlan id on a vf * @vsi: the vsi to configure * @new_mac: new mac filter to be added * @f: existing mac filter, replaced with new_mac->f if new_mac is not NULL * @vlan_filters: the number of active VLAN filters * @trusted: flag if the VF is trusted * * Get new VLAN id based on current VLAN filters, trust, PVID * and vf-vlan-prune-disable flag. * * Returns the value of the new vlan filter or * the old value if no new filter is needed. */ static s16 i40e_get_vf_new_vlan(struct i40e_vsi *vsi, struct i40e_new_mac_filter *new_mac, struct i40e_mac_filter *f, int vlan_filters, bool trusted) { s16 pvid = le16_to_cpu(vsi->info.pvid); struct i40e_pf *pf = vsi->back; bool is_any; if (new_mac) f = new_mac->f; if (pvid && f->vlan != pvid) return pvid; is_any = (trusted || !test_bit(I40E_FLAG_VF_VLAN_PRUNING_ENA, pf->flags)); if ((vlan_filters && f->vlan == I40E_VLAN_ANY) || (!is_any && !vlan_filters && f->vlan == I40E_VLAN_ANY) || (is_any && !vlan_filters && f->vlan == 0)) { if (is_any) return I40E_VLAN_ANY; else return 0; } return f->vlan; } /** * i40e_correct_vf_mac_vlan_filters - Correct non-VLAN VF filters if necessary * @vsi: the vsi to configure * @tmp_add_list: list of filters ready to be added * @tmp_del_list: list of filters ready to be deleted * @vlan_filters: the number of active VLAN filters * @trusted: flag if the VF is trusted * * Correct VF VLAN filters based on current VLAN filters, trust, PVID * and vf-vlan-prune-disable flag. * * In case of memory allocation failure return -ENOMEM. Otherwise, return 0. * * This function is only expected to be called from within * i40e_sync_vsi_filters. * * NOTE: This function expects to be called while under the * mac_filter_hash_lock */ static int i40e_correct_vf_mac_vlan_filters(struct i40e_vsi *vsi, struct hlist_head *tmp_add_list, struct hlist_head *tmp_del_list, int vlan_filters, bool trusted) { struct i40e_mac_filter *f, *add_head; struct i40e_new_mac_filter *new_mac; struct hlist_node *h; int bkt, new_vlan; hlist_for_each_entry(new_mac, tmp_add_list, hlist) { new_mac->f->vlan = i40e_get_vf_new_vlan(vsi, new_mac, NULL, vlan_filters, trusted); } hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { new_vlan = i40e_get_vf_new_vlan(vsi, NULL, f, vlan_filters, trusted); if (new_vlan != f->vlan) { add_head = i40e_add_filter(vsi, f->macaddr, new_vlan); if (!add_head) return -ENOMEM; /* Create a temporary i40e_new_mac_filter */ new_mac = kzalloc(sizeof(*new_mac), GFP_ATOMIC); if (!new_mac) return -ENOMEM; new_mac->f = add_head; new_mac->state = add_head->state; /* Add the new filter to the tmp list */ hlist_add_head(&new_mac->hlist, tmp_add_list); /* Put the original filter into the delete list */ f->state = I40E_FILTER_REMOVE; hash_del(&f->hlist); hlist_add_head(&f->hlist, tmp_del_list); } } vsi->has_vlan_filter = !!vlan_filters; return 0; } /** * i40e_rm_default_mac_filter - Remove the default MAC filter set by NVM * @vsi: the PF Main VSI - inappropriate for any other VSI * @macaddr: the MAC address * * Remove whatever filter the firmware set up so the driver can manage * its own filtering intelligently. **/ static void i40e_rm_default_mac_filter(struct i40e_vsi *vsi, u8 *macaddr) { struct i40e_aqc_remove_macvlan_element_data element; struct i40e_pf *pf = vsi->back; /* Only appropriate for the PF main VSI */ if (vsi->type != I40E_VSI_MAIN) return; memset(&element, 0, sizeof(element)); ether_addr_copy(element.mac_addr, macaddr); element.vlan_tag = 0; /* Ignore error returns, some firmware does it this way... */ element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH; i40e_aq_remove_macvlan(&pf->hw, vsi->seid, &element, 1, NULL); memset(&element, 0, sizeof(element)); ether_addr_copy(element.mac_addr, macaddr); element.vlan_tag = 0; /* ...and some firmware does it this way. */ element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH | I40E_AQC_MACVLAN_DEL_IGNORE_VLAN; i40e_aq_remove_macvlan(&pf->hw, vsi->seid, &element, 1, NULL); } /** * i40e_add_filter - Add a mac/vlan filter to the VSI * @vsi: the VSI to be searched * @macaddr: the MAC address * @vlan: the vlan * * Returns ptr to the filter object or NULL when no memory available. * * NOTE: This function is expected to be called with mac_filter_hash_lock * being held. **/ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan) { struct i40e_mac_filter *f; u64 key; if (!vsi || !macaddr) return NULL; f = i40e_find_filter(vsi, macaddr, vlan); if (!f) { f = kzalloc(sizeof(*f), GFP_ATOMIC); if (!f) return NULL; /* Update the boolean indicating if we need to function in * VLAN mode. */ if (vlan >= 0) vsi->has_vlan_filter = true; ether_addr_copy(f->macaddr, macaddr); f->vlan = vlan; f->state = I40E_FILTER_NEW; INIT_HLIST_NODE(&f->hlist); key = i40e_addr_to_hkey(macaddr); hash_add(vsi->mac_filter_hash, &f->hlist, key); vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state); } /* If we're asked to add a filter that has been marked for removal, it * is safe to simply restore it to active state. __i40e_del_filter * will have simply deleted any filters which were previously marked * NEW or FAILED, so if it is currently marked REMOVE it must have * previously been ACTIVE. Since we haven't yet run the sync filters * task, just restore this filter to the ACTIVE state so that the * sync task leaves it in place */ if (f->state == I40E_FILTER_REMOVE) f->state = I40E_FILTER_ACTIVE; return f; } /** * __i40e_del_filter - Remove a specific filter from the VSI * @vsi: VSI to remove from * @f: the filter to remove from the list * * This function should be called instead of i40e_del_filter only if you know * the exact filter you will remove already, such as via i40e_find_filter or * i40e_find_mac. * * NOTE: This function is expected to be called with mac_filter_hash_lock * being held. * ANOTHER NOTE: This function MUST be called from within the context of * the "safe" variants of any list iterators, e.g. list_for_each_entry_safe() * instead of list_for_each_entry(). **/ void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f) { if (!f) return; /* If the filter was never added to firmware then we can just delete it * directly and we don't want to set the status to remove or else an * admin queue command will unnecessarily fire. */ if ((f->state == I40E_FILTER_FAILED) || (f->state == I40E_FILTER_NEW)) { hash_del(&f->hlist); kfree(f); } else { f->state = I40E_FILTER_REMOVE; } vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state); } /** * i40e_del_filter - Remove a MAC/VLAN filter from the VSI * @vsi: the VSI to be searched * @macaddr: the MAC address * @vlan: the VLAN * * NOTE: This function is expected to be called with mac_filter_hash_lock * being held. * ANOTHER NOTE: This function MUST be called from within the context of * the "safe" variants of any list iterators, e.g. list_for_each_entry_safe() * instead of list_for_each_entry(). **/ void i40e_del_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan) { struct i40e_mac_filter *f; if (!vsi || !macaddr) return; f = i40e_find_filter(vsi, macaddr, vlan); __i40e_del_filter(vsi, f); } /** * i40e_add_mac_filter - Add a MAC filter for all active VLANs * @vsi: the VSI to be searched * @macaddr: the mac address to be filtered * * If we're not in VLAN mode, just add the filter to I40E_VLAN_ANY. Otherwise, * go through all the macvlan filters and add a macvlan filter for each * unique vlan that already exists. If a PVID has been assigned, instead only * add the macaddr to that VLAN. * * Returns last filter added on success, else NULL **/ struct i40e_mac_filter *i40e_add_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr) { struct i40e_mac_filter *f, *add = NULL; struct hlist_node *h; int bkt; if (vsi->info.pvid) return i40e_add_filter(vsi, macaddr, le16_to_cpu(vsi->info.pvid)); if (!i40e_is_vsi_in_vlan(vsi)) return i40e_add_filter(vsi, macaddr, I40E_VLAN_ANY); hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { if (f->state == I40E_FILTER_REMOVE) continue; add = i40e_add_filter(vsi, macaddr, f->vlan); if (!add) return NULL; } return add; } /** * i40e_del_mac_filter - Remove a MAC filter from all VLANs * @vsi: the VSI to be searched * @macaddr: the mac address to be removed * * Removes a given MAC address from a VSI regardless of what VLAN it has been * associated with. * * Returns 0 for success, or error **/ int i40e_del_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr) { struct i40e_mac_filter *f; struct hlist_node *h; bool found = false; int bkt; lockdep_assert_held(&vsi->mac_filter_hash_lock); hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { if (ether_addr_equal(macaddr, f->macaddr)) { __i40e_del_filter(vsi, f); found = true; } } if (found) return 0; else return -ENOENT; } /** * i40e_set_mac - NDO callback to set mac address * @netdev: network interface device structure * @p: pointer to an address structure * * Returns 0 on success, negative on failure **/ static int i40e_set_mac(struct net_device *netdev, void *p) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; struct sockaddr *addr = p; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; if (test_bit(__I40E_DOWN, pf->state) || test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) return -EADDRNOTAVAIL; if (ether_addr_equal(hw->mac.addr, addr->sa_data)) netdev_info(netdev, "returning to hw mac address %pM\n", hw->mac.addr); else netdev_info(netdev, "set new mac address %pM\n", addr->sa_data); /* Copy the address first, so that we avoid a possible race with * .set_rx_mode(). * - Remove old address from MAC filter * - Copy new address * - Add new address to MAC filter */ spin_lock_bh(&vsi->mac_filter_hash_lock); i40e_del_mac_filter(vsi, netdev->dev_addr); eth_hw_addr_set(netdev, addr->sa_data); i40e_add_mac_filter(vsi, netdev->dev_addr); spin_unlock_bh(&vsi->mac_filter_hash_lock); if (vsi->type == I40E_VSI_MAIN) { int ret; ret = i40e_aq_mac_address_write(hw, I40E_AQC_WRITE_TYPE_LAA_WOL, addr->sa_data, NULL); if (ret) netdev_info(netdev, "Ignoring error from firmware on LAA update, status %pe, AQ ret %s\n", ERR_PTR(ret), i40e_aq_str(hw, hw->aq.asq_last_status)); } /* schedule our worker thread which will take care of * applying the new filter changes */ i40e_service_event_schedule(pf); return 0; } /** * i40e_config_rss_aq - Prepare for RSS using AQ commands * @vsi: vsi structure * @seed: RSS hash seed * @lut: pointer to lookup table of lut_size * @lut_size: size of the lookup table **/ static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed, u8 *lut, u16 lut_size) { struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; int ret = 0; if (seed) { struct i40e_aqc_get_set_rss_key_data *seed_dw = (struct i40e_aqc_get_set_rss_key_data *)seed; ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw); if (ret) { dev_info(&pf->pdev->dev, "Cannot set RSS key, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(hw, hw->aq.asq_last_status)); return ret; } } if (lut) { bool pf_lut = vsi->type == I40E_VSI_MAIN; ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size); if (ret) { dev_info(&pf->pdev->dev, "Cannot set RSS lut, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(hw, hw->aq.asq_last_status)); return ret; } } return ret; } /** * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used * @vsi: VSI structure **/ static int i40e_vsi_config_rss(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; u8 seed[I40E_HKEY_ARRAY_SIZE]; u8 *lut; int ret; if (!test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps)) return 0; if (!vsi->rss_size) vsi->rss_size = min_t(int, pf->alloc_rss_size, vsi->num_queue_pairs); if (!vsi->rss_size) return -EINVAL; lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) return -ENOMEM; /* Use the user configured hash keys and lookup table if there is one, * otherwise use default */ if (vsi->rss_lut_user) memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size); else i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size); if (vsi->rss_hkey_user) memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE); else netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size); kfree(lut); return ret; } /** * i40e_vsi_setup_queue_map_mqprio - Prepares mqprio based tc_config * @vsi: the VSI being configured, * @ctxt: VSI context structure * @enabled_tc: number of traffic classes to enable * * Prepares VSI tc_config to have queue configurations based on MQPRIO options. **/ static int i40e_vsi_setup_queue_map_mqprio(struct i40e_vsi *vsi, struct i40e_vsi_context *ctxt, u8 enabled_tc) { u16 qcount = 0, max_qcount, qmap, sections = 0; int i, override_q, pow, num_qps, ret; u8 netdev_tc = 0, offset = 0; if (vsi->type != I40E_VSI_MAIN) return -EINVAL; sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; sections |= I40E_AQ_VSI_PROP_SCHED_VALID; vsi->tc_config.numtc = vsi->mqprio_qopt.qopt.num_tc; vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1; num_qps = vsi->mqprio_qopt.qopt.count[0]; /* find the next higher power-of-2 of num queue pairs */ pow = ilog2(num_qps); if (!is_power_of_2(num_qps)) pow++; qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) | (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT); /* Setup queue offset/count for all TCs for given VSI */ max_qcount = vsi->mqprio_qopt.qopt.count[0]; for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { /* See if the given TC is enabled for the given VSI */ if (vsi->tc_config.enabled_tc & BIT(i)) { offset = vsi->mqprio_qopt.qopt.offset[i]; qcount = vsi->mqprio_qopt.qopt.count[i]; if (qcount > max_qcount) max_qcount = qcount; vsi->tc_config.tc_info[i].qoffset = offset; vsi->tc_config.tc_info[i].qcount = qcount; vsi->tc_config.tc_info[i].netdev_tc = netdev_tc++; } else { /* TC is not enabled so set the offset to * default queue and allocate one queue * for the given TC. */ vsi->tc_config.tc_info[i].qoffset = 0; vsi->tc_config.tc_info[i].qcount = 1; vsi->tc_config.tc_info[i].netdev_tc = 0; } } /* Set actual Tx/Rx queue pairs */ vsi->num_queue_pairs = offset + qcount; /* Setup queue TC[0].qmap for given VSI context */ ctxt->info.tc_mapping[0] = cpu_to_le16(qmap); ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG); ctxt->info.queue_mapping[0] = cpu_to_le16(vsi->base_queue); ctxt->info.valid_sections |= cpu_to_le16(sections); /* Reconfigure RSS for main VSI with max queue count */ vsi->rss_size = max_qcount; ret = i40e_vsi_config_rss(vsi); if (ret) { dev_info(&vsi->back->pdev->dev, "Failed to reconfig rss for num_queues (%u)\n", max_qcount); return ret; } vsi->reconfig_rss = true; dev_dbg(&vsi->back->pdev->dev, "Reconfigured rss with num_queues (%u)\n", max_qcount); /* Find queue count available for channel VSIs and starting offset * for channel VSIs */ override_q = vsi->mqprio_qopt.qopt.count[0]; if (override_q && override_q < vsi->num_queue_pairs) { vsi->cnt_q_avail = vsi->num_queue_pairs - override_q; vsi->next_base_queue = override_q; } return 0; } /** * i40e_vsi_setup_queue_map - Setup a VSI queue map based on enabled_tc * @vsi: the VSI being setup * @ctxt: VSI context structure * @enabled_tc: Enabled TCs bitmap * @is_add: True if called before Add VSI * * Setup VSI queue mapping for enabled traffic classes. **/ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, struct i40e_vsi_context *ctxt, u8 enabled_tc, bool is_add) { struct i40e_pf *pf = vsi->back; u16 num_tc_qps = 0; u16 sections = 0; u8 netdev_tc = 0; u16 numtc = 1; u16 qcount; u8 offset; u16 qmap; int i; sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; offset = 0; /* zero out queue mapping, it will get updated on the end of the function */ memset(ctxt->info.queue_mapping, 0, sizeof(ctxt->info.queue_mapping)); if (vsi->type == I40E_VSI_MAIN) { /* This code helps add more queue to the VSI if we have * more cores than RSS can support, the higher cores will * be served by ATR or other filters. Furthermore, the * non-zero req_queue_pairs says that user requested a new * queue count via ethtool's set_channels, so use this * value for queues distribution across traffic classes * We need at least one queue pair for the interface * to be usable as we see in else statement. */ if (vsi->req_queue_pairs > 0) vsi->num_queue_pairs = vsi->req_queue_pairs; else if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) vsi->num_queue_pairs = pf->num_lan_msix; else vsi->num_queue_pairs = 1; } /* Number of queues per enabled TC */ if (vsi->type == I40E_VSI_MAIN || (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs != 0)) num_tc_qps = vsi->num_queue_pairs; else num_tc_qps = vsi->alloc_queue_pairs; if (enabled_tc && test_bit(I40E_FLAG_DCB_ENA, vsi->back->flags)) { /* Find numtc from enabled TC bitmap */ for (i = 0, numtc = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (enabled_tc & BIT(i)) /* TC is enabled */ numtc++; } if (!numtc) { dev_warn(&pf->pdev->dev, "DCB is enabled but no TC enabled, forcing TC0\n"); numtc = 1; } num_tc_qps = num_tc_qps / numtc; num_tc_qps = min_t(int, num_tc_qps, i40e_pf_get_max_q_per_tc(pf)); } vsi->tc_config.numtc = numtc; vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1; /* Do not allow use more TC queue pairs than MSI-X vectors exist */ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) num_tc_qps = min_t(int, num_tc_qps, pf->num_lan_msix); /* Setup queue offset/count for all TCs for given VSI */ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { /* See if the given TC is enabled for the given VSI */ if (vsi->tc_config.enabled_tc & BIT(i)) { /* TC is enabled */ int pow, num_qps; switch (vsi->type) { case I40E_VSI_MAIN: if ((!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) && !test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags)) || vsi->tc_config.enabled_tc != 1) { qcount = min_t(int, pf->alloc_rss_size, num_tc_qps); break; } fallthrough; case I40E_VSI_FDIR: case I40E_VSI_SRIOV: case I40E_VSI_VMDQ2: default: qcount = num_tc_qps; WARN_ON(i != 0); break; } vsi->tc_config.tc_info[i].qoffset = offset; vsi->tc_config.tc_info[i].qcount = qcount; /* find the next higher power-of-2 of num queue pairs */ num_qps = qcount; pow = 0; while (num_qps && (BIT_ULL(pow) < qcount)) { pow++; num_qps >>= 1; } vsi->tc_config.tc_info[i].netdev_tc = netdev_tc++; qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) | (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT); offset += qcount; } else { /* TC is not enabled so set the offset to * default queue and allocate one queue * for the given TC. */ vsi->tc_config.tc_info[i].qoffset = 0; vsi->tc_config.tc_info[i].qcount = 1; vsi->tc_config.tc_info[i].netdev_tc = 0; qmap = 0; } ctxt->info.tc_mapping[i] = cpu_to_le16(qmap); } /* Do not change previously set num_queue_pairs for PFs and VFs*/ if ((vsi->type == I40E_VSI_MAIN && numtc != 1) || (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs == 0) || (vsi->type != I40E_VSI_MAIN && vsi->type != I40E_VSI_SRIOV)) vsi->num_queue_pairs = offset; /* Scheduler section valid can only be set for ADD VSI */ if (is_add) { sections |= I40E_AQ_VSI_PROP_SCHED_VALID; ctxt->info.up_enable_bits = enabled_tc; } if (vsi->type == I40E_VSI_SRIOV) { ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_NONCONTIG); for (i = 0; i < vsi->num_queue_pairs; i++) ctxt->info.queue_mapping[i] = cpu_to_le16(vsi->base_queue + i); } else { ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG); ctxt->info.queue_mapping[0] = cpu_to_le16(vsi->base_queue); } ctxt->info.valid_sections |= cpu_to_le16(sections); } /** * i40e_addr_sync - Callback for dev_(mc|uc)_sync to add address * @netdev: the netdevice * @addr: address to add * * Called by __dev_(mc|uc)_sync when an address needs to be added. We call * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock. */ static int i40e_addr_sync(struct net_device *netdev, const u8 *addr) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; if (i40e_add_mac_filter(vsi, addr)) return 0; else return -ENOMEM; } /** * i40e_addr_unsync - Callback for dev_(mc|uc)_sync to remove address * @netdev: the netdevice * @addr: address to add * * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock. */ static int i40e_addr_unsync(struct net_device *netdev, const u8 *addr) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; /* Under some circumstances, we might receive a request to delete * our own device address from our uc list. Because we store the * device address in the VSI's MAC/VLAN filter list, we need to ignore * such requests and not delete our device address from this list. */ if (ether_addr_equal(addr, netdev->dev_addr)) return 0; i40e_del_mac_filter(vsi, addr); return 0; } /** * i40e_set_rx_mode - NDO callback to set the netdev filters * @netdev: network interface device structure **/ static void i40e_set_rx_mode(struct net_device *netdev) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; spin_lock_bh(&vsi->mac_filter_hash_lock); __dev_uc_sync(netdev, i40e_addr_sync, i40e_addr_unsync); __dev_mc_sync(netdev, i40e_addr_sync, i40e_addr_unsync); spin_unlock_bh(&vsi->mac_filter_hash_lock); /* check for other flag changes */ if (vsi->current_netdev_flags != vsi->netdev->flags) { vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state); } } /** * i40e_undo_del_filter_entries - Undo the changes made to MAC filter entries * @vsi: Pointer to VSI struct * @from: Pointer to list which contains MAC filter entries - changes to * those entries needs to be undone. * * MAC filter entries from this list were slated for deletion. **/ static void i40e_undo_del_filter_entries(struct i40e_vsi *vsi, struct hlist_head *from) { struct i40e_mac_filter *f; struct hlist_node *h; hlist_for_each_entry_safe(f, h, from, hlist) { u64 key = i40e_addr_to_hkey(f->macaddr); /* Move the element back into MAC filter list*/ hlist_del(&f->hlist); hash_add(vsi->mac_filter_hash, &f->hlist, key); } } /** * i40e_undo_add_filter_entries - Undo the changes made to MAC filter entries * @vsi: Pointer to vsi struct * @from: Pointer to list which contains MAC filter entries - changes to * those entries needs to be undone. * * MAC filter entries from this list were slated for addition. **/ static void i40e_undo_add_filter_entries(struct i40e_vsi *vsi, struct hlist_head *from) { struct i40e_new_mac_filter *new; struct hlist_node *h; hlist_for_each_entry_safe(new, h, from, hlist) { /* We can simply free the wrapper structure */ hlist_del(&new->hlist); netdev_hw_addr_refcnt(new->f, vsi->netdev, -1); kfree(new); } } /** * i40e_next_filter - Get the next non-broadcast filter from a list * @next: pointer to filter in list * * Returns the next non-broadcast filter in the list. Required so that we * ignore broadcast filters within the list, since these are not handled via * the normal firmware update path. */ static struct i40e_new_mac_filter *i40e_next_filter(struct i40e_new_mac_filter *next) { hlist_for_each_entry_continue(next, hlist) { if (!is_broadcast_ether_addr(next->f->macaddr)) return next; } return NULL; } /** * i40e_update_filter_state - Update filter state based on return data * from firmware * @count: Number of filters added * @add_list: return data from fw * @add_head: pointer to first filter in current batch * * MAC filter entries from list were slated to be added to device. Returns * number of successful filters. Note that 0 does NOT mean success! **/ static int i40e_update_filter_state(int count, struct i40e_aqc_add_macvlan_element_data *add_list, struct i40e_new_mac_filter *add_head) { int retval = 0; int i; for (i = 0; i < count; i++) { /* Always check status of each filter. We don't need to check * the firmware return status because we pre-set the filter * status to I40E_AQC_MM_ERR_NO_RES when sending the filter * request to the adminq. Thus, if it no longer matches then * we know the filter is active. */ if (add_list[i].match_method == I40E_AQC_MM_ERR_NO_RES) { add_head->state = I40E_FILTER_FAILED; } else { add_head->state = I40E_FILTER_ACTIVE; retval++; } add_head = i40e_next_filter(add_head); if (!add_head) break; } return retval; } /** * i40e_aqc_del_filters - Request firmware to delete a set of filters * @vsi: ptr to the VSI * @vsi_name: name to display in messages * @list: the list of filters to send to firmware * @num_del: the number of filters to delete * @retval: Set to -EIO on failure to delete * * Send a request to firmware via AdminQ to delete a set of filters. Uses * *retval instead of a return value so that success does not force ret_val to * be set to 0. This ensures that a sequence of calls to this function * preserve the previous value of *retval on successful delete. */ static void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name, struct i40e_aqc_remove_macvlan_element_data *list, int num_del, int *retval) { struct i40e_hw *hw = &vsi->back->hw; enum i40e_admin_queue_err aq_status; int aq_ret; aq_ret = i40e_aq_remove_macvlan_v2(hw, vsi->seid, list, num_del, NULL, &aq_status); /* Explicitly ignore and do not report when firmware returns ENOENT */ if (aq_ret && !(aq_status == I40E_AQ_RC_ENOENT)) { *retval = -EIO; dev_info(&vsi->back->pdev->dev, "ignoring delete macvlan error on %s, err %pe, aq_err %s\n", vsi_name, ERR_PTR(aq_ret), i40e_aq_str(hw, aq_status)); } } /** * i40e_aqc_add_filters - Request firmware to add a set of filters * @vsi: ptr to the VSI * @vsi_name: name to display in messages * @list: the list of filters to send to firmware * @add_head: Position in the add hlist * @num_add: the number of filters to add * * Send a request to firmware via AdminQ to add a chunk of filters. Will set * __I40E_VSI_OVERFLOW_PROMISC bit in vsi->state if the firmware has run out of * space for more filters. */ static void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, struct i40e_aqc_add_macvlan_element_data *list, struct i40e_new_mac_filter *add_head, int num_add) { struct i40e_hw *hw = &vsi->back->hw; enum i40e_admin_queue_err aq_status; int fcnt; i40e_aq_add_macvlan_v2(hw, vsi->seid, list, num_add, NULL, &aq_status); fcnt = i40e_update_filter_state(num_add, list, add_head); if (fcnt != num_add) { if (vsi->type == I40E_VSI_MAIN) { set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, promiscuous mode forced on\n", i40e_aq_str(hw, aq_status), vsi_name); } else if (vsi->type == I40E_VSI_SRIOV || vsi->type == I40E_VSI_VMDQ1 || vsi->type == I40E_VSI_VMDQ2) { dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, please set promiscuous on manually for %s\n", i40e_aq_str(hw, aq_status), vsi_name, vsi_name); } else { dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, incorrect VSI type: %i.\n", i40e_aq_str(hw, aq_status), vsi_name, vsi->type); } } } /** * i40e_aqc_broadcast_filter - Set promiscuous broadcast flags * @vsi: pointer to the VSI * @vsi_name: the VSI name * @f: filter data * * This function sets or clears the promiscuous broadcast flags for VLAN * filters in order to properly receive broadcast frames. Assumes that only * broadcast filters are passed. * * Returns status indicating success or failure; **/ static int i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, struct i40e_mac_filter *f) { bool enable = f->state == I40E_FILTER_NEW; struct i40e_hw *hw = &vsi->back->hw; int aq_ret; if (f->vlan == I40E_VLAN_ANY) { aq_ret = i40e_aq_set_vsi_broadcast(hw, vsi->seid, enable, NULL); } else { aq_ret = i40e_aq_set_vsi_bc_promisc_on_vlan(hw, vsi->seid, enable, f->vlan, NULL); } if (aq_ret) { set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); dev_warn(&vsi->back->pdev->dev, "Error %s, forcing overflow promiscuous on %s\n", i40e_aq_str(hw, hw->aq.asq_last_status), vsi_name); } return aq_ret; } /** * i40e_set_promiscuous - set promiscuous mode * @pf: board private structure * @promisc: promisc on or off * * There are different ways of setting promiscuous mode on a PF depending on * what state/environment we're in. This identifies and sets it appropriately. * Returns 0 on success. **/ static int i40e_set_promiscuous(struct i40e_pf *pf, bool promisc) { struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_hw *hw = &pf->hw; int aq_ret; if (vsi->type == I40E_VSI_MAIN && i40e_pf_get_main_veb(pf) && !test_bit(I40E_FLAG_MFP_ENA, pf->flags)) { /* set defport ON for Main VSI instead of true promisc * this way we will get all unicast/multicast and VLAN * promisc behavior but will not get VF or VMDq traffic * replicated on the Main VSI. */ if (promisc) aq_ret = i40e_aq_set_default_vsi(hw, vsi->seid, NULL); else aq_ret = i40e_aq_clear_default_vsi(hw, vsi->seid, NULL); if (aq_ret) { dev_info(&pf->pdev->dev, "Set default VSI failed, err %pe, aq_err %s\n", ERR_PTR(aq_ret), i40e_aq_str(hw, hw->aq.asq_last_status)); } } else { aq_ret = i40e_aq_set_vsi_unicast_promiscuous( hw, vsi->seid, promisc, NULL, true); if (aq_ret) { dev_info(&pf->pdev->dev, "set unicast promisc failed, err %pe, aq_err %s\n", ERR_PTR(aq_ret), i40e_aq_str(hw, hw->aq.asq_last_status)); } aq_ret = i40e_aq_set_vsi_multicast_promiscuous( hw, vsi->seid, promisc, NULL); if (aq_ret) { dev_info(&pf->pdev->dev, "set multicast promisc failed, err %pe, aq_err %s\n", ERR_PTR(aq_ret), i40e_aq_str(hw, hw->aq.asq_last_status)); } } if (!aq_ret) pf->cur_promisc = promisc; return aq_ret; } /** * i40e_sync_vsi_filters - Update the VSI filter list to the HW * @vsi: ptr to the VSI * * Push any outstanding VSI filter changes through the AdminQ. * * Returns 0 or error value **/ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) { struct hlist_head tmp_add_list, tmp_del_list; struct i40e_mac_filter *f; struct i40e_new_mac_filter *new, *add_head = NULL; struct i40e_hw *hw = &vsi->back->hw; bool old_overflow, new_overflow; unsigned int failed_filters = 0; unsigned int vlan_filters = 0; char vsi_name[16] = "PF"; int filter_list_len = 0; u32 changed_flags = 0; struct hlist_node *h; struct i40e_pf *pf; int num_add = 0; int num_del = 0; int aq_ret = 0; int retval = 0; u16 cmd_flags; int list_size; int bkt; /* empty array typed pointers, kcalloc later */ struct i40e_aqc_add_macvlan_element_data *add_list; struct i40e_aqc_remove_macvlan_element_data *del_list; while (test_and_set_bit(__I40E_VSI_SYNCING_FILTERS, vsi->state)) usleep_range(1000, 2000); pf = vsi->back; old_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); if (vsi->netdev) { changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags; vsi->current_netdev_flags = vsi->netdev->flags; } INIT_HLIST_HEAD(&tmp_add_list); INIT_HLIST_HEAD(&tmp_del_list); if (vsi->type == I40E_VSI_SRIOV) snprintf(vsi_name, sizeof(vsi_name) - 1, "VF %d", vsi->vf_id); else if (vsi->type != I40E_VSI_MAIN) snprintf(vsi_name, sizeof(vsi_name) - 1, "vsi %d", vsi->seid); if (vsi->flags & I40E_VSI_FLAG_FILTER_CHANGED) { vsi->flags &= ~I40E_VSI_FLAG_FILTER_CHANGED; spin_lock_bh(&vsi->mac_filter_hash_lock); /* Create a list of filters to delete. */ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { if (f->state == I40E_FILTER_REMOVE) { /* Move the element into temporary del_list */ hash_del(&f->hlist); hlist_add_head(&f->hlist, &tmp_del_list); /* Avoid counting removed filters */ continue; } if (f->state == I40E_FILTER_NEW) { /* Create a temporary i40e_new_mac_filter */ new = kzalloc(sizeof(*new), GFP_ATOMIC); if (!new) goto err_no_memory_locked; /* Store pointer to the real filter */ new->f = f; new->state = f->state; /* Add it to the hash list */ hlist_add_head(&new->hlist, &tmp_add_list); } /* Count the number of active (current and new) VLAN * filters we have now. Does not count filters which * are marked for deletion. */ if (f->vlan > 0) vlan_filters++; } if (vsi->type != I40E_VSI_SRIOV) retval = i40e_correct_mac_vlan_filters (vsi, &tmp_add_list, &tmp_del_list, vlan_filters); else if (pf->vf) retval = i40e_correct_vf_mac_vlan_filters (vsi, &tmp_add_list, &tmp_del_list, vlan_filters, pf->vf[vsi->vf_id].trusted); hlist_for_each_entry(new, &tmp_add_list, hlist) netdev_hw_addr_refcnt(new->f, vsi->netdev, 1); if (retval) goto err_no_memory_locked; spin_unlock_bh(&vsi->mac_filter_hash_lock); } /* Now process 'del_list' outside the lock */ if (!hlist_empty(&tmp_del_list)) { filter_list_len = hw->aq.asq_buf_size / sizeof(struct i40e_aqc_remove_macvlan_element_data); list_size = filter_list_len * sizeof(struct i40e_aqc_remove_macvlan_element_data); del_list = kzalloc(list_size, GFP_ATOMIC); if (!del_list) goto err_no_memory; hlist_for_each_entry_safe(f, h, &tmp_del_list, hlist) { cmd_flags = 0; /* handle broadcast filters by updating the broadcast * promiscuous flag and release filter list. */ if (is_broadcast_ether_addr(f->macaddr)) { i40e_aqc_broadcast_filter(vsi, vsi_name, f); hlist_del(&f->hlist); kfree(f); continue; } /* add to delete list */ ether_addr_copy(del_list[num_del].mac_addr, f->macaddr); if (f->vlan == I40E_VLAN_ANY) { del_list[num_del].vlan_tag = 0; cmd_flags |= I40E_AQC_MACVLAN_DEL_IGNORE_VLAN; } else { del_list[num_del].vlan_tag = cpu_to_le16((u16)(f->vlan)); } cmd_flags |= I40E_AQC_MACVLAN_DEL_PERFECT_MATCH; del_list[num_del].flags = cmd_flags; num_del++; /* flush a full buffer */ if (num_del == filter_list_len) { i40e_aqc_del_filters(vsi, vsi_name, del_list, num_del, &retval); memset(del_list, 0, list_size); num_del = 0; } /* Release memory for MAC filter entries which were * synced up with HW. */ hlist_del(&f->hlist); kfree(f); } if (num_del) { i40e_aqc_del_filters(vsi, vsi_name, del_list, num_del, &retval); } kfree(del_list); del_list = NULL; } if (!hlist_empty(&tmp_add_list)) { /* Do all the adds now. */ filter_list_len = hw->aq.asq_buf_size / sizeof(struct i40e_aqc_add_macvlan_element_data); list_size = filter_list_len * sizeof(struct i40e_aqc_add_macvlan_element_data); add_list = kzalloc(list_size, GFP_ATOMIC); if (!add_list) goto err_no_memory; num_add = 0; hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { /* handle broadcast filters by updating the broadcast * promiscuous flag instead of adding a MAC filter. */ if (is_broadcast_ether_addr(new->f->macaddr)) { if (i40e_aqc_broadcast_filter(vsi, vsi_name, new->f)) new->state = I40E_FILTER_FAILED; else new->state = I40E_FILTER_ACTIVE; continue; } /* add to add array */ if (num_add == 0) add_head = new; cmd_flags = 0; ether_addr_copy(add_list[num_add].mac_addr, new->f->macaddr); if (new->f->vlan == I40E_VLAN_ANY) { add_list[num_add].vlan_tag = 0; cmd_flags |= I40E_AQC_MACVLAN_ADD_IGNORE_VLAN; } else { add_list[num_add].vlan_tag = cpu_to_le16((u16)(new->f->vlan)); } add_list[num_add].queue_number = 0; /* set invalid match method for later detection */ add_list[num_add].match_method = I40E_AQC_MM_ERR_NO_RES; cmd_flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH; add_list[num_add].flags = cpu_to_le16(cmd_flags); num_add++; /* flush a full buffer */ if (num_add == filter_list_len) { i40e_aqc_add_filters(vsi, vsi_name, add_list, add_head, num_add); memset(add_list, 0, list_size); num_add = 0; } } if (num_add) { i40e_aqc_add_filters(vsi, vsi_name, add_list, add_head, num_add); } /* Now move all of the filters from the temp add list back to * the VSI's list. */ spin_lock_bh(&vsi->mac_filter_hash_lock); hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { /* Only update the state if we're still NEW */ if (new->f->state == I40E_FILTER_NEW) new->f->state = new->state; hlist_del(&new->hlist); netdev_hw_addr_refcnt(new->f, vsi->netdev, -1); kfree(new); } spin_unlock_bh(&vsi->mac_filter_hash_lock); kfree(add_list); add_list = NULL; } /* Determine the number of active and failed filters. */ spin_lock_bh(&vsi->mac_filter_hash_lock); vsi->active_filters = 0; hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) { if (f->state == I40E_FILTER_ACTIVE) vsi->active_filters++; else if (f->state == I40E_FILTER_FAILED) failed_filters++; } spin_unlock_bh(&vsi->mac_filter_hash_lock); /* Check if we are able to exit overflow promiscuous mode. We can * safely exit if we didn't just enter, we no longer have any failed * filters, and we have reduced filters below the threshold value. */ if (old_overflow && !failed_filters && vsi->active_filters < vsi->promisc_threshold) { dev_info(&pf->pdev->dev, "filter logjam cleared on %s, leaving overflow promiscuous mode\n", vsi_name); clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); vsi->promisc_threshold = 0; } /* if the VF is not trusted do not do promisc */ if (vsi->type == I40E_VSI_SRIOV && pf->vf && !pf->vf[vsi->vf_id].trusted) { clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); goto out; } new_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); /* If we are entering overflow promiscuous, we need to calculate a new * threshold for when we are safe to exit */ if (!old_overflow && new_overflow) vsi->promisc_threshold = (vsi->active_filters * 3) / 4; /* check for changes in promiscuous modes */ if (changed_flags & IFF_ALLMULTI) { bool cur_multipromisc; cur_multipromisc = !!(vsi->current_netdev_flags & IFF_ALLMULTI); aq_ret = i40e_aq_set_vsi_multicast_promiscuous(&vsi->back->hw, vsi->seid, cur_multipromisc, NULL); if (aq_ret) { retval = i40e_aq_rc_to_posix(aq_ret, hw->aq.asq_last_status); dev_info(&pf->pdev->dev, "set multi promisc failed on %s, err %pe aq_err %s\n", vsi_name, ERR_PTR(aq_ret), i40e_aq_str(hw, hw->aq.asq_last_status)); } else { dev_info(&pf->pdev->dev, "%s allmulti mode.\n", cur_multipromisc ? "entering" : "leaving"); } } if ((changed_flags & IFF_PROMISC) || old_overflow != new_overflow) { bool cur_promisc; cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) || new_overflow); aq_ret = i40e_set_promiscuous(pf, cur_promisc); if (aq_ret) { retval = i40e_aq_rc_to_posix(aq_ret, hw->aq.asq_last_status); dev_info(&pf->pdev->dev, "Setting promiscuous %s failed on %s, err %pe aq_err %s\n", cur_promisc ? "on" : "off", vsi_name, ERR_PTR(aq_ret), i40e_aq_str(hw, hw->aq.asq_last_status)); } } out: /* if something went wrong then set the changed flag so we try again */ if (retval) vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; clear_bit(__I40E_VSI_SYNCING_FILTERS, vsi->state); return retval; err_no_memory: /* Restore elements on the temporary add and delete lists */ spin_lock_bh(&vsi->mac_filter_hash_lock); err_no_memory_locked: i40e_undo_del_filter_entries(vsi, &tmp_del_list); i40e_undo_add_filter_entries(vsi, &tmp_add_list); spin_unlock_bh(&vsi->mac_filter_hash_lock); vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; clear_bit(__I40E_VSI_SYNCING_FILTERS, vsi->state); return -ENOMEM; } /** * i40e_sync_filters_subtask - Sync the VSI filter list with HW * @pf: board private structure **/ static void i40e_sync_filters_subtask(struct i40e_pf *pf) { struct i40e_vsi *vsi; int v; if (!pf) return; if (!test_and_clear_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state)) return; if (test_bit(__I40E_VF_DISABLE, pf->state)) { set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state); return; } i40e_pf_for_each_vsi(pf, v, vsi) { if ((vsi->flags & I40E_VSI_FLAG_FILTER_CHANGED) && !test_bit(__I40E_VSI_RELEASING, vsi->state)) { int ret = i40e_sync_vsi_filters(vsi); if (ret) { /* come back and try again later */ set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state); break; } } } } /** * i40e_calculate_vsi_rx_buf_len - Calculates buffer length * * @vsi: VSI to calculate rx_buf_len from */ static u16 i40e_calculate_vsi_rx_buf_len(struct i40e_vsi *vsi) { if (!vsi->netdev || test_bit(I40E_FLAG_LEGACY_RX_ENA, vsi->back->flags)) return SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048); return PAGE_SIZE < 8192 ? I40E_RXBUFFER_3072 : I40E_RXBUFFER_2048; } /** * i40e_max_vsi_frame_size - returns the maximum allowed frame size for VSI * @vsi: the vsi * @xdp_prog: XDP program **/ static int i40e_max_vsi_frame_size(struct i40e_vsi *vsi, struct bpf_prog *xdp_prog) { u16 rx_buf_len = i40e_calculate_vsi_rx_buf_len(vsi); u16 chain_len; if (xdp_prog && !xdp_prog->aux->xdp_has_frags) chain_len = 1; else chain_len = I40E_MAX_CHAINED_RX_BUFFERS; return min_t(u16, rx_buf_len * chain_len, I40E_MAX_RXBUFFER); } /** * i40e_change_mtu - NDO callback to change the Maximum Transfer Unit * @netdev: network interface device structure * @new_mtu: new value for maximum frame size * * Returns 0 on success, negative on failure **/ static int i40e_change_mtu(struct net_device *netdev, int new_mtu) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; int frame_size; frame_size = i40e_max_vsi_frame_size(vsi, vsi->xdp_prog); if (new_mtu > frame_size - I40E_PACKET_HDR_PAD) { netdev_err(netdev, "Error changing mtu to %d, Max is %d\n", new_mtu, frame_size - I40E_PACKET_HDR_PAD); return -EINVAL; } netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); WRITE_ONCE(netdev->mtu, new_mtu); if (netif_running(netdev)) i40e_vsi_reinit_locked(vsi); set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); set_bit(__I40E_CLIENT_L2_CHANGE, pf->state); return 0; } /** * i40e_ioctl - Access the hwtstamp interface * @netdev: network interface device structure * @ifr: interface request data * @cmd: ioctl command **/ int i40e_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_pf *pf = np->vsi->back; switch (cmd) { case SIOCGHWTSTAMP: return i40e_ptp_get_ts_config(pf, ifr); case SIOCSHWTSTAMP: return i40e_ptp_set_ts_config(pf, ifr); default: return -EOPNOTSUPP; } } /** * i40e_vlan_stripping_enable - Turn on vlan stripping for the VSI * @vsi: the vsi being adjusted **/ void i40e_vlan_stripping_enable(struct i40e_vsi *vsi) { struct i40e_vsi_context ctxt; int ret; /* Don't modify stripping options if a port VLAN is active */ if (vsi->info.pvid) return; if ((vsi->info.valid_sections & cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID)) && ((vsi->info.port_vlan_flags & I40E_AQ_VSI_PVLAN_MODE_MASK) == 0)) return; /* already enabled */ vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID); vsi->info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL | I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH; ctxt.seid = vsi->seid; ctxt.info = vsi->info; ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); if (ret) { dev_info(&vsi->back->pdev->dev, "update vlan stripping failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&vsi->back->hw, vsi->back->hw.aq.asq_last_status)); } } /** * i40e_vlan_stripping_disable - Turn off vlan stripping for the VSI * @vsi: the vsi being adjusted **/ void i40e_vlan_stripping_disable(struct i40e_vsi *vsi) { struct i40e_vsi_context ctxt; int ret; /* Don't modify stripping options if a port VLAN is active */ if (vsi->info.pvid) return; if ((vsi->info.valid_sections & cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID)) && ((vsi->info.port_vlan_flags & I40E_AQ_VSI_PVLAN_EMOD_MASK) == I40E_AQ_VSI_PVLAN_EMOD_MASK)) return; /* already disabled */ vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID); vsi->info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL | I40E_AQ_VSI_PVLAN_EMOD_NOTHING; ctxt.seid = vsi->seid; ctxt.info = vsi->info; ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); if (ret) { dev_info(&vsi->back->pdev->dev, "update vlan stripping failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&vsi->back->hw, vsi->back->hw.aq.asq_last_status)); } } /** * i40e_add_vlan_all_mac - Add a MAC/VLAN filter for each existing MAC address * @vsi: the vsi being configured * @vid: vlan id to be added (0 = untagged only , -1 = any) * * This is a helper function for adding a new MAC/VLAN filter with the * specified VLAN for each existing MAC address already in the hash table. * This function does *not* perform any accounting to update filters based on * VLAN mode. * * NOTE: this function expects to be called while under the * mac_filter_hash_lock **/ int i40e_add_vlan_all_mac(struct i40e_vsi *vsi, s16 vid) { struct i40e_mac_filter *f, *add_f; struct hlist_node *h; int bkt; hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { /* If we're asked to add a filter that has been marked for * removal, it is safe to simply restore it to active state. * __i40e_del_filter will have simply deleted any filters which * were previously marked NEW or FAILED, so if it is currently * marked REMOVE it must have previously been ACTIVE. Since we * haven't yet run the sync filters task, just restore this * filter to the ACTIVE state so that the sync task leaves it * in place. */ if (f->state == I40E_FILTER_REMOVE && f->vlan == vid) { f->state = I40E_FILTER_ACTIVE; continue; } else if (f->state == I40E_FILTER_REMOVE) { continue; } add_f = i40e_add_filter(vsi, f->macaddr, vid); if (!add_f) { dev_info(&vsi->back->pdev->dev, "Could not add vlan filter %d for %pM\n", vid, f->macaddr); return -ENOMEM; } } return 0; } /** * i40e_vsi_add_vlan - Add VSI membership for given VLAN * @vsi: the VSI being configured * @vid: VLAN id to be added **/ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, u16 vid) { int err; if (vsi->info.pvid) return -EINVAL; /* The network stack will attempt to add VID=0, with the intention to * receive priority tagged packets with a VLAN of 0. Our HW receives * these packets by default when configured to receive untagged * packets, so we don't need to add a filter for this case. * Additionally, HW interprets adding a VID=0 filter as meaning to * receive *only* tagged traffic and stops receiving untagged traffic. * Thus, we do not want to actually add a filter for VID=0 */ if (!vid) return 0; /* Locked once because all functions invoked below iterates list*/ spin_lock_bh(&vsi->mac_filter_hash_lock); err = i40e_add_vlan_all_mac(vsi, vid); spin_unlock_bh(&vsi->mac_filter_hash_lock); if (err) return err; /* schedule our worker thread which will take care of * applying the new filter changes */ i40e_service_event_schedule(vsi->back); return 0; } /** * i40e_rm_vlan_all_mac - Remove MAC/VLAN pair for all MAC with the given VLAN * @vsi: the vsi being configured * @vid: vlan id to be removed (0 = untagged only , -1 = any) * * This function should be used to remove all VLAN filters which match the * given VID. It does not schedule the service event and does not take the * mac_filter_hash_lock so it may be combined with other operations under * a single invocation of the mac_filter_hash_lock. * * NOTE: this function expects to be called while under the * mac_filter_hash_lock */ void i40e_rm_vlan_all_mac(struct i40e_vsi *vsi, s16 vid) { struct i40e_mac_filter *f; struct hlist_node *h; int bkt; hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { if (f->vlan == vid) __i40e_del_filter(vsi, f); } } /** * i40e_vsi_kill_vlan - Remove VSI membership for given VLAN * @vsi: the VSI being configured * @vid: VLAN id to be removed **/ void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, u16 vid) { if (!vid || vsi->info.pvid) return; spin_lock_bh(&vsi->mac_filter_hash_lock); i40e_rm_vlan_all_mac(vsi, vid); spin_unlock_bh(&vsi->mac_filter_hash_lock); /* schedule our worker thread which will take care of * applying the new filter changes */ i40e_service_event_schedule(vsi->back); } /** * i40e_vlan_rx_add_vid - Add a vlan id filter to HW offload * @netdev: network interface to be adjusted * @proto: unused protocol value * @vid: vlan id to be added * * net_device_ops implementation for adding vlan ids **/ static int i40e_vlan_rx_add_vid(struct net_device *netdev, __always_unused __be16 proto, u16 vid) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; int ret = 0; if (vid >= VLAN_N_VID) return -EINVAL; ret = i40e_vsi_add_vlan(vsi, vid); if (!ret) set_bit(vid, vsi->active_vlans); return ret; } /** * i40e_vlan_rx_add_vid_up - Add a vlan id filter to HW offload in UP path * @netdev: network interface to be adjusted * @proto: unused protocol value * @vid: vlan id to be added **/ static void i40e_vlan_rx_add_vid_up(struct net_device *netdev, __always_unused __be16 proto, u16 vid) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; if (vid >= VLAN_N_VID) return; set_bit(vid, vsi->active_vlans); } /** * i40e_vlan_rx_kill_vid - Remove a vlan id filter from HW offload * @netdev: network interface to be adjusted * @proto: unused protocol value * @vid: vlan id to be removed * * net_device_ops implementation for removing vlan ids **/ static int i40e_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto, u16 vid) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; /* return code is ignored as there is nothing a user * can do about failure to remove and a log message was * already printed from the other function */ i40e_vsi_kill_vlan(vsi, vid); clear_bit(vid, vsi->active_vlans); return 0; } /** * i40e_restore_vlan - Reinstate vlans when vsi/netdev comes back up * @vsi: the vsi being brought back up **/ static void i40e_restore_vlan(struct i40e_vsi *vsi) { u16 vid; if (!vsi->netdev) return; if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) i40e_vlan_stripping_enable(vsi); else i40e_vlan_stripping_disable(vsi); for_each_set_bit(vid, vsi->active_vlans, VLAN_N_VID) i40e_vlan_rx_add_vid_up(vsi->netdev, htons(ETH_P_8021Q), vid); } /** * i40e_vsi_add_pvid - Add pvid for the VSI * @vsi: the vsi being adjusted * @vid: the vlan id to set as a PVID **/ int i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid) { struct i40e_vsi_context ctxt; int ret; vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID); vsi->info.pvid = cpu_to_le16(vid); vsi->info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_TAGGED | I40E_AQ_VSI_PVLAN_INSERT_PVID | I40E_AQ_VSI_PVLAN_EMOD_STR; ctxt.seid = vsi->seid; ctxt.info = vsi->info; ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); if (ret) { dev_info(&vsi->back->pdev->dev, "add pvid failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&vsi->back->hw, vsi->back->hw.aq.asq_last_status)); return -ENOENT; } return 0; } /** * i40e_vsi_remove_pvid - Remove the pvid from the VSI * @vsi: the vsi being adjusted * * Just use the vlan_rx_register() service to put it back to normal **/ void i40e_vsi_remove_pvid(struct i40e_vsi *vsi) { vsi->info.pvid = 0; i40e_vlan_stripping_disable(vsi); } /** * i40e_vsi_setup_tx_resources - Allocate VSI Tx queue resources * @vsi: ptr to the VSI * * If this function returns with an error, then it's possible one or * more of the rings is populated (while the rest are not). It is the * callers duty to clean those orphaned rings. * * Return 0 on success, negative on failure **/ static int i40e_vsi_setup_tx_resources(struct i40e_vsi *vsi) { int i, err = 0; for (i = 0; i < vsi->num_queue_pairs && !err; i++) err = i40e_setup_tx_descriptors(vsi->tx_rings[i]); if (!i40e_enabled_xdp_vsi(vsi)) return err; for (i = 0; i < vsi->num_queue_pairs && !err; i++) err = i40e_setup_tx_descriptors(vsi->xdp_rings[i]); return err; } /** * i40e_vsi_free_tx_resources - Free Tx resources for VSI queues * @vsi: ptr to the VSI * * Free VSI's transmit software resources **/ static void i40e_vsi_free_tx_resources(struct i40e_vsi *vsi) { int i; if (vsi->tx_rings) { for (i = 0; i < vsi->num_queue_pairs; i++) if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) i40e_free_tx_resources(vsi->tx_rings[i]); } if (vsi->xdp_rings) { for (i = 0; i < vsi->num_queue_pairs; i++) if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc) i40e_free_tx_resources(vsi->xdp_rings[i]); } } /** * i40e_vsi_setup_rx_resources - Allocate VSI queues Rx resources * @vsi: ptr to the VSI * * If this function returns with an error, then it's possible one or * more of the rings is populated (while the rest are not). It is the * callers duty to clean those orphaned rings. * * Return 0 on success, negative on failure **/ static int i40e_vsi_setup_rx_resources(struct i40e_vsi *vsi) { int i, err = 0; for (i = 0; i < vsi->num_queue_pairs && !err; i++) err = i40e_setup_rx_descriptors(vsi->rx_rings[i]); return err; } /** * i40e_vsi_free_rx_resources - Free Rx Resources for VSI queues * @vsi: ptr to the VSI * * Free all receive software resources **/ static void i40e_vsi_free_rx_resources(struct i40e_vsi *vsi) { int i; if (!vsi->rx_rings) return; for (i = 0; i < vsi->num_queue_pairs; i++) if (vsi->rx_rings[i] && vsi->rx_rings[i]->desc) i40e_free_rx_resources(vsi->rx_rings[i]); } /** * i40e_config_xps_tx_ring - Configure XPS for a Tx ring * @ring: The Tx ring to configure * * This enables/disables XPS for a given Tx descriptor ring * based on the TCs enabled for the VSI that ring belongs to. **/ static void i40e_config_xps_tx_ring(struct i40e_ring *ring) { int cpu; if (!ring->q_vector || !ring->netdev || ring->ch) return; /* We only initialize XPS once, so as not to overwrite user settings */ if (test_and_set_bit(__I40E_TX_XPS_INIT_DONE, ring->state)) return; cpu = cpumask_local_spread(ring->q_vector->v_idx, -1); netif_set_xps_queue(ring->netdev, get_cpu_mask(cpu), ring->queue_index); } /** * i40e_xsk_pool - Retrieve the AF_XDP buffer pool if XDP and ZC is enabled * @ring: The Tx or Rx ring * * Returns the AF_XDP buffer pool or NULL. **/ static struct xsk_buff_pool *i40e_xsk_pool(struct i40e_ring *ring) { bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi); int qid = ring->queue_index; if (ring_is_xdp(ring)) qid -= ring->vsi->alloc_queue_pairs; if (!xdp_on || !test_bit(qid, ring->vsi->af_xdp_zc_qps)) return NULL; return xsk_get_pool_from_qid(ring->vsi->netdev, qid); } /** * i40e_configure_tx_ring - Configure a transmit ring context and rest * @ring: The Tx ring to configure * * Configure the Tx descriptor ring in the HMC context. **/ static int i40e_configure_tx_ring(struct i40e_ring *ring) { struct i40e_vsi *vsi = ring->vsi; u16 pf_q = vsi->base_queue + ring->queue_index; struct i40e_hw *hw = &vsi->back->hw; struct i40e_hmc_obj_txq tx_ctx; u32 qtx_ctl = 0; int err = 0; if (ring_is_xdp(ring)) ring->xsk_pool = i40e_xsk_pool(ring); /* some ATR related tx ring init */ if (test_bit(I40E_FLAG_FD_ATR_ENA, vsi->back->flags)) { ring->atr_sample_rate = I40E_DEFAULT_ATR_SAMPLE_RATE; ring->atr_count = 0; } else { ring->atr_sample_rate = 0; } /* configure XPS */ i40e_config_xps_tx_ring(ring); /* clear the context structure first */ memset(&tx_ctx, 0, sizeof(tx_ctx)); tx_ctx.new_context = 1; tx_ctx.base = (ring->dma / 128); tx_ctx.qlen = ring->count; if (test_bit(I40E_FLAG_FD_SB_ENA, vsi->back->flags) || test_bit(I40E_FLAG_FD_ATR_ENA, vsi->back->flags)) tx_ctx.fd_ena = 1; if (test_bit(I40E_FLAG_PTP_ENA, vsi->back->flags)) tx_ctx.timesync_ena = 1; /* FDIR VSI tx ring can still use RS bit and writebacks */ if (vsi->type != I40E_VSI_FDIR) tx_ctx.head_wb_ena = 1; tx_ctx.head_wb_addr = ring->dma + (ring->count * sizeof(struct i40e_tx_desc)); /* As part of VSI creation/update, FW allocates certain * Tx arbitration queue sets for each TC enabled for * the VSI. The FW returns the handles to these queue * sets as part of the response buffer to Add VSI, * Update VSI, etc. AQ commands. It is expected that * these queue set handles be associated with the Tx * queues by the driver as part of the TX queue context * initialization. This has to be done regardless of * DCB as by default everything is mapped to TC0. */ if (ring->ch) tx_ctx.rdylist = le16_to_cpu(ring->ch->info.qs_handle[ring->dcb_tc]); else tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]); tx_ctx.rdylist_act = 0; /* clear the context in the HMC */ err = i40e_clear_lan_tx_queue_context(hw, pf_q); if (err) { dev_info(&vsi->back->pdev->dev, "Failed to clear LAN Tx queue context on Tx ring %d (pf_q %d), error: %d\n", ring->queue_index, pf_q, err); return -ENOMEM; } /* set the context in the HMC */ err = i40e_set_lan_tx_queue_context(hw, pf_q, &tx_ctx); if (err) { dev_info(&vsi->back->pdev->dev, "Failed to set LAN Tx queue context on Tx ring %d (pf_q %d, error: %d\n", ring->queue_index, pf_q, err); return -ENOMEM; } /* Now associate this queue with this PCI function */ if (ring->ch) { if (ring->ch->type == I40E_VSI_VMDQ2) qtx_ctl = I40E_QTX_CTL_VM_QUEUE; else return -EINVAL; qtx_ctl |= FIELD_PREP(I40E_QTX_CTL_VFVM_INDX_MASK, ring->ch->vsi_number); } else { if (vsi->type == I40E_VSI_VMDQ2) { qtx_ctl = I40E_QTX_CTL_VM_QUEUE; qtx_ctl |= FIELD_PREP(I40E_QTX_CTL_VFVM_INDX_MASK, vsi->id); } else { qtx_ctl = I40E_QTX_CTL_PF_QUEUE; } } qtx_ctl |= FIELD_PREP(I40E_QTX_CTL_PF_INDX_MASK, hw->pf_id); wr32(hw, I40E_QTX_CTL(pf_q), qtx_ctl); i40e_flush(hw); /* cache tail off for easier writes later */ ring->tail = hw->hw_addr + I40E_QTX_TAIL(pf_q); return 0; } /** * i40e_rx_offset - Return expected offset into page to access data * @rx_ring: Ring we are requesting offset of * * Returns the offset value for ring into the data buffer. */ static unsigned int i40e_rx_offset(struct i40e_ring *rx_ring) { return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0; } /** * i40e_configure_rx_ring - Configure a receive ring context * @ring: The Rx ring to configure * * Configure the Rx descriptor ring in the HMC context. **/ static int i40e_configure_rx_ring(struct i40e_ring *ring) { struct i40e_vsi *vsi = ring->vsi; u32 chain_len = vsi->back->hw.func_caps.rx_buf_chain_len; u16 pf_q = vsi->base_queue + ring->queue_index; struct i40e_hw *hw = &vsi->back->hw; struct i40e_hmc_obj_rxq rx_ctx; int err = 0; bool ok; bitmap_zero(ring->state, __I40E_RING_STATE_NBITS); /* clear the context structure first */ memset(&rx_ctx, 0, sizeof(rx_ctx)); ring->rx_buf_len = vsi->rx_buf_len; /* XDP RX-queue info only needed for RX rings exposed to XDP */ if (ring->vsi->type != I40E_VSI_MAIN) goto skip; if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->queue_index, ring->q_vector->napi.napi_id, ring->rx_buf_len); if (err) return err; } ring->xsk_pool = i40e_xsk_pool(ring); if (ring->xsk_pool) { xdp_rxq_info_unreg(&ring->xdp_rxq); ring->rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool); err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->queue_index, ring->q_vector->napi.napi_id, ring->rx_buf_len); if (err) return err; err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_XSK_BUFF_POOL, NULL); if (err) return err; dev_info(&vsi->back->pdev->dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", ring->queue_index); } else { err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, NULL); if (err) return err; } skip: xdp_init_buff(&ring->xdp, i40e_rx_pg_size(ring) / 2, &ring->xdp_rxq); rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len, BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT)); rx_ctx.base = (ring->dma / 128); rx_ctx.qlen = ring->count; /* use 16 byte descriptors */ rx_ctx.dsize = 0; /* descriptor type is always zero * rx_ctx.dtype = 0; */ rx_ctx.hsplit_0 = 0; rx_ctx.rxmax = min_t(u16, vsi->max_frame, chain_len * ring->rx_buf_len); if (hw->revision_id == 0) rx_ctx.lrxqthresh = 0; else rx_ctx.lrxqthresh = 1; rx_ctx.crcstrip = 1; rx_ctx.l2tsel = 1; /* this controls whether VLAN is stripped from inner headers */ rx_ctx.showiv = 0; /* set the prefena field to 1 because the manual says to */ rx_ctx.prefena = 1; /* clear the context in the HMC */ err = i40e_clear_lan_rx_queue_context(hw, pf_q); if (err) { dev_info(&vsi->back->pdev->dev, "Failed to clear LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n", ring->queue_index, pf_q, err); return -ENOMEM; } /* set the context in the HMC */ err = i40e_set_lan_rx_queue_context(hw, pf_q, &rx_ctx); if (err) { dev_info(&vsi->back->pdev->dev, "Failed to set LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n", ring->queue_index, pf_q, err); return -ENOMEM; } /* configure Rx buffer alignment */ if (!vsi->netdev || test_bit(I40E_FLAG_LEGACY_RX_ENA, vsi->back->flags)) { if (I40E_2K_TOO_SMALL_WITH_PADDING) { dev_info(&vsi->back->pdev->dev, "2k Rx buffer is too small to fit standard MTU and skb_shared_info\n"); return -EOPNOTSUPP; } clear_ring_build_skb_enabled(ring); } else { set_ring_build_skb_enabled(ring); } ring->rx_offset = i40e_rx_offset(ring); /* cache tail for quicker writes, and clear the reg before use */ ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q); writel(0, ring->tail); if (ring->xsk_pool) { xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq); ok = i40e_alloc_rx_buffers_zc(ring, I40E_DESC_UNUSED(ring)); } else { ok = !i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring)); } if (!ok) { /* Log this in case the user has forgotten to give the kernel * any buffers, even later in the application. */ dev_info(&vsi->back->pdev->dev, "Failed to allocate some buffers on %sRx ring %d (pf_q %d)\n", ring->xsk_pool ? "AF_XDP ZC enabled " : "", ring->queue_index, pf_q); } return 0; } /** * i40e_vsi_configure_tx - Configure the VSI for Tx * @vsi: VSI structure describing this set of rings and resources * * Configure the Tx VSI for operation. **/ static int i40e_vsi_configure_tx(struct i40e_vsi *vsi) { int err = 0; u16 i; for (i = 0; (i < vsi->num_queue_pairs) && !err; i++) err = i40e_configure_tx_ring(vsi->tx_rings[i]); if (err || !i40e_enabled_xdp_vsi(vsi)) return err; for (i = 0; (i < vsi->num_queue_pairs) && !err; i++) err = i40e_configure_tx_ring(vsi->xdp_rings[i]); return err; } /** * i40e_vsi_configure_rx - Configure the VSI for Rx * @vsi: the VSI being configured * * Configure the Rx VSI for operation. **/ static int i40e_vsi_configure_rx(struct i40e_vsi *vsi) { int err = 0; u16 i; vsi->max_frame = i40e_max_vsi_frame_size(vsi, vsi->xdp_prog); vsi->rx_buf_len = i40e_calculate_vsi_rx_buf_len(vsi); #if (PAGE_SIZE < 8192) if (vsi->netdev && !I40E_2K_TOO_SMALL_WITH_PADDING && vsi->netdev->mtu <= ETH_DATA_LEN) { vsi->rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN; vsi->max_frame = vsi->rx_buf_len; } #endif /* set up individual rings */ for (i = 0; i < vsi->num_queue_pairs && !err; i++) err = i40e_configure_rx_ring(vsi->rx_rings[i]); return err; } /** * i40e_vsi_config_dcb_rings - Update rings to reflect DCB TC * @vsi: ptr to the VSI **/ static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi) { struct i40e_ring *tx_ring, *rx_ring; u16 qoffset, qcount; int i, n; if (!test_bit(I40E_FLAG_DCB_ENA, vsi->back->flags)) { /* Reset the TC information */ for (i = 0; i < vsi->num_queue_pairs; i++) { rx_ring = vsi->rx_rings[i]; tx_ring = vsi->tx_rings[i]; rx_ring->dcb_tc = 0; tx_ring->dcb_tc = 0; } return; } for (n = 0; n < I40E_MAX_TRAFFIC_CLASS; n++) { if (!(vsi->tc_config.enabled_tc & BIT_ULL(n))) continue; qoffset = vsi->tc_config.tc_info[n].qoffset; qcount = vsi->tc_config.tc_info[n].qcount; for (i = qoffset; i < (qoffset + qcount); i++) { rx_ring = vsi->rx_rings[i]; tx_ring = vsi->tx_rings[i]; rx_ring->dcb_tc = n; tx_ring->dcb_tc = n; } } } /** * i40e_set_vsi_rx_mode - Call set_rx_mode on a VSI * @vsi: ptr to the VSI **/ static void i40e_set_vsi_rx_mode(struct i40e_vsi *vsi) { if (vsi->netdev) i40e_set_rx_mode(vsi->netdev); } /** * i40e_reset_fdir_filter_cnt - Reset flow director filter counters * @pf: Pointer to the targeted PF * * Set all flow director counters to 0. */ static void i40e_reset_fdir_filter_cnt(struct i40e_pf *pf) { pf->fd_tcp4_filter_cnt = 0; pf->fd_udp4_filter_cnt = 0; pf->fd_sctp4_filter_cnt = 0; pf->fd_ip4_filter_cnt = 0; pf->fd_tcp6_filter_cnt = 0; pf->fd_udp6_filter_cnt = 0; pf->fd_sctp6_filter_cnt = 0; pf->fd_ip6_filter_cnt = 0; } /** * i40e_fdir_filter_restore - Restore the Sideband Flow Director filters * @vsi: Pointer to the targeted VSI * * This function replays the hlist on the hw where all the SB Flow Director * filters were saved. **/ static void i40e_fdir_filter_restore(struct i40e_vsi *vsi) { struct i40e_fdir_filter *filter; struct i40e_pf *pf = vsi->back; struct hlist_node *node; if (!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) return; /* Reset FDir counters as we're replaying all existing filters */ i40e_reset_fdir_filter_cnt(pf); hlist_for_each_entry_safe(filter, node, &pf->fdir_filter_list, fdir_node) { i40e_add_del_fdir(vsi, filter, true); } } /** * i40e_vsi_configure - Set up the VSI for action * @vsi: the VSI being configured **/ static int i40e_vsi_configure(struct i40e_vsi *vsi) { int err; i40e_set_vsi_rx_mode(vsi); i40e_restore_vlan(vsi); i40e_vsi_config_dcb_rings(vsi); err = i40e_vsi_configure_tx(vsi); if (!err) err = i40e_vsi_configure_rx(vsi); return err; } /** * i40e_vsi_configure_msix - MSIX mode Interrupt Config in the HW * @vsi: the VSI being configured **/ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) { bool has_xdp = i40e_enabled_xdp_vsi(vsi); struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; u16 vector; int i, q; u32 qp; /* The interrupt indexing is offset by 1 in the PFINT_ITRn * and PFINT_LNKLSTn registers, e.g.: * PFINT_ITRn[0..n-1] gets msix-1..msix-n (qpair interrupts) */ qp = vsi->base_queue; vector = vsi->base_vector; for (i = 0; i < vsi->num_q_vectors; i++, vector++) { struct i40e_q_vector *q_vector = vsi->q_vectors[i]; q_vector->rx.next_update = jiffies + 1; q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[i]->itr_setting); wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.target_itr >> 1); q_vector->rx.current_itr = q_vector->rx.target_itr; q_vector->tx.next_update = jiffies + 1; q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[i]->itr_setting); wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.target_itr >> 1); q_vector->tx.current_itr = q_vector->tx.target_itr; /* Set ITR for software interrupts triggered after exiting * busy-loop polling. */ wr32(hw, I40E_PFINT_ITRN(I40E_SW_ITR, vector - 1), I40E_ITR_20K); wr32(hw, I40E_PFINT_RATEN(vector - 1), i40e_intrl_usec_to_reg(vsi->int_rate_limit)); /* begin of linked list for RX queue assigned to this vector */ wr32(hw, I40E_PFINT_LNKLSTN(vector - 1), qp); for (q = 0; q < q_vector->num_ringpairs; q++) { u32 nextqp = has_xdp ? qp + vsi->alloc_queue_pairs : qp; u32 val; val = I40E_QINT_RQCTL_CAUSE_ENA_MASK | (I40E_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | (nextqp << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_RQCTL(qp), val); if (has_xdp) { /* TX queue with next queue set to TX */ val = I40E_QINT_TQCTL_CAUSE_ENA_MASK | (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | (qp << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_TX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_TQCTL(nextqp), val); } /* TX queue with next RX or end of linked list */ val = I40E_QINT_TQCTL_CAUSE_ENA_MASK | (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | ((qp + 1) << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); /* Terminate the linked list */ if (q == (q_vector->num_ringpairs - 1)) val |= (I40E_QUEUE_END_OF_LIST << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT); wr32(hw, I40E_QINT_TQCTL(qp), val); qp++; } } i40e_flush(hw); } /** * i40e_enable_misc_int_causes - enable the non-queue interrupts * @pf: pointer to private device data structure **/ static void i40e_enable_misc_int_causes(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; u32 val; /* clear things first */ wr32(hw, I40E_PFINT_ICR0_ENA, 0); /* disable all */ rd32(hw, I40E_PFINT_ICR0); /* read to clear */ val = I40E_PFINT_ICR0_ENA_ECC_ERR_MASK | I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK | I40E_PFINT_ICR0_ENA_GRST_MASK | I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK | I40E_PFINT_ICR0_ENA_GPIO_MASK | I40E_PFINT_ICR0_ENA_HMC_ERR_MASK | I40E_PFINT_ICR0_ENA_VFLR_MASK | I40E_PFINT_ICR0_ENA_ADMINQ_MASK; if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) val |= I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK; if (test_bit(I40E_FLAG_PTP_ENA, pf->flags)) val |= I40E_PFINT_ICR0_ENA_TIMESYNC_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, val); /* SW_ITR_IDX = 0, but don't change INTENA */ wr32(hw, I40E_PFINT_DYN_CTL0, I40E_PFINT_DYN_CTL0_SW_ITR_INDX_MASK | I40E_PFINT_DYN_CTL0_INTENA_MSK_MASK); /* OTHER_ITR_IDX = 0 */ wr32(hw, I40E_PFINT_STAT_CTL0, 0); } /** * i40e_configure_msi_and_legacy - Legacy mode interrupt config in the HW * @vsi: the VSI being configured **/ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi) { u32 nextqp = i40e_enabled_xdp_vsi(vsi) ? vsi->alloc_queue_pairs : 0; struct i40e_q_vector *q_vector = vsi->q_vectors[0]; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; /* set the ITR configuration */ q_vector->rx.next_update = jiffies + 1; q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting); wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr >> 1); q_vector->rx.current_itr = q_vector->rx.target_itr; q_vector->tx.next_update = jiffies + 1; q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting); wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr >> 1); q_vector->tx.current_itr = q_vector->tx.target_itr; i40e_enable_misc_int_causes(pf); /* FIRSTQ_INDX = 0, FIRSTQ_TYPE = 0 (rx) */ wr32(hw, I40E_PFINT_LNKLST0, 0); /* Associate the queue pair to the vector and enable the queue * interrupt RX queue in linked list with next queue set to TX */ wr32(hw, I40E_QINT_RQCTL(0), I40E_QINT_RQCTL_VAL(nextqp, 0, TX)); if (i40e_enabled_xdp_vsi(vsi)) { /* TX queue in linked list with next queue set to TX */ wr32(hw, I40E_QINT_TQCTL(nextqp), I40E_QINT_TQCTL_VAL(nextqp, 0, TX)); } /* last TX queue so the next RX queue doesn't matter */ wr32(hw, I40E_QINT_TQCTL(0), I40E_QINT_TQCTL_VAL(I40E_QUEUE_END_OF_LIST, 0, RX)); i40e_flush(hw); } /** * i40e_irq_dynamic_disable_icr0 - Disable default interrupt generation for icr0 * @pf: board private structure **/ void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; wr32(hw, I40E_PFINT_DYN_CTL0, I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT); i40e_flush(hw); } /** * i40e_irq_dynamic_enable_icr0 - Enable default interrupt generation for icr0 * @pf: board private structure **/ void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; u32 val; val = I40E_PFINT_DYN_CTL0_INTENA_MASK | I40E_PFINT_DYN_CTL0_CLEARPBA_MASK | (I40E_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTL0, val); i40e_flush(hw); } /** * i40e_msix_clean_rings - MSIX mode Interrupt Handler * @irq: interrupt number * @data: pointer to a q_vector **/ static irqreturn_t i40e_msix_clean_rings(int irq, void *data) { struct i40e_q_vector *q_vector = data; if (!q_vector->tx.ring && !q_vector->rx.ring) return IRQ_HANDLED; napi_schedule_irqoff(&q_vector->napi); return IRQ_HANDLED; } /** * i40e_irq_affinity_notify - Callback for affinity changes * @notify: context as to what irq was changed * @mask: the new affinity mask * * This is a callback function used by the irq_set_affinity_notifier function * so that we may register to receive changes to the irq affinity masks. **/ static void i40e_irq_affinity_notify(struct irq_affinity_notify *notify, const cpumask_t *mask) { struct i40e_q_vector *q_vector = container_of(notify, struct i40e_q_vector, affinity_notify); cpumask_copy(&q_vector->affinity_mask, mask); } /** * i40e_irq_affinity_release - Callback for affinity notifier release * @ref: internal core kernel usage * * This is a callback function used by the irq_set_affinity_notifier function * to inform the current notification subscriber that they will no longer * receive notifications. **/ static void i40e_irq_affinity_release(struct kref *ref) {} /** * i40e_vsi_request_irq_msix - Initialize MSI-X interrupts * @vsi: the VSI being configured * @basename: name for the vector * * Allocates MSI-X vectors and requests interrupts from the kernel. **/ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) { int q_vectors = vsi->num_q_vectors; struct i40e_pf *pf = vsi->back; int base = vsi->base_vector; int rx_int_idx = 0; int tx_int_idx = 0; int vector, err; int irq_num; int cpu; for (vector = 0; vector < q_vectors; vector++) { struct i40e_q_vector *q_vector = vsi->q_vectors[vector]; irq_num = pf->msix_entries[base + vector].vector; if (q_vector->tx.ring && q_vector->rx.ring) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, "%s-%s-%d", basename, "TxRx", rx_int_idx++); tx_int_idx++; } else if (q_vector->rx.ring) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, "%s-%s-%d", basename, "rx", rx_int_idx++); } else if (q_vector->tx.ring) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, "%s-%s-%d", basename, "tx", tx_int_idx++); } else { /* skip this unused q_vector */ continue; } err = request_irq(irq_num, vsi->irq_handler, 0, q_vector->name, q_vector); if (err) { dev_info(&pf->pdev->dev, "MSIX request_irq failed, error: %d\n", err); goto free_queue_irqs; } /* register for affinity change notifications */ q_vector->irq_num = irq_num; q_vector->affinity_notify.notify = i40e_irq_affinity_notify; q_vector->affinity_notify.release = i40e_irq_affinity_release; irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); /* Spread affinity hints out across online CPUs. * * get_cpu_mask returns a static constant mask with * a permanent lifetime so it's ok to pass to * irq_update_affinity_hint without making a copy. */ cpu = cpumask_local_spread(q_vector->v_idx, -1); irq_update_affinity_hint(irq_num, get_cpu_mask(cpu)); } vsi->irqs_ready = true; return 0; free_queue_irqs: while (vector) { vector--; irq_num = pf->msix_entries[base + vector].vector; irq_set_affinity_notifier(irq_num, NULL); irq_update_affinity_hint(irq_num, NULL); free_irq(irq_num, &vsi->q_vectors[vector]); } return err; } /** * i40e_vsi_disable_irq - Mask off queue interrupt generation on the VSI * @vsi: the VSI being un-configured **/ static void i40e_vsi_disable_irq(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; int base = vsi->base_vector; int i; /* disable interrupt causation from each queue */ for (i = 0; i < vsi->num_queue_pairs; i++) { u32 val; val = rd32(hw, I40E_QINT_TQCTL(vsi->tx_rings[i]->reg_idx)); val &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK; wr32(hw, I40E_QINT_TQCTL(vsi->tx_rings[i]->reg_idx), val); val = rd32(hw, I40E_QINT_RQCTL(vsi->rx_rings[i]->reg_idx)); val &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK; wr32(hw, I40E_QINT_RQCTL(vsi->rx_rings[i]->reg_idx), val); if (!i40e_enabled_xdp_vsi(vsi)) continue; wr32(hw, I40E_QINT_TQCTL(vsi->xdp_rings[i]->reg_idx), 0); } /* disable each interrupt */ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { for (i = vsi->base_vector; i < (vsi->num_q_vectors + vsi->base_vector); i++) wr32(hw, I40E_PFINT_DYN_CTLN(i - 1), 0); i40e_flush(hw); for (i = 0; i < vsi->num_q_vectors; i++) synchronize_irq(pf->msix_entries[i + base].vector); } else { /* Legacy and MSI mode - this stops all interrupt handling */ wr32(hw, I40E_PFINT_ICR0_ENA, 0); wr32(hw, I40E_PFINT_DYN_CTL0, 0); i40e_flush(hw); synchronize_irq(pf->pdev->irq); } } /** * i40e_vsi_enable_irq - Enable IRQ for the given VSI * @vsi: the VSI being configured **/ static int i40e_vsi_enable_irq(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int i; if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { for (i = 0; i < vsi->num_q_vectors; i++) i40e_irq_dynamic_enable(vsi, i); } else { i40e_irq_dynamic_enable_icr0(pf); } i40e_flush(&pf->hw); return 0; } /** * i40e_free_misc_vector - Free the vector that handles non-queue events * @pf: board private structure **/ static void i40e_free_misc_vector(struct i40e_pf *pf) { /* Disable ICR 0 */ wr32(&pf->hw, I40E_PFINT_ICR0_ENA, 0); i40e_flush(&pf->hw); if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags) && pf->msix_entries) { free_irq(pf->msix_entries[0].vector, pf); clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state); } } /** * i40e_intr - MSI/Legacy and non-queue interrupt handler * @irq: interrupt number * @data: pointer to a q_vector * * This is the handler used for all MSI/Legacy interrupts, and deals * with both queue and non-queue interrupts. This is also used in * MSIX mode to handle the non-queue interrupts. **/ static irqreturn_t i40e_intr(int irq, void *data) { struct i40e_pf *pf = (struct i40e_pf *)data; struct i40e_hw *hw = &pf->hw; irqreturn_t ret = IRQ_NONE; u32 icr0, icr0_remaining; u32 val, ena_mask; icr0 = rd32(hw, I40E_PFINT_ICR0); ena_mask = rd32(hw, I40E_PFINT_ICR0_ENA); /* if sharing a legacy IRQ, we might get called w/o an intr pending */ if ((icr0 & I40E_PFINT_ICR0_INTEVENT_MASK) == 0) goto enable_intr; /* if interrupt but no bits showing, must be SWINT */ if (((icr0 & ~I40E_PFINT_ICR0_INTEVENT_MASK) == 0) || (icr0 & I40E_PFINT_ICR0_SWINT_MASK)) pf->sw_int_count++; if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags) && (icr0 & I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK)) { ena_mask &= ~I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK; dev_dbg(&pf->pdev->dev, "cleared PE_CRITERR\n"); set_bit(__I40E_CORE_RESET_REQUESTED, pf->state); } /* only q0 is used in MSI/Legacy mode, and none are used in MSIX */ if (icr0 & I40E_PFINT_ICR0_QUEUE_0_MASK) { struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_q_vector *q_vector = vsi->q_vectors[0]; /* We do not have a way to disarm Queue causes while leaving * interrupt enabled for all other causes, ideally * interrupt should be disabled while we are in NAPI but * this is not a performance path and napi_schedule() * can deal with rescheduling. */ if (!test_bit(__I40E_DOWN, pf->state)) napi_schedule_irqoff(&q_vector->napi); } if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) { ena_mask &= ~I40E_PFINT_ICR0_ENA_ADMINQ_MASK; set_bit(__I40E_ADMINQ_EVENT_PENDING, pf->state); i40e_debug(&pf->hw, I40E_DEBUG_NVM, "AdminQ event\n"); } if (icr0 & I40E_PFINT_ICR0_MAL_DETECT_MASK) { ena_mask &= ~I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK; set_bit(__I40E_MDD_EVENT_PENDING, pf->state); } if (icr0 & I40E_PFINT_ICR0_VFLR_MASK) { /* disable any further VFLR event notifications */ if (test_bit(__I40E_VF_RESETS_DISABLED, pf->state)) { u32 reg = rd32(hw, I40E_PFINT_ICR0_ENA); reg &= ~I40E_PFINT_ICR0_VFLR_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); } else { ena_mask &= ~I40E_PFINT_ICR0_ENA_VFLR_MASK; set_bit(__I40E_VFLR_EVENT_PENDING, pf->state); } } if (icr0 & I40E_PFINT_ICR0_GRST_MASK) { if (!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) set_bit(__I40E_RESET_INTR_RECEIVED, pf->state); ena_mask &= ~I40E_PFINT_ICR0_ENA_GRST_MASK; val = rd32(hw, I40E_GLGEN_RSTAT); val = FIELD_GET(I40E_GLGEN_RSTAT_RESET_TYPE_MASK, val); if (val == I40E_RESET_CORER) { pf->corer_count++; } else if (val == I40E_RESET_GLOBR) { pf->globr_count++; } else if (val == I40E_RESET_EMPR) { pf->empr_count++; set_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state); } } if (icr0 & I40E_PFINT_ICR0_HMC_ERR_MASK) { icr0 &= ~I40E_PFINT_ICR0_HMC_ERR_MASK; dev_info(&pf->pdev->dev, "HMC error interrupt\n"); dev_info(&pf->pdev->dev, "HMC error info 0x%x, HMC error data 0x%x\n", rd32(hw, I40E_PFHMC_ERRORINFO), rd32(hw, I40E_PFHMC_ERRORDATA)); } if (icr0 & I40E_PFINT_ICR0_TIMESYNC_MASK) { u32 prttsyn_stat = rd32(hw, I40E_PRTTSYN_STAT_0); if (prttsyn_stat & I40E_PRTTSYN_STAT_0_EVENT0_MASK) schedule_work(&pf->ptp_extts0_work); if (prttsyn_stat & I40E_PRTTSYN_STAT_0_TXTIME_MASK) i40e_ptp_tx_hwtstamp(pf); icr0 &= ~I40E_PFINT_ICR0_ENA_TIMESYNC_MASK; } /* If a critical error is pending we have no choice but to reset the * device. * Report and mask out any remaining unexpected interrupts. */ icr0_remaining = icr0 & ena_mask; if (icr0_remaining) { dev_info(&pf->pdev->dev, "unhandled interrupt icr0=0x%08x\n", icr0_remaining); if ((icr0_remaining & I40E_PFINT_ICR0_PE_CRITERR_MASK) || (icr0_remaining & I40E_PFINT_ICR0_PCI_EXCEPTION_MASK) || (icr0_remaining & I40E_PFINT_ICR0_ECC_ERR_MASK)) { dev_info(&pf->pdev->dev, "device will be reset\n"); set_bit(__I40E_PF_RESET_REQUESTED, pf->state); i40e_service_event_schedule(pf); } ena_mask &= ~icr0_remaining; } ret = IRQ_HANDLED; enable_intr: /* re-enable interrupt causes */ wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask); if (!test_bit(__I40E_DOWN, pf->state) || test_bit(__I40E_RECOVERY_MODE, pf->state)) { i40e_service_event_schedule(pf); i40e_irq_dynamic_enable_icr0(pf); } return ret; } /** * i40e_clean_fdir_tx_irq - Reclaim resources after transmit completes * @tx_ring: tx ring to clean * @budget: how many cleans we're allowed * * Returns true if there's any budget left (e.g. the clean is finished) **/ static bool i40e_clean_fdir_tx_irq(struct i40e_ring *tx_ring, int budget) { struct i40e_vsi *vsi = tx_ring->vsi; u16 i = tx_ring->next_to_clean; struct i40e_tx_buffer *tx_buf; struct i40e_tx_desc *tx_desc; tx_buf = &tx_ring->tx_bi[i]; tx_desc = I40E_TX_DESC(tx_ring, i); i -= tx_ring->count; do { struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch; /* if next_to_watch is not set then there is no work pending */ if (!eop_desc) break; /* prevent any other reads prior to eop_desc */ smp_rmb(); /* if the descriptor isn't done, no work yet to do */ if (!(eop_desc->cmd_type_offset_bsz & cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE))) break; /* clear next_to_watch to prevent false hangs */ tx_buf->next_to_watch = NULL; tx_desc->buffer_addr = 0; tx_desc->cmd_type_offset_bsz = 0; /* move past filter desc */ tx_buf++; tx_desc++; i++; if (unlikely(!i)) { i -= tx_ring->count; tx_buf = tx_ring->tx_bi; tx_desc = I40E_TX_DESC(tx_ring, 0); } /* unmap skb header data */ dma_unmap_single(tx_ring->dev, dma_unmap_addr(tx_buf, dma), dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); if (tx_buf->tx_flags & I40E_TX_FLAGS_FD_SB) kfree(tx_buf->raw_buf); tx_buf->raw_buf = NULL; tx_buf->tx_flags = 0; tx_buf->next_to_watch = NULL; dma_unmap_len_set(tx_buf, len, 0); tx_desc->buffer_addr = 0; tx_desc->cmd_type_offset_bsz = 0; /* move us past the eop_desc for start of next FD desc */ tx_buf++; tx_desc++; i++; if (unlikely(!i)) { i -= tx_ring->count; tx_buf = tx_ring->tx_bi; tx_desc = I40E_TX_DESC(tx_ring, 0); } /* update budget accounting */ budget--; } while (likely(budget)); i += tx_ring->count; tx_ring->next_to_clean = i; if (test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) i40e_irq_dynamic_enable(vsi, tx_ring->q_vector->v_idx); return budget > 0; } /** * i40e_fdir_clean_ring - Interrupt Handler for FDIR SB ring * @irq: interrupt number * @data: pointer to a q_vector **/ static irqreturn_t i40e_fdir_clean_ring(int irq, void *data) { struct i40e_q_vector *q_vector = data; struct i40e_vsi *vsi; if (!q_vector->tx.ring) return IRQ_HANDLED; vsi = q_vector->tx.ring->vsi; i40e_clean_fdir_tx_irq(q_vector->tx.ring, vsi->work_limit); return IRQ_HANDLED; } /** * i40e_map_vector_to_qp - Assigns the queue pair to the vector * @vsi: the VSI being configured * @v_idx: vector index * @qp_idx: queue pair index **/ static void i40e_map_vector_to_qp(struct i40e_vsi *vsi, int v_idx, int qp_idx) { struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx]; struct i40e_ring *tx_ring = vsi->tx_rings[qp_idx]; struct i40e_ring *rx_ring = vsi->rx_rings[qp_idx]; tx_ring->q_vector = q_vector; tx_ring->next = q_vector->tx.ring; q_vector->tx.ring = tx_ring; q_vector->tx.count++; /* Place XDP Tx ring in the same q_vector ring list as regular Tx */ if (i40e_enabled_xdp_vsi(vsi)) { struct i40e_ring *xdp_ring = vsi->xdp_rings[qp_idx]; xdp_ring->q_vector = q_vector; xdp_ring->next = q_vector->tx.ring; q_vector->tx.ring = xdp_ring; q_vector->tx.count++; } rx_ring->q_vector = q_vector; rx_ring->next = q_vector->rx.ring; q_vector->rx.ring = rx_ring; q_vector->rx.count++; } /** * i40e_vsi_map_rings_to_vectors - Maps descriptor rings to vectors * @vsi: the VSI being configured * * This function maps descriptor rings to the queue-specific vectors * we were allotted through the MSI-X enabling code. Ideally, we'd have * one vector per queue pair, but on a constrained vector budget, we * group the queue pairs as "efficiently" as possible. **/ static void i40e_vsi_map_rings_to_vectors(struct i40e_vsi *vsi) { int qp_remaining = vsi->num_queue_pairs; int q_vectors = vsi->num_q_vectors; int num_ringpairs; int v_start = 0; int qp_idx = 0; /* If we don't have enough vectors for a 1-to-1 mapping, we'll have to * group them so there are multiple queues per vector. * It is also important to go through all the vectors available to be * sure that if we don't use all the vectors, that the remaining vectors * are cleared. This is especially important when decreasing the * number of queues in use. */ for (; v_start < q_vectors; v_start++) { struct i40e_q_vector *q_vector = vsi->q_vectors[v_start]; num_ringpairs = DIV_ROUND_UP(qp_remaining, q_vectors - v_start); q_vector->num_ringpairs = num_ringpairs; q_vector->reg_idx = q_vector->v_idx + vsi->base_vector - 1; q_vector->rx.count = 0; q_vector->tx.count = 0; q_vector->rx.ring = NULL; q_vector->tx.ring = NULL; while (num_ringpairs--) { i40e_map_vector_to_qp(vsi, v_start, qp_idx); qp_idx++; qp_remaining--; } } } /** * i40e_vsi_request_irq - Request IRQ from the OS * @vsi: the VSI being configured * @basename: name for the vector **/ static int i40e_vsi_request_irq(struct i40e_vsi *vsi, char *basename) { struct i40e_pf *pf = vsi->back; int err; if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) err = i40e_vsi_request_irq_msix(vsi, basename); else if (test_bit(I40E_FLAG_MSI_ENA, pf->flags)) err = request_irq(pf->pdev->irq, i40e_intr, 0, pf->int_name, pf); else err = request_irq(pf->pdev->irq, i40e_intr, IRQF_SHARED, pf->int_name, pf); if (err) dev_info(&pf->pdev->dev, "request_irq failed, Error %d\n", err); return err; } #ifdef CONFIG_NET_POLL_CONTROLLER /** * i40e_netpoll - A Polling 'interrupt' handler * @netdev: network interface device structure * * This is used by netconsole to send skbs without having to re-enable * interrupts. It's not called while the normal interrupt routine is executing. **/ static void i40e_netpoll(struct net_device *netdev) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; int i; /* if interface is down do nothing */ if (test_bit(__I40E_VSI_DOWN, vsi->state)) return; if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { for (i = 0; i < vsi->num_q_vectors; i++) i40e_msix_clean_rings(0, vsi->q_vectors[i]); } else { i40e_intr(pf->pdev->irq, netdev); } } #endif #define I40E_QTX_ENA_WAIT_COUNT 50 /** * i40e_pf_txq_wait - Wait for a PF's Tx queue to be enabled or disabled * @pf: the PF being configured * @pf_q: the PF queue * @enable: enable or disable state of the queue * * This routine will wait for the given Tx queue of the PF to reach the * enabled or disabled state. * Returns -ETIMEDOUT in case of failing to reach the requested state after * multiple retries; else will return 0 in case of success. **/ static int i40e_pf_txq_wait(struct i40e_pf *pf, int pf_q, bool enable) { int i; u32 tx_reg; for (i = 0; i < I40E_QUEUE_WAIT_RETRY_LIMIT; i++) { tx_reg = rd32(&pf->hw, I40E_QTX_ENA(pf_q)); if (enable == !!(tx_reg & I40E_QTX_ENA_QENA_STAT_MASK)) break; usleep_range(10, 20); } if (i >= I40E_QUEUE_WAIT_RETRY_LIMIT) return -ETIMEDOUT; return 0; } /** * i40e_control_tx_q - Start or stop a particular Tx queue * @pf: the PF structure * @pf_q: the PF queue to configure * @enable: start or stop the queue * * This function enables or disables a single queue. Note that any delay * required after the operation is expected to be handled by the caller of * this function. **/ static void i40e_control_tx_q(struct i40e_pf *pf, int pf_q, bool enable) { struct i40e_hw *hw = &pf->hw; u32 tx_reg; int i; /* warn the TX unit of coming changes */ i40e_pre_tx_queue_cfg(&pf->hw, pf_q, enable); if (!enable) usleep_range(10, 20); for (i = 0; i < I40E_QTX_ENA_WAIT_COUNT; i++) { tx_reg = rd32(hw, I40E_QTX_ENA(pf_q)); if (((tx_reg >> I40E_QTX_ENA_QENA_REQ_SHIFT) & 1) == ((tx_reg >> I40E_QTX_ENA_QENA_STAT_SHIFT) & 1)) break; usleep_range(1000, 2000); } /* Skip if the queue is already in the requested state */ if (enable == !!(tx_reg & I40E_QTX_ENA_QENA_STAT_MASK)) return; /* turn on/off the queue */ if (enable) { wr32(hw, I40E_QTX_HEAD(pf_q), 0); tx_reg |= I40E_QTX_ENA_QENA_REQ_MASK; } else { tx_reg &= ~I40E_QTX_ENA_QENA_REQ_MASK; } wr32(hw, I40E_QTX_ENA(pf_q), tx_reg); } /** * i40e_control_wait_tx_q - Start/stop Tx queue and wait for completion * @seid: VSI SEID * @pf: the PF structure * @pf_q: the PF queue to configure * @is_xdp: true if the queue is used for XDP * @enable: start or stop the queue **/ int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q, bool is_xdp, bool enable) { int ret; i40e_control_tx_q(pf, pf_q, enable); /* wait for the change to finish */ ret = i40e_pf_txq_wait(pf, pf_q, enable); if (ret) { dev_info(&pf->pdev->dev, "VSI seid %d %sTx ring %d %sable timeout\n", seid, (is_xdp ? "XDP " : ""), pf_q, (enable ? "en" : "dis")); } return ret; } /** * i40e_vsi_enable_tx - Start a VSI's rings * @vsi: the VSI being configured **/ static int i40e_vsi_enable_tx(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int i, pf_q, ret = 0; pf_q = vsi->base_queue; for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q, false /*is xdp*/, true); if (ret) break; if (!i40e_enabled_xdp_vsi(vsi)) continue; ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q + vsi->alloc_queue_pairs, true /*is xdp*/, true); if (ret) break; } return ret; } /** * i40e_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled * @pf: the PF being configured * @pf_q: the PF queue * @enable: enable or disable state of the queue * * This routine will wait for the given Rx queue of the PF to reach the * enabled or disabled state. * Returns -ETIMEDOUT in case of failing to reach the requested state after * multiple retries; else will return 0 in case of success. **/ static int i40e_pf_rxq_wait(struct i40e_pf *pf, int pf_q, bool enable) { int i; u32 rx_reg; for (i = 0; i < I40E_QUEUE_WAIT_RETRY_LIMIT; i++) { rx_reg = rd32(&pf->hw, I40E_QRX_ENA(pf_q)); if (enable == !!(rx_reg & I40E_QRX_ENA_QENA_STAT_MASK)) break; usleep_range(10, 20); } if (i >= I40E_QUEUE_WAIT_RETRY_LIMIT) return -ETIMEDOUT; return 0; } /** * i40e_control_rx_q - Start or stop a particular Rx queue * @pf: the PF structure * @pf_q: the PF queue to configure * @enable: start or stop the queue * * This function enables or disables a single queue. Note that * any delay required after the operation is expected to be * handled by the caller of this function. **/ static void i40e_control_rx_q(struct i40e_pf *pf, int pf_q, bool enable) { struct i40e_hw *hw = &pf->hw; u32 rx_reg; int i; for (i = 0; i < I40E_QTX_ENA_WAIT_COUNT; i++) { rx_reg = rd32(hw, I40E_QRX_ENA(pf_q)); if (((rx_reg >> I40E_QRX_ENA_QENA_REQ_SHIFT) & 1) == ((rx_reg >> I40E_QRX_ENA_QENA_STAT_SHIFT) & 1)) break; usleep_range(1000, 2000); } /* Skip if the queue is already in the requested state */ if (enable == !!(rx_reg & I40E_QRX_ENA_QENA_STAT_MASK)) return; /* turn on/off the queue */ if (enable) rx_reg |= I40E_QRX_ENA_QENA_REQ_MASK; else rx_reg &= ~I40E_QRX_ENA_QENA_REQ_MASK; wr32(hw, I40E_QRX_ENA(pf_q), rx_reg); } /** * i40e_control_wait_rx_q * @pf: the PF structure * @pf_q: queue being configured * @enable: start or stop the rings * * This function enables or disables a single queue along with waiting * for the change to finish. The caller of this function should handle * the delays needed in the case of disabling queues. **/ int i40e_control_wait_rx_q(struct i40e_pf *pf, int pf_q, bool enable) { int ret = 0; i40e_control_rx_q(pf, pf_q, enable); /* wait for the change to finish */ ret = i40e_pf_rxq_wait(pf, pf_q, enable); if (ret) return ret; return ret; } /** * i40e_vsi_enable_rx - Start a VSI's rings * @vsi: the VSI being configured **/ static int i40e_vsi_enable_rx(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int i, pf_q, ret = 0; pf_q = vsi->base_queue; for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { ret = i40e_control_wait_rx_q(pf, pf_q, true); if (ret) { dev_info(&pf->pdev->dev, "VSI seid %d Rx ring %d enable timeout\n", vsi->seid, pf_q); break; } } return ret; } /** * i40e_vsi_start_rings - Start a VSI's rings * @vsi: the VSI being configured **/ int i40e_vsi_start_rings(struct i40e_vsi *vsi) { int ret = 0; /* do rx first for enable and last for disable */ ret = i40e_vsi_enable_rx(vsi); if (ret) return ret; ret = i40e_vsi_enable_tx(vsi); return ret; } #define I40E_DISABLE_TX_GAP_MSEC 50 /** * i40e_vsi_stop_rings - Stop a VSI's rings * @vsi: the VSI being configured **/ void i40e_vsi_stop_rings(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; u32 pf_q, tx_q_end, rx_q_end; /* When port TX is suspended, don't wait */ if (test_bit(__I40E_PORT_SUSPENDED, vsi->back->state)) return i40e_vsi_stop_rings_no_wait(vsi); tx_q_end = vsi->base_queue + vsi->alloc_queue_pairs * (i40e_enabled_xdp_vsi(vsi) ? 2 : 1); for (pf_q = vsi->base_queue; pf_q < tx_q_end; pf_q++) i40e_pre_tx_queue_cfg(&pf->hw, pf_q, false); rx_q_end = vsi->base_queue + vsi->num_queue_pairs; for (pf_q = vsi->base_queue; pf_q < rx_q_end; pf_q++) i40e_control_rx_q(pf, pf_q, false); msleep(I40E_DISABLE_TX_GAP_MSEC); for (pf_q = vsi->base_queue; pf_q < tx_q_end; pf_q++) wr32(&pf->hw, I40E_QTX_ENA(pf_q), 0); i40e_vsi_wait_queues_disabled(vsi); } /** * i40e_vsi_stop_rings_no_wait - Stop a VSI's rings and do not delay * @vsi: the VSI being shutdown * * This function stops all the rings for a VSI but does not delay to verify * that rings have been disabled. It is expected that the caller is shutting * down multiple VSIs at once and will delay together for all the VSIs after * initiating the shutdown. This is particularly useful for shutting down lots * of VFs together. Otherwise, a large delay can be incurred while configuring * each VSI in serial. **/ void i40e_vsi_stop_rings_no_wait(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int i, pf_q; pf_q = vsi->base_queue; for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { i40e_control_tx_q(pf, pf_q, false); i40e_control_rx_q(pf, pf_q, false); } } /** * i40e_vsi_free_irq - Free the irq association with the OS * @vsi: the VSI being configured **/ static void i40e_vsi_free_irq(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; int base = vsi->base_vector; u32 val, qp; int i; if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { if (!vsi->q_vectors) return; if (!vsi->irqs_ready) return; vsi->irqs_ready = false; for (i = 0; i < vsi->num_q_vectors; i++) { int irq_num; u16 vector; vector = i + base; irq_num = pf->msix_entries[vector].vector; /* free only the irqs that were actually requested */ if (!vsi->q_vectors[i] || !vsi->q_vectors[i]->num_ringpairs) continue; /* clear the affinity notifier in the IRQ descriptor */ irq_set_affinity_notifier(irq_num, NULL); /* remove our suggested affinity mask for this IRQ */ irq_update_affinity_hint(irq_num, NULL); free_irq(irq_num, vsi->q_vectors[i]); /* Tear down the interrupt queue link list * * We know that they come in pairs and always * the Rx first, then the Tx. To clear the * link list, stick the EOL value into the * next_q field of the registers. */ val = rd32(hw, I40E_PFINT_LNKLSTN(vector - 1)); qp = FIELD_GET(I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK, val); val |= I40E_QUEUE_END_OF_LIST << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT; wr32(hw, I40E_PFINT_LNKLSTN(vector - 1), val); while (qp != I40E_QUEUE_END_OF_LIST) { u32 next; val = rd32(hw, I40E_QINT_RQCTL(qp)); val &= ~(I40E_QINT_RQCTL_MSIX_INDX_MASK | I40E_QINT_RQCTL_MSIX0_INDX_MASK | I40E_QINT_RQCTL_CAUSE_ENA_MASK | I40E_QINT_RQCTL_INTEVENT_MASK); val |= (I40E_QINT_RQCTL_ITR_INDX_MASK | I40E_QINT_RQCTL_NEXTQ_INDX_MASK); wr32(hw, I40E_QINT_RQCTL(qp), val); val = rd32(hw, I40E_QINT_TQCTL(qp)); next = FIELD_GET(I40E_QINT_TQCTL_NEXTQ_INDX_MASK, val); val &= ~(I40E_QINT_TQCTL_MSIX_INDX_MASK | I40E_QINT_TQCTL_MSIX0_INDX_MASK | I40E_QINT_TQCTL_CAUSE_ENA_MASK | I40E_QINT_TQCTL_INTEVENT_MASK); val |= (I40E_QINT_TQCTL_ITR_INDX_MASK | I40E_QINT_TQCTL_NEXTQ_INDX_MASK); wr32(hw, I40E_QINT_TQCTL(qp), val); qp = next; } } } else { free_irq(pf->pdev->irq, pf); val = rd32(hw, I40E_PFINT_LNKLST0); qp = FIELD_GET(I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK, val); val |= I40E_QUEUE_END_OF_LIST << I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT; wr32(hw, I40E_PFINT_LNKLST0, val); val = rd32(hw, I40E_QINT_RQCTL(qp)); val &= ~(I40E_QINT_RQCTL_MSIX_INDX_MASK | I40E_QINT_RQCTL_MSIX0_INDX_MASK | I40E_QINT_RQCTL_CAUSE_ENA_MASK | I40E_QINT_RQCTL_INTEVENT_MASK); val |= (I40E_QINT_RQCTL_ITR_INDX_MASK | I40E_QINT_RQCTL_NEXTQ_INDX_MASK); wr32(hw, I40E_QINT_RQCTL(qp), val); val = rd32(hw, I40E_QINT_TQCTL(qp)); val &= ~(I40E_QINT_TQCTL_MSIX_INDX_MASK | I40E_QINT_TQCTL_MSIX0_INDX_MASK | I40E_QINT_TQCTL_CAUSE_ENA_MASK | I40E_QINT_TQCTL_INTEVENT_MASK); val |= (I40E_QINT_TQCTL_ITR_INDX_MASK | I40E_QINT_TQCTL_NEXTQ_INDX_MASK); wr32(hw, I40E_QINT_TQCTL(qp), val); } } /** * i40e_free_q_vector - Free memory allocated for specific interrupt vector * @vsi: the VSI being configured * @v_idx: Index of vector to be freed * * This function frees the memory allocated to the q_vector. In addition if * NAPI is enabled it will delete any references to the NAPI struct prior * to freeing the q_vector. **/ static void i40e_free_q_vector(struct i40e_vsi *vsi, int v_idx) { struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx]; struct i40e_ring *ring; if (!q_vector) return; /* disassociate q_vector from rings */ i40e_for_each_ring(ring, q_vector->tx) ring->q_vector = NULL; i40e_for_each_ring(ring, q_vector->rx) ring->q_vector = NULL; /* only VSI w/ an associated netdev is set up w/ NAPI */ if (vsi->netdev) netif_napi_del(&q_vector->napi); vsi->q_vectors[v_idx] = NULL; kfree_rcu(q_vector, rcu); } /** * i40e_vsi_free_q_vectors - Free memory allocated for interrupt vectors * @vsi: the VSI being un-configured * * This frees the memory allocated to the q_vectors and * deletes references to the NAPI struct. **/ static void i40e_vsi_free_q_vectors(struct i40e_vsi *vsi) { int v_idx; for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++) i40e_free_q_vector(vsi, v_idx); } /** * i40e_reset_interrupt_capability - Disable interrupt setup in OS * @pf: board private structure **/ static void i40e_reset_interrupt_capability(struct i40e_pf *pf) { /* If we're in Legacy mode, the interrupt was cleaned in vsi_close */ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { pci_disable_msix(pf->pdev); kfree(pf->msix_entries); pf->msix_entries = NULL; kfree(pf->irq_pile); pf->irq_pile = NULL; } else if (test_bit(I40E_FLAG_MSI_ENA, pf->flags)) { pci_disable_msi(pf->pdev); } clear_bit(I40E_FLAG_MSI_ENA, pf->flags); clear_bit(I40E_FLAG_MSIX_ENA, pf->flags); } /** * i40e_clear_interrupt_scheme - Clear the current interrupt scheme settings * @pf: board private structure * * We go through and clear interrupt specific resources and reset the structure * to pre-load conditions **/ static void i40e_clear_interrupt_scheme(struct i40e_pf *pf) { struct i40e_vsi *vsi; int i; if (test_bit(__I40E_MISC_IRQ_REQUESTED, pf->state)) i40e_free_misc_vector(pf); i40e_put_lump(pf->irq_pile, pf->iwarp_base_vector, I40E_IWARP_IRQ_PILE_ID); i40e_put_lump(pf->irq_pile, 0, I40E_PILE_VALID_BIT-1); i40e_pf_for_each_vsi(pf, i, vsi) i40e_vsi_free_q_vectors(vsi); i40e_reset_interrupt_capability(pf); } /** * i40e_napi_enable_all - Enable NAPI for all q_vectors in the VSI * @vsi: the VSI being configured **/ static void i40e_napi_enable_all(struct i40e_vsi *vsi) { int q_idx; if (!vsi->netdev) return; for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) { struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx]; if (q_vector->rx.ring || q_vector->tx.ring) napi_enable(&q_vector->napi); } } /** * i40e_napi_disable_all - Disable NAPI for all q_vectors in the VSI * @vsi: the VSI being configured **/ static void i40e_napi_disable_all(struct i40e_vsi *vsi) { int q_idx; if (!vsi->netdev) return; for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) { struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx]; if (q_vector->rx.ring || q_vector->tx.ring) napi_disable(&q_vector->napi); } } /** * i40e_vsi_close - Shut down a VSI * @vsi: the vsi to be quelled **/ static void i40e_vsi_close(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; if (!test_and_set_bit(__I40E_VSI_DOWN, vsi->state)) i40e_down(vsi); i40e_vsi_free_irq(vsi); i40e_vsi_free_tx_resources(vsi); i40e_vsi_free_rx_resources(vsi); vsi->current_netdev_flags = 0; set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) set_bit(__I40E_CLIENT_RESET, pf->state); } /** * i40e_quiesce_vsi - Pause a given VSI * @vsi: the VSI being paused **/ static void i40e_quiesce_vsi(struct i40e_vsi *vsi) { if (test_bit(__I40E_VSI_DOWN, vsi->state)) return; set_bit(__I40E_VSI_NEEDS_RESTART, vsi->state); if (vsi->netdev && netif_running(vsi->netdev)) vsi->netdev->netdev_ops->ndo_stop(vsi->netdev); else i40e_vsi_close(vsi); } /** * i40e_unquiesce_vsi - Resume a given VSI * @vsi: the VSI being resumed **/ static void i40e_unquiesce_vsi(struct i40e_vsi *vsi) { if (!test_and_clear_bit(__I40E_VSI_NEEDS_RESTART, vsi->state)) return; if (vsi->netdev && netif_running(vsi->netdev)) vsi->netdev->netdev_ops->ndo_open(vsi->netdev); else i40e_vsi_open(vsi); /* this clears the DOWN bit */ } /** * i40e_pf_quiesce_all_vsi - Pause all VSIs on a PF * @pf: the PF **/ static void i40e_pf_quiesce_all_vsi(struct i40e_pf *pf) { struct i40e_vsi *vsi; int v; i40e_pf_for_each_vsi(pf, v, vsi) i40e_quiesce_vsi(vsi); } /** * i40e_pf_unquiesce_all_vsi - Resume all VSIs on a PF * @pf: the PF **/ static void i40e_pf_unquiesce_all_vsi(struct i40e_pf *pf) { struct i40e_vsi *vsi; int v; i40e_pf_for_each_vsi(pf, v, vsi) i40e_unquiesce_vsi(vsi); } /** * i40e_vsi_wait_queues_disabled - Wait for VSI's queues to be disabled * @vsi: the VSI being configured * * Wait until all queues on a given VSI have been disabled. **/ int i40e_vsi_wait_queues_disabled(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int i, pf_q, ret; pf_q = vsi->base_queue; for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { /* Check and wait for the Tx queue */ ret = i40e_pf_txq_wait(pf, pf_q, false); if (ret) { dev_info(&pf->pdev->dev, "VSI seid %d Tx ring %d disable timeout\n", vsi->seid, pf_q); return ret; } if (!i40e_enabled_xdp_vsi(vsi)) goto wait_rx; /* Check and wait for the XDP Tx queue */ ret = i40e_pf_txq_wait(pf, pf_q + vsi->alloc_queue_pairs, false); if (ret) { dev_info(&pf->pdev->dev, "VSI seid %d XDP Tx ring %d disable timeout\n", vsi->seid, pf_q); return ret; } wait_rx: /* Check and wait for the Rx queue */ ret = i40e_pf_rxq_wait(pf, pf_q, false); if (ret) { dev_info(&pf->pdev->dev, "VSI seid %d Rx ring %d disable timeout\n", vsi->seid, pf_q); return ret; } } return 0; } #ifdef CONFIG_I40E_DCB /** * i40e_pf_wait_queues_disabled - Wait for all queues of PF VSIs to be disabled * @pf: the PF * * This function waits for the queues to be in disabled state for all the * VSIs that are managed by this PF. **/ static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf) { struct i40e_vsi *vsi; int v, ret = 0; i40e_pf_for_each_vsi(pf, v, vsi) { ret = i40e_vsi_wait_queues_disabled(vsi); if (ret) break; } return ret; } #endif /** * i40e_get_iscsi_tc_map - Return TC map for iSCSI APP * @pf: pointer to PF * * Get TC map for ISCSI PF type that will include iSCSI TC * and LAN TC. **/ static u8 i40e_get_iscsi_tc_map(struct i40e_pf *pf) { struct i40e_dcb_app_priority_table app; struct i40e_hw *hw = &pf->hw; u8 enabled_tc = 1; /* TC0 is always enabled */ u8 tc, i; /* Get the iSCSI APP TLV */ struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config; for (i = 0; i < dcbcfg->numapps; i++) { app = dcbcfg->app[i]; if (app.selector == I40E_APP_SEL_TCPIP && app.protocolid == I40E_APP_PROTOID_ISCSI) { tc = dcbcfg->etscfg.prioritytable[app.priority]; enabled_tc |= BIT(tc); break; } } return enabled_tc; } /** * i40e_dcb_get_num_tc - Get the number of TCs from DCBx config * @dcbcfg: the corresponding DCBx configuration structure * * Return the number of TCs from given DCBx configuration **/ static u8 i40e_dcb_get_num_tc(struct i40e_dcbx_config *dcbcfg) { int i, tc_unused = 0; u8 num_tc = 0; u8 ret = 0; /* Scan the ETS Config Priority Table to find * traffic class enabled for a given priority * and create a bitmask of enabled TCs */ for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) num_tc |= BIT(dcbcfg->etscfg.prioritytable[i]); /* Now scan the bitmask to check for * contiguous TCs starting with TC0 */ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (num_tc & BIT(i)) { if (!tc_unused) { ret++; } else { pr_err("Non-contiguous TC - Disabling DCB\n"); return 1; } } else { tc_unused = 1; } } /* There is always at least TC0 */ if (!ret) ret = 1; return ret; } /** * i40e_dcb_get_enabled_tc - Get enabled traffic classes * @dcbcfg: the corresponding DCBx configuration structure * * Query the current DCB configuration and return the number of * traffic classes enabled from the given DCBX config **/ static u8 i40e_dcb_get_enabled_tc(struct i40e_dcbx_config *dcbcfg) { u8 num_tc = i40e_dcb_get_num_tc(dcbcfg); u8 enabled_tc = 1; u8 i; for (i = 0; i < num_tc; i++) enabled_tc |= BIT(i); return enabled_tc; } /** * i40e_mqprio_get_enabled_tc - Get enabled traffic classes * @pf: PF being queried * * Query the current MQPRIO configuration and return the number of * traffic classes enabled. **/ static u8 i40e_mqprio_get_enabled_tc(struct i40e_pf *pf) { struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); u8 num_tc = vsi->mqprio_qopt.qopt.num_tc; u8 enabled_tc = 1, i; for (i = 1; i < num_tc; i++) enabled_tc |= BIT(i); return enabled_tc; } /** * i40e_pf_get_num_tc - Get enabled traffic classes for PF * @pf: PF being queried * * Return number of traffic classes enabled for the given PF **/ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) { u8 i, enabled_tc = 1; u8 num_tc = 0; if (i40e_is_tc_mqprio_enabled(pf)) { struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); return vsi->mqprio_qopt.qopt.num_tc; } /* If neither MQPRIO nor DCB is enabled, then always use single TC */ if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags)) return 1; /* SFP mode will be enabled for all TCs on port */ if (!test_bit(I40E_FLAG_MFP_ENA, pf->flags)) return i40e_dcb_get_num_tc(&pf->hw.local_dcbx_config); /* MFP mode return count of enabled TCs for this PF */ if (pf->hw.func_caps.iscsi) enabled_tc = i40e_get_iscsi_tc_map(pf); else return 1; /* Only TC0 */ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (enabled_tc & BIT(i)) num_tc++; } return num_tc; } /** * i40e_pf_get_tc_map - Get bitmap for enabled traffic classes * @pf: PF being queried * * Return a bitmap for enabled traffic classes for this PF. **/ static u8 i40e_pf_get_tc_map(struct i40e_pf *pf) { if (i40e_is_tc_mqprio_enabled(pf)) return i40e_mqprio_get_enabled_tc(pf); /* If neither MQPRIO nor DCB is enabled for this PF then just return * default TC */ if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags)) return I40E_DEFAULT_TRAFFIC_CLASS; /* SFP mode we want PF to be enabled for all TCs */ if (!test_bit(I40E_FLAG_MFP_ENA, pf->flags)) return i40e_dcb_get_enabled_tc(&pf->hw.local_dcbx_config); /* MFP enabled and iSCSI PF type */ if (pf->hw.func_caps.iscsi) return i40e_get_iscsi_tc_map(pf); else return I40E_DEFAULT_TRAFFIC_CLASS; } /** * i40e_vsi_get_bw_info - Query VSI BW Information * @vsi: the VSI being queried * * Returns 0 on success, negative value on failure **/ static int i40e_vsi_get_bw_info(struct i40e_vsi *vsi) { struct i40e_aqc_query_vsi_ets_sla_config_resp bw_ets_config = {0}; struct i40e_aqc_query_vsi_bw_config_resp bw_config = {0}; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; u32 tc_bw_max; int ret; int i; /* Get the VSI level BW configuration */ ret = i40e_aq_query_vsi_bw_config(hw, vsi->seid, &bw_config, NULL); if (ret) { dev_info(&pf->pdev->dev, "couldn't get PF vsi bw config, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); return -EINVAL; } /* Get the VSI level BW configuration per TC */ ret = i40e_aq_query_vsi_ets_sla_config(hw, vsi->seid, &bw_ets_config, NULL); if (ret) { dev_info(&pf->pdev->dev, "couldn't get PF vsi ets bw config, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); return -EINVAL; } if (bw_config.tc_valid_bits != bw_ets_config.tc_valid_bits) { dev_info(&pf->pdev->dev, "Enabled TCs mismatch from querying VSI BW info 0x%08x 0x%08x\n", bw_config.tc_valid_bits, bw_ets_config.tc_valid_bits); /* Still continuing */ } vsi->bw_limit = le16_to_cpu(bw_config.port_bw_limit); vsi->bw_max_quanta = bw_config.max_bw; tc_bw_max = le16_to_cpu(bw_ets_config.tc_bw_max[0]) | (le16_to_cpu(bw_ets_config.tc_bw_max[1]) << 16); for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { vsi->bw_ets_share_credits[i] = bw_ets_config.share_credits[i]; vsi->bw_ets_limit_credits[i] = le16_to_cpu(bw_ets_config.credits[i]); /* 3 bits out of 4 for each TC */ vsi->bw_ets_max_quanta[i] = (u8)((tc_bw_max >> (i*4)) & 0x7); } return 0; } /** * i40e_vsi_configure_bw_alloc - Configure VSI BW allocation per TC * @vsi: the VSI being configured * @enabled_tc: TC bitmap * @bw_share: BW shared credits per TC * * Returns 0 on success, negative value on failure **/ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, u8 *bw_share) { struct i40e_aqc_configure_vsi_tc_bw_data bw_data; struct i40e_pf *pf = vsi->back; int ret; int i; /* There is no need to reset BW when mqprio mode is on. */ if (i40e_is_tc_mqprio_enabled(pf)) return 0; if (!vsi->mqprio_qopt.qopt.hw && !test_bit(I40E_FLAG_DCB_ENA, pf->flags)) { ret = i40e_set_bw_limit(vsi, vsi->seid, 0); if (ret) dev_info(&pf->pdev->dev, "Failed to reset tx rate for vsi->seid %u\n", vsi->seid); return ret; } memset(&bw_data, 0, sizeof(bw_data)); bw_data.tc_valid_bits = enabled_tc; for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) bw_data.tc_bw_credits[i] = bw_share[i]; ret = i40e_aq_config_vsi_tc_bw(&pf->hw, vsi->seid, &bw_data, NULL); if (ret) { dev_info(&pf->pdev->dev, "AQ command Config VSI BW allocation per TC failed = %d\n", pf->hw.aq.asq_last_status); return -EINVAL; } for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) vsi->info.qs_handle[i] = bw_data.qs_handles[i]; return 0; } /** * i40e_vsi_config_netdev_tc - Setup the netdev TC configuration * @vsi: the VSI being configured * @enabled_tc: TC map to be enabled * **/ static void i40e_vsi_config_netdev_tc(struct i40e_vsi *vsi, u8 enabled_tc) { struct net_device *netdev = vsi->netdev; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; u8 netdev_tc = 0; int i; struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config; if (!netdev) return; if (!enabled_tc) { netdev_reset_tc(netdev); return; } /* Set up actual enabled TCs on the VSI */ if (netdev_set_num_tc(netdev, vsi->tc_config.numtc)) return; /* set per TC queues for the VSI */ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { /* Only set TC queues for enabled tcs * * e.g. For a VSI that has TC0 and TC3 enabled the * enabled_tc bitmap would be 0x00001001; the driver * will set the numtc for netdev as 2 that will be * referenced by the netdev layer as TC 0 and 1. */ if (vsi->tc_config.enabled_tc & BIT(i)) netdev_set_tc_queue(netdev, vsi->tc_config.tc_info[i].netdev_tc, vsi->tc_config.tc_info[i].qcount, vsi->tc_config.tc_info[i].qoffset); } if (i40e_is_tc_mqprio_enabled(pf)) return; /* Assign UP2TC map for the VSI */ for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) { /* Get the actual TC# for the UP */ u8 ets_tc = dcbcfg->etscfg.prioritytable[i]; /* Get the mapped netdev TC# for the UP */ netdev_tc = vsi->tc_config.tc_info[ets_tc].netdev_tc; netdev_set_prio_tc_map(netdev, i, netdev_tc); } } /** * i40e_vsi_update_queue_map - Update our copy of VSi info with new queue map * @vsi: the VSI being configured * @ctxt: the ctxt buffer returned from AQ VSI update param command **/ static void i40e_vsi_update_queue_map(struct i40e_vsi *vsi, struct i40e_vsi_context *ctxt) { /* copy just the sections touched not the entire info * since not all sections are valid as returned by * update vsi params */ vsi->info.mapping_flags = ctxt->info.mapping_flags; memcpy(&vsi->info.queue_mapping, &ctxt->info.queue_mapping, sizeof(vsi->info.queue_mapping)); memcpy(&vsi->info.tc_mapping, ctxt->info.tc_mapping, sizeof(vsi->info.tc_mapping)); } /** * i40e_update_adq_vsi_queues - update queue mapping for ADq VSI * @vsi: the VSI being reconfigured * @vsi_offset: offset from main VF VSI */ int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset) { struct i40e_vsi_context ctxt = {}; struct i40e_pf *pf; struct i40e_hw *hw; int ret; if (!vsi) return -EINVAL; pf = vsi->back; hw = &pf->hw; ctxt.seid = vsi->seid; ctxt.pf_num = hw->pf_id; ctxt.vf_num = vsi->vf_id + hw->func_caps.vf_base_id + vsi_offset; ctxt.uplink_seid = vsi->uplink_seid; ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL; ctxt.flags = I40E_AQ_VSI_TYPE_VF; ctxt.info = vsi->info; i40e_vsi_setup_queue_map(vsi, &ctxt, vsi->tc_config.enabled_tc, false); if (vsi->reconfig_rss) { vsi->rss_size = min_t(int, pf->alloc_rss_size, vsi->num_queue_pairs); ret = i40e_vsi_config_rss(vsi); if (ret) { dev_info(&pf->pdev->dev, "Failed to reconfig rss for num_queues\n"); return ret; } vsi->reconfig_rss = false; } ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (ret) { dev_info(&pf->pdev->dev, "Update vsi config failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(hw, hw->aq.asq_last_status)); return ret; } /* update the local VSI info with updated queue map */ i40e_vsi_update_queue_map(vsi, &ctxt); vsi->info.valid_sections = 0; return ret; } /** * i40e_vsi_config_tc - Configure VSI Tx Scheduler for given TC map * @vsi: VSI to be configured * @enabled_tc: TC bitmap * * This configures a particular VSI for TCs that are mapped to the * given TC bitmap. It uses default bandwidth share for TCs across * VSIs to configure TC for a particular VSI. * * NOTE: * It is expected that the VSI queues have been quisced before calling * this function. **/ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc) { u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0}; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_vsi_context ctxt; int ret = 0; int i; /* Check if enabled_tc is same as existing or new TCs */ if (vsi->tc_config.enabled_tc == enabled_tc && vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL) return ret; /* Enable ETS TCs with equal BW Share for now across all VSIs */ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (enabled_tc & BIT(i)) bw_share[i] = 1; } ret = i40e_vsi_configure_bw_alloc(vsi, enabled_tc, bw_share); if (ret) { struct i40e_aqc_query_vsi_bw_config_resp bw_config = {0}; dev_info(&pf->pdev->dev, "Failed configuring TC map %d for VSI %d\n", enabled_tc, vsi->seid); ret = i40e_aq_query_vsi_bw_config(hw, vsi->seid, &bw_config, NULL); if (ret) { dev_info(&pf->pdev->dev, "Failed querying vsi bw info, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(hw, hw->aq.asq_last_status)); goto out; } if ((bw_config.tc_valid_bits & enabled_tc) != enabled_tc) { u8 valid_tc = bw_config.tc_valid_bits & enabled_tc; if (!valid_tc) valid_tc = bw_config.tc_valid_bits; /* Always enable TC0, no matter what */ valid_tc |= 1; dev_info(&pf->pdev->dev, "Requested tc 0x%x, but FW reports 0x%x as valid. Attempting to use 0x%x.\n", enabled_tc, bw_config.tc_valid_bits, valid_tc); enabled_tc = valid_tc; } ret = i40e_vsi_configure_bw_alloc(vsi, enabled_tc, bw_share); if (ret) { dev_err(&pf->pdev->dev, "Unable to configure TC map %d for VSI %d\n", enabled_tc, vsi->seid); goto out; } } /* Update Queue Pairs Mapping for currently enabled UPs */ ctxt.seid = vsi->seid; ctxt.pf_num = vsi->back->hw.pf_id; ctxt.vf_num = 0; ctxt.uplink_seid = vsi->uplink_seid; ctxt.info = vsi->info; if (i40e_is_tc_mqprio_enabled(pf)) { ret = i40e_vsi_setup_queue_map_mqprio(vsi, &ctxt, enabled_tc); if (ret) goto out; } else { i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false); } /* On destroying the qdisc, reset vsi->rss_size, as number of enabled * queues changed. */ if (!vsi->mqprio_qopt.qopt.hw && vsi->reconfig_rss) { vsi->rss_size = min_t(int, vsi->back->alloc_rss_size, vsi->num_queue_pairs); ret = i40e_vsi_config_rss(vsi); if (ret) { dev_info(&vsi->back->pdev->dev, "Failed to reconfig rss for num_queues\n"); return ret; } vsi->reconfig_rss = false; } if (test_bit(I40E_FLAG_IWARP_ENA, vsi->back->flags)) { ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID); ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA; } /* Update the VSI after updating the VSI queue-mapping * information */ ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (ret) { dev_info(&pf->pdev->dev, "Update vsi tc config failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(hw, hw->aq.asq_last_status)); goto out; } /* update the local VSI info with updated queue map */ i40e_vsi_update_queue_map(vsi, &ctxt); vsi->info.valid_sections = 0; /* Update current VSI BW information */ ret = i40e_vsi_get_bw_info(vsi); if (ret) { dev_info(&pf->pdev->dev, "Failed updating vsi bw info, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(hw, hw->aq.asq_last_status)); goto out; } /* Update the netdev TC setup */ i40e_vsi_config_netdev_tc(vsi, enabled_tc); out: return ret; } /** * i40e_vsi_reconfig_tc - Reconfigure VSI Tx Scheduler for stored TC map * @vsi: VSI to be reconfigured * * This reconfigures a particular VSI for TCs that are mapped to the * TC bitmap stored previously for the VSI. * * Context: It is expected that the VSI queues have been quisced before * calling this function. * * Return: 0 on success, negative value on failure **/ static int i40e_vsi_reconfig_tc(struct i40e_vsi *vsi) { u8 enabled_tc; enabled_tc = vsi->tc_config.enabled_tc; vsi->tc_config.enabled_tc = 0; return i40e_vsi_config_tc(vsi, enabled_tc); } /** * i40e_get_link_speed - Returns link speed for the interface * @vsi: VSI to be configured * **/ static int i40e_get_link_speed(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; switch (pf->hw.phy.link_info.link_speed) { case I40E_LINK_SPEED_40GB: return 40000; case I40E_LINK_SPEED_25GB: return 25000; case I40E_LINK_SPEED_20GB: return 20000; case I40E_LINK_SPEED_10GB: return 10000; case I40E_LINK_SPEED_1GB: return 1000; default: return -EINVAL; } } /** * i40e_bw_bytes_to_mbits - Convert max_tx_rate from bytes to mbits * @vsi: Pointer to vsi structure * @max_tx_rate: max TX rate in bytes to be converted into Mbits * * Helper function to convert units before send to set BW limit **/ static u64 i40e_bw_bytes_to_mbits(struct i40e_vsi *vsi, u64 max_tx_rate) { if (max_tx_rate < I40E_BW_MBPS_DIVISOR) { dev_warn(&vsi->back->pdev->dev, "Setting max tx rate to minimum usable value of 50Mbps.\n"); max_tx_rate = I40E_BW_CREDIT_DIVISOR; } else { do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR); } return max_tx_rate; } /** * i40e_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate * @vsi: VSI to be configured * @seid: seid of the channel/VSI * @max_tx_rate: max TX rate to be configured as BW limit * * Helper function to set BW limit for a given VSI **/ int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate) { struct i40e_pf *pf = vsi->back; u64 credits = 0; int speed = 0; int ret = 0; speed = i40e_get_link_speed(vsi); if (max_tx_rate > speed) { dev_err(&pf->pdev->dev, "Invalid max tx rate %llu specified for VSI seid %d.", max_tx_rate, seid); return -EINVAL; } if (max_tx_rate && max_tx_rate < I40E_BW_CREDIT_DIVISOR) { dev_warn(&pf->pdev->dev, "Setting max tx rate to minimum usable value of 50Mbps.\n"); max_tx_rate = I40E_BW_CREDIT_DIVISOR; } /* Tx rate credits are in values of 50Mbps, 0 is disabled */ credits = max_tx_rate; do_div(credits, I40E_BW_CREDIT_DIVISOR); ret = i40e_aq_config_vsi_bw_limit(&pf->hw, seid, credits, I40E_MAX_BW_INACTIVE_ACCUM, NULL); if (ret) dev_err(&pf->pdev->dev, "Failed set tx rate (%llu Mbps) for vsi->seid %u, err %pe aq_err %s\n", max_tx_rate, seid, ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); return ret; } /** * i40e_remove_queue_channels - Remove queue channels for the TCs * @vsi: VSI to be configured * * Remove queue channels for the TCs **/ static void i40e_remove_queue_channels(struct i40e_vsi *vsi) { enum i40e_admin_queue_err last_aq_status; struct i40e_cloud_filter *cfilter; struct i40e_channel *ch, *ch_tmp; struct i40e_pf *pf = vsi->back; struct hlist_node *node; int ret, i; /* Reset rss size that was stored when reconfiguring rss for * channel VSIs with non-power-of-2 queue count. */ vsi->current_rss_size = 0; /* perform cleanup for channels if they exist */ if (list_empty(&vsi->ch_list)) return; list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { struct i40e_vsi *p_vsi; list_del(&ch->list); p_vsi = ch->parent_vsi; if (!p_vsi || !ch->initialized) { kfree(ch); continue; } /* Reset queue contexts */ for (i = 0; i < ch->num_queue_pairs; i++) { struct i40e_ring *tx_ring, *rx_ring; u16 pf_q; pf_q = ch->base_queue + i; tx_ring = vsi->tx_rings[pf_q]; tx_ring->ch = NULL; rx_ring = vsi->rx_rings[pf_q]; rx_ring->ch = NULL; } /* Reset BW configured for this VSI via mqprio */ ret = i40e_set_bw_limit(vsi, ch->seid, 0); if (ret) dev_info(&vsi->back->pdev->dev, "Failed to reset tx rate for ch->seid %u\n", ch->seid); /* delete cloud filters associated with this channel */ hlist_for_each_entry_safe(cfilter, node, &pf->cloud_filter_list, cloud_node) { if (cfilter->seid != ch->seid) continue; hash_del(&cfilter->cloud_node); if (cfilter->dst_port) ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, false); else ret = i40e_add_del_cloud_filter(vsi, cfilter, false); last_aq_status = pf->hw.aq.asq_last_status; if (ret) dev_info(&pf->pdev->dev, "Failed to delete cloud filter, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, last_aq_status)); kfree(cfilter); } /* delete VSI from FW */ ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid, NULL); if (ret) dev_err(&vsi->back->pdev->dev, "unable to remove channel (%d) for parent VSI(%d)\n", ch->seid, p_vsi->seid); kfree(ch); } INIT_LIST_HEAD(&vsi->ch_list); } /** * i40e_get_max_queues_for_channel * @vsi: ptr to VSI to which channels are associated with * * Helper function which returns max value among the queue counts set on the * channels/TCs created. **/ static int i40e_get_max_queues_for_channel(struct i40e_vsi *vsi) { struct i40e_channel *ch, *ch_tmp; int max = 0; list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { if (!ch->initialized) continue; if (ch->num_queue_pairs > max) max = ch->num_queue_pairs; } return max; } /** * i40e_validate_num_queues - validate num_queues w.r.t channel * @pf: ptr to PF device * @num_queues: number of queues * @vsi: the parent VSI * @reconfig_rss: indicates should the RSS be reconfigured or not * * This function validates number of queues in the context of new channel * which is being established and determines if RSS should be reconfigured * or not for parent VSI. **/ static int i40e_validate_num_queues(struct i40e_pf *pf, int num_queues, struct i40e_vsi *vsi, bool *reconfig_rss) { int max_ch_queues; if (!reconfig_rss) return -EINVAL; *reconfig_rss = false; if (vsi->current_rss_size) { if (num_queues > vsi->current_rss_size) { dev_dbg(&pf->pdev->dev, "Error: num_queues (%d) > vsi's current_size(%d)\n", num_queues, vsi->current_rss_size); return -EINVAL; } else if ((num_queues < vsi->current_rss_size) && (!is_power_of_2(num_queues))) { dev_dbg(&pf->pdev->dev, "Error: num_queues (%d) < vsi's current_size(%d), but not power of 2\n", num_queues, vsi->current_rss_size); return -EINVAL; } } if (!is_power_of_2(num_queues)) { /* Find the max num_queues configured for channel if channel * exist. * if channel exist, then enforce 'num_queues' to be more than * max ever queues configured for channel. */ max_ch_queues = i40e_get_max_queues_for_channel(vsi); if (num_queues < max_ch_queues) { dev_dbg(&pf->pdev->dev, "Error: num_queues (%d) < max queues configured for channel(%d)\n", num_queues, max_ch_queues); return -EINVAL; } *reconfig_rss = true; } return 0; } /** * i40e_vsi_reconfig_rss - reconfig RSS based on specified rss_size * @vsi: the VSI being setup * @rss_size: size of RSS, accordingly LUT gets reprogrammed * * This function reconfigures RSS by reprogramming LUTs using 'rss_size' **/ static int i40e_vsi_reconfig_rss(struct i40e_vsi *vsi, u16 rss_size) { struct i40e_pf *pf = vsi->back; u8 seed[I40E_HKEY_ARRAY_SIZE]; struct i40e_hw *hw = &pf->hw; int local_rss_size; u8 *lut; int ret; if (!vsi->rss_size) return -EINVAL; if (rss_size > vsi->rss_size) return -EINVAL; local_rss_size = min_t(int, vsi->rss_size, rss_size); lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) return -ENOMEM; /* Ignoring user configured lut if there is one */ i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, local_rss_size); /* Use user configured hash key if there is one, otherwise * use default. */ if (vsi->rss_hkey_user) memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE); else netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size); if (ret) { dev_info(&pf->pdev->dev, "Cannot set RSS lut, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(hw, hw->aq.asq_last_status)); kfree(lut); return ret; } kfree(lut); /* Do the update w.r.t. storing rss_size */ if (!vsi->orig_rss_size) vsi->orig_rss_size = vsi->rss_size; vsi->current_rss_size = local_rss_size; return ret; } /** * i40e_channel_setup_queue_map - Setup a channel queue map * @pf: ptr to PF device * @ctxt: VSI context structure * @ch: ptr to channel structure * * Setup queue map for a specific channel **/ static void i40e_channel_setup_queue_map(struct i40e_pf *pf, struct i40e_vsi_context *ctxt, struct i40e_channel *ch) { u16 qcount, qmap, sections = 0; u8 offset = 0; int pow; sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; sections |= I40E_AQ_VSI_PROP_SCHED_VALID; qcount = min_t(int, ch->num_queue_pairs, pf->num_lan_msix); ch->num_queue_pairs = qcount; /* find the next higher power-of-2 of num queue pairs */ pow = ilog2(qcount); if (!is_power_of_2(qcount)) pow++; qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) | (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT); /* Setup queue TC[0].qmap for given VSI context */ ctxt->info.tc_mapping[0] = cpu_to_le16(qmap); ctxt->info.up_enable_bits = 0x1; /* TC0 enabled */ ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG); ctxt->info.queue_mapping[0] = cpu_to_le16(ch->base_queue); ctxt->info.valid_sections |= cpu_to_le16(sections); } /** * i40e_add_channel - add a channel by adding VSI * @pf: ptr to PF device * @uplink_seid: underlying HW switching element (VEB) ID * @ch: ptr to channel structure * * Add a channel (VSI) using add_vsi and queue_map **/ static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid, struct i40e_channel *ch) { struct i40e_hw *hw = &pf->hw; struct i40e_vsi_context ctxt; u8 enabled_tc = 0x1; /* TC0 enabled */ int ret; if (ch->type != I40E_VSI_VMDQ2) { dev_info(&pf->pdev->dev, "add new vsi failed, ch->type %d\n", ch->type); return -EINVAL; } memset(&ctxt, 0, sizeof(ctxt)); ctxt.pf_num = hw->pf_id; ctxt.vf_num = 0; ctxt.uplink_seid = uplink_seid; ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL; if (ch->type == I40E_VSI_VMDQ2) ctxt.flags = I40E_AQ_VSI_TYPE_VMDQ2; if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) { ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); ctxt.info.switch_id = cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB); } /* Set queue map for a given VSI context */ i40e_channel_setup_queue_map(pf, &ctxt, ch); /* Now time to create VSI */ ret = i40e_aq_add_vsi(hw, &ctxt, NULL); if (ret) { dev_info(&pf->pdev->dev, "add new vsi failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); return -ENOENT; } /* Success, update channel, set enabled_tc only if the channel * is not a macvlan */ ch->enabled_tc = !i40e_is_channel_macvlan(ch) && enabled_tc; ch->seid = ctxt.seid; ch->vsi_number = ctxt.vsi_number; ch->stat_counter_idx = le16_to_cpu(ctxt.info.stat_counter_idx); /* copy just the sections touched not the entire info * since not all sections are valid as returned by * update vsi params */ ch->info.mapping_flags = ctxt.info.mapping_flags; memcpy(&ch->info.queue_mapping, &ctxt.info.queue_mapping, sizeof(ctxt.info.queue_mapping)); memcpy(&ch->info.tc_mapping, ctxt.info.tc_mapping, sizeof(ctxt.info.tc_mapping)); return 0; } static int i40e_channel_config_bw(struct i40e_vsi *vsi, struct i40e_channel *ch, u8 *bw_share) { struct i40e_aqc_configure_vsi_tc_bw_data bw_data; int ret; int i; memset(&bw_data, 0, sizeof(bw_data)); bw_data.tc_valid_bits = ch->enabled_tc; for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) bw_data.tc_bw_credits[i] = bw_share[i]; ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, ch->seid, &bw_data, NULL); if (ret) { dev_info(&vsi->back->pdev->dev, "Config VSI BW allocation per TC failed, aq_err: %d for new_vsi->seid %u\n", vsi->back->hw.aq.asq_last_status, ch->seid); return -EINVAL; } for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) ch->info.qs_handle[i] = bw_data.qs_handles[i]; return 0; } /** * i40e_channel_config_tx_ring - config TX ring associated with new channel * @pf: ptr to PF device * @vsi: the VSI being setup * @ch: ptr to channel structure * * Configure TX rings associated with channel (VSI) since queues are being * from parent VSI. **/ static int i40e_channel_config_tx_ring(struct i40e_pf *pf, struct i40e_vsi *vsi, struct i40e_channel *ch) { u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0}; int ret; int i; /* Enable ETS TCs with equal BW Share for now across all VSIs */ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (ch->enabled_tc & BIT(i)) bw_share[i] = 1; } /* configure BW for new VSI */ ret = i40e_channel_config_bw(vsi, ch, bw_share); if (ret) { dev_info(&vsi->back->pdev->dev, "Failed configuring TC map %d for channel (seid %u)\n", ch->enabled_tc, ch->seid); return ret; } for (i = 0; i < ch->num_queue_pairs; i++) { struct i40e_ring *tx_ring, *rx_ring; u16 pf_q; pf_q = ch->base_queue + i; /* Get to TX ring ptr of main VSI, for re-setup TX queue * context */ tx_ring = vsi->tx_rings[pf_q]; tx_ring->ch = ch; /* Get the RX ring ptr */ rx_ring = vsi->rx_rings[pf_q]; rx_ring->ch = ch; } return 0; } /** * i40e_setup_hw_channel - setup new channel * @pf: ptr to PF device * @vsi: the VSI being setup * @ch: ptr to channel structure * @uplink_seid: underlying HW switching element (VEB) ID * @type: type of channel to be created (VMDq2/VF) * * Setup new channel (VSI) based on specified type (VMDq2/VF) * and configures TX rings accordingly **/ static inline int i40e_setup_hw_channel(struct i40e_pf *pf, struct i40e_vsi *vsi, struct i40e_channel *ch, u16 uplink_seid, u8 type) { int ret; ch->initialized = false; ch->base_queue = vsi->next_base_queue; ch->type = type; /* Proceed with creation of channel (VMDq2) VSI */ ret = i40e_add_channel(pf, uplink_seid, ch); if (ret) { dev_info(&pf->pdev->dev, "failed to add_channel using uplink_seid %u\n", uplink_seid); return ret; } /* Mark the successful creation of channel */ ch->initialized = true; /* Reconfigure TX queues using QTX_CTL register */ ret = i40e_channel_config_tx_ring(pf, vsi, ch); if (ret) { dev_info(&pf->pdev->dev, "failed to configure TX rings for channel %u\n", ch->seid); return ret; } /* update 'next_base_queue' */ vsi->next_base_queue = vsi->next_base_queue + ch->num_queue_pairs; dev_dbg(&pf->pdev->dev, "Added channel: vsi_seid %u, vsi_number %u, stat_counter_idx %u, num_queue_pairs %u, pf->next_base_queue %d\n", ch->seid, ch->vsi_number, ch->stat_counter_idx, ch->num_queue_pairs, vsi->next_base_queue); return ret; } /** * i40e_setup_channel - setup new channel using uplink element * @pf: ptr to PF device * @vsi: pointer to the VSI to set up the channel within * @ch: ptr to channel structure * * Setup new channel (VSI) based on specified type (VMDq2/VF) * and uplink switching element (uplink_seid) **/ static bool i40e_setup_channel(struct i40e_pf *pf, struct i40e_vsi *vsi, struct i40e_channel *ch) { struct i40e_vsi *main_vsi; u8 vsi_type; u16 seid; int ret; if (vsi->type == I40E_VSI_MAIN) { vsi_type = I40E_VSI_VMDQ2; } else { dev_err(&pf->pdev->dev, "unsupported parent vsi type(%d)\n", vsi->type); return false; } /* underlying switching element */ main_vsi = i40e_pf_get_main_vsi(pf); seid = main_vsi->uplink_seid; /* create channel (VSI), configure TX rings */ ret = i40e_setup_hw_channel(pf, vsi, ch, seid, vsi_type); if (ret) { dev_err(&pf->pdev->dev, "failed to setup hw_channel\n"); return false; } return ch->initialized ? true : false; } /** * i40e_validate_and_set_switch_mode - sets up switch mode correctly * @vsi: ptr to VSI which has PF backing * * Sets up switch mode correctly if it needs to be changed and perform * what are allowed modes. **/ static int i40e_validate_and_set_switch_mode(struct i40e_vsi *vsi) { u8 mode; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; int ret; ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_dev_capabilities); if (ret) return -EINVAL; if (hw->dev_caps.switch_mode) { /* if switch mode is set, support mode2 (non-tunneled for * cloud filter) for now */ u32 switch_mode = hw->dev_caps.switch_mode & I40E_SWITCH_MODE_MASK; if (switch_mode >= I40E_CLOUD_FILTER_MODE1) { if (switch_mode == I40E_CLOUD_FILTER_MODE2) return 0; dev_err(&pf->pdev->dev, "Invalid switch_mode (%d), only non-tunneled mode for cloud filter is supported\n", hw->dev_caps.switch_mode); return -EINVAL; } } /* Set Bit 7 to be valid */ mode = I40E_AQ_SET_SWITCH_BIT7_VALID; /* Set L4type for TCP support */ mode |= I40E_AQ_SET_SWITCH_L4_TYPE_TCP; /* Set cloud filter mode */ mode |= I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL; /* Prep mode field for set_switch_config */ ret = i40e_aq_set_switch_config(hw, pf->last_sw_conf_flags, pf->last_sw_conf_valid_flags, mode, NULL); if (ret && hw->aq.asq_last_status != I40E_AQ_RC_ESRCH) dev_err(&pf->pdev->dev, "couldn't set switch config bits, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(hw, hw->aq.asq_last_status)); return ret; } /** * i40e_create_queue_channel - function to create channel * @vsi: VSI to be configured * @ch: ptr to channel (it contains channel specific params) * * This function creates channel (VSI) using num_queues specified by user, * reconfigs RSS if needed. **/ int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch) { struct i40e_pf *pf = vsi->back; bool reconfig_rss; int err; if (!ch) return -EINVAL; if (!ch->num_queue_pairs) { dev_err(&pf->pdev->dev, "Invalid num_queues requested: %d\n", ch->num_queue_pairs); return -EINVAL; } /* validate user requested num_queues for channel */ err = i40e_validate_num_queues(pf, ch->num_queue_pairs, vsi, &reconfig_rss); if (err) { dev_info(&pf->pdev->dev, "Failed to validate num_queues (%d)\n", ch->num_queue_pairs); return -EINVAL; } /* By default we are in VEPA mode, if this is the first VF/VMDq * VSI to be added switch to VEB mode. */ if (!test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) { set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); if (vsi->type == I40E_VSI_MAIN) { if (i40e_is_tc_mqprio_enabled(pf)) i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); else i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG); } /* now onwards for main VSI, number of queues will be value * of TC0's queue count */ } /* By this time, vsi->cnt_q_avail shall be set to non-zero and * it should be more than num_queues */ if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_queue_pairs) { dev_dbg(&pf->pdev->dev, "Error: cnt_q_avail (%u) less than num_queues %d\n", vsi->cnt_q_avail, ch->num_queue_pairs); return -EINVAL; } /* reconfig_rss only if vsi type is MAIN_VSI */ if (reconfig_rss && (vsi->type == I40E_VSI_MAIN)) { err = i40e_vsi_reconfig_rss(vsi, ch->num_queue_pairs); if (err) { dev_info(&pf->pdev->dev, "Error: unable to reconfig rss for num_queues (%u)\n", ch->num_queue_pairs); return -EINVAL; } } if (!i40e_setup_channel(pf, vsi, ch)) { dev_info(&pf->pdev->dev, "Failed to setup channel\n"); return -EINVAL; } dev_info(&pf->pdev->dev, "Setup channel (id:%u) utilizing num_queues %d\n", ch->seid, ch->num_queue_pairs); /* configure VSI for BW limit */ if (ch->max_tx_rate) { u64 credits = ch->max_tx_rate; if (i40e_set_bw_limit(vsi, ch->seid, ch->max_tx_rate)) return -EINVAL; do_div(credits, I40E_BW_CREDIT_DIVISOR); dev_dbg(&pf->pdev->dev, "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n", ch->max_tx_rate, credits, ch->seid); } /* in case of VF, this will be main SRIOV VSI */ ch->parent_vsi = vsi; /* and update main_vsi's count for queue_available to use */ vsi->cnt_q_avail -= ch->num_queue_pairs; return 0; } /** * i40e_configure_queue_channels - Add queue channel for the given TCs * @vsi: VSI to be configured * * Configures queue channel mapping to the given TCs **/ static int i40e_configure_queue_channels(struct i40e_vsi *vsi) { struct i40e_channel *ch; u64 max_rate = 0; int ret = 0, i; /* Create app vsi with the TCs. Main VSI with TC0 is already set up */ vsi->tc_seid_map[0] = vsi->seid; for (i = 1; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (vsi->tc_config.enabled_tc & BIT(i)) { ch = kzalloc(sizeof(*ch), GFP_KERNEL); if (!ch) { ret = -ENOMEM; goto err_free; } INIT_LIST_HEAD(&ch->list); ch->num_queue_pairs = vsi->tc_config.tc_info[i].qcount; ch->base_queue = vsi->tc_config.tc_info[i].qoffset; /* Bandwidth limit through tc interface is in bytes/s, * change to Mbit/s */ max_rate = vsi->mqprio_qopt.max_rate[i]; do_div(max_rate, I40E_BW_MBPS_DIVISOR); ch->max_tx_rate = max_rate; list_add_tail(&ch->list, &vsi->ch_list); ret = i40e_create_queue_channel(vsi, ch); if (ret) { dev_err(&vsi->back->pdev->dev, "Failed creating queue channel with TC%d: queues %d\n", i, ch->num_queue_pairs); goto err_free; } vsi->tc_seid_map[i] = ch->seid; } } /* reset to reconfigure TX queue contexts */ i40e_do_reset(vsi->back, I40E_PF_RESET_FLAG, true); return ret; err_free: i40e_remove_queue_channels(vsi); return ret; } /** * i40e_veb_config_tc - Configure TCs for given VEB * @veb: given VEB * @enabled_tc: TC bitmap * * Configures given TC bitmap for VEB (switching) element **/ int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc) { struct i40e_aqc_configure_switching_comp_bw_config_data bw_data = {0}; struct i40e_pf *pf = veb->pf; int ret = 0; int i; /* No TCs or already enabled TCs just return */ if (!enabled_tc || veb->enabled_tc == enabled_tc) return ret; bw_data.tc_valid_bits = enabled_tc; /* bw_data.absolute_credits is not set (relative) */ /* Enable ETS TCs with equal BW Share for now */ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (enabled_tc & BIT(i)) bw_data.tc_bw_share_credits[i] = 1; } ret = i40e_aq_config_switch_comp_bw_config(&pf->hw, veb->seid, &bw_data, NULL); if (ret) { dev_info(&pf->pdev->dev, "VEB bw config failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); goto out; } /* Update the BW information */ ret = i40e_veb_get_bw_info(veb); if (ret) { dev_info(&pf->pdev->dev, "Failed getting veb bw config, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); } out: return ret; } #ifdef CONFIG_I40E_DCB /** * i40e_dcb_reconfigure - Reconfigure all VEBs and VSIs * @pf: PF struct * * Reconfigure VEB/VSIs on a given PF; it is assumed that * the caller would've quiesce all the VSIs before calling * this function **/ static void i40e_dcb_reconfigure(struct i40e_pf *pf) { struct i40e_vsi *vsi; struct i40e_veb *veb; u8 tc_map = 0; int ret; int v; /* Enable the TCs available on PF to all VEBs */ tc_map = i40e_pf_get_tc_map(pf); if (tc_map == I40E_DEFAULT_TRAFFIC_CLASS) return; i40e_pf_for_each_veb(pf, v, veb) { ret = i40e_veb_config_tc(veb, tc_map); if (ret) { dev_info(&pf->pdev->dev, "Failed configuring TC for VEB seid=%d\n", veb->seid); /* Will try to configure as many components */ } } /* Update each VSI */ i40e_pf_for_each_vsi(pf, v, vsi) { /* - Enable all TCs for the LAN VSI * - For all others keep them at TC0 for now */ if (vsi->type == I40E_VSI_MAIN) tc_map = i40e_pf_get_tc_map(pf); else tc_map = I40E_DEFAULT_TRAFFIC_CLASS; ret = i40e_vsi_config_tc(vsi, tc_map); if (ret) { dev_info(&pf->pdev->dev, "Failed configuring TC for VSI seid=%d\n", vsi->seid); /* Will try to configure as many components */ } else { /* Re-configure VSI vectors based on updated TC map */ i40e_vsi_map_rings_to_vectors(vsi); if (vsi->netdev) i40e_dcbnl_set_all(vsi); } } } /** * i40e_resume_port_tx - Resume port Tx * @pf: PF struct * * Resume a port's Tx and issue a PF reset in case of failure to * resume. **/ static int i40e_resume_port_tx(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; int ret; ret = i40e_aq_resume_port_tx(hw, NULL); if (ret) { dev_info(&pf->pdev->dev, "Resume Port Tx failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); /* Schedule PF reset to recover */ set_bit(__I40E_PF_RESET_REQUESTED, pf->state); i40e_service_event_schedule(pf); } return ret; } /** * i40e_suspend_port_tx - Suspend port Tx * @pf: PF struct * * Suspend a port's Tx and issue a PF reset in case of failure. **/ static int i40e_suspend_port_tx(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; int ret; ret = i40e_aq_suspend_port_tx(hw, pf->mac_seid, NULL); if (ret) { dev_info(&pf->pdev->dev, "Suspend Port Tx failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); /* Schedule PF reset to recover */ set_bit(__I40E_PF_RESET_REQUESTED, pf->state); i40e_service_event_schedule(pf); } return ret; } /** * i40e_hw_set_dcb_config - Program new DCBX settings into HW * @pf: PF being configured * @new_cfg: New DCBX configuration * * Program DCB settings into HW and reconfigure VEB/VSIs on * given PF. Uses "Set LLDP MIB" AQC to program the hardware. **/ static int i40e_hw_set_dcb_config(struct i40e_pf *pf, struct i40e_dcbx_config *new_cfg) { struct i40e_dcbx_config *old_cfg = &pf->hw.local_dcbx_config; int ret; /* Check if need reconfiguration */ if (!memcmp(&new_cfg, &old_cfg, sizeof(new_cfg))) { dev_dbg(&pf->pdev->dev, "No Change in DCB Config required.\n"); return 0; } /* Config change disable all VSIs */ i40e_pf_quiesce_all_vsi(pf); /* Copy the new config to the current config */ *old_cfg = *new_cfg; old_cfg->etsrec = old_cfg->etscfg; ret = i40e_set_dcb_config(&pf->hw); if (ret) { dev_info(&pf->pdev->dev, "Set DCB Config failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); goto out; } /* Changes in configuration update VEB/VSI */ i40e_dcb_reconfigure(pf); out: /* In case of reset do not try to resume anything */ if (!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) { /* Re-start the VSIs if disabled */ ret = i40e_resume_port_tx(pf); /* In case of error no point in resuming VSIs */ if (ret) goto err; i40e_pf_unquiesce_all_vsi(pf); } err: return ret; } /** * i40e_hw_dcb_config - Program new DCBX settings into HW * @pf: PF being configured * @new_cfg: New DCBX configuration * * Program DCB settings into HW and reconfigure VEB/VSIs on * given PF **/ int i40e_hw_dcb_config(struct i40e_pf *pf, struct i40e_dcbx_config *new_cfg) { struct i40e_aqc_configure_switching_comp_ets_data ets_data; u8 prio_type[I40E_MAX_TRAFFIC_CLASS] = {0}; u32 mfs_tc[I40E_MAX_TRAFFIC_CLASS]; struct i40e_dcbx_config *old_cfg; u8 mode[I40E_MAX_TRAFFIC_CLASS]; struct i40e_rx_pb_config pb_cfg; struct i40e_hw *hw = &pf->hw; u8 num_ports = hw->num_ports; bool need_reconfig; int ret = -EINVAL; u8 lltc_map = 0; u8 tc_map = 0; u8 new_numtc; u8 i; dev_dbg(&pf->pdev->dev, "Configuring DCB registers directly\n"); /* Un-pack information to Program ETS HW via shared API * numtc, tcmap * LLTC map * ETS/NON-ETS arbiter mode * max exponent (credit refills) * Total number of ports * PFC priority bit-map * Priority Table * BW % per TC * Arbiter mode between UPs sharing same TC * TSA table (ETS or non-ETS) * EEE enabled or not * MFS TC table */ new_numtc = i40e_dcb_get_num_tc(new_cfg); memset(&ets_data, 0, sizeof(ets_data)); for (i = 0; i < new_numtc; i++) { tc_map |= BIT(i); switch (new_cfg->etscfg.tsatable[i]) { case I40E_IEEE_TSA_ETS: prio_type[i] = I40E_DCB_PRIO_TYPE_ETS; ets_data.tc_bw_share_credits[i] = new_cfg->etscfg.tcbwtable[i]; break; case I40E_IEEE_TSA_STRICT: prio_type[i] = I40E_DCB_PRIO_TYPE_STRICT; lltc_map |= BIT(i); ets_data.tc_bw_share_credits[i] = I40E_DCB_STRICT_PRIO_CREDITS; break; default: /* Invalid TSA type */ need_reconfig = false; goto out; } } old_cfg = &hw->local_dcbx_config; /* Check if need reconfiguration */ need_reconfig = i40e_dcb_need_reconfig(pf, old_cfg, new_cfg); /* If needed, enable/disable frame tagging, disable all VSIs * and suspend port tx */ if (need_reconfig) { /* Enable DCB tagging only when more than one TC */ if (new_numtc > 1) set_bit(I40E_FLAG_DCB_ENA, pf->flags); else clear_bit(I40E_FLAG_DCB_ENA, pf->flags); set_bit(__I40E_PORT_SUSPENDED, pf->state); /* Reconfiguration needed quiesce all VSIs */ i40e_pf_quiesce_all_vsi(pf); ret = i40e_suspend_port_tx(pf); if (ret) goto err; } /* Configure Port ETS Tx Scheduler */ ets_data.tc_valid_bits = tc_map; ets_data.tc_strict_priority_flags = lltc_map; ret = i40e_aq_config_switch_comp_ets (hw, pf->mac_seid, &ets_data, i40e_aqc_opc_modify_switching_comp_ets, NULL); if (ret) { dev_info(&pf->pdev->dev, "Modify Port ETS failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); goto out; } /* Configure Rx ETS HW */ memset(&mode, I40E_DCB_ARB_MODE_ROUND_ROBIN, sizeof(mode)); i40e_dcb_hw_set_num_tc(hw, new_numtc); i40e_dcb_hw_rx_fifo_config(hw, I40E_DCB_ARB_MODE_ROUND_ROBIN, I40E_DCB_ARB_MODE_STRICT_PRIORITY, I40E_DCB_DEFAULT_MAX_EXPONENT, lltc_map); i40e_dcb_hw_rx_cmd_monitor_config(hw, new_numtc, num_ports); i40e_dcb_hw_rx_ets_bw_config(hw, new_cfg->etscfg.tcbwtable, mode, prio_type); i40e_dcb_hw_pfc_config(hw, new_cfg->pfc.pfcenable, new_cfg->etscfg.prioritytable); i40e_dcb_hw_rx_up2tc_config(hw, new_cfg->etscfg.prioritytable); /* Configure Rx Packet Buffers in HW */ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { struct i40e_vsi *main_vsi = i40e_pf_get_main_vsi(pf); mfs_tc[i] = main_vsi->netdev->mtu; mfs_tc[i] += I40E_PACKET_HDR_PAD; } i40e_dcb_hw_calculate_pool_sizes(hw, num_ports, false, new_cfg->pfc.pfcenable, mfs_tc, &pb_cfg); i40e_dcb_hw_rx_pb_config(hw, &pf->pb_cfg, &pb_cfg); /* Update the local Rx Packet buffer config */ pf->pb_cfg = pb_cfg; /* Inform the FW about changes to DCB configuration */ ret = i40e_aq_dcb_updated(&pf->hw, NULL); if (ret) { dev_info(&pf->pdev->dev, "DCB Updated failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); goto out; } /* Update the port DCBx configuration */ *old_cfg = *new_cfg; /* Changes in configuration update VEB/VSI */ i40e_dcb_reconfigure(pf); out: /* Re-start the VSIs if disabled */ if (need_reconfig) { ret = i40e_resume_port_tx(pf); clear_bit(__I40E_PORT_SUSPENDED, pf->state); /* In case of error no point in resuming VSIs */ if (ret) goto err; /* Wait for the PF's queues to be disabled */ ret = i40e_pf_wait_queues_disabled(pf); if (ret) { /* Schedule PF reset to recover */ set_bit(__I40E_PF_RESET_REQUESTED, pf->state); i40e_service_event_schedule(pf); goto err; } else { i40e_pf_unquiesce_all_vsi(pf); set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); set_bit(__I40E_CLIENT_L2_CHANGE, pf->state); } /* registers are set, lets apply */ if (test_bit(I40E_HW_CAP_USE_SET_LLDP_MIB, pf->hw.caps)) ret = i40e_hw_set_dcb_config(pf, new_cfg); } err: return ret; } /** * i40e_dcb_sw_default_config - Set default DCB configuration when DCB in SW * @pf: PF being queried * * Set default DCB configuration in case DCB is to be done in SW. **/ int i40e_dcb_sw_default_config(struct i40e_pf *pf) { struct i40e_dcbx_config *dcb_cfg = &pf->hw.local_dcbx_config; struct i40e_aqc_configure_switching_comp_ets_data ets_data; struct i40e_hw *hw = &pf->hw; int err; if (test_bit(I40E_HW_CAP_USE_SET_LLDP_MIB, pf->hw.caps)) { /* Update the local cached instance with TC0 ETS */ memset(&pf->tmp_cfg, 0, sizeof(struct i40e_dcbx_config)); pf->tmp_cfg.etscfg.willing = I40E_IEEE_DEFAULT_ETS_WILLING; pf->tmp_cfg.etscfg.maxtcs = 0; pf->tmp_cfg.etscfg.tcbwtable[0] = I40E_IEEE_DEFAULT_ETS_TCBW; pf->tmp_cfg.etscfg.tsatable[0] = I40E_IEEE_TSA_ETS; pf->tmp_cfg.pfc.willing = I40E_IEEE_DEFAULT_PFC_WILLING; pf->tmp_cfg.pfc.pfccap = I40E_MAX_TRAFFIC_CLASS; /* FW needs one App to configure HW */ pf->tmp_cfg.numapps = I40E_IEEE_DEFAULT_NUM_APPS; pf->tmp_cfg.app[0].selector = I40E_APP_SEL_ETHTYPE; pf->tmp_cfg.app[0].priority = I40E_IEEE_DEFAULT_APP_PRIO; pf->tmp_cfg.app[0].protocolid = I40E_APP_PROTOID_FCOE; return i40e_hw_set_dcb_config(pf, &pf->tmp_cfg); } memset(&ets_data, 0, sizeof(ets_data)); ets_data.tc_valid_bits = I40E_DEFAULT_TRAFFIC_CLASS; /* TC0 only */ ets_data.tc_strict_priority_flags = 0; /* ETS */ ets_data.tc_bw_share_credits[0] = I40E_IEEE_DEFAULT_ETS_TCBW; /* 100% to TC0 */ /* Enable ETS on the Physical port */ err = i40e_aq_config_switch_comp_ets (hw, pf->mac_seid, &ets_data, i40e_aqc_opc_enable_switching_comp_ets, NULL); if (err) { dev_info(&pf->pdev->dev, "Enable Port ETS failed, err %pe aq_err %s\n", ERR_PTR(err), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); err = -ENOENT; goto out; } /* Update the local cached instance with TC0 ETS */ dcb_cfg->etscfg.willing = I40E_IEEE_DEFAULT_ETS_WILLING; dcb_cfg->etscfg.cbs = 0; dcb_cfg->etscfg.maxtcs = I40E_MAX_TRAFFIC_CLASS; dcb_cfg->etscfg.tcbwtable[0] = I40E_IEEE_DEFAULT_ETS_TCBW; out: return err; } /** * i40e_init_pf_dcb - Initialize DCB configuration * @pf: PF being configured * * Query the current DCB configuration and cache it * in the hardware structure **/ static int i40e_init_pf_dcb(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; int err; /* Do not enable DCB for SW1 and SW2 images even if the FW is capable * Also do not enable DCBx if FW LLDP agent is disabled */ if (test_bit(I40E_HW_CAP_NO_DCB_SUPPORT, pf->hw.caps)) { dev_info(&pf->pdev->dev, "DCB is not supported.\n"); err = -EOPNOTSUPP; goto out; } if (test_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags)) { dev_info(&pf->pdev->dev, "FW LLDP is disabled, attempting SW DCB\n"); err = i40e_dcb_sw_default_config(pf); if (err) { dev_info(&pf->pdev->dev, "Could not initialize SW DCB\n"); goto out; } dev_info(&pf->pdev->dev, "SW DCB initialization succeeded.\n"); pf->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; /* at init capable but disabled */ set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); clear_bit(I40E_FLAG_DCB_ENA, pf->flags); goto out; } err = i40e_init_dcb(hw, true); if (!err) { /* Device/Function is not DCBX capable */ if ((!hw->func_caps.dcb) || (hw->dcbx_status == I40E_DCBX_STATUS_DISABLED)) { dev_info(&pf->pdev->dev, "DCBX offload is not supported or is disabled for this PF.\n"); } else { /* When status is not DISABLED then DCBX in FW */ pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED | DCB_CAP_DCBX_VER_IEEE; set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); /* Enable DCB tagging only when more than one TC * or explicitly disable if only one TC */ if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1) set_bit(I40E_FLAG_DCB_ENA, pf->flags); else clear_bit(I40E_FLAG_DCB_ENA, pf->flags); dev_dbg(&pf->pdev->dev, "DCBX offload is supported for this PF.\n"); } } else if (pf->hw.aq.asq_last_status == I40E_AQ_RC_EPERM) { dev_info(&pf->pdev->dev, "FW LLDP disabled for this PF.\n"); set_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags); } else { dev_info(&pf->pdev->dev, "Query for DCB configuration failed, err %pe aq_err %s\n", ERR_PTR(err), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); } out: return err; } #endif /* CONFIG_I40E_DCB */ static void i40e_print_link_message_eee(struct i40e_vsi *vsi, const char *speed, const char *fc) { struct ethtool_keee kedata; memzero_explicit(&kedata, sizeof(kedata)); if (vsi->netdev->ethtool_ops->get_eee) vsi->netdev->ethtool_ops->get_eee(vsi->netdev, &kedata); if (!linkmode_empty(kedata.supported)) netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex, Flow Control: %s, EEE: %s\n", speed, fc, kedata.eee_enabled ? "Enabled" : "Disabled"); else netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex, Flow Control: %s\n", speed, fc); } /** * i40e_print_link_message - print link up or down * @vsi: the VSI for which link needs a message * @isup: true of link is up, false otherwise */ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) { enum i40e_aq_link_speed new_speed; struct i40e_pf *pf = vsi->back; char *speed = "Unknown"; char *fc = "Unknown"; char *fec = ""; char *req_fec = ""; char *an = ""; if (isup) new_speed = pf->hw.phy.link_info.link_speed; else new_speed = I40E_LINK_SPEED_UNKNOWN; if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed)) return; vsi->current_isup = isup; vsi->current_speed = new_speed; if (!isup) { netdev_info(vsi->netdev, "NIC Link is Down\n"); return; } /* Warn user if link speed on NPAR enabled partition is not at * least 10GB */ if (pf->hw.func_caps.npar_enable && (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB || pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB)) netdev_warn(vsi->netdev, "The partition detected link speed that is less than 10Gbps\n"); switch (pf->hw.phy.link_info.link_speed) { case I40E_LINK_SPEED_40GB: speed = "40 G"; break; case I40E_LINK_SPEED_20GB: speed = "20 G"; break; case I40E_LINK_SPEED_25GB: speed = "25 G"; break; case I40E_LINK_SPEED_10GB: speed = "10 G"; break; case I40E_LINK_SPEED_5GB: speed = "5 G"; break; case I40E_LINK_SPEED_2_5GB: speed = "2.5 G"; break; case I40E_LINK_SPEED_1GB: speed = "1000 M"; break; case I40E_LINK_SPEED_100MB: speed = "100 M"; break; default: break; } switch (pf->hw.fc.current_mode) { case I40E_FC_FULL: fc = "RX/TX"; break; case I40E_FC_TX_PAUSE: fc = "TX"; break; case I40E_FC_RX_PAUSE: fc = "RX"; break; default: fc = "None"; break; } if (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) { req_fec = "None"; fec = "None"; an = "False"; if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED) an = "True"; if (pf->hw.phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_KR_ENA) fec = "CL74 FC-FEC/BASE-R"; else if (pf->hw.phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_RS_ENA) fec = "CL108 RS-FEC"; /* 'CL108 RS-FEC' should be displayed when RS is requested, or * both RS and FC are requested */ if (vsi->back->hw.phy.link_info.req_fec_info & (I40E_AQ_REQUEST_FEC_KR | I40E_AQ_REQUEST_FEC_RS)) { if (vsi->back->hw.phy.link_info.req_fec_info & I40E_AQ_REQUEST_FEC_RS) req_fec = "CL108 RS-FEC"; else req_fec = "CL74 FC-FEC/BASE-R"; } netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n", speed, req_fec, fec, an, fc); } else if (pf->hw.device_id == I40E_DEV_ID_KX_X722) { req_fec = "None"; fec = "None"; an = "False"; if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED) an = "True"; if (pf->hw.phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_KR_ENA) fec = "CL74 FC-FEC/BASE-R"; if (pf->hw.phy.link_info.req_fec_info & I40E_AQ_REQUEST_FEC_KR) req_fec = "CL74 FC-FEC/BASE-R"; netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n", speed, req_fec, fec, an, fc); } else { i40e_print_link_message_eee(vsi, speed, fc); } } /** * i40e_up_complete - Finish the last steps of bringing up a connection * @vsi: the VSI being configured **/ static int i40e_up_complete(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int err; if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) i40e_vsi_configure_msix(vsi); else i40e_configure_msi_and_legacy(vsi); /* start rings */ err = i40e_vsi_start_rings(vsi); if (err) return err; clear_bit(__I40E_VSI_DOWN, vsi->state); i40e_napi_enable_all(vsi); i40e_vsi_enable_irq(vsi); if ((pf->hw.phy.link_info.link_info & I40E_AQ_LINK_UP) && (vsi->netdev)) { i40e_print_link_message(vsi, true); netif_tx_start_all_queues(vsi->netdev); netif_carrier_on(vsi->netdev); } /* replay FDIR SB filters */ if (vsi->type == I40E_VSI_FDIR) { /* reset fd counters */ pf->fd_add_err = 0; pf->fd_atr_cnt = 0; i40e_fdir_filter_restore(vsi); } /* On the next run of the service_task, notify any clients of the new * opened netdev */ set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); i40e_service_event_schedule(pf); return 0; } /** * i40e_vsi_reinit_locked - Reset the VSI * @vsi: the VSI being configured * * Rebuild the ring structs after some configuration * has changed, e.g. MTU size. **/ static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) usleep_range(1000, 2000); i40e_down(vsi); i40e_up(vsi); clear_bit(__I40E_CONFIG_BUSY, pf->state); } /** * i40e_force_link_state - Force the link status * @pf: board private structure * @is_up: whether the link state should be forced up or down **/ static int i40e_force_link_state(struct i40e_pf *pf, bool is_up) { struct i40e_aq_get_phy_abilities_resp abilities; struct i40e_aq_set_phy_config config = {0}; bool non_zero_phy_type = is_up; struct i40e_hw *hw = &pf->hw; u64 mask; u8 speed; int err; /* Card might've been put in an unstable state by other drivers * and applications, which causes incorrect speed values being * set on startup. In order to clear speed registers, we call * get_phy_capabilities twice, once to get initial state of * available speeds, and once to get current PHY config. */ err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities, NULL); if (err) { dev_err(&pf->pdev->dev, "failed to get phy cap., ret = %pe last_status = %s\n", ERR_PTR(err), i40e_aq_str(hw, hw->aq.asq_last_status)); return err; } speed = abilities.link_speed; /* Get the current phy config */ err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, NULL); if (err) { dev_err(&pf->pdev->dev, "failed to get phy cap., ret = %pe last_status = %s\n", ERR_PTR(err), i40e_aq_str(hw, hw->aq.asq_last_status)); return err; } /* If link needs to go up, but was not forced to go down, * and its speed values are OK, no need for a flap * if non_zero_phy_type was set, still need to force up */ if (test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags)) non_zero_phy_type = true; else if (is_up && abilities.phy_type != 0 && abilities.link_speed != 0) return 0; /* To force link we need to set bits for all supported PHY types, * but there are now more than 32, so we need to split the bitmap * across two fields. */ mask = I40E_PHY_TYPES_BITMASK; config.phy_type = non_zero_phy_type ? cpu_to_le32((u32)(mask & 0xffffffff)) : 0; config.phy_type_ext = non_zero_phy_type ? (u8)((mask >> 32) & 0xff) : 0; /* Copy the old settings, except of phy_type */ config.abilities = abilities.abilities; if (test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags)) { if (is_up) config.abilities |= I40E_AQ_PHY_ENABLE_LINK; else config.abilities &= ~(I40E_AQ_PHY_ENABLE_LINK); } if (abilities.link_speed != 0) config.link_speed = abilities.link_speed; else config.link_speed = speed; config.eee_capability = abilities.eee_capability; config.eeer = abilities.eeer_val; config.low_power_ctrl = abilities.d3_lpan; config.fec_config = abilities.fec_cfg_curr_mod_ext_info & I40E_AQ_PHY_FEC_CONFIG_MASK; err = i40e_aq_set_phy_config(hw, &config, NULL); if (err) { dev_err(&pf->pdev->dev, "set phy config ret = %pe last_status = %s\n", ERR_PTR(err), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); return err; } /* Update the link info */ err = i40e_update_link_info(hw); if (err) { /* Wait a little bit (on 40G cards it sometimes takes a really * long time for link to come back from the atomic reset) * and try once more */ msleep(1000); i40e_update_link_info(hw); } i40e_aq_set_link_restart_an(hw, is_up, NULL); return 0; } /** * i40e_up - Bring the connection back up after being down * @vsi: the VSI being configured **/ int i40e_up(struct i40e_vsi *vsi) { int err; if (vsi->type == I40E_VSI_MAIN && (test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags) || test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, vsi->back->flags))) i40e_force_link_state(vsi->back, true); err = i40e_vsi_configure(vsi); if (!err) err = i40e_up_complete(vsi); return err; } /** * i40e_down - Shutdown the connection processing * @vsi: the VSI being stopped **/ void i40e_down(struct i40e_vsi *vsi) { int i; /* It is assumed that the caller of this function * sets the vsi->state __I40E_VSI_DOWN bit. */ if (vsi->netdev) { netif_carrier_off(vsi->netdev); netif_tx_disable(vsi->netdev); } i40e_vsi_disable_irq(vsi); i40e_vsi_stop_rings(vsi); if (vsi->type == I40E_VSI_MAIN && (test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags) || test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, vsi->back->flags))) i40e_force_link_state(vsi->back, false); i40e_napi_disable_all(vsi); for (i = 0; i < vsi->num_queue_pairs; i++) { i40e_clean_tx_ring(vsi->tx_rings[i]); if (i40e_enabled_xdp_vsi(vsi)) { /* Make sure that in-progress ndo_xdp_xmit and * ndo_xsk_wakeup calls are completed. */ synchronize_rcu(); i40e_clean_tx_ring(vsi->xdp_rings[i]); } i40e_clean_rx_ring(vsi->rx_rings[i]); } } /** * i40e_validate_mqprio_qopt- validate queue mapping info * @vsi: the VSI being configured * @mqprio_qopt: queue parametrs **/ static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi, struct tc_mqprio_qopt_offload *mqprio_qopt) { u64 sum_max_rate = 0; u64 max_rate = 0; int i; if (mqprio_qopt->qopt.offset[0] != 0 || mqprio_qopt->qopt.num_tc < 1 || mqprio_qopt->qopt.num_tc > I40E_MAX_TRAFFIC_CLASS) return -EINVAL; for (i = 0; ; i++) { if (!mqprio_qopt->qopt.count[i]) return -EINVAL; if (mqprio_qopt->min_rate[i]) { dev_err(&vsi->back->pdev->dev, "Invalid min tx rate (greater than 0) specified\n"); return -EINVAL; } max_rate = mqprio_qopt->max_rate[i]; do_div(max_rate, I40E_BW_MBPS_DIVISOR); sum_max_rate += max_rate; if (i >= mqprio_qopt->qopt.num_tc - 1) break; if (mqprio_qopt->qopt.offset[i + 1] != (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) return -EINVAL; } if (vsi->num_queue_pairs < (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) { dev_err(&vsi->back->pdev->dev, "Failed to create traffic channel, insufficient number of queues.\n"); return -EINVAL; } if (sum_max_rate > i40e_get_link_speed(vsi)) { dev_err(&vsi->back->pdev->dev, "Invalid max tx rate specified\n"); return -EINVAL; } return 0; } /** * i40e_vsi_set_default_tc_config - set default values for tc configuration * @vsi: the VSI being configured **/ static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi) { u16 qcount; int i; /* Only TC0 is enabled */ vsi->tc_config.numtc = 1; vsi->tc_config.enabled_tc = 1; qcount = min_t(int, vsi->alloc_queue_pairs, i40e_pf_get_max_q_per_tc(vsi->back)); for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { /* For the TC that is not enabled set the offset to default * queue and allocate one queue for the given TC. */ vsi->tc_config.tc_info[i].qoffset = 0; if (i == 0) vsi->tc_config.tc_info[i].qcount = qcount; else vsi->tc_config.tc_info[i].qcount = 1; vsi->tc_config.tc_info[i].netdev_tc = 0; } } /** * i40e_del_macvlan_filter * @hw: pointer to the HW structure * @seid: seid of the channel VSI * @macaddr: the mac address to apply as a filter * @aq_err: store the admin Q error * * This function deletes a mac filter on the channel VSI which serves as the * macvlan. Returns 0 on success. **/ static int i40e_del_macvlan_filter(struct i40e_hw *hw, u16 seid, const u8 *macaddr, int *aq_err) { struct i40e_aqc_remove_macvlan_element_data element; int status; memset(&element, 0, sizeof(element)); ether_addr_copy(element.mac_addr, macaddr); element.vlan_tag = 0; element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH; status = i40e_aq_remove_macvlan(hw, seid, &element, 1, NULL); *aq_err = hw->aq.asq_last_status; return status; } /** * i40e_add_macvlan_filter * @hw: pointer to the HW structure * @seid: seid of the channel VSI * @macaddr: the mac address to apply as a filter * @aq_err: store the admin Q error * * This function adds a mac filter on the channel VSI which serves as the * macvlan. Returns 0 on success. **/ static int i40e_add_macvlan_filter(struct i40e_hw *hw, u16 seid, const u8 *macaddr, int *aq_err) { struct i40e_aqc_add_macvlan_element_data element; u16 cmd_flags = 0; int status; ether_addr_copy(element.mac_addr, macaddr); element.vlan_tag = 0; element.queue_number = 0; element.match_method = I40E_AQC_MM_ERR_NO_RES; cmd_flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH; element.flags = cpu_to_le16(cmd_flags); status = i40e_aq_add_macvlan(hw, seid, &element, 1, NULL); *aq_err = hw->aq.asq_last_status; return status; } /** * i40e_reset_ch_rings - Reset the queue contexts in a channel * @vsi: the VSI we want to access * @ch: the channel we want to access */ static void i40e_reset_ch_rings(struct i40e_vsi *vsi, struct i40e_channel *ch) { struct i40e_ring *tx_ring, *rx_ring; u16 pf_q; int i; for (i = 0; i < ch->num_queue_pairs; i++) { pf_q = ch->base_queue + i; tx_ring = vsi->tx_rings[pf_q]; tx_ring->ch = NULL; rx_ring = vsi->rx_rings[pf_q]; rx_ring->ch = NULL; } } /** * i40e_free_macvlan_channels * @vsi: the VSI we want to access * * This function frees the Qs of the channel VSI from * the stack and also deletes the channel VSIs which * serve as macvlans. */ static void i40e_free_macvlan_channels(struct i40e_vsi *vsi) { struct i40e_channel *ch, *ch_tmp; int ret; if (list_empty(&vsi->macvlan_list)) return; list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { struct i40e_vsi *parent_vsi; if (i40e_is_channel_macvlan(ch)) { i40e_reset_ch_rings(vsi, ch); clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask); netdev_unbind_sb_channel(vsi->netdev, ch->fwd->netdev); netdev_set_sb_channel(ch->fwd->netdev, 0); kfree(ch->fwd); ch->fwd = NULL; } list_del(&ch->list); parent_vsi = ch->parent_vsi; if (!parent_vsi || !ch->initialized) { kfree(ch); continue; } /* remove the VSI */ ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid, NULL); if (ret) dev_err(&vsi->back->pdev->dev, "unable to remove channel (%d) for parent VSI(%d)\n", ch->seid, parent_vsi->seid); kfree(ch); } vsi->macvlan_cnt = 0; } /** * i40e_fwd_ring_up - bring the macvlan device up * @vsi: the VSI we want to access * @vdev: macvlan netdevice * @fwd: the private fwd structure */ static int i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev, struct i40e_fwd_adapter *fwd) { struct i40e_channel *ch = NULL, *ch_tmp, *iter; int ret = 0, num_tc = 1, i, aq_err; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; /* Go through the list and find an available channel */ list_for_each_entry_safe(iter, ch_tmp, &vsi->macvlan_list, list) { if (!i40e_is_channel_macvlan(iter)) { iter->fwd = fwd; /* record configuration for macvlan interface in vdev */ for (i = 0; i < num_tc; i++) netdev_bind_sb_channel_queue(vsi->netdev, vdev, i, iter->num_queue_pairs, iter->base_queue); for (i = 0; i < iter->num_queue_pairs; i++) { struct i40e_ring *tx_ring, *rx_ring; u16 pf_q; pf_q = iter->base_queue + i; /* Get to TX ring ptr */ tx_ring = vsi->tx_rings[pf_q]; tx_ring->ch = iter; /* Get the RX ring ptr */ rx_ring = vsi->rx_rings[pf_q]; rx_ring->ch = iter; } ch = iter; break; } } if (!ch) return -EINVAL; /* Guarantee all rings are updated before we update the * MAC address filter. */ wmb(); /* Add a mac filter */ ret = i40e_add_macvlan_filter(hw, ch->seid, vdev->dev_addr, &aq_err); if (ret) { /* if we cannot add the MAC rule then disable the offload */ macvlan_release_l2fw_offload(vdev); for (i = 0; i < ch->num_queue_pairs; i++) { struct i40e_ring *rx_ring; u16 pf_q; pf_q = ch->base_queue + i; rx_ring = vsi->rx_rings[pf_q]; rx_ring->netdev = NULL; } dev_info(&pf->pdev->dev, "Error adding mac filter on macvlan err %pe, aq_err %s\n", ERR_PTR(ret), i40e_aq_str(hw, aq_err)); netdev_err(vdev, "L2fwd offload disabled to L2 filter error\n"); } return ret; } /** * i40e_setup_macvlans - create the channels which will be macvlans * @vsi: the VSI we want to access * @macvlan_cnt: no. of macvlans to be setup * @qcnt: no. of Qs per macvlan * @vdev: macvlan netdevice */ static int i40e_setup_macvlans(struct i40e_vsi *vsi, u16 macvlan_cnt, u16 qcnt, struct net_device *vdev) { struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_vsi_context ctxt; u16 sections, qmap, num_qps; struct i40e_channel *ch; int i, pow, ret = 0; u8 offset = 0; if (vsi->type != I40E_VSI_MAIN || !macvlan_cnt) return -EINVAL; num_qps = vsi->num_queue_pairs - (macvlan_cnt * qcnt); /* find the next higher power-of-2 of num queue pairs */ pow = fls(roundup_pow_of_two(num_qps) - 1); qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) | (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT); /* Setup context bits for the main VSI */ sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; sections |= I40E_AQ_VSI_PROP_SCHED_VALID; memset(&ctxt, 0, sizeof(ctxt)); ctxt.seid = vsi->seid; ctxt.pf_num = vsi->back->hw.pf_id; ctxt.vf_num = 0; ctxt.uplink_seid = vsi->uplink_seid; ctxt.info = vsi->info; ctxt.info.tc_mapping[0] = cpu_to_le16(qmap); ctxt.info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG); ctxt.info.queue_mapping[0] = cpu_to_le16(vsi->base_queue); ctxt.info.valid_sections |= cpu_to_le16(sections); /* Reconfigure RSS for main VSI with new max queue count */ vsi->rss_size = max_t(u16, num_qps, qcnt); ret = i40e_vsi_config_rss(vsi); if (ret) { dev_info(&pf->pdev->dev, "Failed to reconfig RSS for num_queues (%u)\n", vsi->rss_size); return ret; } vsi->reconfig_rss = true; dev_dbg(&vsi->back->pdev->dev, "Reconfigured RSS with num_queues (%u)\n", vsi->rss_size); vsi->next_base_queue = num_qps; vsi->cnt_q_avail = vsi->num_queue_pairs - num_qps; /* Update the VSI after updating the VSI queue-mapping * information */ ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (ret) { dev_info(&pf->pdev->dev, "Update vsi tc config failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(hw, hw->aq.asq_last_status)); return ret; } /* update the local VSI info with updated queue map */ i40e_vsi_update_queue_map(vsi, &ctxt); vsi->info.valid_sections = 0; /* Create channels for macvlans */ INIT_LIST_HEAD(&vsi->macvlan_list); for (i = 0; i < macvlan_cnt; i++) { ch = kzalloc(sizeof(*ch), GFP_KERNEL); if (!ch) { ret = -ENOMEM; goto err_free; } INIT_LIST_HEAD(&ch->list); ch->num_queue_pairs = qcnt; if (!i40e_setup_channel(pf, vsi, ch)) { ret = -EINVAL; kfree(ch); goto err_free; } ch->parent_vsi = vsi; vsi->cnt_q_avail -= ch->num_queue_pairs; vsi->macvlan_cnt++; list_add_tail(&ch->list, &vsi->macvlan_list); } return ret; err_free: dev_info(&pf->pdev->dev, "Failed to setup macvlans\n"); i40e_free_macvlan_channels(vsi); return ret; } /** * i40e_fwd_add - configure macvlans * @netdev: net device to configure * @vdev: macvlan netdevice **/ static void *i40e_fwd_add(struct net_device *netdev, struct net_device *vdev) { struct i40e_netdev_priv *np = netdev_priv(netdev); u16 q_per_macvlan = 0, macvlan_cnt = 0, vectors; struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; struct i40e_fwd_adapter *fwd; int avail_macvlan, ret; if (test_bit(I40E_FLAG_DCB_ENA, pf->flags)) { netdev_info(netdev, "Macvlans are not supported when DCB is enabled\n"); return ERR_PTR(-EINVAL); } if (i40e_is_tc_mqprio_enabled(pf)) { netdev_info(netdev, "Macvlans are not supported when HW TC offload is on\n"); return ERR_PTR(-EINVAL); } if (pf->num_lan_msix < I40E_MIN_MACVLAN_VECTORS) { netdev_info(netdev, "Not enough vectors available to support macvlans\n"); return ERR_PTR(-EINVAL); } /* The macvlan device has to be a single Q device so that the * tc_to_txq field can be reused to pick the tx queue. */ if (netif_is_multiqueue(vdev)) return ERR_PTR(-ERANGE); if (!vsi->macvlan_cnt) { /* reserve bit 0 for the pf device */ set_bit(0, vsi->fwd_bitmask); /* Try to reserve as many queues as possible for macvlans. First * reserve 3/4th of max vectors, then half, then quarter and * calculate Qs per macvlan as you go */ vectors = pf->num_lan_msix; if (vectors <= I40E_MAX_MACVLANS && vectors > 64) { /* allocate 4 Qs per macvlan and 32 Qs to the PF*/ q_per_macvlan = 4; macvlan_cnt = (vectors - 32) / 4; } else if (vectors <= 64 && vectors > 32) { /* allocate 2 Qs per macvlan and 16 Qs to the PF*/ q_per_macvlan = 2; macvlan_cnt = (vectors - 16) / 2; } else if (vectors <= 32 && vectors > 16) { /* allocate 1 Q per macvlan and 16 Qs to the PF*/ q_per_macvlan = 1; macvlan_cnt = vectors - 16; } else if (vectors <= 16 && vectors > 8) { /* allocate 1 Q per macvlan and 8 Qs to the PF */ q_per_macvlan = 1; macvlan_cnt = vectors - 8; } else { /* allocate 1 Q per macvlan and 1 Q to the PF */ q_per_macvlan = 1; macvlan_cnt = vectors - 1; } if (macvlan_cnt == 0) return ERR_PTR(-EBUSY); /* Quiesce VSI queues */ i40e_quiesce_vsi(vsi); /* sets up the macvlans but does not "enable" them */ ret = i40e_setup_macvlans(vsi, macvlan_cnt, q_per_macvlan, vdev); if (ret) return ERR_PTR(ret); /* Unquiesce VSI */ i40e_unquiesce_vsi(vsi); } avail_macvlan = find_first_zero_bit(vsi->fwd_bitmask, vsi->macvlan_cnt); if (avail_macvlan >= I40E_MAX_MACVLANS) return ERR_PTR(-EBUSY); /* create the fwd struct */ fwd = kzalloc(sizeof(*fwd), GFP_KERNEL); if (!fwd) return ERR_PTR(-ENOMEM); set_bit(avail_macvlan, vsi->fwd_bitmask); fwd->bit_no = avail_macvlan; netdev_set_sb_channel(vdev, avail_macvlan); fwd->netdev = vdev; if (!netif_running(netdev)) return fwd; /* Set fwd ring up */ ret = i40e_fwd_ring_up(vsi, vdev, fwd); if (ret) { /* unbind the queues and drop the subordinate channel config */ netdev_unbind_sb_channel(netdev, vdev); netdev_set_sb_channel(vdev, 0); kfree(fwd); return ERR_PTR(-EINVAL); } return fwd; } /** * i40e_del_all_macvlans - Delete all the mac filters on the channels * @vsi: the VSI we want to access */ static void i40e_del_all_macvlans(struct i40e_vsi *vsi) { struct i40e_channel *ch, *ch_tmp; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; int aq_err, ret = 0; if (list_empty(&vsi->macvlan_list)) return; list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { if (i40e_is_channel_macvlan(ch)) { ret = i40e_del_macvlan_filter(hw, ch->seid, i40e_channel_mac(ch), &aq_err); if (!ret) { /* Reset queue contexts */ i40e_reset_ch_rings(vsi, ch); clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask); netdev_unbind_sb_channel(vsi->netdev, ch->fwd->netdev); netdev_set_sb_channel(ch->fwd->netdev, 0); kfree(ch->fwd); ch->fwd = NULL; } } } } /** * i40e_fwd_del - delete macvlan interfaces * @netdev: net device to configure * @vdev: macvlan netdevice */ static void i40e_fwd_del(struct net_device *netdev, void *vdev) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_fwd_adapter *fwd = vdev; struct i40e_channel *ch, *ch_tmp; struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; int aq_err, ret = 0; /* Find the channel associated with the macvlan and del mac filter */ list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { if (i40e_is_channel_macvlan(ch) && ether_addr_equal(i40e_channel_mac(ch), fwd->netdev->dev_addr)) { ret = i40e_del_macvlan_filter(hw, ch->seid, i40e_channel_mac(ch), &aq_err); if (!ret) { /* Reset queue contexts */ i40e_reset_ch_rings(vsi, ch); clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask); netdev_unbind_sb_channel(netdev, fwd->netdev); netdev_set_sb_channel(fwd->netdev, 0); kfree(ch->fwd); ch->fwd = NULL; } else { dev_info(&pf->pdev->dev, "Error deleting mac filter on macvlan err %pe, aq_err %s\n", ERR_PTR(ret), i40e_aq_str(hw, aq_err)); } break; } } } /** * i40e_setup_tc - configure multiple traffic classes * @netdev: net device to configure * @type_data: tc offload data **/ static int i40e_setup_tc(struct net_device *netdev, void *type_data) { struct tc_mqprio_qopt_offload *mqprio_qopt = type_data; struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; u8 enabled_tc = 0, num_tc, hw; bool need_reset = false; int old_queue_pairs; int ret = -EINVAL; u16 mode; int i; old_queue_pairs = vsi->num_queue_pairs; num_tc = mqprio_qopt->qopt.num_tc; hw = mqprio_qopt->qopt.hw; mode = mqprio_qopt->mode; if (!hw) { clear_bit(I40E_FLAG_TC_MQPRIO_ENA, pf->flags); memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt)); goto config_tc; } /* Check if MFP enabled */ if (test_bit(I40E_FLAG_MFP_ENA, pf->flags)) { netdev_info(netdev, "Configuring TC not supported in MFP mode\n"); return ret; } switch (mode) { case TC_MQPRIO_MODE_DCB: clear_bit(I40E_FLAG_TC_MQPRIO_ENA, pf->flags); /* Check if DCB enabled to continue */ if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags)) { netdev_info(netdev, "DCB is not enabled for adapter\n"); return ret; } /* Check whether tc count is within enabled limit */ if (num_tc > i40e_pf_get_num_tc(pf)) { netdev_info(netdev, "TC count greater than enabled on link for adapter\n"); return ret; } break; case TC_MQPRIO_MODE_CHANNEL: if (test_bit(I40E_FLAG_DCB_ENA, pf->flags)) { netdev_info(netdev, "Full offload of TC Mqprio options is not supported when DCB is enabled\n"); return ret; } if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) return ret; ret = i40e_validate_mqprio_qopt(vsi, mqprio_qopt); if (ret) return ret; memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt)); set_bit(I40E_FLAG_TC_MQPRIO_ENA, pf->flags); clear_bit(I40E_FLAG_DCB_ENA, pf->flags); break; default: return -EINVAL; } config_tc: /* Generate TC map for number of tc requested */ for (i = 0; i < num_tc; i++) enabled_tc |= BIT(i); /* Requesting same TC configuration as already enabled */ if (enabled_tc == vsi->tc_config.enabled_tc && mode != TC_MQPRIO_MODE_CHANNEL) return 0; /* Quiesce VSI queues */ i40e_quiesce_vsi(vsi); if (!hw && !i40e_is_tc_mqprio_enabled(pf)) i40e_remove_queue_channels(vsi); /* Configure VSI for enabled TCs */ ret = i40e_vsi_config_tc(vsi, enabled_tc); if (ret) { netdev_info(netdev, "Failed configuring TC for VSI seid=%d\n", vsi->seid); need_reset = true; goto exit; } else if (enabled_tc && (!is_power_of_2(vsi->tc_config.tc_info[0].qcount))) { netdev_info(netdev, "Failed to create channel. Override queues (%u) not power of 2\n", vsi->tc_config.tc_info[0].qcount); ret = -EINVAL; need_reset = true; goto exit; } dev_info(&vsi->back->pdev->dev, "Setup channel (id:%u) utilizing num_queues %d\n", vsi->seid, vsi->tc_config.tc_info[0].qcount); if (i40e_is_tc_mqprio_enabled(pf)) { if (vsi->mqprio_qopt.max_rate[0]) { u64 max_tx_rate = i40e_bw_bytes_to_mbits(vsi, vsi->mqprio_qopt.max_rate[0]); ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate); if (!ret) { u64 credits = max_tx_rate; do_div(credits, I40E_BW_CREDIT_DIVISOR); dev_dbg(&vsi->back->pdev->dev, "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n", max_tx_rate, credits, vsi->seid); } else { need_reset = true; goto exit; } } ret = i40e_configure_queue_channels(vsi); if (ret) { vsi->num_queue_pairs = old_queue_pairs; netdev_info(netdev, "Failed configuring queue channels\n"); need_reset = true; goto exit; } } exit: /* Reset the configuration data to defaults, only TC0 is enabled */ if (need_reset) { i40e_vsi_set_default_tc_config(vsi); need_reset = false; } /* Unquiesce VSI */ i40e_unquiesce_vsi(vsi); return ret; } /** * i40e_set_cld_element - sets cloud filter element data * @filter: cloud filter rule * @cld: ptr to cloud filter element data * * This is helper function to copy data into cloud filter element **/ static inline void i40e_set_cld_element(struct i40e_cloud_filter *filter, struct i40e_aqc_cloud_filters_element_data *cld) { u32 ipa; int i; memset(cld, 0, sizeof(*cld)); ether_addr_copy(cld->outer_mac, filter->dst_mac); ether_addr_copy(cld->inner_mac, filter->src_mac); if (filter->n_proto != ETH_P_IP && filter->n_proto != ETH_P_IPV6) return; if (filter->n_proto == ETH_P_IPV6) { #define IPV6_MAX_INDEX (ARRAY_SIZE(filter->dst_ipv6) - 1) for (i = 0; i < ARRAY_SIZE(filter->dst_ipv6); i++) { ipa = be32_to_cpu(filter->dst_ipv6[IPV6_MAX_INDEX - i]); *(__le32 *)&cld->ipaddr.raw_v6.data[i * 2] = cpu_to_le32(ipa); } } else { ipa = be32_to_cpu(filter->dst_ipv4); memcpy(&cld->ipaddr.v4.data, &ipa, sizeof(ipa)); } cld->inner_vlan = cpu_to_le16(ntohs(filter->vlan_id)); /* tenant_id is not supported by FW now, once the support is enabled * fill the cld->tenant_id with cpu_to_le32(filter->tenant_id) */ if (filter->tenant_id) return; } /** * i40e_add_del_cloud_filter - Add/del cloud filter * @vsi: pointer to VSI * @filter: cloud filter rule * @add: if true, add, if false, delete * * Add or delete a cloud filter for a specific flow spec. * Returns 0 if the filter were successfully added. **/ int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, struct i40e_cloud_filter *filter, bool add) { struct i40e_aqc_cloud_filters_element_data cld_filter; struct i40e_pf *pf = vsi->back; int ret; static const u16 flag_table[128] = { [I40E_CLOUD_FILTER_FLAGS_OMAC] = I40E_AQC_ADD_CLOUD_FILTER_OMAC, [I40E_CLOUD_FILTER_FLAGS_IMAC] = I40E_AQC_ADD_CLOUD_FILTER_IMAC, [I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN] = I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN, [I40E_CLOUD_FILTER_FLAGS_IMAC_TEN_ID] = I40E_AQC_ADD_CLOUD_FILTER_IMAC_TEN_ID, [I40E_CLOUD_FILTER_FLAGS_OMAC_TEN_ID_IMAC] = I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC, [I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN_TEN_ID] = I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_TEN_ID, [I40E_CLOUD_FILTER_FLAGS_IIP] = I40E_AQC_ADD_CLOUD_FILTER_IIP, }; if (filter->flags >= ARRAY_SIZE(flag_table)) return -EIO; memset(&cld_filter, 0, sizeof(cld_filter)); /* copy element needed to add cloud filter from filter */ i40e_set_cld_element(filter, &cld_filter); if (filter->tunnel_type != I40E_CLOUD_TNL_TYPE_NONE) cld_filter.flags = cpu_to_le16(filter->tunnel_type << I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT); if (filter->n_proto == ETH_P_IPV6) cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] | I40E_AQC_ADD_CLOUD_FLAGS_IPV6); else cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] | I40E_AQC_ADD_CLOUD_FLAGS_IPV4); if (add) ret = i40e_aq_add_cloud_filters(&pf->hw, filter->seid, &cld_filter, 1); else ret = i40e_aq_rem_cloud_filters(&pf->hw, filter->seid, &cld_filter, 1); if (ret) dev_dbg(&pf->pdev->dev, "Failed to %s cloud filter using l4 port %u, err %d aq_err %d\n", add ? "add" : "delete", filter->dst_port, ret, pf->hw.aq.asq_last_status); else dev_info(&pf->pdev->dev, "%s cloud filter for VSI: %d\n", add ? "Added" : "Deleted", filter->seid); return ret; } /** * i40e_add_del_cloud_filter_big_buf - Add/del cloud filter using big_buf * @vsi: pointer to VSI * @filter: cloud filter rule * @add: if true, add, if false, delete * * Add or delete a cloud filter for a specific flow spec using big buffer. * Returns 0 if the filter were successfully added. **/ int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, struct i40e_cloud_filter *filter, bool add) { struct i40e_aqc_cloud_filters_element_bb cld_filter; struct i40e_pf *pf = vsi->back; int ret; /* Both (src/dst) valid mac_addr are not supported */ if ((is_valid_ether_addr(filter->dst_mac) && is_valid_ether_addr(filter->src_mac)) || (is_multicast_ether_addr(filter->dst_mac) && is_multicast_ether_addr(filter->src_mac))) return -EOPNOTSUPP; /* Big buffer cloud filter needs 'L4 port' to be non-zero. Also, UDP * ports are not supported via big buffer now. */ if (!filter->dst_port || filter->ip_proto == IPPROTO_UDP) return -EOPNOTSUPP; /* adding filter using src_port/src_ip is not supported at this stage */ if (filter->src_port || (filter->src_ipv4 && filter->n_proto != ETH_P_IPV6) || !ipv6_addr_any(&filter->ip.v6.src_ip6)) return -EOPNOTSUPP; memset(&cld_filter, 0, sizeof(cld_filter)); /* copy element needed to add cloud filter from filter */ i40e_set_cld_element(filter, &cld_filter.element); if (is_valid_ether_addr(filter->dst_mac) || is_valid_ether_addr(filter->src_mac) || is_multicast_ether_addr(filter->dst_mac) || is_multicast_ether_addr(filter->src_mac)) { /* MAC + IP : unsupported mode */ if (filter->dst_ipv4) return -EOPNOTSUPP; /* since we validated that L4 port must be valid before * we get here, start with respective "flags" value * and update if vlan is present or not */ cld_filter.element.flags = cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT); if (filter->vlan_id) { cld_filter.element.flags = cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT); } } else if ((filter->dst_ipv4 && filter->n_proto != ETH_P_IPV6) || !ipv6_addr_any(&filter->ip.v6.dst_ip6)) { cld_filter.element.flags = cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_IP_PORT); if (filter->n_proto == ETH_P_IPV6) cld_filter.element.flags |= cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV6); else cld_filter.element.flags |= cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV4); } else { dev_err(&pf->pdev->dev, "either mac or ip has to be valid for cloud filter\n"); return -EINVAL; } /* Now copy L4 port in Byte 6..7 in general fields */ cld_filter.general_fields[I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0] = be16_to_cpu(filter->dst_port); if (add) { /* Validate current device switch mode, change if necessary */ ret = i40e_validate_and_set_switch_mode(vsi); if (ret) { dev_err(&pf->pdev->dev, "failed to set switch mode, ret %d\n", ret); return ret; } ret = i40e_aq_add_cloud_filters_bb(&pf->hw, filter->seid, &cld_filter, 1); } else { ret = i40e_aq_rem_cloud_filters_bb(&pf->hw, filter->seid, &cld_filter, 1); } if (ret) dev_dbg(&pf->pdev->dev, "Failed to %s cloud filter(big buffer) err %d aq_err %d\n", add ? "add" : "delete", ret, pf->hw.aq.asq_last_status); else dev_info(&pf->pdev->dev, "%s cloud filter for VSI: %d, L4 port: %d\n", add ? "add" : "delete", filter->seid, ntohs(filter->dst_port)); return ret; } /** * i40e_parse_cls_flower - Parse tc flower filters provided by kernel * @vsi: Pointer to VSI * @f: Pointer to struct flow_cls_offload * @filter: Pointer to cloud filter structure * **/ static int i40e_parse_cls_flower(struct i40e_vsi *vsi, struct flow_cls_offload *f, struct i40e_cloud_filter *filter) { struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector *dissector = rule->match.dissector; u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0; struct i40e_pf *pf = vsi->back; u8 field_flags = 0; if (dissector->used_keys & ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) | BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) | BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) | BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) | BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) | BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID))) { dev_err(&pf->pdev->dev, "Unsupported key used: 0x%llx\n", dissector->used_keys); return -EOPNOTSUPP; } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { struct flow_match_enc_keyid match; flow_rule_match_enc_keyid(rule, &match); if (match.mask->keyid != 0) field_flags |= I40E_CLOUD_FIELD_TEN_ID; filter->tenant_id = be32_to_cpu(match.key->keyid); } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { struct flow_match_basic match; flow_rule_match_basic(rule, &match); n_proto_key = ntohs(match.key->n_proto); n_proto_mask = ntohs(match.mask->n_proto); if (n_proto_key == ETH_P_ALL) { n_proto_key = 0; n_proto_mask = 0; } filter->n_proto = n_proto_key & n_proto_mask; filter->ip_proto = match.key->ip_proto; } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { struct flow_match_eth_addrs match; flow_rule_match_eth_addrs(rule, &match); /* use is_broadcast and is_zero to check for all 0xf or 0 */ if (!is_zero_ether_addr(match.mask->dst)) { if (is_broadcast_ether_addr(match.mask->dst)) { field_flags |= I40E_CLOUD_FIELD_OMAC; } else { dev_err(&pf->pdev->dev, "Bad ether dest mask %pM\n", match.mask->dst); return -EIO; } } if (!is_zero_ether_addr(match.mask->src)) { if (is_broadcast_ether_addr(match.mask->src)) { field_flags |= I40E_CLOUD_FIELD_IMAC; } else { dev_err(&pf->pdev->dev, "Bad ether src mask %pM\n", match.mask->src); return -EIO; } } ether_addr_copy(filter->dst_mac, match.key->dst); ether_addr_copy(filter->src_mac, match.key->src); } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { struct flow_match_vlan match; flow_rule_match_vlan(rule, &match); if (match.mask->vlan_id) { if (match.mask->vlan_id == VLAN_VID_MASK) { field_flags |= I40E_CLOUD_FIELD_IVLAN; } else { dev_err(&pf->pdev->dev, "Bad vlan mask 0x%04x\n", match.mask->vlan_id); return -EIO; } } filter->vlan_id = cpu_to_be16(match.key->vlan_id); } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { struct flow_match_control match; flow_rule_match_control(rule, &match); addr_type = match.key->addr_type; if (flow_rule_has_control_flags(match.mask->flags, f->common.extack)) return -EOPNOTSUPP; } if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { struct flow_match_ipv4_addrs match; flow_rule_match_ipv4_addrs(rule, &match); if (match.mask->dst) { if (match.mask->dst == cpu_to_be32(0xffffffff)) { field_flags |= I40E_CLOUD_FIELD_IIP; } else { dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4b\n", &match.mask->dst); return -EIO; } } if (match.mask->src) { if (match.mask->src == cpu_to_be32(0xffffffff)) { field_flags |= I40E_CLOUD_FIELD_IIP; } else { dev_err(&pf->pdev->dev, "Bad ip src mask %pI4b\n", &match.mask->src); return -EIO; } } if (field_flags & I40E_CLOUD_FIELD_TEN_ID) { dev_err(&pf->pdev->dev, "Tenant id not allowed for ip filter\n"); return -EIO; } filter->dst_ipv4 = match.key->dst; filter->src_ipv4 = match.key->src; } if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { struct flow_match_ipv6_addrs match; flow_rule_match_ipv6_addrs(rule, &match); /* src and dest IPV6 address should not be LOOPBACK * (0:0:0:0:0:0:0:1), which can be represented as ::1 */ if (ipv6_addr_loopback(&match.key->dst) || ipv6_addr_loopback(&match.key->src)) { dev_err(&pf->pdev->dev, "Bad ipv6, addr is LOOPBACK\n"); return -EIO; } if (!ipv6_addr_any(&match.mask->dst) || !ipv6_addr_any(&match.mask->src)) field_flags |= I40E_CLOUD_FIELD_IIP; memcpy(&filter->src_ipv6, &match.key->src.s6_addr32, sizeof(filter->src_ipv6)); memcpy(&filter->dst_ipv6, &match.key->dst.s6_addr32, sizeof(filter->dst_ipv6)); } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { struct flow_match_ports match; flow_rule_match_ports(rule, &match); if (match.mask->src) { if (match.mask->src == cpu_to_be16(0xffff)) { field_flags |= I40E_CLOUD_FIELD_IIP; } else { dev_err(&pf->pdev->dev, "Bad src port mask 0x%04x\n", be16_to_cpu(match.mask->src)); return -EIO; } } if (match.mask->dst) { if (match.mask->dst == cpu_to_be16(0xffff)) { field_flags |= I40E_CLOUD_FIELD_IIP; } else { dev_err(&pf->pdev->dev, "Bad dst port mask 0x%04x\n", be16_to_cpu(match.mask->dst)); return -EIO; } } filter->dst_port = match.key->dst; filter->src_port = match.key->src; switch (filter->ip_proto) { case IPPROTO_TCP: case IPPROTO_UDP: break; default: dev_err(&pf->pdev->dev, "Only UDP and TCP transport are supported\n"); return -EINVAL; } } filter->flags = field_flags; return 0; } /** * i40e_handle_tclass: Forward to a traffic class on the device * @vsi: Pointer to VSI * @tc: traffic class index on the device * @filter: Pointer to cloud filter structure * **/ static int i40e_handle_tclass(struct i40e_vsi *vsi, u32 tc, struct i40e_cloud_filter *filter) { struct i40e_channel *ch, *ch_tmp; /* direct to a traffic class on the same device */ if (tc == 0) { filter->seid = vsi->seid; return 0; } else if (vsi->tc_config.enabled_tc & BIT(tc)) { if (!filter->dst_port) { dev_err(&vsi->back->pdev->dev, "Specify destination port to direct to traffic class that is not default\n"); return -EINVAL; } if (list_empty(&vsi->ch_list)) return -EINVAL; list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { if (ch->seid == vsi->tc_seid_map[tc]) filter->seid = ch->seid; } return 0; } dev_err(&vsi->back->pdev->dev, "TC is not enabled\n"); return -EINVAL; } /** * i40e_configure_clsflower - Configure tc flower filters * @vsi: Pointer to VSI * @cls_flower: Pointer to struct flow_cls_offload * **/ static int i40e_configure_clsflower(struct i40e_vsi *vsi, struct flow_cls_offload *cls_flower) { int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid); struct i40e_cloud_filter *filter = NULL; struct i40e_pf *pf = vsi->back; int err = 0; if (tc < 0) { dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n"); return -EOPNOTSUPP; } if (!tc) { dev_err(&pf->pdev->dev, "Unable to add filter because of invalid destination"); return -EINVAL; } if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) return -EBUSY; if (pf->fdir_pf_active_filters || (!hlist_empty(&pf->fdir_filter_list))) { dev_err(&vsi->back->pdev->dev, "Flow Director Sideband filters exists, turn ntuple off to configure cloud filters\n"); return -EINVAL; } if (test_bit(I40E_FLAG_FD_SB_ENA, vsi->back->flags)) { dev_err(&vsi->back->pdev->dev, "Disable Flow Director Sideband, configuring Cloud filters via tc-flower\n"); clear_bit(I40E_FLAG_FD_SB_ENA, vsi->back->flags); clear_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, vsi->back->flags); } filter = kzalloc(sizeof(*filter), GFP_KERNEL); if (!filter) return -ENOMEM; filter->cookie = cls_flower->cookie; err = i40e_parse_cls_flower(vsi, cls_flower, filter); if (err < 0) goto err; err = i40e_handle_tclass(vsi, tc, filter); if (err < 0) goto err; /* Add cloud filter */ if (filter->dst_port) err = i40e_add_del_cloud_filter_big_buf(vsi, filter, true); else err = i40e_add_del_cloud_filter(vsi, filter, true); if (err) { dev_err(&pf->pdev->dev, "Failed to add cloud filter, err %d\n", err); goto err; } /* add filter to the ordered list */ INIT_HLIST_NODE(&filter->cloud_node); hlist_add_head(&filter->cloud_node, &pf->cloud_filter_list); pf->num_cloud_filters++; return err; err: kfree(filter); return err; } /** * i40e_find_cloud_filter - Find the could filter in the list * @vsi: Pointer to VSI * @cookie: filter specific cookie * **/ static struct i40e_cloud_filter *i40e_find_cloud_filter(struct i40e_vsi *vsi, unsigned long *cookie) { struct i40e_cloud_filter *filter = NULL; struct hlist_node *node2; hlist_for_each_entry_safe(filter, node2, &vsi->back->cloud_filter_list, cloud_node) if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie))) return filter; return NULL; } /** * i40e_delete_clsflower - Remove tc flower filters * @vsi: Pointer to VSI * @cls_flower: Pointer to struct flow_cls_offload * **/ static int i40e_delete_clsflower(struct i40e_vsi *vsi, struct flow_cls_offload *cls_flower) { struct i40e_cloud_filter *filter = NULL; struct i40e_pf *pf = vsi->back; int err = 0; filter = i40e_find_cloud_filter(vsi, &cls_flower->cookie); if (!filter) return -EINVAL; hash_del(&filter->cloud_node); if (filter->dst_port) err = i40e_add_del_cloud_filter_big_buf(vsi, filter, false); else err = i40e_add_del_cloud_filter(vsi, filter, false); kfree(filter); if (err) { dev_err(&pf->pdev->dev, "Failed to delete cloud filter, err %pe\n", ERR_PTR(err)); return i40e_aq_rc_to_posix(err, pf->hw.aq.asq_last_status); } pf->num_cloud_filters--; if (!pf->num_cloud_filters) if (test_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, pf->flags) && !test_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags)) { set_bit(I40E_FLAG_FD_SB_ENA, pf->flags); clear_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, pf->flags); clear_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); } return 0; } /** * i40e_setup_tc_cls_flower - flower classifier offloads * @np: net device to configure * @cls_flower: offload data **/ static int i40e_setup_tc_cls_flower(struct i40e_netdev_priv *np, struct flow_cls_offload *cls_flower) { struct i40e_vsi *vsi = np->vsi; switch (cls_flower->command) { case FLOW_CLS_REPLACE: return i40e_configure_clsflower(vsi, cls_flower); case FLOW_CLS_DESTROY: return i40e_delete_clsflower(vsi, cls_flower); case FLOW_CLS_STATS: return -EOPNOTSUPP; default: return -EOPNOTSUPP; } } static int i40e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { struct i40e_netdev_priv *np = cb_priv; if (!tc_cls_can_offload_and_chain0(np->vsi->netdev, type_data)) return -EOPNOTSUPP; switch (type) { case TC_SETUP_CLSFLOWER: return i40e_setup_tc_cls_flower(np, type_data); default: return -EOPNOTSUPP; } } static LIST_HEAD(i40e_block_cb_list); static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type, void *type_data) { struct i40e_netdev_priv *np = netdev_priv(netdev); switch (type) { case TC_SETUP_QDISC_MQPRIO: return i40e_setup_tc(netdev, type_data); case TC_SETUP_BLOCK: return flow_block_cb_setup_simple(type_data, &i40e_block_cb_list, i40e_setup_tc_block_cb, np, np, true); default: return -EOPNOTSUPP; } } /** * i40e_open - Called when a network interface is made active * @netdev: network interface device structure * * The open entry point is called when a network interface is made * active by the system (IFF_UP). At this point all resources needed * for transmit and receive operations are allocated, the interrupt * handler is registered with the OS, the netdev watchdog subtask is * enabled, and the stack is notified that the interface is ready. * * Returns 0 on success, negative value on failure **/ int i40e_open(struct net_device *netdev) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; int err; /* disallow open during test or if eeprom is broken */ if (test_bit(__I40E_TESTING, pf->state) || test_bit(__I40E_BAD_EEPROM, pf->state)) return -EBUSY; netif_carrier_off(netdev); if (i40e_force_link_state(pf, true)) return -EAGAIN; err = i40e_vsi_open(vsi); if (err) return err; /* configure global TSO hardware offload settings */ wr32(&pf->hw, I40E_GLLAN_TSOMSK_F, be32_to_cpu(TCP_FLAG_PSH | TCP_FLAG_FIN) >> 16); wr32(&pf->hw, I40E_GLLAN_TSOMSK_M, be32_to_cpu(TCP_FLAG_PSH | TCP_FLAG_FIN | TCP_FLAG_CWR) >> 16); wr32(&pf->hw, I40E_GLLAN_TSOMSK_L, be32_to_cpu(TCP_FLAG_CWR) >> 16); udp_tunnel_get_rx_info(netdev); return 0; } /** * i40e_netif_set_realnum_tx_rx_queues - Update number of tx/rx queues * @vsi: vsi structure * * This updates netdev's number of tx/rx queues * * Returns status of setting tx/rx queues **/ static int i40e_netif_set_realnum_tx_rx_queues(struct i40e_vsi *vsi) { int ret; ret = netif_set_real_num_rx_queues(vsi->netdev, vsi->num_queue_pairs); if (ret) return ret; return netif_set_real_num_tx_queues(vsi->netdev, vsi->num_queue_pairs); } /** * i40e_vsi_open - * @vsi: the VSI to open * * Finish initialization of the VSI. * * Returns 0 on success, negative value on failure * * Note: expects to be called while under rtnl_lock() **/ int i40e_vsi_open(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; char int_name[I40E_INT_NAME_STR_LEN]; int err; /* allocate descriptors */ err = i40e_vsi_setup_tx_resources(vsi); if (err) goto err_setup_tx; err = i40e_vsi_setup_rx_resources(vsi); if (err) goto err_setup_rx; err = i40e_vsi_configure(vsi); if (err) goto err_setup_rx; if (vsi->netdev) { snprintf(int_name, sizeof(int_name) - 1, "%s-%s", dev_driver_string(&pf->pdev->dev), vsi->netdev->name); err = i40e_vsi_request_irq(vsi, int_name); if (err) goto err_setup_rx; /* Notify the stack of the actual queue counts. */ err = i40e_netif_set_realnum_tx_rx_queues(vsi); if (err) goto err_set_queues; } else if (vsi->type == I40E_VSI_FDIR) { snprintf(int_name, sizeof(int_name) - 1, "%s-%s:fdir", dev_driver_string(&pf->pdev->dev), dev_name(&pf->pdev->dev)); err = i40e_vsi_request_irq(vsi, int_name); if (err) goto err_setup_rx; } else { err = -EINVAL; goto err_setup_rx; } err = i40e_up_complete(vsi); if (err) goto err_up_complete; return 0; err_up_complete: i40e_down(vsi); err_set_queues: i40e_vsi_free_irq(vsi); err_setup_rx: i40e_vsi_free_rx_resources(vsi); err_setup_tx: i40e_vsi_free_tx_resources(vsi); if (vsi->type == I40E_VSI_MAIN) i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); return err; } /** * i40e_fdir_filter_exit - Cleans up the Flow Director accounting * @pf: Pointer to PF * * This function destroys the hlist where all the Flow Director * filters were saved. **/ static void i40e_fdir_filter_exit(struct i40e_pf *pf) { struct i40e_fdir_filter *filter; struct i40e_flex_pit *pit_entry, *tmp; struct hlist_node *node2; hlist_for_each_entry_safe(filter, node2, &pf->fdir_filter_list, fdir_node) { hlist_del(&filter->fdir_node); kfree(filter); } list_for_each_entry_safe(pit_entry, tmp, &pf->l3_flex_pit_list, list) { list_del(&pit_entry->list); kfree(pit_entry); } INIT_LIST_HEAD(&pf->l3_flex_pit_list); list_for_each_entry_safe(pit_entry, tmp, &pf->l4_flex_pit_list, list) { list_del(&pit_entry->list); kfree(pit_entry); } INIT_LIST_HEAD(&pf->l4_flex_pit_list); pf->fdir_pf_active_filters = 0; i40e_reset_fdir_filter_cnt(pf); /* Reprogram the default input set for TCP/IPv4 */ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_TCP, I40E_L3_SRC_MASK | I40E_L3_DST_MASK | I40E_L4_SRC_MASK | I40E_L4_DST_MASK); /* Reprogram the default input set for TCP/IPv6 */ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV6_TCP, I40E_L3_V6_SRC_MASK | I40E_L3_V6_DST_MASK | I40E_L4_SRC_MASK | I40E_L4_DST_MASK); /* Reprogram the default input set for UDP/IPv4 */ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_UDP, I40E_L3_SRC_MASK | I40E_L3_DST_MASK | I40E_L4_SRC_MASK | I40E_L4_DST_MASK); /* Reprogram the default input set for UDP/IPv6 */ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV6_UDP, I40E_L3_V6_SRC_MASK | I40E_L3_V6_DST_MASK | I40E_L4_SRC_MASK | I40E_L4_DST_MASK); /* Reprogram the default input set for SCTP/IPv4 */ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_SCTP, I40E_L3_SRC_MASK | I40E_L3_DST_MASK | I40E_L4_SRC_MASK | I40E_L4_DST_MASK); /* Reprogram the default input set for SCTP/IPv6 */ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV6_SCTP, I40E_L3_V6_SRC_MASK | I40E_L3_V6_DST_MASK | I40E_L4_SRC_MASK | I40E_L4_DST_MASK); /* Reprogram the default input set for Other/IPv4 */ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_OTHER, I40E_L3_SRC_MASK | I40E_L3_DST_MASK); i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_FRAG_IPV4, I40E_L3_SRC_MASK | I40E_L3_DST_MASK); /* Reprogram the default input set for Other/IPv6 */ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV6_OTHER, I40E_L3_SRC_MASK | I40E_L3_DST_MASK); i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_FRAG_IPV6, I40E_L3_SRC_MASK | I40E_L3_DST_MASK); } /** * i40e_cloud_filter_exit - Cleans up the cloud filters * @pf: Pointer to PF * * This function destroys the hlist where all the cloud filters * were saved. **/ static void i40e_cloud_filter_exit(struct i40e_pf *pf) { struct i40e_cloud_filter *cfilter; struct hlist_node *node; hlist_for_each_entry_safe(cfilter, node, &pf->cloud_filter_list, cloud_node) { hlist_del(&cfilter->cloud_node); kfree(cfilter); } pf->num_cloud_filters = 0; if (test_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, pf->flags) && !test_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags)) { set_bit(I40E_FLAG_FD_SB_ENA, pf->flags); clear_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, pf->flags); clear_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); } } /** * i40e_close - Disables a network interface * @netdev: network interface device structure * * The close entry point is called when an interface is de-activated * by the OS. The hardware is still under the driver's control, but * this netdev interface is disabled. * * Returns 0, this is not allowed to fail **/ int i40e_close(struct net_device *netdev) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; i40e_vsi_close(vsi); return 0; } /** * i40e_do_reset - Start a PF or Core Reset sequence * @pf: board private structure * @reset_flags: which reset is requested * @lock_acquired: indicates whether or not the lock has been acquired * before this function was called. * * The essential difference in resets is that the PF Reset * doesn't clear the packet buffers, doesn't reset the PE * firmware, and doesn't bother the other PFs on the chip. **/ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired) { struct i40e_vsi *vsi; u32 val; int i; /* do the biggest reset indicated */ if (reset_flags & BIT_ULL(__I40E_GLOBAL_RESET_REQUESTED)) { /* Request a Global Reset * * This will start the chip's countdown to the actual full * chip reset event, and a warning interrupt to be sent * to all PFs, including the requestor. Our handler * for the warning interrupt will deal with the shutdown * and recovery of the switch setup. */ dev_dbg(&pf->pdev->dev, "GlobalR requested\n"); val = rd32(&pf->hw, I40E_GLGEN_RTRIG); val |= I40E_GLGEN_RTRIG_GLOBR_MASK; wr32(&pf->hw, I40E_GLGEN_RTRIG, val); } else if (reset_flags & BIT_ULL(__I40E_CORE_RESET_REQUESTED)) { /* Request a Core Reset * * Same as Global Reset, except does *not* include the MAC/PHY */ dev_dbg(&pf->pdev->dev, "CoreR requested\n"); val = rd32(&pf->hw, I40E_GLGEN_RTRIG); val |= I40E_GLGEN_RTRIG_CORER_MASK; wr32(&pf->hw, I40E_GLGEN_RTRIG, val); i40e_flush(&pf->hw); } else if (reset_flags & I40E_PF_RESET_FLAG) { /* Request a PF Reset * * Resets only the PF-specific registers * * This goes directly to the tear-down and rebuild of * the switch, since we need to do all the recovery as * for the Core Reset. */ dev_dbg(&pf->pdev->dev, "PFR requested\n"); i40e_handle_reset_warning(pf, lock_acquired); } else if (reset_flags & I40E_PF_RESET_AND_REBUILD_FLAG) { /* Request a PF Reset * * Resets PF and reinitializes PFs VSI. */ i40e_prep_for_reset(pf); i40e_reset_and_rebuild(pf, true, lock_acquired); dev_info(&pf->pdev->dev, test_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags) ? "FW LLDP is disabled\n" : "FW LLDP is enabled\n"); } else if (reset_flags & BIT_ULL(__I40E_REINIT_REQUESTED)) { /* Find the VSI(s) that requested a re-init */ dev_info(&pf->pdev->dev, "VSI reinit requested\n"); i40e_pf_for_each_vsi(pf, i, vsi) { if (test_and_clear_bit(__I40E_VSI_REINIT_REQUESTED, vsi->state)) i40e_vsi_reinit_locked(vsi); } } else if (reset_flags & BIT_ULL(__I40E_DOWN_REQUESTED)) { /* Find the VSI(s) that needs to be brought down */ dev_info(&pf->pdev->dev, "VSI down requested\n"); i40e_pf_for_each_vsi(pf, i, vsi) { if (test_and_clear_bit(__I40E_VSI_DOWN_REQUESTED, vsi->state)) { set_bit(__I40E_VSI_DOWN, vsi->state); i40e_down(vsi); } } } else { dev_info(&pf->pdev->dev, "bad reset request 0x%08x\n", reset_flags); } } #ifdef CONFIG_I40E_DCB /** * i40e_dcb_need_reconfig - Check if DCB needs reconfig * @pf: board private structure * @old_cfg: current DCB config * @new_cfg: new DCB config **/ bool i40e_dcb_need_reconfig(struct i40e_pf *pf, struct i40e_dcbx_config *old_cfg, struct i40e_dcbx_config *new_cfg) { bool need_reconfig = false; /* Check if ETS configuration has changed */ if (memcmp(&new_cfg->etscfg, &old_cfg->etscfg, sizeof(new_cfg->etscfg))) { /* If Priority Table has changed reconfig is needed */ if (memcmp(&new_cfg->etscfg.prioritytable, &old_cfg->etscfg.prioritytable, sizeof(new_cfg->etscfg.prioritytable))) { need_reconfig = true; dev_dbg(&pf->pdev->dev, "ETS UP2TC changed.\n"); } if (memcmp(&new_cfg->etscfg.tcbwtable, &old_cfg->etscfg.tcbwtable, sizeof(new_cfg->etscfg.tcbwtable))) dev_dbg(&pf->pdev->dev, "ETS TC BW Table changed.\n"); if (memcmp(&new_cfg->etscfg.tsatable, &old_cfg->etscfg.tsatable, sizeof(new_cfg->etscfg.tsatable))) dev_dbg(&pf->pdev->dev, "ETS TSA Table changed.\n"); } /* Check if PFC configuration has changed */ if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) { need_reconfig = true; dev_dbg(&pf->pdev->dev, "PFC config change detected.\n"); } /* Check if APP Table has changed */ if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app))) { need_reconfig = true; dev_dbg(&pf->pdev->dev, "APP Table change detected.\n"); } dev_dbg(&pf->pdev->dev, "dcb need_reconfig=%d\n", need_reconfig); return need_reconfig; } /** * i40e_handle_lldp_event - Handle LLDP Change MIB event * @pf: board private structure * @e: event info posted on ARQ **/ static int i40e_handle_lldp_event(struct i40e_pf *pf, struct i40e_arq_event_info *e) { struct i40e_aqc_lldp_get_mib *mib = (struct i40e_aqc_lldp_get_mib *)&e->desc.params.raw; struct i40e_hw *hw = &pf->hw; struct i40e_dcbx_config tmp_dcbx_cfg; bool need_reconfig = false; int ret = 0; u8 type; /* X710-T*L 2.5G and 5G speeds don't support DCB */ if (I40E_IS_X710TL_DEVICE(hw->device_id) && (hw->phy.link_info.link_speed & ~(I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB)) && !test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) /* let firmware decide if the DCB should be disabled */ set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); /* Not DCB capable or capability disabled */ if (!test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) return ret; /* Ignore if event is not for Nearest Bridge */ type = ((mib->type >> I40E_AQ_LLDP_BRIDGE_TYPE_SHIFT) & I40E_AQ_LLDP_BRIDGE_TYPE_MASK); dev_dbg(&pf->pdev->dev, "LLDP event mib bridge type 0x%x\n", type); if (type != I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE) return ret; /* Check MIB Type and return if event for Remote MIB update */ type = mib->type & I40E_AQ_LLDP_MIB_TYPE_MASK; dev_dbg(&pf->pdev->dev, "LLDP event mib type %s\n", type ? "remote" : "local"); if (type == I40E_AQ_LLDP_MIB_REMOTE) { /* Update the remote cached instance and return */ ret = i40e_aq_get_dcb_config(hw, I40E_AQ_LLDP_MIB_REMOTE, I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE, &hw->remote_dcbx_config); goto exit; } /* Store the old configuration */ tmp_dcbx_cfg = hw->local_dcbx_config; /* Reset the old DCBx configuration data */ memset(&hw->local_dcbx_config, 0, sizeof(hw->local_dcbx_config)); /* Get updated DCBX data from firmware */ ret = i40e_get_dcb_config(&pf->hw); if (ret) { /* X710-T*L 2.5G and 5G speeds don't support DCB */ if (I40E_IS_X710TL_DEVICE(hw->device_id) && (hw->phy.link_info.link_speed & (I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB))) { dev_warn(&pf->pdev->dev, "DCB is not supported for X710-T*L 2.5/5G speeds\n"); clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); } else { dev_info(&pf->pdev->dev, "Failed querying DCB configuration data from firmware, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); } goto exit; } /* No change detected in DCBX configs */ if (!memcmp(&tmp_dcbx_cfg, &hw->local_dcbx_config, sizeof(tmp_dcbx_cfg))) { dev_dbg(&pf->pdev->dev, "No change detected in DCBX configuration.\n"); goto exit; } need_reconfig = i40e_dcb_need_reconfig(pf, &tmp_dcbx_cfg, &hw->local_dcbx_config); i40e_dcbnl_flush_apps(pf, &tmp_dcbx_cfg, &hw->local_dcbx_config); if (!need_reconfig) goto exit; /* Enable DCB tagging only when more than one TC */ if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1) set_bit(I40E_FLAG_DCB_ENA, pf->flags); else clear_bit(I40E_FLAG_DCB_ENA, pf->flags); set_bit(__I40E_PORT_SUSPENDED, pf->state); /* Reconfiguration needed quiesce all VSIs */ i40e_pf_quiesce_all_vsi(pf); /* Changes in configuration update VEB/VSI */ i40e_dcb_reconfigure(pf); ret = i40e_resume_port_tx(pf); clear_bit(__I40E_PORT_SUSPENDED, pf->state); /* In case of error no point in resuming VSIs */ if (ret) goto exit; /* Wait for the PF's queues to be disabled */ ret = i40e_pf_wait_queues_disabled(pf); if (ret) { /* Schedule PF reset to recover */ set_bit(__I40E_PF_RESET_REQUESTED, pf->state); i40e_service_event_schedule(pf); } else { i40e_pf_unquiesce_all_vsi(pf); set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); set_bit(__I40E_CLIENT_L2_CHANGE, pf->state); } exit: return ret; } #endif /* CONFIG_I40E_DCB */ /** * i40e_do_reset_safe - Protected reset path for userland calls. * @pf: board private structure * @reset_flags: which reset is requested * **/ void i40e_do_reset_safe(struct i40e_pf *pf, u32 reset_flags) { rtnl_lock(); i40e_do_reset(pf, reset_flags, true); rtnl_unlock(); } /** * i40e_handle_lan_overflow_event - Handler for LAN queue overflow event * @pf: board private structure * @e: event info posted on ARQ * * Handler for LAN Queue Overflow Event generated by the firmware for PF * and VF queues **/ static void i40e_handle_lan_overflow_event(struct i40e_pf *pf, struct i40e_arq_event_info *e) { struct i40e_aqc_lan_overflow *data = (struct i40e_aqc_lan_overflow *)&e->desc.params.raw; u32 queue = le32_to_cpu(data->prtdcb_rupto); u32 qtx_ctl = le32_to_cpu(data->otx_ctl); struct i40e_hw *hw = &pf->hw; struct i40e_vf *vf; u16 vf_id; dev_dbg(&pf->pdev->dev, "overflow Rx Queue Number = %d QTX_CTL=0x%08x\n", queue, qtx_ctl); if (FIELD_GET(I40E_QTX_CTL_PFVF_Q_MASK, qtx_ctl) != I40E_QTX_CTL_VF_QUEUE) return; /* Queue belongs to VF, find the VF and issue VF reset */ vf_id = FIELD_GET(I40E_QTX_CTL_VFVM_INDX_MASK, qtx_ctl); vf_id -= hw->func_caps.vf_base_id; vf = &pf->vf[vf_id]; i40e_vc_notify_vf_reset(vf); /* Allow VF to process pending reset notification */ msleep(20); i40e_reset_vf(vf, false); } /** * i40e_get_cur_guaranteed_fd_count - Get the consumed guaranteed FD filters * @pf: board private structure **/ u32 i40e_get_cur_guaranteed_fd_count(struct i40e_pf *pf) { u32 val, fcnt_prog; val = rd32(&pf->hw, I40E_PFQF_FDSTAT); fcnt_prog = (val & I40E_PFQF_FDSTAT_GUARANT_CNT_MASK); return fcnt_prog; } /** * i40e_get_current_fd_count - Get total FD filters programmed for this PF * @pf: board private structure **/ u32 i40e_get_current_fd_count(struct i40e_pf *pf) { u32 val, fcnt_prog; val = rd32(&pf->hw, I40E_PFQF_FDSTAT); fcnt_prog = (val & I40E_PFQF_FDSTAT_GUARANT_CNT_MASK) + FIELD_GET(I40E_PFQF_FDSTAT_BEST_CNT_MASK, val); return fcnt_prog; } /** * i40e_get_global_fd_count - Get total FD filters programmed on device * @pf: board private structure **/ u32 i40e_get_global_fd_count(struct i40e_pf *pf) { u32 val, fcnt_prog; val = rd32(&pf->hw, I40E_GLQF_FDCNT_0); fcnt_prog = (val & I40E_GLQF_FDCNT_0_GUARANT_CNT_MASK) + FIELD_GET(I40E_GLQF_FDCNT_0_BESTCNT_MASK, val); return fcnt_prog; } /** * i40e_reenable_fdir_sb - Restore FDir SB capability * @pf: board private structure **/ static void i40e_reenable_fdir_sb(struct i40e_pf *pf) { if (test_and_clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) && (I40E_DEBUG_FD & pf->hw.debug_mask)) dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n"); } /** * i40e_reenable_fdir_atr - Restore FDir ATR capability * @pf: board private structure **/ static void i40e_reenable_fdir_atr(struct i40e_pf *pf) { if (test_and_clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state)) { /* ATR uses the same filtering logic as SB rules. It only * functions properly if the input set mask is at the default * settings. It is safe to restore the default input set * because there are no active TCPv4 filter rules. */ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_TCP, I40E_L3_SRC_MASK | I40E_L3_DST_MASK | I40E_L4_SRC_MASK | I40E_L4_DST_MASK); if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) && (I40E_DEBUG_FD & pf->hw.debug_mask)) dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n"); } } /** * i40e_delete_invalid_filter - Delete an invalid FDIR filter * @pf: board private structure * @filter: FDir filter to remove */ static void i40e_delete_invalid_filter(struct i40e_pf *pf, struct i40e_fdir_filter *filter) { /* Update counters */ pf->fdir_pf_active_filters--; pf->fd_inv = 0; switch (filter->flow_type) { case TCP_V4_FLOW: pf->fd_tcp4_filter_cnt--; break; case UDP_V4_FLOW: pf->fd_udp4_filter_cnt--; break; case SCTP_V4_FLOW: pf->fd_sctp4_filter_cnt--; break; case TCP_V6_FLOW: pf->fd_tcp6_filter_cnt--; break; case UDP_V6_FLOW: pf->fd_udp6_filter_cnt--; break; case SCTP_V6_FLOW: pf->fd_udp6_filter_cnt--; break; case IP_USER_FLOW: switch (filter->ipl4_proto) { case IPPROTO_TCP: pf->fd_tcp4_filter_cnt--; break; case IPPROTO_UDP: pf->fd_udp4_filter_cnt--; break; case IPPROTO_SCTP: pf->fd_sctp4_filter_cnt--; break; case IPPROTO_IP: pf->fd_ip4_filter_cnt--; break; } break; case IPV6_USER_FLOW: switch (filter->ipl4_proto) { case IPPROTO_TCP: pf->fd_tcp6_filter_cnt--; break; case IPPROTO_UDP: pf->fd_udp6_filter_cnt--; break; case IPPROTO_SCTP: pf->fd_sctp6_filter_cnt--; break; case IPPROTO_IP: pf->fd_ip6_filter_cnt--; break; } break; } /* Remove the filter from the list and free memory */ hlist_del(&filter->fdir_node); kfree(filter); } /** * i40e_fdir_check_and_reenable - Function to reenabe FD ATR or SB if disabled * @pf: board private structure **/ void i40e_fdir_check_and_reenable(struct i40e_pf *pf) { struct i40e_fdir_filter *filter; u32 fcnt_prog, fcnt_avail; struct hlist_node *node; if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state)) return; /* Check if we have enough room to re-enable FDir SB capability. */ fcnt_prog = i40e_get_global_fd_count(pf); fcnt_avail = pf->fdir_pf_filter_count; if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM)) || (pf->fd_add_err == 0) || (i40e_get_current_atr_cnt(pf) < pf->fd_atr_cnt)) i40e_reenable_fdir_sb(pf); /* We should wait for even more space before re-enabling ATR. * Additionally, we cannot enable ATR as long as we still have TCP SB * rules active. */ if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) && pf->fd_tcp4_filter_cnt == 0 && pf->fd_tcp6_filter_cnt == 0) i40e_reenable_fdir_atr(pf); /* if hw had a problem adding a filter, delete it */ if (pf->fd_inv > 0) { hlist_for_each_entry_safe(filter, node, &pf->fdir_filter_list, fdir_node) if (filter->fd_id == pf->fd_inv) i40e_delete_invalid_filter(pf, filter); } } #define I40E_MIN_FD_FLUSH_INTERVAL 10 #define I40E_MIN_FD_FLUSH_SB_ATR_UNSTABLE 30 /** * i40e_fdir_flush_and_replay - Function to flush all FD filters and replay SB * @pf: board private structure **/ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf) { unsigned long min_flush_time; int flush_wait_retry = 50; bool disable_atr = false; int fd_room; int reg; if (!time_after(jiffies, pf->fd_flush_timestamp + (I40E_MIN_FD_FLUSH_INTERVAL * HZ))) return; /* If the flush is happening too quick and we have mostly SB rules we * should not re-enable ATR for some time. */ min_flush_time = pf->fd_flush_timestamp + (I40E_MIN_FD_FLUSH_SB_ATR_UNSTABLE * HZ); fd_room = pf->fdir_pf_filter_count - pf->fdir_pf_active_filters; if (!(time_after(jiffies, min_flush_time)) && (fd_room < I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) { if (I40E_DEBUG_FD & pf->hw.debug_mask) dev_info(&pf->pdev->dev, "ATR disabled, not enough FD filter space.\n"); disable_atr = true; } pf->fd_flush_timestamp = jiffies; set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state); /* flush all filters */ wr32(&pf->hw, I40E_PFQF_CTL_1, I40E_PFQF_CTL_1_CLEARFDTABLE_MASK); i40e_flush(&pf->hw); pf->fd_flush_cnt++; pf->fd_add_err = 0; do { /* Check FD flush status every 5-6msec */ usleep_range(5000, 6000); reg = rd32(&pf->hw, I40E_PFQF_CTL_1); if (!(reg & I40E_PFQF_CTL_1_CLEARFDTABLE_MASK)) break; } while (flush_wait_retry--); if (reg & I40E_PFQF_CTL_1_CLEARFDTABLE_MASK) { dev_warn(&pf->pdev->dev, "FD table did not flush, needs more time\n"); } else { /* replay sideband filters */ i40e_fdir_filter_restore(i40e_pf_get_main_vsi(pf)); if (!disable_atr && !pf->fd_tcp4_filter_cnt) clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state); clear_bit(__I40E_FD_FLUSH_REQUESTED, pf->state); if (I40E_DEBUG_FD & pf->hw.debug_mask) dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n"); } } /** * i40e_get_current_atr_cnt - Get the count of total FD ATR filters programmed * @pf: board private structure **/ u32 i40e_get_current_atr_cnt(struct i40e_pf *pf) { return i40e_get_current_fd_count(pf) - pf->fdir_pf_active_filters; } /** * i40e_fdir_reinit_subtask - Worker thread to reinit FDIR filter table * @pf: board private structure **/ static void i40e_fdir_reinit_subtask(struct i40e_pf *pf) { /* if interface is down do nothing */ if (test_bit(__I40E_DOWN, pf->state)) return; if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state)) i40e_fdir_flush_and_replay(pf); i40e_fdir_check_and_reenable(pf); } /** * i40e_vsi_link_event - notify VSI of a link event * @vsi: vsi to be notified * @link_up: link up or down **/ static void i40e_vsi_link_event(struct i40e_vsi *vsi, bool link_up) { if (!vsi || test_bit(__I40E_VSI_DOWN, vsi->state)) return; switch (vsi->type) { case I40E_VSI_MAIN: if (!vsi->netdev || !vsi->netdev_registered) break; if (link_up) { netif_carrier_on(vsi->netdev); netif_tx_wake_all_queues(vsi->netdev); } else { netif_carrier_off(vsi->netdev); netif_tx_stop_all_queues(vsi->netdev); } break; case I40E_VSI_SRIOV: case I40E_VSI_VMDQ2: case I40E_VSI_CTRL: case I40E_VSI_IWARP: case I40E_VSI_MIRROR: default: /* there is no notification for other VSIs */ break; } } /** * i40e_veb_link_event - notify elements on the veb of a link event * @veb: veb to be notified * @link_up: link up or down **/ static void i40e_veb_link_event(struct i40e_veb *veb, bool link_up) { struct i40e_vsi *vsi; struct i40e_pf *pf; int i; if (!veb || !veb->pf) return; pf = veb->pf; /* Send link event to contained VSIs */ i40e_pf_for_each_vsi(pf, i, vsi) if (vsi->uplink_seid == veb->seid) i40e_vsi_link_event(vsi, link_up); } /** * i40e_link_event - Update netif_carrier status * @pf: board private structure **/ static void i40e_link_event(struct i40e_pf *pf) { struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_veb *veb = i40e_pf_get_main_veb(pf); u8 new_link_speed, old_link_speed; bool new_link, old_link; int status; #ifdef CONFIG_I40E_DCB int err; #endif /* CONFIG_I40E_DCB */ /* set this to force the get_link_status call to refresh state */ pf->hw.phy.get_link_info = true; old_link = (pf->hw.phy.link_info_old.link_info & I40E_AQ_LINK_UP); status = i40e_get_link_status(&pf->hw, &new_link); /* On success, disable temp link polling */ if (status == 0) { clear_bit(__I40E_TEMP_LINK_POLLING, pf->state); } else { /* Enable link polling temporarily until i40e_get_link_status * returns 0 */ set_bit(__I40E_TEMP_LINK_POLLING, pf->state); dev_dbg(&pf->pdev->dev, "couldn't get link state, status: %d\n", status); return; } old_link_speed = pf->hw.phy.link_info_old.link_speed; new_link_speed = pf->hw.phy.link_info.link_speed; if (new_link == old_link && new_link_speed == old_link_speed && (test_bit(__I40E_VSI_DOWN, vsi->state) || new_link == netif_carrier_ok(vsi->netdev))) return; i40e_print_link_message(vsi, new_link); /* Notify the base of the switch tree connected to * the link. Floating VEBs are not notified. */ if (veb) i40e_veb_link_event(veb, new_link); else i40e_vsi_link_event(vsi, new_link); if (pf->vf) i40e_vc_notify_link_state(pf); if (test_bit(I40E_FLAG_PTP_ENA, pf->flags)) i40e_ptp_set_increment(pf); #ifdef CONFIG_I40E_DCB if (new_link == old_link) return; /* Not SW DCB so firmware will take care of default settings */ if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) return; /* We cover here only link down, as after link up in case of SW DCB * SW LLDP agent will take care of setting it up */ if (!new_link) { dev_dbg(&pf->pdev->dev, "Reconfig DCB to single TC as result of Link Down\n"); memset(&pf->tmp_cfg, 0, sizeof(pf->tmp_cfg)); err = i40e_dcb_sw_default_config(pf); if (err) { clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); clear_bit(I40E_FLAG_DCB_ENA, pf->flags); } else { pf->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); clear_bit(I40E_FLAG_DCB_ENA, pf->flags); } } #endif /* CONFIG_I40E_DCB */ } /** * i40e_watchdog_subtask - periodic checks not using event driven response * @pf: board private structure **/ static void i40e_watchdog_subtask(struct i40e_pf *pf) { struct i40e_vsi *vsi; struct i40e_veb *veb; int i; /* if interface is down do nothing */ if (test_bit(__I40E_DOWN, pf->state) || test_bit(__I40E_CONFIG_BUSY, pf->state)) return; /* make sure we don't do these things too often */ if (time_before(jiffies, (pf->service_timer_previous + pf->service_timer_period))) return; pf->service_timer_previous = jiffies; if (test_bit(I40E_FLAG_LINK_POLLING_ENA, pf->flags) || test_bit(__I40E_TEMP_LINK_POLLING, pf->state)) i40e_link_event(pf); /* Update the stats for active netdevs so the network stack * can look at updated numbers whenever it cares to */ i40e_pf_for_each_vsi(pf, i, vsi) if (vsi->netdev) i40e_update_stats(vsi); if (test_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags)) { /* Update the stats for the active switching components */ i40e_pf_for_each_veb(pf, i, veb) i40e_update_veb_stats(veb); } i40e_ptp_rx_hang(pf); i40e_ptp_tx_hang(pf); } /** * i40e_reset_subtask - Set up for resetting the device and driver * @pf: board private structure **/ static void i40e_reset_subtask(struct i40e_pf *pf) { u32 reset_flags = 0; if (test_bit(__I40E_REINIT_REQUESTED, pf->state)) { reset_flags |= BIT(__I40E_REINIT_REQUESTED); clear_bit(__I40E_REINIT_REQUESTED, pf->state); } if (test_bit(__I40E_PF_RESET_REQUESTED, pf->state)) { reset_flags |= BIT(__I40E_PF_RESET_REQUESTED); clear_bit(__I40E_PF_RESET_REQUESTED, pf->state); } if (test_bit(__I40E_CORE_RESET_REQUESTED, pf->state)) { reset_flags |= BIT(__I40E_CORE_RESET_REQUESTED); clear_bit(__I40E_CORE_RESET_REQUESTED, pf->state); } if (test_bit(__I40E_GLOBAL_RESET_REQUESTED, pf->state)) { reset_flags |= BIT(__I40E_GLOBAL_RESET_REQUESTED); clear_bit(__I40E_GLOBAL_RESET_REQUESTED, pf->state); } if (test_bit(__I40E_DOWN_REQUESTED, pf->state)) { reset_flags |= BIT(__I40E_DOWN_REQUESTED); clear_bit(__I40E_DOWN_REQUESTED, pf->state); } /* If there's a recovery already waiting, it takes * precedence before starting a new reset sequence. */ if (test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) { i40e_prep_for_reset(pf); i40e_reset(pf); i40e_rebuild(pf, false, false); } /* If we're already down or resetting, just bail */ if (reset_flags && !test_bit(__I40E_DOWN, pf->state) && !test_bit(__I40E_CONFIG_BUSY, pf->state)) { i40e_do_reset(pf, reset_flags, false); } } /** * i40e_handle_link_event - Handle link event * @pf: board private structure * @e: event info posted on ARQ **/ static void i40e_handle_link_event(struct i40e_pf *pf, struct i40e_arq_event_info *e) { struct i40e_aqc_get_link_status *status = (struct i40e_aqc_get_link_status *)&e->desc.params.raw; /* Do a new status request to re-enable LSE reporting * and load new status information into the hw struct * This completely ignores any state information * in the ARQ event info, instead choosing to always * issue the AQ update link status command. */ i40e_link_event(pf); /* Check if module meets thermal requirements */ if (status->phy_type == I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP) { dev_err(&pf->pdev->dev, "Rx/Tx is disabled on this device because the module does not meet thermal requirements.\n"); dev_err(&pf->pdev->dev, "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n"); } else { /* check for unqualified module, if link is down, suppress * the message if link was forced to be down. */ if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) && (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) && (!(status->link_info & I40E_AQ_LINK_UP)) && (!test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags))) { dev_err(&pf->pdev->dev, "Rx/Tx is disabled on this device because an unsupported SFP module type was detected.\n"); dev_err(&pf->pdev->dev, "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n"); } } } /** * i40e_clean_adminq_subtask - Clean the AdminQ rings * @pf: board private structure **/ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) { struct i40e_arq_event_info event; struct i40e_hw *hw = &pf->hw; u16 pending, i = 0; u16 opcode; u32 oldval; int ret; u32 val; /* Do not run clean AQ when PF reset fails */ if (test_bit(__I40E_RESET_FAILED, pf->state)) return; /* check for error indications */ val = rd32(&pf->hw, I40E_PF_ARQLEN); oldval = val; if (val & I40E_PF_ARQLEN_ARQVFE_MASK) { if (hw->debug_mask & I40E_DEBUG_AQ) dev_info(&pf->pdev->dev, "ARQ VF Error detected\n"); val &= ~I40E_PF_ARQLEN_ARQVFE_MASK; } if (val & I40E_PF_ARQLEN_ARQOVFL_MASK) { if (hw->debug_mask & I40E_DEBUG_AQ) dev_info(&pf->pdev->dev, "ARQ Overflow Error detected\n"); val &= ~I40E_PF_ARQLEN_ARQOVFL_MASK; pf->arq_overflows++; } if (val & I40E_PF_ARQLEN_ARQCRIT_MASK) { if (hw->debug_mask & I40E_DEBUG_AQ) dev_info(&pf->pdev->dev, "ARQ Critical Error detected\n"); val &= ~I40E_PF_ARQLEN_ARQCRIT_MASK; } if (oldval != val) wr32(&pf->hw, I40E_PF_ARQLEN, val); val = rd32(&pf->hw, I40E_PF_ATQLEN); oldval = val; if (val & I40E_PF_ATQLEN_ATQVFE_MASK) { if (pf->hw.debug_mask & I40E_DEBUG_AQ) dev_info(&pf->pdev->dev, "ASQ VF Error detected\n"); val &= ~I40E_PF_ATQLEN_ATQVFE_MASK; } if (val & I40E_PF_ATQLEN_ATQOVFL_MASK) { if (pf->hw.debug_mask & I40E_DEBUG_AQ) dev_info(&pf->pdev->dev, "ASQ Overflow Error detected\n"); val &= ~I40E_PF_ATQLEN_ATQOVFL_MASK; } if (val & I40E_PF_ATQLEN_ATQCRIT_MASK) { if (pf->hw.debug_mask & I40E_DEBUG_AQ) dev_info(&pf->pdev->dev, "ASQ Critical Error detected\n"); val &= ~I40E_PF_ATQLEN_ATQCRIT_MASK; } if (oldval != val) wr32(&pf->hw, I40E_PF_ATQLEN, val); event.buf_len = I40E_MAX_AQ_BUF_SIZE; event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL); if (!event.msg_buf) return; do { ret = i40e_clean_arq_element(hw, &event, &pending); if (ret == -EALREADY) break; else if (ret) { dev_info(&pf->pdev->dev, "ARQ event error %d\n", ret); break; } opcode = le16_to_cpu(event.desc.opcode); switch (opcode) { case i40e_aqc_opc_get_link_status: rtnl_lock(); i40e_handle_link_event(pf, &event); rtnl_unlock(); break; case i40e_aqc_opc_send_msg_to_pf: ret = i40e_vc_process_vf_msg(pf, le16_to_cpu(event.desc.retval), le32_to_cpu(event.desc.cookie_high), le32_to_cpu(event.desc.cookie_low), event.msg_buf, event.msg_len); break; case i40e_aqc_opc_lldp_update_mib: dev_dbg(&pf->pdev->dev, "ARQ: Update LLDP MIB event received\n"); #ifdef CONFIG_I40E_DCB rtnl_lock(); i40e_handle_lldp_event(pf, &event); rtnl_unlock(); #endif /* CONFIG_I40E_DCB */ break; case i40e_aqc_opc_event_lan_overflow: dev_dbg(&pf->pdev->dev, "ARQ LAN queue overflow event received\n"); i40e_handle_lan_overflow_event(pf, &event); break; case i40e_aqc_opc_send_msg_to_peer: dev_info(&pf->pdev->dev, "ARQ: Msg from other pf\n"); break; case i40e_aqc_opc_nvm_erase: case i40e_aqc_opc_nvm_update: case i40e_aqc_opc_oem_post_update: i40e_debug(&pf->hw, I40E_DEBUG_NVM, "ARQ NVM operation 0x%04x completed\n", opcode); break; default: dev_info(&pf->pdev->dev, "ARQ: Unknown event 0x%04x ignored\n", opcode); break; } } while (i++ < I40E_AQ_WORK_LIMIT); if (i < I40E_AQ_WORK_LIMIT) clear_bit(__I40E_ADMINQ_EVENT_PENDING, pf->state); /* re-enable Admin queue interrupt cause */ val = rd32(hw, I40E_PFINT_ICR0_ENA); val |= I40E_PFINT_ICR0_ENA_ADMINQ_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, val); i40e_flush(hw); kfree(event.msg_buf); } /** * i40e_verify_eeprom - make sure eeprom is good to use * @pf: board private structure **/ static void i40e_verify_eeprom(struct i40e_pf *pf) { int err; err = i40e_diag_eeprom_test(&pf->hw); if (err) { /* retry in case of garbage read */ err = i40e_diag_eeprom_test(&pf->hw); if (err) { dev_info(&pf->pdev->dev, "eeprom check failed (%d), Tx/Rx traffic disabled\n", err); set_bit(__I40E_BAD_EEPROM, pf->state); } } if (!err && test_bit(__I40E_BAD_EEPROM, pf->state)) { dev_info(&pf->pdev->dev, "eeprom check passed, Tx/Rx traffic enabled\n"); clear_bit(__I40E_BAD_EEPROM, pf->state); } } /** * i40e_enable_pf_switch_lb * @pf: pointer to the PF structure * * enable switch loop back or die - no point in a return value **/ static void i40e_enable_pf_switch_lb(struct i40e_pf *pf) { struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_vsi_context ctxt; int ret; ctxt.seid = pf->main_vsi_seid; ctxt.pf_num = pf->hw.pf_id; ctxt.vf_num = 0; ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL); if (ret) { dev_info(&pf->pdev->dev, "couldn't get PF vsi config, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); return; } ctxt.flags = I40E_AQ_VSI_TYPE_PF; ctxt.info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); ctxt.info.switch_id |= cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB); ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); if (ret) { dev_info(&pf->pdev->dev, "update vsi switch failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); } } /** * i40e_disable_pf_switch_lb * @pf: pointer to the PF structure * * disable switch loop back or die - no point in a return value **/ static void i40e_disable_pf_switch_lb(struct i40e_pf *pf) { struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_vsi_context ctxt; int ret; ctxt.seid = pf->main_vsi_seid; ctxt.pf_num = pf->hw.pf_id; ctxt.vf_num = 0; ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL); if (ret) { dev_info(&pf->pdev->dev, "couldn't get PF vsi config, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); return; } ctxt.flags = I40E_AQ_VSI_TYPE_PF; ctxt.info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); ctxt.info.switch_id &= ~cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB); ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); if (ret) { dev_info(&pf->pdev->dev, "update vsi switch failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); } } /** * i40e_config_bridge_mode - Configure the HW bridge mode * @veb: pointer to the bridge instance * * Configure the loop back mode for the LAN VSI that is downlink to the * specified HW bridge instance. It is expected this function is called * when a new HW bridge is instantiated. **/ static void i40e_config_bridge_mode(struct i40e_veb *veb) { struct i40e_pf *pf = veb->pf; if (pf->hw.debug_mask & I40E_DEBUG_LAN) dev_info(&pf->pdev->dev, "enabling bridge mode: %s\n", veb->bridge_mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB"); if (veb->bridge_mode & BRIDGE_MODE_VEPA) i40e_disable_pf_switch_lb(pf); else i40e_enable_pf_switch_lb(pf); } /** * i40e_reconstitute_veb - rebuild the VEB and VSIs connected to it * @veb: pointer to the VEB instance * * This is a function that builds the attached VSIs. We track the connections * through our own index numbers because the seid's from the HW could change * across the reset. **/ static int i40e_reconstitute_veb(struct i40e_veb *veb) { struct i40e_vsi *ctl_vsi = NULL; struct i40e_pf *pf = veb->pf; struct i40e_vsi *vsi; int v, ret; /* As we do not maintain PV (port virtualizer) switch element then * there can be only one non-floating VEB that have uplink to MAC SEID * and its control VSI is the main one. */ if (WARN_ON(veb->uplink_seid && veb->uplink_seid != pf->mac_seid)) { dev_err(&pf->pdev->dev, "Invalid uplink SEID for VEB %d\n", veb->idx); return -ENOENT; } if (veb->uplink_seid == pf->mac_seid) { /* Check that the LAN VSI has VEB owning flag set */ ctl_vsi = i40e_pf_get_main_vsi(pf); if (WARN_ON(ctl_vsi->veb_idx != veb->idx || !(ctl_vsi->flags & I40E_VSI_FLAG_VEB_OWNER))) { dev_err(&pf->pdev->dev, "Invalid control VSI for VEB %d\n", veb->idx); return -ENOENT; } /* Add the control VSI to switch */ ret = i40e_add_vsi(ctl_vsi); if (ret) { dev_err(&pf->pdev->dev, "Rebuild of owner VSI for VEB %d failed: %d\n", veb->idx, ret); return ret; } i40e_vsi_reset_stats(ctl_vsi); } /* create the VEB in the switch and move the VSI onto the VEB */ ret = i40e_add_veb(veb, ctl_vsi); if (ret) return ret; if (veb->uplink_seid) { if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) veb->bridge_mode = BRIDGE_MODE_VEB; else veb->bridge_mode = BRIDGE_MODE_VEPA; i40e_config_bridge_mode(veb); } /* create the remaining VSIs attached to this VEB */ i40e_pf_for_each_vsi(pf, v, vsi) { if (vsi == ctl_vsi) continue; if (vsi->veb_idx == veb->idx) { vsi->uplink_seid = veb->seid; ret = i40e_add_vsi(vsi); if (ret) { dev_info(&pf->pdev->dev, "rebuild of vsi_idx %d failed: %d\n", v, ret); return ret; } i40e_vsi_reset_stats(vsi); } } return ret; } /** * i40e_get_capabilities - get info about the HW * @pf: the PF struct * @list_type: AQ capability to be queried **/ static int i40e_get_capabilities(struct i40e_pf *pf, enum i40e_admin_queue_opc list_type) { struct i40e_aqc_list_capabilities_element_resp *cap_buf; u16 data_size; int buf_len; int err; buf_len = 40 * sizeof(struct i40e_aqc_list_capabilities_element_resp); do { cap_buf = kzalloc(buf_len, GFP_KERNEL); if (!cap_buf) return -ENOMEM; /* this loads the data into the hw struct for us */ err = i40e_aq_discover_capabilities(&pf->hw, cap_buf, buf_len, &data_size, list_type, NULL); /* data loaded, buffer no longer needed */ kfree(cap_buf); if (pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOMEM) { /* retry with a larger buffer */ buf_len = data_size; } else if (pf->hw.aq.asq_last_status != I40E_AQ_RC_OK || err) { dev_info(&pf->pdev->dev, "capability discovery failed, err %pe aq_err %s\n", ERR_PTR(err), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); return -ENODEV; } } while (err); if (pf->hw.debug_mask & I40E_DEBUG_USER) { if (list_type == i40e_aqc_opc_list_func_capabilities) { dev_info(&pf->pdev->dev, "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n", pf->hw.pf_id, pf->hw.func_caps.num_vfs, pf->hw.func_caps.num_msix_vectors, pf->hw.func_caps.num_msix_vectors_vf, pf->hw.func_caps.fd_filters_guaranteed, pf->hw.func_caps.fd_filters_best_effort, pf->hw.func_caps.num_tx_qp, pf->hw.func_caps.num_vsis); } else if (list_type == i40e_aqc_opc_list_dev_capabilities) { dev_info(&pf->pdev->dev, "switch_mode=0x%04x, function_valid=0x%08x\n", pf->hw.dev_caps.switch_mode, pf->hw.dev_caps.valid_functions); dev_info(&pf->pdev->dev, "SR-IOV=%d, num_vfs for all function=%u\n", pf->hw.dev_caps.sr_iov_1_1, pf->hw.dev_caps.num_vfs); dev_info(&pf->pdev->dev, "num_vsis=%u, num_rx:%u, num_tx=%u\n", pf->hw.dev_caps.num_vsis, pf->hw.dev_caps.num_rx_qp, pf->hw.dev_caps.num_tx_qp); } } if (list_type == i40e_aqc_opc_list_func_capabilities) { #define DEF_NUM_VSI (1 + (pf->hw.func_caps.fcoe ? 1 : 0) \ + pf->hw.func_caps.num_vfs) if (pf->hw.revision_id == 0 && pf->hw.func_caps.num_vsis < DEF_NUM_VSI) { dev_info(&pf->pdev->dev, "got num_vsis %d, setting num_vsis to %d\n", pf->hw.func_caps.num_vsis, DEF_NUM_VSI); pf->hw.func_caps.num_vsis = DEF_NUM_VSI; } } return 0; } static int i40e_vsi_clear(struct i40e_vsi *vsi); /** * i40e_fdir_sb_setup - initialize the Flow Director resources for Sideband * @pf: board private structure **/ static void i40e_fdir_sb_setup(struct i40e_pf *pf) { struct i40e_vsi *main_vsi, *vsi; /* quick workaround for an NVM issue that leaves a critical register * uninitialized */ if (!rd32(&pf->hw, I40E_GLQF_HKEY(0))) { static const u32 hkey[] = { 0xe640d33f, 0xcdfe98ab, 0x73fa7161, 0x0d7a7d36, 0xeacb7d61, 0xaa4f05b6, 0x9c5c89ed, 0xfc425ddb, 0xa4654832, 0xfc7461d4, 0x8f827619, 0xf5c63c21, 0x95b3a76d}; int i; for (i = 0; i <= I40E_GLQF_HKEY_MAX_INDEX; i++) wr32(&pf->hw, I40E_GLQF_HKEY(i), hkey[i]); } if (!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) return; /* find existing VSI and see if it needs configuring */ vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR); /* create a new VSI if none exists */ if (!vsi) { main_vsi = i40e_pf_get_main_vsi(pf); vsi = i40e_vsi_setup(pf, I40E_VSI_FDIR, main_vsi->seid, 0); if (!vsi) { dev_info(&pf->pdev->dev, "Couldn't create FDir VSI\n"); clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); return; } } i40e_vsi_setup_irqhandler(vsi, i40e_fdir_clean_ring); } /** * i40e_fdir_teardown - release the Flow Director resources * @pf: board private structure **/ static void i40e_fdir_teardown(struct i40e_pf *pf) { struct i40e_vsi *vsi; i40e_fdir_filter_exit(pf); vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR); if (vsi) i40e_vsi_release(vsi); } /** * i40e_rebuild_cloud_filters - Rebuilds cloud filters for VSIs * @vsi: PF main vsi * @seid: seid of main or channel VSIs * * Rebuilds cloud filters associated with main VSI and channel VSIs if they * existed before reset **/ static int i40e_rebuild_cloud_filters(struct i40e_vsi *vsi, u16 seid) { struct i40e_cloud_filter *cfilter; struct i40e_pf *pf = vsi->back; struct hlist_node *node; int ret; /* Add cloud filters back if they exist */ hlist_for_each_entry_safe(cfilter, node, &pf->cloud_filter_list, cloud_node) { if (cfilter->seid != seid) continue; if (cfilter->dst_port) ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true); else ret = i40e_add_del_cloud_filter(vsi, cfilter, true); if (ret) { dev_dbg(&pf->pdev->dev, "Failed to rebuild cloud filter, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); return ret; } } return 0; } /** * i40e_rebuild_channels - Rebuilds channel VSIs if they existed before reset * @vsi: PF main vsi * * Rebuilds channel VSIs if they existed before reset **/ static int i40e_rebuild_channels(struct i40e_vsi *vsi) { struct i40e_channel *ch, *ch_tmp; int ret; if (list_empty(&vsi->ch_list)) return 0; list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { if (!ch->initialized) break; /* Proceed with creation of channel (VMDq2) VSI */ ret = i40e_add_channel(vsi->back, vsi->uplink_seid, ch); if (ret) { dev_info(&vsi->back->pdev->dev, "failed to rebuild channels using uplink_seid %u\n", vsi->uplink_seid); return ret; } /* Reconfigure TX queues using QTX_CTL register */ ret = i40e_channel_config_tx_ring(vsi->back, vsi, ch); if (ret) { dev_info(&vsi->back->pdev->dev, "failed to configure TX rings for channel %u\n", ch->seid); return ret; } /* update 'next_base_queue' */ vsi->next_base_queue = vsi->next_base_queue + ch->num_queue_pairs; if (ch->max_tx_rate) { u64 credits = ch->max_tx_rate; if (i40e_set_bw_limit(vsi, ch->seid, ch->max_tx_rate)) return -EINVAL; do_div(credits, I40E_BW_CREDIT_DIVISOR); dev_dbg(&vsi->back->pdev->dev, "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n", ch->max_tx_rate, credits, ch->seid); } ret = i40e_rebuild_cloud_filters(vsi, ch->seid); if (ret) { dev_dbg(&vsi->back->pdev->dev, "Failed to rebuild cloud filters for channel VSI %u\n", ch->seid); return ret; } } return 0; } /** * i40e_clean_xps_state - clean xps state for every tx_ring * @vsi: ptr to the VSI **/ static void i40e_clean_xps_state(struct i40e_vsi *vsi) { int i; if (vsi->tx_rings) for (i = 0; i < vsi->num_queue_pairs; i++) if (vsi->tx_rings[i]) clear_bit(__I40E_TX_XPS_INIT_DONE, vsi->tx_rings[i]->state); } /** * i40e_prep_for_reset - prep for the core to reset * @pf: board private structure * * Close up the VFs and other things in prep for PF Reset. **/ static void i40e_prep_for_reset(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; struct i40e_vsi *vsi; int ret = 0; u32 v; clear_bit(__I40E_RESET_INTR_RECEIVED, pf->state); if (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) return; if (i40e_check_asq_alive(&pf->hw)) i40e_vc_notify_reset(pf); dev_dbg(&pf->pdev->dev, "Tearing down internal switch for reset\n"); /* quiesce the VSIs and their queues that are not already DOWN */ i40e_pf_quiesce_all_vsi(pf); i40e_pf_for_each_vsi(pf, v, vsi) { i40e_clean_xps_state(vsi); vsi->seid = 0; } i40e_shutdown_adminq(&pf->hw); /* call shutdown HMC */ if (hw->hmc.hmc_obj) { ret = i40e_shutdown_lan_hmc(hw); if (ret) dev_warn(&pf->pdev->dev, "shutdown_lan_hmc failed: %d\n", ret); } /* Save the current PTP time so that we can restore the time after the * reset completes. */ i40e_ptp_save_hw_time(pf); } /** * i40e_send_version - update firmware with driver version * @pf: PF struct */ static void i40e_send_version(struct i40e_pf *pf) { struct i40e_driver_version dv; dv.major_version = 0xff; dv.minor_version = 0xff; dv.build_version = 0xff; dv.subbuild_version = 0; strscpy(dv.driver_string, UTS_RELEASE, sizeof(dv.driver_string)); i40e_aq_send_driver_version(&pf->hw, &dv, NULL); } /** * i40e_get_oem_version - get OEM specific version information * @hw: pointer to the hardware structure **/ static void i40e_get_oem_version(struct i40e_hw *hw) { u16 block_offset = 0xffff; u16 block_length = 0; u16 capabilities = 0; u16 gen_snap = 0; u16 release = 0; #define I40E_SR_NVM_OEM_VERSION_PTR 0x1B #define I40E_NVM_OEM_LENGTH_OFFSET 0x00 #define I40E_NVM_OEM_CAPABILITIES_OFFSET 0x01 #define I40E_NVM_OEM_GEN_OFFSET 0x02 #define I40E_NVM_OEM_RELEASE_OFFSET 0x03 #define I40E_NVM_OEM_CAPABILITIES_MASK 0x000F #define I40E_NVM_OEM_LENGTH 3 /* Check if pointer to OEM version block is valid. */ i40e_read_nvm_word(hw, I40E_SR_NVM_OEM_VERSION_PTR, &block_offset); if (block_offset == 0xffff) return; /* Check if OEM version block has correct length. */ i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_LENGTH_OFFSET, &block_length); if (block_length < I40E_NVM_OEM_LENGTH) return; /* Check if OEM version format is as expected. */ i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_CAPABILITIES_OFFSET, &capabilities); if ((capabilities & I40E_NVM_OEM_CAPABILITIES_MASK) != 0) return; i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_GEN_OFFSET, &gen_snap); i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_RELEASE_OFFSET, &release); hw->nvm.oem_ver = FIELD_PREP(I40E_OEM_GEN_MASK | I40E_OEM_SNAP_MASK, gen_snap) | FIELD_PREP(I40E_OEM_RELEASE_MASK, release); hw->nvm.eetrack = I40E_OEM_EETRACK_ID; } /** * i40e_reset - wait for core reset to finish reset, reset pf if corer not seen * @pf: board private structure **/ static int i40e_reset(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; int ret; ret = i40e_pf_reset(hw); if (ret) { dev_info(&pf->pdev->dev, "PF reset failed, %d\n", ret); set_bit(__I40E_RESET_FAILED, pf->state); clear_bit(__I40E_RESET_RECOVERY_PENDING, pf->state); } else { pf->pfr_count++; } return ret; } /** * i40e_rebuild - rebuild using a saved config * @pf: board private structure * @reinit: if the Main VSI needs to re-initialized. * @lock_acquired: indicates whether or not the lock has been acquired * before this function was called. **/ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) { const bool is_recovery_mode_reported = i40e_check_recovery_mode(pf); struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_hw *hw = &pf->hw; struct i40e_veb *veb; int ret; u32 val; int v; if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) && is_recovery_mode_reported) i40e_set_ethtool_ops(vsi->netdev); if (test_bit(__I40E_DOWN, pf->state) && !test_bit(__I40E_RECOVERY_MODE, pf->state)) goto clear_recovery; dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n"); /* rebuild the basics for the AdminQ, HMC, and initial HW switch */ ret = i40e_init_adminq(&pf->hw); if (ret) { dev_info(&pf->pdev->dev, "Rebuild AdminQ failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); goto clear_recovery; } i40e_get_oem_version(&pf->hw); if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) { /* The following delay is necessary for firmware update. */ mdelay(1000); } /* re-verify the eeprom if we just had an EMP reset */ if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) i40e_verify_eeprom(pf); /* if we are going out of or into recovery mode we have to act * accordingly with regard to resources initialization * and deinitialization */ if (test_bit(__I40E_RECOVERY_MODE, pf->state)) { if (i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities)) goto end_unlock; if (is_recovery_mode_reported) { /* we're staying in recovery mode so we'll reinitialize * misc vector here */ if (i40e_setup_misc_vector_for_recovery_mode(pf)) goto end_unlock; } else { if (!lock_acquired) rtnl_lock(); /* we're going out of recovery mode so we'll free * the IRQ allocated specifically for recovery mode * and restore the interrupt scheme */ free_irq(pf->pdev->irq, pf); i40e_clear_interrupt_scheme(pf); if (i40e_restore_interrupt_scheme(pf)) goto end_unlock; } /* tell the firmware that we're starting */ i40e_send_version(pf); /* bail out in case recovery mode was detected, as there is * no need for further configuration. */ goto end_unlock; } i40e_clear_pxe_mode(hw); ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities); if (ret) goto end_core_reset; ret = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, 0, 0); if (ret) { dev_info(&pf->pdev->dev, "init_lan_hmc failed: %d\n", ret); goto end_core_reset; } ret = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY); if (ret) { dev_info(&pf->pdev->dev, "configure_lan_hmc failed: %d\n", ret); goto end_core_reset; } #ifdef CONFIG_I40E_DCB /* Enable FW to write a default DCB config on link-up * unless I40E_FLAG_TC_MQPRIO was enabled or DCB * is not supported with new link speed */ if (i40e_is_tc_mqprio_enabled(pf)) { i40e_aq_set_dcb_parameters(hw, false, NULL); } else { if (I40E_IS_X710TL_DEVICE(hw->device_id) && (hw->phy.link_info.link_speed & (I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB))) { i40e_aq_set_dcb_parameters(hw, false, NULL); dev_warn(&pf->pdev->dev, "DCB is not supported for X710-T*L 2.5/5G speeds\n"); clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); } else { i40e_aq_set_dcb_parameters(hw, true, NULL); ret = i40e_init_pf_dcb(pf); if (ret) { dev_info(&pf->pdev->dev, "DCB init failed %d, disabled\n", ret); clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); /* Continue without DCB enabled */ } } } #endif /* CONFIG_I40E_DCB */ if (!lock_acquired) rtnl_lock(); ret = i40e_setup_pf_switch(pf, reinit, true); if (ret) goto end_unlock; /* The driver only wants link up/down and module qualification * reports from firmware. Note the negative logic. */ ret = i40e_aq_set_phy_int_mask(&pf->hw, ~(I40E_AQ_EVENT_LINK_UPDOWN | I40E_AQ_EVENT_MEDIA_NA | I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL); if (ret) dev_info(&pf->pdev->dev, "set phy mask fail, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); /* Rebuild the VSIs and VEBs that existed before reset. * They are still in our local switch element arrays, so only * need to rebuild the switch model in the HW. * * If there were VEBs but the reconstitution failed, we'll try * to recover minimal use by getting the basic PF VSI working. */ if (vsi->uplink_seid != pf->mac_seid) { dev_dbg(&pf->pdev->dev, "attempting to rebuild switch\n"); /* Rebuild VEBs */ i40e_pf_for_each_veb(pf, v, veb) { ret = i40e_reconstitute_veb(veb); if (!ret) continue; /* If Main VEB failed, we're in deep doodoo, * so give up rebuilding the switch and set up * for minimal rebuild of PF VSI. * If orphan failed, we'll report the error * but try to keep going. */ if (veb->uplink_seid == pf->mac_seid) { dev_info(&pf->pdev->dev, "rebuild of switch failed: %d, will try to set up simple PF connection\n", ret); vsi->uplink_seid = pf->mac_seid; break; } else if (veb->uplink_seid == 0) { dev_info(&pf->pdev->dev, "rebuild of orphan VEB failed: %d\n", ret); } } } if (vsi->uplink_seid == pf->mac_seid) { dev_dbg(&pf->pdev->dev, "attempting to rebuild PF VSI\n"); /* no VEB, so rebuild only the Main VSI */ ret = i40e_add_vsi(vsi); if (ret) { dev_info(&pf->pdev->dev, "rebuild of Main VSI failed: %d\n", ret); goto end_unlock; } } if (vsi->mqprio_qopt.max_rate[0]) { u64 max_tx_rate = i40e_bw_bytes_to_mbits(vsi, vsi->mqprio_qopt.max_rate[0]); u64 credits = 0; ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate); if (ret) goto end_unlock; credits = max_tx_rate; do_div(credits, I40E_BW_CREDIT_DIVISOR); dev_dbg(&vsi->back->pdev->dev, "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n", max_tx_rate, credits, vsi->seid); } ret = i40e_rebuild_cloud_filters(vsi, vsi->seid); if (ret) goto end_unlock; /* PF Main VSI is rebuild by now, go ahead and rebuild channel VSIs * for this main VSI if they exist */ ret = i40e_rebuild_channels(vsi); if (ret) goto end_unlock; /* Reconfigure hardware for allowing smaller MSS in the case * of TSO, so that we avoid the MDD being fired and causing * a reset in the case of small MSS+TSO. */ #define I40E_REG_MSS 0x000E64DC #define I40E_REG_MSS_MIN_MASK 0x3FF0000 #define I40E_64BYTE_MSS 0x400000 val = rd32(hw, I40E_REG_MSS); if ((val & I40E_REG_MSS_MIN_MASK) > I40E_64BYTE_MSS) { val &= ~I40E_REG_MSS_MIN_MASK; val |= I40E_64BYTE_MSS; wr32(hw, I40E_REG_MSS, val); } if (test_bit(I40E_HW_CAP_RESTART_AUTONEG, pf->hw.caps)) { msleep(75); ret = i40e_aq_set_link_restart_an(&pf->hw, true, NULL); if (ret) dev_info(&pf->pdev->dev, "link restart failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); } /* reinit the misc interrupt */ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { ret = i40e_setup_misc_vector(pf); if (ret) goto end_unlock; } /* Add a filter to drop all Flow control frames from any VSI from being * transmitted. By doing so we stop a malicious VF from sending out * PAUSE or PFC frames and potentially controlling traffic for other * PF/VF VSIs. * The FW can still send Flow control frames if enabled. */ i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw, pf->main_vsi_seid); /* restart the VSIs that were rebuilt and running before the reset */ i40e_pf_unquiesce_all_vsi(pf); /* Release the RTNL lock before we start resetting VFs */ if (!lock_acquired) rtnl_unlock(); /* Restore promiscuous settings */ ret = i40e_set_promiscuous(pf, pf->cur_promisc); if (ret) dev_warn(&pf->pdev->dev, "Failed to restore promiscuous setting: %s, err %pe aq_err %s\n", pf->cur_promisc ? "on" : "off", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); i40e_reset_all_vfs(pf, true); /* tell the firmware that we're starting */ i40e_send_version(pf); /* We've already released the lock, so don't do it again */ goto end_core_reset; end_unlock: if (!lock_acquired) rtnl_unlock(); end_core_reset: clear_bit(__I40E_RESET_FAILED, pf->state); clear_recovery: clear_bit(__I40E_RESET_RECOVERY_PENDING, pf->state); clear_bit(__I40E_TIMEOUT_RECOVERY_PENDING, pf->state); } /** * i40e_reset_and_rebuild - reset and rebuild using a saved config * @pf: board private structure * @reinit: if the Main VSI needs to re-initialized. * @lock_acquired: indicates whether or not the lock has been acquired * before this function was called. **/ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) { int ret; if (test_bit(__I40E_IN_REMOVE, pf->state)) return; /* Now we wait for GRST to settle out. * We don't have to delete the VEBs or VSIs from the hw switch * because the reset will make them disappear. */ ret = i40e_reset(pf); if (!ret) i40e_rebuild(pf, reinit, lock_acquired); else dev_err(&pf->pdev->dev, "%s: i40e_reset() FAILED", __func__); } /** * i40e_handle_reset_warning - prep for the PF to reset, reset and rebuild * @pf: board private structure * * Close up the VFs and other things in prep for a Core Reset, * then get ready to rebuild the world. * @lock_acquired: indicates whether or not the lock has been acquired * before this function was called. **/ static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired) { i40e_prep_for_reset(pf); i40e_reset_and_rebuild(pf, false, lock_acquired); } /** * i40e_handle_mdd_event * @pf: pointer to the PF structure * * Called from the MDD irq handler to identify possibly malicious vfs **/ static void i40e_handle_mdd_event(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; bool mdd_detected = false; struct i40e_vf *vf; u32 reg; int i; if (!test_bit(__I40E_MDD_EVENT_PENDING, pf->state)) return; /* find what triggered the MDD event */ reg = rd32(hw, I40E_GL_MDET_TX); if (reg & I40E_GL_MDET_TX_VALID_MASK) { u8 pf_num = FIELD_GET(I40E_GL_MDET_TX_PF_NUM_MASK, reg); u16 vf_num = FIELD_GET(I40E_GL_MDET_TX_VF_NUM_MASK, reg); u8 event = FIELD_GET(I40E_GL_MDET_TX_EVENT_MASK, reg); u16 queue = FIELD_GET(I40E_GL_MDET_TX_QUEUE_MASK, reg) - pf->hw.func_caps.base_queue; if (netif_msg_tx_err(pf)) dev_info(&pf->pdev->dev, "Malicious Driver Detection event 0x%02x on TX queue %d PF number 0x%02x VF number 0x%02x\n", event, queue, pf_num, vf_num); wr32(hw, I40E_GL_MDET_TX, 0xffffffff); mdd_detected = true; } reg = rd32(hw, I40E_GL_MDET_RX); if (reg & I40E_GL_MDET_RX_VALID_MASK) { u8 func = FIELD_GET(I40E_GL_MDET_RX_FUNCTION_MASK, reg); u8 event = FIELD_GET(I40E_GL_MDET_RX_EVENT_MASK, reg); u16 queue = FIELD_GET(I40E_GL_MDET_RX_QUEUE_MASK, reg) - pf->hw.func_caps.base_queue; if (netif_msg_rx_err(pf)) dev_info(&pf->pdev->dev, "Malicious Driver Detection event 0x%02x on RX queue %d of function 0x%02x\n", event, queue, func); wr32(hw, I40E_GL_MDET_RX, 0xffffffff); mdd_detected = true; } if (mdd_detected) { reg = rd32(hw, I40E_PF_MDET_TX); if (reg & I40E_PF_MDET_TX_VALID_MASK) { wr32(hw, I40E_PF_MDET_TX, 0xFFFF); dev_dbg(&pf->pdev->dev, "TX driver issue detected on PF\n"); } reg = rd32(hw, I40E_PF_MDET_RX); if (reg & I40E_PF_MDET_RX_VALID_MASK) { wr32(hw, I40E_PF_MDET_RX, 0xFFFF); dev_dbg(&pf->pdev->dev, "RX driver issue detected on PF\n"); } } /* see if one of the VFs needs its hand slapped */ for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) { vf = &(pf->vf[i]); reg = rd32(hw, I40E_VP_MDET_TX(i)); if (reg & I40E_VP_MDET_TX_VALID_MASK) { wr32(hw, I40E_VP_MDET_TX(i), 0xFFFF); vf->num_mdd_events++; dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n", i); dev_info(&pf->pdev->dev, "Use PF Control I/F to re-enable the VF\n"); set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states); } reg = rd32(hw, I40E_VP_MDET_RX(i)); if (reg & I40E_VP_MDET_RX_VALID_MASK) { wr32(hw, I40E_VP_MDET_RX(i), 0xFFFF); vf->num_mdd_events++; dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n", i); dev_info(&pf->pdev->dev, "Use PF Control I/F to re-enable the VF\n"); set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states); } } /* re-enable mdd interrupt cause */ clear_bit(__I40E_MDD_EVENT_PENDING, pf->state); reg = rd32(hw, I40E_PFINT_ICR0_ENA); reg |= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); i40e_flush(hw); } /** * i40e_service_task - Run the driver's async subtasks * @work: pointer to work_struct containing our data **/ static void i40e_service_task(struct work_struct *work) { struct i40e_pf *pf = container_of(work, struct i40e_pf, service_task); unsigned long start_time = jiffies; /* don't bother with service tasks if a reset is in progress */ if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || test_bit(__I40E_SUSPENDED, pf->state)) return; if (test_and_set_bit(__I40E_SERVICE_SCHED, pf->state)) return; if (!test_bit(__I40E_RECOVERY_MODE, pf->state)) { i40e_detect_recover_hung(pf); i40e_sync_filters_subtask(pf); i40e_reset_subtask(pf); i40e_handle_mdd_event(pf); i40e_vc_process_vflr_event(pf); i40e_watchdog_subtask(pf); i40e_fdir_reinit_subtask(pf); if (test_and_clear_bit(__I40E_CLIENT_RESET, pf->state)) { /* Client subtask will reopen next time through. */ i40e_notify_client_of_netdev_close(pf, true); } else { i40e_client_subtask(pf); if (test_and_clear_bit(__I40E_CLIENT_L2_CHANGE, pf->state)) i40e_notify_client_of_l2_param_changes(pf); } i40e_sync_filters_subtask(pf); } else { i40e_reset_subtask(pf); } i40e_clean_adminq_subtask(pf); /* flush memory to make sure state is correct before next watchdog */ smp_mb__before_atomic(); clear_bit(__I40E_SERVICE_SCHED, pf->state); /* If the tasks have taken longer than one timer cycle or there * is more work to be done, reschedule the service task now * rather than wait for the timer to tick again. */ if (time_after(jiffies, (start_time + pf->service_timer_period)) || test_bit(__I40E_ADMINQ_EVENT_PENDING, pf->state) || test_bit(__I40E_MDD_EVENT_PENDING, pf->state) || test_bit(__I40E_VFLR_EVENT_PENDING, pf->state)) i40e_service_event_schedule(pf); } /** * i40e_service_timer - timer callback * @t: timer list pointer **/ static void i40e_service_timer(struct timer_list *t) { struct i40e_pf *pf = from_timer(pf, t, service_timer); mod_timer(&pf->service_timer, round_jiffies(jiffies + pf->service_timer_period)); i40e_service_event_schedule(pf); } /** * i40e_set_num_rings_in_vsi - Determine number of rings in the VSI * @vsi: the VSI being configured **/ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; switch (vsi->type) { case I40E_VSI_MAIN: vsi->alloc_queue_pairs = pf->num_lan_qps; if (!vsi->num_tx_desc) vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, I40E_REQ_DESCRIPTOR_MULTIPLE); if (!vsi->num_rx_desc) vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, I40E_REQ_DESCRIPTOR_MULTIPLE); if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) vsi->num_q_vectors = pf->num_lan_msix; else vsi->num_q_vectors = 1; break; case I40E_VSI_FDIR: vsi->alloc_queue_pairs = 1; vsi->num_tx_desc = ALIGN(I40E_FDIR_RING_COUNT, I40E_REQ_DESCRIPTOR_MULTIPLE); vsi->num_rx_desc = ALIGN(I40E_FDIR_RING_COUNT, I40E_REQ_DESCRIPTOR_MULTIPLE); vsi->num_q_vectors = pf->num_fdsb_msix; break; case I40E_VSI_VMDQ2: vsi->alloc_queue_pairs = pf->num_vmdq_qps; if (!vsi->num_tx_desc) vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, I40E_REQ_DESCRIPTOR_MULTIPLE); if (!vsi->num_rx_desc) vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, I40E_REQ_DESCRIPTOR_MULTIPLE); vsi->num_q_vectors = pf->num_vmdq_msix; break; case I40E_VSI_SRIOV: vsi->alloc_queue_pairs = pf->num_vf_qps; if (!vsi->num_tx_desc) vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, I40E_REQ_DESCRIPTOR_MULTIPLE); if (!vsi->num_rx_desc) vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, I40E_REQ_DESCRIPTOR_MULTIPLE); break; default: WARN_ON(1); return -ENODATA; } if (is_kdump_kernel()) { vsi->num_tx_desc = I40E_MIN_NUM_DESCRIPTORS; vsi->num_rx_desc = I40E_MIN_NUM_DESCRIPTORS; } return 0; } /** * i40e_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the vsi * @vsi: VSI pointer * @alloc_qvectors: a bool to specify if q_vectors need to be allocated. * * On error: returns error code (negative) * On success: returns 0 **/ static int i40e_vsi_alloc_arrays(struct i40e_vsi *vsi, bool alloc_qvectors) { struct i40e_ring **next_rings; int size; int ret = 0; /* allocate memory for both Tx, XDP Tx and Rx ring pointers */ size = sizeof(struct i40e_ring *) * vsi->alloc_queue_pairs * (i40e_enabled_xdp_vsi(vsi) ? 3 : 2); vsi->tx_rings = kzalloc(size, GFP_KERNEL); if (!vsi->tx_rings) return -ENOMEM; next_rings = vsi->tx_rings + vsi->alloc_queue_pairs; if (i40e_enabled_xdp_vsi(vsi)) { vsi->xdp_rings = next_rings; next_rings += vsi->alloc_queue_pairs; } vsi->rx_rings = next_rings; if (alloc_qvectors) { /* allocate memory for q_vector pointers */ size = sizeof(struct i40e_q_vector *) * vsi->num_q_vectors; vsi->q_vectors = kzalloc(size, GFP_KERNEL); if (!vsi->q_vectors) { ret = -ENOMEM; goto err_vectors; } } return ret; err_vectors: kfree(vsi->tx_rings); return ret; } /** * i40e_vsi_mem_alloc - Allocates the next available struct vsi in the PF * @pf: board private structure * @type: type of VSI * * On error: returns error code (negative) * On success: returns vsi index in PF (positive) **/ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) { int ret = -ENODEV; struct i40e_vsi *vsi; int vsi_idx; int i; /* Need to protect the allocation of the VSIs at the PF level */ mutex_lock(&pf->switch_mutex); /* VSI list may be fragmented if VSI creation/destruction has * been happening. We can afford to do a quick scan to look * for any free VSIs in the list. * * find next empty vsi slot, looping back around if necessary */ i = pf->next_vsi; while (i < pf->num_alloc_vsi && pf->vsi[i]) i++; if (i >= pf->num_alloc_vsi) { i = 0; while (i < pf->next_vsi && pf->vsi[i]) i++; } if (i < pf->num_alloc_vsi && !pf->vsi[i]) { vsi_idx = i; /* Found one! */ } else { ret = -ENODEV; goto unlock_pf; /* out of VSI slots! */ } pf->next_vsi = ++i; vsi = kzalloc(sizeof(*vsi), GFP_KERNEL); if (!vsi) { ret = -ENOMEM; goto unlock_pf; } vsi->type = type; vsi->back = pf; set_bit(__I40E_VSI_DOWN, vsi->state); vsi->flags = 0; vsi->idx = vsi_idx; vsi->int_rate_limit = 0; vsi->rss_table_size = (vsi->type == I40E_VSI_MAIN) ? pf->rss_table_size : 64; vsi->netdev_registered = false; vsi->work_limit = I40E_DEFAULT_IRQ_WORK; hash_init(vsi->mac_filter_hash); vsi->irqs_ready = false; if (type == I40E_VSI_MAIN) { vsi->af_xdp_zc_qps = bitmap_zalloc(pf->num_lan_qps, GFP_KERNEL); if (!vsi->af_xdp_zc_qps) goto err_rings; } ret = i40e_set_num_rings_in_vsi(vsi); if (ret) goto err_rings; ret = i40e_vsi_alloc_arrays(vsi, true); if (ret) goto err_rings; /* Setup default MSIX irq handler for VSI */ i40e_vsi_setup_irqhandler(vsi, i40e_msix_clean_rings); /* Initialize VSI lock */ spin_lock_init(&vsi->mac_filter_hash_lock); pf->vsi[vsi_idx] = vsi; ret = vsi_idx; goto unlock_pf; err_rings: bitmap_free(vsi->af_xdp_zc_qps); pf->next_vsi = i - 1; kfree(vsi); unlock_pf: mutex_unlock(&pf->switch_mutex); return ret; } /** * i40e_vsi_free_arrays - Free queue and vector pointer arrays for the VSI * @vsi: VSI pointer * @free_qvectors: a bool to specify if q_vectors need to be freed. * * On error: returns error code (negative) * On success: returns 0 **/ static void i40e_vsi_free_arrays(struct i40e_vsi *vsi, bool free_qvectors) { /* free the ring and vector containers */ if (free_qvectors) { kfree(vsi->q_vectors); vsi->q_vectors = NULL; } kfree(vsi->tx_rings); vsi->tx_rings = NULL; vsi->rx_rings = NULL; vsi->xdp_rings = NULL; } /** * i40e_clear_rss_config_user - clear the user configured RSS hash keys * and lookup table * @vsi: Pointer to VSI structure */ static void i40e_clear_rss_config_user(struct i40e_vsi *vsi) { if (!vsi) return; kfree(vsi->rss_hkey_user); vsi->rss_hkey_user = NULL; kfree(vsi->rss_lut_user); vsi->rss_lut_user = NULL; } /** * i40e_vsi_clear - Deallocate the VSI provided * @vsi: the VSI being un-configured **/ static int i40e_vsi_clear(struct i40e_vsi *vsi) { struct i40e_pf *pf; if (!vsi) return 0; if (!vsi->back) goto free_vsi; pf = vsi->back; mutex_lock(&pf->switch_mutex); if (!pf->vsi[vsi->idx]) { dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](type %d)\n", vsi->idx, vsi->idx, vsi->type); goto unlock_vsi; } if (pf->vsi[vsi->idx] != vsi) { dev_err(&pf->pdev->dev, "pf->vsi[%d](type %d) != vsi[%d](type %d): no free!\n", pf->vsi[vsi->idx]->idx, pf->vsi[vsi->idx]->type, vsi->idx, vsi->type); goto unlock_vsi; } /* updates the PF for this cleared vsi */ i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx); i40e_put_lump(pf->irq_pile, vsi->base_vector, vsi->idx); bitmap_free(vsi->af_xdp_zc_qps); i40e_vsi_free_arrays(vsi, true); i40e_clear_rss_config_user(vsi); pf->vsi[vsi->idx] = NULL; if (vsi->idx < pf->next_vsi) pf->next_vsi = vsi->idx; unlock_vsi: mutex_unlock(&pf->switch_mutex); free_vsi: kfree(vsi); return 0; } /** * i40e_vsi_clear_rings - Deallocates the Rx and Tx rings for the provided VSI * @vsi: the VSI being cleaned **/ static void i40e_vsi_clear_rings(struct i40e_vsi *vsi) { int i; if (vsi->tx_rings && vsi->tx_rings[0]) { for (i = 0; i < vsi->alloc_queue_pairs; i++) { kfree_rcu(vsi->tx_rings[i], rcu); WRITE_ONCE(vsi->tx_rings[i], NULL); WRITE_ONCE(vsi->rx_rings[i], NULL); if (vsi->xdp_rings) WRITE_ONCE(vsi->xdp_rings[i], NULL); } } } /** * i40e_alloc_rings - Allocates the Rx and Tx rings for the provided VSI * @vsi: the VSI being configured **/ static int i40e_alloc_rings(struct i40e_vsi *vsi) { int i, qpv = i40e_enabled_xdp_vsi(vsi) ? 3 : 2; struct i40e_pf *pf = vsi->back; struct i40e_ring *ring; /* Set basic values in the rings to be used later during open() */ for (i = 0; i < vsi->alloc_queue_pairs; i++) { /* allocate space for both Tx and Rx in one shot */ ring = kcalloc(qpv, sizeof(struct i40e_ring), GFP_KERNEL); if (!ring) goto err_out; ring->queue_index = i; ring->reg_idx = vsi->base_queue + i; ring->ring_active = false; ring->vsi = vsi; ring->netdev = vsi->netdev; ring->dev = &pf->pdev->dev; ring->count = vsi->num_tx_desc; ring->size = 0; ring->dcb_tc = 0; if (test_bit(I40E_HW_CAP_WB_ON_ITR, vsi->back->hw.caps)) ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; ring->itr_setting = pf->tx_itr_default; WRITE_ONCE(vsi->tx_rings[i], ring++); if (!i40e_enabled_xdp_vsi(vsi)) goto setup_rx; ring->queue_index = vsi->alloc_queue_pairs + i; ring->reg_idx = vsi->base_queue + ring->queue_index; ring->ring_active = false; ring->vsi = vsi; ring->netdev = NULL; ring->dev = &pf->pdev->dev; ring->count = vsi->num_tx_desc; ring->size = 0; ring->dcb_tc = 0; if (test_bit(I40E_HW_CAP_WB_ON_ITR, vsi->back->hw.caps)) ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; set_ring_xdp(ring); ring->itr_setting = pf->tx_itr_default; WRITE_ONCE(vsi->xdp_rings[i], ring++); setup_rx: ring->queue_index = i; ring->reg_idx = vsi->base_queue + i; ring->ring_active = false; ring->vsi = vsi; ring->netdev = vsi->netdev; ring->dev = &pf->pdev->dev; ring->count = vsi->num_rx_desc; ring->size = 0; ring->dcb_tc = 0; ring->itr_setting = pf->rx_itr_default; WRITE_ONCE(vsi->rx_rings[i], ring); } return 0; err_out: i40e_vsi_clear_rings(vsi); return -ENOMEM; } /** * i40e_reserve_msix_vectors - Reserve MSI-X vectors in the kernel * @pf: board private structure * @vectors: the number of MSI-X vectors to request * * Returns the number of vectors reserved, or error **/ static int i40e_reserve_msix_vectors(struct i40e_pf *pf, int vectors) { vectors = pci_enable_msix_range(pf->pdev, pf->msix_entries, I40E_MIN_MSIX, vectors); if (vectors < 0) { dev_info(&pf->pdev->dev, "MSI-X vector reservation failed: %d\n", vectors); vectors = 0; } return vectors; } /** * i40e_init_msix - Setup the MSIX capability * @pf: board private structure * * Work with the OS to set up the MSIX vectors needed. * * Returns the number of vectors reserved or negative on failure **/ static int i40e_init_msix(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; int cpus, extra_vectors; int vectors_left; int v_budget, i; int v_actual; int iwarp_requested = 0; if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) return -ENODEV; /* The number of vectors we'll request will be comprised of: * - Add 1 for "other" cause for Admin Queue events, etc. * - The number of LAN queue pairs * - Queues being used for RSS. * We don't need as many as max_rss_size vectors. * use rss_size instead in the calculation since that * is governed by number of cpus in the system. * - assumes symmetric Tx/Rx pairing * - The number of VMDq pairs * - The CPU count within the NUMA node if iWARP is enabled * Once we count this up, try the request. * * If we can't get what we want, we'll simplify to nearly nothing * and try again. If that still fails, we punt. */ vectors_left = hw->func_caps.num_msix_vectors; v_budget = 0; /* reserve one vector for miscellaneous handler */ if (vectors_left) { v_budget++; vectors_left--; } /* reserve some vectors for the main PF traffic queues. Initially we * only reserve at most 50% of the available vectors, in the case that * the number of online CPUs is large. This ensures that we can enable * extra features as well. Once we've enabled the other features, we * will use any remaining vectors to reach as close as we can to the * number of online CPUs. */ cpus = num_online_cpus(); pf->num_lan_msix = min_t(int, cpus, vectors_left / 2); vectors_left -= pf->num_lan_msix; /* reserve one vector for sideband flow director */ if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) { if (vectors_left) { pf->num_fdsb_msix = 1; v_budget++; vectors_left--; } else { pf->num_fdsb_msix = 0; } } /* can we reserve enough for iWARP? */ if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { iwarp_requested = pf->num_iwarp_msix; if (!vectors_left) pf->num_iwarp_msix = 0; else if (vectors_left < pf->num_iwarp_msix) pf->num_iwarp_msix = 1; v_budget += pf->num_iwarp_msix; vectors_left -= pf->num_iwarp_msix; } /* any vectors left over go for VMDq support */ if (test_bit(I40E_FLAG_VMDQ_ENA, pf->flags)) { if (!vectors_left) { pf->num_vmdq_msix = 0; pf->num_vmdq_qps = 0; } else { int vmdq_vecs_wanted = pf->num_vmdq_vsis * pf->num_vmdq_qps; int vmdq_vecs = min_t(int, vectors_left, vmdq_vecs_wanted); /* if we're short on vectors for what's desired, we limit * the queues per vmdq. If this is still more than are * available, the user will need to change the number of * queues/vectors used by the PF later with the ethtool * channels command */ if (vectors_left < vmdq_vecs_wanted) { pf->num_vmdq_qps = 1; vmdq_vecs_wanted = pf->num_vmdq_vsis; vmdq_vecs = min_t(int, vectors_left, vmdq_vecs_wanted); } pf->num_vmdq_msix = pf->num_vmdq_qps; v_budget += vmdq_vecs; vectors_left -= vmdq_vecs; } } /* On systems with a large number of SMP cores, we previously limited * the number of vectors for num_lan_msix to be at most 50% of the * available vectors, to allow for other features. Now, we add back * the remaining vectors. However, we ensure that the total * num_lan_msix will not exceed num_online_cpus(). To do this, we * calculate the number of vectors we can add without going over the * cap of CPUs. For systems with a small number of CPUs this will be * zero. */ extra_vectors = min_t(int, cpus - pf->num_lan_msix, vectors_left); pf->num_lan_msix += extra_vectors; vectors_left -= extra_vectors; WARN(vectors_left < 0, "Calculation of remaining vectors underflowed. This is an accounting bug when determining total MSI-X vectors.\n"); v_budget += pf->num_lan_msix; pf->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry), GFP_KERNEL); if (!pf->msix_entries) return -ENOMEM; for (i = 0; i < v_budget; i++) pf->msix_entries[i].entry = i; v_actual = i40e_reserve_msix_vectors(pf, v_budget); if (v_actual < I40E_MIN_MSIX) { clear_bit(I40E_FLAG_MSIX_ENA, pf->flags); kfree(pf->msix_entries); pf->msix_entries = NULL; pci_disable_msix(pf->pdev); return -ENODEV; } else if (v_actual == I40E_MIN_MSIX) { /* Adjust for minimal MSIX use */ pf->num_vmdq_vsis = 0; pf->num_vmdq_qps = 0; pf->num_lan_qps = 1; pf->num_lan_msix = 1; } else if (v_actual != v_budget) { /* If we have limited resources, we will start with no vectors * for the special features and then allocate vectors to some * of these features based on the policy and at the end disable * the features that did not get any vectors. */ int vec; dev_info(&pf->pdev->dev, "MSI-X vector limit reached with %d, wanted %d, attempting to redistribute vectors\n", v_actual, v_budget); /* reserve the misc vector */ vec = v_actual - 1; /* Scale vector usage down */ pf->num_vmdq_msix = 1; /* force VMDqs to only one vector */ pf->num_vmdq_vsis = 1; pf->num_vmdq_qps = 1; /* partition out the remaining vectors */ switch (vec) { case 2: pf->num_lan_msix = 1; break; case 3: if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { pf->num_lan_msix = 1; pf->num_iwarp_msix = 1; } else { pf->num_lan_msix = 2; } break; default: if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { pf->num_iwarp_msix = min_t(int, (vec / 3), iwarp_requested); pf->num_vmdq_vsis = min_t(int, (vec / 3), I40E_DEFAULT_NUM_VMDQ_VSI); } else { pf->num_vmdq_vsis = min_t(int, (vec / 2), I40E_DEFAULT_NUM_VMDQ_VSI); } if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) { pf->num_fdsb_msix = 1; vec--; } pf->num_lan_msix = min_t(int, (vec - (pf->num_iwarp_msix + pf->num_vmdq_vsis)), pf->num_lan_msix); pf->num_lan_qps = pf->num_lan_msix; break; } } if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) && pf->num_fdsb_msix == 0) { dev_info(&pf->pdev->dev, "Sideband Flowdir disabled, not enough MSI-X vectors\n"); clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); } if (test_bit(I40E_FLAG_VMDQ_ENA, pf->flags) && pf->num_vmdq_msix == 0) { dev_info(&pf->pdev->dev, "VMDq disabled, not enough MSI-X vectors\n"); clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags); } if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags) && pf->num_iwarp_msix == 0) { dev_info(&pf->pdev->dev, "IWARP disabled, not enough MSI-X vectors\n"); clear_bit(I40E_FLAG_IWARP_ENA, pf->flags); } i40e_debug(&pf->hw, I40E_DEBUG_INIT, "MSI-X vector distribution: PF %d, VMDq %d, FDSB %d, iWARP %d\n", pf->num_lan_msix, pf->num_vmdq_msix * pf->num_vmdq_vsis, pf->num_fdsb_msix, pf->num_iwarp_msix); return v_actual; } /** * i40e_vsi_alloc_q_vector - Allocate memory for a single interrupt vector * @vsi: the VSI being configured * @v_idx: index of the vector in the vsi struct * * We allocate one q_vector. If allocation fails we return -ENOMEM. **/ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx) { struct i40e_q_vector *q_vector; /* allocate q_vector */ q_vector = kzalloc(sizeof(struct i40e_q_vector), GFP_KERNEL); if (!q_vector) return -ENOMEM; q_vector->vsi = vsi; q_vector->v_idx = v_idx; cpumask_copy(&q_vector->affinity_mask, cpu_possible_mask); if (vsi->netdev) netif_napi_add(vsi->netdev, &q_vector->napi, i40e_napi_poll); /* tie q_vector and vsi together */ vsi->q_vectors[v_idx] = q_vector; return 0; } /** * i40e_vsi_alloc_q_vectors - Allocate memory for interrupt vectors * @vsi: the VSI being configured * * We allocate one q_vector per queue interrupt. If allocation fails we * return -ENOMEM. **/ static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int err, v_idx, num_q_vectors; /* if not MSIX, give the one vector only to the LAN VSI */ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) num_q_vectors = vsi->num_q_vectors; else if (vsi->type == I40E_VSI_MAIN) num_q_vectors = 1; else return -EINVAL; for (v_idx = 0; v_idx < num_q_vectors; v_idx++) { err = i40e_vsi_alloc_q_vector(vsi, v_idx); if (err) goto err_out; } return 0; err_out: while (v_idx--) i40e_free_q_vector(vsi, v_idx); return err; } /** * i40e_init_interrupt_scheme - Determine proper interrupt scheme * @pf: board private structure to initialize **/ static int i40e_init_interrupt_scheme(struct i40e_pf *pf) { int vectors = 0; ssize_t size; if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { vectors = i40e_init_msix(pf); if (vectors < 0) { clear_bit(I40E_FLAG_MSIX_ENA, pf->flags); clear_bit(I40E_FLAG_IWARP_ENA, pf->flags); clear_bit(I40E_FLAG_RSS_ENA, pf->flags); clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); clear_bit(I40E_FLAG_DCB_ENA, pf->flags); clear_bit(I40E_FLAG_SRIOV_ENA, pf->flags); clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); clear_bit(I40E_FLAG_FD_ATR_ENA, pf->flags); clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags); set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); /* rework the queue expectations without MSIX */ i40e_determine_queue_usage(pf); } } if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags) && test_bit(I40E_FLAG_MSI_ENA, pf->flags)) { dev_info(&pf->pdev->dev, "MSI-X not available, trying MSI\n"); vectors = pci_enable_msi(pf->pdev); if (vectors < 0) { dev_info(&pf->pdev->dev, "MSI init failed - %d\n", vectors); clear_bit(I40E_FLAG_MSI_ENA, pf->flags); } vectors = 1; /* one MSI or Legacy vector */ } if (!test_bit(I40E_FLAG_MSI_ENA, pf->flags) && !test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) dev_info(&pf->pdev->dev, "MSI-X and MSI not available, falling back to Legacy IRQ\n"); /* set up vector assignment tracking */ size = sizeof(struct i40e_lump_tracking) + (sizeof(u16) * vectors); pf->irq_pile = kzalloc(size, GFP_KERNEL); if (!pf->irq_pile) return -ENOMEM; pf->irq_pile->num_entries = vectors; /* track first vector for misc interrupts, ignore return */ (void)i40e_get_lump(pf, pf->irq_pile, 1, I40E_PILE_VALID_BIT - 1); return 0; } /** * i40e_restore_interrupt_scheme - Restore the interrupt scheme * @pf: private board data structure * * Restore the interrupt scheme that was cleared when we suspended the * device. This should be called during resume to re-allocate the q_vectors * and reacquire IRQs. */ static int i40e_restore_interrupt_scheme(struct i40e_pf *pf) { struct i40e_vsi *vsi; int err, i; /* We cleared the MSI and MSI-X flags when disabling the old interrupt * scheme. We need to re-enabled them here in order to attempt to * re-acquire the MSI or MSI-X vectors */ set_bit(I40E_FLAG_MSI_ENA, pf->flags); set_bit(I40E_FLAG_MSIX_ENA, pf->flags); err = i40e_init_interrupt_scheme(pf); if (err) return err; /* Now that we've re-acquired IRQs, we need to remap the vectors and * rings together again. */ i40e_pf_for_each_vsi(pf, i, vsi) { err = i40e_vsi_alloc_q_vectors(vsi); if (err) goto err_unwind; i40e_vsi_map_rings_to_vectors(vsi); } err = i40e_setup_misc_vector(pf); if (err) goto err_unwind; if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) i40e_client_update_msix_info(pf); return 0; err_unwind: while (i--) { if (pf->vsi[i]) i40e_vsi_free_q_vectors(pf->vsi[i]); } return err; } /** * i40e_setup_misc_vector_for_recovery_mode - Setup the misc vector to handle * non queue events in recovery mode * @pf: board private structure * * This sets up the handler for MSIX 0 or MSI/legacy, which is used to manage * the non-queue interrupts, e.g. AdminQ and errors in recovery mode. * This is handled differently than in recovery mode since no Tx/Rx resources * are being allocated. **/ static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf) { int err; if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { err = i40e_setup_misc_vector(pf); if (err) { dev_info(&pf->pdev->dev, "MSI-X misc vector request failed, error %d\n", err); return err; } } else { u32 flags = test_bit(I40E_FLAG_MSI_ENA, pf->flags) ? 0 : IRQF_SHARED; err = request_irq(pf->pdev->irq, i40e_intr, flags, pf->int_name, pf); if (err) { dev_info(&pf->pdev->dev, "MSI/legacy misc vector request failed, error %d\n", err); return err; } i40e_enable_misc_int_causes(pf); i40e_irq_dynamic_enable_icr0(pf); } return 0; } /** * i40e_setup_misc_vector - Setup the misc vector to handle non queue events * @pf: board private structure * * This sets up the handler for MSIX 0, which is used to manage the * non-queue interrupts, e.g. AdminQ and errors. This is not used * when in MSI or Legacy interrupt mode. **/ static int i40e_setup_misc_vector(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; int err = 0; /* Only request the IRQ once, the first time through. */ if (!test_and_set_bit(__I40E_MISC_IRQ_REQUESTED, pf->state)) { err = request_irq(pf->msix_entries[0].vector, i40e_intr, 0, pf->int_name, pf); if (err) { clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state); dev_info(&pf->pdev->dev, "request_irq for %s failed: %d\n", pf->int_name, err); return -EFAULT; } } i40e_enable_misc_int_causes(pf); /* associate no queues to the misc vector */ wr32(hw, I40E_PFINT_LNKLST0, I40E_QUEUE_END_OF_LIST); wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), I40E_ITR_8K >> 1); i40e_flush(hw); i40e_irq_dynamic_enable_icr0(pf); return err; } /** * i40e_get_rss_aq - Get RSS keys and lut by using AQ commands * @vsi: Pointer to vsi structure * @seed: Buffter to store the hash keys * @lut: Buffer to store the lookup table entries * @lut_size: Size of buffer to store the lookup table entries * * Return 0 on success, negative on failure */ static int i40e_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed, u8 *lut, u16 lut_size) { struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; int ret = 0; if (seed) { ret = i40e_aq_get_rss_key(hw, vsi->id, (struct i40e_aqc_get_set_rss_key_data *)seed); if (ret) { dev_info(&pf->pdev->dev, "Cannot get RSS key, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); return ret; } } if (lut) { bool pf_lut = vsi->type == I40E_VSI_MAIN; ret = i40e_aq_get_rss_lut(hw, vsi->id, pf_lut, lut, lut_size); if (ret) { dev_info(&pf->pdev->dev, "Cannot get RSS lut, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); return ret; } } return ret; } /** * i40e_config_rss_reg - Configure RSS keys and lut by writing registers * @vsi: Pointer to vsi structure * @seed: RSS hash seed * @lut: Lookup table * @lut_size: Lookup table size * * Returns 0 on success, negative on failure **/ static int i40e_config_rss_reg(struct i40e_vsi *vsi, const u8 *seed, const u8 *lut, u16 lut_size) { struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; u16 vf_id = vsi->vf_id; u8 i; /* Fill out hash function seed */ if (seed) { u32 *seed_dw = (u32 *)seed; if (vsi->type == I40E_VSI_MAIN) { for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) wr32(hw, I40E_PFQF_HKEY(i), seed_dw[i]); } else if (vsi->type == I40E_VSI_SRIOV) { for (i = 0; i <= I40E_VFQF_HKEY1_MAX_INDEX; i++) wr32(hw, I40E_VFQF_HKEY1(i, vf_id), seed_dw[i]); } else { dev_err(&pf->pdev->dev, "Cannot set RSS seed - invalid VSI type\n"); } } if (lut) { u32 *lut_dw = (u32 *)lut; if (vsi->type == I40E_VSI_MAIN) { if (lut_size != I40E_HLUT_ARRAY_SIZE) return -EINVAL; for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) wr32(hw, I40E_PFQF_HLUT(i), lut_dw[i]); } else if (vsi->type == I40E_VSI_SRIOV) { if (lut_size != I40E_VF_HLUT_ARRAY_SIZE) return -EINVAL; for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) wr32(hw, I40E_VFQF_HLUT1(i, vf_id), lut_dw[i]); } else { dev_err(&pf->pdev->dev, "Cannot set RSS LUT - invalid VSI type\n"); } } i40e_flush(hw); return 0; } /** * i40e_get_rss_reg - Get the RSS keys and lut by reading registers * @vsi: Pointer to VSI structure * @seed: Buffer to store the keys * @lut: Buffer to store the lookup table entries * @lut_size: Size of buffer to store the lookup table entries * * Returns 0 on success, negative on failure */ static int i40e_get_rss_reg(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) { struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; u16 i; if (seed) { u32 *seed_dw = (u32 *)seed; for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) seed_dw[i] = i40e_read_rx_ctl(hw, I40E_PFQF_HKEY(i)); } if (lut) { u32 *lut_dw = (u32 *)lut; if (lut_size != I40E_HLUT_ARRAY_SIZE) return -EINVAL; for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) lut_dw[i] = rd32(hw, I40E_PFQF_HLUT(i)); } return 0; } /** * i40e_config_rss - Configure RSS keys and lut * @vsi: Pointer to VSI structure * @seed: RSS hash seed * @lut: Lookup table * @lut_size: Lookup table size * * Returns 0 on success, negative on failure */ int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) { struct i40e_pf *pf = vsi->back; if (test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps)) return i40e_config_rss_aq(vsi, seed, lut, lut_size); else return i40e_config_rss_reg(vsi, seed, lut, lut_size); } /** * i40e_get_rss - Get RSS keys and lut * @vsi: Pointer to VSI structure * @seed: Buffer to store the keys * @lut: Buffer to store the lookup table entries * @lut_size: Size of buffer to store the lookup table entries * * Returns 0 on success, negative on failure */ int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) { struct i40e_pf *pf = vsi->back; if (test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps)) return i40e_get_rss_aq(vsi, seed, lut, lut_size); else return i40e_get_rss_reg(vsi, seed, lut, lut_size); } /** * i40e_fill_rss_lut - Fill the RSS lookup table with default values * @pf: Pointer to board private structure * @lut: Lookup table * @rss_table_size: Lookup table size * @rss_size: Range of queue number for hashing */ void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, u16 rss_table_size, u16 rss_size) { u16 i; for (i = 0; i < rss_table_size; i++) lut[i] = i % rss_size; } /** * i40e_pf_config_rss - Prepare for RSS if used * @pf: board private structure **/ static int i40e_pf_config_rss(struct i40e_pf *pf) { struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); u8 seed[I40E_HKEY_ARRAY_SIZE]; u8 *lut; struct i40e_hw *hw = &pf->hw; u32 reg_val; u64 hena; int ret; /* By default we enable TCP/UDP with IPv4/IPv6 ptypes */ hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); hena |= i40e_pf_get_default_rss_hena(pf); i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena); i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); /* Determine the RSS table size based on the hardware capabilities */ reg_val = i40e_read_rx_ctl(hw, I40E_PFQF_CTL_0); reg_val = (pf->rss_table_size == 512) ? (reg_val | I40E_PFQF_CTL_0_HASHLUTSIZE_512) : (reg_val & ~I40E_PFQF_CTL_0_HASHLUTSIZE_512); i40e_write_rx_ctl(hw, I40E_PFQF_CTL_0, reg_val); /* Determine the RSS size of the VSI */ if (!vsi->rss_size) { u16 qcount; /* If the firmware does something weird during VSI init, we * could end up with zero TCs. Check for that to avoid * divide-by-zero. It probably won't pass traffic, but it also * won't panic. */ qcount = vsi->num_queue_pairs / (vsi->tc_config.numtc ? vsi->tc_config.numtc : 1); vsi->rss_size = min_t(int, pf->alloc_rss_size, qcount); } if (!vsi->rss_size) return -EINVAL; lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) return -ENOMEM; /* Use user configured lut if there is one, otherwise use default */ if (vsi->rss_lut_user) memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size); else i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size); /* Use user configured hash key if there is one, otherwise * use default. */ if (vsi->rss_hkey_user) memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE); else netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size); kfree(lut); return ret; } /** * i40e_reconfig_rss_queues - change number of queues for rss and rebuild * @pf: board private structure * @queue_count: the requested queue count for rss. * * returns 0 if rss is not enabled, if enabled returns the final rss queue * count which may be different from the requested queue count. * Note: expects to be called while under rtnl_lock() **/ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count) { struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); int new_rss_size; if (!test_bit(I40E_FLAG_RSS_ENA, pf->flags)) return 0; queue_count = min_t(int, queue_count, num_online_cpus()); new_rss_size = min_t(int, queue_count, pf->rss_size_max); if (queue_count != vsi->num_queue_pairs) { u16 qcount; vsi->req_queue_pairs = queue_count; i40e_prep_for_reset(pf); if (test_bit(__I40E_IN_REMOVE, pf->state)) return pf->alloc_rss_size; pf->alloc_rss_size = new_rss_size; i40e_reset_and_rebuild(pf, true, true); /* Discard the user configured hash keys and lut, if less * queues are enabled. */ if (queue_count < vsi->rss_size) { i40e_clear_rss_config_user(vsi); dev_dbg(&pf->pdev->dev, "discard user configured hash keys and lut\n"); } /* Reset vsi->rss_size, as number of enabled queues changed */ qcount = vsi->num_queue_pairs / vsi->tc_config.numtc; vsi->rss_size = min_t(int, pf->alloc_rss_size, qcount); i40e_pf_config_rss(pf); } dev_info(&pf->pdev->dev, "User requested queue count/HW max RSS count: %d/%d\n", vsi->req_queue_pairs, pf->rss_size_max); return pf->alloc_rss_size; } /** * i40e_get_partition_bw_setting - Retrieve BW settings for this PF partition * @pf: board private structure **/ int i40e_get_partition_bw_setting(struct i40e_pf *pf) { bool min_valid, max_valid; u32 max_bw, min_bw; int status; status = i40e_read_bw_from_alt_ram(&pf->hw, &max_bw, &min_bw, &min_valid, &max_valid); if (!status) { if (min_valid) pf->min_bw = min_bw; if (max_valid) pf->max_bw = max_bw; } return status; } /** * i40e_set_partition_bw_setting - Set BW settings for this PF partition * @pf: board private structure **/ int i40e_set_partition_bw_setting(struct i40e_pf *pf) { struct i40e_aqc_configure_partition_bw_data bw_data; int status; memset(&bw_data, 0, sizeof(bw_data)); /* Set the valid bit for this PF */ bw_data.pf_valid_bits = cpu_to_le16(BIT(pf->hw.pf_id)); bw_data.max_bw[pf->hw.pf_id] = pf->max_bw & I40E_ALT_BW_VALUE_MASK; bw_data.min_bw[pf->hw.pf_id] = pf->min_bw & I40E_ALT_BW_VALUE_MASK; /* Set the new bandwidths */ status = i40e_aq_configure_partition_bw(&pf->hw, &bw_data, NULL); return status; } /** * i40e_commit_partition_bw_setting - Commit BW settings for this PF partition * @pf: board private structure **/ int i40e_commit_partition_bw_setting(struct i40e_pf *pf) { /* Commit temporary BW setting to permanent NVM image */ enum i40e_admin_queue_err last_aq_status; u16 nvm_word; int ret; if (pf->hw.partition_id != 1) { dev_info(&pf->pdev->dev, "Commit BW only works on partition 1! This is partition %d", pf->hw.partition_id); ret = -EOPNOTSUPP; goto bw_commit_out; } /* Acquire NVM for read access */ ret = i40e_acquire_nvm(&pf->hw, I40E_RESOURCE_READ); last_aq_status = pf->hw.aq.asq_last_status; if (ret) { dev_info(&pf->pdev->dev, "Cannot acquire NVM for read access, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, last_aq_status)); goto bw_commit_out; } /* Read word 0x10 of NVM - SW compatibility word 1 */ ret = i40e_aq_read_nvm(&pf->hw, I40E_SR_NVM_CONTROL_WORD, 0x10, sizeof(nvm_word), &nvm_word, false, NULL); /* Save off last admin queue command status before releasing * the NVM */ last_aq_status = pf->hw.aq.asq_last_status; i40e_release_nvm(&pf->hw); if (ret) { dev_info(&pf->pdev->dev, "NVM read error, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, last_aq_status)); goto bw_commit_out; } /* Wait a bit for NVM release to complete */ msleep(50); /* Acquire NVM for write access */ ret = i40e_acquire_nvm(&pf->hw, I40E_RESOURCE_WRITE); last_aq_status = pf->hw.aq.asq_last_status; if (ret) { dev_info(&pf->pdev->dev, "Cannot acquire NVM for write access, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, last_aq_status)); goto bw_commit_out; } /* Write it back out unchanged to initiate update NVM, * which will force a write of the shadow (alt) RAM to * the NVM - thus storing the bandwidth values permanently. */ ret = i40e_aq_update_nvm(&pf->hw, I40E_SR_NVM_CONTROL_WORD, 0x10, sizeof(nvm_word), &nvm_word, true, 0, NULL); /* Save off last admin queue command status before releasing * the NVM */ last_aq_status = pf->hw.aq.asq_last_status; i40e_release_nvm(&pf->hw); if (ret) dev_info(&pf->pdev->dev, "BW settings NOT SAVED, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, last_aq_status)); bw_commit_out: return ret; } /** * i40e_is_total_port_shutdown_enabled - read NVM and return value * if total port shutdown feature is enabled for this PF * @pf: board private structure **/ static bool i40e_is_total_port_shutdown_enabled(struct i40e_pf *pf) { #define I40E_TOTAL_PORT_SHUTDOWN_ENABLED BIT(4) #define I40E_FEATURES_ENABLE_PTR 0x2A #define I40E_CURRENT_SETTING_PTR 0x2B #define I40E_LINK_BEHAVIOR_WORD_OFFSET 0x2D #define I40E_LINK_BEHAVIOR_WORD_LENGTH 0x1 #define I40E_LINK_BEHAVIOR_OS_FORCED_ENABLED BIT(0) #define I40E_LINK_BEHAVIOR_PORT_BIT_LENGTH 4 u16 sr_emp_sr_settings_ptr = 0; u16 features_enable = 0; u16 link_behavior = 0; int read_status = 0; bool ret = false; read_status = i40e_read_nvm_word(&pf->hw, I40E_SR_EMP_SR_SETTINGS_PTR, &sr_emp_sr_settings_ptr); if (read_status) goto err_nvm; read_status = i40e_read_nvm_word(&pf->hw, sr_emp_sr_settings_ptr + I40E_FEATURES_ENABLE_PTR, &features_enable); if (read_status) goto err_nvm; if (I40E_TOTAL_PORT_SHUTDOWN_ENABLED & features_enable) { read_status = i40e_read_nvm_module_data(&pf->hw, I40E_SR_EMP_SR_SETTINGS_PTR, I40E_CURRENT_SETTING_PTR, I40E_LINK_BEHAVIOR_WORD_OFFSET, I40E_LINK_BEHAVIOR_WORD_LENGTH, &link_behavior); if (read_status) goto err_nvm; link_behavior >>= (pf->hw.port * I40E_LINK_BEHAVIOR_PORT_BIT_LENGTH); ret = I40E_LINK_BEHAVIOR_OS_FORCED_ENABLED & link_behavior; } return ret; err_nvm: dev_warn(&pf->pdev->dev, "total-port-shutdown feature is off due to read nvm error: %pe\n", ERR_PTR(read_status)); return ret; } /** * i40e_sw_init - Initialize general software structures (struct i40e_pf) * @pf: board private structure to initialize * * i40e_sw_init initializes the Adapter private data structure. * Fields are initialized based on PCI device information and * OS network device settings (MTU size). **/ static int i40e_sw_init(struct i40e_pf *pf) { int err = 0; int size; u16 pow; /* Set default capability flags */ bitmap_zero(pf->flags, I40E_PF_FLAGS_NBITS); set_bit(I40E_FLAG_MSI_ENA, pf->flags); set_bit(I40E_FLAG_MSIX_ENA, pf->flags); /* Set default ITR */ pf->rx_itr_default = I40E_ITR_RX_DEF; pf->tx_itr_default = I40E_ITR_TX_DEF; /* Depending on PF configurations, it is possible that the RSS * maximum might end up larger than the available queues */ pf->rss_size_max = BIT(pf->hw.func_caps.rss_table_entry_width); pf->alloc_rss_size = 1; pf->rss_table_size = pf->hw.func_caps.rss_table_size; pf->rss_size_max = min_t(int, pf->rss_size_max, pf->hw.func_caps.num_tx_qp); /* find the next higher power-of-2 of num cpus */ pow = roundup_pow_of_two(num_online_cpus()); pf->rss_size_max = min_t(int, pf->rss_size_max, pow); if (pf->hw.func_caps.rss) { set_bit(I40E_FLAG_RSS_ENA, pf->flags); pf->alloc_rss_size = min_t(int, pf->rss_size_max, num_online_cpus()); } /* MFP mode enabled */ if (pf->hw.func_caps.npar_enable || pf->hw.func_caps.flex10_enable) { set_bit(I40E_FLAG_MFP_ENA, pf->flags); dev_info(&pf->pdev->dev, "MFP mode Enabled\n"); if (i40e_get_partition_bw_setting(pf)) { dev_warn(&pf->pdev->dev, "Could not get partition bw settings\n"); } else { dev_info(&pf->pdev->dev, "Partition BW Min = %8.8x, Max = %8.8x\n", pf->min_bw, pf->max_bw); /* nudge the Tx scheduler */ i40e_set_partition_bw_setting(pf); } } if ((pf->hw.func_caps.fd_filters_guaranteed > 0) || (pf->hw.func_caps.fd_filters_best_effort > 0)) { set_bit(I40E_FLAG_FD_ATR_ENA, pf->flags); if (test_bit(I40E_FLAG_MFP_ENA, pf->flags) && pf->hw.num_partitions > 1) dev_info(&pf->pdev->dev, "Flow Director Sideband mode Disabled in MFP mode\n"); else set_bit(I40E_FLAG_FD_SB_ENA, pf->flags); pf->fdir_pf_filter_count = pf->hw.func_caps.fd_filters_guaranteed; pf->hw.fdir_shared_filter_count = pf->hw.func_caps.fd_filters_best_effort; } /* Enable HW ATR eviction if possible */ if (test_bit(I40E_HW_CAP_ATR_EVICT, pf->hw.caps)) set_bit(I40E_FLAG_HW_ATR_EVICT_ENA, pf->flags); if (pf->hw.func_caps.vmdq && num_online_cpus() != 1) { pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI; set_bit(I40E_FLAG_VMDQ_ENA, pf->flags); pf->num_vmdq_qps = i40e_default_queues_per_vmdq(pf); } if (pf->hw.func_caps.iwarp && num_online_cpus() != 1) { set_bit(I40E_FLAG_IWARP_ENA, pf->flags); /* IWARP needs one extra vector for CQP just like MISC.*/ pf->num_iwarp_msix = (int)num_online_cpus() + 1; } /* Stopping FW LLDP engine is supported on XL710 and X722 * starting from FW versions determined in i40e_init_adminq. * Stopping the FW LLDP engine is not supported on XL710 * if NPAR is functioning so unset this hw flag in this case. */ if (pf->hw.mac.type == I40E_MAC_XL710 && pf->hw.func_caps.npar_enable) clear_bit(I40E_HW_CAP_FW_LLDP_STOPPABLE, pf->hw.caps); #ifdef CONFIG_PCI_IOV if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) { pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF; set_bit(I40E_FLAG_SRIOV_ENA, pf->flags); pf->num_req_vfs = min_t(int, pf->hw.func_caps.num_vfs, I40E_MAX_VF_COUNT); } #endif /* CONFIG_PCI_IOV */ pf->lan_veb = I40E_NO_VEB; pf->lan_vsi = I40E_NO_VSI; /* By default FW has this off for performance reasons */ clear_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags); /* set up queue assignment tracking */ size = sizeof(struct i40e_lump_tracking) + (sizeof(u16) * pf->hw.func_caps.num_tx_qp); pf->qp_pile = kzalloc(size, GFP_KERNEL); if (!pf->qp_pile) { err = -ENOMEM; goto sw_init_done; } pf->qp_pile->num_entries = pf->hw.func_caps.num_tx_qp; pf->tx_timeout_recovery_level = 1; if (pf->hw.mac.type != I40E_MAC_X722 && i40e_is_total_port_shutdown_enabled(pf)) { /* Link down on close must be on when total port shutdown * is enabled for a given port */ set_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags); set_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags); dev_info(&pf->pdev->dev, "total-port-shutdown was enabled, link-down-on-close is forced on\n"); } mutex_init(&pf->switch_mutex); sw_init_done: return err; } /** * i40e_set_ntuple - set the ntuple feature flag and take action * @pf: board private structure to initialize * @features: the feature set that the stack is suggesting * * returns a bool to indicate if reset needs to happen **/ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features) { bool need_reset = false; /* Check if Flow Director n-tuple support was enabled or disabled. If * the state changed, we need to reset. */ if (features & NETIF_F_NTUPLE) { /* Enable filters and mark for reset */ if (!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) need_reset = true; /* enable FD_SB only if there is MSI-X vector and no cloud * filters exist */ if (pf->num_fdsb_msix > 0 && !pf->num_cloud_filters) { set_bit(I40E_FLAG_FD_SB_ENA, pf->flags); clear_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); } } else { /* turn off filters, mark for reset and clear SW filter list */ if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) { need_reset = true; i40e_fdir_filter_exit(pf); } clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state); set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); /* reset fd counters */ pf->fd_add_err = 0; pf->fd_atr_cnt = 0; /* if ATR was auto disabled it can be re-enabled. */ if (test_and_clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state)) if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) && (I40E_DEBUG_FD & pf->hw.debug_mask)) dev_info(&pf->pdev->dev, "ATR re-enabled.\n"); } return need_reset; } /** * i40e_clear_rss_lut - clear the rx hash lookup table * @vsi: the VSI being configured **/ static void i40e_clear_rss_lut(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; u16 vf_id = vsi->vf_id; u8 i; if (vsi->type == I40E_VSI_MAIN) { for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) wr32(hw, I40E_PFQF_HLUT(i), 0); } else if (vsi->type == I40E_VSI_SRIOV) { for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) i40e_write_rx_ctl(hw, I40E_VFQF_HLUT1(i, vf_id), 0); } else { dev_err(&pf->pdev->dev, "Cannot set RSS LUT - invalid VSI type\n"); } } /** * i40e_set_loopback - turn on/off loopback mode on underlying PF * @vsi: ptr to VSI * @ena: flag to indicate the on/off setting */ static int i40e_set_loopback(struct i40e_vsi *vsi, bool ena) { bool if_running = netif_running(vsi->netdev) && !test_and_set_bit(__I40E_VSI_DOWN, vsi->state); int ret; if (if_running) i40e_down(vsi); ret = i40e_aq_set_mac_loopback(&vsi->back->hw, ena, NULL); if (ret) netdev_err(vsi->netdev, "Failed to toggle loopback state\n"); if (if_running) i40e_up(vsi); return ret; } /** * i40e_set_features - set the netdev feature flags * @netdev: ptr to the netdev being adjusted * @features: the feature set that the stack is suggesting * Note: expects to be called while under rtnl_lock() **/ static int i40e_set_features(struct net_device *netdev, netdev_features_t features) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; bool need_reset; if (features & NETIF_F_RXHASH && !(netdev->features & NETIF_F_RXHASH)) i40e_pf_config_rss(pf); else if (!(features & NETIF_F_RXHASH) && netdev->features & NETIF_F_RXHASH) i40e_clear_rss_lut(vsi); if (features & NETIF_F_HW_VLAN_CTAG_RX) i40e_vlan_stripping_enable(vsi); else i40e_vlan_stripping_disable(vsi); if (!(features & NETIF_F_HW_TC) && (netdev->features & NETIF_F_HW_TC) && pf->num_cloud_filters) { dev_err(&pf->pdev->dev, "Offloaded tc filters active, can't turn hw_tc_offload off"); return -EINVAL; } if (!(features & NETIF_F_HW_L2FW_DOFFLOAD) && vsi->macvlan_cnt) i40e_del_all_macvlans(vsi); need_reset = i40e_set_ntuple(pf, features); if (need_reset) i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); if ((features ^ netdev->features) & NETIF_F_LOOPBACK) return i40e_set_loopback(vsi, !!(features & NETIF_F_LOOPBACK)); return 0; } static int i40e_udp_tunnel_set_port(struct net_device *netdev, unsigned int table, unsigned int idx, struct udp_tunnel_info *ti) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_hw *hw = &np->vsi->back->hw; u8 type, filter_index; int ret; type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? I40E_AQC_TUNNEL_TYPE_VXLAN : I40E_AQC_TUNNEL_TYPE_NGE; ret = i40e_aq_add_udp_tunnel(hw, ntohs(ti->port), type, &filter_index, NULL); if (ret) { netdev_info(netdev, "add UDP port failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(hw, hw->aq.asq_last_status)); return -EIO; } udp_tunnel_nic_set_port_priv(netdev, table, idx, filter_index); return 0; } static int i40e_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table, unsigned int idx, struct udp_tunnel_info *ti) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_hw *hw = &np->vsi->back->hw; int ret; ret = i40e_aq_del_udp_tunnel(hw, ti->hw_priv, NULL); if (ret) { netdev_info(netdev, "delete UDP port failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(hw, hw->aq.asq_last_status)); return -EIO; } return 0; } static int i40e_get_phys_port_id(struct net_device *netdev, struct netdev_phys_item_id *ppid) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_pf *pf = np->vsi->back; struct i40e_hw *hw = &pf->hw; if (!test_bit(I40E_HW_CAP_PORT_ID_VALID, pf->hw.caps)) return -EOPNOTSUPP; ppid->id_len = min_t(int, sizeof(hw->mac.port_addr), sizeof(ppid->id)); memcpy(ppid->id, hw->mac.port_addr, ppid->id_len); return 0; } /** * i40e_ndo_fdb_add - add an entry to the hardware database * @ndm: the input from the stack * @tb: pointer to array of nladdr (unused) * @dev: the net device pointer * @addr: the MAC address entry being added * @vid: VLAN ID * @flags: instructions from stack about fdb operation * @extack: netlink extended ack, unused currently */ static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 flags, struct netlink_ext_ack *extack) { struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_pf *pf = np->vsi->back; int err = 0; if (!test_bit(I40E_FLAG_SRIOV_ENA, pf->flags)) return -EOPNOTSUPP; if (vid) { pr_info("%s: vlans aren't supported yet for dev_uc|mc_add()\n", dev->name); return -EINVAL; } /* Hardware does not support aging addresses so if a * ndm_state is given only allow permanent addresses */ if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) { netdev_info(dev, "FDB only supports static addresses\n"); return -EINVAL; } if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) err = dev_uc_add_excl(dev, addr); else if (is_multicast_ether_addr(addr)) err = dev_mc_add_excl(dev, addr); else err = -EINVAL; /* Only return duplicate errors if NLM_F_EXCL is set */ if (err == -EEXIST && !(flags & NLM_F_EXCL)) err = 0; return err; } /** * i40e_ndo_bridge_setlink - Set the hardware bridge mode * @dev: the netdev being configured * @nlh: RTNL message * @flags: bridge flags * @extack: netlink extended ack * * Inserts a new hardware bridge if not already created and * enables the bridging mode requested (VEB or VEPA). If the * hardware bridge has already been inserted and the request * is to change the mode then that requires a PF reset to * allow rebuild of the components with required hardware * bridge mode enabled. * * Note: expects to be called while under rtnl_lock() **/ static int i40e_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags, struct netlink_ext_ack *extack) { struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; struct nlattr *attr, *br_spec; struct i40e_veb *veb; int rem; /* Only for PF VSI for now */ if (vsi->type != I40E_VSI_MAIN) return -EOPNOTSUPP; /* Find the HW bridge for PF VSI */ veb = i40e_pf_get_veb_by_seid(pf, vsi->uplink_seid); br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (!br_spec) return -EINVAL; nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) { __u16 mode = nla_get_u16(attr); if ((mode != BRIDGE_MODE_VEPA) && (mode != BRIDGE_MODE_VEB)) return -EINVAL; /* Insert a new HW bridge */ if (!veb) { veb = i40e_veb_setup(pf, vsi->uplink_seid, vsi->seid, vsi->tc_config.enabled_tc); if (veb) { veb->bridge_mode = mode; i40e_config_bridge_mode(veb); } else { /* No Bridge HW offload available */ return -ENOENT; } break; } else if (mode != veb->bridge_mode) { /* Existing HW bridge but different mode needs reset */ veb->bridge_mode = mode; /* TODO: If no VFs or VMDq VSIs, disallow VEB mode */ if (mode == BRIDGE_MODE_VEB) set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); else clear_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); break; } } return 0; } /** * i40e_ndo_bridge_getlink - Get the hardware bridge mode * @skb: skb buff * @pid: process id * @seq: RTNL message seq # * @dev: the netdev being configured * @filter_mask: unused * @nlflags: netlink flags passed in * * Return the mode in which the hardware bridge is operating in * i.e VEB or VEPA. **/ static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 __always_unused filter_mask, int nlflags) { struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; struct i40e_veb *veb; /* Only for PF VSI for now */ if (vsi->type != I40E_VSI_MAIN) return -EOPNOTSUPP; /* Find the HW bridge for the PF VSI */ veb = i40e_pf_get_veb_by_seid(pf, vsi->uplink_seid); if (!veb) return 0; return ndo_dflt_bridge_getlink(skb, pid, seq, dev, veb->bridge_mode, 0, 0, nlflags, filter_mask, NULL); } /** * i40e_features_check - Validate encapsulated packet conforms to limits * @skb: skb buff * @dev: This physical port's netdev * @features: Offload features that the stack believes apply **/ static netdev_features_t i40e_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { size_t len; /* No point in doing any of this if neither checksum nor GSO are * being requested for this frame. We can rule out both by just * checking for CHECKSUM_PARTIAL */ if (skb->ip_summed != CHECKSUM_PARTIAL) return features; /* We cannot support GSO if the MSS is going to be less than * 64 bytes. If it is then we need to drop support for GSO. */ if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64)) features &= ~NETIF_F_GSO_MASK; /* MACLEN can support at most 63 words */ len = skb_network_offset(skb); if (len & ~(63 * 2)) goto out_err; /* IPLEN and EIPLEN can support at most 127 dwords */ len = skb_network_header_len(skb); if (len & ~(127 * 4)) goto out_err; if (skb->encapsulation) { /* L4TUNLEN can support 127 words */ len = skb_inner_network_header(skb) - skb_transport_header(skb); if (len & ~(127 * 2)) goto out_err; /* IPLEN can support at most 127 dwords */ len = skb_inner_transport_header(skb) - skb_inner_network_header(skb); if (len & ~(127 * 4)) goto out_err; } /* No need to validate L4LEN as TCP is the only protocol with a * flexible value and we support all possible values supported * by TCP, which is at most 15 dwords */ return features; out_err: return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } /** * i40e_xdp_setup - add/remove an XDP program * @vsi: VSI to changed * @prog: XDP program * @extack: netlink extended ack **/ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, struct netlink_ext_ack *extack) { int frame_size = i40e_max_vsi_frame_size(vsi, prog); struct i40e_pf *pf = vsi->back; struct bpf_prog *old_prog; bool need_reset; int i; /* VSI shall be deleted in a moment, block loading new programs */ if (prog && test_bit(__I40E_IN_REMOVE, pf->state)) return -EINVAL; /* Don't allow frames that span over multiple buffers */ if (vsi->netdev->mtu > frame_size - I40E_PACKET_HDR_PAD) { NL_SET_ERR_MSG_MOD(extack, "MTU too large for linear frames and XDP prog does not support frags"); return -EINVAL; } /* When turning XDP on->off/off->on we reset and rebuild the rings. */ need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog); if (need_reset) i40e_prep_for_reset(pf); old_prog = xchg(&vsi->xdp_prog, prog); if (need_reset) { if (!prog) { xdp_features_clear_redirect_target(vsi->netdev); /* Wait until ndo_xsk_wakeup completes. */ synchronize_rcu(); } i40e_reset_and_rebuild(pf, true, true); } if (!i40e_enabled_xdp_vsi(vsi) && prog) { if (i40e_realloc_rx_bi_zc(vsi, true)) return -ENOMEM; } else if (i40e_enabled_xdp_vsi(vsi) && !prog) { if (i40e_realloc_rx_bi_zc(vsi, false)) return -ENOMEM; } for (i = 0; i < vsi->num_queue_pairs; i++) WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog); if (old_prog) bpf_prog_put(old_prog); /* Kick start the NAPI context if there is an AF_XDP socket open * on that queue id. This so that receiving will start. */ if (need_reset && prog) { for (i = 0; i < vsi->num_queue_pairs; i++) if (vsi->xdp_rings[i]->xsk_pool) (void)i40e_xsk_wakeup(vsi->netdev, i, XDP_WAKEUP_RX); xdp_features_set_redirect_target(vsi->netdev, true); } return 0; } /** * i40e_enter_busy_conf - Enters busy config state * @vsi: vsi * * Returns 0 on success, <0 for failure. **/ static int i40e_enter_busy_conf(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int timeout = 50; while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) { timeout--; if (!timeout) return -EBUSY; usleep_range(1000, 2000); } return 0; } /** * i40e_exit_busy_conf - Exits busy config state * @vsi: vsi **/ static void i40e_exit_busy_conf(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; clear_bit(__I40E_CONFIG_BUSY, pf->state); } /** * i40e_queue_pair_reset_stats - Resets all statistics for a queue pair * @vsi: vsi * @queue_pair: queue pair **/ static void i40e_queue_pair_reset_stats(struct i40e_vsi *vsi, int queue_pair) { memset(&vsi->rx_rings[queue_pair]->rx_stats, 0, sizeof(vsi->rx_rings[queue_pair]->rx_stats)); memset(&vsi->tx_rings[queue_pair]->stats, 0, sizeof(vsi->tx_rings[queue_pair]->stats)); if (i40e_enabled_xdp_vsi(vsi)) { memset(&vsi->xdp_rings[queue_pair]->stats, 0, sizeof(vsi->xdp_rings[queue_pair]->stats)); } } /** * i40e_queue_pair_clean_rings - Cleans all the rings of a queue pair * @vsi: vsi * @queue_pair: queue pair **/ static void i40e_queue_pair_clean_rings(struct i40e_vsi *vsi, int queue_pair) { i40e_clean_tx_ring(vsi->tx_rings[queue_pair]); if (i40e_enabled_xdp_vsi(vsi)) { /* Make sure that in-progress ndo_xdp_xmit calls are * completed. */ synchronize_rcu(); i40e_clean_tx_ring(vsi->xdp_rings[queue_pair]); } i40e_clean_rx_ring(vsi->rx_rings[queue_pair]); } /** * i40e_queue_pair_toggle_napi - Enables/disables NAPI for a queue pair * @vsi: vsi * @queue_pair: queue pair * @enable: true for enable, false for disable **/ static void i40e_queue_pair_toggle_napi(struct i40e_vsi *vsi, int queue_pair, bool enable) { struct i40e_ring *rxr = vsi->rx_rings[queue_pair]; struct i40e_q_vector *q_vector = rxr->q_vector; if (!vsi->netdev) return; /* All rings in a qp belong to the same qvector. */ if (q_vector->rx.ring || q_vector->tx.ring) { if (enable) napi_enable(&q_vector->napi); else napi_disable(&q_vector->napi); } } /** * i40e_queue_pair_toggle_rings - Enables/disables all rings for a queue pair * @vsi: vsi * @queue_pair: queue pair * @enable: true for enable, false for disable * * Returns 0 on success, <0 on failure. **/ static int i40e_queue_pair_toggle_rings(struct i40e_vsi *vsi, int queue_pair, bool enable) { struct i40e_pf *pf = vsi->back; int pf_q, ret = 0; pf_q = vsi->base_queue + queue_pair; ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q, false /*is xdp*/, enable); if (ret) { dev_info(&pf->pdev->dev, "VSI seid %d Tx ring %d %sable timeout\n", vsi->seid, pf_q, (enable ? "en" : "dis")); return ret; } i40e_control_rx_q(pf, pf_q, enable); ret = i40e_pf_rxq_wait(pf, pf_q, enable); if (ret) { dev_info(&pf->pdev->dev, "VSI seid %d Rx ring %d %sable timeout\n", vsi->seid, pf_q, (enable ? "en" : "dis")); return ret; } /* Due to HW errata, on Rx disable only, the register can * indicate done before it really is. Needs 50ms to be sure */ if (!enable) mdelay(50); if (!i40e_enabled_xdp_vsi(vsi)) return ret; ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q + vsi->alloc_queue_pairs, true /*is xdp*/, enable); if (ret) { dev_info(&pf->pdev->dev, "VSI seid %d XDP Tx ring %d %sable timeout\n", vsi->seid, pf_q, (enable ? "en" : "dis")); } return ret; } /** * i40e_queue_pair_enable_irq - Enables interrupts for a queue pair * @vsi: vsi * @queue_pair: queue_pair **/ static void i40e_queue_pair_enable_irq(struct i40e_vsi *vsi, int queue_pair) { struct i40e_ring *rxr = vsi->rx_rings[queue_pair]; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; /* All rings in a qp belong to the same qvector. */ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) i40e_irq_dynamic_enable(vsi, rxr->q_vector->v_idx); else i40e_irq_dynamic_enable_icr0(pf); i40e_flush(hw); } /** * i40e_queue_pair_disable_irq - Disables interrupts for a queue pair * @vsi: vsi * @queue_pair: queue_pair **/ static void i40e_queue_pair_disable_irq(struct i40e_vsi *vsi, int queue_pair) { struct i40e_ring *rxr = vsi->rx_rings[queue_pair]; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; /* For simplicity, instead of removing the qp interrupt causes * from the interrupt linked list, we simply disable the interrupt, and * leave the list intact. * * All rings in a qp belong to the same qvector. */ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { u32 intpf = vsi->base_vector + rxr->q_vector->v_idx; wr32(hw, I40E_PFINT_DYN_CTLN(intpf - 1), 0); i40e_flush(hw); synchronize_irq(pf->msix_entries[intpf].vector); } else { /* Legacy and MSI mode - this stops all interrupt handling */ wr32(hw, I40E_PFINT_ICR0_ENA, 0); wr32(hw, I40E_PFINT_DYN_CTL0, 0); i40e_flush(hw); synchronize_irq(pf->pdev->irq); } } /** * i40e_queue_pair_disable - Disables a queue pair * @vsi: vsi * @queue_pair: queue pair * * Returns 0 on success, <0 on failure. **/ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair) { int err; err = i40e_enter_busy_conf(vsi); if (err) return err; i40e_queue_pair_disable_irq(vsi, queue_pair); i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */); i40e_clean_rx_ring(vsi->rx_rings[queue_pair]); i40e_queue_pair_clean_rings(vsi, queue_pair); i40e_queue_pair_reset_stats(vsi, queue_pair); return err; } /** * i40e_queue_pair_enable - Enables a queue pair * @vsi: vsi * @queue_pair: queue pair * * Returns 0 on success, <0 on failure. **/ int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair) { int err; err = i40e_configure_tx_ring(vsi->tx_rings[queue_pair]); if (err) return err; if (i40e_enabled_xdp_vsi(vsi)) { err = i40e_configure_tx_ring(vsi->xdp_rings[queue_pair]); if (err) return err; } err = i40e_configure_rx_ring(vsi->rx_rings[queue_pair]); if (err) return err; err = i40e_queue_pair_toggle_rings(vsi, queue_pair, true /* on */); i40e_queue_pair_toggle_napi(vsi, queue_pair, true /* on */); i40e_queue_pair_enable_irq(vsi, queue_pair); i40e_exit_busy_conf(vsi); return err; } /** * i40e_xdp - implements ndo_bpf for i40e * @dev: netdevice * @xdp: XDP command **/ static int i40e_xdp(struct net_device *dev, struct netdev_bpf *xdp) { struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_vsi *vsi = np->vsi; if (vsi->type != I40E_VSI_MAIN) return -EINVAL; switch (xdp->command) { case XDP_SETUP_PROG: return i40e_xdp_setup(vsi, xdp->prog, xdp->extack); case XDP_SETUP_XSK_POOL: return i40e_xsk_pool_setup(vsi, xdp->xsk.pool, xdp->xsk.queue_id); default: return -EINVAL; } } static const struct net_device_ops i40e_netdev_ops = { .ndo_open = i40e_open, .ndo_stop = i40e_close, .ndo_start_xmit = i40e_lan_xmit_frame, .ndo_get_stats64 = i40e_get_netdev_stats_struct, .ndo_set_rx_mode = i40e_set_rx_mode, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = i40e_set_mac, .ndo_change_mtu = i40e_change_mtu, .ndo_eth_ioctl = i40e_ioctl, .ndo_tx_timeout = i40e_tx_timeout, .ndo_vlan_rx_add_vid = i40e_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = i40e_vlan_rx_kill_vid, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = i40e_netpoll, #endif .ndo_setup_tc = __i40e_setup_tc, .ndo_select_queue = i40e_lan_select_queue, .ndo_set_features = i40e_set_features, .ndo_set_vf_mac = i40e_ndo_set_vf_mac, .ndo_set_vf_vlan = i40e_ndo_set_vf_port_vlan, .ndo_get_vf_stats = i40e_get_vf_stats, .ndo_set_vf_rate = i40e_ndo_set_vf_bw, .ndo_get_vf_config = i40e_ndo_get_vf_config, .ndo_set_vf_link_state = i40e_ndo_set_vf_link_state, .ndo_set_vf_spoofchk = i40e_ndo_set_vf_spoofchk, .ndo_set_vf_trust = i40e_ndo_set_vf_trust, .ndo_get_phys_port_id = i40e_get_phys_port_id, .ndo_fdb_add = i40e_ndo_fdb_add, .ndo_features_check = i40e_features_check, .ndo_bridge_getlink = i40e_ndo_bridge_getlink, .ndo_bridge_setlink = i40e_ndo_bridge_setlink, .ndo_bpf = i40e_xdp, .ndo_xdp_xmit = i40e_xdp_xmit, .ndo_xsk_wakeup = i40e_xsk_wakeup, .ndo_dfwd_add_station = i40e_fwd_add, .ndo_dfwd_del_station = i40e_fwd_del, }; /** * i40e_config_netdev - Setup the netdev flags * @vsi: the VSI being configured * * Returns 0 on success, negative value on failure **/ static int i40e_config_netdev(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_netdev_priv *np; struct net_device *netdev; u8 broadcast[ETH_ALEN]; u8 mac_addr[ETH_ALEN]; int etherdev_size; netdev_features_t hw_enc_features; netdev_features_t hw_features; etherdev_size = sizeof(struct i40e_netdev_priv); netdev = alloc_etherdev_mq(etherdev_size, vsi->alloc_queue_pairs); if (!netdev) return -ENOMEM; vsi->netdev = netdev; np = netdev_priv(netdev); np->vsi = vsi; hw_enc_features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_SOFT_FEATURES | NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_PARTIAL | NETIF_F_GSO_IPXIP4 | NETIF_F_GSO_IPXIP6 | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_UDP_L4 | NETIF_F_SCTP_CRC | NETIF_F_RXHASH | NETIF_F_RXCSUM | 0; if (!test_bit(I40E_HW_CAP_OUTER_UDP_CSUM, pf->hw.caps)) netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; netdev->udp_tunnel_nic_info = &pf->udp_tunnel_nic; netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM; netdev->hw_enc_features |= hw_enc_features; /* record features VLANs can make use of */ netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID; #define I40E_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \ NETIF_F_GSO_GRE_CSUM | \ NETIF_F_GSO_IPXIP4 | \ NETIF_F_GSO_IPXIP6 | \ NETIF_F_GSO_UDP_TUNNEL | \ NETIF_F_GSO_UDP_TUNNEL_CSUM) netdev->gso_partial_features = I40E_GSO_PARTIAL_FEATURES; netdev->features |= NETIF_F_GSO_PARTIAL | I40E_GSO_PARTIAL_FEATURES; netdev->mpls_features |= NETIF_F_SG; netdev->mpls_features |= NETIF_F_HW_CSUM; netdev->mpls_features |= NETIF_F_TSO; netdev->mpls_features |= NETIF_F_TSO6; netdev->mpls_features |= I40E_GSO_PARTIAL_FEATURES; /* enable macvlan offloads */ netdev->hw_features |= NETIF_F_HW_L2FW_DOFFLOAD; hw_features = hw_enc_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; if (!test_bit(I40E_FLAG_MFP_ENA, pf->flags)) hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC; netdev->hw_features |= hw_features | NETIF_F_LOOPBACK; netdev->features |= hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID; netdev->features &= ~NETIF_F_HW_TC; if (vsi->type == I40E_VSI_MAIN) { SET_NETDEV_DEV(netdev, &pf->pdev->dev); ether_addr_copy(mac_addr, hw->mac.perm_addr); /* The following steps are necessary for two reasons. First, * some older NVM configurations load a default MAC-VLAN * filter that will accept any tagged packet, and we want to * replace this with a normal filter. Additionally, it is * possible our MAC address was provided by the platform using * Open Firmware or similar. * * Thus, we need to remove the default filter and install one * specific to the MAC address. */ i40e_rm_default_mac_filter(vsi, mac_addr); spin_lock_bh(&vsi->mac_filter_hash_lock); i40e_add_mac_filter(vsi, mac_addr); spin_unlock_bh(&vsi->mac_filter_hash_lock); netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | NETDEV_XDP_ACT_XSK_ZEROCOPY | NETDEV_XDP_ACT_RX_SG; netdev->xdp_zc_max_segs = I40E_MAX_BUFFER_TXD; } else { /* Relate the VSI_VMDQ name to the VSI_MAIN name. Note that we * are still limited by IFNAMSIZ, but we're adding 'v%d\0' to * the end, which is 4 bytes long, so force truncation of the * original name by IFNAMSIZ - 4 */ struct i40e_vsi *main_vsi = i40e_pf_get_main_vsi(pf); snprintf(netdev->name, IFNAMSIZ, "%.*sv%%d", IFNAMSIZ - 4, main_vsi->netdev->name); eth_random_addr(mac_addr); spin_lock_bh(&vsi->mac_filter_hash_lock); i40e_add_mac_filter(vsi, mac_addr); spin_unlock_bh(&vsi->mac_filter_hash_lock); } /* Add the broadcast filter so that we initially will receive * broadcast packets. Note that when a new VLAN is first added the * driver will convert all filters marked I40E_VLAN_ANY into VLAN * specific filters as part of transitioning into "vlan" operation. * When more VLANs are added, the driver will copy each existing MAC * filter and add it for the new VLAN. * * Broadcast filters are handled specially by * i40e_sync_filters_subtask, as the driver must to set the broadcast * promiscuous bit instead of adding this directly as a MAC/VLAN * filter. The subtask will update the correct broadcast promiscuous * bits as VLANs become active or inactive. */ eth_broadcast_addr(broadcast); spin_lock_bh(&vsi->mac_filter_hash_lock); i40e_add_mac_filter(vsi, broadcast); spin_unlock_bh(&vsi->mac_filter_hash_lock); eth_hw_addr_set(netdev, mac_addr); ether_addr_copy(netdev->perm_addr, mac_addr); /* i40iw_net_event() reads 16 bytes from neigh->primary_key */ netdev->neigh_priv_len = sizeof(u32) * 4; netdev->priv_flags |= IFF_UNICAST_FLT; netdev->priv_flags |= IFF_SUPP_NOFCS; /* Setup netdev TC information */ i40e_vsi_config_netdev_tc(vsi, vsi->tc_config.enabled_tc); netdev->netdev_ops = &i40e_netdev_ops; netdev->watchdog_timeo = 5 * HZ; i40e_set_ethtool_ops(netdev); /* MTU range: 68 - 9706 */ netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = I40E_MAX_RXBUFFER - I40E_PACKET_HDR_PAD; return 0; } /** * i40e_vsi_delete - Delete a VSI from the switch * @vsi: the VSI being removed * * Returns 0 on success, negative value on failure **/ static void i40e_vsi_delete(struct i40e_vsi *vsi) { /* remove default VSI is not allowed */ if (vsi == vsi->back->vsi[vsi->back->lan_vsi]) return; i40e_aq_delete_element(&vsi->back->hw, vsi->seid, NULL); } /** * i40e_is_vsi_uplink_mode_veb - Check if the VSI's uplink bridge mode is VEB * @vsi: the VSI being queried * * Returns 1 if HW bridge mode is VEB and return 0 in case of VEPA mode **/ int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi) { struct i40e_veb *veb; struct i40e_pf *pf = vsi->back; /* Uplink is not a bridge so default to VEB */ if (vsi->veb_idx >= I40E_MAX_VEB) return 1; veb = pf->veb[vsi->veb_idx]; if (!veb) { dev_info(&pf->pdev->dev, "There is no veb associated with the bridge\n"); return -ENOENT; } /* Uplink is a bridge in VEPA mode */ if (veb->bridge_mode & BRIDGE_MODE_VEPA) { return 0; } else { /* Uplink is a bridge in VEB mode */ return 1; } /* VEPA is now default bridge, so return 0 */ return 0; } /** * i40e_add_vsi - Add a VSI to the switch * @vsi: the VSI being configured * * This initializes a VSI context depending on the VSI type to be added and * passes it down to the add_vsi aq command. **/ static int i40e_add_vsi(struct i40e_vsi *vsi) { int ret = -ENODEV; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_vsi_context ctxt; struct i40e_mac_filter *f; struct hlist_node *h; int bkt; u8 enabled_tc = 0x1; /* TC0 enabled */ int f_count = 0; memset(&ctxt, 0, sizeof(ctxt)); switch (vsi->type) { case I40E_VSI_MAIN: /* The PF's main VSI is already setup as part of the * device initialization, so we'll not bother with * the add_vsi call, but we will retrieve the current * VSI context. */ ctxt.seid = pf->main_vsi_seid; ctxt.pf_num = pf->hw.pf_id; ctxt.vf_num = 0; ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL); ctxt.flags = I40E_AQ_VSI_TYPE_PF; if (ret) { dev_info(&pf->pdev->dev, "couldn't get PF vsi config, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); return -ENOENT; } vsi->info = ctxt.info; vsi->info.valid_sections = 0; vsi->seid = ctxt.seid; vsi->id = ctxt.vsi_number; enabled_tc = i40e_pf_get_tc_map(pf); /* Source pruning is enabled by default, so the flag is * negative logic - if it's set, we need to fiddle with * the VSI to disable source pruning. */ if (test_bit(I40E_FLAG_SOURCE_PRUNING_DIS, pf->flags)) { memset(&ctxt, 0, sizeof(ctxt)); ctxt.seid = pf->main_vsi_seid; ctxt.pf_num = pf->hw.pf_id; ctxt.vf_num = 0; ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); ctxt.info.switch_id = cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_LOCAL_LB); ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (ret) { dev_info(&pf->pdev->dev, "update vsi failed, err %d aq_err %s\n", ret, i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); ret = -ENOENT; goto err; } } /* MFP mode setup queue map and update VSI */ if (test_bit(I40E_FLAG_MFP_ENA, pf->flags) && !(pf->hw.func_caps.iscsi)) { /* NIC type PF */ memset(&ctxt, 0, sizeof(ctxt)); ctxt.seid = pf->main_vsi_seid; ctxt.pf_num = pf->hw.pf_id; ctxt.vf_num = 0; i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false); ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (ret) { dev_info(&pf->pdev->dev, "update vsi failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); ret = -ENOENT; goto err; } /* update the local VSI info queue map */ i40e_vsi_update_queue_map(vsi, &ctxt); vsi->info.valid_sections = 0; } else { /* Default/Main VSI is only enabled for TC0 * reconfigure it to enable all TCs that are * available on the port in SFP mode. * For MFP case the iSCSI PF would use this * flow to enable LAN+iSCSI TC. */ ret = i40e_vsi_config_tc(vsi, enabled_tc); if (ret) { /* Single TC condition is not fatal, * message and continue */ dev_info(&pf->pdev->dev, "failed to configure TCs for main VSI tc_map 0x%08x, err %pe aq_err %s\n", enabled_tc, ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); } } break; case I40E_VSI_FDIR: ctxt.pf_num = hw->pf_id; ctxt.vf_num = 0; ctxt.uplink_seid = vsi->uplink_seid; ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL; ctxt.flags = I40E_AQ_VSI_TYPE_PF; if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags) && (i40e_is_vsi_uplink_mode_veb(vsi))) { ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); ctxt.info.switch_id = cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB); } i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true); break; case I40E_VSI_VMDQ2: ctxt.pf_num = hw->pf_id; ctxt.vf_num = 0; ctxt.uplink_seid = vsi->uplink_seid; ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL; ctxt.flags = I40E_AQ_VSI_TYPE_VMDQ2; /* This VSI is connected to VEB so the switch_id * should be set to zero by default. */ if (i40e_is_vsi_uplink_mode_veb(vsi)) { ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); ctxt.info.switch_id = cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB); } /* Setup the VSI tx/rx queue map for TC0 only for now */ i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true); break; case I40E_VSI_SRIOV: ctxt.pf_num = hw->pf_id; ctxt.vf_num = vsi->vf_id + hw->func_caps.vf_base_id; ctxt.uplink_seid = vsi->uplink_seid; ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL; ctxt.flags = I40E_AQ_VSI_TYPE_VF; /* This VSI is connected to VEB so the switch_id * should be set to zero by default. */ if (i40e_is_vsi_uplink_mode_veb(vsi)) { ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); ctxt.info.switch_id = cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB); } if (test_bit(I40E_FLAG_IWARP_ENA, vsi->back->flags)) { ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID); ctxt.info.queueing_opt_flags |= (I40E_AQ_VSI_QUE_OPT_TCP_ENA | I40E_AQ_VSI_QUE_OPT_RSS_LUT_VSI); } ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID); ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_MODE_ALL; if (pf->vf[vsi->vf_id].spoofchk) { ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_SECURITY_VALID); ctxt.info.sec_flags |= (I40E_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK | I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK); } /* Setup the VSI tx/rx queue map for TC0 only for now */ i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true); break; case I40E_VSI_IWARP: /* send down message to iWARP */ break; default: return -ENODEV; } if (vsi->type != I40E_VSI_MAIN) { ret = i40e_aq_add_vsi(hw, &ctxt, NULL); if (ret) { dev_info(&vsi->back->pdev->dev, "add vsi failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); ret = -ENOENT; goto err; } vsi->info = ctxt.info; vsi->info.valid_sections = 0; vsi->seid = ctxt.seid; vsi->id = ctxt.vsi_number; } spin_lock_bh(&vsi->mac_filter_hash_lock); vsi->active_filters = 0; /* If macvlan filters already exist, force them to get loaded */ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { f->state = I40E_FILTER_NEW; f_count++; } spin_unlock_bh(&vsi->mac_filter_hash_lock); clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); if (f_count) { vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state); } /* Update VSI BW information */ ret = i40e_vsi_get_bw_info(vsi); if (ret) { dev_info(&pf->pdev->dev, "couldn't get vsi bw info, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); /* VSI is already added so not tearing that up */ ret = 0; } err: return ret; } /** * i40e_vsi_release - Delete a VSI and free its resources * @vsi: the VSI being removed * * Returns 0 on success or < 0 on error **/ int i40e_vsi_release(struct i40e_vsi *vsi) { struct i40e_mac_filter *f; struct hlist_node *h; struct i40e_veb *veb; struct i40e_pf *pf; u16 uplink_seid; int i, n, bkt; pf = vsi->back; /* release of a VEB-owner or last VSI is not allowed */ if (vsi->flags & I40E_VSI_FLAG_VEB_OWNER) { dev_info(&pf->pdev->dev, "VSI %d has existing VEB %d\n", vsi->seid, vsi->uplink_seid); return -ENODEV; } if (vsi->type == I40E_VSI_MAIN && !test_bit(__I40E_DOWN, pf->state)) { dev_info(&pf->pdev->dev, "Can't remove PF VSI\n"); return -ENODEV; } set_bit(__I40E_VSI_RELEASING, vsi->state); uplink_seid = vsi->uplink_seid; if (vsi->type != I40E_VSI_SRIOV) { if (vsi->netdev_registered) { vsi->netdev_registered = false; if (vsi->netdev) { /* results in a call to i40e_close() */ unregister_netdev(vsi->netdev); } } else { i40e_vsi_close(vsi); } i40e_vsi_disable_irq(vsi); } if (vsi->type == I40E_VSI_MAIN) i40e_devlink_destroy_port(pf); spin_lock_bh(&vsi->mac_filter_hash_lock); /* clear the sync flag on all filters */ if (vsi->netdev) { __dev_uc_unsync(vsi->netdev, NULL); __dev_mc_unsync(vsi->netdev, NULL); } /* make sure any remaining filters are marked for deletion */ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) __i40e_del_filter(vsi, f); spin_unlock_bh(&vsi->mac_filter_hash_lock); i40e_sync_vsi_filters(vsi); i40e_vsi_delete(vsi); i40e_vsi_free_q_vectors(vsi); if (vsi->netdev) { free_netdev(vsi->netdev); vsi->netdev = NULL; } i40e_vsi_clear_rings(vsi); i40e_vsi_clear(vsi); /* If this was the last thing on the VEB, except for the * controlling VSI, remove the VEB, which puts the controlling * VSI onto the uplink port. * * Well, okay, there's one more exception here: don't remove * the floating VEBs yet. We'll wait for an explicit remove request * from up the network stack. */ veb = i40e_pf_get_veb_by_seid(pf, uplink_seid); if (veb && veb->uplink_seid) { n = 0; /* Count non-controlling VSIs present on the VEB */ i40e_pf_for_each_vsi(pf, i, vsi) if (vsi->uplink_seid == uplink_seid && (vsi->flags & I40E_VSI_FLAG_VEB_OWNER) == 0) n++; /* If there is no VSI except the control one then release * the VEB and put the control VSI onto VEB uplink. */ if (!n) i40e_veb_release(veb); } return 0; } /** * i40e_vsi_setup_vectors - Set up the q_vectors for the given VSI * @vsi: ptr to the VSI * * This should only be called after i40e_vsi_mem_alloc() which allocates the * corresponding SW VSI structure and initializes num_queue_pairs for the * newly allocated VSI. * * Returns 0 on success or negative on failure **/ static int i40e_vsi_setup_vectors(struct i40e_vsi *vsi) { int ret = -ENOENT; struct i40e_pf *pf = vsi->back; if (vsi->q_vectors[0]) { dev_info(&pf->pdev->dev, "VSI %d has existing q_vectors\n", vsi->seid); return -EEXIST; } if (vsi->base_vector) { dev_info(&pf->pdev->dev, "VSI %d has non-zero base vector %d\n", vsi->seid, vsi->base_vector); return -EEXIST; } ret = i40e_vsi_alloc_q_vectors(vsi); if (ret) { dev_info(&pf->pdev->dev, "failed to allocate %d q_vector for VSI %d, ret=%d\n", vsi->num_q_vectors, vsi->seid, ret); vsi->num_q_vectors = 0; goto vector_setup_out; } /* In Legacy mode, we do not have to get any other vector since we * piggyback on the misc/ICR0 for queue interrupts. */ if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) return ret; if (vsi->num_q_vectors) vsi->base_vector = i40e_get_lump(pf, pf->irq_pile, vsi->num_q_vectors, vsi->idx); if (vsi->base_vector < 0) { dev_info(&pf->pdev->dev, "failed to get tracking for %d vectors for VSI %d, err=%d\n", vsi->num_q_vectors, vsi->seid, vsi->base_vector); i40e_vsi_free_q_vectors(vsi); ret = -ENOENT; goto vector_setup_out; } vector_setup_out: return ret; } /** * i40e_vsi_reinit_setup - return and reallocate resources for a VSI * @vsi: pointer to the vsi. * * This re-allocates a vsi's queue resources. * * Returns pointer to the successfully allocated and configured VSI sw struct * on success, otherwise returns NULL on failure. **/ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi) { struct i40e_vsi *main_vsi; u16 alloc_queue_pairs; struct i40e_pf *pf; int ret; if (!vsi) return NULL; pf = vsi->back; i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx); i40e_vsi_clear_rings(vsi); i40e_vsi_free_arrays(vsi, false); i40e_set_num_rings_in_vsi(vsi); ret = i40e_vsi_alloc_arrays(vsi, false); if (ret) goto err_vsi; alloc_queue_pairs = vsi->alloc_queue_pairs * (i40e_enabled_xdp_vsi(vsi) ? 2 : 1); ret = i40e_get_lump(pf, pf->qp_pile, alloc_queue_pairs, vsi->idx); if (ret < 0) { dev_info(&pf->pdev->dev, "failed to get tracking for %d queues for VSI %d err %d\n", alloc_queue_pairs, vsi->seid, ret); goto err_vsi; } vsi->base_queue = ret; /* Update the FW view of the VSI. Force a reset of TC and queue * layout configurations. */ main_vsi = i40e_pf_get_main_vsi(pf); main_vsi->seid = pf->main_vsi_seid; i40e_vsi_reconfig_tc(main_vsi); if (vsi->type == I40E_VSI_MAIN) i40e_rm_default_mac_filter(vsi, pf->hw.mac.perm_addr); /* assign it some queues */ ret = i40e_alloc_rings(vsi); if (ret) goto err_rings; /* map all of the rings to the q_vectors */ i40e_vsi_map_rings_to_vectors(vsi); return vsi; err_rings: i40e_vsi_free_q_vectors(vsi); if (vsi->netdev_registered) { vsi->netdev_registered = false; unregister_netdev(vsi->netdev); free_netdev(vsi->netdev); vsi->netdev = NULL; } if (vsi->type == I40E_VSI_MAIN) i40e_devlink_destroy_port(pf); i40e_aq_delete_element(&pf->hw, vsi->seid, NULL); err_vsi: i40e_vsi_clear(vsi); return NULL; } /** * i40e_vsi_setup - Set up a VSI by a given type * @pf: board private structure * @type: VSI type * @uplink_seid: the switch element to link to * @param1: usage depends upon VSI type. For VF types, indicates VF id * * This allocates the sw VSI structure and its queue resources, then add a VSI * to the identified VEB. * * Returns pointer to the successfully allocated and configure VSI sw struct on * success, otherwise returns NULL on failure. **/ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, u16 uplink_seid, u32 param1) { struct i40e_vsi *vsi = NULL; struct i40e_veb *veb = NULL; u16 alloc_queue_pairs; int v_idx; int ret; /* The requested uplink_seid must be either * - the PF's port seid * no VEB is needed because this is the PF * or this is a Flow Director special case VSI * - seid of an existing VEB * - seid of a VSI that owns an existing VEB * - seid of a VSI that doesn't own a VEB * a new VEB is created and the VSI becomes the owner * - seid of the PF VSI, which is what creates the first VEB * this is a special case of the previous * * Find which uplink_seid we were given and create a new VEB if needed */ veb = i40e_pf_get_veb_by_seid(pf, uplink_seid); if (!veb && uplink_seid != pf->mac_seid) { vsi = i40e_pf_get_vsi_by_seid(pf, uplink_seid); if (!vsi) { dev_info(&pf->pdev->dev, "no such uplink_seid %d\n", uplink_seid); return NULL; } if (vsi->uplink_seid == pf->mac_seid) veb = i40e_veb_setup(pf, pf->mac_seid, vsi->seid, vsi->tc_config.enabled_tc); else if ((vsi->flags & I40E_VSI_FLAG_VEB_OWNER) == 0) veb = i40e_veb_setup(pf, vsi->uplink_seid, vsi->seid, vsi->tc_config.enabled_tc); if (veb) { if (vsi->type != I40E_VSI_MAIN) { dev_info(&vsi->back->pdev->dev, "New VSI creation error, uplink seid of LAN VSI expected.\n"); return NULL; } /* We come up by default in VEPA mode if SRIOV is not * already enabled, in which case we can't force VEPA * mode. */ if (!test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) { veb->bridge_mode = BRIDGE_MODE_VEPA; clear_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); } i40e_config_bridge_mode(veb); } veb = i40e_pf_get_veb_by_seid(pf, vsi->uplink_seid); if (!veb) { dev_info(&pf->pdev->dev, "couldn't add VEB\n"); return NULL; } vsi->flags |= I40E_VSI_FLAG_VEB_OWNER; uplink_seid = veb->seid; } /* get vsi sw struct */ v_idx = i40e_vsi_mem_alloc(pf, type); if (v_idx < 0) goto err_alloc; vsi = pf->vsi[v_idx]; if (!vsi) goto err_alloc; vsi->type = type; vsi->veb_idx = (veb ? veb->idx : I40E_NO_VEB); if (type == I40E_VSI_MAIN) pf->lan_vsi = v_idx; else if (type == I40E_VSI_SRIOV) vsi->vf_id = param1; /* assign it some queues */ alloc_queue_pairs = vsi->alloc_queue_pairs * (i40e_enabled_xdp_vsi(vsi) ? 2 : 1); ret = i40e_get_lump(pf, pf->qp_pile, alloc_queue_pairs, vsi->idx); if (ret < 0) { dev_info(&pf->pdev->dev, "failed to get tracking for %d queues for VSI %d err=%d\n", alloc_queue_pairs, vsi->seid, ret); goto err_vsi; } vsi->base_queue = ret; /* get a VSI from the hardware */ vsi->uplink_seid = uplink_seid; ret = i40e_add_vsi(vsi); if (ret) goto err_vsi; switch (vsi->type) { /* setup the netdev if needed */ case I40E_VSI_MAIN: case I40E_VSI_VMDQ2: ret = i40e_config_netdev(vsi); if (ret) goto err_netdev; ret = i40e_netif_set_realnum_tx_rx_queues(vsi); if (ret) goto err_netdev; if (vsi->type == I40E_VSI_MAIN) { ret = i40e_devlink_create_port(pf); if (ret) goto err_netdev; SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port); } ret = register_netdev(vsi->netdev); if (ret) goto err_dl_port; vsi->netdev_registered = true; netif_carrier_off(vsi->netdev); #ifdef CONFIG_I40E_DCB /* Setup DCB netlink interface */ i40e_dcbnl_setup(vsi); #endif /* CONFIG_I40E_DCB */ fallthrough; case I40E_VSI_FDIR: /* set up vectors and rings if needed */ ret = i40e_vsi_setup_vectors(vsi); if (ret) goto err_msix; ret = i40e_alloc_rings(vsi); if (ret) goto err_rings; /* map all of the rings to the q_vectors */ i40e_vsi_map_rings_to_vectors(vsi); i40e_vsi_reset_stats(vsi); break; default: /* no netdev or rings for the other VSI types */ break; } if (test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps) && vsi->type == I40E_VSI_VMDQ2) { ret = i40e_vsi_config_rss(vsi); if (ret) goto err_config; } return vsi; err_config: i40e_vsi_clear_rings(vsi); err_rings: i40e_vsi_free_q_vectors(vsi); err_msix: if (vsi->netdev_registered) { vsi->netdev_registered = false; unregister_netdev(vsi->netdev); free_netdev(vsi->netdev); vsi->netdev = NULL; } err_dl_port: if (vsi->type == I40E_VSI_MAIN) i40e_devlink_destroy_port(pf); err_netdev: i40e_aq_delete_element(&pf->hw, vsi->seid, NULL); err_vsi: i40e_vsi_clear(vsi); err_alloc: return NULL; } /** * i40e_veb_get_bw_info - Query VEB BW information * @veb: the veb to query * * Query the Tx scheduler BW configuration data for given VEB **/ static int i40e_veb_get_bw_info(struct i40e_veb *veb) { struct i40e_aqc_query_switching_comp_ets_config_resp ets_data; struct i40e_aqc_query_switching_comp_bw_config_resp bw_data; struct i40e_pf *pf = veb->pf; struct i40e_hw *hw = &pf->hw; u32 tc_bw_max; int ret = 0; int i; ret = i40e_aq_query_switch_comp_bw_config(hw, veb->seid, &bw_data, NULL); if (ret) { dev_info(&pf->pdev->dev, "query veb bw config failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, hw->aq.asq_last_status)); goto out; } ret = i40e_aq_query_switch_comp_ets_config(hw, veb->seid, &ets_data, NULL); if (ret) { dev_info(&pf->pdev->dev, "query veb bw ets config failed, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, hw->aq.asq_last_status)); goto out; } veb->bw_limit = le16_to_cpu(ets_data.port_bw_limit); veb->bw_max_quanta = ets_data.tc_bw_max; veb->is_abs_credits = bw_data.absolute_credits_enable; veb->enabled_tc = ets_data.tc_valid_bits; tc_bw_max = le16_to_cpu(bw_data.tc_bw_max[0]) | (le16_to_cpu(bw_data.tc_bw_max[1]) << 16); for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { veb->bw_tc_share_credits[i] = bw_data.tc_bw_share_credits[i]; veb->bw_tc_limit_credits[i] = le16_to_cpu(bw_data.tc_bw_limits[i]); veb->bw_tc_max_quanta[i] = ((tc_bw_max >> (i*4)) & 0x7); } out: return ret; } /** * i40e_veb_mem_alloc - Allocates the next available struct veb in the PF * @pf: board private structure * * On error: returns error code (negative) * On success: returns vsi index in PF (positive) **/ static int i40e_veb_mem_alloc(struct i40e_pf *pf) { int ret = -ENOENT; struct i40e_veb *veb; int i; /* Need to protect the allocation of switch elements at the PF level */ mutex_lock(&pf->switch_mutex); /* VEB list may be fragmented if VEB creation/destruction has * been happening. We can afford to do a quick scan to look * for any free slots in the list. * * find next empty veb slot, looping back around if necessary */ i = 0; while ((i < I40E_MAX_VEB) && (pf->veb[i] != NULL)) i++; if (i >= I40E_MAX_VEB) { ret = -ENOMEM; goto err_alloc_veb; /* out of VEB slots! */ } veb = kzalloc(sizeof(*veb), GFP_KERNEL); if (!veb) { ret = -ENOMEM; goto err_alloc_veb; } veb->pf = pf; veb->idx = i; veb->enabled_tc = 1; pf->veb[i] = veb; ret = i; err_alloc_veb: mutex_unlock(&pf->switch_mutex); return ret; } /** * i40e_switch_branch_release - Delete a branch of the switch tree * @branch: where to start deleting * * This uses recursion to find the tips of the branch to be * removed, deleting until we get back to and can delete this VEB. **/ static void i40e_switch_branch_release(struct i40e_veb *branch) { struct i40e_pf *pf = branch->pf; u16 branch_seid = branch->seid; u16 veb_idx = branch->idx; struct i40e_vsi *vsi; struct i40e_veb *veb; int i; /* release any VEBs on this VEB - RECURSION */ i40e_pf_for_each_veb(pf, i, veb) if (veb->uplink_seid == branch->seid) i40e_switch_branch_release(veb); /* Release the VSIs on this VEB, but not the owner VSI. * * NOTE: Removing the last VSI on a VEB has the SIDE EFFECT of removing * the VEB itself, so don't use (*branch) after this loop. */ i40e_pf_for_each_vsi(pf, i, vsi) if (vsi->uplink_seid == branch_seid && (vsi->flags & I40E_VSI_FLAG_VEB_OWNER) == 0) i40e_vsi_release(vsi); /* There's one corner case where the VEB might not have been * removed, so double check it here and remove it if needed. * This case happens if the veb was created from the debugfs * commands and no VSIs were added to it. */ if (pf->veb[veb_idx]) i40e_veb_release(pf->veb[veb_idx]); } /** * i40e_veb_clear - remove veb struct * @veb: the veb to remove **/ static void i40e_veb_clear(struct i40e_veb *veb) { if (!veb) return; if (veb->pf) { struct i40e_pf *pf = veb->pf; mutex_lock(&pf->switch_mutex); if (pf->veb[veb->idx] == veb) pf->veb[veb->idx] = NULL; mutex_unlock(&pf->switch_mutex); } kfree(veb); } /** * i40e_veb_release - Delete a VEB and free its resources * @veb: the VEB being removed **/ void i40e_veb_release(struct i40e_veb *veb) { struct i40e_vsi *vsi, *vsi_it; struct i40e_pf *pf; int i, n = 0; pf = veb->pf; /* find the remaining VSI and check for extras */ i40e_pf_for_each_vsi(pf, i, vsi_it) if (vsi_it->uplink_seid == veb->seid) { if (vsi_it->flags & I40E_VSI_FLAG_VEB_OWNER) vsi = vsi_it; n++; } /* Floating VEB has to be empty and regular one must have * single owner VSI. */ if ((veb->uplink_seid && n != 1) || (!veb->uplink_seid && n != 0)) { dev_info(&pf->pdev->dev, "can't remove VEB %d with %d VSIs left\n", veb->seid, n); return; } /* For regular VEB move the owner VSI to uplink port */ if (veb->uplink_seid) { vsi->flags &= ~I40E_VSI_FLAG_VEB_OWNER; vsi->uplink_seid = veb->uplink_seid; vsi->veb_idx = I40E_NO_VEB; } i40e_aq_delete_element(&pf->hw, veb->seid, NULL); i40e_veb_clear(veb); } /** * i40e_add_veb - create the VEB in the switch * @veb: the VEB to be instantiated * @vsi: the controlling VSI **/ static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi) { struct i40e_pf *pf = veb->pf; bool enable_stats = !!test_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags); int ret; ret = i40e_aq_add_veb(&pf->hw, veb->uplink_seid, vsi ? vsi->seid : 0, veb->enabled_tc, vsi ? false : true, &veb->seid, enable_stats, NULL); /* get a VEB from the hardware */ if (ret) { dev_info(&pf->pdev->dev, "couldn't add VEB, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); return -EPERM; } /* get statistics counter */ ret = i40e_aq_get_veb_parameters(&pf->hw, veb->seid, NULL, NULL, &veb->stats_idx, NULL, NULL, NULL); if (ret) { dev_info(&pf->pdev->dev, "couldn't get VEB statistics idx, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); return -EPERM; } ret = i40e_veb_get_bw_info(veb); if (ret) { dev_info(&pf->pdev->dev, "couldn't get VEB bw info, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); i40e_aq_delete_element(&pf->hw, veb->seid, NULL); return -ENOENT; } if (vsi) { vsi->uplink_seid = veb->seid; vsi->veb_idx = veb->idx; vsi->flags |= I40E_VSI_FLAG_VEB_OWNER; } return 0; } /** * i40e_veb_setup - Set up a VEB * @pf: board private structure * @uplink_seid: the switch element to link to * @vsi_seid: the initial VSI seid * @enabled_tc: Enabled TC bit-map * * This allocates the sw VEB structure and links it into the switch * It is possible and legal for this to be a duplicate of an already * existing VEB. It is also possible for both uplink and vsi seids * to be zero, in order to create a floating VEB. * * Returns pointer to the successfully allocated VEB sw struct on * success, otherwise returns NULL on failure. **/ struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 uplink_seid, u16 vsi_seid, u8 enabled_tc) { struct i40e_vsi *vsi = NULL; struct i40e_veb *veb; int veb_idx; int ret; /* if one seid is 0, the other must be 0 to create a floating relay */ if ((uplink_seid == 0 || vsi_seid == 0) && (uplink_seid + vsi_seid != 0)) { dev_info(&pf->pdev->dev, "one, not both seid's are 0: uplink=%d vsi=%d\n", uplink_seid, vsi_seid); return NULL; } /* make sure there is such a vsi and uplink */ if (vsi_seid) { vsi = i40e_pf_get_vsi_by_seid(pf, vsi_seid); if (!vsi) { dev_err(&pf->pdev->dev, "vsi seid %d not found\n", vsi_seid); return NULL; } } /* get veb sw struct */ veb_idx = i40e_veb_mem_alloc(pf); if (veb_idx < 0) goto err_alloc; veb = pf->veb[veb_idx]; veb->uplink_seid = uplink_seid; veb->enabled_tc = (enabled_tc ? enabled_tc : 0x1); /* create the VEB in the switch */ ret = i40e_add_veb(veb, vsi); if (ret) goto err_veb; if (vsi && vsi->idx == pf->lan_vsi) pf->lan_veb = veb->idx; return veb; err_veb: i40e_veb_clear(veb); err_alloc: return NULL; } /** * i40e_setup_pf_switch_element - set PF vars based on switch type * @pf: board private structure * @ele: element we are building info from * @num_reported: total number of elements * @printconfig: should we print the contents * * helper function to assist in extracting a few useful SEID values. **/ static void i40e_setup_pf_switch_element(struct i40e_pf *pf, struct i40e_aqc_switch_config_element_resp *ele, u16 num_reported, bool printconfig) { u16 downlink_seid = le16_to_cpu(ele->downlink_seid); u16 uplink_seid = le16_to_cpu(ele->uplink_seid); u8 element_type = ele->element_type; u16 seid = le16_to_cpu(ele->seid); struct i40e_veb *veb; if (printconfig) dev_info(&pf->pdev->dev, "type=%d seid=%d uplink=%d downlink=%d\n", element_type, seid, uplink_seid, downlink_seid); switch (element_type) { case I40E_SWITCH_ELEMENT_TYPE_MAC: pf->mac_seid = seid; break; case I40E_SWITCH_ELEMENT_TYPE_VEB: /* Main VEB? */ if (uplink_seid != pf->mac_seid) break; veb = i40e_pf_get_main_veb(pf); if (!veb) { int v; /* find existing or else empty VEB */ veb = i40e_pf_get_veb_by_seid(pf, seid); if (veb) { pf->lan_veb = veb->idx; } else { v = i40e_veb_mem_alloc(pf); if (v < 0) break; pf->lan_veb = v; } } /* Try to get again main VEB as pf->lan_veb may have changed */ veb = i40e_pf_get_main_veb(pf); if (!veb) break; veb->seid = seid; veb->uplink_seid = pf->mac_seid; veb->pf = pf; break; case I40E_SWITCH_ELEMENT_TYPE_VSI: if (num_reported != 1) break; /* This is immediately after a reset so we can assume this is * the PF's VSI */ pf->mac_seid = uplink_seid; pf->main_vsi_seid = seid; if (printconfig) dev_info(&pf->pdev->dev, "pf_seid=%d main_vsi_seid=%d\n", downlink_seid, pf->main_vsi_seid); break; case I40E_SWITCH_ELEMENT_TYPE_PF: case I40E_SWITCH_ELEMENT_TYPE_VF: case I40E_SWITCH_ELEMENT_TYPE_EMP: case I40E_SWITCH_ELEMENT_TYPE_BMC: case I40E_SWITCH_ELEMENT_TYPE_PE: case I40E_SWITCH_ELEMENT_TYPE_PA: /* ignore these for now */ break; default: dev_info(&pf->pdev->dev, "unknown element type=%d seid=%d\n", element_type, seid); break; } } /** * i40e_fetch_switch_configuration - Get switch config from firmware * @pf: board private structure * @printconfig: should we print the contents * * Get the current switch configuration from the device and * extract a few useful SEID values. **/ int i40e_fetch_switch_configuration(struct i40e_pf *pf, bool printconfig) { struct i40e_aqc_get_switch_config_resp *sw_config; u16 next_seid = 0; int ret = 0; u8 *aq_buf; int i; aq_buf = kzalloc(I40E_AQ_LARGE_BUF, GFP_KERNEL); if (!aq_buf) return -ENOMEM; sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf; do { u16 num_reported, num_total; ret = i40e_aq_get_switch_config(&pf->hw, sw_config, I40E_AQ_LARGE_BUF, &next_seid, NULL); if (ret) { dev_info(&pf->pdev->dev, "get switch config failed err %d aq_err %s\n", ret, i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); kfree(aq_buf); return -ENOENT; } num_reported = le16_to_cpu(sw_config->header.num_reported); num_total = le16_to_cpu(sw_config->header.num_total); if (printconfig) dev_info(&pf->pdev->dev, "header: %d reported %d total\n", num_reported, num_total); for (i = 0; i < num_reported; i++) { struct i40e_aqc_switch_config_element_resp *ele = &sw_config->element[i]; i40e_setup_pf_switch_element(pf, ele, num_reported, printconfig); } } while (next_seid != 0); kfree(aq_buf); return ret; } /** * i40e_setup_pf_switch - Setup the HW switch on startup or after reset * @pf: board private structure * @reinit: if the Main VSI needs to re-initialized. * @lock_acquired: indicates whether or not the lock has been acquired * * Returns 0 on success, negative value on failure **/ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acquired) { struct i40e_vsi *main_vsi; u16 flags = 0; int ret; /* find out what's out there already */ ret = i40e_fetch_switch_configuration(pf, false); if (ret) { dev_info(&pf->pdev->dev, "couldn't fetch switch config, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); return ret; } i40e_pf_reset_stats(pf); /* set the switch config bit for the whole device to * support limited promisc or true promisc * when user requests promisc. The default is limited * promisc. */ if ((pf->hw.pf_id == 0) && !test_bit(I40E_FLAG_TRUE_PROMISC_ENA, pf->flags)) { flags = I40E_AQ_SET_SWITCH_CFG_PROMISC; pf->last_sw_conf_flags = flags; } if (pf->hw.pf_id == 0) { u16 valid_flags; valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC; ret = i40e_aq_set_switch_config(&pf->hw, flags, valid_flags, 0, NULL); if (ret && pf->hw.aq.asq_last_status != I40E_AQ_RC_ESRCH) { dev_info(&pf->pdev->dev, "couldn't set switch config bits, err %pe aq_err %s\n", ERR_PTR(ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); /* not a fatal problem, just keep going */ } pf->last_sw_conf_valid_flags = valid_flags; } /* first time setup */ main_vsi = i40e_pf_get_main_vsi(pf); if (!main_vsi || reinit) { struct i40e_veb *veb; u16 uplink_seid; /* Set up the PF VSI associated with the PF's main VSI * that is already in the HW switch */ veb = i40e_pf_get_main_veb(pf); if (veb) uplink_seid = veb->seid; else uplink_seid = pf->mac_seid; if (!main_vsi) main_vsi = i40e_vsi_setup(pf, I40E_VSI_MAIN, uplink_seid, 0); else if (reinit) main_vsi = i40e_vsi_reinit_setup(main_vsi); if (!main_vsi) { dev_info(&pf->pdev->dev, "setup of MAIN VSI failed\n"); i40e_cloud_filter_exit(pf); i40e_fdir_teardown(pf); return -EAGAIN; } } else { /* force a reset of TC and queue layout configurations */ main_vsi->seid = pf->main_vsi_seid; i40e_vsi_reconfig_tc(main_vsi); } i40e_vlan_stripping_disable(main_vsi); i40e_fdir_sb_setup(pf); /* Setup static PF queue filter control settings */ ret = i40e_setup_pf_filter_control(pf); if (ret) { dev_info(&pf->pdev->dev, "setup_pf_filter_control failed: %d\n", ret); /* Failure here should not stop continuing other steps */ } /* enable RSS in the HW, even for only one queue, as the stack can use * the hash */ if (test_bit(I40E_FLAG_RSS_ENA, pf->flags)) i40e_pf_config_rss(pf); /* fill in link information and enable LSE reporting */ i40e_link_event(pf); i40e_ptp_init(pf); if (!lock_acquired) rtnl_lock(); /* repopulate tunnel port filters */ udp_tunnel_nic_reset_ntf(main_vsi->netdev); if (!lock_acquired) rtnl_unlock(); return ret; } /** * i40e_determine_queue_usage - Work out queue distribution * @pf: board private structure **/ static void i40e_determine_queue_usage(struct i40e_pf *pf) { int queues_left; int q_max; pf->num_lan_qps = 0; /* Find the max queues to be put into basic use. We'll always be * using TC0, whether or not DCB is running, and TC0 will get the * big RSS set. */ queues_left = pf->hw.func_caps.num_tx_qp; if ((queues_left == 1) || !test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { /* one qp for PF, no queues for anything else */ queues_left = 0; pf->alloc_rss_size = pf->num_lan_qps = 1; /* make sure all the fancies are disabled */ clear_bit(I40E_FLAG_RSS_ENA, pf->flags); clear_bit(I40E_FLAG_IWARP_ENA, pf->flags); clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); clear_bit(I40E_FLAG_FD_ATR_ENA, pf->flags); clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); clear_bit(I40E_FLAG_DCB_ENA, pf->flags); clear_bit(I40E_FLAG_SRIOV_ENA, pf->flags); clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags); set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); } else if (!test_bit(I40E_FLAG_RSS_ENA, pf->flags) && !test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) && !test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) && !test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) { /* one qp for PF */ pf->alloc_rss_size = pf->num_lan_qps = 1; queues_left -= pf->num_lan_qps; clear_bit(I40E_FLAG_RSS_ENA, pf->flags); clear_bit(I40E_FLAG_IWARP_ENA, pf->flags); clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); clear_bit(I40E_FLAG_FD_ATR_ENA, pf->flags); clear_bit(I40E_FLAG_DCB_ENA, pf->flags); clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags); set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); } else { /* Not enough queues for all TCs */ if (test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags) && queues_left < I40E_MAX_TRAFFIC_CLASS) { clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); clear_bit(I40E_FLAG_DCB_ENA, pf->flags); dev_info(&pf->pdev->dev, "not enough queues for DCB. DCB is disabled.\n"); } /* limit lan qps to the smaller of qps, cpus or msix */ q_max = max_t(int, pf->rss_size_max, num_online_cpus()); q_max = min_t(int, q_max, pf->hw.func_caps.num_tx_qp); q_max = min_t(int, q_max, pf->hw.func_caps.num_msix_vectors); pf->num_lan_qps = q_max; queues_left -= pf->num_lan_qps; } if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) { if (queues_left > 1) { queues_left -= 1; /* save 1 queue for FD */ } else { clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); dev_info(&pf->pdev->dev, "not enough queues for Flow Director. Flow Director feature is disabled\n"); } } if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags) && pf->num_vf_qps && pf->num_req_vfs && queues_left) { pf->num_req_vfs = min_t(int, pf->num_req_vfs, (queues_left / pf->num_vf_qps)); queues_left -= (pf->num_req_vfs * pf->num_vf_qps); } if (test_bit(I40E_FLAG_VMDQ_ENA, pf->flags) && pf->num_vmdq_vsis && pf->num_vmdq_qps && queues_left) { pf->num_vmdq_vsis = min_t(int, pf->num_vmdq_vsis, (queues_left / pf->num_vmdq_qps)); queues_left -= (pf->num_vmdq_vsis * pf->num_vmdq_qps); } pf->queues_left = queues_left; dev_dbg(&pf->pdev->dev, "qs_avail=%d FD SB=%d lan_qs=%d lan_tc0=%d vf=%d*%d vmdq=%d*%d, remaining=%d\n", pf->hw.func_caps.num_tx_qp, !!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags), pf->num_lan_qps, pf->alloc_rss_size, pf->num_req_vfs, pf->num_vf_qps, pf->num_vmdq_vsis, pf->num_vmdq_qps, queues_left); } /** * i40e_setup_pf_filter_control - Setup PF static filter control * @pf: PF to be setup * * i40e_setup_pf_filter_control sets up a PF's initial filter control * settings. If PE/FCoE are enabled then it will also set the per PF * based filter sizes required for them. It also enables Flow director, * ethertype and macvlan type filter settings for the pf. * * Returns 0 on success, negative on failure **/ static int i40e_setup_pf_filter_control(struct i40e_pf *pf) { struct i40e_filter_control_settings *settings = &pf->filter_settings; settings->hash_lut_size = I40E_HASH_LUT_SIZE_128; /* Flow Director is enabled */ if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) || test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags)) settings->enable_fdir = true; /* Ethtype and MACVLAN filters enabled for PF */ settings->enable_ethtype = true; settings->enable_macvlan = true; if (i40e_set_filter_control(&pf->hw, settings)) return -ENOENT; return 0; } #define INFO_STRING_LEN 255 #define REMAIN(__x) (INFO_STRING_LEN - (__x)) static void i40e_print_features(struct i40e_pf *pf) { struct i40e_vsi *main_vsi = i40e_pf_get_main_vsi(pf); struct i40e_hw *hw = &pf->hw; char *buf; int i; buf = kmalloc(INFO_STRING_LEN, GFP_KERNEL); if (!buf) return; i = snprintf(buf, INFO_STRING_LEN, "Features: PF-id[%d]", hw->pf_id); #ifdef CONFIG_PCI_IOV i += scnprintf(&buf[i], REMAIN(i), " VFs: %d", pf->num_req_vfs); #endif i += scnprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d", pf->hw.func_caps.num_vsis, main_vsi->num_queue_pairs); if (test_bit(I40E_FLAG_RSS_ENA, pf->flags)) i += scnprintf(&buf[i], REMAIN(i), " RSS"); if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags)) i += scnprintf(&buf[i], REMAIN(i), " FD_ATR"); if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) { i += scnprintf(&buf[i], REMAIN(i), " FD_SB"); i += scnprintf(&buf[i], REMAIN(i), " NTUPLE"); } if (test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) i += scnprintf(&buf[i], REMAIN(i), " DCB"); i += scnprintf(&buf[i], REMAIN(i), " VxLAN"); i += scnprintf(&buf[i], REMAIN(i), " Geneve"); if (test_bit(I40E_FLAG_PTP_ENA, pf->flags)) i += scnprintf(&buf[i], REMAIN(i), " PTP"); if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) i += scnprintf(&buf[i], REMAIN(i), " VEB"); else i += scnprintf(&buf[i], REMAIN(i), " VEPA"); dev_info(&pf->pdev->dev, "%s\n", buf); kfree(buf); WARN_ON(i > INFO_STRING_LEN); } /** * i40e_get_platform_mac_addr - get platform-specific MAC address * @pdev: PCI device information struct * @pf: board private structure * * Look up the MAC address for the device. First we'll try * eth_platform_get_mac_address, which will check Open Firmware, or arch * specific fallback. Otherwise, we'll default to the stored value in * firmware. **/ static void i40e_get_platform_mac_addr(struct pci_dev *pdev, struct i40e_pf *pf) { if (eth_platform_get_mac_address(&pdev->dev, pf->hw.mac.addr)) i40e_get_mac_addr(&pf->hw, pf->hw.mac.addr); } /** * i40e_set_fec_in_flags - helper function for setting FEC options in flags * @fec_cfg: FEC option to set in flags * @flags: ptr to flags in which we set FEC option **/ void i40e_set_fec_in_flags(u8 fec_cfg, unsigned long *flags) { if (fec_cfg & I40E_AQ_SET_FEC_AUTO) { set_bit(I40E_FLAG_RS_FEC, flags); set_bit(I40E_FLAG_BASE_R_FEC, flags); } if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_RS) || (fec_cfg & I40E_AQ_SET_FEC_ABILITY_RS)) { set_bit(I40E_FLAG_RS_FEC, flags); clear_bit(I40E_FLAG_BASE_R_FEC, flags); } if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_KR) || (fec_cfg & I40E_AQ_SET_FEC_ABILITY_KR)) { set_bit(I40E_FLAG_BASE_R_FEC, flags); clear_bit(I40E_FLAG_RS_FEC, flags); } if (fec_cfg == 0) { clear_bit(I40E_FLAG_RS_FEC, flags); clear_bit(I40E_FLAG_BASE_R_FEC, flags); } } /** * i40e_check_recovery_mode - check if we are running transition firmware * @pf: board private structure * * Check registers indicating the firmware runs in recovery mode. Sets the * appropriate driver state. * * Returns true if the recovery mode was detected, false otherwise **/ static bool i40e_check_recovery_mode(struct i40e_pf *pf) { u32 val = rd32(&pf->hw, I40E_GL_FWSTS); if (val & I40E_GL_FWSTS_FWS1B_MASK) { dev_crit(&pf->pdev->dev, "Firmware recovery mode detected. Limiting functionality.\n"); dev_crit(&pf->pdev->dev, "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n"); set_bit(__I40E_RECOVERY_MODE, pf->state); return true; } if (test_bit(__I40E_RECOVERY_MODE, pf->state)) dev_info(&pf->pdev->dev, "Please do Power-On Reset to initialize adapter in normal mode with full functionality.\n"); return false; } /** * i40e_pf_loop_reset - perform reset in a loop. * @pf: board private structure * * This function is useful when a NIC is about to enter recovery mode. * When a NIC's internal data structures are corrupted the NIC's * firmware is going to enter recovery mode. * Right after a POR it takes about 7 minutes for firmware to enter * recovery mode. Until that time a NIC is in some kind of intermediate * state. After that time period the NIC almost surely enters * recovery mode. The only way for a driver to detect intermediate * state is to issue a series of pf-resets and check a return value. * If a PF reset returns success then the firmware could be in recovery * mode so the caller of this code needs to check for recovery mode * if this function returns success. There is a little chance that * firmware will hang in intermediate state forever. * Since waiting 7 minutes is quite a lot of time this function waits * 10 seconds and then gives up by returning an error. * * Return 0 on success, negative on failure. **/ static int i40e_pf_loop_reset(struct i40e_pf *pf) { /* wait max 10 seconds for PF reset to succeed */ const unsigned long time_end = jiffies + 10 * HZ; struct i40e_hw *hw = &pf->hw; int ret; ret = i40e_pf_reset(hw); while (ret != 0 && time_before(jiffies, time_end)) { usleep_range(10000, 20000); ret = i40e_pf_reset(hw); } if (ret == 0) pf->pfr_count++; else dev_info(&pf->pdev->dev, "PF reset failed: %d\n", ret); return ret; } /** * i40e_check_fw_empr - check if FW issued unexpected EMP Reset * @pf: board private structure * * Check FW registers to determine if FW issued unexpected EMP Reset. * Every time when unexpected EMP Reset occurs the FW increments * a counter of unexpected EMP Resets. When the counter reaches 10 * the FW should enter the Recovery mode * * Returns true if FW issued unexpected EMP Reset **/ static bool i40e_check_fw_empr(struct i40e_pf *pf) { const u32 fw_sts = rd32(&pf->hw, I40E_GL_FWSTS) & I40E_GL_FWSTS_FWS1B_MASK; return (fw_sts > I40E_GL_FWSTS_FWS1B_EMPR_0) && (fw_sts <= I40E_GL_FWSTS_FWS1B_EMPR_10); } /** * i40e_handle_resets - handle EMP resets and PF resets * @pf: board private structure * * Handle both EMP resets and PF resets and conclude whether there are * any issues regarding these resets. If there are any issues then * generate log entry. * * Return 0 if NIC is healthy or negative value when there are issues * with resets **/ static int i40e_handle_resets(struct i40e_pf *pf) { const int pfr = i40e_pf_loop_reset(pf); const bool is_empr = i40e_check_fw_empr(pf); if (is_empr || pfr != 0) dev_crit(&pf->pdev->dev, "Entering recovery mode due to repeated FW resets. This may take several minutes. Refer to the Intel(R) Ethernet Adapters and Devices User Guide.\n"); return is_empr ? -EIO : pfr; } /** * i40e_init_recovery_mode - initialize subsystems needed in recovery mode * @pf: board private structure * @hw: ptr to the hardware info * * This function does a minimal setup of all subsystems needed for running * recovery mode. * * Returns 0 on success, negative on failure **/ static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw) { struct i40e_vsi *vsi; int err; int v_idx; pci_set_drvdata(pf->pdev, pf); pci_save_state(pf->pdev); /* set up periodic task facility */ timer_setup(&pf->service_timer, i40e_service_timer, 0); pf->service_timer_period = HZ; INIT_WORK(&pf->service_task, i40e_service_task); clear_bit(__I40E_SERVICE_SCHED, pf->state); err = i40e_init_interrupt_scheme(pf); if (err) goto err_switch_setup; /* The number of VSIs reported by the FW is the minimum guaranteed * to us; HW supports far more and we share the remaining pool with * the other PFs. We allocate space for more than the guarantee with * the understanding that we might not get them all later. */ if (pf->hw.func_caps.num_vsis < I40E_MIN_VSI_ALLOC) pf->num_alloc_vsi = I40E_MIN_VSI_ALLOC; else pf->num_alloc_vsi = pf->hw.func_caps.num_vsis; /* Set up the vsi struct and our local tracking of the MAIN PF vsi. */ pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *), GFP_KERNEL); if (!pf->vsi) { err = -ENOMEM; goto err_switch_setup; } /* We allocate one VSI which is needed as absolute minimum * in order to register the netdev */ v_idx = i40e_vsi_mem_alloc(pf, I40E_VSI_MAIN); if (v_idx < 0) { err = v_idx; goto err_switch_setup; } pf->lan_vsi = v_idx; vsi = pf->vsi[v_idx]; if (!vsi) { err = -EFAULT; goto err_switch_setup; } vsi->alloc_queue_pairs = 1; err = i40e_config_netdev(vsi); if (err) goto err_switch_setup; err = register_netdev(vsi->netdev); if (err) goto err_switch_setup; vsi->netdev_registered = true; i40e_dbg_pf_init(pf); err = i40e_setup_misc_vector_for_recovery_mode(pf); if (err) goto err_switch_setup; /* tell the firmware that we're starting */ i40e_send_version(pf); /* since everything's happy, start the service_task timer */ mod_timer(&pf->service_timer, round_jiffies(jiffies + pf->service_timer_period)); return 0; err_switch_setup: i40e_reset_interrupt_capability(pf); timer_shutdown_sync(&pf->service_timer); i40e_shutdown_adminq(hw); iounmap(hw->hw_addr); pci_release_mem_regions(pf->pdev); pci_disable_device(pf->pdev); i40e_free_pf(pf); return err; } /** * i40e_set_subsystem_device_id - set subsystem device id * @hw: pointer to the hardware info * * Set PCI subsystem device id either from a pci_dev structure or * a specific FW register. **/ static inline void i40e_set_subsystem_device_id(struct i40e_hw *hw) { struct i40e_pf *pf = i40e_hw_to_pf(hw); hw->subsystem_device_id = pf->pdev->subsystem_device ? pf->pdev->subsystem_device : (ushort)(rd32(hw, I40E_PFPCI_SUBSYSID) & USHRT_MAX); } /** * i40e_probe - Device initialization routine * @pdev: PCI device information struct * @ent: entry in i40e_pci_tbl * * i40e_probe initializes a PF identified by a pci_dev structure. * The OS initialization, configuring of the PF private structure, * and a hardware reset occur. * * Returns 0 on success, negative on failure **/ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct i40e_aq_get_phy_abilities_resp abilities; #ifdef CONFIG_I40E_DCB enum i40e_get_fw_lldp_status_resp lldp_status; #endif /* CONFIG_I40E_DCB */ struct i40e_vsi *vsi; struct i40e_pf *pf; struct i40e_hw *hw; u16 wol_nvm_bits; char nvm_ver[32]; u16 link_status; #ifdef CONFIG_I40E_DCB int status; #endif /* CONFIG_I40E_DCB */ int err; u32 val; err = pci_enable_device_mem(pdev); if (err) return err; /* set up for high or low dma */ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (err) { dev_err(&pdev->dev, "DMA configuration failed: 0x%x\n", err); goto err_dma; } /* set up pci connections */ err = pci_request_mem_regions(pdev, i40e_driver_name); if (err) { dev_info(&pdev->dev, "pci_request_selected_regions failed %d\n", err); goto err_pci_reg; } pci_set_master(pdev); /* Now that we have a PCI connection, we need to do the * low level device setup. This is primarily setting up * the Admin Queue structures and then querying for the * device's current profile information. */ pf = i40e_alloc_pf(&pdev->dev); if (!pf) { err = -ENOMEM; goto err_pf_alloc; } pf->next_vsi = 0; pf->pdev = pdev; set_bit(__I40E_DOWN, pf->state); hw = &pf->hw; pf->ioremap_len = min_t(int, pci_resource_len(pdev, 0), I40E_MAX_CSR_SPACE); /* We believe that the highest register to read is * I40E_GLGEN_STAT_CLEAR, so we check if the BAR size * is not less than that before mapping to prevent a * kernel panic. */ if (pf->ioremap_len < I40E_GLGEN_STAT_CLEAR) { dev_err(&pdev->dev, "Cannot map registers, bar size 0x%X too small, aborting\n", pf->ioremap_len); err = -ENOMEM; goto err_ioremap; } hw->hw_addr = ioremap(pci_resource_start(pdev, 0), pf->ioremap_len); if (!hw->hw_addr) { err = -EIO; dev_info(&pdev->dev, "ioremap(0x%04x, 0x%04x) failed: 0x%x\n", (unsigned int)pci_resource_start(pdev, 0), pf->ioremap_len, err); goto err_ioremap; } hw->vendor_id = pdev->vendor; hw->device_id = pdev->device; pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id); hw->subsystem_vendor_id = pdev->subsystem_vendor; i40e_set_subsystem_device_id(hw); hw->bus.device = PCI_SLOT(pdev->devfn); hw->bus.func = PCI_FUNC(pdev->devfn); hw->bus.bus_id = pdev->bus->number; /* Select something other than the 802.1ad ethertype for the * switch to use internally and drop on ingress. */ hw->switch_tag = 0xffff; hw->first_tag = ETH_P_8021AD; hw->second_tag = ETH_P_8021Q; INIT_LIST_HEAD(&pf->l3_flex_pit_list); INIT_LIST_HEAD(&pf->l4_flex_pit_list); INIT_LIST_HEAD(&pf->ddp_old_prof); /* set up the locks for the AQ, do this only once in probe * and destroy them only once in remove */ mutex_init(&hw->aq.asq_mutex); mutex_init(&hw->aq.arq_mutex); pf->msg_enable = netif_msg_init(debug, NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK); if (debug < -1) pf->hw.debug_mask = debug; /* do a special CORER for clearing PXE mode once at init */ if (hw->revision_id == 0 && (rd32(hw, I40E_GLLAN_RCTL_0) & I40E_GLLAN_RCTL_0_PXE_MODE_MASK)) { wr32(hw, I40E_GLGEN_RTRIG, I40E_GLGEN_RTRIG_CORER_MASK); i40e_flush(hw); msleep(200); pf->corer_count++; i40e_clear_pxe_mode(hw); } /* Reset here to make sure all is clean and to define PF 'n' */ i40e_clear_hw(hw); err = i40e_set_mac_type(hw); if (err) { dev_warn(&pdev->dev, "unidentified MAC or BLANK NVM: %d\n", err); goto err_pf_reset; } err = i40e_handle_resets(pf); if (err) goto err_pf_reset; i40e_check_recovery_mode(pf); if (is_kdump_kernel()) { hw->aq.num_arq_entries = I40E_MIN_ARQ_LEN; hw->aq.num_asq_entries = I40E_MIN_ASQ_LEN; } else { hw->aq.num_arq_entries = I40E_AQ_LEN; hw->aq.num_asq_entries = I40E_AQ_LEN; } hw->aq.arq_buf_size = I40E_MAX_AQ_BUF_SIZE; hw->aq.asq_buf_size = I40E_MAX_AQ_BUF_SIZE; snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc", dev_driver_string(&pf->pdev->dev), dev_name(&pdev->dev)); err = i40e_init_shared_code(hw); if (err) { dev_warn(&pdev->dev, "unidentified MAC or BLANK NVM: %d\n", err); goto err_pf_reset; } /* set up a default setting for link flow control */ pf->hw.fc.requested_mode = I40E_FC_NONE; err = i40e_init_adminq(hw); if (err) { if (err == -EIO) dev_info(&pdev->dev, "The driver for the device stopped because the NVM image v%u.%u is newer than expected v%u.%u. You must install the most recent version of the network driver.\n", hw->aq.api_maj_ver, hw->aq.api_min_ver, I40E_FW_API_VERSION_MAJOR, I40E_FW_MINOR_VERSION(hw)); else dev_info(&pdev->dev, "The driver for the device stopped because the device firmware failed to init. Try updating your NVM image.\n"); goto err_pf_reset; } i40e_get_oem_version(hw); i40e_get_pba_string(hw); /* provide nvm, fw, api versions, vendor:device id, subsys vendor:device id */ i40e_nvm_version_str(hw, nvm_ver, sizeof(nvm_ver)); dev_info(&pdev->dev, "fw %d.%d.%05d api %d.%d nvm %s [%04x:%04x] [%04x:%04x]\n", hw->aq.fw_maj_ver, hw->aq.fw_min_ver, hw->aq.fw_build, hw->aq.api_maj_ver, hw->aq.api_min_ver, nvm_ver, hw->vendor_id, hw->device_id, hw->subsystem_vendor_id, hw->subsystem_device_id); if (i40e_is_aq_api_ver_ge(hw, I40E_FW_API_VERSION_MAJOR, I40E_FW_MINOR_VERSION(hw) + 1)) dev_dbg(&pdev->dev, "The driver for the device detected a newer version of the NVM image v%u.%u than v%u.%u.\n", hw->aq.api_maj_ver, hw->aq.api_min_ver, I40E_FW_API_VERSION_MAJOR, I40E_FW_MINOR_VERSION(hw)); else if (i40e_is_aq_api_ver_lt(hw, 1, 4)) dev_info(&pdev->dev, "The driver for the device detected an older version of the NVM image v%u.%u than expected v%u.%u. Please update the NVM image.\n", hw->aq.api_maj_ver, hw->aq.api_min_ver, I40E_FW_API_VERSION_MAJOR, I40E_FW_MINOR_VERSION(hw)); i40e_verify_eeprom(pf); /* Rev 0 hardware was never productized */ if (hw->revision_id < 1) dev_warn(&pdev->dev, "This device is a pre-production adapter/LOM. Please be aware there may be issues with your hardware. If you are experiencing problems please contact your Intel or hardware representative who provided you with this hardware.\n"); i40e_clear_pxe_mode(hw); err = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities); if (err) goto err_adminq_setup; err = i40e_sw_init(pf); if (err) { dev_info(&pdev->dev, "sw_init failed: %d\n", err); goto err_sw_init; } if (test_bit(__I40E_RECOVERY_MODE, pf->state)) return i40e_init_recovery_mode(pf, hw); err = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, 0, 0); if (err) { dev_info(&pdev->dev, "init_lan_hmc failed: %d\n", err); goto err_init_lan_hmc; } err = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY); if (err) { dev_info(&pdev->dev, "configure_lan_hmc failed: %d\n", err); err = -ENOENT; goto err_configure_lan_hmc; } /* Disable LLDP for NICs that have firmware versions lower than v4.3. * Ignore error return codes because if it was already disabled via * hardware settings this will fail */ if (test_bit(I40E_HW_CAP_STOP_FW_LLDP, pf->hw.caps)) { dev_info(&pdev->dev, "Stopping firmware LLDP agent.\n"); i40e_aq_stop_lldp(hw, true, false, NULL); } /* allow a platform config to override the HW addr */ i40e_get_platform_mac_addr(pdev, pf); if (!is_valid_ether_addr(hw->mac.addr)) { dev_info(&pdev->dev, "invalid MAC address %pM\n", hw->mac.addr); err = -EIO; goto err_mac_addr; } dev_info(&pdev->dev, "MAC address: %pM\n", hw->mac.addr); ether_addr_copy(hw->mac.perm_addr, hw->mac.addr); i40e_get_port_mac_addr(hw, hw->mac.port_addr); if (is_valid_ether_addr(hw->mac.port_addr)) set_bit(I40E_HW_CAP_PORT_ID_VALID, pf->hw.caps); i40e_ptp_alloc_pins(pf); pci_set_drvdata(pdev, pf); pci_save_state(pdev); #ifdef CONFIG_I40E_DCB status = i40e_get_fw_lldp_status(&pf->hw, &lldp_status); (!status && lldp_status == I40E_GET_FW_LLDP_STATUS_ENABLED) ? (clear_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags)) : (set_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags)); dev_info(&pdev->dev, test_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags) ? "FW LLDP is disabled\n" : "FW LLDP is enabled\n"); /* Enable FW to write default DCB config on link-up */ i40e_aq_set_dcb_parameters(hw, true, NULL); err = i40e_init_pf_dcb(pf); if (err) { dev_info(&pdev->dev, "DCB init failed %d, disabled\n", err); clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); clear_bit(I40E_FLAG_DCB_ENA, pf->flags); /* Continue without DCB enabled */ } #endif /* CONFIG_I40E_DCB */ /* set up periodic task facility */ timer_setup(&pf->service_timer, i40e_service_timer, 0); pf->service_timer_period = HZ; INIT_WORK(&pf->service_task, i40e_service_task); clear_bit(__I40E_SERVICE_SCHED, pf->state); /* NVM bit on means WoL disabled for the port */ i40e_read_nvm_word(hw, I40E_SR_NVM_WAKE_ON_LAN, &wol_nvm_bits); if (BIT (hw->port) & wol_nvm_bits || hw->partition_id != 1) pf->wol_en = false; else pf->wol_en = true; device_set_wakeup_enable(&pf->pdev->dev, pf->wol_en); /* set up the main switch operations */ i40e_determine_queue_usage(pf); err = i40e_init_interrupt_scheme(pf); if (err) goto err_switch_setup; /* Reduce Tx and Rx pairs for kdump * When MSI-X is enabled, it's not allowed to use more TC queue * pairs than MSI-X vectors (pf->num_lan_msix) exist. Thus * vsi->num_queue_pairs will be equal to pf->num_lan_msix, i.e., 1. */ if (is_kdump_kernel()) pf->num_lan_msix = 1; pf->udp_tunnel_nic.set_port = i40e_udp_tunnel_set_port; pf->udp_tunnel_nic.unset_port = i40e_udp_tunnel_unset_port; pf->udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP; pf->udp_tunnel_nic.shared = &pf->udp_tunnel_shared; pf->udp_tunnel_nic.tables[0].n_entries = I40E_MAX_PF_UDP_OFFLOAD_PORTS; pf->udp_tunnel_nic.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN | UDP_TUNNEL_TYPE_GENEVE; /* The number of VSIs reported by the FW is the minimum guaranteed * to us; HW supports far more and we share the remaining pool with * the other PFs. We allocate space for more than the guarantee with * the understanding that we might not get them all later. */ if (pf->hw.func_caps.num_vsis < I40E_MIN_VSI_ALLOC) pf->num_alloc_vsi = I40E_MIN_VSI_ALLOC; else pf->num_alloc_vsi = pf->hw.func_caps.num_vsis; if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) { dev_warn(&pf->pdev->dev, "limiting the VSI count due to UDP tunnel limitation %d > %d\n", pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES); pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES; } /* Set up the *vsi struct and our local tracking of the MAIN PF vsi. */ pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *), GFP_KERNEL); if (!pf->vsi) { err = -ENOMEM; goto err_switch_setup; } #ifdef CONFIG_PCI_IOV /* prep for VF support */ if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags) && test_bit(I40E_FLAG_MSIX_ENA, pf->flags) && !test_bit(__I40E_BAD_EEPROM, pf->state)) { if (pci_num_vf(pdev)) set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); } #endif err = i40e_setup_pf_switch(pf, false, false); if (err) { dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err); goto err_vsis; } vsi = i40e_pf_get_main_vsi(pf); INIT_LIST_HEAD(&vsi->ch_list); /* if FDIR VSI was set up, start it now */ vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR); if (vsi) i40e_vsi_open(vsi); /* The driver only wants link up/down and module qualification * reports from firmware. Note the negative logic. */ err = i40e_aq_set_phy_int_mask(&pf->hw, ~(I40E_AQ_EVENT_LINK_UPDOWN | I40E_AQ_EVENT_MEDIA_NA | I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL); if (err) dev_info(&pf->pdev->dev, "set phy mask fail, err %pe aq_err %s\n", ERR_PTR(err), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); /* Reconfigure hardware for allowing smaller MSS in the case * of TSO, so that we avoid the MDD being fired and causing * a reset in the case of small MSS+TSO. */ val = rd32(hw, I40E_REG_MSS); if ((val & I40E_REG_MSS_MIN_MASK) > I40E_64BYTE_MSS) { val &= ~I40E_REG_MSS_MIN_MASK; val |= I40E_64BYTE_MSS; wr32(hw, I40E_REG_MSS, val); } if (test_bit(I40E_HW_CAP_RESTART_AUTONEG, pf->hw.caps)) { msleep(75); err = i40e_aq_set_link_restart_an(&pf->hw, true, NULL); if (err) dev_info(&pf->pdev->dev, "link restart failed, err %pe aq_err %s\n", ERR_PTR(err), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); } /* The main driver is (mostly) up and happy. We need to set this state * before setting up the misc vector or we get a race and the vector * ends up disabled forever. */ clear_bit(__I40E_DOWN, pf->state); /* In case of MSIX we are going to setup the misc vector right here * to handle admin queue events etc. In case of legacy and MSI * the misc functionality and queue processing is combined in * the same vector and that gets setup at open. */ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { err = i40e_setup_misc_vector(pf); if (err) { dev_info(&pdev->dev, "setup of misc vector failed: %d\n", err); i40e_cloud_filter_exit(pf); i40e_fdir_teardown(pf); goto err_vsis; } } #ifdef CONFIG_PCI_IOV /* prep for VF support */ if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags) && test_bit(I40E_FLAG_MSIX_ENA, pf->flags) && !test_bit(__I40E_BAD_EEPROM, pf->state)) { /* disable link interrupts for VFs */ val = rd32(hw, I40E_PFGEN_PORTMDIO_NUM); val &= ~I40E_PFGEN_PORTMDIO_NUM_VFLINK_STAT_ENA_MASK; wr32(hw, I40E_PFGEN_PORTMDIO_NUM, val); i40e_flush(hw); if (pci_num_vf(pdev)) { dev_info(&pdev->dev, "Active VFs found, allocating resources.\n"); err = i40e_alloc_vfs(pf, pci_num_vf(pdev)); if (err) dev_info(&pdev->dev, "Error %d allocating resources for existing VFs\n", err); } } #endif /* CONFIG_PCI_IOV */ if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { pf->iwarp_base_vector = i40e_get_lump(pf, pf->irq_pile, pf->num_iwarp_msix, I40E_IWARP_IRQ_PILE_ID); if (pf->iwarp_base_vector < 0) { dev_info(&pdev->dev, "failed to get tracking for %d vectors for IWARP err=%d\n", pf->num_iwarp_msix, pf->iwarp_base_vector); clear_bit(I40E_FLAG_IWARP_ENA, pf->flags); } } i40e_dbg_pf_init(pf); /* tell the firmware that we're starting */ i40e_send_version(pf); /* since everything's happy, start the service_task timer */ mod_timer(&pf->service_timer, round_jiffies(jiffies + pf->service_timer_period)); /* add this PF to client device list and launch a client service task */ if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { err = i40e_lan_add_device(pf); if (err) dev_info(&pdev->dev, "Failed to add PF to client API service list: %d\n", err); } #define PCI_SPEED_SIZE 8 #define PCI_WIDTH_SIZE 8 /* Devices on the IOSF bus do not have this information * and will report PCI Gen 1 x 1 by default so don't bother * checking them. */ if (!test_bit(I40E_HW_CAP_NO_PCI_LINK_CHECK, pf->hw.caps)) { char speed[PCI_SPEED_SIZE] = "Unknown"; char width[PCI_WIDTH_SIZE] = "Unknown"; /* Get the negotiated link width and speed from PCI config * space */ pcie_capability_read_word(pf->pdev, PCI_EXP_LNKSTA, &link_status); i40e_set_pci_config_data(hw, link_status); switch (hw->bus.speed) { case i40e_bus_speed_8000: strscpy(speed, "8.0", PCI_SPEED_SIZE); break; case i40e_bus_speed_5000: strscpy(speed, "5.0", PCI_SPEED_SIZE); break; case i40e_bus_speed_2500: strscpy(speed, "2.5", PCI_SPEED_SIZE); break; default: break; } switch (hw->bus.width) { case i40e_bus_width_pcie_x8: strscpy(width, "8", PCI_WIDTH_SIZE); break; case i40e_bus_width_pcie_x4: strscpy(width, "4", PCI_WIDTH_SIZE); break; case i40e_bus_width_pcie_x2: strscpy(width, "2", PCI_WIDTH_SIZE); break; case i40e_bus_width_pcie_x1: strscpy(width, "1", PCI_WIDTH_SIZE); break; default: break; } dev_info(&pdev->dev, "PCI-Express: Speed %sGT/s Width x%s\n", speed, width); if (hw->bus.width < i40e_bus_width_pcie_x8 || hw->bus.speed < i40e_bus_speed_8000) { dev_warn(&pdev->dev, "PCI-Express bandwidth available for this device may be insufficient for optimal performance.\n"); dev_warn(&pdev->dev, "Please move the device to a different PCI-e link with more lanes and/or higher transfer rate.\n"); } } /* get the requested speeds from the fw */ err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, NULL); if (err) dev_dbg(&pf->pdev->dev, "get requested speeds ret = %pe last_status = %s\n", ERR_PTR(err), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); pf->hw.phy.link_info.requested_speeds = abilities.link_speed; /* set the FEC config due to the board capabilities */ i40e_set_fec_in_flags(abilities.fec_cfg_curr_mod_ext_info, pf->flags); /* get the supported phy types from the fw */ err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities, NULL); if (err) dev_dbg(&pf->pdev->dev, "get supported phy types ret = %pe last_status = %s\n", ERR_PTR(err), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); /* make sure the MFS hasn't been set lower than the default */ #define MAX_FRAME_SIZE_DEFAULT 0x2600 val = FIELD_GET(I40E_PRTGL_SAH_MFS_MASK, rd32(&pf->hw, I40E_PRTGL_SAH)); if (val < MAX_FRAME_SIZE_DEFAULT) dev_warn(&pdev->dev, "MFS for port %x (%d) has been set below the default (%d)\n", pf->hw.port, val, MAX_FRAME_SIZE_DEFAULT); /* Add a filter to drop all Flow control frames from any VSI from being * transmitted. By doing so we stop a malicious VF from sending out * PAUSE or PFC frames and potentially controlling traffic for other * PF/VF VSIs. * The FW can still send Flow control frames if enabled. */ i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw, pf->main_vsi_seid); if ((pf->hw.device_id == I40E_DEV_ID_10G_BASE_T) || (pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4)) set_bit(I40E_HW_CAP_PHY_CONTROLS_LEDS, pf->hw.caps); if (pf->hw.device_id == I40E_DEV_ID_SFP_I_X722) set_bit(I40E_HW_CAP_CRT_RETIMER, pf->hw.caps); /* print a string summarizing features */ i40e_print_features(pf); i40e_devlink_register(pf); return 0; /* Unwind what we've done if something failed in the setup */ err_vsis: set_bit(__I40E_DOWN, pf->state); i40e_clear_interrupt_scheme(pf); kfree(pf->vsi); err_switch_setup: i40e_reset_interrupt_capability(pf); timer_shutdown_sync(&pf->service_timer); err_mac_addr: err_configure_lan_hmc: (void)i40e_shutdown_lan_hmc(hw); err_init_lan_hmc: kfree(pf->qp_pile); err_sw_init: err_adminq_setup: err_pf_reset: iounmap(hw->hw_addr); err_ioremap: i40e_free_pf(pf); err_pf_alloc: pci_release_mem_regions(pdev); err_pci_reg: err_dma: pci_disable_device(pdev); return err; } /** * i40e_remove - Device removal routine * @pdev: PCI device information struct * * i40e_remove is called by the PCI subsystem to alert the driver * that is should release a PCI device. This could be caused by a * Hot-Plug event, or because the driver is going to be removed from * memory. **/ static void i40e_remove(struct pci_dev *pdev) { struct i40e_pf *pf = pci_get_drvdata(pdev); struct i40e_hw *hw = &pf->hw; struct i40e_vsi *vsi; struct i40e_veb *veb; int ret_code; int i; i40e_devlink_unregister(pf); i40e_dbg_pf_exit(pf); i40e_ptp_stop(pf); /* Disable RSS in hw */ i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), 0); i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), 0); /* Grab __I40E_RESET_RECOVERY_PENDING and set __I40E_IN_REMOVE * flags, once they are set, i40e_rebuild should not be called as * i40e_prep_for_reset always returns early. */ while (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) usleep_range(1000, 2000); set_bit(__I40E_IN_REMOVE, pf->state); if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags)) { set_bit(__I40E_VF_RESETS_DISABLED, pf->state); i40e_free_vfs(pf); clear_bit(I40E_FLAG_SRIOV_ENA, pf->flags); } /* no more scheduling of any task */ set_bit(__I40E_SUSPENDED, pf->state); set_bit(__I40E_DOWN, pf->state); if (pf->service_timer.function) timer_shutdown_sync(&pf->service_timer); if (pf->service_task.func) cancel_work_sync(&pf->service_task); if (test_bit(__I40E_RECOVERY_MODE, pf->state)) { struct i40e_vsi *vsi = pf->vsi[0]; /* We know that we have allocated only one vsi for this PF, * it was just for registering netdevice, so the interface * could be visible in the 'ifconfig' output */ unregister_netdev(vsi->netdev); free_netdev(vsi->netdev); goto unmap; } /* Client close must be called explicitly here because the timer * has been stopped. */ i40e_notify_client_of_netdev_close(pf, false); i40e_fdir_teardown(pf); /* If there is a switch structure or any orphans, remove them. * This will leave only the PF's VSI remaining. */ i40e_pf_for_each_veb(pf, i, veb) if (veb->uplink_seid == pf->mac_seid || veb->uplink_seid == 0) i40e_switch_branch_release(veb); /* Now we can shutdown the PF's VSIs, just before we kill * adminq and hmc. */ i40e_pf_for_each_vsi(pf, i, vsi) { i40e_vsi_close(vsi); i40e_vsi_release(vsi); pf->vsi[i] = NULL; } i40e_cloud_filter_exit(pf); /* remove attached clients */ if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { ret_code = i40e_lan_del_device(pf); if (ret_code) dev_warn(&pdev->dev, "Failed to delete client device: %d\n", ret_code); } /* shutdown and destroy the HMC */ if (hw->hmc.hmc_obj) { ret_code = i40e_shutdown_lan_hmc(hw); if (ret_code) dev_warn(&pdev->dev, "Failed to destroy the HMC resources: %d\n", ret_code); } unmap: /* Free MSI/legacy interrupt 0 when in recovery mode. */ if (test_bit(__I40E_RECOVERY_MODE, pf->state) && !test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) free_irq(pf->pdev->irq, pf); /* shutdown the adminq */ i40e_shutdown_adminq(hw); /* destroy the locks only once, here */ mutex_destroy(&hw->aq.arq_mutex); mutex_destroy(&hw->aq.asq_mutex); /* Clear all dynamic memory lists of rings, q_vectors, and VSIs */ rtnl_lock(); i40e_clear_interrupt_scheme(pf); i40e_pf_for_each_vsi(pf, i, vsi) { if (!test_bit(__I40E_RECOVERY_MODE, pf->state)) i40e_vsi_clear_rings(vsi); i40e_vsi_clear(vsi); pf->vsi[i] = NULL; } rtnl_unlock(); i40e_pf_for_each_veb(pf, i, veb) { kfree(veb); pf->veb[i] = NULL; } kfree(pf->qp_pile); kfree(pf->vsi); iounmap(hw->hw_addr); i40e_free_pf(pf); pci_release_mem_regions(pdev); pci_disable_device(pdev); } /** * i40e_enable_mc_magic_wake - enable multicast magic packet wake up * using the mac_address_write admin q function * @pf: pointer to i40e_pf struct **/ static void i40e_enable_mc_magic_wake(struct i40e_pf *pf) { struct i40e_vsi *main_vsi = i40e_pf_get_main_vsi(pf); struct i40e_hw *hw = &pf->hw; u8 mac_addr[6]; u16 flags = 0; int ret; /* Get current MAC address in case it's an LAA */ if (main_vsi && main_vsi->netdev) { ether_addr_copy(mac_addr, main_vsi->netdev->dev_addr); } else { dev_err(&pf->pdev->dev, "Failed to retrieve MAC address; using default\n"); ether_addr_copy(mac_addr, hw->mac.addr); } /* The FW expects the mac address write cmd to first be called with * one of these flags before calling it again with the multicast * enable flags. */ flags = I40E_AQC_WRITE_TYPE_LAA_WOL; if (hw->func_caps.flex10_enable && hw->partition_id != 1) flags = I40E_AQC_WRITE_TYPE_LAA_ONLY; ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL); if (ret) { dev_err(&pf->pdev->dev, "Failed to update MAC address registers; cannot enable Multicast Magic packet wake up"); return; } flags = I40E_AQC_MC_MAG_EN | I40E_AQC_WOL_PRESERVE_ON_PFR | I40E_AQC_WRITE_TYPE_UPDATE_MC_MAG; ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL); if (ret) dev_err(&pf->pdev->dev, "Failed to enable Multicast Magic Packet wake up\n"); } /** * i40e_io_suspend - suspend all IO operations * @pf: pointer to i40e_pf struct * **/ static int i40e_io_suspend(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; set_bit(__I40E_DOWN, pf->state); /* Ensure service task will not be running */ del_timer_sync(&pf->service_timer); cancel_work_sync(&pf->service_task); /* Client close must be called explicitly here because the timer * has been stopped. */ i40e_notify_client_of_netdev_close(pf, false); if (test_bit(I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, pf->hw.caps) && pf->wol_en) i40e_enable_mc_magic_wake(pf); /* Since we're going to destroy queues during the * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this * whole section */ rtnl_lock(); i40e_prep_for_reset(pf); wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0)); wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); /* Clear the interrupt scheme and release our IRQs so that the system * can safely hibernate even when there are a large number of CPUs. * Otherwise hibernation might fail when mapping all the vectors back * to CPU0. */ i40e_clear_interrupt_scheme(pf); rtnl_unlock(); return 0; } /** * i40e_io_resume - resume IO operations * @pf: pointer to i40e_pf struct * **/ static int i40e_io_resume(struct i40e_pf *pf) { struct device *dev = &pf->pdev->dev; int err; /* We need to hold the RTNL lock prior to restoring interrupt schemes, * since we're going to be restoring queues */ rtnl_lock(); /* We cleared the interrupt scheme when we suspended, so we need to * restore it now to resume device functionality. */ err = i40e_restore_interrupt_scheme(pf); if (err) { dev_err(dev, "Cannot restore interrupt scheme: %d\n", err); } clear_bit(__I40E_DOWN, pf->state); i40e_reset_and_rebuild(pf, false, true); rtnl_unlock(); /* Clear suspended state last after everything is recovered */ clear_bit(__I40E_SUSPENDED, pf->state); /* Restart the service task */ mod_timer(&pf->service_timer, round_jiffies(jiffies + pf->service_timer_period)); return 0; } /** * i40e_pci_error_detected - warning that something funky happened in PCI land * @pdev: PCI device information struct * @error: the type of PCI error * * Called to warn that something happened and the error handling steps * are in progress. Allows the driver to quiesce things, be ready for * remediation. **/ static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t error) { struct i40e_pf *pf = pci_get_drvdata(pdev); dev_info(&pdev->dev, "%s: error %d\n", __func__, error); if (!pf) { dev_info(&pdev->dev, "Cannot recover - error happened during device probe\n"); return PCI_ERS_RESULT_DISCONNECT; } /* shutdown all operations */ if (!test_bit(__I40E_SUSPENDED, pf->state)) i40e_io_suspend(pf); /* Request a slot reset */ return PCI_ERS_RESULT_NEED_RESET; } /** * i40e_pci_error_slot_reset - a PCI slot reset just happened * @pdev: PCI device information struct * * Called to find if the driver can work with the device now that * the pci slot has been reset. If a basic connection seems good * (registers are readable and have sane content) then return a * happy little PCI_ERS_RESULT_xxx. **/ static pci_ers_result_t i40e_pci_error_slot_reset(struct pci_dev *pdev) { struct i40e_pf *pf = pci_get_drvdata(pdev); pci_ers_result_t result; u32 reg; dev_dbg(&pdev->dev, "%s\n", __func__); /* enable I/O and memory of the device */ if (pci_enable_device(pdev)) { dev_info(&pdev->dev, "Cannot re-enable PCI device after reset.\n"); result = PCI_ERS_RESULT_DISCONNECT; } else { pci_set_master(pdev); pci_restore_state(pdev); pci_save_state(pdev); pci_wake_from_d3(pdev, false); reg = rd32(&pf->hw, I40E_GLGEN_RTRIG); if (reg == 0) result = PCI_ERS_RESULT_RECOVERED; else result = PCI_ERS_RESULT_DISCONNECT; } return result; } /** * i40e_pci_error_reset_prepare - prepare device driver for pci reset * @pdev: PCI device information struct */ static void i40e_pci_error_reset_prepare(struct pci_dev *pdev) { struct i40e_pf *pf = pci_get_drvdata(pdev); i40e_prep_for_reset(pf); } /** * i40e_pci_error_reset_done - pci reset done, device driver reset can begin * @pdev: PCI device information struct */ static void i40e_pci_error_reset_done(struct pci_dev *pdev) { struct i40e_pf *pf = pci_get_drvdata(pdev); if (test_bit(__I40E_IN_REMOVE, pf->state)) return; i40e_reset_and_rebuild(pf, false, false); #ifdef CONFIG_PCI_IOV i40e_restore_all_vfs_msi_state(pdev); #endif /* CONFIG_PCI_IOV */ } /** * i40e_pci_error_resume - restart operations after PCI error recovery * @pdev: PCI device information struct * * Called to allow the driver to bring things back up after PCI error * and/or reset recovery has finished. **/ static void i40e_pci_error_resume(struct pci_dev *pdev) { struct i40e_pf *pf = pci_get_drvdata(pdev); dev_dbg(&pdev->dev, "%s\n", __func__); if (test_bit(__I40E_SUSPENDED, pf->state)) return; i40e_io_resume(pf); } /** * i40e_shutdown - PCI callback for shutting down * @pdev: PCI device information struct **/ static void i40e_shutdown(struct pci_dev *pdev) { struct i40e_pf *pf = pci_get_drvdata(pdev); struct i40e_hw *hw = &pf->hw; set_bit(__I40E_SUSPENDED, pf->state); set_bit(__I40E_DOWN, pf->state); del_timer_sync(&pf->service_timer); cancel_work_sync(&pf->service_task); i40e_cloud_filter_exit(pf); i40e_fdir_teardown(pf); /* Client close must be called explicitly here because the timer * has been stopped. */ i40e_notify_client_of_netdev_close(pf, false); if (test_bit(I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, pf->hw.caps) && pf->wol_en) i40e_enable_mc_magic_wake(pf); i40e_prep_for_reset(pf); wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0)); wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); /* Free MSI/legacy interrupt 0 when in recovery mode. */ if (test_bit(__I40E_RECOVERY_MODE, pf->state) && !test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) free_irq(pf->pdev->irq, pf); /* Since we're going to destroy queues during the * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this * whole section */ rtnl_lock(); i40e_clear_interrupt_scheme(pf); rtnl_unlock(); if (system_state == SYSTEM_POWER_OFF) { pci_wake_from_d3(pdev, pf->wol_en); pci_set_power_state(pdev, PCI_D3hot); } } /** * i40e_suspend - PM callback for moving to D3 * @dev: generic device information structure **/ static int i40e_suspend(struct device *dev) { struct i40e_pf *pf = dev_get_drvdata(dev); /* If we're already suspended, then there is nothing to do */ if (test_and_set_bit(__I40E_SUSPENDED, pf->state)) return 0; return i40e_io_suspend(pf); } /** * i40e_resume - PM callback for waking up from D3 * @dev: generic device information structure **/ static int i40e_resume(struct device *dev) { struct i40e_pf *pf = dev_get_drvdata(dev); /* If we're not suspended, then there is nothing to do */ if (!test_bit(__I40E_SUSPENDED, pf->state)) return 0; return i40e_io_resume(pf); } static const struct pci_error_handlers i40e_err_handler = { .error_detected = i40e_pci_error_detected, .slot_reset = i40e_pci_error_slot_reset, .reset_prepare = i40e_pci_error_reset_prepare, .reset_done = i40e_pci_error_reset_done, .resume = i40e_pci_error_resume, }; static DEFINE_SIMPLE_DEV_PM_OPS(i40e_pm_ops, i40e_suspend, i40e_resume); static struct pci_driver i40e_driver = { .name = i40e_driver_name, .id_table = i40e_pci_tbl, .probe = i40e_probe, .remove = i40e_remove, .driver.pm = pm_sleep_ptr(&i40e_pm_ops), .shutdown = i40e_shutdown, .err_handler = &i40e_err_handler, .sriov_configure = i40e_pci_sriov_configure, }; /** * i40e_init_module - Driver registration routine * * i40e_init_module is the first routine called when the driver is * loaded. All it does is register with the PCI subsystem. **/ static int __init i40e_init_module(void) { int err; pr_info("%s: %s\n", i40e_driver_name, i40e_driver_string); pr_info("%s: %s\n", i40e_driver_name, i40e_copyright); /* There is no need to throttle the number of active tasks because * each device limits its own task using a state bit for scheduling * the service task, and the device tasks do not interfere with each * other, so we don't set a max task limit. We must set WQ_MEM_RECLAIM * since we need to be able to guarantee forward progress even under * memory pressure. */ i40e_wq = alloc_workqueue("%s", 0, 0, i40e_driver_name); if (!i40e_wq) { pr_err("%s: Failed to create workqueue\n", i40e_driver_name); return -ENOMEM; } i40e_dbg_init(); err = pci_register_driver(&i40e_driver); if (err) { destroy_workqueue(i40e_wq); i40e_dbg_exit(); return err; } return 0; } module_init(i40e_init_module); /** * i40e_exit_module - Driver exit cleanup routine * * i40e_exit_module is called just before the driver is removed * from memory. **/ static void __exit i40e_exit_module(void) { pci_unregister_driver(&i40e_driver); destroy_workqueue(i40e_wq); ida_destroy(&i40e_client_ida); i40e_dbg_exit(); } module_exit(i40e_exit_module);