/*
 * Copyright 2008-2010 Cisco Systems, Inc.  All rights reserved.
 * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
 *
 * This program is free software; you may redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/workqueue.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/rtnetlink.h>
#include <linux/prefetch.h>
#include <net/ip6_checksum.h>
#include <linux/ktime.h>
#include <linux/numa.h>
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
#include <linux/crash_dump.h>
#include <net/busy_poll.h>
#include <net/vxlan.h>
#include <net/netdev_queues.h>

#include "cq_enet_desc.h"
#include "vnic_dev.h"
#include "vnic_intr.h"
#include "vnic_stats.h"
#include "vnic_vic.h"
#include "enic_res.h"
#include "enic.h"
#include "enic_dev.h"
#include "enic_pp.h"
#include "enic_clsf.h"
#include "enic_rq.h"

#define ENIC_NOTIFY_TIMER_PERIOD	(2 * HZ)
#define WQ_ENET_MAX_DESC_LEN		(1 << WQ_ENET_LEN_BITS)
#define MAX_TSO				(1 << 16)
#define ENIC_DESC_MAX_SPLITS		(MAX_TSO / WQ_ENET_MAX_DESC_LEN + 1)

#define PCI_DEVICE_ID_CISCO_VIC_ENET		0x0043	/* ethernet vnic */
#define PCI_DEVICE_ID_CISCO_VIC_ENET_DYN	0x0044	/* enet dynamic vnic */
#define PCI_DEVICE_ID_CISCO_VIC_ENET_VF		0x0071	/* enet SRIOV VF */

/* Supported devices */
static const struct pci_device_id enic_id_table[] = {
	{ PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET) },
	{ PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_DYN) },
	{ PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_VF) },
	{ 0, }	/* end of table */
};

MODULE_DESCRIPTION(DRV_DESCRIPTION);
MODULE_AUTHOR("Scott Feldman <scofeldm@cisco.com>");
MODULE_LICENSE("GPL");
MODULE_DEVICE_TABLE(pci, enic_id_table);

#define ENIC_LARGE_PKT_THRESHOLD		1000
#define ENIC_MAX_COALESCE_TIMERS		10
/* Interrupt moderation table, which will be used to decide the
 * coalescing timer values
 * {rx_rate in Mbps, mapping percentage of the range}
 */
static struct enic_intr_mod_table mod_table[ENIC_MAX_COALESCE_TIMERS + 1] = {
	{4000,  0},
	{4400, 10},
	{5060, 20},
	{5230, 30},
	{5540, 40},
	{5820, 50},
	{6120, 60},
	{6435, 70},
	{6745, 80},
	{7000, 90},
	{0xFFFFFFFF, 100}
};
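/* Example: when enic_calc_int_moderation() estimates 5000 Mbps of receive
 * traffic, the first entry whose rx_rate exceeds that is {5060, 20}, so the
 * coalescing timer is set 20% of the way into the selected timer range.
 */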
/* This table helps the driver to pick different ranges for rx coalescing
 * timer depending on the link speed.
 */
static struct enic_intr_mod_range mod_range[ENIC_MAX_LINK_SPEEDS] = {
	{0,  0}, /* 0  - 4  Gbps */
	{0,  3}, /* 4  - 10 Gbps */
	{3,  6}, /* 10+ Gbps */
};

static void enic_init_affinity_hint(struct enic *enic)
{
	int numa_node = dev_to_node(&enic->pdev->dev);
	int i;

	for (i = 0; i < enic->intr_count; i++) {
		if (enic_is_err_intr(enic, i) || enic_is_notify_intr(enic, i) ||
		    (cpumask_available(enic->msix[i].affinity_mask) &&
		     !cpumask_empty(enic->msix[i].affinity_mask)))
			continue;
		if (zalloc_cpumask_var(&enic->msix[i].affinity_mask,
				       GFP_KERNEL))
			cpumask_set_cpu(cpumask_local_spread(i, numa_node),
					enic->msix[i].affinity_mask);
	}
}

static void enic_free_affinity_hint(struct enic *enic)
{
	int i;

	for (i = 0; i < enic->intr_count; i++) {
		if (enic_is_err_intr(enic, i) || enic_is_notify_intr(enic, i))
			continue;
		free_cpumask_var(enic->msix[i].affinity_mask);
	}
}

static void enic_set_affinity_hint(struct enic *enic)
{
	int i;
	int err;

	for (i = 0; i < enic->intr_count; i++) {
		if (enic_is_err_intr(enic, i) ||
		    enic_is_notify_intr(enic, i) ||
		    !cpumask_available(enic->msix[i].affinity_mask) ||
		    cpumask_empty(enic->msix[i].affinity_mask))
			continue;
		err = irq_update_affinity_hint(enic->msix_entry[i].vector,
					       enic->msix[i].affinity_mask);
		if (err)
			netdev_warn(enic->netdev, "irq_update_affinity_hint failed, err %d\n",
				    err);
	}

	for (i = 0; i < enic->wq_count; i++) {
		int wq_intr = enic_msix_wq_intr(enic, i);

		if (cpumask_available(enic->msix[wq_intr].affinity_mask) &&
		    !cpumask_empty(enic->msix[wq_intr].affinity_mask))
			netif_set_xps_queue(enic->netdev,
					    enic->msix[wq_intr].affinity_mask,
					    i);
	}
}

static void enic_unset_affinity_hint(struct enic *enic)
{
	int i;

	for (i = 0; i < enic->intr_count; i++)
		irq_update_affinity_hint(enic->msix_entry[i].vector, NULL);
}

static int enic_udp_tunnel_set_port(struct net_device *netdev,
				    unsigned int table, unsigned int entry,
				    struct udp_tunnel_info *ti)
{
	struct enic *enic = netdev_priv(netdev);
	int err;

	spin_lock_bh(&enic->devcmd_lock);

	err = vnic_dev_overlay_offload_cfg(enic->vdev,
					   OVERLAY_CFG_VXLAN_PORT_UPDATE,
					   ntohs(ti->port));
	if (err)
		goto error;

	err = vnic_dev_overlay_offload_ctrl(enic->vdev, OVERLAY_FEATURE_VXLAN,
					    enic->vxlan.patch_level);
	if (err)
		goto error;

	enic->vxlan.vxlan_udp_port_number = ntohs(ti->port);
error:
	spin_unlock_bh(&enic->devcmd_lock);

	return err;
}

static int enic_udp_tunnel_unset_port(struct net_device *netdev,
				      unsigned int table, unsigned int entry,
				      struct udp_tunnel_info *ti)
{
	struct enic *enic = netdev_priv(netdev);
	int err;

	spin_lock_bh(&enic->devcmd_lock);

	err = vnic_dev_overlay_offload_ctrl(enic->vdev, OVERLAY_FEATURE_VXLAN,
					    OVERLAY_OFFLOAD_DISABLE);
	if (err)
		goto unlock;

	enic->vxlan.vxlan_udp_port_number = 0;

unlock:
	spin_unlock_bh(&enic->devcmd_lock);

	return err;
}
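/* The adapter offloads a single VXLAN UDP port, hence one table entry.
 * The _v4 variant additionally restricts offload to IPv4 outer headers via
 * UDP_TUNNEL_NIC_INFO_IPV4_ONLY.
 */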
static const struct udp_tunnel_nic_info enic_udp_tunnels = {
	.set_port	= enic_udp_tunnel_set_port,
	.unset_port	= enic_udp_tunnel_unset_port,
	.tables = {
		{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
	},
}, enic_udp_tunnels_v4 = {
	.set_port	= enic_udp_tunnel_set_port,
	.unset_port	= enic_udp_tunnel_unset_port,
	.flags		= UDP_TUNNEL_NIC_INFO_IPV4_ONLY,
	.tables = {
		{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
	},
};

static netdev_features_t enic_features_check(struct sk_buff *skb,
					     struct net_device *dev,
					     netdev_features_t features)
{
	const struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb);
	struct enic *enic = netdev_priv(dev);
	struct udphdr *udph;
	u16 port = 0;
	u8 proto;

	if (!skb->encapsulation)
		return features;

	features = vxlan_features_check(skb, features);

	switch (vlan_get_protocol(skb)) {
	case htons(ETH_P_IPV6):
		if (!(enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6))
			goto out;
		proto = ipv6_hdr(skb)->nexthdr;
		break;
	case htons(ETH_P_IP):
		proto = ip_hdr(skb)->protocol;
		break;
	default:
		goto out;
	}

	switch (eth->h_proto) {
	case ntohs(ETH_P_IPV6):
		if (!(enic->vxlan.flags & ENIC_VXLAN_INNER_IPV6))
			goto out;
		fallthrough;
	case ntohs(ETH_P_IP):
		break;
	default:
		goto out;
	}


	if (proto == IPPROTO_UDP) {
		udph = udp_hdr(skb);
		port = be16_to_cpu(udph->dest);
	}

	/* HW supports offload of only one UDP port. Remove CSUM and GSO MASK
	 * for other UDP port tunnels
	 */
	if (port != enic->vxlan.vxlan_udp_port_number)
		goto out;

	return features;

out:
	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
}

int enic_is_dynamic(struct enic *enic)
{
	return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_DYN;
}

int enic_sriov_enabled(struct enic *enic)
{
	return (enic->priv_flags & ENIC_SRIOV_ENABLED) ? 1 : 0;
}

static int enic_is_sriov_vf(struct enic *enic)
{
	return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
}

int enic_is_valid_vf(struct enic *enic, int vf)
{
#ifdef CONFIG_PCI_IOV
	return vf >= 0 && vf < enic->num_vfs;
#else
	return 0;
#endif
}

static void enic_free_wq_buf(struct vnic_wq *wq, struct vnic_wq_buf *buf)
{
	struct enic *enic = vnic_dev_priv(wq->vdev);

	if (buf->sop)
		dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len,
				 DMA_TO_DEVICE);
	else
		dma_unmap_page(&enic->pdev->dev, buf->dma_addr, buf->len,
			       DMA_TO_DEVICE);

	if (buf->os_buf)
		dev_kfree_skb_any(buf->os_buf);
}

static void enic_wq_free_buf(struct vnic_wq *wq,
	struct cq_desc *cq_desc, struct vnic_wq_buf *buf, void *opaque)
{
	struct enic *enic = vnic_dev_priv(wq->vdev);

	enic->wq[wq->index].stats.cq_work++;
	enic->wq[wq->index].stats.cq_bytes += buf->len;
	enic_free_wq_buf(wq, buf);
}

static int enic_wq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc,
	u8 type, u16 q_number, u16 completed_index, void *opaque)
{
	struct enic *enic = vnic_dev_priv(vdev);

	spin_lock(&enic->wq[q_number].lock);

	vnic_wq_service(&enic->wq[q_number].vwq, cq_desc,
			completed_index, enic_wq_free_buf,
			opaque);

	if (netif_tx_queue_stopped(netdev_get_tx_queue(enic->netdev, q_number)) &&
	    vnic_wq_desc_avail(&enic->wq[q_number].vwq) >=
	    (MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS)) {
		netif_wake_subqueue(enic->netdev, q_number);
		enic->wq[q_number].stats.wake++;
	}

	spin_unlock(&enic->wq[q_number].lock);

	return 0;
}
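/* Log any error status the hardware reports for the transmit and receive
 * queues; returns true if any queue flagged an error so the caller can
 * schedule a reset.
 */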
static bool enic_log_q_error(struct enic *enic)
{
	unsigned int i;
	u32 error_status;
	bool err = false;

	for (i = 0; i < enic->wq_count; i++) {
		error_status = vnic_wq_error_status(&enic->wq[i].vwq);
		err |= error_status;
		if (error_status)
			netdev_err(enic->netdev, "WQ[%d] error_status %d\n",
				   i, error_status);
	}

	for (i = 0; i < enic->rq_count; i++) {
		error_status = vnic_rq_error_status(&enic->rq[i].vrq);
		err |= error_status;
		if (error_status)
			netdev_err(enic->netdev, "RQ[%d] error_status %d\n",
				   i, error_status);
	}

	return err;
}

static void enic_msglvl_check(struct enic *enic)
{
	u32 msg_enable = vnic_dev_msg_lvl(enic->vdev);

	if (msg_enable != enic->msg_enable) {
		netdev_info(enic->netdev, "msg lvl changed from 0x%x to 0x%x\n",
			    enic->msg_enable, msg_enable);
		enic->msg_enable = msg_enable;
	}
}

static void enic_mtu_check(struct enic *enic)
{
	u32 mtu = vnic_dev_mtu(enic->vdev);
	struct net_device *netdev = enic->netdev;

	if (mtu && mtu != enic->port_mtu) {
		enic->port_mtu = mtu;
		if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) {
			mtu = max_t(int, ENIC_MIN_MTU,
				    min_t(int, ENIC_MAX_MTU, mtu));
			if (mtu != netdev->mtu)
				schedule_work(&enic->change_mtu_work);
		} else {
			if (mtu < netdev->mtu)
				netdev_warn(netdev,
					    "interface MTU (%d) set higher "
					    "than switch port MTU (%d)\n",
					    netdev->mtu, mtu);
		}
	}
}

static void enic_set_rx_coal_setting(struct enic *enic)
{
	unsigned int speed;
	int index = -1;
	struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;

	/* 1. Read the link speed from fw
	 * 2. Pick the default range for the speed
	 * 3. Update it in enic->rx_coalesce_setting
	 */
	speed = vnic_dev_port_speed(enic->vdev);
	if (speed > ENIC_LINK_SPEED_10G)
		index = ENIC_LINK_40G_INDEX;
	else if (speed > ENIC_LINK_SPEED_4G)
		index = ENIC_LINK_10G_INDEX;
	else
		index = ENIC_LINK_4G_INDEX;

	rx_coal->small_pkt_range_start = mod_range[index].small_pkt_range_start;
	rx_coal->large_pkt_range_start = mod_range[index].large_pkt_range_start;
	rx_coal->range_end = ENIC_RX_COALESCE_RANGE_END;

	/* Start with the value provided by UCSM */
	for (index = 0; index < enic->rq_count; index++)
		enic->cq[index].cur_rx_coal_timeval =
			enic->config.intr_timer_usec;

	rx_coal->use_adaptive_rx_coalesce = 1;
}

static void enic_link_check(struct enic *enic)
{
	int link_status = vnic_dev_link_status(enic->vdev);
	int carrier_ok = netif_carrier_ok(enic->netdev);

	if (link_status && !carrier_ok) {
		netdev_info(enic->netdev, "Link UP\n");
		netif_carrier_on(enic->netdev);
		enic_set_rx_coal_setting(enic);
	} else if (!link_status && carrier_ok) {
		netdev_info(enic->netdev, "Link DOWN\n");
		netif_carrier_off(enic->netdev);
	}
}

static void enic_notify_check(struct enic *enic)
{
	enic_msglvl_check(enic);
	enic_mtu_check(enic);
	enic_link_check(enic);
}

#define ENIC_TEST_INTR(pba, i) (pba & (1 << i))
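/* Legacy INTx interrupt handler: the pba value indicates which of the I/O,
 * error and notify sources are asserted. I/O work is deferred to NAPI,
 * error and notify events are handled directly here.
 */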
static irqreturn_t enic_isr_legacy(int irq, void *data)
{
	struct net_device *netdev = data;
	struct enic *enic = netdev_priv(netdev);
	unsigned int io_intr = ENIC_LEGACY_IO_INTR;
	unsigned int err_intr = ENIC_LEGACY_ERR_INTR;
	unsigned int notify_intr = ENIC_LEGACY_NOTIFY_INTR;
	u32 pba;

	vnic_intr_mask(&enic->intr[io_intr]);

	pba = vnic_intr_legacy_pba(enic->legacy_pba);
	if (!pba) {
		vnic_intr_unmask(&enic->intr[io_intr]);
		return IRQ_NONE;	/* not our interrupt */
	}

	if (ENIC_TEST_INTR(pba, notify_intr)) {
		enic_notify_check(enic);
		vnic_intr_return_all_credits(&enic->intr[notify_intr]);
	}

	if (ENIC_TEST_INTR(pba, err_intr)) {
		vnic_intr_return_all_credits(&enic->intr[err_intr]);
		enic_log_q_error(enic);
		/* schedule recovery from WQ/RQ error */
		schedule_work(&enic->reset);
		return IRQ_HANDLED;
	}

	if (ENIC_TEST_INTR(pba, io_intr))
		napi_schedule_irqoff(&enic->napi[0]);
	else
		vnic_intr_unmask(&enic->intr[io_intr]);

	return IRQ_HANDLED;
}

static irqreturn_t enic_isr_msi(int irq, void *data)
{
	struct enic *enic = data;

	/* With MSI, there is no sharing of interrupts, so this is
	 * our interrupt and there is no need to ack it.  The device
	 * is not providing per-vector masking, so the OS will not
	 * write to PCI config space to mask/unmask the interrupt.
	 * We're using mask_on_assertion for MSI, so the device
	 * automatically masks the interrupt when the interrupt is
	 * generated.  Later, when exiting polling, the interrupt
	 * will be unmasked (see enic_poll).
	 *
	 * Also, the device uses the same PCIe Traffic Class (TC)
	 * for Memory Write data and MSI, so there are no ordering
	 * issues; the MSI will always arrive at the Root Complex
	 * _after_ corresponding Memory Writes (i.e. descriptor
	 * writes).
	 */

	napi_schedule_irqoff(&enic->napi[0]);

	return IRQ_HANDLED;
}

static irqreturn_t enic_isr_msix(int irq, void *data)
{
	struct napi_struct *napi = data;

	napi_schedule_irqoff(napi);

	return IRQ_HANDLED;
}

static irqreturn_t enic_isr_msix_err(int irq, void *data)
{
	struct enic *enic = data;
	unsigned int intr = enic_msix_err_intr(enic);

	vnic_intr_return_all_credits(&enic->intr[intr]);

	if (enic_log_q_error(enic))
		/* schedule recovery from WQ/RQ error */
		schedule_work(&enic->reset);

	return IRQ_HANDLED;
}

static irqreturn_t enic_isr_msix_notify(int irq, void *data)
{
	struct enic *enic = data;
	unsigned int intr = enic_msix_notify_intr(enic);

	enic_notify_check(enic);
	vnic_intr_return_all_credits(&enic->intr[intr]);

	return IRQ_HANDLED;
}
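/* The enic_queue_wq_skb_*() helpers below post a frame to a work queue:
 * _cont() maps and queues the additional page fragments, while _vlan(),
 * _csum_l4(), _tso() and _encap() queue the linear head with the descriptor
 * flavor matching the requested offload.
 */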
static int enic_queue_wq_skb_cont(struct enic *enic, struct vnic_wq *wq,
				  struct sk_buff *skb, unsigned int len_left,
				  int loopback)
{
	const skb_frag_t *frag;
	dma_addr_t dma_addr;

	/* Queue additional data fragments */
	for (frag = skb_shinfo(skb)->frags; len_left; frag++) {
		len_left -= skb_frag_size(frag);
		dma_addr = skb_frag_dma_map(&enic->pdev->dev, frag, 0,
					    skb_frag_size(frag),
					    DMA_TO_DEVICE);
		if (unlikely(enic_dma_map_check(enic, dma_addr)))
			return -ENOMEM;
		enic_queue_wq_desc_cont(wq, skb, dma_addr, skb_frag_size(frag),
					(len_left == 0),	/* EOP? */
					loopback);
	}

	return 0;
}

static int enic_queue_wq_skb_vlan(struct enic *enic, struct vnic_wq *wq,
				  struct sk_buff *skb, int vlan_tag_insert,
				  unsigned int vlan_tag, int loopback)
{
	unsigned int head_len = skb_headlen(skb);
	unsigned int len_left = skb->len - head_len;
	int eop = (len_left == 0);
	dma_addr_t dma_addr;
	int err = 0;

	dma_addr = dma_map_single(&enic->pdev->dev, skb->data, head_len,
				  DMA_TO_DEVICE);
	if (unlikely(enic_dma_map_check(enic, dma_addr)))
		return -ENOMEM;

	/* Queue the main skb fragment. The fragments are no larger
	 * than max MTU(9000)+ETH_HDR_LEN(14) bytes, which is less
	 * than WQ_ENET_MAX_DESC_LEN length. So only one descriptor
	 * per fragment is queued.
	 */
	enic_queue_wq_desc(wq, skb, dma_addr, head_len, vlan_tag_insert,
			   vlan_tag, eop, loopback);

	if (!eop)
		err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback);

	/* The enic_queue_wq_desc() above does not do HW checksum */
	enic->wq[wq->index].stats.csum_none++;
	enic->wq[wq->index].stats.packets++;
	enic->wq[wq->index].stats.bytes += skb->len;

	return err;
}

static int enic_queue_wq_skb_csum_l4(struct enic *enic, struct vnic_wq *wq,
				     struct sk_buff *skb, int vlan_tag_insert,
				     unsigned int vlan_tag, int loopback)
{
	unsigned int head_len = skb_headlen(skb);
	unsigned int len_left = skb->len - head_len;
	unsigned int hdr_len = skb_checksum_start_offset(skb);
	unsigned int csum_offset = hdr_len + skb->csum_offset;
	int eop = (len_left == 0);
	dma_addr_t dma_addr;
	int err = 0;

	dma_addr = dma_map_single(&enic->pdev->dev, skb->data, head_len,
				  DMA_TO_DEVICE);
	if (unlikely(enic_dma_map_check(enic, dma_addr)))
		return -ENOMEM;

	/* Queue the main skb fragment. The fragments are no larger
	 * than max MTU(9000)+ETH_HDR_LEN(14) bytes, which is less
	 * than WQ_ENET_MAX_DESC_LEN length. So only one descriptor
	 * per fragment is queued.
	 */
	enic_queue_wq_desc_csum_l4(wq, skb, dma_addr, head_len, csum_offset,
				   hdr_len, vlan_tag_insert, vlan_tag, eop,
				   loopback);

	if (!eop)
		err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback);

	enic->wq[wq->index].stats.csum_partial++;
	enic->wq[wq->index].stats.packets++;
	enic->wq[wq->index].stats.bytes += skb->len;

	return err;
}

static void enic_preload_tcp_csum_encap(struct sk_buff *skb)
{
	const struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb);

	switch (eth->h_proto) {
	case ntohs(ETH_P_IP):
		inner_ip_hdr(skb)->check = 0;
		inner_tcp_hdr(skb)->check =
			~csum_tcpudp_magic(inner_ip_hdr(skb)->saddr,
					   inner_ip_hdr(skb)->daddr, 0,
					   IPPROTO_TCP, 0);
		break;
	case ntohs(ETH_P_IPV6):
		inner_tcp_hdr(skb)->check =
			~csum_ipv6_magic(&inner_ipv6_hdr(skb)->saddr,
					 &inner_ipv6_hdr(skb)->daddr, 0,
					 IPPROTO_TCP, 0);
		break;
	default:
		WARN_ONCE(1, "Non ipv4/ipv6 inner pkt for encap offload");
		break;
	}
}
static void enic_preload_tcp_csum(struct sk_buff *skb)
{
	/* Preload TCP csum field with IP pseudo hdr calculated
	 * with IP length set to zero. HW will later add in length
	 * to each TCP segment resulting from the TSO.
	 */

	if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
		ip_hdr(skb)->check = 0;
		tcp_hdr(skb)->check = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
			ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
	} else if (skb->protocol == cpu_to_be16(ETH_P_IPV6)) {
		tcp_v6_gso_csum_prep(skb);
	}
}

static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq,
				 struct sk_buff *skb, unsigned int mss,
				 int vlan_tag_insert, unsigned int vlan_tag,
				 int loopback)
{
	unsigned int frag_len_left = skb_headlen(skb);
	unsigned int len_left = skb->len - frag_len_left;
	int eop = (len_left == 0);
	unsigned int offset = 0;
	unsigned int hdr_len;
	dma_addr_t dma_addr;
	unsigned int pkts;
	unsigned int len;
	skb_frag_t *frag;

	if (skb->encapsulation) {
		hdr_len = skb_inner_tcp_all_headers(skb);
		enic_preload_tcp_csum_encap(skb);
		enic->wq[wq->index].stats.encap_tso++;
	} else {
		hdr_len = skb_tcp_all_headers(skb);
		enic_preload_tcp_csum(skb);
		enic->wq[wq->index].stats.tso++;
	}

	/* Queue WQ_ENET_MAX_DESC_LEN length descriptors
	 * for the main skb fragment
	 */
	while (frag_len_left) {
		len = min(frag_len_left, (unsigned int)WQ_ENET_MAX_DESC_LEN);
		dma_addr = dma_map_single(&enic->pdev->dev,
					  skb->data + offset, len,
					  DMA_TO_DEVICE);
		if (unlikely(enic_dma_map_check(enic, dma_addr)))
			return -ENOMEM;
		enic_queue_wq_desc_tso(wq, skb, dma_addr, len, mss, hdr_len,
				       vlan_tag_insert, vlan_tag,
				       eop && (len == frag_len_left), loopback);
		frag_len_left -= len;
		offset += len;
	}

	if (eop)
		goto tso_out_stats;

	/* Queue WQ_ENET_MAX_DESC_LEN length descriptors
	 * for additional data fragments
	 */
	for (frag = skb_shinfo(skb)->frags; len_left; frag++) {
		len_left -= skb_frag_size(frag);
		frag_len_left = skb_frag_size(frag);
		offset = 0;

		while (frag_len_left) {
			len = min(frag_len_left,
				  (unsigned int)WQ_ENET_MAX_DESC_LEN);
			dma_addr = skb_frag_dma_map(&enic->pdev->dev, frag,
						    offset, len,
						    DMA_TO_DEVICE);
			if (unlikely(enic_dma_map_check(enic, dma_addr)))
				return -ENOMEM;
			enic_queue_wq_desc_cont(wq, skb, dma_addr, len,
						(len_left == 0) &&
						 (len == frag_len_left),/*EOP*/
						loopback);
			frag_len_left -= len;
			offset += len;
		}
	}

tso_out_stats:
	/* calculate how many packets tso sent */
	len = skb->len - hdr_len;
	pkts = len / mss;
	if ((len % mss) > 0)
		pkts++;
	enic->wq[wq->index].stats.packets += pkts;
	enic->wq[wq->index].stats.bytes += (len + (pkts * hdr_len));

	return 0;
}
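/* Queue an encapsulated (VXLAN) frame for transmit. Checksum handling is
 * described in the comment on mss_or_csum below.
 */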
static inline int enic_queue_wq_skb_encap(struct enic *enic, struct vnic_wq *wq,
					  struct sk_buff *skb,
					  int vlan_tag_insert,
					  unsigned int vlan_tag, int loopback)
{
	unsigned int head_len = skb_headlen(skb);
	unsigned int len_left = skb->len - head_len;
	/* Hardware will overwrite the checksum fields, calculating from
	 * scratch and ignoring the value placed by software.
	 * Offload mode = 00
	 * mss[2], mss[1], mss[0] bits are set
	 */
	unsigned int mss_or_csum = 7;
	int eop = (len_left == 0);
	dma_addr_t dma_addr;
	int err = 0;

	dma_addr = dma_map_single(&enic->pdev->dev, skb->data, head_len,
				  DMA_TO_DEVICE);
	if (unlikely(enic_dma_map_check(enic, dma_addr)))
		return -ENOMEM;

	enic_queue_wq_desc_ex(wq, skb, dma_addr, head_len, mss_or_csum, 0,
			      vlan_tag_insert, vlan_tag,
			      WQ_ENET_OFFLOAD_MODE_CSUM, eop, 1 /* SOP */, eop,
			      loopback);
	if (!eop)
		err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback);

	enic->wq[wq->index].stats.encap_csum++;
	enic->wq[wq->index].stats.packets++;
	enic->wq[wq->index].stats.bytes += skb->len;

	return err;
}

static inline int enic_queue_wq_skb(struct enic *enic,
	struct vnic_wq *wq, struct sk_buff *skb)
{
	unsigned int mss = skb_shinfo(skb)->gso_size;
	unsigned int vlan_tag = 0;
	int vlan_tag_insert = 0;
	int loopback = 0;
	int err;

	if (skb_vlan_tag_present(skb)) {
		/* VLAN tag from trunking driver */
		vlan_tag_insert = 1;
		vlan_tag = skb_vlan_tag_get(skb);
		enic->wq[wq->index].stats.add_vlan++;
	} else if (enic->loop_enable) {
		vlan_tag = enic->loop_tag;
		loopback = 1;
	}

	if (mss)
		err = enic_queue_wq_skb_tso(enic, wq, skb, mss,
					    vlan_tag_insert, vlan_tag,
					    loopback);
	else if (skb->encapsulation)
		err = enic_queue_wq_skb_encap(enic, wq, skb, vlan_tag_insert,
					      vlan_tag, loopback);
	else if (skb->ip_summed == CHECKSUM_PARTIAL)
		err = enic_queue_wq_skb_csum_l4(enic, wq, skb, vlan_tag_insert,
						vlan_tag, loopback);
	else
		err = enic_queue_wq_skb_vlan(enic, wq, skb, vlan_tag_insert,
					     vlan_tag, loopback);
	if (unlikely(err)) {
		struct vnic_wq_buf *buf;

		buf = wq->to_use->prev;
		/* while not EOP of previous pkt && queue not empty.
		 * For all non EOP bufs, os_buf is NULL.
		 */
		while (!buf->os_buf && (buf->next != wq->to_clean)) {
			enic_free_wq_buf(wq, buf);
			wq->ring.desc_avail++;
			buf = buf->prev;
		}
		wq->to_use = buf->next;
		dev_kfree_skb(skb);
	}
	return err;
}
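/* The transmit queue is stopped when fewer than
 * MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS descriptors remain, and woken from
 * enic_wq_service() once at least that many are free again, so a maximally
 * fragmented frame can always be queued.
 */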
/* netif_tx_lock held, process context with BHs disabled, or BH */
static netdev_tx_t enic_hard_start_xmit(struct sk_buff *skb,
					struct net_device *netdev)
{
	struct enic *enic = netdev_priv(netdev);
	struct vnic_wq *wq;
	unsigned int txq_map;
	struct netdev_queue *txq;

	txq_map = skb_get_queue_mapping(skb) % enic->wq_count;
	wq = &enic->wq[txq_map].vwq;

	if (skb->len <= 0) {
		dev_kfree_skb_any(skb);
		enic->wq[wq->index].stats.null_pkt++;
		return NETDEV_TX_OK;
	}

	txq = netdev_get_tx_queue(netdev, txq_map);

	/* Non-TSO sends must fit within ENIC_NON_TSO_MAX_DESC descs,
	 * which is very likely.  In the off chance it's going to take
	 * more than ENIC_NON_TSO_MAX_DESC, linearize the skb.
	 */

	if (skb_shinfo(skb)->gso_size == 0 &&
	    skb_shinfo(skb)->nr_frags + 1 > ENIC_NON_TSO_MAX_DESC &&
	    skb_linearize(skb)) {
		dev_kfree_skb_any(skb);
		enic->wq[wq->index].stats.skb_linear_fail++;
		return NETDEV_TX_OK;
	}

	spin_lock(&enic->wq[txq_map].lock);

	if (vnic_wq_desc_avail(wq) <
	    skb_shinfo(skb)->nr_frags + ENIC_DESC_MAX_SPLITS) {
		netif_tx_stop_queue(txq);
		/* This is a hard error, log it */
		netdev_err(netdev, "BUG! Tx ring full when queue awake!\n");
		spin_unlock(&enic->wq[txq_map].lock);
		enic->wq[wq->index].stats.desc_full_awake++;
		return NETDEV_TX_BUSY;
	}

	if (enic_queue_wq_skb(enic, wq, skb))
		goto error;

	if (vnic_wq_desc_avail(wq) < MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS) {
		netif_tx_stop_queue(txq);
		enic->wq[wq->index].stats.stopped++;
	}
	skb_tx_timestamp(skb);
	if (!netdev_xmit_more() || netif_xmit_stopped(txq))
		vnic_wq_doorbell(wq);

error:
	spin_unlock(&enic->wq[txq_map].lock);

	return NETDEV_TX_OK;
}

/* rcu_read_lock potentially held, nominally process context */
static void enic_get_stats(struct net_device *netdev,
			   struct rtnl_link_stats64 *net_stats)
{
	struct enic *enic = netdev_priv(netdev);
	struct vnic_stats *stats;
	u64 pkt_truncated = 0;
	u64 bad_fcs = 0;
	int err;
	int i;

	err = enic_dev_stats_dump(enic, &stats);
	/* return only when dma_alloc_coherent fails in vnic_dev_stats_dump
	 * For other failures, like devcmd failure, we return previously
	 * recorded stats.
	 */
	if (err == -ENOMEM)
		return;

	net_stats->tx_packets = stats->tx.tx_frames_ok;
	net_stats->tx_bytes = stats->tx.tx_bytes_ok;
	net_stats->tx_errors = stats->tx.tx_errors;
	net_stats->tx_dropped = stats->tx.tx_drops;

	net_stats->rx_packets = stats->rx.rx_frames_ok;
	net_stats->rx_bytes = stats->rx.rx_bytes_ok;
	net_stats->rx_errors = stats->rx.rx_errors;
	net_stats->multicast = stats->rx.rx_multicast_frames_ok;

	for (i = 0; i < enic->rq_count; i++) {
		struct enic_rq_stats *rqs = &enic->rq[i].stats;

		if (!enic->rq[i].vrq.ctrl)
			break;
		pkt_truncated += rqs->pkt_truncated;
		bad_fcs += rqs->bad_fcs;
	}
	net_stats->rx_over_errors = pkt_truncated;
	net_stats->rx_crc_errors = bad_fcs;
	net_stats->rx_dropped = stats->rx.rx_no_bufs + stats->rx.rx_drop;
}

static int enic_mc_sync(struct net_device *netdev, const u8 *mc_addr)
{
	struct enic *enic = netdev_priv(netdev);

	if (enic->mc_count == ENIC_MULTICAST_PERFECT_FILTERS) {
		unsigned int mc_count = netdev_mc_count(netdev);

		netdev_warn(netdev, "Registering only %d out of %d multicast addresses\n",
			    ENIC_MULTICAST_PERFECT_FILTERS, mc_count);

		return -ENOSPC;
	}

	enic_dev_add_addr(enic, mc_addr);
	enic->mc_count++;

	return 0;
}

static int enic_mc_unsync(struct net_device *netdev, const u8 *mc_addr)
{
	struct enic *enic = netdev_priv(netdev);

	enic_dev_del_addr(enic, mc_addr);
	enic->mc_count--;

	return 0;
}

static int enic_uc_sync(struct net_device *netdev, const u8 *uc_addr)
{
	struct enic *enic = netdev_priv(netdev);

	if (enic->uc_count == ENIC_UNICAST_PERFECT_FILTERS) {
		unsigned int uc_count = netdev_uc_count(netdev);

		netdev_warn(netdev, "Registering only %d out of %d unicast addresses\n",
			    ENIC_UNICAST_PERFECT_FILTERS, uc_count);

		return -ENOSPC;
	}

	enic_dev_add_addr(enic, uc_addr);
	enic->uc_count++;

	return 0;
}

static int enic_uc_unsync(struct net_device *netdev, const u8 *uc_addr)
{
	struct enic *enic = netdev_priv(netdev);

	enic_dev_del_addr(enic, uc_addr);
	enic->uc_count--;

	return 0;
}
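/* Forget the unicast/multicast addresses and packet filter flags previously
 * pushed to the adapter; used by the reset handlers so the lists are
 * re-synced after the device is reopened.
 */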
void enic_reset_addr_lists(struct enic *enic)
{
	struct net_device *netdev = enic->netdev;

	__dev_uc_unsync(netdev, NULL);
	__dev_mc_unsync(netdev, NULL);

	enic->mc_count = 0;
	enic->uc_count = 0;
	enic->flags = 0;
}

static int enic_set_mac_addr(struct net_device *netdev, char *addr)
{
	struct enic *enic = netdev_priv(netdev);

	if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) {
		if (!is_valid_ether_addr(addr) && !is_zero_ether_addr(addr))
			return -EADDRNOTAVAIL;
	} else {
		if (!is_valid_ether_addr(addr))
			return -EADDRNOTAVAIL;
	}

	eth_hw_addr_set(netdev, addr);

	return 0;
}

static int enic_set_mac_address_dynamic(struct net_device *netdev, void *p)
{
	struct enic *enic = netdev_priv(netdev);
	struct sockaddr *saddr = p;
	char *addr = saddr->sa_data;
	int err;

	if (netif_running(enic->netdev)) {
		err = enic_dev_del_station_addr(enic);
		if (err)
			return err;
	}

	err = enic_set_mac_addr(netdev, addr);
	if (err)
		return err;

	if (netif_running(enic->netdev)) {
		err = enic_dev_add_station_addr(enic);
		if (err)
			return err;
	}

	return err;
}

static int enic_set_mac_address(struct net_device *netdev, void *p)
{
	struct sockaddr *saddr = p;
	char *addr = saddr->sa_data;
	struct enic *enic = netdev_priv(netdev);
	int err;

	err = enic_dev_del_station_addr(enic);
	if (err)
		return err;

	err = enic_set_mac_addr(netdev, addr);
	if (err)
		return err;

	return enic_dev_add_station_addr(enic);
}
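/* If more unicast or multicast addresses are requested than the adapter has
 * perfect filters, enic_set_rx_mode() falls back to promiscuous or
 * all-multicast mode instead of syncing the address lists.
 */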
/* netif_tx_lock held, BHs disabled */
static void enic_set_rx_mode(struct net_device *netdev)
{
	struct enic *enic = netdev_priv(netdev);
	int directed = 1;
	int multicast = (netdev->flags & IFF_MULTICAST) ? 1 : 0;
	int broadcast = (netdev->flags & IFF_BROADCAST) ? 1 : 0;
	int promisc = (netdev->flags & IFF_PROMISC) ||
		netdev_uc_count(netdev) > ENIC_UNICAST_PERFECT_FILTERS;
	int allmulti = (netdev->flags & IFF_ALLMULTI) ||
		netdev_mc_count(netdev) > ENIC_MULTICAST_PERFECT_FILTERS;
	unsigned int flags = netdev->flags |
		(allmulti ? IFF_ALLMULTI : 0) |
		(promisc ? IFF_PROMISC : 0);

	if (enic->flags != flags) {
		enic->flags = flags;
		enic_dev_packet_filter(enic, directed,
			multicast, broadcast, promisc, allmulti);
	}

	if (!promisc) {
		__dev_uc_sync(netdev, enic_uc_sync, enic_uc_unsync);
		if (!allmulti)
			__dev_mc_sync(netdev, enic_mc_sync, enic_mc_unsync);
	}
}

/* netif_tx_lock held, BHs disabled */
static void enic_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
	struct enic *enic = netdev_priv(netdev);

	schedule_work(&enic->tx_hang_reset);
}

static int enic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
	struct enic *enic = netdev_priv(netdev);
	struct enic_port_profile *pp;
	int err;

	ENIC_PP_BY_INDEX(enic, vf, pp, &err);
	if (err)
		return err;

	if (is_valid_ether_addr(mac) || is_zero_ether_addr(mac)) {
		if (vf == PORT_SELF_VF) {
			memcpy(pp->vf_mac, mac, ETH_ALEN);
			return 0;
		} else {
			/*
			 * For sriov vf's set the mac in hw
			 */
			ENIC_DEVCMD_PROXY_BY_INDEX(vf, err, enic,
				vnic_dev_set_mac_addr, mac);
			return enic_dev_status_to_errno(err);
		}
	} else
		return -EINVAL;
}

static int enic_set_vf_port(struct net_device *netdev, int vf,
	struct nlattr *port[])
{
	static const u8 zero_addr[ETH_ALEN] = {};
	struct enic *enic = netdev_priv(netdev);
	struct enic_port_profile prev_pp;
	struct enic_port_profile *pp;
	int err = 0, restore_pp = 1;

	ENIC_PP_BY_INDEX(enic, vf, pp, &err);
	if (err)
		return err;

	if (!port[IFLA_PORT_REQUEST])
		return -EOPNOTSUPP;

	memcpy(&prev_pp, pp, sizeof(*enic->pp));
	memset(pp, 0, sizeof(*enic->pp));

	pp->set |= ENIC_SET_REQUEST;
	pp->request = nla_get_u8(port[IFLA_PORT_REQUEST]);

	if (port[IFLA_PORT_PROFILE]) {
		if (nla_len(port[IFLA_PORT_PROFILE]) != PORT_PROFILE_MAX) {
			memcpy(pp, &prev_pp, sizeof(*pp));
			return -EINVAL;
		}
		pp->set |= ENIC_SET_NAME;
		memcpy(pp->name, nla_data(port[IFLA_PORT_PROFILE]),
			PORT_PROFILE_MAX);
	}

	if (port[IFLA_PORT_INSTANCE_UUID]) {
		if (nla_len(port[IFLA_PORT_INSTANCE_UUID]) != PORT_UUID_MAX) {
			memcpy(pp, &prev_pp, sizeof(*pp));
			return -EINVAL;
		}
		pp->set |= ENIC_SET_INSTANCE;
		memcpy(pp->instance_uuid,
			nla_data(port[IFLA_PORT_INSTANCE_UUID]), PORT_UUID_MAX);
	}

	if (port[IFLA_PORT_HOST_UUID]) {
		if (nla_len(port[IFLA_PORT_HOST_UUID]) != PORT_UUID_MAX) {
			memcpy(pp, &prev_pp, sizeof(*pp));
			return -EINVAL;
		}
		pp->set |= ENIC_SET_HOST;
		memcpy(pp->host_uuid,
			nla_data(port[IFLA_PORT_HOST_UUID]), PORT_UUID_MAX);
	}

	if (vf == PORT_SELF_VF) {
		/* Special case handling: mac came from IFLA_VF_MAC */
		if (!is_zero_ether_addr(prev_pp.vf_mac))
			memcpy(pp->mac_addr, prev_pp.vf_mac, ETH_ALEN);

		if (is_zero_ether_addr(netdev->dev_addr))
			eth_hw_addr_random(netdev);
	} else {
		/* SR-IOV VF: get mac from adapter */
		ENIC_DEVCMD_PROXY_BY_INDEX(vf, err, enic,
			vnic_dev_get_mac_addr, pp->mac_addr);
		if (err) {
			netdev_err(netdev, "Error getting mac for vf %d\n", vf);
			memcpy(pp, &prev_pp, sizeof(*pp));
			return enic_dev_status_to_errno(err);
		}
	}

	err = enic_process_set_pp_request(enic, vf, &prev_pp, &restore_pp);
	if (err) {
		if (restore_pp) {
			/* Things are still the way they were: Implicit
			 * DISASSOCIATE failed
			 */
			memcpy(pp, &prev_pp, sizeof(*pp));
		} else {
			memset(pp, 0, sizeof(*pp));
			if (vf == PORT_SELF_VF)
				eth_hw_addr_set(netdev, zero_addr);
		}
	} else {
		/* Set flag to indicate that the port assoc/disassoc
		 * request has been sent out to fw
		 */
		pp->set |= ENIC_PORT_REQUEST_APPLIED;

		/* If DISASSOCIATE, clean up all assigned/saved macaddresses */
		if (pp->request == PORT_REQUEST_DISASSOCIATE) {
			eth_zero_addr(pp->mac_addr);
			if (vf == PORT_SELF_VF)
				eth_hw_addr_set(netdev, zero_addr);
		}
	}

	if (vf == PORT_SELF_VF)
		eth_zero_addr(pp->vf_mac);

	return err;
}

static int enic_get_vf_port(struct net_device *netdev, int vf,
	struct sk_buff *skb)
{
	struct enic *enic = netdev_priv(netdev);
	u16 response = PORT_PROFILE_RESPONSE_SUCCESS;
	struct enic_port_profile *pp;
	int err;

	ENIC_PP_BY_INDEX(enic, vf, pp, &err);
	if (err)
		return err;

	if (!(pp->set & ENIC_PORT_REQUEST_APPLIED))
		return -ENODATA;

	err = enic_process_get_pp_request(enic, vf, pp->request, &response);
	if (err)
		return err;

	if (nla_put_u16(skb, IFLA_PORT_REQUEST, pp->request) ||
	    nla_put_u16(skb, IFLA_PORT_RESPONSE, response) ||
	    ((pp->set & ENIC_SET_NAME) &&
	     nla_put(skb, IFLA_PORT_PROFILE, PORT_PROFILE_MAX, pp->name)) ||
	    ((pp->set & ENIC_SET_INSTANCE) &&
	     nla_put(skb, IFLA_PORT_INSTANCE_UUID, PORT_UUID_MAX,
		     pp->instance_uuid)) ||
	    ((pp->set & ENIC_SET_HOST) &&
	     nla_put(skb, IFLA_PORT_HOST_UUID, PORT_UUID_MAX, pp->host_uuid)))
		goto nla_put_failure;
	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static void enic_set_int_moderation(struct enic *enic, struct vnic_rq *rq)
{
	unsigned int intr = enic_msix_rq_intr(enic, rq->index);
	struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
	u32 timer = cq->tobe_rx_coal_timeval;

	if (cq->tobe_rx_coal_timeval != cq->cur_rx_coal_timeval) {
		vnic_intr_coalescing_timer_set(&enic->intr[intr], timer);
		cq->cur_rx_coal_timeval = cq->tobe_rx_coal_timeval;
	}
}
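/* Worked example for enic_calc_int_moderation() below: 1,000,000 bytes seen
 * over a 2000 us window is 8,000,000 bits / 2000 us = 4000 Mbps, which
 * selects mod_table[1] = {4400, 10}, i.e. 10% of the selected timer range.
 */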
static void enic_calc_int_moderation(struct enic *enic, struct vnic_rq *rq)
{
	struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
	struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
	struct vnic_rx_bytes_counter *pkt_size_counter = &cq->pkt_size_counter;
	int index;
	u32 timer;
	u32 range_start;
	u32 traffic;
	u64 delta;
	ktime_t now = ktime_get();

	delta = ktime_us_delta(now, cq->prev_ts);
	if (delta < ENIC_AIC_TS_BREAK)
		return;
	cq->prev_ts = now;

	traffic = pkt_size_counter->large_pkt_bytes_cnt +
		  pkt_size_counter->small_pkt_bytes_cnt;
	/* The table takes Mbps
	 * traffic *= 8    => bits
	 * traffic *= (10^6 / delta)    => bps
	 * traffic /= 10^6     => Mbps
	 *
	 * Combining, traffic *= (8 / delta)
	 */

	traffic <<= 3;
	traffic = delta > UINT_MAX ? 0 : traffic / (u32)delta;

	for (index = 0; index < ENIC_MAX_COALESCE_TIMERS; index++)
		if (traffic < mod_table[index].rx_rate)
			break;
	range_start = (pkt_size_counter->small_pkt_bytes_cnt >
		       pkt_size_counter->large_pkt_bytes_cnt << 1) ?
		      rx_coal->small_pkt_range_start :
		      rx_coal->large_pkt_range_start;
	timer = range_start + ((rx_coal->range_end - range_start) *
			       mod_table[index].range_percent / 100);
	/* Damping */
	cq->tobe_rx_coal_timeval = (timer + cq->tobe_rx_coal_timeval) >> 1;

	pkt_size_counter->large_pkt_bytes_cnt = 0;
	pkt_size_counter->small_pkt_bytes_cnt = 0;
}
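/* NAPI poll routine used in legacy INTx and MSI modes, where a single vector
 * (and hence a single napi context, enic->napi[0]) services both the
 * transmit and receive completion queues.
 */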
static int enic_poll(struct napi_struct *napi, int budget)
{
	struct net_device *netdev = napi->dev;
	struct enic *enic = netdev_priv(netdev);
	unsigned int cq_rq = enic_cq_rq(enic, 0);
	unsigned int cq_wq = enic_cq_wq(enic, 0);
	unsigned int intr = ENIC_LEGACY_IO_INTR;
	unsigned int rq_work_to_do = budget;
	unsigned int wq_work_to_do = ENIC_WQ_NAPI_BUDGET;
	unsigned int work_done, rq_work_done = 0, wq_work_done;
	int err;

	wq_work_done = vnic_cq_service(&enic->cq[cq_wq], wq_work_to_do,
				       enic_wq_service, NULL);

	if (budget > 0)
		rq_work_done = vnic_cq_service(&enic->cq[cq_rq],
			rq_work_to_do, enic_rq_service, NULL);

	/* Accumulate intr event credits for this polling
	 * cycle.  An intr event is the completion of a
	 * WQ or RQ packet.
	 */

	work_done = rq_work_done + wq_work_done;

	if (work_done > 0)
		vnic_intr_return_credits(&enic->intr[intr],
			work_done,
			0 /* don't unmask intr */,
			0 /* don't reset intr timer */);

	err = vnic_rq_fill(&enic->rq[0].vrq, enic_rq_alloc_buf);

	/* Buffer allocation failed. Stay in polling
	 * mode so we can try to fill the ring again.
	 */

	if (err)
		rq_work_done = rq_work_to_do;
	if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
		/* Call the function which refreshes the intr coalescing timer
		 * value based on the traffic.
		 */
		enic_calc_int_moderation(enic, &enic->rq[0].vrq);

	if ((rq_work_done < budget) && napi_complete_done(napi, rq_work_done)) {

		/* Some work done, but not enough to stay in polling,
		 * exit polling
		 */

		if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
			enic_set_int_moderation(enic, &enic->rq[0].vrq);
		vnic_intr_unmask(&enic->intr[intr]);
		enic->rq[0].stats.napi_complete++;
	} else {
		enic->rq[0].stats.napi_repoll++;
	}

	return rq_work_done;
}

#ifdef CONFIG_RFS_ACCEL
static void enic_free_rx_cpu_rmap(struct enic *enic)
{
	free_irq_cpu_rmap(enic->netdev->rx_cpu_rmap);
	enic->netdev->rx_cpu_rmap = NULL;
}

static void enic_set_rx_cpu_rmap(struct enic *enic)
{
	int i, res;

	if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) {
		enic->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(enic->rq_count);
		if (unlikely(!enic->netdev->rx_cpu_rmap))
			return;
		for (i = 0; i < enic->rq_count; i++) {
			res = irq_cpu_rmap_add(enic->netdev->rx_cpu_rmap,
					       enic->msix_entry[i].vector);
			if (unlikely(res)) {
				enic_free_rx_cpu_rmap(enic);
				return;
			}
		}
	}
}

#else

static void enic_free_rx_cpu_rmap(struct enic *enic)
{
}

static void enic_set_rx_cpu_rmap(struct enic *enic)
{
}

#endif /* CONFIG_RFS_ACCEL */

static int enic_poll_msix_wq(struct napi_struct *napi, int budget)
{
	struct net_device *netdev = napi->dev;
	struct enic *enic = netdev_priv(netdev);
	unsigned int wq_index = (napi - &enic->napi[0]) - enic->rq_count;
	struct vnic_wq *wq = &enic->wq[wq_index].vwq;
	unsigned int cq;
	unsigned int intr;
	unsigned int wq_work_to_do = ENIC_WQ_NAPI_BUDGET;
	unsigned int wq_work_done;
	unsigned int wq_irq;

	wq_irq = wq->index;
	cq = enic_cq_wq(enic, wq_irq);
	intr = enic_msix_wq_intr(enic, wq_irq);
	wq_work_done = vnic_cq_service(&enic->cq[cq], wq_work_to_do,
				       enic_wq_service, NULL);

	vnic_intr_return_credits(&enic->intr[intr], wq_work_done,
				 0 /* don't unmask intr */,
				 1 /* reset intr timer */);
	if (!wq_work_done) {
		napi_complete(napi);
		vnic_intr_unmask(&enic->intr[intr]);
		return 0;
	}

	return budget;
}

static int enic_poll_msix_rq(struct napi_struct *napi, int budget)
{
	struct net_device *netdev = napi->dev;
	struct enic *enic = netdev_priv(netdev);
	unsigned int rq = (napi - &enic->napi[0]);
	unsigned int cq = enic_cq_rq(enic, rq);
	unsigned int intr = enic_msix_rq_intr(enic, rq);
	unsigned int work_to_do = budget;
	unsigned int work_done = 0;
	int err;

	/* Service RQ
	 */

	if (budget > 0)
		work_done = vnic_cq_service(&enic->cq[cq],
			work_to_do, enic_rq_service, NULL);

	/* Return intr event credits for this polling
	 * cycle.  An intr event is the completion of a
	 * RQ packet.
	 */

	if (work_done > 0)
		vnic_intr_return_credits(&enic->intr[intr],
			work_done,
			0 /* don't unmask intr */,
			0 /* don't reset intr timer */);

	err = vnic_rq_fill(&enic->rq[rq].vrq, enic_rq_alloc_buf);

	/* Buffer allocation failed. Stay in polling mode
	 * so we can try to fill the ring again.
	 */

	if (err)
		work_done = work_to_do;
	if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
		/* Call the function which refreshes the intr coalescing timer
		 * value based on the traffic.
		 */
		enic_calc_int_moderation(enic, &enic->rq[rq].vrq);

	if ((work_done < budget) && napi_complete_done(napi, work_done)) {

		/* Some work done, but not enough to stay in polling,
		 * exit polling
		 */

		if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
			enic_set_int_moderation(enic, &enic->rq[rq].vrq);
		vnic_intr_unmask(&enic->intr[intr]);
		enic->rq[rq].stats.napi_complete++;
	} else {
		enic->rq[rq].stats.napi_repoll++;
	}

	return work_done;
}

static void enic_notify_timer(struct timer_list *t)
{
	struct enic *enic = from_timer(enic, t, notify_timer);

	enic_notify_check(enic);

	mod_timer(&enic->notify_timer,
		  round_jiffies(jiffies + ENIC_NOTIFY_TIMER_PERIOD));
}

static void enic_free_intr(struct enic *enic)
{
	struct net_device *netdev = enic->netdev;
	unsigned int i;

	enic_free_rx_cpu_rmap(enic);
	switch (vnic_dev_get_intr_mode(enic->vdev)) {
	case VNIC_DEV_INTR_MODE_INTX:
		free_irq(enic->pdev->irq, netdev);
		break;
	case VNIC_DEV_INTR_MODE_MSI:
		free_irq(enic->pdev->irq, enic);
		break;
	case VNIC_DEV_INTR_MODE_MSIX:
		for (i = 0; i < enic->intr_count; i++)
			if (enic->msix[i].requested)
				free_irq(enic->msix_entry[i].vector,
					 enic->msix[i].devid);
		break;
	default:
		break;
	}
}

static int enic_request_intr(struct enic *enic)
{
	struct net_device *netdev = enic->netdev;
	unsigned int i, intr;
	int err = 0;

	enic_set_rx_cpu_rmap(enic);
	switch (vnic_dev_get_intr_mode(enic->vdev)) {

	case VNIC_DEV_INTR_MODE_INTX:

		err = request_irq(enic->pdev->irq, enic_isr_legacy,
			IRQF_SHARED, netdev->name, netdev);
		break;

	case VNIC_DEV_INTR_MODE_MSI:

		err = request_irq(enic->pdev->irq, enic_isr_msi,
			0, netdev->name, enic);
		break;

	case VNIC_DEV_INTR_MODE_MSIX:

		for (i = 0; i < enic->rq_count; i++) {
			intr = enic_msix_rq_intr(enic, i);
			snprintf(enic->msix[intr].devname,
				sizeof(enic->msix[intr].devname),
				"%s-rx-%u", netdev->name, i);
			enic->msix[intr].isr = enic_isr_msix;
			enic->msix[intr].devid = &enic->napi[i];
		}

		for (i = 0; i < enic->wq_count; i++) {
			int wq = enic_cq_wq(enic, i);

			intr = enic_msix_wq_intr(enic, i);
			snprintf(enic->msix[intr].devname,
				sizeof(enic->msix[intr].devname),
				"%s-tx-%u", netdev->name, i);
			enic->msix[intr].isr = enic_isr_msix;
			enic->msix[intr].devid = &enic->napi[wq];
		}

		intr = enic_msix_err_intr(enic);
		snprintf(enic->msix[intr].devname,
			sizeof(enic->msix[intr].devname),
			"%s-err", netdev->name);
		enic->msix[intr].isr = enic_isr_msix_err;
		enic->msix[intr].devid = enic;

		intr = enic_msix_notify_intr(enic);
		snprintf(enic->msix[intr].devname,
			sizeof(enic->msix[intr].devname),
			"%s-notify", netdev->name);
		enic->msix[intr].isr = enic_isr_msix_notify;
		enic->msix[intr].devid = enic;

		for (i = 0; i < enic->intr_count; i++)
			enic->msix[i].requested = 0;

		for (i = 0; i < enic->intr_count; i++) {
			err = request_irq(enic->msix_entry[i].vector,
				enic->msix[i].isr, 0,
				enic->msix[i].devname,
				enic->msix[i].devid);
			if (err) {
				enic_free_intr(enic);
				break;
			}
			enic->msix[i].requested = 1;
		}

		break;

	default:
		break;
	}

	return err;
}

static void enic_synchronize_irqs(struct enic *enic)
{
	unsigned int i;

	switch (vnic_dev_get_intr_mode(enic->vdev)) {
	case VNIC_DEV_INTR_MODE_INTX:
	case VNIC_DEV_INTR_MODE_MSI:
		synchronize_irq(enic->pdev->irq);
		break;
	case VNIC_DEV_INTR_MODE_MSIX:
		for (i = 0; i < enic->intr_count; i++)
			synchronize_irq(enic->msix_entry[i].vector);
		break;
	default:
		break;
	}
}

static int enic_dev_notify_set(struct enic *enic)
{
	int err;

	spin_lock_bh(&enic->devcmd_lock);
	switch (vnic_dev_get_intr_mode(enic->vdev)) {
	case VNIC_DEV_INTR_MODE_INTX:
		err = vnic_dev_notify_set(enic->vdev, ENIC_LEGACY_NOTIFY_INTR);
		break;
	case VNIC_DEV_INTR_MODE_MSIX:
		err = vnic_dev_notify_set(enic->vdev,
					  enic_msix_notify_intr(enic));
		break;
	default:
		err = vnic_dev_notify_set(enic->vdev, -1 /* no intr */);
		break;
	}
	spin_unlock_bh(&enic->devcmd_lock);

	return err;
}

static void enic_notify_timer_start(struct enic *enic)
{
	switch (vnic_dev_get_intr_mode(enic->vdev)) {
	case VNIC_DEV_INTR_MODE_MSI:
		mod_timer(&enic->notify_timer, jiffies);
		break;
	default:
		/* Using intr for notification for INTx/MSI-X */
		break;
	}
}
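/* enic_open() below creates one page pool per receive queue, sized to the
 * ring length and the current MTU, and uses it to post receive buffers via
 * vnic_rq_fill()/enic_rq_alloc_buf().
 */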
/* rtnl lock is held, process context */
static int enic_open(struct net_device *netdev)
{
	struct enic *enic = netdev_priv(netdev);
	unsigned int i;
	int err, ret;
	unsigned int max_pkt_len = netdev->mtu + VLAN_ETH_HLEN;
	struct page_pool_params pp_params = {
		.order = get_order(max_pkt_len),
		.pool_size = enic->config.rq_desc_count,
		.nid = dev_to_node(&enic->pdev->dev),
		.dev = &enic->pdev->dev,
		.dma_dir = DMA_FROM_DEVICE,
		.max_len = (max_pkt_len > PAGE_SIZE) ? max_pkt_len : PAGE_SIZE,
		.netdev = netdev,
		.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
	};

	err = enic_request_intr(enic);
	if (err) {
		netdev_err(netdev, "Unable to request irq.\n");
		return err;
	}
	enic_init_affinity_hint(enic);
	enic_set_affinity_hint(enic);

	err = enic_dev_notify_set(enic);
	if (err) {
		netdev_err(netdev,
			   "Failed to alloc notify buffer, aborting.\n");
		goto err_out_free_intr;
	}

	for (i = 0; i < enic->rq_count; i++) {
		/* create a page pool for each RQ */
		pp_params.napi = &enic->napi[i];
		pp_params.queue_idx = i;
		enic->rq[i].pool = page_pool_create(&pp_params);
		if (IS_ERR(enic->rq[i].pool)) {
			err = PTR_ERR(enic->rq[i].pool);
			enic->rq[i].pool = NULL;
			goto err_out_free_rq;
		}

		/* enable rq before updating rq desc */
		vnic_rq_enable(&enic->rq[i].vrq);
		vnic_rq_fill(&enic->rq[i].vrq, enic_rq_alloc_buf);
		/* Need at least one buffer on ring to get going */
		if (vnic_rq_desc_used(&enic->rq[i].vrq) == 0) {
			netdev_err(netdev, "Unable to alloc receive buffers\n");
			err = -ENOMEM;
			goto err_out_free_rq;
		}
	}

	for (i = 0; i < enic->wq_count; i++)
		vnic_wq_enable(&enic->wq[i].vwq);

	if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic))
		enic_dev_add_station_addr(enic);

	enic_set_rx_mode(netdev);

	netif_tx_wake_all_queues(netdev);

	for (i = 0; i < enic->rq_count; i++)
		napi_enable(&enic->napi[i]);

	if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX)
		for (i = 0; i < enic->wq_count; i++)
			napi_enable(&enic->napi[enic_cq_wq(enic, i)]);
	enic_dev_enable(enic);

	for (i = 0; i < enic->intr_count; i++)
		vnic_intr_unmask(&enic->intr[i]);

	enic_notify_timer_start(enic);
	enic_rfs_timer_start(enic);

	return 0;

err_out_free_rq:
	for (i = 0; i < enic->rq_count; i++) {
		ret = vnic_rq_disable(&enic->rq[i].vrq);
		if (!ret) {
			vnic_rq_clean(&enic->rq[i].vrq, enic_free_rq_buf);
			page_pool_destroy(enic->rq[i].pool);
			enic->rq[i].pool = NULL;
		}
	}
	enic_dev_notify_unset(enic);
err_out_free_intr:
	enic_unset_affinity_hint(enic);
	enic_free_intr(enic);

	return err;
}
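/* enic_stop() below tears the device down in the reverse order of
 * enic_open(): interrupts are masked and synchronized, NAPI contexts are
 * disabled, the queues are disabled, and finally the rings and interrupt
 * resources are cleaned.
 */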
/* rtnl lock is held, process context */
static int enic_stop(struct net_device *netdev)
{
	struct enic *enic = netdev_priv(netdev);
	unsigned int i;
	int err;

	for (i = 0; i < enic->intr_count; i++) {
		vnic_intr_mask(&enic->intr[i]);
		(void)vnic_intr_masked(&enic->intr[i]);	/* flush write */
	}

	enic_synchronize_irqs(enic);

	del_timer_sync(&enic->notify_timer);
	enic_rfs_flw_tbl_free(enic);

	enic_dev_disable(enic);

	for (i = 0; i < enic->rq_count; i++)
		napi_disable(&enic->napi[i]);

	netif_carrier_off(netdev);
	if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX)
		for (i = 0; i < enic->wq_count; i++)
			napi_disable(&enic->napi[enic_cq_wq(enic, i)]);
	netif_tx_disable(netdev);

	if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic))
		enic_dev_del_station_addr(enic);

	for (i = 0; i < enic->wq_count; i++) {
		err = vnic_wq_disable(&enic->wq[i].vwq);
		if (err)
			return err;
	}
	for (i = 0; i < enic->rq_count; i++) {
		err = vnic_rq_disable(&enic->rq[i].vrq);
		if (err)
			return err;
	}

	enic_dev_notify_unset(enic);
	enic_unset_affinity_hint(enic);
	enic_free_intr(enic);

	for (i = 0; i < enic->wq_count; i++)
		vnic_wq_clean(&enic->wq[i].vwq, enic_free_wq_buf);
	for (i = 0; i < enic->rq_count; i++) {
		vnic_rq_clean(&enic->rq[i].vrq, enic_free_rq_buf);
		page_pool_destroy(enic->rq[i].pool);
		enic->rq[i].pool = NULL;
	}
	for (i = 0; i < enic->cq_count; i++)
		vnic_cq_clean(&enic->cq[i]);
	for (i = 0; i < enic->intr_count; i++)
		vnic_intr_clean(&enic->intr[i]);

	return 0;
}

static int _enic_change_mtu(struct net_device *netdev, int new_mtu)
{
	bool running = netif_running(netdev);
	int err = 0;

	ASSERT_RTNL();
	if (running) {
		err = enic_stop(netdev);
		if (err)
			return err;
	}

	WRITE_ONCE(netdev->mtu, new_mtu);

	if (running) {
		err = enic_open(netdev);
		if (err)
			return err;
	}

	return 0;
}

static int enic_change_mtu(struct net_device *netdev, int new_mtu)
{
	struct enic *enic = netdev_priv(netdev);

	if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic))
		return -EOPNOTSUPP;

	if (netdev->mtu > enic->port_mtu)
		netdev_warn(netdev,
			    "interface MTU (%d) set higher than port MTU (%d)\n",
			    netdev->mtu, enic->port_mtu);

	return _enic_change_mtu(netdev, new_mtu);
}

static void enic_change_mtu_work(struct work_struct *work)
{
	struct enic *enic = container_of(work, struct enic, change_mtu_work);
	struct net_device *netdev = enic->netdev;
	int new_mtu = vnic_dev_mtu(enic->vdev);

	rtnl_lock();
	(void)_enic_change_mtu(netdev, new_mtu);
	rtnl_unlock();

	netdev_info(netdev, "interface MTU set as %d\n", netdev->mtu);
}

#ifdef CONFIG_NET_POLL_CONTROLLER
static void enic_poll_controller(struct net_device *netdev)
{
	struct enic *enic = netdev_priv(netdev);
	struct vnic_dev *vdev = enic->vdev;
	unsigned int i, intr;

	switch (vnic_dev_get_intr_mode(vdev)) {
	case VNIC_DEV_INTR_MODE_MSIX:
		for (i = 0; i < enic->rq_count; i++) {
			intr = enic_msix_rq_intr(enic, i);
			enic_isr_msix(enic->msix_entry[intr].vector,
				      &enic->napi[i]);
		}

		for (i = 0; i < enic->wq_count; i++) {
			intr = enic_msix_wq_intr(enic, i);
			enic_isr_msix(enic->msix_entry[intr].vector,
				      &enic->napi[enic_cq_wq(enic, i)]);
		}

		break;
	case VNIC_DEV_INTR_MODE_MSI:
		enic_isr_msi(enic->pdev->irq, enic);
		break;
	case VNIC_DEV_INTR_MODE_INTX:
		enic_isr_legacy(enic->pdev->irq, netdev);
		break;
	default:
		break;
	}
}
#endif

static int enic_dev_wait(struct vnic_dev *vdev,
	int (*start)(struct vnic_dev *, int),
	int (*finished)(struct vnic_dev *, int *),
	int arg)
{
	unsigned long time;
	int done;
	int err;

	err = start(vdev, arg);
	if (err)
		return err;

	/* Wait for func to complete...2 seconds max
	 */

	time = jiffies + (HZ * 2);
	do {

		err = finished(vdev, &done);
		if (err)
			return err;

		if (done)
			return 0;

		schedule_timeout_uninterruptible(HZ / 10);

	} while (time_after(time, jiffies));

	return -ETIMEDOUT;
}
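/* enic_dev_wait() polls the "finished" callback roughly every 100 ms
 * (HZ / 10) for up to two seconds; the open/soft-reset/hang-reset wrappers
 * below all use it to wait for firmware command completion.
 */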
enic_dev_wait(enic->vdev, vnic_dev_open, 2014 vnic_dev_open_done, flags); 2015 if (err) 2016 dev_err(enic_get_dev(enic), "vNIC device open failed, err %d\n", 2017 err); 2018 2019 return err; 2020 } 2021 2022 static int enic_dev_soft_reset(struct enic *enic) 2023 { 2024 int err; 2025 2026 err = enic_dev_wait(enic->vdev, vnic_dev_soft_reset, 2027 vnic_dev_soft_reset_done, 0); 2028 if (err) 2029 netdev_err(enic->netdev, "vNIC soft reset failed, err %d\n", 2030 err); 2031 2032 return err; 2033 } 2034 2035 static int enic_dev_hang_reset(struct enic *enic) 2036 { 2037 int err; 2038 2039 err = enic_dev_wait(enic->vdev, vnic_dev_hang_reset, 2040 vnic_dev_hang_reset_done, 0); 2041 if (err) 2042 netdev_err(enic->netdev, "vNIC hang reset failed, err %d\n", 2043 err); 2044 2045 return err; 2046 } 2047 2048 int __enic_set_rsskey(struct enic *enic) 2049 { 2050 union vnic_rss_key *rss_key_buf_va; 2051 dma_addr_t rss_key_buf_pa; 2052 int i, kidx, bidx, err; 2053 2054 rss_key_buf_va = dma_alloc_coherent(&enic->pdev->dev, 2055 sizeof(union vnic_rss_key), 2056 &rss_key_buf_pa, GFP_ATOMIC); 2057 if (!rss_key_buf_va) 2058 return -ENOMEM; 2059 2060 for (i = 0; i < ENIC_RSS_LEN; i++) { 2061 kidx = i / ENIC_RSS_BYTES_PER_KEY; 2062 bidx = i % ENIC_RSS_BYTES_PER_KEY; 2063 rss_key_buf_va->key[kidx].b[bidx] = enic->rss_key[i]; 2064 } 2065 spin_lock_bh(&enic->devcmd_lock); 2066 err = enic_set_rss_key(enic, 2067 rss_key_buf_pa, 2068 sizeof(union vnic_rss_key)); 2069 spin_unlock_bh(&enic->devcmd_lock); 2070 2071 dma_free_coherent(&enic->pdev->dev, sizeof(union vnic_rss_key), 2072 rss_key_buf_va, rss_key_buf_pa); 2073 2074 return err; 2075 } 2076 2077 static int enic_set_rsskey(struct enic *enic) 2078 { 2079 netdev_rss_key_fill(enic->rss_key, ENIC_RSS_LEN); 2080 2081 return __enic_set_rsskey(enic); 2082 } 2083 2084 static int enic_set_rsscpu(struct enic *enic, u8 rss_hash_bits) 2085 { 2086 dma_addr_t rss_cpu_buf_pa; 2087 union vnic_rss_cpu *rss_cpu_buf_va = NULL; 2088 unsigned int i; 2089 int err; 2090 2091 rss_cpu_buf_va = dma_alloc_coherent(&enic->pdev->dev, 2092 sizeof(union vnic_rss_cpu), 2093 &rss_cpu_buf_pa, GFP_ATOMIC); 2094 if (!rss_cpu_buf_va) 2095 return -ENOMEM; 2096 2097 for (i = 0; i < (1 << rss_hash_bits); i++) 2098 (*rss_cpu_buf_va).cpu[i/4].b[i%4] = i % enic->rq_count; 2099 2100 spin_lock_bh(&enic->devcmd_lock); 2101 err = enic_set_rss_cpu(enic, 2102 rss_cpu_buf_pa, 2103 sizeof(union vnic_rss_cpu)); 2104 spin_unlock_bh(&enic->devcmd_lock); 2105 2106 dma_free_coherent(&enic->pdev->dev, sizeof(union vnic_rss_cpu), 2107 rss_cpu_buf_va, rss_cpu_buf_pa); 2108 2109 return err; 2110 } 2111 2112 static int enic_set_niccfg(struct enic *enic, u8 rss_default_cpu, 2113 u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable) 2114 { 2115 const u8 tso_ipid_split_en = 0; 2116 const u8 ig_vlan_strip_en = 1; 2117 int err; 2118 2119 /* Enable VLAN tag stripping. 
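 * The enic_set_nic_cfg() call below programs the RSS parameters
 * passed in by the caller together with the TSO IPID split and VLAN
 * strip settings, so the whole NIC config is issued as a single
 * operation under devcmd_lock.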
2120 */ 2121 2122 spin_lock_bh(&enic->devcmd_lock); 2123 err = enic_set_nic_cfg(enic, 2124 rss_default_cpu, rss_hash_type, 2125 rss_hash_bits, rss_base_cpu, 2126 rss_enable, tso_ipid_split_en, 2127 ig_vlan_strip_en); 2128 spin_unlock_bh(&enic->devcmd_lock); 2129 2130 return err; 2131 } 2132 2133 static int enic_set_rss_nic_cfg(struct enic *enic) 2134 { 2135 struct device *dev = enic_get_dev(enic); 2136 const u8 rss_default_cpu = 0; 2137 const u8 rss_hash_bits = 7; 2138 const u8 rss_base_cpu = 0; 2139 u8 rss_hash_type; 2140 int res; 2141 u8 rss_enable = ENIC_SETTING(enic, RSS) && (enic->rq_count > 1); 2142 2143 spin_lock_bh(&enic->devcmd_lock); 2144 res = vnic_dev_capable_rss_hash_type(enic->vdev, &rss_hash_type); 2145 spin_unlock_bh(&enic->devcmd_lock); 2146 if (res) { 2147 /* defaults for old adapters 2148 */ 2149 rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 | 2150 NIC_CFG_RSS_HASH_TYPE_TCP_IPV4 | 2151 NIC_CFG_RSS_HASH_TYPE_IPV6 | 2152 NIC_CFG_RSS_HASH_TYPE_TCP_IPV6; 2153 } 2154 2155 if (rss_enable) { 2156 if (!enic_set_rsskey(enic)) { 2157 if (enic_set_rsscpu(enic, rss_hash_bits)) { 2158 rss_enable = 0; 2159 dev_warn(dev, "RSS disabled, " 2160 "Failed to set RSS cpu indirection table."); 2161 } 2162 } else { 2163 rss_enable = 0; 2164 dev_warn(dev, "RSS disabled, Failed to set RSS key.\n"); 2165 } 2166 } 2167 2168 return enic_set_niccfg(enic, rss_default_cpu, rss_hash_type, 2169 rss_hash_bits, rss_base_cpu, rss_enable); 2170 } 2171 2172 static void enic_set_api_busy(struct enic *enic, bool busy) 2173 { 2174 spin_lock(&enic->enic_api_lock); 2175 enic->enic_api_busy = busy; 2176 spin_unlock(&enic->enic_api_lock); 2177 } 2178 2179 static void enic_reset(struct work_struct *work) 2180 { 2181 struct enic *enic = container_of(work, struct enic, reset); 2182 2183 if (!netif_running(enic->netdev)) 2184 return; 2185 2186 rtnl_lock(); 2187 2188 /* Stop any activity from infiniband */ 2189 enic_set_api_busy(enic, true); 2190 2191 enic_stop(enic->netdev); 2192 enic_dev_soft_reset(enic); 2193 enic_reset_addr_lists(enic); 2194 enic_init_vnic_resources(enic); 2195 enic_set_rss_nic_cfg(enic); 2196 enic_dev_set_ig_vlan_rewrite_mode(enic); 2197 enic_open(enic->netdev); 2198 2199 /* Allow infiniband to fiddle with the device again */ 2200 enic_set_api_busy(enic, false); 2201 2202 call_netdevice_notifiers(NETDEV_REBOOT, enic->netdev); 2203 2204 rtnl_unlock(); 2205 } 2206 2207 static void enic_tx_hang_reset(struct work_struct *work) 2208 { 2209 struct enic *enic = container_of(work, struct enic, tx_hang_reset); 2210 2211 rtnl_lock(); 2212 2213 /* Stop any activity from infiniband */ 2214 enic_set_api_busy(enic, true); 2215 2216 enic_dev_hang_notify(enic); 2217 enic_stop(enic->netdev); 2218 enic_dev_hang_reset(enic); 2219 enic_reset_addr_lists(enic); 2220 enic_init_vnic_resources(enic); 2221 enic_set_rss_nic_cfg(enic); 2222 enic_dev_set_ig_vlan_rewrite_mode(enic); 2223 enic_open(enic->netdev); 2224 2225 /* Allow infiniband to fiddle with the device again */ 2226 enic_set_api_busy(enic, false); 2227 2228 call_netdevice_notifiers(NETDEV_REBOOT, enic->netdev); 2229 2230 rtnl_unlock(); 2231 } 2232 2233 static int enic_set_intr_mode(struct enic *enic) 2234 { 2235 unsigned int i; 2236 int num_intr; 2237 2238 /* Set interrupt mode (INTx, MSI, MSI-X) depending 2239 * on system capabilities. 
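 * The fallback order below is MSI-X (needs at least
 * ENIC_MSIX_MIN_INTR vectors), then MSI (one vector), then INTx
 * (three vectors: WQ/RQ, error, notify). A non-zero
 * config.intr_mode from the vNIC config excludes the more capable
 * modes.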
2240 * 2241 * Try MSI-X first 2242 */ 2243 2244 if (enic->config.intr_mode < 1 && 2245 enic->intr_avail >= ENIC_MSIX_MIN_INTR) { 2246 for (i = 0; i < enic->intr_avail; i++) 2247 enic->msix_entry[i].entry = i; 2248 2249 num_intr = pci_enable_msix_range(enic->pdev, enic->msix_entry, 2250 ENIC_MSIX_MIN_INTR, 2251 enic->intr_avail); 2252 if (num_intr > 0) { 2253 vnic_dev_set_intr_mode(enic->vdev, 2254 VNIC_DEV_INTR_MODE_MSIX); 2255 enic->intr_avail = num_intr; 2256 return 0; 2257 } 2258 } 2259 2260 /* Next try MSI 2261 * 2262 * We need 1 INTR 2263 */ 2264 2265 if (enic->config.intr_mode < 2 && 2266 enic->intr_avail >= 1 && 2267 !pci_enable_msi(enic->pdev)) { 2268 enic->intr_avail = 1; 2269 vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_MSI); 2270 return 0; 2271 } 2272 2273 /* Next try INTx 2274 * 2275 * We need 3 INTRs 2276 * (the first INTR is used for WQ/RQ) 2277 * (the second INTR is used for WQ/RQ errors) 2278 * (the last INTR is used for notifications) 2279 */ 2280 2281 if (enic->config.intr_mode < 3 && 2282 enic->intr_avail >= 3) { 2283 enic->intr_avail = 3; 2284 vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_INTX); 2285 return 0; 2286 } 2287 2288 vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_UNKNOWN); 2289 2290 return -EINVAL; 2291 } 2292 2293 static void enic_clear_intr_mode(struct enic *enic) 2294 { 2295 switch (vnic_dev_get_intr_mode(enic->vdev)) { 2296 case VNIC_DEV_INTR_MODE_MSIX: 2297 pci_disable_msix(enic->pdev); 2298 break; 2299 case VNIC_DEV_INTR_MODE_MSI: 2300 pci_disable_msi(enic->pdev); 2301 break; 2302 default: 2303 break; 2304 } 2305 2306 vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_UNKNOWN); 2307 } 2308 2309 static int enic_adjust_resources(struct enic *enic) 2310 { 2311 unsigned int max_queues; 2312 unsigned int rq_default; 2313 unsigned int rq_avail; 2314 unsigned int wq_avail; 2315 2316 if (enic->rq_avail < 1 || enic->wq_avail < 1 || enic->cq_avail < 2) { 2317 dev_err(enic_get_dev(enic), 2318 "Not enough resources available rq: %d wq: %d cq: %d\n", 2319 enic->rq_avail, enic->wq_avail, 2320 enic->cq_avail); 2321 return -ENOSPC; 2322 } 2323 2324 if (is_kdump_kernel()) { 2325 dev_info(enic_get_dev(enic), "Running from within kdump kernel. 
Using minimal resources\n"); 2326 enic->rq_avail = 1; 2327 enic->wq_avail = 1; 2328 enic->config.rq_desc_count = ENIC_MIN_RQ_DESCS; 2329 enic->config.wq_desc_count = ENIC_MIN_WQ_DESCS; 2330 enic->config.mtu = min_t(u16, 1500, enic->config.mtu); 2331 } 2332 2333 /* if RSS isn't set, then we can only use one RQ */ 2334 if (!ENIC_SETTING(enic, RSS)) 2335 enic->rq_avail = 1; 2336 2337 switch (vnic_dev_get_intr_mode(enic->vdev)) { 2338 case VNIC_DEV_INTR_MODE_INTX: 2339 case VNIC_DEV_INTR_MODE_MSI: 2340 enic->rq_count = 1; 2341 enic->wq_count = 1; 2342 enic->cq_count = 2; 2343 enic->intr_count = enic->intr_avail; 2344 break; 2345 case VNIC_DEV_INTR_MODE_MSIX: 2346 /* Adjust the number of wqs/rqs/cqs/interrupts that will be 2347 * used based on which resource is the most constrained 2348 */ 2349 wq_avail = min(enic->wq_avail, ENIC_WQ_MAX); 2350 rq_default = netif_get_num_default_rss_queues(); 2351 rq_avail = min3(enic->rq_avail, ENIC_RQ_MAX, rq_default); 2352 max_queues = min(enic->cq_avail, 2353 enic->intr_avail - ENIC_MSIX_RESERVED_INTR); 2354 if (wq_avail + rq_avail <= max_queues) { 2355 enic->rq_count = rq_avail; 2356 enic->wq_count = wq_avail; 2357 } else { 2358 /* recalculate wq/rq count */ 2359 if (rq_avail < wq_avail) { 2360 enic->rq_count = min(rq_avail, max_queues / 2); 2361 enic->wq_count = max_queues - enic->rq_count; 2362 } else { 2363 enic->wq_count = min(wq_avail, max_queues / 2); 2364 enic->rq_count = max_queues - enic->wq_count; 2365 } 2366 } 2367 enic->cq_count = enic->rq_count + enic->wq_count; 2368 enic->intr_count = enic->cq_count + ENIC_MSIX_RESERVED_INTR; 2369 2370 break; 2371 default: 2372 dev_err(enic_get_dev(enic), "Unknown interrupt mode\n"); 2373 return -EINVAL; 2374 } 2375 2376 return 0; 2377 } 2378 2379 static void enic_get_queue_stats_rx(struct net_device *dev, int idx, 2380 struct netdev_queue_stats_rx *rxs) 2381 { 2382 struct enic *enic = netdev_priv(dev); 2383 struct enic_rq_stats *rqstats = &enic->rq[idx].stats; 2384 2385 rxs->bytes = rqstats->bytes; 2386 rxs->packets = rqstats->packets; 2387 rxs->hw_drops = rqstats->bad_fcs + rqstats->pkt_truncated; 2388 rxs->hw_drop_overruns = rqstats->pkt_truncated; 2389 rxs->csum_unnecessary = rqstats->csum_unnecessary + 2390 rqstats->csum_unnecessary_encap; 2391 rxs->alloc_fail = rqstats->pp_alloc_fail; 2392 } 2393 2394 static void enic_get_queue_stats_tx(struct net_device *dev, int idx, 2395 struct netdev_queue_stats_tx *txs) 2396 { 2397 struct enic *enic = netdev_priv(dev); 2398 struct enic_wq_stats *wqstats = &enic->wq[idx].stats; 2399 2400 txs->bytes = wqstats->bytes; 2401 txs->packets = wqstats->packets; 2402 txs->csum_none = wqstats->csum_none; 2403 txs->needs_csum = wqstats->csum_partial + wqstats->encap_csum + 2404 wqstats->tso; 2405 txs->hw_gso_packets = wqstats->tso; 2406 txs->stop = wqstats->stopped; 2407 txs->wake = wqstats->wake; 2408 } 2409 2410 static void enic_get_base_stats(struct net_device *dev, 2411 struct netdev_queue_stats_rx *rxs, 2412 struct netdev_queue_stats_tx *txs) 2413 { 2414 rxs->bytes = 0; 2415 rxs->packets = 0; 2416 rxs->hw_drops = 0; 2417 rxs->hw_drop_overruns = 0; 2418 rxs->csum_unnecessary = 0; 2419 rxs->alloc_fail = 0; 2420 txs->bytes = 0; 2421 txs->packets = 0; 2422 txs->csum_none = 0; 2423 txs->needs_csum = 0; 2424 txs->hw_gso_packets = 0; 2425 txs->stop = 0; 2426 txs->wake = 0; 2427 } 2428 2429 static const struct net_device_ops enic_netdev_dynamic_ops = { 2430 .ndo_open = enic_open, 2431 .ndo_stop = enic_stop, 2432 .ndo_start_xmit = enic_hard_start_xmit, 2433 .ndo_get_stats64 = 
enic_get_stats, 2434 .ndo_validate_addr = eth_validate_addr, 2435 .ndo_set_rx_mode = enic_set_rx_mode, 2436 .ndo_set_mac_address = enic_set_mac_address_dynamic, 2437 .ndo_change_mtu = enic_change_mtu, 2438 .ndo_vlan_rx_add_vid = enic_vlan_rx_add_vid, 2439 .ndo_vlan_rx_kill_vid = enic_vlan_rx_kill_vid, 2440 .ndo_tx_timeout = enic_tx_timeout, 2441 .ndo_set_vf_port = enic_set_vf_port, 2442 .ndo_get_vf_port = enic_get_vf_port, 2443 .ndo_set_vf_mac = enic_set_vf_mac, 2444 #ifdef CONFIG_NET_POLL_CONTROLLER 2445 .ndo_poll_controller = enic_poll_controller, 2446 #endif 2447 #ifdef CONFIG_RFS_ACCEL 2448 .ndo_rx_flow_steer = enic_rx_flow_steer, 2449 #endif 2450 .ndo_features_check = enic_features_check, 2451 }; 2452 2453 static const struct net_device_ops enic_netdev_ops = { 2454 .ndo_open = enic_open, 2455 .ndo_stop = enic_stop, 2456 .ndo_start_xmit = enic_hard_start_xmit, 2457 .ndo_get_stats64 = enic_get_stats, 2458 .ndo_validate_addr = eth_validate_addr, 2459 .ndo_set_mac_address = enic_set_mac_address, 2460 .ndo_set_rx_mode = enic_set_rx_mode, 2461 .ndo_change_mtu = enic_change_mtu, 2462 .ndo_vlan_rx_add_vid = enic_vlan_rx_add_vid, 2463 .ndo_vlan_rx_kill_vid = enic_vlan_rx_kill_vid, 2464 .ndo_tx_timeout = enic_tx_timeout, 2465 .ndo_set_vf_port = enic_set_vf_port, 2466 .ndo_get_vf_port = enic_get_vf_port, 2467 .ndo_set_vf_mac = enic_set_vf_mac, 2468 #ifdef CONFIG_NET_POLL_CONTROLLER 2469 .ndo_poll_controller = enic_poll_controller, 2470 #endif 2471 #ifdef CONFIG_RFS_ACCEL 2472 .ndo_rx_flow_steer = enic_rx_flow_steer, 2473 #endif 2474 .ndo_features_check = enic_features_check, 2475 }; 2476 2477 static const struct netdev_stat_ops enic_netdev_stat_ops = { 2478 .get_queue_stats_rx = enic_get_queue_stats_rx, 2479 .get_queue_stats_tx = enic_get_queue_stats_tx, 2480 .get_base_stats = enic_get_base_stats, 2481 }; 2482 2483 static void enic_free_enic_resources(struct enic *enic) 2484 { 2485 kfree(enic->wq); 2486 enic->wq = NULL; 2487 2488 kfree(enic->rq); 2489 enic->rq = NULL; 2490 2491 kfree(enic->cq); 2492 enic->cq = NULL; 2493 2494 kfree(enic->napi); 2495 enic->napi = NULL; 2496 2497 kfree(enic->msix_entry); 2498 enic->msix_entry = NULL; 2499 2500 kfree(enic->msix); 2501 enic->msix = NULL; 2502 2503 kfree(enic->intr); 2504 enic->intr = NULL; 2505 } 2506 2507 static int enic_alloc_enic_resources(struct enic *enic) 2508 { 2509 enic->wq = kcalloc(enic->wq_avail, sizeof(struct enic_wq), GFP_KERNEL); 2510 if (!enic->wq) 2511 goto free_queues; 2512 2513 enic->rq = kcalloc(enic->rq_avail, sizeof(struct enic_rq), GFP_KERNEL); 2514 if (!enic->rq) 2515 goto free_queues; 2516 2517 enic->cq = kcalloc(enic->cq_avail, sizeof(struct vnic_cq), GFP_KERNEL); 2518 if (!enic->cq) 2519 goto free_queues; 2520 2521 enic->napi = kcalloc(enic->wq_avail + enic->rq_avail, 2522 sizeof(struct napi_struct), GFP_KERNEL); 2523 if (!enic->napi) 2524 goto free_queues; 2525 2526 enic->msix_entry = kcalloc(enic->intr_avail, sizeof(struct msix_entry), 2527 GFP_KERNEL); 2528 if (!enic->msix_entry) 2529 goto free_queues; 2530 2531 enic->msix = kcalloc(enic->intr_avail, sizeof(struct enic_msix_entry), 2532 GFP_KERNEL); 2533 if (!enic->msix) 2534 goto free_queues; 2535 2536 enic->intr = kcalloc(enic->intr_avail, sizeof(struct vnic_intr), 2537 GFP_KERNEL); 2538 if (!enic->intr) 2539 goto free_queues; 2540 2541 return 0; 2542 2543 free_queues: 2544 enic_free_enic_resources(enic); 2545 return -ENOMEM; 2546 } 2547 2548 static void enic_dev_deinit(struct enic *enic) 2549 { 2550 unsigned int i; 2551 2552 for (i = 0; i < enic->rq_count; i++) 2553 
__netif_napi_del(&enic->napi[i]); 2554 2555 if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) 2556 for (i = 0; i < enic->wq_count; i++) 2557 __netif_napi_del(&enic->napi[enic_cq_wq(enic, i)]); 2558 2559 /* observe RCU grace period after __netif_napi_del() calls */ 2560 synchronize_net(); 2561 2562 enic_free_vnic_resources(enic); 2563 enic_clear_intr_mode(enic); 2564 enic_free_affinity_hint(enic); 2565 enic_free_enic_resources(enic); 2566 } 2567 2568 static int enic_dev_init(struct enic *enic) 2569 { 2570 struct device *dev = enic_get_dev(enic); 2571 struct net_device *netdev = enic->netdev; 2572 unsigned int i; 2573 int err; 2574 2575 /* Get interrupt coalesce timer info */ 2576 err = enic_dev_intr_coal_timer_info(enic); 2577 if (err) { 2578 dev_warn(dev, "Using default conversion factor for " 2579 "interrupt coalesce timer\n"); 2580 vnic_dev_intr_coal_timer_info_default(enic->vdev); 2581 } 2582 2583 /* Get vNIC configuration 2584 */ 2585 2586 err = enic_get_vnic_config(enic); 2587 if (err) { 2588 dev_err(dev, "Get vNIC configuration failed, aborting\n"); 2589 return err; 2590 } 2591 2592 /* Get available resource counts 2593 */ 2594 2595 enic_get_res_counts(enic); 2596 2597 err = enic_alloc_enic_resources(enic); 2598 if (err) { 2599 dev_err(dev, "Failed to allocate enic resources\n"); 2600 return err; 2601 } 2602 2603 /* Set interrupt mode based on system capabilities */ 2604 2605 err = enic_set_intr_mode(enic); 2606 if (err) { 2607 dev_err(dev, "Failed to set intr mode based on resource " 2608 "counts and system capabilities, aborting\n"); 2609 goto err_out_free_vnic_resources; 2610 } 2611 2612 /* Adjust resource counts based on most constrained resources */ 2613 err = enic_adjust_resources(enic); 2614 if (err) { 2615 dev_err(dev, "Failed to adjust resources\n"); 2616 goto err_out_free_vnic_resources; 2617 } 2618 2619 /* Allocate and configure vNIC resources 2620 */ 2621 2622 err = enic_alloc_vnic_resources(enic); 2623 if (err) { 2624 dev_err(dev, "Failed to alloc vNIC resources, aborting\n"); 2625 goto err_out_free_vnic_resources; 2626 } 2627 2628 enic_init_vnic_resources(enic); 2629 2630 err = enic_set_rss_nic_cfg(enic); 2631 if (err) { 2632 dev_err(dev, "Failed to config nic, aborting\n"); 2633 goto err_out_free_vnic_resources; 2634 } 2635 2636 switch (vnic_dev_get_intr_mode(enic->vdev)) { 2637 default: 2638 netif_napi_add(netdev, &enic->napi[0], enic_poll); 2639 break; 2640 case VNIC_DEV_INTR_MODE_MSIX: 2641 for (i = 0; i < enic->rq_count; i++) { 2642 netif_napi_add(netdev, &enic->napi[i], 2643 enic_poll_msix_rq); 2644 } 2645 for (i = 0; i < enic->wq_count; i++) 2646 netif_napi_add(netdev, 2647 &enic->napi[enic_cq_wq(enic, i)], 2648 enic_poll_msix_wq); 2649 break; 2650 } 2651 2652 return 0; 2653 2654 err_out_free_vnic_resources: 2655 enic_free_affinity_hint(enic); 2656 enic_clear_intr_mode(enic); 2657 enic_free_vnic_resources(enic); 2658 enic_free_enic_resources(enic); 2659 2660 return err; 2661 } 2662 2663 static void enic_iounmap(struct enic *enic) 2664 { 2665 unsigned int i; 2666 2667 for (i = 0; i < ARRAY_SIZE(enic->bar); i++) 2668 if (enic->bar[i].vaddr) 2669 iounmap(enic->bar[i].vaddr); 2670 } 2671 2672 static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 2673 { 2674 struct device *dev = &pdev->dev; 2675 struct net_device *netdev; 2676 struct enic *enic; 2677 int using_dac = 0; 2678 unsigned int i; 2679 int err; 2680 #ifdef CONFIG_PCI_IOV 2681 int pos = 0; 2682 #endif 2683 int num_pps = 1; 2684 2685 /* Allocate net device structure and 
initialize. Private 2686 * instance data is initialized to zero. 2687 */ 2688 2689 netdev = alloc_etherdev_mqs(sizeof(struct enic), 2690 ENIC_RQ_MAX, ENIC_WQ_MAX); 2691 if (!netdev) 2692 return -ENOMEM; 2693 2694 pci_set_drvdata(pdev, netdev); 2695 2696 SET_NETDEV_DEV(netdev, &pdev->dev); 2697 2698 enic = netdev_priv(netdev); 2699 enic->netdev = netdev; 2700 enic->pdev = pdev; 2701 2702 /* Setup PCI resources 2703 */ 2704 2705 err = pci_enable_device_mem(pdev); 2706 if (err) { 2707 dev_err(dev, "Cannot enable PCI device, aborting\n"); 2708 goto err_out_free_netdev; 2709 } 2710 2711 err = pci_request_regions(pdev, DRV_NAME); 2712 if (err) { 2713 dev_err(dev, "Cannot request PCI regions, aborting\n"); 2714 goto err_out_disable_device; 2715 } 2716 2717 pci_set_master(pdev); 2718 2719 /* Query PCI controller on system for DMA addressing 2720 * limitation for the device. Try 47-bit first, and 2721 * fail to 32-bit. 2722 */ 2723 2724 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(47)); 2725 if (err) { 2726 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); 2727 if (err) { 2728 dev_err(dev, "No usable DMA configuration, aborting\n"); 2729 goto err_out_release_regions; 2730 } 2731 } else { 2732 using_dac = 1; 2733 } 2734 2735 /* Map vNIC resources from BAR0-5 2736 */ 2737 2738 for (i = 0; i < ARRAY_SIZE(enic->bar); i++) { 2739 if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM)) 2740 continue; 2741 enic->bar[i].len = pci_resource_len(pdev, i); 2742 enic->bar[i].vaddr = pci_iomap(pdev, i, enic->bar[i].len); 2743 if (!enic->bar[i].vaddr) { 2744 dev_err(dev, "Cannot memory-map BAR %d, aborting\n", i); 2745 err = -ENODEV; 2746 goto err_out_iounmap; 2747 } 2748 enic->bar[i].bus_addr = pci_resource_start(pdev, i); 2749 } 2750 2751 /* Register vNIC device 2752 */ 2753 2754 enic->vdev = vnic_dev_register(NULL, enic, pdev, enic->bar, 2755 ARRAY_SIZE(enic->bar)); 2756 if (!enic->vdev) { 2757 dev_err(dev, "vNIC registration failed, aborting\n"); 2758 err = -ENODEV; 2759 goto err_out_iounmap; 2760 } 2761 2762 err = vnic_devcmd_init(enic->vdev); 2763 2764 if (err) 2765 goto err_out_vnic_unregister; 2766 2767 #ifdef CONFIG_PCI_IOV 2768 /* Get number of subvnics */ 2769 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); 2770 if (pos) { 2771 pci_read_config_word(pdev, pos + PCI_SRIOV_TOTAL_VF, 2772 &enic->num_vfs); 2773 if (enic->num_vfs) { 2774 err = pci_enable_sriov(pdev, enic->num_vfs); 2775 if (err) { 2776 dev_err(dev, "SRIOV enable failed, aborting." 
2777 " pci_enable_sriov() returned %d\n", 2778 err); 2779 goto err_out_vnic_unregister; 2780 } 2781 enic->priv_flags |= ENIC_SRIOV_ENABLED; 2782 num_pps = enic->num_vfs; 2783 } 2784 } 2785 #endif 2786 2787 /* Allocate structure for port profiles */ 2788 enic->pp = kcalloc(num_pps, sizeof(*enic->pp), GFP_KERNEL); 2789 if (!enic->pp) { 2790 err = -ENOMEM; 2791 goto err_out_disable_sriov_pp; 2792 } 2793 2794 /* Issue device open to get device in known state 2795 */ 2796 2797 err = enic_dev_open(enic); 2798 if (err) { 2799 dev_err(dev, "vNIC dev open failed, aborting\n"); 2800 goto err_out_disable_sriov; 2801 } 2802 2803 /* Setup devcmd lock 2804 */ 2805 2806 spin_lock_init(&enic->devcmd_lock); 2807 spin_lock_init(&enic->enic_api_lock); 2808 2809 /* 2810 * Set ingress vlan rewrite mode before vnic initialization 2811 */ 2812 2813 err = enic_dev_set_ig_vlan_rewrite_mode(enic); 2814 if (err) { 2815 dev_err(dev, 2816 "Failed to set ingress vlan rewrite mode, aborting.\n"); 2817 goto err_out_dev_close; 2818 } 2819 2820 /* Issue device init to initialize the vnic-to-switch link. 2821 * We'll start with carrier off and wait for link UP 2822 * notification later to turn on carrier. We don't need 2823 * to wait here for the vnic-to-switch link initialization 2824 * to complete; link UP notification is the indication that 2825 * the process is complete. 2826 */ 2827 2828 netif_carrier_off(netdev); 2829 2830 /* Do not call dev_init for a dynamic vnic. 2831 * For a dynamic vnic, init_prov_info will be 2832 * called later by an upper layer. 2833 */ 2834 2835 if (!enic_is_dynamic(enic)) { 2836 err = vnic_dev_init(enic->vdev, 0); 2837 if (err) { 2838 dev_err(dev, "vNIC dev init failed, aborting\n"); 2839 goto err_out_dev_close; 2840 } 2841 } 2842 2843 err = enic_dev_init(enic); 2844 if (err) { 2845 dev_err(dev, "Device initialization failed, aborting\n"); 2846 goto err_out_dev_close; 2847 } 2848 2849 netif_set_real_num_tx_queues(netdev, enic->wq_count); 2850 netif_set_real_num_rx_queues(netdev, enic->rq_count); 2851 2852 /* Setup notification timer, HW reset task, and wq locks 2853 */ 2854 2855 timer_setup(&enic->notify_timer, enic_notify_timer, 0); 2856 2857 enic_rfs_flw_tbl_init(enic); 2858 INIT_WORK(&enic->reset, enic_reset); 2859 INIT_WORK(&enic->tx_hang_reset, enic_tx_hang_reset); 2860 INIT_WORK(&enic->change_mtu_work, enic_change_mtu_work); 2861 2862 for (i = 0; i < enic->wq_count; i++) 2863 spin_lock_init(&enic->wq[i].lock); 2864 2865 /* Register net device 2866 */ 2867 2868 enic->port_mtu = enic->config.mtu; 2869 2870 err = enic_set_mac_addr(netdev, enic->mac_addr); 2871 if (err) { 2872 dev_err(dev, "Invalid MAC address, aborting\n"); 2873 goto err_out_dev_deinit; 2874 } 2875 2876 enic->tx_coalesce_usecs = enic->config.intr_timer_usec; 2877 /* rx coalesce time already got initialized. 
This gets used 2878 * if adaptive coal is turned off 2879 */ 2880 enic->rx_coalesce_usecs = enic->tx_coalesce_usecs; 2881 2882 if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) 2883 netdev->netdev_ops = &enic_netdev_dynamic_ops; 2884 else 2885 netdev->netdev_ops = &enic_netdev_ops; 2886 netdev->stat_ops = &enic_netdev_stat_ops; 2887 2888 netdev->watchdog_timeo = 2 * HZ; 2889 enic_set_ethtool_ops(netdev); 2890 2891 netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; 2892 if (ENIC_SETTING(enic, LOOP)) { 2893 netdev->features &= ~NETIF_F_HW_VLAN_CTAG_TX; 2894 enic->loop_enable = 1; 2895 enic->loop_tag = enic->config.loop_tag; 2896 dev_info(dev, "loopback tag=0x%04x\n", enic->loop_tag); 2897 } 2898 if (ENIC_SETTING(enic, TXCSUM)) 2899 netdev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM; 2900 if (ENIC_SETTING(enic, TSO)) 2901 netdev->hw_features |= NETIF_F_TSO | 2902 NETIF_F_TSO6 | NETIF_F_TSO_ECN; 2903 if (ENIC_SETTING(enic, RSS)) 2904 netdev->hw_features |= NETIF_F_RXHASH; 2905 if (ENIC_SETTING(enic, RXCSUM)) 2906 netdev->hw_features |= NETIF_F_RXCSUM; 2907 if (ENIC_SETTING(enic, VXLAN)) { 2908 u64 patch_level; 2909 u64 a1 = 0; 2910 2911 netdev->hw_enc_features |= NETIF_F_RXCSUM | 2912 NETIF_F_TSO | 2913 NETIF_F_TSO6 | 2914 NETIF_F_TSO_ECN | 2915 NETIF_F_GSO_UDP_TUNNEL | 2916 NETIF_F_HW_CSUM | 2917 NETIF_F_GSO_UDP_TUNNEL_CSUM; 2918 netdev->hw_features |= netdev->hw_enc_features; 2919 /* get bit mask from hw about supported offload bit level 2920 * BIT(0) = fw supports patch_level 0 2921 * fcoe bit = encap 2922 * fcoe_fc_crc_ok = outer csum ok 2923 * BIT(1) = always set by fw 2924 * BIT(2) = fw supports patch_level 2 2925 * BIT(0) in rss_hash = encap 2926 * BIT(1,2) in rss_hash = outer_ip_csum_ok/ 2927 * outer_tcp_csum_ok 2928 * used in enic_rq_indicate_buf 2929 */ 2930 err = vnic_dev_get_supported_feature_ver(enic->vdev, 2931 VIC_FEATURE_VXLAN, 2932 &patch_level, &a1); 2933 if (err) 2934 patch_level = 0; 2935 enic->vxlan.flags = (u8)a1; 2936 /* mask bits that are supported by driver 2937 */ 2938 patch_level &= BIT_ULL(0) | BIT_ULL(2); 2939 patch_level = fls(patch_level); 2940 patch_level = patch_level ? 
patch_level - 1 : 0; 2941 enic->vxlan.patch_level = patch_level; 2942 2943 if (vnic_dev_get_res_count(enic->vdev, RES_TYPE_WQ) == 1 || 2944 enic->vxlan.flags & ENIC_VXLAN_MULTI_WQ) { 2945 netdev->udp_tunnel_nic_info = &enic_udp_tunnels_v4; 2946 if (enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6) 2947 netdev->udp_tunnel_nic_info = &enic_udp_tunnels; 2948 } 2949 } 2950 2951 netdev->features |= netdev->hw_features; 2952 netdev->vlan_features |= netdev->features; 2953 2954 #ifdef CONFIG_RFS_ACCEL 2955 netdev->hw_features |= NETIF_F_NTUPLE; 2956 #endif 2957 2958 if (using_dac) 2959 netdev->features |= NETIF_F_HIGHDMA; 2960 2961 netdev->priv_flags |= IFF_UNICAST_FLT; 2962 2963 /* MTU range: 68 - 9000 */ 2964 netdev->min_mtu = ENIC_MIN_MTU; 2965 netdev->max_mtu = ENIC_MAX_MTU; 2966 netdev->mtu = enic->port_mtu; 2967 2968 err = register_netdev(netdev); 2969 if (err) { 2970 dev_err(dev, "Cannot register net device, aborting\n"); 2971 goto err_out_dev_deinit; 2972 } 2973 2974 return 0; 2975 2976 err_out_dev_deinit: 2977 enic_dev_deinit(enic); 2978 err_out_dev_close: 2979 vnic_dev_close(enic->vdev); 2980 err_out_disable_sriov: 2981 kfree(enic->pp); 2982 err_out_disable_sriov_pp: 2983 #ifdef CONFIG_PCI_IOV 2984 if (enic_sriov_enabled(enic)) { 2985 pci_disable_sriov(pdev); 2986 enic->priv_flags &= ~ENIC_SRIOV_ENABLED; 2987 } 2988 #endif 2989 err_out_vnic_unregister: 2990 vnic_dev_unregister(enic->vdev); 2991 err_out_iounmap: 2992 enic_iounmap(enic); 2993 err_out_release_regions: 2994 pci_release_regions(pdev); 2995 err_out_disable_device: 2996 pci_disable_device(pdev); 2997 err_out_free_netdev: 2998 free_netdev(netdev); 2999 3000 return err; 3001 } 3002 3003 static void enic_remove(struct pci_dev *pdev) 3004 { 3005 struct net_device *netdev = pci_get_drvdata(pdev); 3006 3007 if (netdev) { 3008 struct enic *enic = netdev_priv(netdev); 3009 3010 cancel_work_sync(&enic->reset); 3011 cancel_work_sync(&enic->change_mtu_work); 3012 unregister_netdev(netdev); 3013 enic_dev_deinit(enic); 3014 vnic_dev_close(enic->vdev); 3015 #ifdef CONFIG_PCI_IOV 3016 if (enic_sriov_enabled(enic)) { 3017 pci_disable_sriov(pdev); 3018 enic->priv_flags &= ~ENIC_SRIOV_ENABLED; 3019 } 3020 #endif 3021 kfree(enic->pp); 3022 vnic_dev_unregister(enic->vdev); 3023 enic_iounmap(enic); 3024 pci_release_regions(pdev); 3025 pci_disable_device(pdev); 3026 free_netdev(netdev); 3027 } 3028 } 3029 3030 static struct pci_driver enic_driver = { 3031 .name = DRV_NAME, 3032 .id_table = enic_id_table, 3033 .probe = enic_probe, 3034 .remove = enic_remove, 3035 }; 3036 3037 module_pci_driver(enic_driver); 3038
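/*
 * Illustrative sketch of the MSI-X queue sizing done in
 * enic_adjust_resources() above, using assumed example numbers (the
 * real values come from the vNIC config and the enic.h limits): with
 * wq_avail = 8 and rq_avail = 8 after the ENIC_WQ_MAX/ENIC_RQ_MAX and
 * netif_get_num_default_rss_queues() clamps, cq_avail = 12,
 * intr_avail = 12, and ENIC_MSIX_RESERVED_INTR assumed to be 2:
 *
 *   max_queues = min(12, 12 - 2) = 10
 *   8 + 8 > 10 and rq_avail == wq_avail, so the else branch runs:
 *   wq_count   = min(8, 10 / 2) = 5
 *   rq_count   = 10 - 5         = 5
 *   cq_count   = 5 + 5          = 10
 *   intr_count = 10 + 2         = 12
 */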