/*
 * Copyright 2008-2010 Cisco Systems, Inc. All rights reserved.
 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
 *
 * This program is free software; you may redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/workqueue.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/rtnetlink.h>
#include <linux/prefetch.h>
#include <net/ip6_checksum.h>
#include <linux/ktime.h>
#include <linux/numa.h>
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
#include <linux/crash_dump.h>
#include <net/busy_poll.h>
#include <net/vxlan.h>
#include <net/netdev_queues.h>

#include "cq_enet_desc.h"
#include "vnic_dev.h"
#include "vnic_intr.h"
#include "vnic_stats.h"
#include "vnic_vic.h"
#include "enic_res.h"
#include "enic.h"
#include "enic_dev.h"
#include "enic_pp.h"
#include "enic_clsf.h"
#include "enic_rq.h"
#include "enic_wq.h"

#define ENIC_NOTIFY_TIMER_PERIOD	(2 * HZ)

#define PCI_DEVICE_ID_CISCO_VIC_ENET		0x0043	/* ethernet vnic */
#define PCI_DEVICE_ID_CISCO_VIC_ENET_DYN	0x0044	/* enet dynamic vnic */
#define PCI_DEVICE_ID_CISCO_VIC_ENET_VF		0x0071	/* enet SRIOV VF */

/* Supported devices */
static const struct pci_device_id enic_id_table[] = {
	{ PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET) },
	{ PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_DYN) },
	{ PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_VF) },
	{ 0, }	/* end of table */
};

MODULE_DESCRIPTION(DRV_DESCRIPTION);
MODULE_AUTHOR("Scott Feldman <scofeldm@cisco.com>");
MODULE_LICENSE("GPL");
MODULE_DEVICE_TABLE(pci, enic_id_table);

#define ENIC_LARGE_PKT_THRESHOLD	1000
#define ENIC_MAX_COALESCE_TIMERS	10
/* Interrupt moderation table, which will be used to decide the
 * coalescing timer values
 * {rx_rate in Mbps, mapping percentage of the range}
 */
static struct enic_intr_mod_table mod_table[ENIC_MAX_COALESCE_TIMERS + 1] = {
	{4000,  0},
	{4400, 10},
	{5060, 20},
	{5230, 30},
	{5540, 40},
	{5820, 50},
	{6120, 60},
	{6435, 70},
	{6745, 80},
	{7000, 90},
	{0xFFFFFFFF, 100}
};

/* This table helps the driver to pick different ranges for rx coalescing
 * timer depending on the link speed.
 */
static struct enic_intr_mod_range mod_range[ENIC_MAX_LINK_SPEEDS] = {
	{0,  0}, /* 0  - 4  Gbps */
	{0,  3}, /* 4  - 10 Gbps */
	{3,  6}, /* 10+ Gbps */
};
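/* Added illustrative note (not in the original source): how the two tables
 * above combine.  enic_set_rx_coal_setting() picks a mod_range row from the
 * link speed, e.g. the 10+ Gbps row gives small/large packet range starts of
 * 3 and 6, with the range end coming from ENIC_RX_COALESCE_RANGE_END.  If
 * enic_calc_int_moderation() later measures, say, 6000 Mbps of rx traffic,
 * the {6120, 60} row of mod_table is selected and the new coalescing timer
 * becomes roughly
 *
 *	timer = range_start + (range_end - range_start) * 60 / 100
 *
 * before being damped against the previous value.
 */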
static void enic_init_affinity_hint(struct enic *enic)
{
	int numa_node = dev_to_node(&enic->pdev->dev);
	int i;

	for (i = 0; i < enic->intr_count; i++) {
		if (enic_is_err_intr(enic, i) || enic_is_notify_intr(enic, i) ||
		    (cpumask_available(enic->msix[i].affinity_mask) &&
		     !cpumask_empty(enic->msix[i].affinity_mask)))
			continue;
		if (zalloc_cpumask_var(&enic->msix[i].affinity_mask,
				       GFP_KERNEL))
			cpumask_set_cpu(cpumask_local_spread(i, numa_node),
					enic->msix[i].affinity_mask);
	}
}

static void enic_free_affinity_hint(struct enic *enic)
{
	int i;

	for (i = 0; i < enic->intr_count; i++) {
		if (enic_is_err_intr(enic, i) || enic_is_notify_intr(enic, i))
			continue;
		free_cpumask_var(enic->msix[i].affinity_mask);
	}
}

static void enic_set_affinity_hint(struct enic *enic)
{
	int i;
	int err;

	for (i = 0; i < enic->intr_count; i++) {
		if (enic_is_err_intr(enic, i) ||
		    enic_is_notify_intr(enic, i) ||
		    !cpumask_available(enic->msix[i].affinity_mask) ||
		    cpumask_empty(enic->msix[i].affinity_mask))
			continue;
		err = irq_update_affinity_hint(enic->msix_entry[i].vector,
					       enic->msix[i].affinity_mask);
		if (err)
			netdev_warn(enic->netdev, "irq_update_affinity_hint failed, err %d\n",
				    err);
	}

	for (i = 0; i < enic->wq_count; i++) {
		int wq_intr = enic_msix_wq_intr(enic, i);

		if (cpumask_available(enic->msix[wq_intr].affinity_mask) &&
		    !cpumask_empty(enic->msix[wq_intr].affinity_mask))
			netif_set_xps_queue(enic->netdev,
					    enic->msix[wq_intr].affinity_mask,
					    i);
	}
}

static void enic_unset_affinity_hint(struct enic *enic)
{
	int i;

	for (i = 0; i < enic->intr_count; i++)
		irq_update_affinity_hint(enic->msix_entry[i].vector, NULL);
}

static int enic_udp_tunnel_set_port(struct net_device *netdev,
				    unsigned int table, unsigned int entry,
				    struct udp_tunnel_info *ti)
{
	struct enic *enic = netdev_priv(netdev);
	int err;

	spin_lock_bh(&enic->devcmd_lock);

	err = vnic_dev_overlay_offload_cfg(enic->vdev,
					   OVERLAY_CFG_VXLAN_PORT_UPDATE,
					   ntohs(ti->port));
	if (err)
		goto error;

	err = vnic_dev_overlay_offload_ctrl(enic->vdev, OVERLAY_FEATURE_VXLAN,
					    enic->vxlan.patch_level);
	if (err)
		goto error;

	enic->vxlan.vxlan_udp_port_number = ntohs(ti->port);
error:
	spin_unlock_bh(&enic->devcmd_lock);

	return err;
}

static int enic_udp_tunnel_unset_port(struct net_device *netdev,
				      unsigned int table, unsigned int entry,
				      struct udp_tunnel_info *ti)
{
	struct enic *enic = netdev_priv(netdev);
	int err;

	spin_lock_bh(&enic->devcmd_lock);

	err = vnic_dev_overlay_offload_ctrl(enic->vdev, OVERLAY_FEATURE_VXLAN,
					    OVERLAY_OFFLOAD_DISABLE);
	if (err)
		goto unlock;

	enic->vxlan.vxlan_udp_port_number = 0;

unlock:
	spin_unlock_bh(&enic->devcmd_lock);

	return err;
}

static const struct udp_tunnel_nic_info enic_udp_tunnels = {
	.set_port	= enic_udp_tunnel_set_port,
	.unset_port	= enic_udp_tunnel_unset_port,
	.tables		= {
		{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
	},
}, enic_udp_tunnels_v4 = {
	.set_port	= enic_udp_tunnel_set_port,
	.unset_port	= enic_udp_tunnel_unset_port,
	.flags		= UDP_TUNNEL_NIC_INFO_IPV4_ONLY,
	.tables		= {
		{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
	},
};

static netdev_features_t enic_features_check(struct sk_buff *skb,
					     struct net_device *dev,
					     netdev_features_t features)
{
	const struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb);
	struct enic *enic = netdev_priv(dev);
	struct udphdr *udph;
	u16 port = 0;
	u8 proto;

	if (!skb->encapsulation)
		return features;

	features = vxlan_features_check(skb, features);

	switch (vlan_get_protocol(skb)) {
	case htons(ETH_P_IPV6):
		if (!(enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6))
			goto out;
		proto = ipv6_hdr(skb)->nexthdr;
		break;
	case htons(ETH_P_IP):
		proto = ip_hdr(skb)->protocol;
		break;
	default:
		goto out;
	}

	switch (eth->h_proto) {
	case ntohs(ETH_P_IPV6):
		if (!(enic->vxlan.flags & ENIC_VXLAN_INNER_IPV6))
			goto out;
		fallthrough;
	case ntohs(ETH_P_IP):
		break;
	default:
		goto out;
	}

	if (proto == IPPROTO_UDP) {
		udph = udp_hdr(skb);
		port = be16_to_cpu(udph->dest);
	}

	/* HW supports offload of only one UDP port. Remove CSUM and GSO MASK
	 * for other UDP port tunnels
	 */
	if (port != enic->vxlan.vxlan_udp_port_number)
		goto out;

	return features;

out:
	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
}

int enic_is_dynamic(struct enic *enic)
{
	return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_DYN;
}

int enic_sriov_enabled(struct enic *enic)
{
	return (enic->priv_flags & ENIC_SRIOV_ENABLED) ? 1 : 0;
}

static int enic_is_sriov_vf(struct enic *enic)
{
	return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
}

int enic_is_valid_vf(struct enic *enic, int vf)
{
#ifdef CONFIG_PCI_IOV
	return vf >= 0 && vf < enic->num_vfs;
#else
	return 0;
#endif
}

static bool enic_log_q_error(struct enic *enic)
{
	unsigned int i;
	u32 error_status;
	bool err = false;

	for (i = 0; i < enic->wq_count; i++) {
		error_status = vnic_wq_error_status(&enic->wq[i].vwq);
		err |= error_status;
		if (error_status)
			netdev_err(enic->netdev, "WQ[%d] error_status %d\n",
				   i, error_status);
	}

	for (i = 0; i < enic->rq_count; i++) {
		error_status = vnic_rq_error_status(&enic->rq[i].vrq);
		err |= error_status;
		if (error_status)
			netdev_err(enic->netdev, "RQ[%d] error_status %d\n",
				   i, error_status);
	}

	return err;
}

static void enic_msglvl_check(struct enic *enic)
{
	u32 msg_enable = vnic_dev_msg_lvl(enic->vdev);

	if (msg_enable != enic->msg_enable) {
		netdev_info(enic->netdev, "msg lvl changed from 0x%x to 0x%x\n",
			    enic->msg_enable, msg_enable);
		enic->msg_enable = msg_enable;
	}
}

static void enic_mtu_check(struct enic *enic)
{
	u32 mtu = vnic_dev_mtu(enic->vdev);
	struct net_device *netdev = enic->netdev;

	if (mtu && mtu != enic->port_mtu) {
		enic->port_mtu = mtu;
		if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) {
			mtu = max_t(int, ENIC_MIN_MTU,
				    min_t(int, ENIC_MAX_MTU, mtu));
			if (mtu != netdev->mtu)
				schedule_work(&enic->change_mtu_work);
		} else {
			if (mtu < netdev->mtu)
				netdev_warn(netdev,
					    "interface MTU (%d) set higher "
					    "than switch port MTU (%d)\n",
					    netdev->mtu, mtu);
		}
	}
}

static void enic_set_rx_coal_setting(struct enic *enic)
{
	unsigned int speed;
	int index = -1;
	struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;

	/* 1. Read the link speed from fw
	 * 2. Pick the default range for the speed
	 * 3. Update it in enic->rx_coalesce_setting
	 */
	speed = vnic_dev_port_speed(enic->vdev);
	if (speed > ENIC_LINK_SPEED_10G)
		index = ENIC_LINK_40G_INDEX;
	else if (speed > ENIC_LINK_SPEED_4G)
		index = ENIC_LINK_10G_INDEX;
	else
		index = ENIC_LINK_4G_INDEX;

	rx_coal->small_pkt_range_start = mod_range[index].small_pkt_range_start;
	rx_coal->large_pkt_range_start = mod_range[index].large_pkt_range_start;
	rx_coal->range_end = ENIC_RX_COALESCE_RANGE_END;

	/* Start with the value provided by UCSM */
	for (index = 0; index < enic->rq_count; index++)
		enic->cq[index].cur_rx_coal_timeval =
			enic->config.intr_timer_usec;

	rx_coal->use_adaptive_rx_coalesce = 1;
}

static void enic_link_check(struct enic *enic)
{
	int link_status = vnic_dev_link_status(enic->vdev);
	int carrier_ok = netif_carrier_ok(enic->netdev);

	if (link_status && !carrier_ok) {
		netdev_info(enic->netdev, "Link UP\n");
		netif_carrier_on(enic->netdev);
		enic_set_rx_coal_setting(enic);
	} else if (!link_status && carrier_ok) {
		netdev_info(enic->netdev, "Link DOWN\n");
		netif_carrier_off(enic->netdev);
	}
}

static void enic_notify_check(struct enic *enic)
{
	enic_msglvl_check(enic);
	enic_mtu_check(enic);
	enic_link_check(enic);
}

#define ENIC_TEST_INTR(pba, i) (pba & (1 << i))

static irqreturn_t enic_isr_legacy(int irq, void *data)
{
	struct net_device *netdev = data;
	struct enic *enic = netdev_priv(netdev);
	unsigned int io_intr = ENIC_LEGACY_IO_INTR;
	unsigned int err_intr = ENIC_LEGACY_ERR_INTR;
	unsigned int notify_intr = ENIC_LEGACY_NOTIFY_INTR;
	u32 pba;

	vnic_intr_mask(&enic->intr[io_intr]);

	pba = vnic_intr_legacy_pba(enic->legacy_pba);
	if (!pba) {
		vnic_intr_unmask(&enic->intr[io_intr]);
		return IRQ_NONE;	/* not our interrupt */
	}

	if (ENIC_TEST_INTR(pba, notify_intr)) {
		enic_notify_check(enic);
		vnic_intr_return_all_credits(&enic->intr[notify_intr]);
	}

	if (ENIC_TEST_INTR(pba, err_intr)) {
		vnic_intr_return_all_credits(&enic->intr[err_intr]);
		enic_log_q_error(enic);
		/* schedule recovery from WQ/RQ error */
		schedule_work(&enic->reset);
		return IRQ_HANDLED;
	}

	if (ENIC_TEST_INTR(pba, io_intr))
		napi_schedule_irqoff(&enic->napi[0]);
	else
		vnic_intr_unmask(&enic->intr[io_intr]);

	return IRQ_HANDLED;
}

static irqreturn_t enic_isr_msi(int irq, void *data)
{
	struct enic *enic = data;

	/* With MSI, there is no sharing of interrupts, so this is
	 * our interrupt and there is no need to ack it. The device
	 * is not providing per-vector masking, so the OS will not
	 * write to PCI config space to mask/unmask the interrupt.
	 * We're using mask_on_assertion for MSI, so the device
	 * automatically masks the interrupt when the interrupt is
	 * generated. Later, when exiting polling, the interrupt
	 * will be unmasked (see enic_poll).
	 *
	 * Also, the device uses the same PCIe Traffic Class (TC)
	 * for Memory Write data and MSI, so there are no ordering
	 * issues; the MSI will always arrive at the Root Complex
	 * _after_ corresponding Memory Writes (i.e. descriptor
	 * writes).
	 */

	napi_schedule_irqoff(&enic->napi[0]);

	return IRQ_HANDLED;
}

static irqreturn_t enic_isr_msix(int irq, void *data)
{
	struct napi_struct *napi = data;

	napi_schedule_irqoff(napi);

	return IRQ_HANDLED;
}

static irqreturn_t enic_isr_msix_err(int irq, void *data)
{
	struct enic *enic = data;
	unsigned int intr = enic_msix_err_intr(enic);

	vnic_intr_return_all_credits(&enic->intr[intr]);

	if (enic_log_q_error(enic))
		/* schedule recovery from WQ/RQ error */
		schedule_work(&enic->reset);

	return IRQ_HANDLED;
}

static irqreturn_t enic_isr_msix_notify(int irq, void *data)
{
	struct enic *enic = data;
	unsigned int intr = enic_msix_notify_intr(enic);

	enic_notify_check(enic);
	vnic_intr_return_all_credits(&enic->intr[intr]);

	return IRQ_HANDLED;
}

static int enic_queue_wq_skb_cont(struct enic *enic, struct vnic_wq *wq,
				  struct sk_buff *skb, unsigned int len_left,
				  int loopback)
{
	const skb_frag_t *frag;
	dma_addr_t dma_addr;

	/* Queue additional data fragments */
	for (frag = skb_shinfo(skb)->frags; len_left; frag++) {
		len_left -= skb_frag_size(frag);
		dma_addr = skb_frag_dma_map(&enic->pdev->dev, frag, 0,
					    skb_frag_size(frag),
					    DMA_TO_DEVICE);
		if (unlikely(enic_dma_map_check(enic, dma_addr)))
			return -ENOMEM;
		enic_queue_wq_desc_cont(wq, skb, dma_addr, skb_frag_size(frag),
					(len_left == 0),	/* EOP? */
					loopback);
	}

	return 0;
}

static int enic_queue_wq_skb_vlan(struct enic *enic, struct vnic_wq *wq,
				  struct sk_buff *skb, int vlan_tag_insert,
				  unsigned int vlan_tag, int loopback)
{
	unsigned int head_len = skb_headlen(skb);
	unsigned int len_left = skb->len - head_len;
	int eop = (len_left == 0);
	dma_addr_t dma_addr;
	int err = 0;

	dma_addr = dma_map_single(&enic->pdev->dev, skb->data, head_len,
				  DMA_TO_DEVICE);
	if (unlikely(enic_dma_map_check(enic, dma_addr)))
		return -ENOMEM;

	/* Queue the main skb fragment. The fragments are no larger
	 * than max MTU(9000)+ETH_HDR_LEN(14) bytes, which is less
	 * than WQ_ENET_MAX_DESC_LEN length. So only one descriptor
	 * per fragment is queued.
	 */
	enic_queue_wq_desc(wq, skb, dma_addr, head_len, vlan_tag_insert,
			   vlan_tag, eop, loopback);

	if (!eop)
		err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback);

	/* The enic_queue_wq_desc() above does not do HW checksum */
	enic->wq[wq->index].stats.csum_none++;
	enic->wq[wq->index].stats.packets++;
	enic->wq[wq->index].stats.bytes += skb->len;

	return err;
}

static int enic_queue_wq_skb_csum_l4(struct enic *enic, struct vnic_wq *wq,
				     struct sk_buff *skb, int vlan_tag_insert,
				     unsigned int vlan_tag, int loopback)
{
	unsigned int head_len = skb_headlen(skb);
	unsigned int len_left = skb->len - head_len;
	unsigned int hdr_len = skb_checksum_start_offset(skb);
	unsigned int csum_offset = hdr_len + skb->csum_offset;
	int eop = (len_left == 0);
	dma_addr_t dma_addr;
	int err = 0;

	dma_addr = dma_map_single(&enic->pdev->dev, skb->data, head_len,
				  DMA_TO_DEVICE);
	if (unlikely(enic_dma_map_check(enic, dma_addr)))
		return -ENOMEM;

	/* Queue the main skb fragment. The fragments are no larger
	 * than max MTU(9000)+ETH_HDR_LEN(14) bytes, which is less
	 * than WQ_ENET_MAX_DESC_LEN length. So only one descriptor
	 * per fragment is queued.
	 */
	enic_queue_wq_desc_csum_l4(wq, skb, dma_addr, head_len, csum_offset,
				   hdr_len, vlan_tag_insert, vlan_tag, eop,
				   loopback);

	if (!eop)
		err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback);

	enic->wq[wq->index].stats.csum_partial++;
	enic->wq[wq->index].stats.packets++;
	enic->wq[wq->index].stats.bytes += skb->len;

	return err;
}

static void enic_preload_tcp_csum_encap(struct sk_buff *skb)
{
	const struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb);

	switch (eth->h_proto) {
	case ntohs(ETH_P_IP):
		inner_ip_hdr(skb)->check = 0;
		inner_tcp_hdr(skb)->check =
			~csum_tcpudp_magic(inner_ip_hdr(skb)->saddr,
					   inner_ip_hdr(skb)->daddr, 0,
					   IPPROTO_TCP, 0);
		break;
	case ntohs(ETH_P_IPV6):
		inner_tcp_hdr(skb)->check =
			~csum_ipv6_magic(&inner_ipv6_hdr(skb)->saddr,
					 &inner_ipv6_hdr(skb)->daddr, 0,
					 IPPROTO_TCP, 0);
		break;
	default:
		WARN_ONCE(1, "Non ipv4/ipv6 inner pkt for encap offload");
		break;
	}
}

static void enic_preload_tcp_csum(struct sk_buff *skb)
{
	/* Preload TCP csum field with IP pseudo hdr calculated
	 * with IP length set to zero. HW will later add in length
	 * to each TCP segment resulting from the TSO.
	 */

	if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
		ip_hdr(skb)->check = 0;
		tcp_hdr(skb)->check = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
			ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
	} else if (skb->protocol == cpu_to_be16(ETH_P_IPV6)) {
		tcp_v6_gso_csum_prep(skb);
	}
}

static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq,
				 struct sk_buff *skb, unsigned int mss,
				 int vlan_tag_insert, unsigned int vlan_tag,
				 int loopback)
{
	unsigned int frag_len_left = skb_headlen(skb);
	unsigned int len_left = skb->len - frag_len_left;
	int eop = (len_left == 0);
	unsigned int offset = 0;
	unsigned int hdr_len;
	dma_addr_t dma_addr;
	unsigned int pkts;
	unsigned int len;
	skb_frag_t *frag;

	if (skb->encapsulation) {
		hdr_len = skb_inner_tcp_all_headers(skb);
		enic_preload_tcp_csum_encap(skb);
		enic->wq[wq->index].stats.encap_tso++;
	} else {
		hdr_len = skb_tcp_all_headers(skb);
		enic_preload_tcp_csum(skb);
		enic->wq[wq->index].stats.tso++;
	}

	/* Queue WQ_ENET_MAX_DESC_LEN length descriptors
	 * for the main skb fragment
	 */
	while (frag_len_left) {
		len = min(frag_len_left, (unsigned int)WQ_ENET_MAX_DESC_LEN);
		dma_addr = dma_map_single(&enic->pdev->dev,
					  skb->data + offset, len,
					  DMA_TO_DEVICE);
		if (unlikely(enic_dma_map_check(enic, dma_addr)))
			return -ENOMEM;
		enic_queue_wq_desc_tso(wq, skb, dma_addr, len, mss, hdr_len,
				       vlan_tag_insert, vlan_tag,
				       eop && (len == frag_len_left), loopback);
		frag_len_left -= len;
		offset += len;
	}

	if (eop)
		goto tso_out_stats;

	/* Queue WQ_ENET_MAX_DESC_LEN length descriptors
	 * for additional data fragments
	 */
	for (frag = skb_shinfo(skb)->frags; len_left; frag++) {
		len_left -= skb_frag_size(frag);
		frag_len_left = skb_frag_size(frag);
		offset = 0;

		while (frag_len_left) {
			len = min(frag_len_left,
				  (unsigned int)WQ_ENET_MAX_DESC_LEN);
			dma_addr = skb_frag_dma_map(&enic->pdev->dev, frag,
						    offset, len,
						    DMA_TO_DEVICE);
			if (unlikely(enic_dma_map_check(enic, dma_addr)))
				return -ENOMEM;
			enic_queue_wq_desc_cont(wq, skb, dma_addr, len,
						(len_left == 0) &&
						 (len == frag_len_left),/*EOP*/
						loopback);
			frag_len_left -= len;
			offset += len;
		}
	}

tso_out_stats:
	/* calculate how many packets tso sent */
	len = skb->len - hdr_len;
	pkts = len / mss;
	if ((len % mss) > 0)
		pkts++;
	enic->wq[wq->index].stats.packets += pkts;
	enic->wq[wq->index].stats.bytes += (len + (pkts * hdr_len));

	return 0;
}
static inline int enic_queue_wq_skb_encap(struct enic *enic, struct vnic_wq *wq,
					  struct sk_buff *skb,
					  int vlan_tag_insert,
					  unsigned int vlan_tag, int loopback)
{
	unsigned int head_len = skb_headlen(skb);
	unsigned int len_left = skb->len - head_len;
	/* Hardware will overwrite the checksum fields, calculating from
	 * scratch and ignoring the value placed by software.
	 * Offload mode = 00
	 * mss[2], mss[1], mss[0] bits are set
	 */
	unsigned int mss_or_csum = 7;
	int eop = (len_left == 0);
	dma_addr_t dma_addr;
	int err = 0;

	dma_addr = dma_map_single(&enic->pdev->dev, skb->data, head_len,
				  DMA_TO_DEVICE);
	if (unlikely(enic_dma_map_check(enic, dma_addr)))
		return -ENOMEM;

	enic_queue_wq_desc_ex(wq, skb, dma_addr, head_len, mss_or_csum, 0,
			      vlan_tag_insert, vlan_tag,
			      WQ_ENET_OFFLOAD_MODE_CSUM, eop, 1 /* SOP */, eop,
			      loopback);
	if (!eop)
		err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback);

	enic->wq[wq->index].stats.encap_csum++;
	enic->wq[wq->index].stats.packets++;
	enic->wq[wq->index].stats.bytes += skb->len;

	return err;
}

static inline int enic_queue_wq_skb(struct enic *enic,
	struct vnic_wq *wq, struct sk_buff *skb)
{
	unsigned int mss = skb_shinfo(skb)->gso_size;
	unsigned int vlan_tag = 0;
	int vlan_tag_insert = 0;
	int loopback = 0;
	int err;

	if (skb_vlan_tag_present(skb)) {
		/* VLAN tag from trunking driver */
		vlan_tag_insert = 1;
		vlan_tag = skb_vlan_tag_get(skb);
		enic->wq[wq->index].stats.add_vlan++;
	} else if (enic->loop_enable) {
		vlan_tag = enic->loop_tag;
		loopback = 1;
	}

	if (mss)
		err = enic_queue_wq_skb_tso(enic, wq, skb, mss,
					    vlan_tag_insert, vlan_tag,
					    loopback);
	else if (skb->encapsulation)
		err = enic_queue_wq_skb_encap(enic, wq, skb, vlan_tag_insert,
					      vlan_tag, loopback);
	else if (skb->ip_summed == CHECKSUM_PARTIAL)
		err = enic_queue_wq_skb_csum_l4(enic, wq, skb, vlan_tag_insert,
						vlan_tag, loopback);
	else
		err = enic_queue_wq_skb_vlan(enic, wq, skb, vlan_tag_insert,
					     vlan_tag, loopback);
	if (unlikely(err)) {
		struct vnic_wq_buf *buf;

		buf = wq->to_use->prev;
		/* while not EOP of previous pkt && queue not empty.
		 * For all non EOP bufs, os_buf is NULL.
		 */
		while (!buf->os_buf && (buf->next != wq->to_clean)) {
			enic_free_wq_buf(wq, buf);
			wq->ring.desc_avail++;
			buf = buf->prev;
		}
		wq->to_use = buf->next;
		dev_kfree_skb(skb);
	}
	return err;
}
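/* Added unwind example for the error path above (illustrative only): if a
 * three-fragment skb fails on the DMA mapping of its last fragment after
 * descriptors N and N+1 were posted, neither posted buffer is an EOP, so
 * both have a NULL os_buf.  The loop walks back from wq->to_use->prev
 * (N+1), frees N+1 and N, stops at the previous packet's EOP buffer
 * (os_buf != NULL) or at the to_clean boundary, and then rewinds
 * wq->to_use so that descriptor N is reused for the next transmit.
 */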
/* netif_tx_lock held, process context with BHs disabled, or BH */
static netdev_tx_t enic_hard_start_xmit(struct sk_buff *skb,
					struct net_device *netdev)
{
	struct enic *enic = netdev_priv(netdev);
	struct vnic_wq *wq;
	unsigned int txq_map;
	struct netdev_queue *txq;

	txq_map = skb_get_queue_mapping(skb) % enic->wq_count;
	wq = &enic->wq[txq_map].vwq;

	if (skb->len <= 0) {
		dev_kfree_skb_any(skb);
		enic->wq[wq->index].stats.null_pkt++;
		return NETDEV_TX_OK;
	}

	txq = netdev_get_tx_queue(netdev, txq_map);

	/* Non-TSO sends must fit within ENIC_NON_TSO_MAX_DESC descs,
	 * which is very likely. On the off chance it's going to take
	 * more than ENIC_NON_TSO_MAX_DESC, linearize the skb.
	 */

	if (skb_shinfo(skb)->gso_size == 0 &&
	    skb_shinfo(skb)->nr_frags + 1 > ENIC_NON_TSO_MAX_DESC &&
	    skb_linearize(skb)) {
		dev_kfree_skb_any(skb);
		enic->wq[wq->index].stats.skb_linear_fail++;
		return NETDEV_TX_OK;
	}

	spin_lock(&enic->wq[txq_map].lock);

	if (vnic_wq_desc_avail(wq) <
	    skb_shinfo(skb)->nr_frags + ENIC_DESC_MAX_SPLITS) {
		netif_tx_stop_queue(txq);
		/* This is a hard error, log it */
		netdev_err(netdev, "BUG! Tx ring full when queue awake!\n");
		spin_unlock(&enic->wq[txq_map].lock);
		enic->wq[wq->index].stats.desc_full_awake++;
		return NETDEV_TX_BUSY;
	}

	if (enic_queue_wq_skb(enic, wq, skb))
		goto error;

	if (vnic_wq_desc_avail(wq) < MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS) {
		netif_tx_stop_queue(txq);
		enic->wq[wq->index].stats.stopped++;
	}
	skb_tx_timestamp(skb);
	if (!netdev_xmit_more() || netif_xmit_stopped(txq))
		vnic_wq_doorbell(wq);

error:
	spin_unlock(&enic->wq[txq_map].lock);

	return NETDEV_TX_OK;
}

/* rcu_read_lock potentially held, nominally process context */
static void enic_get_stats(struct net_device *netdev,
			   struct rtnl_link_stats64 *net_stats)
{
	struct enic *enic = netdev_priv(netdev);
	struct vnic_stats *stats;
	u64 pkt_truncated = 0;
	u64 bad_fcs = 0;
	int err;
	int i;

	err = enic_dev_stats_dump(enic, &stats);
	/* return only when dma_alloc_coherent fails in vnic_dev_stats_dump
	 * For other failures, like devcmd failure, we return previously
	 * recorded stats.
	 */
	if (err == -ENOMEM)
		return;

	net_stats->tx_packets = stats->tx.tx_frames_ok;
	net_stats->tx_bytes = stats->tx.tx_bytes_ok;
	net_stats->tx_errors = stats->tx.tx_errors;
	net_stats->tx_dropped = stats->tx.tx_drops;

	net_stats->rx_packets = stats->rx.rx_frames_ok;
	net_stats->rx_bytes = stats->rx.rx_bytes_ok;
	net_stats->rx_errors = stats->rx.rx_errors;
	net_stats->multicast = stats->rx.rx_multicast_frames_ok;

	for (i = 0; i < enic->rq_count; i++) {
		struct enic_rq_stats *rqs = &enic->rq[i].stats;

		if (!enic->rq[i].vrq.ctrl)
			break;
		pkt_truncated += rqs->pkt_truncated;
		bad_fcs += rqs->bad_fcs;
	}
	net_stats->rx_over_errors = pkt_truncated;
	net_stats->rx_crc_errors = bad_fcs;
	net_stats->rx_dropped = stats->rx.rx_no_bufs + stats->rx.rx_drop;
}

static int enic_mc_sync(struct net_device *netdev, const u8 *mc_addr)
{
	struct enic *enic = netdev_priv(netdev);

	if (enic->mc_count == ENIC_MULTICAST_PERFECT_FILTERS) {
		unsigned int mc_count = netdev_mc_count(netdev);

		netdev_warn(netdev, "Registering only %d out of %d multicast addresses\n",
			    ENIC_MULTICAST_PERFECT_FILTERS, mc_count);

		return -ENOSPC;
	}

	enic_dev_add_addr(enic, mc_addr);
	enic->mc_count++;

	return 0;
}

static int enic_mc_unsync(struct net_device *netdev, const u8 *mc_addr)
{
	struct enic *enic = netdev_priv(netdev);

	enic_dev_del_addr(enic, mc_addr);
	enic->mc_count--;

	return 0;
}

static int enic_uc_sync(struct net_device *netdev, const u8 *uc_addr)
{
	struct enic *enic = netdev_priv(netdev);

	if (enic->uc_count == ENIC_UNICAST_PERFECT_FILTERS) {
		unsigned int uc_count = netdev_uc_count(netdev);

		netdev_warn(netdev, "Registering only %d out of %d unicast addresses\n",
			    ENIC_UNICAST_PERFECT_FILTERS, uc_count);

		return -ENOSPC;
	}

	enic_dev_add_addr(enic, uc_addr);
	enic->uc_count++;

	return 0;
}

static int enic_uc_unsync(struct net_device *netdev, const u8 *uc_addr)
{
	struct enic *enic = netdev_priv(netdev);

	enic_dev_del_addr(enic, uc_addr);
	enic->uc_count--;

	return 0;
}

void enic_reset_addr_lists(struct enic *enic)
{
	struct net_device *netdev = enic->netdev;

	__dev_uc_unsync(netdev, NULL);
	__dev_mc_unsync(netdev, NULL);

	enic->mc_count = 0;
	enic->uc_count = 0;
	enic->flags = 0;
}

static int enic_set_mac_addr(struct net_device *netdev, char *addr)
{
	struct enic *enic = netdev_priv(netdev);

	if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) {
		if (!is_valid_ether_addr(addr) && !is_zero_ether_addr(addr))
			return -EADDRNOTAVAIL;
	} else {
		if (!is_valid_ether_addr(addr))
			return -EADDRNOTAVAIL;
	}

	eth_hw_addr_set(netdev, addr);

	return 0;
}

static int enic_set_mac_address_dynamic(struct net_device *netdev, void *p)
{
	struct enic *enic = netdev_priv(netdev);
	struct sockaddr *saddr = p;
	char *addr = saddr->sa_data;
	int err;

	if (netif_running(enic->netdev)) {
		err = enic_dev_del_station_addr(enic);
		if (err)
			return err;
	}

	err = enic_set_mac_addr(netdev, addr);
	if (err)
		return err;

	if (netif_running(enic->netdev)) {
		err = enic_dev_add_station_addr(enic);
		if (err)
			return err;
	}

	return err;
}

static int enic_set_mac_address(struct net_device *netdev, void *p)
{
	struct sockaddr *saddr = p;
	char *addr = saddr->sa_data;
	struct enic *enic = netdev_priv(netdev);
	int err;

	err = enic_dev_del_station_addr(enic);
	if (err)
		return err;

	err = enic_set_mac_addr(netdev, addr);
	if (err)
		return err;

	return enic_dev_add_station_addr(enic);
}

/* netif_tx_lock held, BHs disabled */
static void enic_set_rx_mode(struct net_device *netdev)
{
	struct enic *enic = netdev_priv(netdev);
	int directed = 1;
	int multicast = (netdev->flags & IFF_MULTICAST) ? 1 : 0;
	int broadcast = (netdev->flags & IFF_BROADCAST) ? 1 : 0;
	int promisc = (netdev->flags & IFF_PROMISC) ||
		netdev_uc_count(netdev) > ENIC_UNICAST_PERFECT_FILTERS;
	int allmulti = (netdev->flags & IFF_ALLMULTI) ||
		netdev_mc_count(netdev) > ENIC_MULTICAST_PERFECT_FILTERS;
	unsigned int flags = netdev->flags |
		(allmulti ? IFF_ALLMULTI : 0) |
		(promisc ? IFF_PROMISC : 0);
	if (enic->flags != flags) {
		enic->flags = flags;
		enic_dev_packet_filter(enic, directed,
				       multicast, broadcast, promisc, allmulti);
	}

	if (!promisc) {
		__dev_uc_sync(netdev, enic_uc_sync, enic_uc_unsync);
		if (!allmulti)
			__dev_mc_sync(netdev, enic_mc_sync, enic_mc_unsync);
	}
}

/* netif_tx_lock held, BHs disabled */
static void enic_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
	struct enic *enic = netdev_priv(netdev);
	schedule_work(&enic->tx_hang_reset);
}

static int enic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
	struct enic *enic = netdev_priv(netdev);
	struct enic_port_profile *pp;
	int err;

	ENIC_PP_BY_INDEX(enic, vf, pp, &err);
	if (err)
		return err;

	if (is_valid_ether_addr(mac) || is_zero_ether_addr(mac)) {
		if (vf == PORT_SELF_VF) {
			memcpy(pp->vf_mac, mac, ETH_ALEN);
			return 0;
		} else {
			/*
			 * For sriov vf's set the mac in hw
			 */
			ENIC_DEVCMD_PROXY_BY_INDEX(vf, err, enic,
				vnic_dev_set_mac_addr, mac);
			return enic_dev_status_to_errno(err);
		}
	} else
		return -EINVAL;
}

static int enic_set_vf_port(struct net_device *netdev, int vf,
			    struct nlattr *port[])
{
	static const u8 zero_addr[ETH_ALEN] = {};
	struct enic *enic = netdev_priv(netdev);
	struct enic_port_profile prev_pp;
	struct enic_port_profile *pp;
	int err = 0, restore_pp = 1;

	ENIC_PP_BY_INDEX(enic, vf, pp, &err);
	if (err)
		return err;

	if (!port[IFLA_PORT_REQUEST])
		return -EOPNOTSUPP;

	memcpy(&prev_pp, pp, sizeof(*enic->pp));
	memset(pp, 0, sizeof(*enic->pp));

	pp->set |= ENIC_SET_REQUEST;
	pp->request = nla_get_u8(port[IFLA_PORT_REQUEST]);

	if (port[IFLA_PORT_PROFILE]) {
		if (nla_len(port[IFLA_PORT_PROFILE]) != PORT_PROFILE_MAX) {
			memcpy(pp, &prev_pp, sizeof(*pp));
			return -EINVAL;
		}
		pp->set |= ENIC_SET_NAME;
		memcpy(pp->name, nla_data(port[IFLA_PORT_PROFILE]),
		       PORT_PROFILE_MAX);
	}

	if (port[IFLA_PORT_INSTANCE_UUID]) {
		if (nla_len(port[IFLA_PORT_INSTANCE_UUID]) != PORT_UUID_MAX) {
			memcpy(pp, &prev_pp, sizeof(*pp));
			return -EINVAL;
		}
		pp->set |= ENIC_SET_INSTANCE;
		memcpy(pp->instance_uuid,
		       nla_data(port[IFLA_PORT_INSTANCE_UUID]), PORT_UUID_MAX);
	}

	if (port[IFLA_PORT_HOST_UUID]) {
		if (nla_len(port[IFLA_PORT_HOST_UUID]) != PORT_UUID_MAX) {
			memcpy(pp, &prev_pp, sizeof(*pp));
			return -EINVAL;
		}
		pp->set |= ENIC_SET_HOST;
		memcpy(pp->host_uuid,
		       nla_data(port[IFLA_PORT_HOST_UUID]), PORT_UUID_MAX);
	}

	if (vf == PORT_SELF_VF) {
		/* Special case handling: mac came from IFLA_VF_MAC */
		if (!is_zero_ether_addr(prev_pp.vf_mac))
			memcpy(pp->mac_addr, prev_pp.vf_mac, ETH_ALEN);

		if (is_zero_ether_addr(netdev->dev_addr))
			eth_hw_addr_random(netdev);
	} else {
		/* SR-IOV VF: get mac from adapter */
		ENIC_DEVCMD_PROXY_BY_INDEX(vf, err, enic,
			vnic_dev_get_mac_addr, pp->mac_addr);
		if (err) {
			netdev_err(netdev, "Error getting mac for vf %d\n", vf);
			memcpy(pp, &prev_pp, sizeof(*pp));
			return enic_dev_status_to_errno(err);
		}
	}

	err = enic_process_set_pp_request(enic, vf, &prev_pp, &restore_pp);
	if (err) {
		if (restore_pp) {
			/* Things are still the way they were: Implicit
			 * DISASSOCIATE failed
			 */
			memcpy(pp, &prev_pp, sizeof(*pp));
		} else {
			memset(pp, 0, sizeof(*pp));
			if (vf == PORT_SELF_VF)
				eth_hw_addr_set(netdev, zero_addr);
		}
	} else {
		/* Set flag to indicate that the port assoc/disassoc
		 * request has been sent out to fw
		 */
		pp->set |= ENIC_PORT_REQUEST_APPLIED;

		/* If DISASSOCIATE, clean up all assigned/saved macaddresses */
		if (pp->request == PORT_REQUEST_DISASSOCIATE) {
			eth_zero_addr(pp->mac_addr);
			if (vf == PORT_SELF_VF)
				eth_hw_addr_set(netdev, zero_addr);
		}
	}

	if (vf == PORT_SELF_VF)
		eth_zero_addr(pp->vf_mac);

	return err;
}

static int enic_get_vf_port(struct net_device *netdev, int vf,
			    struct sk_buff *skb)
{
	struct enic *enic = netdev_priv(netdev);
	u16 response = PORT_PROFILE_RESPONSE_SUCCESS;
	struct enic_port_profile *pp;
	int err;

	ENIC_PP_BY_INDEX(enic, vf, pp, &err);
	if (err)
		return err;

	if (!(pp->set & ENIC_PORT_REQUEST_APPLIED))
		return -ENODATA;

	err = enic_process_get_pp_request(enic, vf, pp->request, &response);
	if (err)
		return err;

	if (nla_put_u16(skb, IFLA_PORT_REQUEST, pp->request) ||
	    nla_put_u16(skb, IFLA_PORT_RESPONSE, response) ||
	    ((pp->set & ENIC_SET_NAME) &&
	     nla_put(skb, IFLA_PORT_PROFILE, PORT_PROFILE_MAX, pp->name)) ||
	    ((pp->set & ENIC_SET_INSTANCE) &&
	     nla_put(skb, IFLA_PORT_INSTANCE_UUID, PORT_UUID_MAX,
		     pp->instance_uuid)) ||
	    ((pp->set & ENIC_SET_HOST) &&
	     nla_put(skb, IFLA_PORT_HOST_UUID, PORT_UUID_MAX, pp->host_uuid)))
		goto nla_put_failure;
	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static void enic_set_int_moderation(struct enic *enic, struct vnic_rq *rq)
{
	unsigned int intr = enic_msix_rq_intr(enic, rq->index);
	struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
	u32 timer = cq->tobe_rx_coal_timeval;

	if (cq->tobe_rx_coal_timeval != cq->cur_rx_coal_timeval) {
		vnic_intr_coalescing_timer_set(&enic->intr[intr], timer);
		cq->cur_rx_coal_timeval = cq->tobe_rx_coal_timeval;
	}
}

static void enic_calc_int_moderation(struct enic *enic, struct vnic_rq *rq)
{
	struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
	struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
	struct vnic_rx_bytes_counter *pkt_size_counter = &cq->pkt_size_counter;
	int index;
	u32 timer;
	u32 range_start;
	u32 traffic;
	u64 delta;
	ktime_t now = ktime_get();

	delta = ktime_us_delta(now, cq->prev_ts);
	if (delta < ENIC_AIC_TS_BREAK)
		return;
	cq->prev_ts = now;

	traffic = pkt_size_counter->large_pkt_bytes_cnt +
		  pkt_size_counter->small_pkt_bytes_cnt;
	/* The table takes Mbps
	 * traffic *= 8    => bits
	 * traffic *= (10^6 / delta)    => bps
	 * traffic /= 10^6     => Mbps
	 *
	 * Combining, traffic *= (8 / delta)
	 */

	traffic <<= 3;
	traffic = delta > UINT_MAX ? 0 : traffic / (u32)delta;

	for (index = 0; index < ENIC_MAX_COALESCE_TIMERS; index++)
		if (traffic < mod_table[index].rx_rate)
			break;
	range_start = (pkt_size_counter->small_pkt_bytes_cnt >
		       pkt_size_counter->large_pkt_bytes_cnt << 1) ?
		      rx_coal->small_pkt_range_start :
		      rx_coal->large_pkt_range_start;
	timer = range_start + ((rx_coal->range_end - range_start) *
			       mod_table[index].range_percent / 100);
	/* Damping */
	cq->tobe_rx_coal_timeval = (timer + cq->tobe_rx_coal_timeval) >> 1;

	pkt_size_counter->large_pkt_bytes_cnt = 0;
	pkt_size_counter->small_pkt_bytes_cnt = 0;
}
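/* Added worked example for the rate computation above (hypothetical
 * numbers, not from the original source): if 2,500,000 bytes arrived since
 * the previous sample and delta is 10,000 usec, then
 *
 *	traffic = 2,500,000 * 8 = 20,000,000 bits
 *	traffic / delta         = 20,000,000 / 10,000 = 2000 Mbps
 *
 * 2000 Mbps is below the first mod_table row (4000 Mbps, 0%), so the new
 * timer sits at the start of the selected range; higher measured rates map
 * to progressively larger fractions of [range_start, range_end] before the
 * damping step above averages the result with the previous value.
 */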
static int enic_poll(struct napi_struct *napi, int budget)
{
	struct net_device *netdev = napi->dev;
	struct enic *enic = netdev_priv(netdev);
	unsigned int cq_rq = enic_cq_rq(enic, 0);
	unsigned int cq_wq = enic_cq_wq(enic, 0);
	unsigned int intr = ENIC_LEGACY_IO_INTR;
	unsigned int rq_work_to_do = budget;
	unsigned int wq_work_to_do = ENIC_WQ_NAPI_BUDGET;
	unsigned int work_done, rq_work_done = 0, wq_work_done;
	int err;

	wq_work_done = enic_wq_cq_service(enic, cq_wq, wq_work_to_do);

	if (budget > 0)
		rq_work_done = enic_rq_cq_service(enic, cq_rq, rq_work_to_do);

	/* Accumulate intr event credits for this polling
	 * cycle. An intr event is the completion of a
	 * WQ or RQ packet.
	 */

	work_done = rq_work_done + wq_work_done;

	if (work_done > 0)
		vnic_intr_return_credits(&enic->intr[intr],
					 work_done,
					 0 /* don't unmask intr */,
					 0 /* don't reset intr timer */);

	err = vnic_rq_fill(&enic->rq[0].vrq, enic_rq_alloc_buf);

	/* Buffer allocation failed. Stay in polling
	 * mode so we can try to fill the ring again.
	 */

	if (err)
		rq_work_done = rq_work_to_do;
	if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
		/* Call the function which refreshes the intr coalescing timer
		 * value based on the traffic.
		 */
		enic_calc_int_moderation(enic, &enic->rq[0].vrq);

	if ((rq_work_done < budget) && napi_complete_done(napi, rq_work_done)) {

		/* Some work done, but not enough to stay in polling,
		 * exit polling
		 */

		if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
			enic_set_int_moderation(enic, &enic->rq[0].vrq);
		vnic_intr_unmask(&enic->intr[intr]);
		enic->rq[0].stats.napi_complete++;
	} else {
		enic->rq[0].stats.napi_repoll++;
	}

	return rq_work_done;
}

#ifdef CONFIG_RFS_ACCEL
static void enic_free_rx_cpu_rmap(struct enic *enic)
{
	free_irq_cpu_rmap(enic->netdev->rx_cpu_rmap);
	enic->netdev->rx_cpu_rmap = NULL;
}

static void enic_set_rx_cpu_rmap(struct enic *enic)
{
	int i, res;

	if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) {
		enic->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(enic->rq_count);
		if (unlikely(!enic->netdev->rx_cpu_rmap))
			return;
		for (i = 0; i < enic->rq_count; i++) {
			res = irq_cpu_rmap_add(enic->netdev->rx_cpu_rmap,
					       enic->msix_entry[i].vector);
			if (unlikely(res)) {
				enic_free_rx_cpu_rmap(enic);
				return;
			}
		}
	}
}

#else

static void enic_free_rx_cpu_rmap(struct enic *enic)
{
}

static void enic_set_rx_cpu_rmap(struct enic *enic)
{
}

#endif /* CONFIG_RFS_ACCEL */

static int enic_poll_msix_wq(struct napi_struct *napi, int budget)
{
	struct net_device *netdev = napi->dev;
	struct enic *enic = netdev_priv(netdev);
	unsigned int wq_index = (napi - &enic->napi[0]) - enic->rq_count;
	struct vnic_wq *wq = &enic->wq[wq_index].vwq;
	unsigned int cq;
	unsigned int intr;
	unsigned int wq_work_to_do = ENIC_WQ_NAPI_BUDGET;
	unsigned int wq_work_done;
	unsigned int wq_irq;

	wq_irq = wq->index;
	cq = enic_cq_wq(enic, wq_irq);
	intr = enic_msix_wq_intr(enic, wq_irq);

	wq_work_done = enic_wq_cq_service(enic, cq, wq_work_to_do);

	vnic_intr_return_credits(&enic->intr[intr], wq_work_done,
				 0 /* don't unmask intr */,
				 1 /* reset intr timer */);
	if (!wq_work_done) {
		napi_complete(napi);
		vnic_intr_unmask(&enic->intr[intr]);
		return 0;
	}

	return budget;
}

static int enic_poll_msix_rq(struct napi_struct *napi, int budget)
{
	struct net_device *netdev = napi->dev;
	struct enic *enic = netdev_priv(netdev);
	unsigned int rq = (napi - &enic->napi[0]);
	unsigned int cq = enic_cq_rq(enic, rq);
	unsigned int intr = enic_msix_rq_intr(enic, rq);
	unsigned int work_to_do = budget;
	unsigned int work_done = 0;
	int err;

	/* Service RQ
	 */

	if (budget > 0)
		work_done = enic_rq_cq_service(enic, cq, work_to_do);

	/* Return intr event credits for this polling
	 * cycle. An intr event is the completion of a
	 * RQ packet.
	 */

	if (work_done > 0)
		vnic_intr_return_credits(&enic->intr[intr],
					 work_done,
					 0 /* don't unmask intr */,
					 0 /* don't reset intr timer */);

	err = vnic_rq_fill(&enic->rq[rq].vrq, enic_rq_alloc_buf);

	/* Buffer allocation failed. Stay in polling mode
	 * so we can try to fill the ring again.
	 */

	if (err)
		work_done = work_to_do;
	if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
		/* Call the function which refreshes the intr coalescing timer
		 * value based on the traffic.
		 */
		enic_calc_int_moderation(enic, &enic->rq[rq].vrq);

	if ((work_done < budget) && napi_complete_done(napi, work_done)) {

		/* Some work done, but not enough to stay in polling,
		 * exit polling
		 */

		if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
			enic_set_int_moderation(enic, &enic->rq[rq].vrq);
		vnic_intr_unmask(&enic->intr[intr]);
		enic->rq[rq].stats.napi_complete++;
	} else {
		enic->rq[rq].stats.napi_repoll++;
	}

	return work_done;
}

static void enic_notify_timer(struct timer_list *t)
{
	struct enic *enic = timer_container_of(enic, t, notify_timer);

	enic_notify_check(enic);

	mod_timer(&enic->notify_timer,
		  round_jiffies(jiffies + ENIC_NOTIFY_TIMER_PERIOD));
}

static void enic_free_intr(struct enic *enic)
{
	struct net_device *netdev = enic->netdev;
	unsigned int i;

	enic_free_rx_cpu_rmap(enic);
	switch (vnic_dev_get_intr_mode(enic->vdev)) {
	case VNIC_DEV_INTR_MODE_INTX:
		free_irq(enic->pdev->irq, netdev);
		break;
	case VNIC_DEV_INTR_MODE_MSI:
		free_irq(enic->pdev->irq, enic);
		break;
	case VNIC_DEV_INTR_MODE_MSIX:
		for (i = 0; i < enic->intr_count; i++)
			if (enic->msix[i].requested)
				free_irq(enic->msix_entry[i].vector,
					 enic->msix[i].devid);
		break;
	default:
		break;
	}
}

static int enic_request_intr(struct enic *enic)
{
	struct net_device *netdev = enic->netdev;
	unsigned int i, intr;
	int err = 0;

	enic_set_rx_cpu_rmap(enic);
	switch (vnic_dev_get_intr_mode(enic->vdev)) {

	case VNIC_DEV_INTR_MODE_INTX:

		err = request_irq(enic->pdev->irq, enic_isr_legacy,
				  IRQF_SHARED, netdev->name, netdev);
		break;

	case VNIC_DEV_INTR_MODE_MSI:

		err = request_irq(enic->pdev->irq, enic_isr_msi,
				  0, netdev->name, enic);
		break;

	case VNIC_DEV_INTR_MODE_MSIX:

		for (i = 0; i < enic->rq_count; i++) {
			intr = enic_msix_rq_intr(enic, i);
			snprintf(enic->msix[intr].devname,
				 sizeof(enic->msix[intr].devname),
				 "%s-rx-%u", netdev->name, i);
			enic->msix[intr].isr = enic_isr_msix;
			enic->msix[intr].devid = &enic->napi[i];
		}

		for (i = 0; i < enic->wq_count; i++) {
			int wq = enic_cq_wq(enic, i);

			intr = enic_msix_wq_intr(enic, i);
			snprintf(enic->msix[intr].devname,
				 sizeof(enic->msix[intr].devname),
				 "%s-tx-%u", netdev->name, i);
			enic->msix[intr].isr = enic_isr_msix;
			enic->msix[intr].devid = &enic->napi[wq];
		}

		intr = enic_msix_err_intr(enic);
		snprintf(enic->msix[intr].devname,
			 sizeof(enic->msix[intr].devname),
			 "%s-err", netdev->name);
		enic->msix[intr].isr = enic_isr_msix_err;
		enic->msix[intr].devid = enic;

		intr = enic_msix_notify_intr(enic);
		snprintf(enic->msix[intr].devname,
			 sizeof(enic->msix[intr].devname),
			 "%s-notify", netdev->name);
		enic->msix[intr].isr = enic_isr_msix_notify;
		enic->msix[intr].devid = enic;

		for (i = 0; i < enic->intr_count; i++)
			enic->msix[i].requested = 0;

		for (i = 0; i < enic->intr_count; i++) {
			err = request_irq(enic->msix_entry[i].vector,
					  enic->msix[i].isr, 0,
					  enic->msix[i].devname,
					  enic->msix[i].devid);
			if (err) {
				enic_free_intr(enic);
				break;
			}
			enic->msix[i].requested = 1;
		}

		break;

	default:
		break;
	}

	return err;
}

static void enic_synchronize_irqs(struct enic *enic)
{
	unsigned int i;

	switch (vnic_dev_get_intr_mode(enic->vdev)) {
	case VNIC_DEV_INTR_MODE_INTX:
	case VNIC_DEV_INTR_MODE_MSI:
		synchronize_irq(enic->pdev->irq);
		break;
	case VNIC_DEV_INTR_MODE_MSIX:
		for (i = 0; i < enic->intr_count; i++)
			synchronize_irq(enic->msix_entry[i].vector);
		break;
	default:
		break;
	}
}

static int enic_dev_notify_set(struct enic *enic)
{
	int err;

	spin_lock_bh(&enic->devcmd_lock);
	switch (vnic_dev_get_intr_mode(enic->vdev)) {
	case VNIC_DEV_INTR_MODE_INTX:
		err = vnic_dev_notify_set(enic->vdev, ENIC_LEGACY_NOTIFY_INTR);
		break;
	case VNIC_DEV_INTR_MODE_MSIX:
		err = vnic_dev_notify_set(enic->vdev,
					  enic_msix_notify_intr(enic));
		break;
	default:
		err = vnic_dev_notify_set(enic->vdev, -1 /* no intr */);
		break;
	}
	spin_unlock_bh(&enic->devcmd_lock);

	return err;
}

static void enic_notify_timer_start(struct enic *enic)
{
	switch (vnic_dev_get_intr_mode(enic->vdev)) {
	case VNIC_DEV_INTR_MODE_MSI:
		mod_timer(&enic->notify_timer, jiffies);
		break;
	default:
		/* Using intr for notification for INTx/MSI-X */
		break;
	}
}

/* rtnl lock is held, process context */
static int enic_open(struct net_device *netdev)
{
	struct enic *enic = netdev_priv(netdev);
	unsigned int i;
	int err, ret;
	unsigned int max_pkt_len = netdev->mtu + VLAN_ETH_HLEN;
	struct page_pool_params pp_params = {
		.order = get_order(max_pkt_len),
		.pool_size = enic->config.rq_desc_count,
		.nid = dev_to_node(&enic->pdev->dev),
		.dev = &enic->pdev->dev,
		.dma_dir = DMA_FROM_DEVICE,
		.max_len = (max_pkt_len > PAGE_SIZE) ? max_pkt_len : PAGE_SIZE,
		.netdev = netdev,
		.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
	};

	err = enic_request_intr(enic);
	if (err) {
		netdev_err(netdev, "Unable to request irq.\n");
		return err;
	}
	enic_init_affinity_hint(enic);
	enic_set_affinity_hint(enic);

	err = enic_dev_notify_set(enic);
	if (err) {
		netdev_err(netdev,
			   "Failed to alloc notify buffer, aborting.\n");
		goto err_out_free_intr;
	}

	for (i = 0; i < enic->rq_count; i++) {
		/* create a page pool for each RQ */
		pp_params.napi = &enic->napi[i];
		pp_params.queue_idx = i;
		enic->rq[i].pool = page_pool_create(&pp_params);
		if (IS_ERR(enic->rq[i].pool)) {
			err = PTR_ERR(enic->rq[i].pool);
			enic->rq[i].pool = NULL;
			goto err_out_free_rq;
		}

		/* enable rq before updating rq desc */
		vnic_rq_enable(&enic->rq[i].vrq);
		vnic_rq_fill(&enic->rq[i].vrq, enic_rq_alloc_buf);
		/* Need at least one buffer on ring to get going */
		if (vnic_rq_desc_used(&enic->rq[i].vrq) == 0) {
			netdev_err(netdev, "Unable to alloc receive buffers\n");
			err = -ENOMEM;
			goto err_out_free_rq;
		}
	}

	for (i = 0; i < enic->wq_count; i++)
		vnic_wq_enable(&enic->wq[i].vwq);

	if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic))
		enic_dev_add_station_addr(enic);

	enic_set_rx_mode(netdev);

	netif_tx_wake_all_queues(netdev);

	for (i = 0; i < enic->rq_count; i++)
		napi_enable(&enic->napi[i]);

	if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX)
		for (i = 0; i < enic->wq_count; i++)
			napi_enable(&enic->napi[enic_cq_wq(enic, i)]);
	enic_dev_enable(enic);

	for (i = 0; i < enic->intr_count; i++)
		vnic_intr_unmask(&enic->intr[i]);

	enic_notify_timer_start(enic);
	enic_rfs_timer_start(enic);

	return 0;

err_out_free_rq:
	for (i = 0; i < enic->rq_count; i++) {
		ret = vnic_rq_disable(&enic->rq[i].vrq);
		if (!ret) {
			vnic_rq_clean(&enic->rq[i].vrq, enic_free_rq_buf);
			page_pool_destroy(enic->rq[i].pool);
			enic->rq[i].pool = NULL;
		}
	}
	enic_dev_notify_unset(enic);
err_out_free_intr:
	enic_unset_affinity_hint(enic);
	enic_free_intr(enic);

	return err;
}

/* rtnl lock is held, process context */
static int enic_stop(struct net_device *netdev)
{
	struct enic *enic = netdev_priv(netdev);
	unsigned int i;
	int err;

	for (i = 0; i < enic->intr_count; i++) {
		vnic_intr_mask(&enic->intr[i]);
		(void)vnic_intr_masked(&enic->intr[i]); /* flush write */
	}

	enic_synchronize_irqs(enic);

	timer_delete_sync(&enic->notify_timer);
	enic_rfs_flw_tbl_free(enic);

	enic_dev_disable(enic);

	for (i = 0; i < enic->rq_count; i++)
		napi_disable(&enic->napi[i]);

	netif_carrier_off(netdev);
	if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX)
		for (i = 0; i < enic->wq_count; i++)
			napi_disable(&enic->napi[enic_cq_wq(enic, i)]);
	netif_tx_disable(netdev);

	if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic))
		enic_dev_del_station_addr(enic);

	for (i = 0; i < enic->wq_count; i++) {
		err = vnic_wq_disable(&enic->wq[i].vwq);
		if (err)
			return err;
	}
	for (i = 0; i < enic->rq_count; i++) {
		err = vnic_rq_disable(&enic->rq[i].vrq);
		if (err)
			return err;
	}

	enic_dev_notify_unset(enic);
	enic_unset_affinity_hint(enic);
	enic_free_intr(enic);

	for (i = 0; i < enic->wq_count; i++)
		vnic_wq_clean(&enic->wq[i].vwq, enic_free_wq_buf);
	for (i = 0; i < enic->rq_count; i++) {
		vnic_rq_clean(&enic->rq[i].vrq, enic_free_rq_buf);
		page_pool_destroy(enic->rq[i].pool);
		enic->rq[i].pool = NULL;
	}
	for (i = 0; i < enic->cq_count; i++)
		vnic_cq_clean(&enic->cq[i]);
	for (i = 0; i < enic->intr_count; i++)
		vnic_intr_clean(&enic->intr[i]);

	return 0;
}

static int _enic_change_mtu(struct net_device *netdev, int new_mtu)
{
	bool running = netif_running(netdev);
	int err = 0;

	ASSERT_RTNL();
	if (running) {
		err = enic_stop(netdev);
		if (err)
			return err;
	}

	WRITE_ONCE(netdev->mtu, new_mtu);

	if (running) {
		err = enic_open(netdev);
		if (err)
			return err;
	}

	return 0;
}

static int enic_change_mtu(struct net_device *netdev, int new_mtu)
{
	struct enic *enic = netdev_priv(netdev);

	if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic))
		return -EOPNOTSUPP;

	if (new_mtu > enic->port_mtu)
		netdev_warn(netdev,
			    "interface MTU (%d) set higher than port MTU (%d)\n",
			    new_mtu, enic->port_mtu);

	return _enic_change_mtu(netdev, new_mtu);
}

static void enic_change_mtu_work(struct work_struct *work)
{
	struct enic *enic = container_of(work, struct enic, change_mtu_work);
	struct net_device *netdev = enic->netdev;
	int new_mtu = vnic_dev_mtu(enic->vdev);

	rtnl_lock();
	(void)_enic_change_mtu(netdev, new_mtu);
	rtnl_unlock();

	netdev_info(netdev, "interface MTU set as %d\n", netdev->mtu);
}

#ifdef CONFIG_NET_POLL_CONTROLLER
static void enic_poll_controller(struct net_device *netdev)
{
	struct enic *enic = netdev_priv(netdev);
	struct vnic_dev *vdev = enic->vdev;
	unsigned int i, intr;

	switch (vnic_dev_get_intr_mode(vdev)) {
	case VNIC_DEV_INTR_MODE_MSIX:
		for (i = 0; i < enic->rq_count; i++) {
			intr = enic_msix_rq_intr(enic, i);
			enic_isr_msix(enic->msix_entry[intr].vector,
				      &enic->napi[i]);
		}

		for (i = 0; i < enic->wq_count; i++) {
			intr = enic_msix_wq_intr(enic, i);
			enic_isr_msix(enic->msix_entry[intr].vector,
				      &enic->napi[enic_cq_wq(enic, i)]);
		}

		break;
	case VNIC_DEV_INTR_MODE_MSI:
		enic_isr_msi(enic->pdev->irq, enic);
		break;
	case VNIC_DEV_INTR_MODE_INTX:
		enic_isr_legacy(enic->pdev->irq, netdev);
		break;
	default:
		break;
	}
}
#endif

static int enic_dev_wait(struct vnic_dev *vdev,
			 int (*start)(struct vnic_dev *, int),
			 int (*finished)(struct vnic_dev *, int *),
			 int arg)
{
	unsigned long time;
	int done;
	int err;

	err = start(vdev, arg);
	if (err)
		return err;

	/* Wait for func to complete...2 seconds max
	 */

	time = jiffies + (HZ * 2);
	do {

		err = finished(vdev, &done);
		if (err)
			return err;

		if (done)
			return 0;

		schedule_timeout_uninterruptible(HZ / 10);

	} while (time_after(time, jiffies));

	return -ETIMEDOUT;
}

static int enic_dev_open(struct enic *enic)
{
	int err;
	u32 flags = CMD_OPENF_IG_DESCCACHE;

	err = enic_dev_wait(enic->vdev, vnic_dev_open,
			    vnic_dev_open_done, flags);
	if (err)
		dev_err(enic_get_dev(enic), "vNIC device open failed, err %d\n",
			err);

	return err;
}

static int enic_dev_soft_reset(struct enic *enic)
{
	int err;

	err = enic_dev_wait(enic->vdev, vnic_dev_soft_reset,
			    vnic_dev_soft_reset_done, 0);
	if (err)
		netdev_err(enic->netdev, "vNIC soft reset failed, err %d\n",
			   err);

	return err;
}

static int enic_dev_hang_reset(struct enic *enic)
{
	int err;

	err = enic_dev_wait(enic->vdev, vnic_dev_hang_reset,
			    vnic_dev_hang_reset_done, 0);
	if (err)
		netdev_err(enic->netdev, "vNIC hang reset failed, err %d\n",
			   err);

	return err;
}

int __enic_set_rsskey(struct enic *enic)
{
	union vnic_rss_key *rss_key_buf_va;
	dma_addr_t rss_key_buf_pa;
	int i, kidx, bidx, err;

	rss_key_buf_va = dma_alloc_coherent(&enic->pdev->dev,
					    sizeof(union vnic_rss_key),
					    &rss_key_buf_pa, GFP_ATOMIC);
	if (!rss_key_buf_va)
		return -ENOMEM;

	for (i = 0; i < ENIC_RSS_LEN; i++) {
		kidx = i / ENIC_RSS_BYTES_PER_KEY;
		bidx = i % ENIC_RSS_BYTES_PER_KEY;
		rss_key_buf_va->key[kidx].b[bidx] = enic->rss_key[i];
	}
	spin_lock_bh(&enic->devcmd_lock);
	err = enic_set_rss_key(enic,
			       rss_key_buf_pa,
			       sizeof(union vnic_rss_key));
	spin_unlock_bh(&enic->devcmd_lock);

	dma_free_coherent(&enic->pdev->dev, sizeof(union vnic_rss_key),
			  rss_key_buf_va, rss_key_buf_pa);

	return err;
}

static int enic_set_rsskey(struct enic *enic)
{
	netdev_rss_key_fill(enic->rss_key, ENIC_RSS_LEN);

	return __enic_set_rsskey(enic);
}

static int enic_set_rsscpu(struct enic *enic, u8 rss_hash_bits)
{
	dma_addr_t rss_cpu_buf_pa;
	union vnic_rss_cpu *rss_cpu_buf_va = NULL;
	unsigned int i;
	int err;

	rss_cpu_buf_va = dma_alloc_coherent(&enic->pdev->dev,
					    sizeof(union vnic_rss_cpu),
					    &rss_cpu_buf_pa, GFP_ATOMIC);
	if (!rss_cpu_buf_va)
		return -ENOMEM;

	for (i = 0; i < (1 << rss_hash_bits); i++)
		(*rss_cpu_buf_va).cpu[i/4].b[i%4] = i % enic->rq_count;

	spin_lock_bh(&enic->devcmd_lock);
	err = enic_set_rss_cpu(enic,
			       rss_cpu_buf_pa,
			       sizeof(union vnic_rss_cpu));
	spin_unlock_bh(&enic->devcmd_lock);

	dma_free_coherent(&enic->pdev->dev, sizeof(union vnic_rss_cpu),
			  rss_cpu_buf_va, rss_cpu_buf_pa);

	return err;
}

static int enic_set_niccfg(struct enic *enic, u8 rss_default_cpu,
	u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable)
{
	const u8 tso_ipid_split_en = 0;
	const u8 ig_vlan_strip_en = 1;
	int err;

	/* Enable VLAN tag stripping.
static int enic_set_rsscpu(struct enic *enic, u8 rss_hash_bits)
{
	dma_addr_t rss_cpu_buf_pa;
	union vnic_rss_cpu *rss_cpu_buf_va = NULL;
	unsigned int i;
	int err;

	rss_cpu_buf_va = dma_alloc_coherent(&enic->pdev->dev,
					    sizeof(union vnic_rss_cpu),
					    &rss_cpu_buf_pa, GFP_ATOMIC);
	if (!rss_cpu_buf_va)
		return -ENOMEM;

	for (i = 0; i < (1 << rss_hash_bits); i++)
		(*rss_cpu_buf_va).cpu[i/4].b[i%4] = i % enic->rq_count;

	spin_lock_bh(&enic->devcmd_lock);
	err = enic_set_rss_cpu(enic,
		rss_cpu_buf_pa,
		sizeof(union vnic_rss_cpu));
	spin_unlock_bh(&enic->devcmd_lock);

	dma_free_coherent(&enic->pdev->dev, sizeof(union vnic_rss_cpu),
			  rss_cpu_buf_va, rss_cpu_buf_pa);

	return err;
}

static int enic_set_niccfg(struct enic *enic, u8 rss_default_cpu,
	u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable)
{
	const u8 tso_ipid_split_en = 0;
	const u8 ig_vlan_strip_en = 1;
	int err;

	/* Enable VLAN tag stripping.
	 */

	spin_lock_bh(&enic->devcmd_lock);
	err = enic_set_nic_cfg(enic,
		rss_default_cpu, rss_hash_type,
		rss_hash_bits, rss_base_cpu,
		rss_enable, tso_ipid_split_en,
		ig_vlan_strip_en);
	spin_unlock_bh(&enic->devcmd_lock);

	return err;
}

static int enic_set_rss_nic_cfg(struct enic *enic)
{
	struct device *dev = enic_get_dev(enic);
	const u8 rss_default_cpu = 0;
	const u8 rss_hash_bits = 7;
	const u8 rss_base_cpu = 0;
	u8 rss_hash_type;
	int res;
	u8 rss_enable = ENIC_SETTING(enic, RSS) && (enic->rq_count > 1);

	spin_lock_bh(&enic->devcmd_lock);
	res = vnic_dev_capable_rss_hash_type(enic->vdev, &rss_hash_type);
	spin_unlock_bh(&enic->devcmd_lock);
	if (res) {
		/* defaults for old adapters
		 */
		rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 |
				NIC_CFG_RSS_HASH_TYPE_TCP_IPV4 |
				NIC_CFG_RSS_HASH_TYPE_IPV6 |
				NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
	}

	if (rss_enable) {
		if (!enic_set_rsskey(enic)) {
			if (enic_set_rsscpu(enic, rss_hash_bits)) {
				rss_enable = 0;
				dev_warn(dev, "RSS disabled, failed to set RSS cpu indirection table\n");
			}
		} else {
			rss_enable = 0;
			dev_warn(dev, "RSS disabled, failed to set RSS key\n");
		}
	}

	return enic_set_niccfg(enic, rss_default_cpu, rss_hash_type,
			       rss_hash_bits, rss_base_cpu, rss_enable);
}
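
/* Mark the devcmd proxy API busy/idle so callers coming in through the
 * enic_api interface (the infiniband path referenced in enic_reset() below)
 * stay off the device while a reset reconfigures it.
 */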
static void enic_set_api_busy(struct enic *enic, bool busy)
{
	spin_lock(&enic->enic_api_lock);
	enic->enic_api_busy = busy;
	spin_unlock(&enic->enic_api_lock);
}

static void enic_reset(struct work_struct *work)
{
	struct enic *enic = container_of(work, struct enic, reset);

	if (!netif_running(enic->netdev))
		return;

	rtnl_lock();

	/* Stop any activity from infiniband */
	enic_set_api_busy(enic, true);

	enic_stop(enic->netdev);
	enic_dev_soft_reset(enic);
	enic_reset_addr_lists(enic);
	enic_init_vnic_resources(enic);
	enic_set_rss_nic_cfg(enic);
	enic_dev_set_ig_vlan_rewrite_mode(enic);
	enic_ext_cq(enic);
	enic_open(enic->netdev);

	/* Allow infiniband to fiddle with the device again */
	enic_set_api_busy(enic, false);

	call_netdevice_notifiers(NETDEV_REBOOT, enic->netdev);

	rtnl_unlock();
}

static void enic_tx_hang_reset(struct work_struct *work)
{
	struct enic *enic = container_of(work, struct enic, tx_hang_reset);

	rtnl_lock();

	/* Stop any activity from infiniband */
	enic_set_api_busy(enic, true);

	enic_dev_hang_notify(enic);
	enic_stop(enic->netdev);
	enic_dev_hang_reset(enic);
	enic_reset_addr_lists(enic);
	enic_init_vnic_resources(enic);
	enic_set_rss_nic_cfg(enic);
	enic_dev_set_ig_vlan_rewrite_mode(enic);
	enic_ext_cq(enic);
	enic_open(enic->netdev);

	/* Allow infiniband to fiddle with the device again */
	enic_set_api_busy(enic, false);

	call_netdevice_notifiers(NETDEV_REBOOT, enic->netdev);

	rtnl_unlock();
}

static int enic_set_intr_mode(struct enic *enic)
{
	unsigned int i;
	int num_intr;

	/* Set interrupt mode (INTx, MSI, MSI-X) depending
	 * on system capabilities.
	 *
	 * Try MSI-X first
	 */

	if (enic->config.intr_mode < 1 &&
	    enic->intr_avail >= ENIC_MSIX_MIN_INTR) {
		for (i = 0; i < enic->intr_avail; i++)
			enic->msix_entry[i].entry = i;

		num_intr = pci_enable_msix_range(enic->pdev, enic->msix_entry,
						 ENIC_MSIX_MIN_INTR,
						 enic->intr_avail);
		if (num_intr > 0) {
			vnic_dev_set_intr_mode(enic->vdev,
					       VNIC_DEV_INTR_MODE_MSIX);
			enic->intr_avail = num_intr;
			return 0;
		}
	}

	/* Next try MSI
	 *
	 * We need 1 INTR
	 */

	if (enic->config.intr_mode < 2 &&
	    enic->intr_avail >= 1 &&
	    !pci_enable_msi(enic->pdev)) {
		enic->intr_avail = 1;
		vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_MSI);
		return 0;
	}

	/* Next try INTx
	 *
	 * We need 3 INTRs
	 * (the first INTR is used for WQ/RQ)
	 * (the second INTR is used for WQ/RQ errors)
	 * (the last INTR is used for notifications)
	 */

	if (enic->config.intr_mode < 3 &&
	    enic->intr_avail >= 3) {
		enic->intr_avail = 3;
		vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_INTX);
		return 0;
	}

	vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_UNKNOWN);

	return -EINVAL;
}

static void enic_clear_intr_mode(struct enic *enic)
{
	switch (vnic_dev_get_intr_mode(enic->vdev)) {
	case VNIC_DEV_INTR_MODE_MSIX:
		pci_disable_msix(enic->pdev);
		break;
	case VNIC_DEV_INTR_MODE_MSI:
		pci_disable_msi(enic->pdev);
		break;
	default:
		break;
	}

	vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_UNKNOWN);
}
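
/* Trim the wq/rq/cq/interrupt counts reported by firmware down to what the
 * negotiated interrupt mode, the driver maximums and (for kdump) a minimal
 * memory footprint allow.
 */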
static int enic_adjust_resources(struct enic *enic)
{
	unsigned int max_queues;
	unsigned int rq_default;
	unsigned int rq_avail;
	unsigned int wq_avail;

	if (enic->rq_avail < 1 || enic->wq_avail < 1 || enic->cq_avail < 2) {
		dev_err(enic_get_dev(enic),
			"Not enough resources available rq: %d wq: %d cq: %d\n",
			enic->rq_avail, enic->wq_avail,
			enic->cq_avail);
		return -ENOSPC;
	}

	if (is_kdump_kernel()) {
		dev_info(enic_get_dev(enic), "Running from within kdump kernel. Using minimal resources\n");
		enic->rq_avail = 1;
		enic->wq_avail = 1;
		enic->config.rq_desc_count = ENIC_MIN_RQ_DESCS;
		enic->config.wq_desc_count = ENIC_MIN_WQ_DESCS;
		enic->config.mtu = min_t(u16, 1500, enic->config.mtu);
	}

	/* if RSS isn't set, then we can only use one RQ */
	if (!ENIC_SETTING(enic, RSS))
		enic->rq_avail = 1;

	switch (vnic_dev_get_intr_mode(enic->vdev)) {
	case VNIC_DEV_INTR_MODE_INTX:
	case VNIC_DEV_INTR_MODE_MSI:
		enic->rq_count = 1;
		enic->wq_count = 1;
		enic->cq_count = 2;
		enic->intr_count = enic->intr_avail;
		break;
	case VNIC_DEV_INTR_MODE_MSIX:
		/* Adjust the number of wqs/rqs/cqs/interrupts that will be
		 * used based on which resource is the most constrained
		 */
		wq_avail = min(enic->wq_avail, ENIC_WQ_MAX);
		rq_default = max(netif_get_num_default_rss_queues(),
				 ENIC_RQ_MIN_DEFAULT);
		rq_avail = min3(enic->rq_avail, ENIC_RQ_MAX, rq_default);
		max_queues = min(enic->cq_avail,
				 enic->intr_avail - ENIC_MSIX_RESERVED_INTR);
		if (wq_avail + rq_avail <= max_queues) {
			enic->rq_count = rq_avail;
			enic->wq_count = wq_avail;
		} else {
			/* recalculate wq/rq count */
			if (rq_avail < wq_avail) {
				enic->rq_count = min(rq_avail, max_queues / 2);
				enic->wq_count = max_queues - enic->rq_count;
			} else {
				enic->wq_count = min(wq_avail, max_queues / 2);
				enic->rq_count = max_queues - enic->wq_count;
			}
		}
		enic->cq_count = enic->rq_count + enic->wq_count;
		enic->intr_count = enic->cq_count + ENIC_MSIX_RESERVED_INTR;

		break;
	default:
		dev_err(enic_get_dev(enic), "Unknown interrupt mode\n");
		return -EINVAL;
	}

	return 0;
}

static void enic_get_queue_stats_rx(struct net_device *dev, int idx,
				    struct netdev_queue_stats_rx *rxs)
{
	struct enic *enic = netdev_priv(dev);
	struct enic_rq_stats *rqstats = &enic->rq[idx].stats;

	rxs->bytes = rqstats->bytes;
	rxs->packets = rqstats->packets;
	rxs->hw_drops = rqstats->bad_fcs + rqstats->pkt_truncated;
	rxs->hw_drop_overruns = rqstats->pkt_truncated;
	rxs->csum_unnecessary = rqstats->csum_unnecessary +
				rqstats->csum_unnecessary_encap;
	rxs->alloc_fail = rqstats->pp_alloc_fail;
}

static void enic_get_queue_stats_tx(struct net_device *dev, int idx,
				    struct netdev_queue_stats_tx *txs)
{
	struct enic *enic = netdev_priv(dev);
	struct enic_wq_stats *wqstats = &enic->wq[idx].stats;

	txs->bytes = wqstats->bytes;
	txs->packets = wqstats->packets;
	txs->csum_none = wqstats->csum_none;
	txs->needs_csum = wqstats->csum_partial + wqstats->encap_csum +
			  wqstats->tso;
	txs->hw_gso_packets = wqstats->tso;
	txs->stop = wqstats->stopped;
	txs->wake = wqstats->wake;
}

static void enic_get_base_stats(struct net_device *dev,
				struct netdev_queue_stats_rx *rxs,
				struct netdev_queue_stats_tx *txs)
{
	rxs->bytes = 0;
	rxs->packets = 0;
	rxs->hw_drops = 0;
	rxs->hw_drop_overruns = 0;
	rxs->csum_unnecessary = 0;
	rxs->alloc_fail = 0;
	txs->bytes = 0;
	txs->packets = 0;
	txs->csum_none = 0;
	txs->needs_csum = 0;
	txs->hw_gso_packets = 0;
	txs->stop = 0;
	txs->wake = 0;
}
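
/* net_device_ops used for dynamic (port-profile based) vNICs and SR-IOV VFs;
 * MAC address changes are routed through the port-profile aware handler.
 */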
static const struct net_device_ops enic_netdev_dynamic_ops = {
	.ndo_open		= enic_open,
	.ndo_stop		= enic_stop,
	.ndo_start_xmit		= enic_hard_start_xmit,
	.ndo_get_stats64	= enic_get_stats,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_set_rx_mode	= enic_set_rx_mode,
	.ndo_set_mac_address	= enic_set_mac_address_dynamic,
	.ndo_change_mtu		= enic_change_mtu,
	.ndo_vlan_rx_add_vid	= enic_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= enic_vlan_rx_kill_vid,
	.ndo_tx_timeout		= enic_tx_timeout,
	.ndo_set_vf_port	= enic_set_vf_port,
	.ndo_get_vf_port	= enic_get_vf_port,
	.ndo_set_vf_mac		= enic_set_vf_mac,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= enic_poll_controller,
#endif
#ifdef CONFIG_RFS_ACCEL
	.ndo_rx_flow_steer	= enic_rx_flow_steer,
#endif
	.ndo_features_check	= enic_features_check,
};

static const struct net_device_ops enic_netdev_ops = {
	.ndo_open		= enic_open,
	.ndo_stop		= enic_stop,
	.ndo_start_xmit		= enic_hard_start_xmit,
	.ndo_get_stats64	= enic_get_stats,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_set_mac_address	= enic_set_mac_address,
	.ndo_set_rx_mode	= enic_set_rx_mode,
	.ndo_change_mtu		= enic_change_mtu,
	.ndo_vlan_rx_add_vid	= enic_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= enic_vlan_rx_kill_vid,
	.ndo_tx_timeout		= enic_tx_timeout,
	.ndo_set_vf_port	= enic_set_vf_port,
	.ndo_get_vf_port	= enic_get_vf_port,
	.ndo_set_vf_mac		= enic_set_vf_mac,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= enic_poll_controller,
#endif
#ifdef CONFIG_RFS_ACCEL
	.ndo_rx_flow_steer	= enic_rx_flow_steer,
#endif
	.ndo_features_check	= enic_features_check,
};

static const struct netdev_stat_ops enic_netdev_stat_ops = {
	.get_queue_stats_rx	= enic_get_queue_stats_rx,
	.get_queue_stats_tx	= enic_get_queue_stats_tx,
	.get_base_stats		= enic_get_base_stats,
};

static void enic_free_enic_resources(struct enic *enic)
{
	kfree(enic->wq);
	enic->wq = NULL;

	kfree(enic->rq);
	enic->rq = NULL;

	kfree(enic->cq);
	enic->cq = NULL;

	kfree(enic->napi);
	enic->napi = NULL;

	kfree(enic->msix_entry);
	enic->msix_entry = NULL;

	kfree(enic->msix);
	enic->msix = NULL;

	kfree(enic->intr);
	enic->intr = NULL;
}

static int enic_alloc_enic_resources(struct enic *enic)
{
	enic->wq = kcalloc(enic->wq_avail, sizeof(struct enic_wq), GFP_KERNEL);
	if (!enic->wq)
		goto free_queues;

	enic->rq = kcalloc(enic->rq_avail, sizeof(struct enic_rq), GFP_KERNEL);
	if (!enic->rq)
		goto free_queues;

	enic->cq = kcalloc(enic->cq_avail, sizeof(struct vnic_cq), GFP_KERNEL);
	if (!enic->cq)
		goto free_queues;

	enic->napi = kcalloc(enic->wq_avail + enic->rq_avail,
			     sizeof(struct napi_struct), GFP_KERNEL);
	if (!enic->napi)
		goto free_queues;

	enic->msix_entry = kcalloc(enic->intr_avail, sizeof(struct msix_entry),
				   GFP_KERNEL);
	if (!enic->msix_entry)
		goto free_queues;

	enic->msix = kcalloc(enic->intr_avail, sizeof(struct enic_msix_entry),
			     GFP_KERNEL);
	if (!enic->msix)
		goto free_queues;

	enic->intr = kcalloc(enic->intr_avail, sizeof(struct vnic_intr),
			     GFP_KERNEL);
	if (!enic->intr)
		goto free_queues;

	return 0;

free_queues:
	enic_free_enic_resources(enic);
	return -ENOMEM;
}
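
/* Undo enic_dev_init(): delete the NAPI instances, release the vNIC
 * resources, return to an unknown interrupt mode and free the affinity
 * masks and per-queue bookkeeping arrays.
 */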
static void enic_dev_deinit(struct enic *enic)
{
	unsigned int i;

	for (i = 0; i < enic->rq_count; i++)
		__netif_napi_del(&enic->napi[i]);

	if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX)
		for (i = 0; i < enic->wq_count; i++)
			__netif_napi_del(&enic->napi[enic_cq_wq(enic, i)]);

	/* observe RCU grace period after __netif_napi_del() calls */
	synchronize_net();

	enic_free_vnic_resources(enic);
	enic_clear_intr_mode(enic);
	enic_free_affinity_hint(enic);
	enic_free_enic_resources(enic);
}

static int enic_dev_init(struct enic *enic)
{
	struct device *dev = enic_get_dev(enic);
	struct net_device *netdev = enic->netdev;
	unsigned int i;
	int err;

	/* Get interrupt coalesce timer info */
	err = enic_dev_intr_coal_timer_info(enic);
	if (err) {
		dev_warn(dev, "Using default conversion factor for interrupt coalesce timer\n");
		vnic_dev_intr_coal_timer_info_default(enic->vdev);
	}

	/* Get vNIC configuration
	 */

	err = enic_get_vnic_config(enic);
	if (err) {
		dev_err(dev, "Get vNIC configuration failed, aborting\n");
		return err;
	}

	/* Get available resource counts
	 */

	enic_get_res_counts(enic);

	enic_ext_cq(enic);

	err = enic_alloc_enic_resources(enic);
	if (err) {
		dev_err(dev, "Failed to allocate enic resources\n");
		return err;
	}

	/* Set interrupt mode based on system capabilities */

	err = enic_set_intr_mode(enic);
	if (err) {
		dev_err(dev, "Failed to set intr mode based on resource counts and system capabilities, aborting\n");
		goto err_out_free_vnic_resources;
	}

	/* Adjust resource counts based on most constrained resources */
	err = enic_adjust_resources(enic);
	if (err) {
		dev_err(dev, "Failed to adjust resources\n");
		goto err_out_free_vnic_resources;
	}

	/* Allocate and configure vNIC resources
	 */

	err = enic_alloc_vnic_resources(enic);
	if (err) {
		dev_err(dev, "Failed to alloc vNIC resources, aborting\n");
		goto err_out_free_vnic_resources;
	}

	enic_init_vnic_resources(enic);

	err = enic_set_rss_nic_cfg(enic);
	if (err) {
		dev_err(dev, "Failed to config nic, aborting\n");
		goto err_out_free_vnic_resources;
	}

	switch (vnic_dev_get_intr_mode(enic->vdev)) {
	default:
		netif_napi_add(netdev, &enic->napi[0], enic_poll);
		break;
	case VNIC_DEV_INTR_MODE_MSIX:
		for (i = 0; i < enic->rq_count; i++) {
			netif_napi_add(netdev, &enic->napi[i],
				       enic_poll_msix_rq);
		}
		for (i = 0; i < enic->wq_count; i++)
			netif_napi_add(netdev,
				       &enic->napi[enic_cq_wq(enic, i)],
				       enic_poll_msix_wq);
		break;
	}

	return 0;

err_out_free_vnic_resources:
	enic_free_affinity_hint(enic);
	enic_clear_intr_mode(enic);
	enic_free_vnic_resources(enic);
	enic_free_enic_resources(enic);

	return err;
}

static void enic_iounmap(struct enic *enic)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(enic->bar); i++)
		if (enic->bar[i].vaddr)
			iounmap(enic->bar[i].vaddr);
}
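
/* PCI probe: map the vNIC BARs, register with the devcmd firmware
 * interface, bring the device to a known open state, discover features,
 * and register the net_device.
 */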
static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	struct device *dev = &pdev->dev;
	struct net_device *netdev;
	struct enic *enic;
	int using_dac = 0;
	unsigned int i;
	int err;
#ifdef CONFIG_PCI_IOV
	int pos = 0;
#endif
	int num_pps = 1;

	/* Allocate net device structure and initialize.  Private
	 * instance data is initialized to zero.
	 */

	netdev = alloc_etherdev_mqs(sizeof(struct enic),
				    ENIC_RQ_MAX, ENIC_WQ_MAX);
	if (!netdev)
		return -ENOMEM;

	pci_set_drvdata(pdev, netdev);

	SET_NETDEV_DEV(netdev, &pdev->dev);

	enic = netdev_priv(netdev);
	enic->netdev = netdev;
	enic->pdev = pdev;

	/* Setup PCI resources
	 */

	err = pci_enable_device_mem(pdev);
	if (err) {
		dev_err(dev, "Cannot enable PCI device, aborting\n");
		goto err_out_free_netdev;
	}

	err = pci_request_regions(pdev, DRV_NAME);
	if (err) {
		dev_err(dev, "Cannot request PCI regions, aborting\n");
		goto err_out_disable_device;
	}

	pci_set_master(pdev);

	/* Query PCI controller on system for DMA addressing
	 * limitation for the device.  Try 47-bit first, and
	 * fall back to 32-bit.
	 */

	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(47));
	if (err) {
		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(dev, "No usable DMA configuration, aborting\n");
			goto err_out_release_regions;
		}
	} else {
		using_dac = 1;
	}

	/* Map vNIC resources from BAR0-5
	 */

	for (i = 0; i < ARRAY_SIZE(enic->bar); i++) {
		if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
			continue;
		enic->bar[i].len = pci_resource_len(pdev, i);
		enic->bar[i].vaddr = pci_iomap(pdev, i, enic->bar[i].len);
		if (!enic->bar[i].vaddr) {
			dev_err(dev, "Cannot memory-map BAR %d, aborting\n", i);
			err = -ENODEV;
			goto err_out_iounmap;
		}
		enic->bar[i].bus_addr = pci_resource_start(pdev, i);
	}

	/* Register vNIC device
	 */

	enic->vdev = vnic_dev_register(NULL, enic, pdev, enic->bar,
				       ARRAY_SIZE(enic->bar));
	if (!enic->vdev) {
		dev_err(dev, "vNIC registration failed, aborting\n");
		err = -ENODEV;
		goto err_out_iounmap;
	}

	err = vnic_devcmd_init(enic->vdev);
	if (err)
		goto err_out_vnic_unregister;

#ifdef CONFIG_PCI_IOV
	/* Get number of subvnics */
	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
	if (pos) {
		pci_read_config_word(pdev, pos + PCI_SRIOV_TOTAL_VF,
				     &enic->num_vfs);
		if (enic->num_vfs) {
			err = pci_enable_sriov(pdev, enic->num_vfs);
			if (err) {
				dev_err(dev, "SRIOV enable failed, aborting. pci_enable_sriov() returned %d\n",
					err);
				goto err_out_vnic_unregister;
			}
			enic->priv_flags |= ENIC_SRIOV_ENABLED;
			num_pps = enic->num_vfs;
		}
	}
#endif

	/* Allocate structure for port profiles */
	enic->pp = kcalloc(num_pps, sizeof(*enic->pp), GFP_KERNEL);
	if (!enic->pp) {
		err = -ENOMEM;
		goto err_out_disable_sriov_pp;
	}

	/* Issue device open to get device in known state
	 */

	err = enic_dev_open(enic);
	if (err) {
		dev_err(dev, "vNIC dev open failed, aborting\n");
		goto err_out_disable_sriov;
	}

	/* Setup devcmd lock
	 */

	spin_lock_init(&enic->devcmd_lock);
	spin_lock_init(&enic->enic_api_lock);

	/*
	 * Set ingress vlan rewrite mode before vnic initialization
	 */

	err = enic_dev_set_ig_vlan_rewrite_mode(enic);
	if (err) {
		dev_err(dev,
			"Failed to set ingress vlan rewrite mode, aborting.\n");
		goto err_out_dev_close;
	}

	/* Issue device init to initialize the vnic-to-switch link.
	 * We'll start with carrier off and wait for link UP
	 * notification later to turn on carrier.  We don't need
	 * to wait here for the vnic-to-switch link initialization
	 * to complete; link UP notification is the indication that
	 * the process is complete.
	 */

	netif_carrier_off(netdev);

	/* Do not call dev_init for a dynamic vnic.
	 * For a dynamic vnic, init_prov_info will be
	 * called later by an upper layer.
	 */

	if (!enic_is_dynamic(enic)) {
		err = vnic_dev_init(enic->vdev, 0);
		if (err) {
			dev_err(dev, "vNIC dev init failed, aborting\n");
			goto err_out_dev_close;
		}
	}

	err = enic_dev_init(enic);
	if (err) {
		dev_err(dev, "Device initialization failed, aborting\n");
		goto err_out_dev_close;
	}

	netif_set_real_num_tx_queues(netdev, enic->wq_count);
	netif_set_real_num_rx_queues(netdev, enic->rq_count);

	/* Setup notification timer, HW reset task, and wq locks
	 */

	timer_setup(&enic->notify_timer, enic_notify_timer, 0);

	enic_rfs_flw_tbl_init(enic);
	INIT_WORK(&enic->reset, enic_reset);
	INIT_WORK(&enic->tx_hang_reset, enic_tx_hang_reset);
	INIT_WORK(&enic->change_mtu_work, enic_change_mtu_work);

	for (i = 0; i < enic->wq_count; i++)
		spin_lock_init(&enic->wq[i].lock);

	/* Register net device
	 */

	enic->port_mtu = enic->config.mtu;

	err = enic_set_mac_addr(netdev, enic->mac_addr);
	if (err) {
		dev_err(dev, "Invalid MAC address, aborting\n");
		goto err_out_dev_deinit;
	}

	enic->tx_coalesce_usecs = enic->config.intr_timer_usec;
	/* rx coalesce time is already initialized.  This gets used
	 * if adaptive coalescing is turned off
	 */
	enic->rx_coalesce_usecs = enic->tx_coalesce_usecs;

	if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic))
		netdev->netdev_ops = &enic_netdev_dynamic_ops;
	else
		netdev->netdev_ops = &enic_netdev_ops;
	netdev->stat_ops = &enic_netdev_stat_ops;

	netdev->watchdog_timeo = 2 * HZ;
	enic_set_ethtool_ops(netdev);

	netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
	if (ENIC_SETTING(enic, LOOP)) {
		netdev->features &= ~NETIF_F_HW_VLAN_CTAG_TX;
		enic->loop_enable = 1;
		enic->loop_tag = enic->config.loop_tag;
		dev_info(dev, "loopback tag=0x%04x\n", enic->loop_tag);
	}
	if (ENIC_SETTING(enic, TXCSUM))
		netdev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM;
	if (ENIC_SETTING(enic, TSO))
		netdev->hw_features |= NETIF_F_TSO |
			NETIF_F_TSO6 | NETIF_F_TSO_ECN;
	if (ENIC_SETTING(enic, RSS))
		netdev->hw_features |= NETIF_F_RXHASH;
	if (ENIC_SETTING(enic, RXCSUM))
		netdev->hw_features |= NETIF_F_RXCSUM;
	if (ENIC_SETTING(enic, VXLAN)) {
		u64 patch_level;
		u64 a1 = 0;

		netdev->hw_enc_features |= NETIF_F_RXCSUM |
					   NETIF_F_TSO |
					   NETIF_F_TSO6 |
					   NETIF_F_TSO_ECN |
					   NETIF_F_GSO_UDP_TUNNEL |
					   NETIF_F_HW_CSUM |
					   NETIF_F_GSO_UDP_TUNNEL_CSUM;
		netdev->hw_features |= netdev->hw_enc_features;
		/* get bit mask from hw about supported offload bit level
		 * BIT(0) = fw supports patch_level 0
		 *	    fcoe bit = encap
		 *	    fcoe_fc_crc_ok = outer csum ok
		 * BIT(1) = always set by fw
		 * BIT(2) = fw supports patch_level 2
		 *	    BIT(0) in rss_hash = encap
		 *	    BIT(1,2) in rss_hash = outer_ip_csum_ok/
		 *				   outer_tcp_csum_ok
		 * used in enic_rq_indicate_buf
		 */
		err = vnic_dev_get_supported_feature_ver(enic->vdev,
							 VIC_FEATURE_VXLAN,
							 &patch_level, &a1);
		if (err)
			patch_level = 0;
		enic->vxlan.flags = (u8)a1;
		/* mask bits that are supported by driver
		 */
		patch_level &= BIT_ULL(0) | BIT_ULL(2);
		patch_level = fls(patch_level);
		patch_level = patch_level ? patch_level - 1 : 0;
		enic->vxlan.patch_level = patch_level;

		if (vnic_dev_get_res_count(enic->vdev, RES_TYPE_WQ) == 1 ||
		    enic->vxlan.flags & ENIC_VXLAN_MULTI_WQ) {
			netdev->udp_tunnel_nic_info = &enic_udp_tunnels_v4;
			if (enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6)
				netdev->udp_tunnel_nic_info = &enic_udp_tunnels;
		}
	}

	netdev->features |= netdev->hw_features;
	netdev->vlan_features |= netdev->features;

#ifdef CONFIG_RFS_ACCEL
	netdev->hw_features |= NETIF_F_NTUPLE;
#endif

	if (using_dac)
		netdev->features |= NETIF_F_HIGHDMA;

	netdev->priv_flags |= IFF_UNICAST_FLT;

	/* MTU range: 68 - 9000 */
	netdev->min_mtu = ENIC_MIN_MTU;
	netdev->max_mtu = ENIC_MAX_MTU;
	netdev->mtu = enic->port_mtu;

	err = register_netdev(netdev);
	if (err) {
		dev_err(dev, "Cannot register net device, aborting\n");
		goto err_out_dev_deinit;
	}

	return 0;

err_out_dev_deinit:
	enic_dev_deinit(enic);
err_out_dev_close:
	vnic_dev_close(enic->vdev);
err_out_disable_sriov:
	kfree(enic->pp);
err_out_disable_sriov_pp:
#ifdef CONFIG_PCI_IOV
	if (enic_sriov_enabled(enic)) {
		pci_disable_sriov(pdev);
		enic->priv_flags &= ~ENIC_SRIOV_ENABLED;
	}
#endif
err_out_vnic_unregister:
	vnic_dev_unregister(enic->vdev);
err_out_iounmap:
	enic_iounmap(enic);
err_out_release_regions:
	pci_release_regions(pdev);
err_out_disable_device:
	pci_disable_device(pdev);
err_out_free_netdev:
	free_netdev(netdev);

	return err;
}

static void enic_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);

	if (netdev) {
		struct enic *enic = netdev_priv(netdev);

		cancel_work_sync(&enic->reset);
		cancel_work_sync(&enic->change_mtu_work);
		unregister_netdev(netdev);
		enic_dev_deinit(enic);
		vnic_dev_close(enic->vdev);
#ifdef CONFIG_PCI_IOV
		if (enic_sriov_enabled(enic)) {
			pci_disable_sriov(pdev);
			enic->priv_flags &= ~ENIC_SRIOV_ENABLED;
		}
#endif
		kfree(enic->pp);
		vnic_dev_unregister(enic->vdev);
		enic_iounmap(enic);
		pci_release_regions(pdev);
		pci_disable_device(pdev);
		free_netdev(netdev);
	}
}

static struct pci_driver enic_driver = {
	.name = DRV_NAME,
	.id_table = enic_id_table,
	.probe = enic_probe,
	.remove = enic_remove,
};

module_pci_driver(enic_driver);