/*
 * Copyright (C) 2015 Cavium, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/if_vlan.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/log2.h>
#include <linux/prefetch.h>
#include <linux/irq.h>
#include <linux/iommu.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/filter.h>

#include "nic_reg.h"
#include "nic.h"
#include "nicvf_queues.h"
#include "thunder_bgx.h"

#define DRV_NAME	"thunder-nicvf"
#define DRV_VERSION	"1.0"

/* Supported devices */
static const struct pci_device_id nicvf_id_table[] = {
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
			 PCI_DEVICE_ID_THUNDER_NIC_VF,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_88XX_NIC_VF) },
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
			 PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_88XX_PASS1_NIC_VF) },
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
			 PCI_DEVICE_ID_THUNDER_NIC_VF,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_81XX_NIC_VF) },
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
			 PCI_DEVICE_ID_THUNDER_NIC_VF,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_83XX_NIC_VF) },
	{ 0, }  /* end of table */
};

MODULE_AUTHOR("Sunil Goutham");
MODULE_DESCRIPTION("Cavium Thunder NIC Virtual Function Driver");
MODULE_LICENSE("GPL v2");
MODULE_VERSION(DRV_VERSION);
MODULE_DEVICE_TABLE(pci, nicvf_id_table);

static int debug = 0x00;
module_param(debug, int, 0644);
MODULE_PARM_DESC(debug, "Debug message level bitmap");

static int cpi_alg = CPI_ALG_NONE;
module_param(cpi_alg, int, S_IRUGO);
MODULE_PARM_DESC(cpi_alg,
		 "CPI algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");

struct nicvf_xdp_tx {
	u64 dma_addr;
	u8  qidx;
};

static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
{
	if (nic->sqs_mode)
		return qidx + ((nic->sqs_id + 1) * MAX_CMP_QUEUES_PER_QS);
	else
		return qidx;
}

/* The Cavium ThunderX network controller can *only* be found in SoCs
 * containing the ThunderX ARM64 CPU implementation.  All accesses to the
 * device registers on this platform are implicitly strongly ordered with
 * respect to memory accesses.  So writeq_relaxed() and readq_relaxed() are
 * safe to use with no memory barriers in this driver.  The readq()/writeq()
 * functions add explicit ordering operations which in this case are
 * redundant, and only add overhead.
 */

/* Register read/write APIs */
void nicvf_reg_write(struct nicvf *nic, u64 offset, u64 val)
{
	writeq_relaxed(val, nic->reg_base + offset);
}

u64 nicvf_reg_read(struct nicvf *nic, u64 offset)
{
	return readq_relaxed(nic->reg_base + offset);
}

void nicvf_queue_reg_write(struct nicvf *nic, u64 offset,
			   u64 qidx, u64 val)
{
	void __iomem *addr = nic->reg_base + offset;

	writeq_relaxed(val, addr + (qidx << NIC_Q_NUM_SHIFT));
}

u64 nicvf_queue_reg_read(struct nicvf *nic, u64 offset, u64 qidx)
{
	void __iomem *addr = nic->reg_base + offset;

	return readq_relaxed(addr + (qidx << NIC_Q_NUM_SHIFT));
}

/* VF -> PF mailbox communication */
static void nicvf_write_to_mbx(struct nicvf *nic, union nic_mbx *mbx)
{
	u64 *msg = (u64 *)mbx;

	nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 0, msg[0]);
	nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 8, msg[1]);
}

int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
{
	int timeout = NIC_MBOX_MSG_TIMEOUT;
	int sleep = 10;

	nic->pf_acked = false;
	nic->pf_nacked = false;

	nicvf_write_to_mbx(nic, mbx);

	/* Wait for the message to be acked, timeout 2sec */
	while (!nic->pf_acked) {
		if (nic->pf_nacked) {
			netdev_err(nic->netdev,
				   "PF NACK to mbox msg 0x%02x from VF%d\n",
				   (mbx->msg.msg & 0xFF), nic->vf_id);
			return -EINVAL;
		}
		msleep(sleep);
		if (nic->pf_acked)
			break;
		timeout -= sleep;
		if (!timeout) {
			netdev_err(nic->netdev,
				   "PF didn't ACK to mbox msg 0x%02x from VF%d\n",
				   (mbx->msg.msg & 0xFF), nic->vf_id);
			return -EBUSY;
		}
	}
	return 0;
}

/* Check if the VF is able to communicate with the PF and also get
 * the VNIC number this VF is associated with.
 */
static int nicvf_check_pf_ready(struct nicvf *nic)
{
	union nic_mbx mbx = {};

	mbx.msg.msg = NIC_MBOX_MSG_READY;
	if (nicvf_send_msg_to_pf(nic, &mbx)) {
		netdev_err(nic->netdev,
			   "PF didn't respond to READY msg\n");
		return 0;
	}

	return 1;
}

static void nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx)
{
	if (bgx->rx)
		nic->bgx_stats.rx_stats[bgx->idx] = bgx->stats;
	else
		nic->bgx_stats.tx_stats[bgx->idx] = bgx->stats;
}

static void nicvf_handle_mbx_intr(struct nicvf *nic)
{
	union nic_mbx mbx = {};
	u64 *mbx_data;
	u64 mbx_addr;
	int i;

	mbx_addr = NIC_VF_PF_MAILBOX_0_1;
	mbx_data = (u64 *)&mbx;

	for (i = 0; i < NIC_PF_VF_MAILBOX_SIZE; i++) {
		*mbx_data = nicvf_reg_read(nic, mbx_addr);
		mbx_data++;
		mbx_addr += sizeof(u64);
	}

	netdev_dbg(nic->netdev, "Mbox message: msg: 0x%x\n", mbx.msg.msg);
	switch (mbx.msg.msg) {
	case NIC_MBOX_MSG_READY:
		nic->pf_acked = true;
		nic->vf_id = mbx.nic_cfg.vf_id & 0x7F;
		nic->tns_mode = mbx.nic_cfg.tns_mode & 0x7F;
		nic->node = mbx.nic_cfg.node_id;
		if (!nic->set_mac_pending)
			ether_addr_copy(nic->netdev->dev_addr,
					mbx.nic_cfg.mac_addr);
		nic->sqs_mode = mbx.nic_cfg.sqs_mode;
		nic->loopback_supported = mbx.nic_cfg.loopback_supported;
		nic->link_up = false;
		nic->duplex = 0;
		nic->speed = 0;
		break;
	case NIC_MBOX_MSG_ACK:
		nic->pf_acked = true;
		break;
	case NIC_MBOX_MSG_NACK:
		nic->pf_nacked = true;
		break;
	case NIC_MBOX_MSG_RSS_SIZE:
		nic->rss_info.rss_size = mbx.rss_size.ind_tbl_size;
		nic->pf_acked = true;
		break;
	case NIC_MBOX_MSG_BGX_STATS:
		nicvf_read_bgx_stats(nic, &mbx.bgx_stats);
		nic->pf_acked = true;
		break;
	case NIC_MBOX_MSG_BGX_LINK_CHANGE:
		nic->pf_acked = true;
		nic->link_up = mbx.link_status.link_up;
		nic->duplex = mbx.link_status.duplex;
		nic->speed = mbx.link_status.speed;
		nic->mac_type = mbx.link_status.mac_type;
		if (nic->link_up) {
			netdev_info(nic->netdev, "Link is Up %d Mbps %s duplex\n",
				    nic->speed,
				    nic->duplex == DUPLEX_FULL ?
				    "Full" : "Half");
			netif_carrier_on(nic->netdev);
			netif_tx_start_all_queues(nic->netdev);
		} else {
			netdev_info(nic->netdev, "Link is Down\n");
			netif_carrier_off(nic->netdev);
			netif_tx_stop_all_queues(nic->netdev);
		}
		break;
	case NIC_MBOX_MSG_ALLOC_SQS:
		nic->sqs_count = mbx.sqs_alloc.qs_count;
		nic->pf_acked = true;
		break;
	case NIC_MBOX_MSG_SNICVF_PTR:
		/* Primary VF: make note of secondary VF's pointer
		 * to be used during packet transmission.
		 */
		nic->snicvf[mbx.nicvf.sqs_id] =
			(struct nicvf *)mbx.nicvf.nicvf;
		nic->pf_acked = true;
		break;
	case NIC_MBOX_MSG_PNICVF_PTR:
		/* Secondary VF/Qset: make note of primary VF's pointer
		 * to be used during packet reception, to hand over the
		 * packet to the primary VF's netdev.
		 */
		nic->pnicvf = (struct nicvf *)mbx.nicvf.nicvf;
		nic->pf_acked = true;
		break;
	case NIC_MBOX_MSG_PFC:
		nic->pfc.autoneg = mbx.pfc.autoneg;
		nic->pfc.fc_rx = mbx.pfc.fc_rx;
		nic->pfc.fc_tx = mbx.pfc.fc_tx;
		nic->pf_acked = true;
		break;
	default:
		netdev_err(nic->netdev,
			   "Invalid message from PF, msg 0x%x\n", mbx.msg.msg);
		break;
	}
	nicvf_clear_intr(nic, NICVF_INTR_MBOX, 0);
}
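
/* Send this VF's current MAC address to the PF via the mailbox */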
static int nicvf_hw_set_mac_addr(struct nicvf *nic, struct net_device *netdev)
{
	union nic_mbx mbx = {};

	mbx.mac.msg = NIC_MBOX_MSG_SET_MAC;
	mbx.mac.vf_id = nic->vf_id;
	ether_addr_copy(mbx.mac.mac_addr, netdev->dev_addr);

	return nicvf_send_msg_to_pf(nic, &mbx);
}

static void nicvf_config_cpi(struct nicvf *nic)
{
	union nic_mbx mbx = {};

	mbx.cpi_cfg.msg = NIC_MBOX_MSG_CPI_CFG;
	mbx.cpi_cfg.vf_id = nic->vf_id;
	mbx.cpi_cfg.cpi_alg = nic->cpi_alg;
	mbx.cpi_cfg.rq_cnt = nic->qs->rq_cnt;

	nicvf_send_msg_to_pf(nic, &mbx);
}

static void nicvf_get_rss_size(struct nicvf *nic)
{
	union nic_mbx mbx = {};

	mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE;
	mbx.rss_size.vf_id = nic->vf_id;
	nicvf_send_msg_to_pf(nic, &mbx);
}

void nicvf_config_rss(struct nicvf *nic)
{
	union nic_mbx mbx = {};
	struct nicvf_rss_info *rss = &nic->rss_info;
	int ind_tbl_len = rss->rss_size;
	int i, nextq = 0;

	mbx.rss_cfg.vf_id = nic->vf_id;
	mbx.rss_cfg.hash_bits = rss->hash_bits;
	while (ind_tbl_len) {
		mbx.rss_cfg.tbl_offset = nextq;
		mbx.rss_cfg.tbl_len = min(ind_tbl_len,
					  RSS_IND_TBL_LEN_PER_MBX_MSG);
		mbx.rss_cfg.msg = mbx.rss_cfg.tbl_offset ?
			  NIC_MBOX_MSG_RSS_CFG_CONT : NIC_MBOX_MSG_RSS_CFG;

		for (i = 0; i < mbx.rss_cfg.tbl_len; i++)
			mbx.rss_cfg.ind_tbl[i] = rss->ind_tbl[nextq++];

		nicvf_send_msg_to_pf(nic, &mbx);

		ind_tbl_len -= mbx.rss_cfg.tbl_len;
	}
}

void nicvf_set_rss_key(struct nicvf *nic)
{
	struct nicvf_rss_info *rss = &nic->rss_info;
	u64 key_addr = NIC_VNIC_RSS_KEY_0_4;
	int idx;

	for (idx = 0; idx < RSS_HASH_KEY_SIZE; idx++) {
		nicvf_reg_write(nic, key_addr, rss->key[idx]);
		key_addr += sizeof(u64);
	}
}

static int nicvf_rss_init(struct nicvf *nic)
{
	struct nicvf_rss_info *rss = &nic->rss_info;
	int idx;

	nicvf_get_rss_size(nic);

	if (cpi_alg != CPI_ALG_NONE) {
		rss->enable = false;
		rss->hash_bits = 0;
		return 0;
	}

	rss->enable = true;

	netdev_rss_key_fill(rss->key, RSS_HASH_KEY_SIZE * sizeof(u64));
	nicvf_set_rss_key(nic);

	rss->cfg = RSS_IP_HASH_ENA | RSS_TCP_HASH_ENA | RSS_UDP_HASH_ENA;
	nicvf_reg_write(nic, NIC_VNIC_RSS_CFG, rss->cfg);

	rss->hash_bits = ilog2(rounddown_pow_of_two(rss->rss_size));

	for (idx = 0; idx < rss->rss_size; idx++)
		rss->ind_tbl[idx] = ethtool_rxfh_indir_default(idx,
							       nic->rx_queues);
	nicvf_config_rss(nic);
	return 1;
}

/* Request PF to allocate additional Qsets */
static void nicvf_request_sqs(struct nicvf *nic)
{
	union nic_mbx mbx = {};
	int sqs;
	int sqs_count = nic->sqs_count;
	int rx_queues = 0, tx_queues = 0;

	/* Only primary VF should request */
	if (nic->sqs_mode || !nic->sqs_count)
		return;

	mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS;
	mbx.sqs_alloc.vf_id = nic->vf_id;
	mbx.sqs_alloc.qs_count = nic->sqs_count;
	if (nicvf_send_msg_to_pf(nic, &mbx)) {
		/* No response from PF */
		nic->sqs_count = 0;
		return;
	}

	/* Return if no Secondary Qsets available */
	if (!nic->sqs_count)
		return;

	if (nic->rx_queues > MAX_RCV_QUEUES_PER_QS)
		rx_queues = nic->rx_queues - MAX_RCV_QUEUES_PER_QS;

	tx_queues = nic->tx_queues + nic->xdp_tx_queues;
	if (tx_queues > MAX_SND_QUEUES_PER_QS)
		tx_queues = tx_queues - MAX_SND_QUEUES_PER_QS;

	/* Set no of Rx/Tx queues in each of the SQsets */
	for (sqs = 0; sqs < nic->sqs_count; sqs++) {
		mbx.nicvf.msg = NIC_MBOX_MSG_SNICVF_PTR;
		mbx.nicvf.vf_id = nic->vf_id;
		mbx.nicvf.sqs_id = sqs;
		nicvf_send_msg_to_pf(nic, &mbx);

		nic->snicvf[sqs]->sqs_id = sqs;
		if (rx_queues > MAX_RCV_QUEUES_PER_QS) {
			nic->snicvf[sqs]->qs->rq_cnt = MAX_RCV_QUEUES_PER_QS;
			rx_queues -= MAX_RCV_QUEUES_PER_QS;
		} else {
			nic->snicvf[sqs]->qs->rq_cnt = rx_queues;
			rx_queues = 0;
		}

		if (tx_queues > MAX_SND_QUEUES_PER_QS) {
			nic->snicvf[sqs]->qs->sq_cnt = MAX_SND_QUEUES_PER_QS;
			tx_queues -= MAX_SND_QUEUES_PER_QS;
		} else {
			nic->snicvf[sqs]->qs->sq_cnt = tx_queues;
			tx_queues = 0;
		}

		nic->snicvf[sqs]->qs->cq_cnt =
		max(nic->snicvf[sqs]->qs->rq_cnt, nic->snicvf[sqs]->qs->sq_cnt);

		/* Initialize secondary Qset's queues and its interrupts */
		nicvf_open(nic->snicvf[sqs]->netdev);
	}

	/* Update stack with actual Rx/Tx queue count allocated */
	if (sqs_count != nic->sqs_count)
		nicvf_set_real_num_queues(nic->netdev,
					  nic->tx_queues, nic->rx_queues);
}

/* Send this Qset's nicvf pointer to PF.
 * PF in turn sends the primary VF's nicvf struct to secondary Qsets/VFs
 * so that packets received by these Qsets can use the primary VF's netdev.
 */
static void nicvf_send_vf_struct(struct nicvf *nic)
{
	union nic_mbx mbx = {};

	mbx.nicvf.msg = NIC_MBOX_MSG_NICVF_PTR;
	mbx.nicvf.sqs_mode = nic->sqs_mode;
	mbx.nicvf.nicvf = (u64)nic;
	nicvf_send_msg_to_pf(nic, &mbx);
}

static void nicvf_get_primary_vf_struct(struct nicvf *nic)
{
	union nic_mbx mbx = {};

	mbx.nicvf.msg = NIC_MBOX_MSG_PNICVF_PTR;
	nicvf_send_msg_to_pf(nic, &mbx);
}

int nicvf_set_real_num_queues(struct net_device *netdev,
			      int tx_queues, int rx_queues)
{
	int err = 0;

	err = netif_set_real_num_tx_queues(netdev, tx_queues);
	if (err) {
		netdev_err(netdev,
			   "Failed to set no of Tx queues: %d\n", tx_queues);
		return err;
	}

	err = netif_set_real_num_rx_queues(netdev, rx_queues);
	if (err)
		netdev_err(netdev,
			   "Failed to set no of Rx queues: %d\n", rx_queues);
	return err;
}

static int nicvf_init_resources(struct nicvf *nic)
{
	int err;

	/* Enable Qset */
	nicvf_qset_config(nic, true);

	/* Initialize queues and HW for data transfer */
	err = nicvf_config_data_transfer(nic, true);
	if (err) {
		netdev_err(nic->netdev,
			   "Failed to alloc/config VF's QSet resources\n");
		return err;
	}

	return 0;
}

static void nicvf_unmap_page(struct nicvf *nic, struct page *page, u64 dma_addr)
{
	/* Check if it's a recycled page, if not unmap the DMA mapping.
	 * Recycled page holds an extra reference.
	 */
	if (page_ref_count(page) == 1) {
		dma_addr &= PAGE_MASK;
		dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
				     RCV_FRAG_LEN + XDP_HEADROOM,
				     DMA_FROM_DEVICE,
				     DMA_ATTR_SKIP_CPU_SYNC);
	}
}

static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
				struct cqe_rx_t *cqe_rx, struct snd_queue *sq,
				struct sk_buff **skb)
{
	struct xdp_buff xdp;
	struct page *page;
	struct nicvf_xdp_tx *xdp_tx = NULL;
	u32 action;
	u16 len, err, offset = 0;
	u64 dma_addr, cpu_addr;
	void *orig_data;

	/* Retrieve packet buffer's DMA address and length */
	len = *((u16 *)((void *)cqe_rx + (3 * sizeof(u64))));
	dma_addr = *((u64 *)((void *)cqe_rx + (7 * sizeof(u64))));

	cpu_addr = nicvf_iova_to_phys(nic, dma_addr);
	if (!cpu_addr)
		return false;
	cpu_addr = (u64)phys_to_virt(cpu_addr);
	page = virt_to_page((void *)cpu_addr);

	xdp.data_hard_start = page_address(page) + RCV_BUF_HEADROOM;
	xdp.data = (void *)cpu_addr;
	xdp_set_data_meta_invalid(&xdp);
	xdp.data_end = xdp.data + len;
	orig_data = xdp.data;

	rcu_read_lock();
	action = bpf_prog_run_xdp(prog, &xdp);
	rcu_read_unlock();

	/* Check if XDP program has changed headers */
	if (orig_data != xdp.data) {
		len = xdp.data_end - xdp.data;
		offset = orig_data - xdp.data;
		dma_addr -= offset;
	}

	switch (action) {
	case XDP_PASS:
		nicvf_unmap_page(nic, page, dma_addr);

		/* Build SKB and pass on packet to network stack */
		*skb = build_skb(xdp.data,
				 RCV_FRAG_LEN - cqe_rx->align_pad + offset);
		if (!*skb)
			put_page(page);
		else
			skb_put(*skb, len);
		return false;
	case XDP_TX:
		nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len);
		return true;
	case XDP_REDIRECT:
		/* Save DMA address for use while transmitting */
		xdp_tx = (struct nicvf_xdp_tx *)page_address(page);
		xdp_tx->dma_addr = dma_addr;
		xdp_tx->qidx = nicvf_netdev_qidx(nic, cqe_rx->rq_idx);

		err = xdp_do_redirect(nic->pnicvf->netdev, &xdp, prog);
		if (!err)
			return true;

		/* Free the page on error */
		nicvf_unmap_page(nic, page, dma_addr);
		put_page(page);
		break;
	default:
		bpf_warn_invalid_xdp_action(action);
		/* fall through */
	case XDP_ABORTED:
		trace_xdp_exception(nic->netdev, prog, action);
		/* fall through */
	case XDP_DROP:
		nicvf_unmap_page(nic, page, dma_addr);
		put_page(page);
		return true;
	}
	return false;
}
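
/* Handle a transmit completion CQE: free the transmitted SKB or XDP buffer,
 * unmap its DMA mapping and account the SQ descriptors to be freed by the
 * caller.
 */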
static void nicvf_snd_pkt_handler(struct net_device *netdev,
				  struct cqe_send_t *cqe_tx,
				  int budget, int *subdesc_cnt,
				  unsigned int *tx_pkts, unsigned int *tx_bytes)
{
	struct sk_buff *skb = NULL;
	struct page *page;
	struct nicvf *nic = netdev_priv(netdev);
	struct snd_queue *sq;
	struct sq_hdr_subdesc *hdr;
	struct sq_hdr_subdesc *tso_sqe;

	sq = &nic->qs->sq[cqe_tx->sq_idx];

	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, cqe_tx->sqe_ptr);
	if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER)
		return;

	/* Check for errors */
	if (cqe_tx->send_status)
		nicvf_check_cqe_tx_errs(nic->pnicvf, cqe_tx);

	/* Is this an XDP designated Tx queue */
	if (sq->is_xdp) {
		page = (struct page *)sq->xdp_page[cqe_tx->sqe_ptr];
		/* Check if it's a recycled page, else unmap the DMA mapping */
		if (page && (page_ref_count(page) == 1))
			nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr,
						 hdr->subdesc_cnt);

		/* Release page reference for recycling */
		if (page)
			put_page(page);
		sq->xdp_page[cqe_tx->sqe_ptr] = (u64)NULL;
		*subdesc_cnt += hdr->subdesc_cnt + 1;
		return;
	}

	skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr];
	if (skb) {
		/* Check for dummy descriptor used for HW TSO offload on 88xx */
		if (hdr->dont_send) {
			/* Get actual TSO descriptors and free them */
			tso_sqe =
			 (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
			nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2,
						 tso_sqe->subdesc_cnt);
			*subdesc_cnt += tso_sqe->subdesc_cnt + 1;
		} else {
			nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr,
						 hdr->subdesc_cnt);
		}
		*subdesc_cnt += hdr->subdesc_cnt + 1;
		prefetch(skb);
		(*tx_pkts)++;
		*tx_bytes += skb->len;
		napi_consume_skb(skb, budget);
		sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL;
	} else {
		/* In case of SW TSO on 88xx, only last segment will have
		 * a SKB attached, so just free SQEs here.
		 */
		if (!nic->hw_tso)
			*subdesc_cnt += hdr->subdesc_cnt + 1;
	}
}

static inline void nicvf_set_rxhash(struct net_device *netdev,
				    struct cqe_rx_t *cqe_rx,
				    struct sk_buff *skb)
{
	u8 hash_type;
	u32 hash;

	if (!(netdev->features & NETIF_F_RXHASH))
		return;

	switch (cqe_rx->rss_alg) {
	case RSS_ALG_TCP_IP:
	case RSS_ALG_UDP_IP:
		hash_type = PKT_HASH_TYPE_L4;
		hash = cqe_rx->rss_tag;
		break;
	case RSS_ALG_IP:
		hash_type = PKT_HASH_TYPE_L3;
		hash = cqe_rx->rss_tag;
		break;
	default:
		hash_type = PKT_HASH_TYPE_NONE;
		hash = 0;
	}

	skb_set_hash(skb, hash, hash_type);
}
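
/* Handle a received packet's CQE: run the attached XDP program if any,
 * otherwise build an SKB, set checksum/hash/VLAN info and pass it up to
 * the network stack.
 */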
static void nicvf_rcv_pkt_handler(struct net_device *netdev,
				  struct napi_struct *napi,
				  struct cqe_rx_t *cqe_rx, struct snd_queue *sq)
{
	struct sk_buff *skb = NULL;
	struct nicvf *nic = netdev_priv(netdev);
	struct nicvf *snic = nic;
	int err = 0;
	int rq_idx;

	rq_idx = nicvf_netdev_qidx(nic, cqe_rx->rq_idx);

	if (nic->sqs_mode) {
		/* Use primary VF's 'nicvf' struct */
		nic = nic->pnicvf;
		netdev = nic->netdev;
	}

	/* Check for errors */
	if (cqe_rx->err_level || cqe_rx->err_opcode) {
		err = nicvf_check_cqe_rx_errs(nic, cqe_rx);
		if (err && !cqe_rx->rb_cnt)
			return;
	}

	/* For XDP, ignore pkts spanning multiple pages */
	if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) {
		/* Packet consumed by XDP */
		if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, &skb))
			return;
	} else {
		skb = nicvf_get_rcv_skb(snic, cqe_rx,
					nic->xdp_prog ? true : false);
	}

	if (!skb)
		return;

	if (netif_msg_pktdata(nic)) {
		netdev_info(nic->netdev, "skb 0x%p, len=%d\n", skb, skb->len);
		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1,
			       skb->data, skb->len, true);
	}

	/* If error packet, drop it here */
	if (err) {
		dev_kfree_skb_any(skb);
		return;
	}

	nicvf_set_rxhash(netdev, cqe_rx, skb);

	skb_record_rx_queue(skb, rq_idx);
	if (netdev->hw_features & NETIF_F_RXCSUM) {
		/* HW by default verifies TCP/UDP/SCTP checksums */
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else {
		skb_checksum_none_assert(skb);
	}

	skb->protocol = eth_type_trans(skb, netdev);

	/* Check for stripped VLAN */
	if (cqe_rx->vlan_found && cqe_rx->vlan_stripped)
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
				       ntohs((__force __be16)cqe_rx->vlan_tci));

	if (napi && (netdev->features & NETIF_F_GRO))
		napi_gro_receive(napi, skb);
	else
		netif_receive_skb(skb);
}
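
/* Service a completion queue: process up to 'budget' Rx CQEs and all Tx
 * CQEs, ring the doorbell for reused CQEs and wake the Tx queue if it was
 * stopped because the send queue was full.
 */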
static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
				 struct napi_struct *napi, int budget)
{
	int processed_cqe, work_done = 0, tx_done = 0;
	int cqe_count, cqe_head;
	int subdesc_cnt = 0;
	struct nicvf *nic = netdev_priv(netdev);
	struct queue_set *qs = nic->qs;
	struct cmp_queue *cq = &qs->cq[cq_idx];
	struct cqe_rx_t *cq_desc;
	struct netdev_queue *txq;
	struct snd_queue *sq = &qs->sq[cq_idx];
	unsigned int tx_pkts = 0, tx_bytes = 0, txq_idx;

	spin_lock_bh(&cq->lock);
loop:
	processed_cqe = 0;
	/* Get no of valid CQ entries to process */
	cqe_count = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, cq_idx);
	cqe_count &= CQ_CQE_COUNT;
	if (!cqe_count)
		goto done;

	/* Get head of the valid CQ entries */
	cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9;
	cqe_head &= 0xFFFF;

	while (processed_cqe < cqe_count) {
		/* Get the CQ descriptor */
		cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head);
		cqe_head++;
		cqe_head &= (cq->dmem.q_len - 1);
		/* Initiate prefetch for next descriptor */
		prefetch((struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head));

		if ((work_done >= budget) && napi &&
		    (cq_desc->cqe_type != CQE_TYPE_SEND)) {
			break;
		}

		switch (cq_desc->cqe_type) {
		case CQE_TYPE_RX:
			nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq);
			work_done++;
			break;
		case CQE_TYPE_SEND:
			nicvf_snd_pkt_handler(netdev, (void *)cq_desc,
					      budget, &subdesc_cnt,
					      &tx_pkts, &tx_bytes);
			tx_done++;
			break;
		case CQE_TYPE_INVALID:
		case CQE_TYPE_RX_SPLIT:
		case CQE_TYPE_RX_TCP:
		case CQE_TYPE_SEND_PTP:
			/* Ignore for now */
			break;
		}
		processed_cqe++;
	}

	/* Ring doorbell to inform H/W to reuse processed CQEs */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR,
			      cq_idx, processed_cqe);

	if ((work_done < budget) && napi)
		goto loop;

done:
	/* Update SQ's descriptor free count */
	if (subdesc_cnt)
		nicvf_put_sq_desc(sq, subdesc_cnt);

	txq_idx = nicvf_netdev_qidx(nic, cq_idx);
	/* Handle XDP TX queues */
	if (nic->pnicvf->xdp_prog) {
		if (txq_idx < nic->pnicvf->xdp_tx_queues) {
			nicvf_xdp_sq_doorbell(nic, sq, cq_idx);
			goto out;
		}
		nic = nic->pnicvf;
		txq_idx -= nic->pnicvf->xdp_tx_queues;
	}

	/* Wakeup TXQ if it was stopped earlier due to SQ full */
	if (tx_done ||
	    (atomic_read(&sq->free_cnt) >= MIN_SQ_DESC_PER_PKT_XMIT)) {
		netdev = nic->pnicvf->netdev;
		txq = netdev_get_tx_queue(netdev, txq_idx);
		if (tx_pkts)
			netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);

		/* To read updated queue and carrier status */
		smp_mb();
		if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) {
			netif_tx_wake_queue(txq);
			nic = nic->pnicvf;
			this_cpu_inc(nic->drv_stats->txq_wake);
			netif_warn(nic, tx_err, netdev,
				   "Transmit queue wakeup SQ%d\n", txq_idx);
		}
	}

out:
	spin_unlock_bh(&cq->lock);
	return work_done;
}

static int nicvf_poll(struct napi_struct *napi, int budget)
{
	u64 cq_head;
	int work_done = 0;
	struct net_device *netdev = napi->dev;
	struct nicvf *nic = netdev_priv(netdev);
	struct nicvf_cq_poll *cq;

	cq = container_of(napi, struct nicvf_cq_poll, napi);
	work_done = nicvf_cq_intr_handler(netdev, cq->cq_idx, napi, budget);

	if (work_done < budget) {
		/* Slow packet rate, exit polling */
		napi_complete_done(napi, work_done);
		/* Re-enable interrupts */
		cq_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD,
					       cq->cq_idx);
		nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->cq_idx);
		nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_HEAD,
				      cq->cq_idx, cq_head);
		nicvf_enable_intr(nic, NICVF_INTR_CQ, cq->cq_idx);
	}
	return work_done;
}

/* Qset error interrupt handler
 *
 * As of now only CQ errors are handled
 */
static void nicvf_handle_qs_err(unsigned long data)
{
	struct nicvf *nic = (struct nicvf *)data;
	struct queue_set *qs = nic->qs;
	int qidx;
	u64 status;

	netif_tx_disable(nic->netdev);

	/* Check if it is CQ err */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		status = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS,
					      qidx);
		if (!(status & CQ_ERR_MASK))
			continue;
		/* Process already queued CQEs and reconfig CQ */
		nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
		nicvf_sq_disable(nic, qidx);
		nicvf_cq_intr_handler(nic->netdev, qidx, NULL, 0);
		nicvf_cmp_queue_config(nic, qs, qidx, true);
		nicvf_sq_free_used_descs(nic->netdev, &qs->sq[qidx], qidx);
		nicvf_sq_enable(nic, &qs->sq[qidx], qidx);

		nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx);
	}

	netif_tx_start_all_queues(nic->netdev);
	/* Re-enable Qset error interrupt */
	nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);
}

static void nicvf_dump_intr_status(struct nicvf *nic)
{
	netif_info(nic, intr, nic->netdev, "interrupt status 0x%llx\n",
		   nicvf_reg_read(nic, NIC_VF_INT));
}

static irqreturn_t nicvf_misc_intr_handler(int irq, void *nicvf_irq)
{
	struct nicvf *nic = (struct nicvf *)nicvf_irq;
	u64 intr;

	nicvf_dump_intr_status(nic);

	intr = nicvf_reg_read(nic, NIC_VF_INT);
	/* Check for spurious interrupt */
	if (!(intr & NICVF_INTR_MBOX_MASK))
		return IRQ_HANDLED;

	nicvf_handle_mbx_intr(nic);

	return IRQ_HANDLED;
}

static irqreturn_t nicvf_intr_handler(int irq, void *cq_irq)
{
	struct nicvf_cq_poll *cq_poll = (struct nicvf_cq_poll *)cq_irq;
	struct nicvf *nic = cq_poll->nicvf;
	int qidx = cq_poll->cq_idx;

	nicvf_dump_intr_status(nic);

	/* Disable interrupts */
	nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);

	/* Schedule NAPI */
	napi_schedule_irqoff(&cq_poll->napi);

	/* Clear interrupt */
	nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);

	return IRQ_HANDLED;
}

static irqreturn_t nicvf_rbdr_intr_handler(int irq, void *nicvf_irq)
{
	struct nicvf *nic = (struct nicvf *)nicvf_irq;
	u8 qidx;

	nicvf_dump_intr_status(nic);

	/* Disable RBDR interrupt and schedule softirq */
	for (qidx = 0; qidx < nic->qs->rbdr_cnt; qidx++) {
		if (!nicvf_is_intr_enabled(nic, NICVF_INTR_RBDR, qidx))
			continue;
		nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
		tasklet_hi_schedule(&nic->rbdr_task);
		/* Clear interrupt */
		nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx);
	}

	return IRQ_HANDLED;
}

static irqreturn_t nicvf_qs_err_intr_handler(int irq, void *nicvf_irq)
{
	struct nicvf *nic = (struct nicvf *)nicvf_irq;

	nicvf_dump_intr_status(nic);

	/* Disable Qset err interrupt and schedule softirq */
	nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
	tasklet_hi_schedule(&nic->qs_err_task);
	nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0);

	return IRQ_HANDLED;
}

static void nicvf_set_irq_affinity(struct nicvf *nic)
{
	int vec, cpu;

	for (vec = 0; vec < nic->num_vec; vec++) {
		if (!nic->irq_allocated[vec])
			continue;

		if (!zalloc_cpumask_var(&nic->affinity_mask[vec], GFP_KERNEL))
			return;
		/* CQ interrupts */
		if (vec < NICVF_INTR_ID_SQ)
			/* Leave CPU0 for RBDR and other interrupts */
			cpu = nicvf_netdev_qidx(nic, vec) + 1;
		else
			cpu = 0;

		cpumask_set_cpu(cpumask_local_spread(cpu, nic->node),
				nic->affinity_mask[vec]);
		irq_set_affinity_hint(pci_irq_vector(nic->pdev, vec),
				      nic->affinity_mask[vec]);
	}
}

static int nicvf_register_interrupts(struct nicvf *nic)
{
	int irq, ret = 0;

	for_each_cq_irq(irq)
		sprintf(nic->irq_name[irq], "%s-rxtx-%d",
			nic->pnicvf->netdev->name,
			nicvf_netdev_qidx(nic, irq));

	for_each_sq_irq(irq)
		sprintf(nic->irq_name[irq], "%s-sq-%d",
			nic->pnicvf->netdev->name,
			nicvf_netdev_qidx(nic, irq - NICVF_INTR_ID_SQ));

	for_each_rbdr_irq(irq)
		sprintf(nic->irq_name[irq], "%s-rbdr-%d",
			nic->pnicvf->netdev->name,
			nic->sqs_mode ? (nic->sqs_id + 1) : 0);

	/* Register CQ interrupts */
	for (irq = 0; irq < nic->qs->cq_cnt; irq++) {
		ret = request_irq(pci_irq_vector(nic->pdev, irq),
				  nicvf_intr_handler,
				  0, nic->irq_name[irq], nic->napi[irq]);
		if (ret)
			goto err;
		nic->irq_allocated[irq] = true;
	}

	/* Register RBDR interrupt */
	for (irq = NICVF_INTR_ID_RBDR;
	     irq < (NICVF_INTR_ID_RBDR + nic->qs->rbdr_cnt); irq++) {
		ret = request_irq(pci_irq_vector(nic->pdev, irq),
				  nicvf_rbdr_intr_handler,
				  0, nic->irq_name[irq], nic);
		if (ret)
			goto err;
		nic->irq_allocated[irq] = true;
	}

	/* Register QS error interrupt */
	sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR], "%s-qset-err-%d",
		nic->pnicvf->netdev->name,
		nic->sqs_mode ? (nic->sqs_id + 1) : 0);
	irq = NICVF_INTR_ID_QS_ERR;
	ret = request_irq(pci_irq_vector(nic->pdev, irq),
			  nicvf_qs_err_intr_handler,
			  0, nic->irq_name[irq], nic);
	if (ret)
		goto err;

	nic->irq_allocated[irq] = true;

	/* Set IRQ affinities */
	nicvf_set_irq_affinity(nic);

err:
	if (ret)
		netdev_err(nic->netdev, "request_irq failed, vector %d\n", irq);

	return ret;
}

static void nicvf_unregister_interrupts(struct nicvf *nic)
{
	struct pci_dev *pdev = nic->pdev;
	int irq;

	/* Free registered interrupts */
	for (irq = 0; irq < nic->num_vec; irq++) {
		if (!nic->irq_allocated[irq])
			continue;

		irq_set_affinity_hint(pci_irq_vector(pdev, irq), NULL);
		free_cpumask_var(nic->affinity_mask[irq]);

		if (irq < NICVF_INTR_ID_SQ)
			free_irq(pci_irq_vector(pdev, irq), nic->napi[irq]);
		else
			free_irq(pci_irq_vector(pdev, irq), nic);

		nic->irq_allocated[irq] = false;
	}

	/* Disable MSI-X */
	pci_free_irq_vectors(pdev);
	nic->num_vec = 0;
}

/* Initialize MSI-X vectors and register MISC interrupt.
 * Send READY message to PF to check if it is alive.
 */
static int nicvf_register_misc_interrupt(struct nicvf *nic)
{
	int ret = 0;
	int irq = NICVF_INTR_ID_MISC;

	/* Return if mailbox interrupt is already registered */
	if (nic->pdev->msix_enabled)
		return 0;

	/* Enable MSI-X */
	nic->num_vec = pci_msix_vec_count(nic->pdev);
	ret = pci_alloc_irq_vectors(nic->pdev, nic->num_vec, nic->num_vec,
				    PCI_IRQ_MSIX);
	if (ret < 0) {
		netdev_err(nic->netdev,
			   "Req for #%d msix vectors failed\n", nic->num_vec);
		return 1;
	}

	sprintf(nic->irq_name[irq], "%s Mbox", "NICVF");
	/* Register Misc interrupt */
	ret = request_irq(pci_irq_vector(nic->pdev, irq),
			  nicvf_misc_intr_handler, 0, nic->irq_name[irq], nic);

	if (ret)
		return ret;
	nic->irq_allocated[irq] = true;

	/* Enable mailbox interrupt */
	nicvf_enable_intr(nic, NICVF_INTR_MBOX, 0);

	/* Check if VF is able to communicate with PF */
	if (!nicvf_check_pf_ready(nic)) {
		nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
		nicvf_unregister_interrupts(nic);
		return 1;
	}

	return 0;
}

static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	struct nicvf *nic = netdev_priv(netdev);
	int qid = skb_get_queue_mapping(skb);
	struct netdev_queue *txq = netdev_get_tx_queue(netdev, qid);
	struct nicvf *snic;
	struct snd_queue *sq;
	int tmp;

	/* Check for minimum packet length */
	if (skb->len <= ETH_HLEN) {
		dev_kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	/* In XDP case, initial HW tx queues are used for XDP,
	 * but stack's queue mapping starts at '0', so skip the
	 * Tx queues attached to Rx queues for XDP.
	 */
	if (nic->xdp_prog)
		qid += nic->xdp_tx_queues;

	snic = nic;
	/* Get secondary Qset's SQ structure */
	if (qid >= MAX_SND_QUEUES_PER_QS) {
		tmp = qid / MAX_SND_QUEUES_PER_QS;
		snic = (struct nicvf *)nic->snicvf[tmp - 1];
		if (!snic) {
			netdev_warn(nic->netdev,
				    "Secondary Qset#%d's ptr not initialized\n",
				    tmp - 1);
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
		qid = qid % MAX_SND_QUEUES_PER_QS;
	}

	sq = &snic->qs->sq[qid];
	if (!netif_tx_queue_stopped(txq) &&
	    !nicvf_sq_append_skb(snic, sq, skb, qid)) {
		netif_tx_stop_queue(txq);

		/* Barrier, so that stop_queue is visible to other cpus */
		smp_mb();

		/* Check again, in case another cpu freed descriptors */
		if (atomic_read(&sq->free_cnt) > MIN_SQ_DESC_PER_PKT_XMIT) {
			netif_tx_wake_queue(txq);
		} else {
			this_cpu_inc(nic->drv_stats->txq_stop);
			netif_warn(nic, tx_err, netdev,
				   "Transmit ring full, stopping SQ%d\n", qid);
		}
		return NETDEV_TX_BUSY;
	}

	return NETDEV_TX_OK;
}

static inline void nicvf_free_cq_poll(struct nicvf *nic)
{
	struct nicvf_cq_poll *cq_poll;
	int qidx;

	for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) {
		cq_poll = nic->napi[qidx];
		if (!cq_poll)
			continue;
		nic->napi[qidx] = NULL;
		kfree(cq_poll);
	}
}

int nicvf_stop(struct net_device *netdev)
{
	int irq, qidx;
	struct nicvf *nic = netdev_priv(netdev);
	struct queue_set *qs = nic->qs;
	struct nicvf_cq_poll *cq_poll = NULL;
	union nic_mbx mbx = {};

	mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN;
	nicvf_send_msg_to_pf(nic, &mbx);

	netif_carrier_off(netdev);
	netif_tx_stop_all_queues(nic->netdev);
	nic->link_up = false;

	/* Teardown secondary qsets first */
	if (!nic->sqs_mode) {
		for (qidx = 0; qidx < nic->sqs_count; qidx++) {
			if (!nic->snicvf[qidx])
				continue;
			nicvf_stop(nic->snicvf[qidx]->netdev);
			nic->snicvf[qidx] = NULL;
		}
	}

	/* Disable RBDR & QS error interrupts */
	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
		nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
		nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx);
	}
	nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
	nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0);

	/* Wait for pending IRQ handlers to finish */
	for (irq = 0; irq < nic->num_vec; irq++)
		synchronize_irq(pci_irq_vector(nic->pdev, irq));

	tasklet_kill(&nic->rbdr_task);
	tasklet_kill(&nic->qs_err_task);
	if (nic->rb_work_scheduled)
		cancel_delayed_work_sync(&nic->rbdr_work);

	for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) {
		cq_poll = nic->napi[qidx];
		if (!cq_poll)
			continue;
		napi_synchronize(&cq_poll->napi);
		/* CQ intr is enabled while napi_complete,
		 * so disable it now
		 */
		nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
		nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);
		napi_disable(&cq_poll->napi);
		netif_napi_del(&cq_poll->napi);
	}

	netif_tx_disable(netdev);

	for (qidx = 0; qidx < netdev->num_tx_queues; qidx++)
		netdev_tx_reset_queue(netdev_get_tx_queue(netdev, qidx));

	/* Free resources */
	nicvf_config_data_transfer(nic, false);

	/* Disable HW Qset */
	nicvf_qset_config(nic, false);

	/* Disable mailbox interrupt */
	nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);

	nicvf_unregister_interrupts(nic);

	nicvf_free_cq_poll(nic);

	/* Clear multiqset info */
	nic->pnicvf = nic;

	return 0;
}
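
/* Ask the PF to program the maximum receive frame size for this VF */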
static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu)
{
	union nic_mbx mbx = {};

	mbx.frs.msg = NIC_MBOX_MSG_SET_MAX_FRS;
	mbx.frs.max_frs = mtu;
	mbx.frs.vf_id = nic->vf_id;

	return nicvf_send_msg_to_pf(nic, &mbx);
}

int nicvf_open(struct net_device *netdev)
{
	int cpu, err, qidx;
	struct nicvf *nic = netdev_priv(netdev);
	struct queue_set *qs = nic->qs;
	struct nicvf_cq_poll *cq_poll = NULL;
	union nic_mbx mbx = {};

	netif_carrier_off(netdev);

	err = nicvf_register_misc_interrupt(nic);
	if (err)
		return err;

	/* Register NAPI handler for processing CQEs */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		cq_poll = kzalloc(sizeof(*cq_poll), GFP_KERNEL);
		if (!cq_poll) {
			err = -ENOMEM;
			goto napi_del;
		}
		cq_poll->cq_idx = qidx;
		cq_poll->nicvf = nic;
		netif_napi_add(netdev, &cq_poll->napi, nicvf_poll,
			       NAPI_POLL_WEIGHT);
		napi_enable(&cq_poll->napi);
		nic->napi[qidx] = cq_poll;
	}

	/* Check if we got MAC address from PF or else generate a random MAC */
	if (!nic->sqs_mode && is_zero_ether_addr(netdev->dev_addr)) {
		eth_hw_addr_random(netdev);
		nicvf_hw_set_mac_addr(nic, netdev);
	}

	if (nic->set_mac_pending) {
		nic->set_mac_pending = false;
		nicvf_hw_set_mac_addr(nic, netdev);
	}

	/* Init tasklet for handling Qset err interrupt */
	tasklet_init(&nic->qs_err_task, nicvf_handle_qs_err,
		     (unsigned long)nic);

	/* Init RBDR tasklet which will refill RBDR */
	tasklet_init(&nic->rbdr_task, nicvf_rbdr_task,
		     (unsigned long)nic);
	INIT_DELAYED_WORK(&nic->rbdr_work, nicvf_rbdr_work);

	/* Configure CPI algorithm */
	nic->cpi_alg = cpi_alg;
	if (!nic->sqs_mode)
		nicvf_config_cpi(nic);

	nicvf_request_sqs(nic);
	if (nic->sqs_mode)
		nicvf_get_primary_vf_struct(nic);

	/* Configure receive side scaling and MTU */
	if (!nic->sqs_mode) {
		nicvf_rss_init(nic);
		err = nicvf_update_hw_max_frs(nic, netdev->mtu);
		if (err)
			goto cleanup;

		/* Clear percpu stats */
		for_each_possible_cpu(cpu)
			memset(per_cpu_ptr(nic->drv_stats, cpu), 0,
			       sizeof(struct nicvf_drv_stats));
	}

	err = nicvf_register_interrupts(nic);
	if (err)
		goto cleanup;

	/* Initialize the queues */
	err = nicvf_init_resources(nic);
	if (err)
		goto cleanup;

	/* Make sure queue initialization is written */
	wmb();

	nicvf_reg_write(nic, NIC_VF_INT, -1);
	/* Enable Qset err interrupt */
	nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);

	/* Enable completion queue interrupt */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++)
		nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx);

	/* Enable RBDR threshold interrupt */
	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
		nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx);

	/* Send VF config done msg to PF */
	mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE;
	nicvf_write_to_mbx(nic, &mbx);

	return 0;
cleanup:
	nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
	nicvf_unregister_interrupts(nic);
	tasklet_kill(&nic->qs_err_task);
	tasklet_kill(&nic->rbdr_task);
napi_del:
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		cq_poll = nic->napi[qidx];
		if (!cq_poll)
			continue;
		napi_disable(&cq_poll->napi);
		netif_napi_del(&cq_poll->napi);
	}
	nicvf_free_cq_poll(nic);
	return err;
}

static int nicvf_change_mtu(struct net_device *netdev, int new_mtu)
{
	struct nicvf *nic = netdev_priv(netdev);
	int orig_mtu = netdev->mtu;

	netdev->mtu = new_mtu;

	if (!netif_running(netdev))
		return 0;

	if (nicvf_update_hw_max_frs(nic, new_mtu)) {
		netdev->mtu = orig_mtu;
		return -EINVAL;
	}

	return 0;
}

static int nicvf_set_mac_address(struct net_device *netdev, void *p)
{
	struct sockaddr *addr = p;
	struct nicvf *nic = netdev_priv(netdev);

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);

	if (nic->pdev->msix_enabled) {
		if (nicvf_hw_set_mac_addr(nic, netdev))
			return -EBUSY;
	} else {
		nic->set_mac_pending = true;
	}

	return 0;
}

void nicvf_update_lmac_stats(struct nicvf *nic)
{
	int stat = 0;
	union nic_mbx mbx = {};

	if (!netif_running(nic->netdev))
		return;

	mbx.bgx_stats.msg = NIC_MBOX_MSG_BGX_STATS;
	mbx.bgx_stats.vf_id = nic->vf_id;
	/* Rx stats */
	mbx.bgx_stats.rx = 1;
	while (stat < BGX_RX_STATS_COUNT) {
		mbx.bgx_stats.idx = stat;
		if (nicvf_send_msg_to_pf(nic, &mbx))
			return;
		stat++;
	}

	stat = 0;

	/* Tx stats */
	mbx.bgx_stats.rx = 0;
	while (stat < BGX_TX_STATS_COUNT) {
		mbx.bgx_stats.idx = stat;
		if (nicvf_send_msg_to_pf(nic, &mbx))
			return;
		stat++;
	}
}

void nicvf_update_stats(struct nicvf *nic)
{
	int qidx, cpu;
	u64 tmp_stats = 0;
	struct nicvf_hw_stats *stats = &nic->hw_stats;
	struct nicvf_drv_stats *drv_stats;
	struct queue_set *qs = nic->qs;

#define GET_RX_STATS(reg) \
	nicvf_reg_read(nic, NIC_VNIC_RX_STAT_0_13 | (reg << 3))
#define GET_TX_STATS(reg) \
	nicvf_reg_read(nic, NIC_VNIC_TX_STAT_0_4 | (reg << 3))

	stats->rx_bytes = GET_RX_STATS(RX_OCTS);
	stats->rx_ucast_frames = GET_RX_STATS(RX_UCAST);
	stats->rx_bcast_frames = GET_RX_STATS(RX_BCAST);
	stats->rx_mcast_frames = GET_RX_STATS(RX_MCAST);
	stats->rx_fcs_errors = GET_RX_STATS(RX_FCS);
	stats->rx_l2_errors = GET_RX_STATS(RX_L2ERR);
	stats->rx_drop_red = GET_RX_STATS(RX_RED);
	stats->rx_drop_red_bytes = GET_RX_STATS(RX_RED_OCTS);
	stats->rx_drop_overrun = GET_RX_STATS(RX_ORUN);
	stats->rx_drop_overrun_bytes = GET_RX_STATS(RX_ORUN_OCTS);
	stats->rx_drop_bcast = GET_RX_STATS(RX_DRP_BCAST);
	stats->rx_drop_mcast = GET_RX_STATS(RX_DRP_MCAST);
	stats->rx_drop_l3_bcast = GET_RX_STATS(RX_DRP_L3BCAST);
	stats->rx_drop_l3_mcast = GET_RX_STATS(RX_DRP_L3MCAST);

	stats->tx_bytes = GET_TX_STATS(TX_OCTS);
	stats->tx_ucast_frames = GET_TX_STATS(TX_UCAST);
	stats->tx_bcast_frames = GET_TX_STATS(TX_BCAST);
	stats->tx_mcast_frames = GET_TX_STATS(TX_MCAST);
	stats->tx_drops = GET_TX_STATS(TX_DROP);

	/* On T88 pass 2.0, the dummy SQE added for TSO notification
	 * via CQE has 'dont_send' set.  Hence HW drops the pkt pointed to
	 * by the dummy SQE, and that results in the tx_drops counter being
	 * incremented.  Subtracting it from the tx_tso counter gives the
	 * exact tx_drops count.
	 */
	if (nic->t88 && nic->hw_tso) {
		for_each_possible_cpu(cpu) {
			drv_stats = per_cpu_ptr(nic->drv_stats, cpu);
			tmp_stats += drv_stats->tx_tso;
		}
		stats->tx_drops = tmp_stats - stats->tx_drops;
	}
	stats->tx_frames = stats->tx_ucast_frames +
			   stats->tx_bcast_frames +
			   stats->tx_mcast_frames;
	stats->rx_frames = stats->rx_ucast_frames +
			   stats->rx_bcast_frames +
			   stats->rx_mcast_frames;
	stats->rx_drops = stats->rx_drop_red +
			  stats->rx_drop_overrun;

	/* Update RQ and SQ stats */
	for (qidx = 0; qidx < qs->rq_cnt; qidx++)
		nicvf_update_rq_stats(nic, qidx);
	for (qidx = 0; qidx < qs->sq_cnt; qidx++)
		nicvf_update_sq_stats(nic, qidx);
}

static void nicvf_get_stats64(struct net_device *netdev,
			      struct rtnl_link_stats64 *stats)
{
	struct nicvf *nic = netdev_priv(netdev);
	struct nicvf_hw_stats *hw_stats = &nic->hw_stats;

	nicvf_update_stats(nic);

	stats->rx_bytes = hw_stats->rx_bytes;
	stats->rx_packets = hw_stats->rx_frames;
	stats->rx_dropped = hw_stats->rx_drops;
	stats->multicast = hw_stats->rx_mcast_frames;

	stats->tx_bytes = hw_stats->tx_bytes;
	stats->tx_packets = hw_stats->tx_frames;
	stats->tx_dropped = hw_stats->tx_drops;
}

static void nicvf_tx_timeout(struct net_device *dev)
{
	struct nicvf *nic = netdev_priv(dev);

	netif_warn(nic, tx_err, dev, "Transmit timed out, resetting\n");

	this_cpu_inc(nic->drv_stats->tx_timeout);
	schedule_work(&nic->reset_task);
}

static void nicvf_reset_task(struct work_struct *work)
{
	struct nicvf *nic;

	nic = container_of(work, struct nicvf, reset_task);

	if (!netif_running(nic->netdev))
		return;

	nicvf_stop(nic->netdev);
	nicvf_open(nic->netdev);
	netif_trans_update(nic->netdev);
}

static int nicvf_config_loopback(struct nicvf *nic,
				 netdev_features_t features)
{
	union nic_mbx mbx = {};

	mbx.lbk.msg = NIC_MBOX_MSG_LOOPBACK;
	mbx.lbk.vf_id = nic->vf_id;
	mbx.lbk.enable = (features & NETIF_F_LOOPBACK) != 0;

	return nicvf_send_msg_to_pf(nic, &mbx);
}

static netdev_features_t nicvf_fix_features(struct net_device *netdev,
					    netdev_features_t features)
{
	struct nicvf *nic = netdev_priv(netdev);

	if ((features & NETIF_F_LOOPBACK) &&
	    netif_running(netdev) && !nic->loopback_supported)
		features &= ~NETIF_F_LOOPBACK;

	return features;
}

static int nicvf_set_features(struct net_device *netdev,
			      netdev_features_t features)
{
	struct nicvf *nic = netdev_priv(netdev);
	netdev_features_t changed = features ^ netdev->features;

	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
		nicvf_config_vlan_stripping(nic, features);

	if ((changed & NETIF_F_LOOPBACK) && netif_running(netdev))
		return nicvf_config_loopback(nic, features);

	return 0;
}

static void nicvf_set_xdp_queues(struct nicvf *nic, bool bpf_attached)
{
	u8 cq_count, txq_count;

	/* Set XDP Tx queue count same as Rx queue count */
	if (!bpf_attached)
		nic->xdp_tx_queues = 0;
	else
		nic->xdp_tx_queues = nic->rx_queues;

	/* If queue count > MAX_CMP_QUEUES_PER_QS, then additional qsets
	 * need to be allocated; check how many.
	 */
	txq_count = nic->xdp_tx_queues + nic->tx_queues;
	cq_count = max(nic->rx_queues, txq_count);
	if (cq_count > MAX_CMP_QUEUES_PER_QS) {
		nic->sqs_count = roundup(cq_count, MAX_CMP_QUEUES_PER_QS);
		nic->sqs_count = (nic->sqs_count / MAX_CMP_QUEUES_PER_QS) - 1;
	} else {
		nic->sqs_count = 0;
	}

	/* Set primary Qset's resources */
	nic->qs->rq_cnt = min_t(u8, nic->rx_queues, MAX_RCV_QUEUES_PER_QS);
	nic->qs->sq_cnt = min_t(u8, txq_count, MAX_SND_QUEUES_PER_QS);
	nic->qs->cq_cnt = max_t(u8, nic->qs->rq_cnt, nic->qs->sq_cnt);

	/* Update stack */
	nicvf_set_real_num_queues(nic->netdev, nic->tx_queues, nic->rx_queues);
}

static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
{
	struct net_device *dev = nic->netdev;
	bool if_up = netif_running(nic->netdev);
	struct bpf_prog *old_prog;
	bool bpf_attached = false;

	/* For now just support only the usual MTU sized frames */
	if (prog && (dev->mtu > 1500)) {
		netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n",
			    dev->mtu);
		return -EOPNOTSUPP;
	}

	/* ALL SQs attached to CQs i.e. same as RQs, are treated as
	 * XDP Tx queues, and more Tx queues are allocated for the
	 * network stack to send pkts out.
	 *
	 * No of Tx queues is either same as Rx queues or whatever
	 * is left in the max no of queues possible.
	 */
	if ((nic->rx_queues + nic->tx_queues) > nic->max_queues) {
		netdev_warn(dev,
			    "Failed to attach BPF prog, RXQs + TXQs > Max %d\n",
			    nic->max_queues);
		return -ENOMEM;
	}

	if (if_up)
		nicvf_stop(nic->netdev);

	old_prog = xchg(&nic->xdp_prog, prog);
	/* Detach old prog, if any */
	if (old_prog)
		bpf_prog_put(old_prog);

	if (nic->xdp_prog) {
		/* Attach BPF program */
		nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1);
		if (!IS_ERR(nic->xdp_prog))
			bpf_attached = true;
	}

	/* Calculate Tx queues needed for XDP and network stack */
	nicvf_set_xdp_queues(nic, bpf_attached);

	if (if_up) {
		/* Reinitialize interface, clean slate */
		nicvf_open(nic->netdev);
		netif_trans_update(nic->netdev);
	}

	return 0;
}

static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
{
	struct nicvf *nic = netdev_priv(netdev);

	/* To avoid checks while retrieving buffer address from CQE_RX,
	 * do not support XDP for T88 pass1.x silicons which are anyway
	 * not in use widely.
	 */
	if (pass1_silicon(nic->pdev))
		return -EOPNOTSUPP;

	switch (xdp->command) {
	case XDP_SETUP_PROG:
		return nicvf_xdp_setup(nic, xdp->prog);
	case XDP_QUERY_PROG:
		xdp->prog_attached = !!nic->xdp_prog;
		xdp->prog_id = nic->xdp_prog ? nic->xdp_prog->aux->id : 0;
		return 0;
	default:
		return -EINVAL;
	}
}
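
/* .ndo_xdp_xmit: transmit a buffer that an XDP program redirected to this
 * netdev, using the queue index and DMA address saved in the page by
 * nicvf_xdp_rx() on the XDP_REDIRECT path.
 */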
static int nicvf_xdp_xmit(struct net_device *netdev, struct xdp_buff *xdp)
{
	struct nicvf *nic = netdev_priv(netdev);
	struct nicvf *snic = nic;
	struct nicvf_xdp_tx *xdp_tx;
	struct snd_queue *sq;
	struct page *page;
	int err, qidx;

	if (!netif_running(netdev) || !nic->xdp_prog)
		return -EINVAL;

	page = virt_to_page(xdp->data);
	xdp_tx = (struct nicvf_xdp_tx *)page_address(page);
	qidx = xdp_tx->qidx;

	if (xdp_tx->qidx >= nic->xdp_tx_queues)
		return -EINVAL;

	/* Get secondary Qset's info */
	if (xdp_tx->qidx >= MAX_SND_QUEUES_PER_QS) {
		qidx = xdp_tx->qidx / MAX_SND_QUEUES_PER_QS;
		snic = (struct nicvf *)nic->snicvf[qidx - 1];
		if (!snic)
			return -EINVAL;
		qidx = xdp_tx->qidx % MAX_SND_QUEUES_PER_QS;
	}

	sq = &snic->qs->sq[qidx];
	err = nicvf_xdp_sq_append_pkt(snic, sq, (u64)xdp->data,
				      xdp_tx->dma_addr,
				      xdp->data_end - xdp->data);
	if (err)
		return -ENOMEM;

	nicvf_xdp_sq_doorbell(snic, sq, qidx);
	return 0;
}

static void nicvf_xdp_flush(struct net_device *dev)
{
	return;
}

static const struct net_device_ops nicvf_netdev_ops = {
	.ndo_open		= nicvf_open,
	.ndo_stop		= nicvf_stop,
	.ndo_start_xmit		= nicvf_xmit,
	.ndo_change_mtu		= nicvf_change_mtu,
	.ndo_set_mac_address	= nicvf_set_mac_address,
	.ndo_get_stats64	= nicvf_get_stats64,
	.ndo_tx_timeout		= nicvf_tx_timeout,
	.ndo_fix_features	= nicvf_fix_features,
	.ndo_set_features	= nicvf_set_features,
	.ndo_bpf		= nicvf_xdp,
	.ndo_xdp_xmit		= nicvf_xdp_xmit,
	.ndo_xdp_flush		= nicvf_xdp_flush,
};

static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	struct device *dev = &pdev->dev;
	struct net_device *netdev;
	struct nicvf *nic;
	int err, qcount;
	u16 sdevid;

	err = pci_enable_device(pdev);
	if (err) {
		dev_err(dev, "Failed to enable PCI device\n");
		return err;
	}

	err = pci_request_regions(pdev, DRV_NAME);
	if (err) {
		dev_err(dev, "PCI request regions failed 0x%x\n", err);
		goto err_disable_device;
	}

	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
	if (err) {
		dev_err(dev, "Unable to get usable DMA configuration\n");
		goto err_release_regions;
	}

	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
	if (err) {
		dev_err(dev, "unable to get 48-bit DMA for consistent allocations\n");
		goto err_release_regions;
	}

	qcount = netif_get_num_default_rss_queues();

	/* Restrict multiqset support only for host bound VFs */
	if (pdev->is_virtfn) {
		/* Set max number of queues per VF */
		qcount = min_t(int, num_online_cpus(),
			       (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS);
	}

	netdev = alloc_etherdev_mqs(sizeof(struct nicvf), qcount, qcount);
	if (!netdev) {
		err = -ENOMEM;
		goto err_release_regions;
	}

	pci_set_drvdata(pdev, netdev);

	SET_NETDEV_DEV(netdev, &pdev->dev);

	nic = netdev_priv(netdev);
	nic->netdev = netdev;
	nic->pdev = pdev;
	nic->pnicvf = nic;
	nic->max_queues = qcount;
	/* If the number of CPUs is too low, there won't be any queues left
	 * for XDP_TX, hence double it.
	 */
	if (!nic->t88)
		nic->max_queues *= 2;

	/* Map VF's configuration registers */
	nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
	if (!nic->reg_base) {
		dev_err(dev, "Cannot map config register space, aborting\n");
		err = -ENOMEM;
		goto err_free_netdev;
	}

	nic->drv_stats = netdev_alloc_pcpu_stats(struct nicvf_drv_stats);
	if (!nic->drv_stats) {
		err = -ENOMEM;
		goto err_free_netdev;
	}

	err = nicvf_set_qset_resources(nic);
	if (err)
		goto err_free_netdev;

	/* Check if PF is alive and get MAC address for this VF */
	err = nicvf_register_misc_interrupt(nic);
	if (err)
		goto err_free_netdev;

	nicvf_send_vf_struct(nic);

	if (!pass1_silicon(nic->pdev))
		nic->hw_tso = true;

	/* Get iommu domain for iova to physical addr conversion */
	nic->iommu_domain = iommu_get_domain_for_dev(dev);

	pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
	if (sdevid == 0xA134)
		nic->t88 = true;

	/* Check if this VF is in QS only mode */
	if (nic->sqs_mode)
		return 0;

	err = nicvf_set_real_num_queues(netdev, nic->tx_queues, nic->rx_queues);
	if (err)
		goto err_unregister_interrupts;

	netdev->hw_features = (NETIF_F_RXCSUM | NETIF_F_SG |
			       NETIF_F_TSO | NETIF_F_GRO | NETIF_F_TSO6 |
			       NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
			       NETIF_F_HW_VLAN_CTAG_RX);

	netdev->hw_features |= NETIF_F_RXHASH;

	netdev->features |= netdev->hw_features;
	netdev->hw_features |= NETIF_F_LOOPBACK;

	netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM |
				NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6;

	netdev->netdev_ops = &nicvf_netdev_ops;
	netdev->watchdog_timeo = NICVF_TX_TIMEOUT;

	/* MTU range: 64 - 9200 */
	netdev->min_mtu = NIC_HW_MIN_FRS;
	netdev->max_mtu = NIC_HW_MAX_FRS;

	INIT_WORK(&nic->reset_task, nicvf_reset_task);

	err = register_netdev(netdev);
	if (err) {
		dev_err(dev, "Failed to register netdevice\n");
		goto err_unregister_interrupts;
	}

	nic->msg_enable = debug;

	nicvf_set_ethtool_ops(netdev);

	return 0;

err_unregister_interrupts:
	nicvf_unregister_interrupts(nic);
err_free_netdev:
	pci_set_drvdata(pdev, NULL);
	if (nic->drv_stats)
		free_percpu(nic->drv_stats);
	free_netdev(netdev);
err_release_regions:
	pci_release_regions(pdev);
err_disable_device:
	pci_disable_device(pdev);
	return err;
}

static void nicvf_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct nicvf *nic;
	struct net_device *pnetdev;

	if (!netdev)
		return;

	nic = netdev_priv(netdev);
	pnetdev = nic->pnicvf->netdev;

	/* Check if this Qset is assigned to a different VF.
	 * If yes, clean primary and all secondary Qsets.
	 */
	if (pnetdev && (pnetdev->reg_state == NETREG_REGISTERED))
		unregister_netdev(pnetdev);
	nicvf_unregister_interrupts(nic);
	pci_set_drvdata(pdev, NULL);
	if (nic->drv_stats)
		free_percpu(nic->drv_stats);
	free_netdev(netdev);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}

static void nicvf_shutdown(struct pci_dev *pdev)
{
	nicvf_remove(pdev);
}

static struct pci_driver nicvf_driver = {
	.name = DRV_NAME,
	.id_table = nicvf_id_table,
	.probe = nicvf_probe,
	.remove = nicvf_remove,
	.shutdown = nicvf_shutdown,
};

static int __init nicvf_init_module(void)
{
	pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);

	return pci_register_driver(&nicvf_driver);
}

static void __exit nicvf_cleanup_module(void)
{
	pci_unregister_driver(&nicvf_driver);
}

module_init(nicvf_init_module);
module_exit(nicvf_cleanup_module);