1 /* 2 * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet 3 * driver for Linux. 4 * 5 * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 */ 35 36 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 37 38 #include <linux/module.h> 39 #include <linux/moduleparam.h> 40 #include <linux/init.h> 41 #include <linux/pci.h> 42 #include <linux/dma-mapping.h> 43 #include <linux/netdevice.h> 44 #include <linux/etherdevice.h> 45 #include <linux/debugfs.h> 46 #include <linux/ethtool.h> 47 #include <linux/mdio.h> 48 49 #include "t4vf_common.h" 50 #include "t4vf_defs.h" 51 52 #include "../cxgb4/t4_regs.h" 53 #include "../cxgb4/t4_msg.h" 54 55 /* 56 * Generic information about the driver. 57 */ 58 #define DRV_VERSION "2.0.0-ko" 59 #define DRV_DESC "Chelsio T4/T5/T6 Virtual Function (VF) Network Driver" 60 61 /* 62 * Module Parameters. 63 * ================== 64 */ 65 66 /* 67 * Default ethtool "message level" for adapters. 68 */ 69 #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \ 70 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\ 71 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) 72 73 /* 74 * The driver uses the best interrupt scheme available on a platform in the 75 * order MSI-X then MSI. This parameter determines which of these schemes the 76 * driver may consider as follows: 77 * 78 * msi = 2: choose from among MSI-X and MSI 79 * msi = 1: only consider MSI interrupts 80 * 81 * Note that unlike the Physical Function driver, this Virtual Function driver 82 * does _not_ support legacy INTx interrupts (this limitation is mandated by 83 * the PCI-E SR-IOV standard). 84 */ 85 #define MSI_MSIX 2 86 #define MSI_MSI 1 87 #define MSI_DEFAULT MSI_MSIX 88 89 static int msi = MSI_DEFAULT; 90 91 module_param(msi, int, 0644); 92 MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI"); 93 94 /* 95 * Fundamental constants. 96 * ====================== 97 */ 98 99 enum { 100 MAX_TXQ_ENTRIES = 16384, 101 MAX_RSPQ_ENTRIES = 16384, 102 MAX_RX_BUFFERS = 16384, 103 104 MIN_TXQ_ENTRIES = 32, 105 MIN_RSPQ_ENTRIES = 128, 106 MIN_FL_ENTRIES = 16, 107 108 /* 109 * For purposes of manipulating the Free List size we need to 110 * recognize that Free Lists are actually Egress Queues (the host 111 * produces free buffers which the hardware consumes), Egress Queues 112 * indices are all in units of Egress Context Units bytes, and free 113 * list entries are 64-bit PCI DMA addresses. And since the state of 114 * the Producer Index == the Consumer Index implies an EMPTY list, we 115 * always have at least one Egress Unit's worth of Free List entries 116 * unused. See sge.c for more details ... 117 */ 118 EQ_UNIT = SGE_EQ_IDXSIZE, 119 FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64), 120 MIN_FL_RESID = FL_PER_EQ_UNIT, 121 }; 122 123 /* 124 * Global driver state. 125 * ==================== 126 */ 127 128 static struct dentry *cxgb4vf_debugfs_root; 129 130 /* 131 * OS "Callback" functions. 132 * ======================== 133 */ 134 135 /* 136 * The link status has changed on the indicated "port" (Virtual Interface). 137 */ 138 void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok) 139 { 140 struct net_device *dev = adapter->port[pidx]; 141 142 /* 143 * If the port is disabled or the current recorded "link up" 144 * status matches the new status, just return. 145 */ 146 if (!netif_running(dev) || link_ok == netif_carrier_ok(dev)) 147 return; 148 149 /* 150 * Tell the OS that the link status has changed and print a short 151 * informative message on the console about the event. 152 */ 153 if (link_ok) { 154 const char *s; 155 const char *fc; 156 const struct port_info *pi = netdev_priv(dev); 157 158 switch (pi->link_cfg.speed) { 159 case 100: 160 s = "100Mbps"; 161 break; 162 case 1000: 163 s = "1Gbps"; 164 break; 165 case 10000: 166 s = "10Gbps"; 167 break; 168 case 25000: 169 s = "25Gbps"; 170 break; 171 case 40000: 172 s = "40Gbps"; 173 break; 174 case 100000: 175 s = "100Gbps"; 176 break; 177 178 default: 179 s = "unknown"; 180 break; 181 } 182 183 switch ((int)pi->link_cfg.fc) { 184 case PAUSE_RX: 185 fc = "RX"; 186 break; 187 188 case PAUSE_TX: 189 fc = "TX"; 190 break; 191 192 case PAUSE_RX | PAUSE_TX: 193 fc = "RX/TX"; 194 break; 195 196 default: 197 fc = "no"; 198 break; 199 } 200 201 netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc); 202 } else { 203 netdev_info(dev, "link down\n"); 204 } 205 } 206 207 /* 208 * THe port module type has changed on the indicated "port" (Virtual 209 * Interface). 210 */ 211 void t4vf_os_portmod_changed(struct adapter *adapter, int pidx) 212 { 213 static const char * const mod_str[] = { 214 NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM" 215 }; 216 const struct net_device *dev = adapter->port[pidx]; 217 const struct port_info *pi = netdev_priv(dev); 218 219 if (pi->mod_type == FW_PORT_MOD_TYPE_NONE) 220 dev_info(adapter->pdev_dev, "%s: port module unplugged\n", 221 dev->name); 222 else if (pi->mod_type < ARRAY_SIZE(mod_str)) 223 dev_info(adapter->pdev_dev, "%s: %s port module inserted\n", 224 dev->name, mod_str[pi->mod_type]); 225 else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED) 226 dev_info(adapter->pdev_dev, "%s: unsupported optical port " 227 "module inserted\n", dev->name); 228 else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN) 229 dev_info(adapter->pdev_dev, "%s: unknown port module inserted," 230 "forcing TWINAX\n", dev->name); 231 else if (pi->mod_type == FW_PORT_MOD_TYPE_ERROR) 232 dev_info(adapter->pdev_dev, "%s: transceiver module error\n", 233 dev->name); 234 else 235 dev_info(adapter->pdev_dev, "%s: unknown module type %d " 236 "inserted\n", dev->name, pi->mod_type); 237 } 238 239 static int cxgb4vf_set_addr_hash(struct port_info *pi) 240 { 241 struct adapter *adapter = pi->adapter; 242 u64 vec = 0; 243 bool ucast = false; 244 struct hash_mac_addr *entry; 245 246 /* Calculate the hash vector for the updated list and program it */ 247 list_for_each_entry(entry, &adapter->mac_hlist, list) { 248 ucast |= is_unicast_ether_addr(entry->addr); 249 vec |= (1ULL << hash_mac_addr(entry->addr)); 250 } 251 return t4vf_set_addr_hash(adapter, pi->viid, ucast, vec, false); 252 } 253 254 /** 255 * cxgb4vf_change_mac - Update match filter for a MAC address. 256 * @pi: the port_info 257 * @viid: the VI id 258 * @tcam_idx: TCAM index of existing filter for old value of MAC address, 259 * or -1 260 * @addr: the new MAC address value 261 * @persist: whether a new MAC allocation should be persistent 262 * @add_smt: if true also add the address to the HW SMT 263 * 264 * Modifies an MPS filter and sets it to the new MAC address if 265 * @tcam_idx >= 0, or adds the MAC address to a new filter if 266 * @tcam_idx < 0. In the latter case the address is added persistently 267 * if @persist is %true. 268 * Addresses are programmed to hash region, if tcam runs out of entries. 269 * 270 */ 271 static int cxgb4vf_change_mac(struct port_info *pi, unsigned int viid, 272 int *tcam_idx, const u8 *addr, bool persistent) 273 { 274 struct hash_mac_addr *new_entry, *entry; 275 struct adapter *adapter = pi->adapter; 276 int ret; 277 278 ret = t4vf_change_mac(adapter, viid, *tcam_idx, addr, persistent); 279 /* We ran out of TCAM entries. try programming hash region. */ 280 if (ret == -ENOMEM) { 281 /* If the MAC address to be updated is in the hash addr 282 * list, update it from the list 283 */ 284 list_for_each_entry(entry, &adapter->mac_hlist, list) { 285 if (entry->iface_mac) { 286 ether_addr_copy(entry->addr, addr); 287 goto set_hash; 288 } 289 } 290 new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL); 291 if (!new_entry) 292 return -ENOMEM; 293 ether_addr_copy(new_entry->addr, addr); 294 new_entry->iface_mac = true; 295 list_add_tail(&new_entry->list, &adapter->mac_hlist); 296 set_hash: 297 ret = cxgb4vf_set_addr_hash(pi); 298 } else if (ret >= 0) { 299 *tcam_idx = ret; 300 ret = 0; 301 } 302 303 return ret; 304 } 305 306 /* 307 * Net device operations. 308 * ====================== 309 */ 310 311 312 313 314 /* 315 * Perform the MAC and PHY actions needed to enable a "port" (Virtual 316 * Interface). 317 */ 318 static int link_start(struct net_device *dev) 319 { 320 int ret; 321 struct port_info *pi = netdev_priv(dev); 322 323 /* 324 * We do not set address filters and promiscuity here, the stack does 325 * that step explicitly. Enable vlan accel. 326 */ 327 ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1, 328 true); 329 if (ret == 0) 330 ret = cxgb4vf_change_mac(pi, pi->viid, 331 &pi->xact_addr_filt, 332 dev->dev_addr, true); 333 334 /* 335 * We don't need to actually "start the link" itself since the 336 * firmware will do that for us when the first Virtual Interface 337 * is enabled on a port. 338 */ 339 if (ret == 0) 340 ret = t4vf_enable_pi(pi->adapter, pi, true, true); 341 342 /* The Virtual Interfaces are connected to an internal switch on the 343 * chip which allows VIs attached to the same port to talk to each 344 * other even when the port link is down. As a result, we generally 345 * want to always report a VI's link as being "up", provided there are 346 * no errors in enabling vi. 347 */ 348 349 if (ret == 0) 350 netif_carrier_on(dev); 351 352 return ret; 353 } 354 355 /* 356 * Name the MSI-X interrupts. 357 */ 358 static void name_msix_vecs(struct adapter *adapter) 359 { 360 int namelen = sizeof(adapter->msix_info[0].desc) - 1; 361 int pidx; 362 363 /* 364 * Firmware events. 365 */ 366 snprintf(adapter->msix_info[MSIX_FW].desc, namelen, 367 "%s-FWeventq", adapter->name); 368 adapter->msix_info[MSIX_FW].desc[namelen] = 0; 369 370 /* 371 * Ethernet queues. 372 */ 373 for_each_port(adapter, pidx) { 374 struct net_device *dev = adapter->port[pidx]; 375 const struct port_info *pi = netdev_priv(dev); 376 int qs, msi; 377 378 for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) { 379 snprintf(adapter->msix_info[msi].desc, namelen, 380 "%s-%d", dev->name, qs); 381 adapter->msix_info[msi].desc[namelen] = 0; 382 } 383 } 384 } 385 386 /* 387 * Request all of our MSI-X resources. 388 */ 389 static int request_msix_queue_irqs(struct adapter *adapter) 390 { 391 struct sge *s = &adapter->sge; 392 int rxq, msi, err; 393 394 /* 395 * Firmware events. 396 */ 397 err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix, 398 0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq); 399 if (err) 400 return err; 401 402 /* 403 * Ethernet queues. 404 */ 405 msi = MSIX_IQFLINT; 406 for_each_ethrxq(s, rxq) { 407 err = request_irq(adapter->msix_info[msi].vec, 408 t4vf_sge_intr_msix, 0, 409 adapter->msix_info[msi].desc, 410 &s->ethrxq[rxq].rspq); 411 if (err) 412 goto err_free_irqs; 413 msi++; 414 } 415 return 0; 416 417 err_free_irqs: 418 while (--rxq >= 0) 419 free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq); 420 free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq); 421 return err; 422 } 423 424 /* 425 * Free our MSI-X resources. 426 */ 427 static void free_msix_queue_irqs(struct adapter *adapter) 428 { 429 struct sge *s = &adapter->sge; 430 int rxq, msi; 431 432 free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq); 433 msi = MSIX_IQFLINT; 434 for_each_ethrxq(s, rxq) 435 free_irq(adapter->msix_info[msi++].vec, 436 &s->ethrxq[rxq].rspq); 437 } 438 439 /* 440 * Turn on NAPI and start up interrupts on a response queue. 441 */ 442 static void qenable(struct sge_rspq *rspq) 443 { 444 napi_enable(&rspq->napi); 445 446 /* 447 * 0-increment the Going To Sleep register to start the timer and 448 * enable interrupts. 449 */ 450 t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS, 451 CIDXINC_V(0) | 452 SEINTARM_V(rspq->intr_params) | 453 INGRESSQID_V(rspq->cntxt_id)); 454 } 455 456 /* 457 * Enable NAPI scheduling and interrupt generation for all Receive Queues. 458 */ 459 static void enable_rx(struct adapter *adapter) 460 { 461 int rxq; 462 struct sge *s = &adapter->sge; 463 464 for_each_ethrxq(s, rxq) 465 qenable(&s->ethrxq[rxq].rspq); 466 qenable(&s->fw_evtq); 467 468 /* 469 * The interrupt queue doesn't use NAPI so we do the 0-increment of 470 * its Going To Sleep register here to get it started. 471 */ 472 if (adapter->flags & USING_MSI) 473 t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS, 474 CIDXINC_V(0) | 475 SEINTARM_V(s->intrq.intr_params) | 476 INGRESSQID_V(s->intrq.cntxt_id)); 477 478 } 479 480 /* 481 * Wait until all NAPI handlers are descheduled. 482 */ 483 static void quiesce_rx(struct adapter *adapter) 484 { 485 struct sge *s = &adapter->sge; 486 int rxq; 487 488 for_each_ethrxq(s, rxq) 489 napi_disable(&s->ethrxq[rxq].rspq.napi); 490 napi_disable(&s->fw_evtq.napi); 491 } 492 493 /* 494 * Response queue handler for the firmware event queue. 495 */ 496 static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp, 497 const struct pkt_gl *gl) 498 { 499 /* 500 * Extract response opcode and get pointer to CPL message body. 501 */ 502 struct adapter *adapter = rspq->adapter; 503 u8 opcode = ((const struct rss_header *)rsp)->opcode; 504 void *cpl = (void *)(rsp + 1); 505 506 switch (opcode) { 507 case CPL_FW6_MSG: { 508 /* 509 * We've received an asynchronous message from the firmware. 510 */ 511 const struct cpl_fw6_msg *fw_msg = cpl; 512 if (fw_msg->type == FW6_TYPE_CMD_RPL) 513 t4vf_handle_fw_rpl(adapter, fw_msg->data); 514 break; 515 } 516 517 case CPL_FW4_MSG: { 518 /* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG. 519 */ 520 const struct cpl_sge_egr_update *p = (void *)(rsp + 3); 521 opcode = CPL_OPCODE_G(ntohl(p->opcode_qid)); 522 if (opcode != CPL_SGE_EGR_UPDATE) { 523 dev_err(adapter->pdev_dev, "unexpected FW4/CPL %#x on FW event queue\n" 524 , opcode); 525 break; 526 } 527 cpl = (void *)p; 528 /*FALLTHROUGH*/ 529 } 530 531 case CPL_SGE_EGR_UPDATE: { 532 /* 533 * We've received an Egress Queue Status Update message. We 534 * get these, if the SGE is configured to send these when the 535 * firmware passes certain points in processing our TX 536 * Ethernet Queue or if we make an explicit request for one. 537 * We use these updates to determine when we may need to 538 * restart a TX Ethernet Queue which was stopped for lack of 539 * free TX Queue Descriptors ... 540 */ 541 const struct cpl_sge_egr_update *p = cpl; 542 unsigned int qid = EGR_QID_G(be32_to_cpu(p->opcode_qid)); 543 struct sge *s = &adapter->sge; 544 struct sge_txq *tq; 545 struct sge_eth_txq *txq; 546 unsigned int eq_idx; 547 548 /* 549 * Perform sanity checking on the Queue ID to make sure it 550 * really refers to one of our TX Ethernet Egress Queues which 551 * is active and matches the queue's ID. None of these error 552 * conditions should ever happen so we may want to either make 553 * them fatal and/or conditionalized under DEBUG. 554 */ 555 eq_idx = EQ_IDX(s, qid); 556 if (unlikely(eq_idx >= MAX_EGRQ)) { 557 dev_err(adapter->pdev_dev, 558 "Egress Update QID %d out of range\n", qid); 559 break; 560 } 561 tq = s->egr_map[eq_idx]; 562 if (unlikely(tq == NULL)) { 563 dev_err(adapter->pdev_dev, 564 "Egress Update QID %d TXQ=NULL\n", qid); 565 break; 566 } 567 txq = container_of(tq, struct sge_eth_txq, q); 568 if (unlikely(tq->abs_id != qid)) { 569 dev_err(adapter->pdev_dev, 570 "Egress Update QID %d refers to TXQ %d\n", 571 qid, tq->abs_id); 572 break; 573 } 574 575 /* 576 * Restart a stopped TX Queue which has less than half of its 577 * TX ring in use ... 578 */ 579 txq->q.restarts++; 580 netif_tx_wake_queue(txq->txq); 581 break; 582 } 583 584 default: 585 dev_err(adapter->pdev_dev, 586 "unexpected CPL %#x on FW event queue\n", opcode); 587 } 588 589 return 0; 590 } 591 592 /* 593 * Allocate SGE TX/RX response queues. Determine how many sets of SGE queues 594 * to use and initializes them. We support multiple "Queue Sets" per port if 595 * we have MSI-X, otherwise just one queue set per port. 596 */ 597 static int setup_sge_queues(struct adapter *adapter) 598 { 599 struct sge *s = &adapter->sge; 600 int err, pidx, msix; 601 602 /* 603 * Clear "Queue Set" Free List Starving and TX Queue Mapping Error 604 * state. 605 */ 606 bitmap_zero(s->starving_fl, MAX_EGRQ); 607 608 /* 609 * If we're using MSI interrupt mode we need to set up a "forwarded 610 * interrupt" queue which we'll set up with our MSI vector. The rest 611 * of the ingress queues will be set up to forward their interrupts to 612 * this queue ... This must be first since t4vf_sge_alloc_rxq() uses 613 * the intrq's queue ID as the interrupt forwarding queue for the 614 * subsequent calls ... 615 */ 616 if (adapter->flags & USING_MSI) { 617 err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false, 618 adapter->port[0], 0, NULL, NULL); 619 if (err) 620 goto err_free_queues; 621 } 622 623 /* 624 * Allocate our ingress queue for asynchronous firmware messages. 625 */ 626 err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0], 627 MSIX_FW, NULL, fwevtq_handler); 628 if (err) 629 goto err_free_queues; 630 631 /* 632 * Allocate each "port"'s initial Queue Sets. These can be changed 633 * later on ... up to the point where any interface on the adapter is 634 * brought up at which point lots of things get nailed down 635 * permanently ... 636 */ 637 msix = MSIX_IQFLINT; 638 for_each_port(adapter, pidx) { 639 struct net_device *dev = adapter->port[pidx]; 640 struct port_info *pi = netdev_priv(dev); 641 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset]; 642 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset]; 643 int qs; 644 645 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) { 646 err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false, 647 dev, msix++, 648 &rxq->fl, t4vf_ethrx_handler); 649 if (err) 650 goto err_free_queues; 651 652 err = t4vf_sge_alloc_eth_txq(adapter, txq, dev, 653 netdev_get_tx_queue(dev, qs), 654 s->fw_evtq.cntxt_id); 655 if (err) 656 goto err_free_queues; 657 658 rxq->rspq.idx = qs; 659 memset(&rxq->stats, 0, sizeof(rxq->stats)); 660 } 661 } 662 663 /* 664 * Create the reverse mappings for the queues. 665 */ 666 s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id; 667 s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id; 668 IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq; 669 for_each_port(adapter, pidx) { 670 struct net_device *dev = adapter->port[pidx]; 671 struct port_info *pi = netdev_priv(dev); 672 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset]; 673 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset]; 674 int qs; 675 676 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) { 677 IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq; 678 EQ_MAP(s, txq->q.abs_id) = &txq->q; 679 680 /* 681 * The FW_IQ_CMD doesn't return the Absolute Queue IDs 682 * for Free Lists but since all of the Egress Queues 683 * (including Free Lists) have Relative Queue IDs 684 * which are computed as Absolute - Base Queue ID, we 685 * can synthesize the Absolute Queue IDs for the Free 686 * Lists. This is useful for debugging purposes when 687 * we want to dump Queue Contexts via the PF Driver. 688 */ 689 rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base; 690 EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl; 691 } 692 } 693 return 0; 694 695 err_free_queues: 696 t4vf_free_sge_resources(adapter); 697 return err; 698 } 699 700 /* 701 * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive 702 * queues. We configure the RSS CPU lookup table to distribute to the number 703 * of HW receive queues, and the response queue lookup table to narrow that 704 * down to the response queues actually configured for each "port" (Virtual 705 * Interface). We always configure the RSS mapping for all ports since the 706 * mapping table has plenty of entries. 707 */ 708 static int setup_rss(struct adapter *adapter) 709 { 710 int pidx; 711 712 for_each_port(adapter, pidx) { 713 struct port_info *pi = adap2pinfo(adapter, pidx); 714 struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset]; 715 u16 rss[MAX_PORT_QSETS]; 716 int qs, err; 717 718 for (qs = 0; qs < pi->nqsets; qs++) 719 rss[qs] = rxq[qs].rspq.abs_id; 720 721 err = t4vf_config_rss_range(adapter, pi->viid, 722 0, pi->rss_size, rss, pi->nqsets); 723 if (err) 724 return err; 725 726 /* 727 * Perform Global RSS Mode-specific initialization. 728 */ 729 switch (adapter->params.rss.mode) { 730 case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL: 731 /* 732 * If Tunnel All Lookup isn't specified in the global 733 * RSS Configuration, then we need to specify a 734 * default Ingress Queue for any ingress packets which 735 * aren't hashed. We'll use our first ingress queue 736 * ... 737 */ 738 if (!adapter->params.rss.u.basicvirtual.tnlalllookup) { 739 union rss_vi_config config; 740 err = t4vf_read_rss_vi_config(adapter, 741 pi->viid, 742 &config); 743 if (err) 744 return err; 745 config.basicvirtual.defaultq = 746 rxq[0].rspq.abs_id; 747 err = t4vf_write_rss_vi_config(adapter, 748 pi->viid, 749 &config); 750 if (err) 751 return err; 752 } 753 break; 754 } 755 } 756 757 return 0; 758 } 759 760 /* 761 * Bring the adapter up. Called whenever we go from no "ports" open to having 762 * one open. This function performs the actions necessary to make an adapter 763 * operational, such as completing the initialization of HW modules, and 764 * enabling interrupts. Must be called with the rtnl lock held. (Note that 765 * this is called "cxgb_up" in the PF Driver.) 766 */ 767 static int adapter_up(struct adapter *adapter) 768 { 769 int err; 770 771 /* 772 * If this is the first time we've been called, perform basic 773 * adapter setup. Once we've done this, many of our adapter 774 * parameters can no longer be changed ... 775 */ 776 if ((adapter->flags & FULL_INIT_DONE) == 0) { 777 err = setup_sge_queues(adapter); 778 if (err) 779 return err; 780 err = setup_rss(adapter); 781 if (err) { 782 t4vf_free_sge_resources(adapter); 783 return err; 784 } 785 786 if (adapter->flags & USING_MSIX) 787 name_msix_vecs(adapter); 788 789 adapter->flags |= FULL_INIT_DONE; 790 } 791 792 /* 793 * Acquire our interrupt resources. We only support MSI-X and MSI. 794 */ 795 BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0); 796 if (adapter->flags & USING_MSIX) 797 err = request_msix_queue_irqs(adapter); 798 else 799 err = request_irq(adapter->pdev->irq, 800 t4vf_intr_handler(adapter), 0, 801 adapter->name, adapter); 802 if (err) { 803 dev_err(adapter->pdev_dev, "request_irq failed, err %d\n", 804 err); 805 return err; 806 } 807 808 /* 809 * Enable NAPI ingress processing and return success. 810 */ 811 enable_rx(adapter); 812 t4vf_sge_start(adapter); 813 814 return 0; 815 } 816 817 /* 818 * Bring the adapter down. Called whenever the last "port" (Virtual 819 * Interface) closed. (Note that this routine is called "cxgb_down" in the PF 820 * Driver.) 821 */ 822 static void adapter_down(struct adapter *adapter) 823 { 824 /* 825 * Free interrupt resources. 826 */ 827 if (adapter->flags & USING_MSIX) 828 free_msix_queue_irqs(adapter); 829 else 830 free_irq(adapter->pdev->irq, adapter); 831 832 /* 833 * Wait for NAPI handlers to finish. 834 */ 835 quiesce_rx(adapter); 836 } 837 838 /* 839 * Start up a net device. 840 */ 841 static int cxgb4vf_open(struct net_device *dev) 842 { 843 int err; 844 struct port_info *pi = netdev_priv(dev); 845 struct adapter *adapter = pi->adapter; 846 847 /* 848 * If this is the first interface that we're opening on the "adapter", 849 * bring the "adapter" up now. 850 */ 851 if (adapter->open_device_map == 0) { 852 err = adapter_up(adapter); 853 if (err) 854 return err; 855 } 856 857 /* It's possible that the basic port information could have 858 * changed since we first read it. 859 */ 860 err = t4vf_update_port_info(pi); 861 if (err < 0) 862 return err; 863 864 /* 865 * Note that this interface is up and start everything up ... 866 */ 867 err = link_start(dev); 868 if (err) 869 goto err_unwind; 870 871 pi->vlan_id = t4vf_get_vf_vlan_acl(adapter); 872 873 netif_tx_start_all_queues(dev); 874 set_bit(pi->port_id, &adapter->open_device_map); 875 return 0; 876 877 err_unwind: 878 if (adapter->open_device_map == 0) 879 adapter_down(adapter); 880 return err; 881 } 882 883 /* 884 * Shut down a net device. This routine is called "cxgb_close" in the PF 885 * Driver ... 886 */ 887 static int cxgb4vf_stop(struct net_device *dev) 888 { 889 struct port_info *pi = netdev_priv(dev); 890 struct adapter *adapter = pi->adapter; 891 892 netif_tx_stop_all_queues(dev); 893 netif_carrier_off(dev); 894 t4vf_enable_pi(adapter, pi, false, false); 895 896 clear_bit(pi->port_id, &adapter->open_device_map); 897 if (adapter->open_device_map == 0) 898 adapter_down(adapter); 899 return 0; 900 } 901 902 /* 903 * Translate our basic statistics into the standard "ifconfig" statistics. 904 */ 905 static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev) 906 { 907 struct t4vf_port_stats stats; 908 struct port_info *pi = netdev2pinfo(dev); 909 struct adapter *adapter = pi->adapter; 910 struct net_device_stats *ns = &dev->stats; 911 int err; 912 913 spin_lock(&adapter->stats_lock); 914 err = t4vf_get_port_stats(adapter, pi->pidx, &stats); 915 spin_unlock(&adapter->stats_lock); 916 917 memset(ns, 0, sizeof(*ns)); 918 if (err) 919 return ns; 920 921 ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes + 922 stats.tx_ucast_bytes + stats.tx_offload_bytes); 923 ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames + 924 stats.tx_ucast_frames + stats.tx_offload_frames); 925 ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes + 926 stats.rx_ucast_bytes); 927 ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames + 928 stats.rx_ucast_frames); 929 ns->multicast = stats.rx_mcast_frames; 930 ns->tx_errors = stats.tx_drop_frames; 931 ns->rx_errors = stats.rx_err_frames; 932 933 return ns; 934 } 935 936 static int cxgb4vf_mac_sync(struct net_device *netdev, const u8 *mac_addr) 937 { 938 struct port_info *pi = netdev_priv(netdev); 939 struct adapter *adapter = pi->adapter; 940 int ret; 941 u64 mhash = 0; 942 u64 uhash = 0; 943 bool free = false; 944 bool ucast = is_unicast_ether_addr(mac_addr); 945 const u8 *maclist[1] = {mac_addr}; 946 struct hash_mac_addr *new_entry; 947 948 ret = t4vf_alloc_mac_filt(adapter, pi->viid, free, 1, maclist, 949 NULL, ucast ? &uhash : &mhash, false); 950 if (ret < 0) 951 goto out; 952 /* if hash != 0, then add the addr to hash addr list 953 * so on the end we will calculate the hash for the 954 * list and program it 955 */ 956 if (uhash || mhash) { 957 new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC); 958 if (!new_entry) 959 return -ENOMEM; 960 ether_addr_copy(new_entry->addr, mac_addr); 961 list_add_tail(&new_entry->list, &adapter->mac_hlist); 962 ret = cxgb4vf_set_addr_hash(pi); 963 } 964 out: 965 return ret < 0 ? ret : 0; 966 } 967 968 static int cxgb4vf_mac_unsync(struct net_device *netdev, const u8 *mac_addr) 969 { 970 struct port_info *pi = netdev_priv(netdev); 971 struct adapter *adapter = pi->adapter; 972 int ret; 973 const u8 *maclist[1] = {mac_addr}; 974 struct hash_mac_addr *entry, *tmp; 975 976 /* If the MAC address to be removed is in the hash addr 977 * list, delete it from the list and update hash vector 978 */ 979 list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist, list) { 980 if (ether_addr_equal(entry->addr, mac_addr)) { 981 list_del(&entry->list); 982 kfree(entry); 983 return cxgb4vf_set_addr_hash(pi); 984 } 985 } 986 987 ret = t4vf_free_mac_filt(adapter, pi->viid, 1, maclist, false); 988 return ret < 0 ? -EINVAL : 0; 989 } 990 991 /* 992 * Set RX properties of a port, such as promiscruity, address filters, and MTU. 993 * If @mtu is -1 it is left unchanged. 994 */ 995 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok) 996 { 997 struct port_info *pi = netdev_priv(dev); 998 999 __dev_uc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync); 1000 __dev_mc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync); 1001 return t4vf_set_rxmode(pi->adapter, pi->viid, -1, 1002 (dev->flags & IFF_PROMISC) != 0, 1003 (dev->flags & IFF_ALLMULTI) != 0, 1004 1, -1, sleep_ok); 1005 } 1006 1007 /* 1008 * Set the current receive modes on the device. 1009 */ 1010 static void cxgb4vf_set_rxmode(struct net_device *dev) 1011 { 1012 /* unfortunately we can't return errors to the stack */ 1013 set_rxmode(dev, -1, false); 1014 } 1015 1016 /* 1017 * Find the entry in the interrupt holdoff timer value array which comes 1018 * closest to the specified interrupt holdoff value. 1019 */ 1020 static int closest_timer(const struct sge *s, int us) 1021 { 1022 int i, timer_idx = 0, min_delta = INT_MAX; 1023 1024 for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) { 1025 int delta = us - s->timer_val[i]; 1026 if (delta < 0) 1027 delta = -delta; 1028 if (delta < min_delta) { 1029 min_delta = delta; 1030 timer_idx = i; 1031 } 1032 } 1033 return timer_idx; 1034 } 1035 1036 static int closest_thres(const struct sge *s, int thres) 1037 { 1038 int i, delta, pktcnt_idx = 0, min_delta = INT_MAX; 1039 1040 for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) { 1041 delta = thres - s->counter_val[i]; 1042 if (delta < 0) 1043 delta = -delta; 1044 if (delta < min_delta) { 1045 min_delta = delta; 1046 pktcnt_idx = i; 1047 } 1048 } 1049 return pktcnt_idx; 1050 } 1051 1052 /* 1053 * Return a queue's interrupt hold-off time in us. 0 means no timer. 1054 */ 1055 static unsigned int qtimer_val(const struct adapter *adapter, 1056 const struct sge_rspq *rspq) 1057 { 1058 unsigned int timer_idx = QINTR_TIMER_IDX_G(rspq->intr_params); 1059 1060 return timer_idx < SGE_NTIMERS 1061 ? adapter->sge.timer_val[timer_idx] 1062 : 0; 1063 } 1064 1065 /** 1066 * set_rxq_intr_params - set a queue's interrupt holdoff parameters 1067 * @adapter: the adapter 1068 * @rspq: the RX response queue 1069 * @us: the hold-off time in us, or 0 to disable timer 1070 * @cnt: the hold-off packet count, or 0 to disable counter 1071 * 1072 * Sets an RX response queue's interrupt hold-off time and packet count. 1073 * At least one of the two needs to be enabled for the queue to generate 1074 * interrupts. 1075 */ 1076 static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq, 1077 unsigned int us, unsigned int cnt) 1078 { 1079 unsigned int timer_idx; 1080 1081 /* 1082 * If both the interrupt holdoff timer and count are specified as 1083 * zero, default to a holdoff count of 1 ... 1084 */ 1085 if ((us | cnt) == 0) 1086 cnt = 1; 1087 1088 /* 1089 * If an interrupt holdoff count has been specified, then find the 1090 * closest configured holdoff count and use that. If the response 1091 * queue has already been created, then update its queue context 1092 * parameters ... 1093 */ 1094 if (cnt) { 1095 int err; 1096 u32 v, pktcnt_idx; 1097 1098 pktcnt_idx = closest_thres(&adapter->sge, cnt); 1099 if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) { 1100 v = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | 1101 FW_PARAMS_PARAM_X_V( 1102 FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) | 1103 FW_PARAMS_PARAM_YZ_V(rspq->cntxt_id); 1104 err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx); 1105 if (err) 1106 return err; 1107 } 1108 rspq->pktcnt_idx = pktcnt_idx; 1109 } 1110 1111 /* 1112 * Compute the closest holdoff timer index from the supplied holdoff 1113 * timer value. 1114 */ 1115 timer_idx = (us == 0 1116 ? SGE_TIMER_RSTRT_CNTR 1117 : closest_timer(&adapter->sge, us)); 1118 1119 /* 1120 * Update the response queue's interrupt coalescing parameters and 1121 * return success. 1122 */ 1123 rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) | 1124 QINTR_CNT_EN_V(cnt > 0)); 1125 return 0; 1126 } 1127 1128 /* 1129 * Return a version number to identify the type of adapter. The scheme is: 1130 * - bits 0..9: chip version 1131 * - bits 10..15: chip revision 1132 */ 1133 static inline unsigned int mk_adap_vers(const struct adapter *adapter) 1134 { 1135 /* 1136 * Chip version 4, revision 0x3f (cxgb4vf). 1137 */ 1138 return CHELSIO_CHIP_VERSION(adapter->params.chip) | (0x3f << 10); 1139 } 1140 1141 /* 1142 * Execute the specified ioctl command. 1143 */ 1144 static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) 1145 { 1146 int ret = 0; 1147 1148 switch (cmd) { 1149 /* 1150 * The VF Driver doesn't have access to any of the other 1151 * common Ethernet device ioctl()'s (like reading/writing 1152 * PHY registers, etc. 1153 */ 1154 1155 default: 1156 ret = -EOPNOTSUPP; 1157 break; 1158 } 1159 return ret; 1160 } 1161 1162 /* 1163 * Change the device's MTU. 1164 */ 1165 static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu) 1166 { 1167 int ret; 1168 struct port_info *pi = netdev_priv(dev); 1169 1170 ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu, 1171 -1, -1, -1, -1, true); 1172 if (!ret) 1173 dev->mtu = new_mtu; 1174 return ret; 1175 } 1176 1177 static netdev_features_t cxgb4vf_fix_features(struct net_device *dev, 1178 netdev_features_t features) 1179 { 1180 /* 1181 * Since there is no support for separate rx/tx vlan accel 1182 * enable/disable make sure tx flag is always in same state as rx. 1183 */ 1184 if (features & NETIF_F_HW_VLAN_CTAG_RX) 1185 features |= NETIF_F_HW_VLAN_CTAG_TX; 1186 else 1187 features &= ~NETIF_F_HW_VLAN_CTAG_TX; 1188 1189 return features; 1190 } 1191 1192 static int cxgb4vf_set_features(struct net_device *dev, 1193 netdev_features_t features) 1194 { 1195 struct port_info *pi = netdev_priv(dev); 1196 netdev_features_t changed = dev->features ^ features; 1197 1198 if (changed & NETIF_F_HW_VLAN_CTAG_RX) 1199 t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1, 1200 features & NETIF_F_HW_VLAN_CTAG_TX, 0); 1201 1202 return 0; 1203 } 1204 1205 /* 1206 * Change the devices MAC address. 1207 */ 1208 static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr) 1209 { 1210 int ret; 1211 struct sockaddr *addr = _addr; 1212 struct port_info *pi = netdev_priv(dev); 1213 1214 if (!is_valid_ether_addr(addr->sa_data)) 1215 return -EADDRNOTAVAIL; 1216 1217 ret = cxgb4vf_change_mac(pi, pi->viid, &pi->xact_addr_filt, 1218 addr->sa_data, true); 1219 if (ret < 0) 1220 return ret; 1221 1222 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); 1223 return 0; 1224 } 1225 1226 #ifdef CONFIG_NET_POLL_CONTROLLER 1227 /* 1228 * Poll all of our receive queues. This is called outside of normal interrupt 1229 * context. 1230 */ 1231 static void cxgb4vf_poll_controller(struct net_device *dev) 1232 { 1233 struct port_info *pi = netdev_priv(dev); 1234 struct adapter *adapter = pi->adapter; 1235 1236 if (adapter->flags & USING_MSIX) { 1237 struct sge_eth_rxq *rxq; 1238 int nqsets; 1239 1240 rxq = &adapter->sge.ethrxq[pi->first_qset]; 1241 for (nqsets = pi->nqsets; nqsets; nqsets--) { 1242 t4vf_sge_intr_msix(0, &rxq->rspq); 1243 rxq++; 1244 } 1245 } else 1246 t4vf_intr_handler(adapter)(0, adapter); 1247 } 1248 #endif 1249 1250 /* 1251 * Ethtool operations. 1252 * =================== 1253 * 1254 * Note that we don't support any ethtool operations which change the physical 1255 * state of the port to which we're linked. 1256 */ 1257 1258 /** 1259 * from_fw_port_mod_type - translate Firmware Port/Module type to Ethtool 1260 * @port_type: Firmware Port Type 1261 * @mod_type: Firmware Module Type 1262 * 1263 * Translate Firmware Port/Module type to Ethtool Port Type. 1264 */ 1265 static int from_fw_port_mod_type(enum fw_port_type port_type, 1266 enum fw_port_module_type mod_type) 1267 { 1268 if (port_type == FW_PORT_TYPE_BT_SGMII || 1269 port_type == FW_PORT_TYPE_BT_XFI || 1270 port_type == FW_PORT_TYPE_BT_XAUI) { 1271 return PORT_TP; 1272 } else if (port_type == FW_PORT_TYPE_FIBER_XFI || 1273 port_type == FW_PORT_TYPE_FIBER_XAUI) { 1274 return PORT_FIBRE; 1275 } else if (port_type == FW_PORT_TYPE_SFP || 1276 port_type == FW_PORT_TYPE_QSFP_10G || 1277 port_type == FW_PORT_TYPE_QSA || 1278 port_type == FW_PORT_TYPE_QSFP || 1279 port_type == FW_PORT_TYPE_CR4_QSFP || 1280 port_type == FW_PORT_TYPE_CR_QSFP || 1281 port_type == FW_PORT_TYPE_CR2_QSFP || 1282 port_type == FW_PORT_TYPE_SFP28) { 1283 if (mod_type == FW_PORT_MOD_TYPE_LR || 1284 mod_type == FW_PORT_MOD_TYPE_SR || 1285 mod_type == FW_PORT_MOD_TYPE_ER || 1286 mod_type == FW_PORT_MOD_TYPE_LRM) 1287 return PORT_FIBRE; 1288 else if (mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE || 1289 mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE) 1290 return PORT_DA; 1291 else 1292 return PORT_OTHER; 1293 } else if (port_type == FW_PORT_TYPE_KR4_100G || 1294 port_type == FW_PORT_TYPE_KR_SFP28 || 1295 port_type == FW_PORT_TYPE_KR_XLAUI) { 1296 return PORT_NONE; 1297 } 1298 1299 return PORT_OTHER; 1300 } 1301 1302 /** 1303 * fw_caps_to_lmm - translate Firmware to ethtool Link Mode Mask 1304 * @port_type: Firmware Port Type 1305 * @fw_caps: Firmware Port Capabilities 1306 * @link_mode_mask: ethtool Link Mode Mask 1307 * 1308 * Translate a Firmware Port Capabilities specification to an ethtool 1309 * Link Mode Mask. 1310 */ 1311 static void fw_caps_to_lmm(enum fw_port_type port_type, 1312 unsigned int fw_caps, 1313 unsigned long *link_mode_mask) 1314 { 1315 #define SET_LMM(__lmm_name) \ 1316 __set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name ## _BIT, \ 1317 link_mode_mask) 1318 1319 #define FW_CAPS_TO_LMM(__fw_name, __lmm_name) \ 1320 do { \ 1321 if (fw_caps & FW_PORT_CAP32_ ## __fw_name) \ 1322 SET_LMM(__lmm_name); \ 1323 } while (0) 1324 1325 switch (port_type) { 1326 case FW_PORT_TYPE_BT_SGMII: 1327 case FW_PORT_TYPE_BT_XFI: 1328 case FW_PORT_TYPE_BT_XAUI: 1329 SET_LMM(TP); 1330 FW_CAPS_TO_LMM(SPEED_100M, 100baseT_Full); 1331 FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full); 1332 FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full); 1333 break; 1334 1335 case FW_PORT_TYPE_KX4: 1336 case FW_PORT_TYPE_KX: 1337 SET_LMM(Backplane); 1338 FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full); 1339 FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full); 1340 break; 1341 1342 case FW_PORT_TYPE_KR: 1343 SET_LMM(Backplane); 1344 FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full); 1345 break; 1346 1347 case FW_PORT_TYPE_BP_AP: 1348 SET_LMM(Backplane); 1349 FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full); 1350 FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC); 1351 FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full); 1352 break; 1353 1354 case FW_PORT_TYPE_BP4_AP: 1355 SET_LMM(Backplane); 1356 FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full); 1357 FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC); 1358 FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full); 1359 FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full); 1360 break; 1361 1362 case FW_PORT_TYPE_FIBER_XFI: 1363 case FW_PORT_TYPE_FIBER_XAUI: 1364 case FW_PORT_TYPE_SFP: 1365 case FW_PORT_TYPE_QSFP_10G: 1366 case FW_PORT_TYPE_QSA: 1367 SET_LMM(FIBRE); 1368 FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full); 1369 FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full); 1370 break; 1371 1372 case FW_PORT_TYPE_BP40_BA: 1373 case FW_PORT_TYPE_QSFP: 1374 SET_LMM(FIBRE); 1375 FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full); 1376 FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full); 1377 FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full); 1378 break; 1379 1380 case FW_PORT_TYPE_CR_QSFP: 1381 case FW_PORT_TYPE_SFP28: 1382 SET_LMM(FIBRE); 1383 FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full); 1384 FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full); 1385 FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full); 1386 break; 1387 1388 case FW_PORT_TYPE_KR_SFP28: 1389 SET_LMM(Backplane); 1390 FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full); 1391 FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full); 1392 FW_CAPS_TO_LMM(SPEED_25G, 25000baseKR_Full); 1393 break; 1394 1395 case FW_PORT_TYPE_KR_XLAUI: 1396 SET_LMM(Backplane); 1397 FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full); 1398 FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full); 1399 FW_CAPS_TO_LMM(SPEED_40G, 40000baseKR4_Full); 1400 break; 1401 1402 case FW_PORT_TYPE_CR2_QSFP: 1403 SET_LMM(FIBRE); 1404 FW_CAPS_TO_LMM(SPEED_50G, 50000baseSR2_Full); 1405 break; 1406 1407 case FW_PORT_TYPE_KR4_100G: 1408 case FW_PORT_TYPE_CR4_QSFP: 1409 SET_LMM(FIBRE); 1410 FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full); 1411 FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full); 1412 FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full); 1413 FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full); 1414 FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full); 1415 FW_CAPS_TO_LMM(SPEED_100G, 100000baseCR4_Full); 1416 break; 1417 1418 default: 1419 break; 1420 } 1421 1422 if (fw_caps & FW_PORT_CAP32_FEC_V(FW_PORT_CAP32_FEC_M)) { 1423 FW_CAPS_TO_LMM(FEC_RS, FEC_RS); 1424 FW_CAPS_TO_LMM(FEC_BASER_RS, FEC_BASER); 1425 } else { 1426 SET_LMM(FEC_NONE); 1427 } 1428 1429 FW_CAPS_TO_LMM(ANEG, Autoneg); 1430 FW_CAPS_TO_LMM(802_3_PAUSE, Pause); 1431 FW_CAPS_TO_LMM(802_3_ASM_DIR, Asym_Pause); 1432 1433 #undef FW_CAPS_TO_LMM 1434 #undef SET_LMM 1435 } 1436 1437 static int cxgb4vf_get_link_ksettings(struct net_device *dev, 1438 struct ethtool_link_ksettings *link_ksettings) 1439 { 1440 struct port_info *pi = netdev_priv(dev); 1441 struct ethtool_link_settings *base = &link_ksettings->base; 1442 1443 /* For the nonce, the Firmware doesn't send up Port State changes 1444 * when the Virtual Interface attached to the Port is down. So 1445 * if it's down, let's grab any changes. 1446 */ 1447 if (!netif_running(dev)) 1448 (void)t4vf_update_port_info(pi); 1449 1450 ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); 1451 ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); 1452 ethtool_link_ksettings_zero_link_mode(link_ksettings, lp_advertising); 1453 1454 base->port = from_fw_port_mod_type(pi->port_type, pi->mod_type); 1455 1456 if (pi->mdio_addr >= 0) { 1457 base->phy_address = pi->mdio_addr; 1458 base->mdio_support = (pi->port_type == FW_PORT_TYPE_BT_SGMII 1459 ? ETH_MDIO_SUPPORTS_C22 1460 : ETH_MDIO_SUPPORTS_C45); 1461 } else { 1462 base->phy_address = 255; 1463 base->mdio_support = 0; 1464 } 1465 1466 fw_caps_to_lmm(pi->port_type, pi->link_cfg.pcaps, 1467 link_ksettings->link_modes.supported); 1468 fw_caps_to_lmm(pi->port_type, pi->link_cfg.acaps, 1469 link_ksettings->link_modes.advertising); 1470 fw_caps_to_lmm(pi->port_type, pi->link_cfg.lpacaps, 1471 link_ksettings->link_modes.lp_advertising); 1472 1473 if (netif_carrier_ok(dev)) { 1474 base->speed = pi->link_cfg.speed; 1475 base->duplex = DUPLEX_FULL; 1476 } else { 1477 base->speed = SPEED_UNKNOWN; 1478 base->duplex = DUPLEX_UNKNOWN; 1479 } 1480 1481 if (pi->link_cfg.fc & PAUSE_RX) { 1482 if (pi->link_cfg.fc & PAUSE_TX) { 1483 ethtool_link_ksettings_add_link_mode(link_ksettings, 1484 advertising, 1485 Pause); 1486 } else { 1487 ethtool_link_ksettings_add_link_mode(link_ksettings, 1488 advertising, 1489 Asym_Pause); 1490 } 1491 } else if (pi->link_cfg.fc & PAUSE_TX) { 1492 ethtool_link_ksettings_add_link_mode(link_ksettings, 1493 advertising, 1494 Asym_Pause); 1495 } 1496 1497 base->autoneg = pi->link_cfg.autoneg; 1498 if (pi->link_cfg.pcaps & FW_PORT_CAP32_ANEG) 1499 ethtool_link_ksettings_add_link_mode(link_ksettings, 1500 supported, Autoneg); 1501 if (pi->link_cfg.autoneg) 1502 ethtool_link_ksettings_add_link_mode(link_ksettings, 1503 advertising, Autoneg); 1504 1505 return 0; 1506 } 1507 1508 /* Translate the Firmware FEC value into the ethtool value. */ 1509 static inline unsigned int fwcap_to_eth_fec(unsigned int fw_fec) 1510 { 1511 unsigned int eth_fec = 0; 1512 1513 if (fw_fec & FW_PORT_CAP32_FEC_RS) 1514 eth_fec |= ETHTOOL_FEC_RS; 1515 if (fw_fec & FW_PORT_CAP32_FEC_BASER_RS) 1516 eth_fec |= ETHTOOL_FEC_BASER; 1517 1518 /* if nothing is set, then FEC is off */ 1519 if (!eth_fec) 1520 eth_fec = ETHTOOL_FEC_OFF; 1521 1522 return eth_fec; 1523 } 1524 1525 /* Translate Common Code FEC value into ethtool value. */ 1526 static inline unsigned int cc_to_eth_fec(unsigned int cc_fec) 1527 { 1528 unsigned int eth_fec = 0; 1529 1530 if (cc_fec & FEC_AUTO) 1531 eth_fec |= ETHTOOL_FEC_AUTO; 1532 if (cc_fec & FEC_RS) 1533 eth_fec |= ETHTOOL_FEC_RS; 1534 if (cc_fec & FEC_BASER_RS) 1535 eth_fec |= ETHTOOL_FEC_BASER; 1536 1537 /* if nothing is set, then FEC is off */ 1538 if (!eth_fec) 1539 eth_fec = ETHTOOL_FEC_OFF; 1540 1541 return eth_fec; 1542 } 1543 1544 static int cxgb4vf_get_fecparam(struct net_device *dev, 1545 struct ethtool_fecparam *fec) 1546 { 1547 const struct port_info *pi = netdev_priv(dev); 1548 const struct link_config *lc = &pi->link_cfg; 1549 1550 /* Translate the Firmware FEC Support into the ethtool value. We 1551 * always support IEEE 802.3 "automatic" selection of Link FEC type if 1552 * any FEC is supported. 1553 */ 1554 fec->fec = fwcap_to_eth_fec(lc->pcaps); 1555 if (fec->fec != ETHTOOL_FEC_OFF) 1556 fec->fec |= ETHTOOL_FEC_AUTO; 1557 1558 /* Translate the current internal FEC parameters into the 1559 * ethtool values. 1560 */ 1561 fec->active_fec = cc_to_eth_fec(lc->fec); 1562 return 0; 1563 } 1564 1565 /* 1566 * Return our driver information. 1567 */ 1568 static void cxgb4vf_get_drvinfo(struct net_device *dev, 1569 struct ethtool_drvinfo *drvinfo) 1570 { 1571 struct adapter *adapter = netdev2adap(dev); 1572 1573 strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver)); 1574 strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version)); 1575 strlcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)), 1576 sizeof(drvinfo->bus_info)); 1577 snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), 1578 "%u.%u.%u.%u, TP %u.%u.%u.%u", 1579 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.fwrev), 1580 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.fwrev), 1581 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.fwrev), 1582 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.fwrev), 1583 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.tprev), 1584 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.tprev), 1585 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.tprev), 1586 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.tprev)); 1587 } 1588 1589 /* 1590 * Return current adapter message level. 1591 */ 1592 static u32 cxgb4vf_get_msglevel(struct net_device *dev) 1593 { 1594 return netdev2adap(dev)->msg_enable; 1595 } 1596 1597 /* 1598 * Set current adapter message level. 1599 */ 1600 static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel) 1601 { 1602 netdev2adap(dev)->msg_enable = msglevel; 1603 } 1604 1605 /* 1606 * Return the device's current Queue Set ring size parameters along with the 1607 * allowed maximum values. Since ethtool doesn't understand the concept of 1608 * multi-queue devices, we just return the current values associated with the 1609 * first Queue Set. 1610 */ 1611 static void cxgb4vf_get_ringparam(struct net_device *dev, 1612 struct ethtool_ringparam *rp) 1613 { 1614 const struct port_info *pi = netdev_priv(dev); 1615 const struct sge *s = &pi->adapter->sge; 1616 1617 rp->rx_max_pending = MAX_RX_BUFFERS; 1618 rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES; 1619 rp->rx_jumbo_max_pending = 0; 1620 rp->tx_max_pending = MAX_TXQ_ENTRIES; 1621 1622 rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID; 1623 rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size; 1624 rp->rx_jumbo_pending = 0; 1625 rp->tx_pending = s->ethtxq[pi->first_qset].q.size; 1626 } 1627 1628 /* 1629 * Set the Queue Set ring size parameters for the device. Again, since 1630 * ethtool doesn't allow for the concept of multiple queues per device, we'll 1631 * apply these new values across all of the Queue Sets associated with the 1632 * device -- after vetting them of course! 1633 */ 1634 static int cxgb4vf_set_ringparam(struct net_device *dev, 1635 struct ethtool_ringparam *rp) 1636 { 1637 const struct port_info *pi = netdev_priv(dev); 1638 struct adapter *adapter = pi->adapter; 1639 struct sge *s = &adapter->sge; 1640 int qs; 1641 1642 if (rp->rx_pending > MAX_RX_BUFFERS || 1643 rp->rx_jumbo_pending || 1644 rp->tx_pending > MAX_TXQ_ENTRIES || 1645 rp->rx_mini_pending > MAX_RSPQ_ENTRIES || 1646 rp->rx_mini_pending < MIN_RSPQ_ENTRIES || 1647 rp->rx_pending < MIN_FL_ENTRIES || 1648 rp->tx_pending < MIN_TXQ_ENTRIES) 1649 return -EINVAL; 1650 1651 if (adapter->flags & FULL_INIT_DONE) 1652 return -EBUSY; 1653 1654 for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) { 1655 s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID; 1656 s->ethrxq[qs].rspq.size = rp->rx_mini_pending; 1657 s->ethtxq[qs].q.size = rp->tx_pending; 1658 } 1659 return 0; 1660 } 1661 1662 /* 1663 * Return the interrupt holdoff timer and count for the first Queue Set on the 1664 * device. Our extension ioctl() (the cxgbtool interface) allows the 1665 * interrupt holdoff timer to be read on all of the device's Queue Sets. 1666 */ 1667 static int cxgb4vf_get_coalesce(struct net_device *dev, 1668 struct ethtool_coalesce *coalesce) 1669 { 1670 const struct port_info *pi = netdev_priv(dev); 1671 const struct adapter *adapter = pi->adapter; 1672 const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq; 1673 1674 coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq); 1675 coalesce->rx_max_coalesced_frames = 1676 ((rspq->intr_params & QINTR_CNT_EN_F) 1677 ? adapter->sge.counter_val[rspq->pktcnt_idx] 1678 : 0); 1679 return 0; 1680 } 1681 1682 /* 1683 * Set the RX interrupt holdoff timer and count for the first Queue Set on the 1684 * interface. Our extension ioctl() (the cxgbtool interface) allows us to set 1685 * the interrupt holdoff timer on any of the device's Queue Sets. 1686 */ 1687 static int cxgb4vf_set_coalesce(struct net_device *dev, 1688 struct ethtool_coalesce *coalesce) 1689 { 1690 const struct port_info *pi = netdev_priv(dev); 1691 struct adapter *adapter = pi->adapter; 1692 1693 return set_rxq_intr_params(adapter, 1694 &adapter->sge.ethrxq[pi->first_qset].rspq, 1695 coalesce->rx_coalesce_usecs, 1696 coalesce->rx_max_coalesced_frames); 1697 } 1698 1699 /* 1700 * Report current port link pause parameter settings. 1701 */ 1702 static void cxgb4vf_get_pauseparam(struct net_device *dev, 1703 struct ethtool_pauseparam *pauseparam) 1704 { 1705 struct port_info *pi = netdev_priv(dev); 1706 1707 pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0; 1708 pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0; 1709 pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0; 1710 } 1711 1712 /* 1713 * Identify the port by blinking the port's LED. 1714 */ 1715 static int cxgb4vf_phys_id(struct net_device *dev, 1716 enum ethtool_phys_id_state state) 1717 { 1718 unsigned int val; 1719 struct port_info *pi = netdev_priv(dev); 1720 1721 if (state == ETHTOOL_ID_ACTIVE) 1722 val = 0xffff; 1723 else if (state == ETHTOOL_ID_INACTIVE) 1724 val = 0; 1725 else 1726 return -EINVAL; 1727 1728 return t4vf_identify_port(pi->adapter, pi->viid, val); 1729 } 1730 1731 /* 1732 * Port stats maintained per queue of the port. 1733 */ 1734 struct queue_port_stats { 1735 u64 tso; 1736 u64 tx_csum; 1737 u64 rx_csum; 1738 u64 vlan_ex; 1739 u64 vlan_ins; 1740 u64 lro_pkts; 1741 u64 lro_merged; 1742 }; 1743 1744 /* 1745 * Strings for the ETH_SS_STATS statistics set ("ethtool -S"). Note that 1746 * these need to match the order of statistics returned by 1747 * t4vf_get_port_stats(). 1748 */ 1749 static const char stats_strings[][ETH_GSTRING_LEN] = { 1750 /* 1751 * These must match the layout of the t4vf_port_stats structure. 1752 */ 1753 "TxBroadcastBytes ", 1754 "TxBroadcastFrames ", 1755 "TxMulticastBytes ", 1756 "TxMulticastFrames ", 1757 "TxUnicastBytes ", 1758 "TxUnicastFrames ", 1759 "TxDroppedFrames ", 1760 "TxOffloadBytes ", 1761 "TxOffloadFrames ", 1762 "RxBroadcastBytes ", 1763 "RxBroadcastFrames ", 1764 "RxMulticastBytes ", 1765 "RxMulticastFrames ", 1766 "RxUnicastBytes ", 1767 "RxUnicastFrames ", 1768 "RxErrorFrames ", 1769 1770 /* 1771 * These are accumulated per-queue statistics and must match the 1772 * order of the fields in the queue_port_stats structure. 1773 */ 1774 "TSO ", 1775 "TxCsumOffload ", 1776 "RxCsumGood ", 1777 "VLANextractions ", 1778 "VLANinsertions ", 1779 "GROPackets ", 1780 "GROMerged ", 1781 }; 1782 1783 /* 1784 * Return the number of statistics in the specified statistics set. 1785 */ 1786 static int cxgb4vf_get_sset_count(struct net_device *dev, int sset) 1787 { 1788 switch (sset) { 1789 case ETH_SS_STATS: 1790 return ARRAY_SIZE(stats_strings); 1791 default: 1792 return -EOPNOTSUPP; 1793 } 1794 /*NOTREACHED*/ 1795 } 1796 1797 /* 1798 * Return the strings for the specified statistics set. 1799 */ 1800 static void cxgb4vf_get_strings(struct net_device *dev, 1801 u32 sset, 1802 u8 *data) 1803 { 1804 switch (sset) { 1805 case ETH_SS_STATS: 1806 memcpy(data, stats_strings, sizeof(stats_strings)); 1807 break; 1808 } 1809 } 1810 1811 /* 1812 * Small utility routine to accumulate queue statistics across the queues of 1813 * a "port". 1814 */ 1815 static void collect_sge_port_stats(const struct adapter *adapter, 1816 const struct port_info *pi, 1817 struct queue_port_stats *stats) 1818 { 1819 const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset]; 1820 const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset]; 1821 int qs; 1822 1823 memset(stats, 0, sizeof(*stats)); 1824 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) { 1825 stats->tso += txq->tso; 1826 stats->tx_csum += txq->tx_cso; 1827 stats->rx_csum += rxq->stats.rx_cso; 1828 stats->vlan_ex += rxq->stats.vlan_ex; 1829 stats->vlan_ins += txq->vlan_ins; 1830 stats->lro_pkts += rxq->stats.lro_pkts; 1831 stats->lro_merged += rxq->stats.lro_merged; 1832 } 1833 } 1834 1835 /* 1836 * Return the ETH_SS_STATS statistics set. 1837 */ 1838 static void cxgb4vf_get_ethtool_stats(struct net_device *dev, 1839 struct ethtool_stats *stats, 1840 u64 *data) 1841 { 1842 struct port_info *pi = netdev2pinfo(dev); 1843 struct adapter *adapter = pi->adapter; 1844 int err = t4vf_get_port_stats(adapter, pi->pidx, 1845 (struct t4vf_port_stats *)data); 1846 if (err) 1847 memset(data, 0, sizeof(struct t4vf_port_stats)); 1848 1849 data += sizeof(struct t4vf_port_stats) / sizeof(u64); 1850 collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data); 1851 } 1852 1853 /* 1854 * Return the size of our register map. 1855 */ 1856 static int cxgb4vf_get_regs_len(struct net_device *dev) 1857 { 1858 return T4VF_REGMAP_SIZE; 1859 } 1860 1861 /* 1862 * Dump a block of registers, start to end inclusive, into a buffer. 1863 */ 1864 static void reg_block_dump(struct adapter *adapter, void *regbuf, 1865 unsigned int start, unsigned int end) 1866 { 1867 u32 *bp = regbuf + start - T4VF_REGMAP_START; 1868 1869 for ( ; start <= end; start += sizeof(u32)) { 1870 /* 1871 * Avoid reading the Mailbox Control register since that 1872 * can trigger a Mailbox Ownership Arbitration cycle and 1873 * interfere with communication with the firmware. 1874 */ 1875 if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL) 1876 *bp++ = 0xffff; 1877 else 1878 *bp++ = t4_read_reg(adapter, start); 1879 } 1880 } 1881 1882 /* 1883 * Copy our entire register map into the provided buffer. 1884 */ 1885 static void cxgb4vf_get_regs(struct net_device *dev, 1886 struct ethtool_regs *regs, 1887 void *regbuf) 1888 { 1889 struct adapter *adapter = netdev2adap(dev); 1890 1891 regs->version = mk_adap_vers(adapter); 1892 1893 /* 1894 * Fill in register buffer with our register map. 1895 */ 1896 memset(regbuf, 0, T4VF_REGMAP_SIZE); 1897 1898 reg_block_dump(adapter, regbuf, 1899 T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST, 1900 T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST); 1901 reg_block_dump(adapter, regbuf, 1902 T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST, 1903 T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST); 1904 1905 /* T5 adds new registers in the PL Register map. 1906 */ 1907 reg_block_dump(adapter, regbuf, 1908 T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST, 1909 T4VF_PL_BASE_ADDR + (is_t4(adapter->params.chip) 1910 ? PL_VF_WHOAMI_A : PL_VF_REVISION_A)); 1911 reg_block_dump(adapter, regbuf, 1912 T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST, 1913 T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST); 1914 1915 reg_block_dump(adapter, regbuf, 1916 T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST, 1917 T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST); 1918 } 1919 1920 /* 1921 * Report current Wake On LAN settings. 1922 */ 1923 static void cxgb4vf_get_wol(struct net_device *dev, 1924 struct ethtool_wolinfo *wol) 1925 { 1926 wol->supported = 0; 1927 wol->wolopts = 0; 1928 memset(&wol->sopass, 0, sizeof(wol->sopass)); 1929 } 1930 1931 /* 1932 * TCP Segmentation Offload flags which we support. 1933 */ 1934 #define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) 1935 1936 static const struct ethtool_ops cxgb4vf_ethtool_ops = { 1937 .get_link_ksettings = cxgb4vf_get_link_ksettings, 1938 .get_fecparam = cxgb4vf_get_fecparam, 1939 .get_drvinfo = cxgb4vf_get_drvinfo, 1940 .get_msglevel = cxgb4vf_get_msglevel, 1941 .set_msglevel = cxgb4vf_set_msglevel, 1942 .get_ringparam = cxgb4vf_get_ringparam, 1943 .set_ringparam = cxgb4vf_set_ringparam, 1944 .get_coalesce = cxgb4vf_get_coalesce, 1945 .set_coalesce = cxgb4vf_set_coalesce, 1946 .get_pauseparam = cxgb4vf_get_pauseparam, 1947 .get_link = ethtool_op_get_link, 1948 .get_strings = cxgb4vf_get_strings, 1949 .set_phys_id = cxgb4vf_phys_id, 1950 .get_sset_count = cxgb4vf_get_sset_count, 1951 .get_ethtool_stats = cxgb4vf_get_ethtool_stats, 1952 .get_regs_len = cxgb4vf_get_regs_len, 1953 .get_regs = cxgb4vf_get_regs, 1954 .get_wol = cxgb4vf_get_wol, 1955 }; 1956 1957 /* 1958 * /sys/kernel/debug/cxgb4vf support code and data. 1959 * ================================================ 1960 */ 1961 1962 /* 1963 * Show Firmware Mailbox Command/Reply Log 1964 * 1965 * Note that we don't do any locking when dumping the Firmware Mailbox Log so 1966 * it's possible that we can catch things during a log update and therefore 1967 * see partially corrupted log entries. But i9t's probably Good Enough(tm). 1968 * If we ever decide that we want to make sure that we're dumping a coherent 1969 * log, we'd need to perform locking in the mailbox logging and in 1970 * mboxlog_open() where we'd need to grab the entire mailbox log in one go 1971 * like we do for the Firmware Device Log. But as stated above, meh ... 1972 */ 1973 static int mboxlog_show(struct seq_file *seq, void *v) 1974 { 1975 struct adapter *adapter = seq->private; 1976 struct mbox_cmd_log *log = adapter->mbox_log; 1977 struct mbox_cmd *entry; 1978 int entry_idx, i; 1979 1980 if (v == SEQ_START_TOKEN) { 1981 seq_printf(seq, 1982 "%10s %15s %5s %5s %s\n", 1983 "Seq#", "Tstamp", "Atime", "Etime", 1984 "Command/Reply"); 1985 return 0; 1986 } 1987 1988 entry_idx = log->cursor + ((uintptr_t)v - 2); 1989 if (entry_idx >= log->size) 1990 entry_idx -= log->size; 1991 entry = mbox_cmd_log_entry(log, entry_idx); 1992 1993 /* skip over unused entries */ 1994 if (entry->timestamp == 0) 1995 return 0; 1996 1997 seq_printf(seq, "%10u %15llu %5d %5d", 1998 entry->seqno, entry->timestamp, 1999 entry->access, entry->execute); 2000 for (i = 0; i < MBOX_LEN / 8; i++) { 2001 u64 flit = entry->cmd[i]; 2002 u32 hi = (u32)(flit >> 32); 2003 u32 lo = (u32)flit; 2004 2005 seq_printf(seq, " %08x %08x", hi, lo); 2006 } 2007 seq_puts(seq, "\n"); 2008 return 0; 2009 } 2010 2011 static inline void *mboxlog_get_idx(struct seq_file *seq, loff_t pos) 2012 { 2013 struct adapter *adapter = seq->private; 2014 struct mbox_cmd_log *log = adapter->mbox_log; 2015 2016 return ((pos <= log->size) ? (void *)(uintptr_t)(pos + 1) : NULL); 2017 } 2018 2019 static void *mboxlog_start(struct seq_file *seq, loff_t *pos) 2020 { 2021 return *pos ? mboxlog_get_idx(seq, *pos) : SEQ_START_TOKEN; 2022 } 2023 2024 static void *mboxlog_next(struct seq_file *seq, void *v, loff_t *pos) 2025 { 2026 ++*pos; 2027 return mboxlog_get_idx(seq, *pos); 2028 } 2029 2030 static void mboxlog_stop(struct seq_file *seq, void *v) 2031 { 2032 } 2033 2034 static const struct seq_operations mboxlog_seq_ops = { 2035 .start = mboxlog_start, 2036 .next = mboxlog_next, 2037 .stop = mboxlog_stop, 2038 .show = mboxlog_show 2039 }; 2040 2041 static int mboxlog_open(struct inode *inode, struct file *file) 2042 { 2043 int res = seq_open(file, &mboxlog_seq_ops); 2044 2045 if (!res) { 2046 struct seq_file *seq = file->private_data; 2047 2048 seq->private = inode->i_private; 2049 } 2050 return res; 2051 } 2052 2053 static const struct file_operations mboxlog_fops = { 2054 .owner = THIS_MODULE, 2055 .open = mboxlog_open, 2056 .read = seq_read, 2057 .llseek = seq_lseek, 2058 .release = seq_release, 2059 }; 2060 2061 /* 2062 * Show SGE Queue Set information. We display QPL Queues Sets per line. 2063 */ 2064 #define QPL 4 2065 2066 static int sge_qinfo_show(struct seq_file *seq, void *v) 2067 { 2068 struct adapter *adapter = seq->private; 2069 int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL); 2070 int qs, r = (uintptr_t)v - 1; 2071 2072 if (r) 2073 seq_putc(seq, '\n'); 2074 2075 #define S3(fmt_spec, s, v) \ 2076 do {\ 2077 seq_printf(seq, "%-12s", s); \ 2078 for (qs = 0; qs < n; ++qs) \ 2079 seq_printf(seq, " %16" fmt_spec, v); \ 2080 seq_putc(seq, '\n'); \ 2081 } while (0) 2082 #define S(s, v) S3("s", s, v) 2083 #define T(s, v) S3("u", s, txq[qs].v) 2084 #define R(s, v) S3("u", s, rxq[qs].v) 2085 2086 if (r < eth_entries) { 2087 const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL]; 2088 const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL]; 2089 int n = min(QPL, adapter->sge.ethqsets - QPL * r); 2090 2091 S("QType:", "Ethernet"); 2092 S("Interface:", 2093 (rxq[qs].rspq.netdev 2094 ? rxq[qs].rspq.netdev->name 2095 : "N/A")); 2096 S3("d", "Port:", 2097 (rxq[qs].rspq.netdev 2098 ? ((struct port_info *) 2099 netdev_priv(rxq[qs].rspq.netdev))->port_id 2100 : -1)); 2101 T("TxQ ID:", q.abs_id); 2102 T("TxQ size:", q.size); 2103 T("TxQ inuse:", q.in_use); 2104 T("TxQ PIdx:", q.pidx); 2105 T("TxQ CIdx:", q.cidx); 2106 R("RspQ ID:", rspq.abs_id); 2107 R("RspQ size:", rspq.size); 2108 R("RspQE size:", rspq.iqe_len); 2109 S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq)); 2110 S3("u", "Intr pktcnt:", 2111 adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]); 2112 R("RspQ CIdx:", rspq.cidx); 2113 R("RspQ Gen:", rspq.gen); 2114 R("FL ID:", fl.abs_id); 2115 R("FL size:", fl.size - MIN_FL_RESID); 2116 R("FL avail:", fl.avail); 2117 R("FL PIdx:", fl.pidx); 2118 R("FL CIdx:", fl.cidx); 2119 return 0; 2120 } 2121 2122 r -= eth_entries; 2123 if (r == 0) { 2124 const struct sge_rspq *evtq = &adapter->sge.fw_evtq; 2125 2126 seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue"); 2127 seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id); 2128 seq_printf(seq, "%-12s %16u\n", "Intr delay:", 2129 qtimer_val(adapter, evtq)); 2130 seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:", 2131 adapter->sge.counter_val[evtq->pktcnt_idx]); 2132 seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx); 2133 seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen); 2134 } else if (r == 1) { 2135 const struct sge_rspq *intrq = &adapter->sge.intrq; 2136 2137 seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue"); 2138 seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id); 2139 seq_printf(seq, "%-12s %16u\n", "Intr delay:", 2140 qtimer_val(adapter, intrq)); 2141 seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:", 2142 adapter->sge.counter_val[intrq->pktcnt_idx]); 2143 seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx); 2144 seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen); 2145 } 2146 2147 #undef R 2148 #undef T 2149 #undef S 2150 #undef S3 2151 2152 return 0; 2153 } 2154 2155 /* 2156 * Return the number of "entries" in our "file". We group the multi-Queue 2157 * sections with QPL Queue Sets per "entry". The sections of the output are: 2158 * 2159 * Ethernet RX/TX Queue Sets 2160 * Firmware Event Queue 2161 * Forwarded Interrupt Queue (if in MSI mode) 2162 */ 2163 static int sge_queue_entries(const struct adapter *adapter) 2164 { 2165 return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 + 2166 ((adapter->flags & USING_MSI) != 0); 2167 } 2168 2169 static void *sge_queue_start(struct seq_file *seq, loff_t *pos) 2170 { 2171 int entries = sge_queue_entries(seq->private); 2172 2173 return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL; 2174 } 2175 2176 static void sge_queue_stop(struct seq_file *seq, void *v) 2177 { 2178 } 2179 2180 static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos) 2181 { 2182 int entries = sge_queue_entries(seq->private); 2183 2184 ++*pos; 2185 return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL; 2186 } 2187 2188 static const struct seq_operations sge_qinfo_seq_ops = { 2189 .start = sge_queue_start, 2190 .next = sge_queue_next, 2191 .stop = sge_queue_stop, 2192 .show = sge_qinfo_show 2193 }; 2194 2195 static int sge_qinfo_open(struct inode *inode, struct file *file) 2196 { 2197 int res = seq_open(file, &sge_qinfo_seq_ops); 2198 2199 if (!res) { 2200 struct seq_file *seq = file->private_data; 2201 seq->private = inode->i_private; 2202 } 2203 return res; 2204 } 2205 2206 static const struct file_operations sge_qinfo_debugfs_fops = { 2207 .owner = THIS_MODULE, 2208 .open = sge_qinfo_open, 2209 .read = seq_read, 2210 .llseek = seq_lseek, 2211 .release = seq_release, 2212 }; 2213 2214 /* 2215 * Show SGE Queue Set statistics. We display QPL Queues Sets per line. 2216 */ 2217 #define QPL 4 2218 2219 static int sge_qstats_show(struct seq_file *seq, void *v) 2220 { 2221 struct adapter *adapter = seq->private; 2222 int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL); 2223 int qs, r = (uintptr_t)v - 1; 2224 2225 if (r) 2226 seq_putc(seq, '\n'); 2227 2228 #define S3(fmt, s, v) \ 2229 do { \ 2230 seq_printf(seq, "%-16s", s); \ 2231 for (qs = 0; qs < n; ++qs) \ 2232 seq_printf(seq, " %8" fmt, v); \ 2233 seq_putc(seq, '\n'); \ 2234 } while (0) 2235 #define S(s, v) S3("s", s, v) 2236 2237 #define T3(fmt, s, v) S3(fmt, s, txq[qs].v) 2238 #define T(s, v) T3("lu", s, v) 2239 2240 #define R3(fmt, s, v) S3(fmt, s, rxq[qs].v) 2241 #define R(s, v) R3("lu", s, v) 2242 2243 if (r < eth_entries) { 2244 const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL]; 2245 const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL]; 2246 int n = min(QPL, adapter->sge.ethqsets - QPL * r); 2247 2248 S("QType:", "Ethernet"); 2249 S("Interface:", 2250 (rxq[qs].rspq.netdev 2251 ? rxq[qs].rspq.netdev->name 2252 : "N/A")); 2253 R3("u", "RspQNullInts:", rspq.unhandled_irqs); 2254 R("RxPackets:", stats.pkts); 2255 R("RxCSO:", stats.rx_cso); 2256 R("VLANxtract:", stats.vlan_ex); 2257 R("LROmerged:", stats.lro_merged); 2258 R("LROpackets:", stats.lro_pkts); 2259 R("RxDrops:", stats.rx_drops); 2260 T("TSO:", tso); 2261 T("TxCSO:", tx_cso); 2262 T("VLANins:", vlan_ins); 2263 T("TxQFull:", q.stops); 2264 T("TxQRestarts:", q.restarts); 2265 T("TxMapErr:", mapping_err); 2266 R("FLAllocErr:", fl.alloc_failed); 2267 R("FLLrgAlcErr:", fl.large_alloc_failed); 2268 R("FLStarving:", fl.starving); 2269 return 0; 2270 } 2271 2272 r -= eth_entries; 2273 if (r == 0) { 2274 const struct sge_rspq *evtq = &adapter->sge.fw_evtq; 2275 2276 seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue"); 2277 seq_printf(seq, "%-16s %8u\n", "RspQNullInts:", 2278 evtq->unhandled_irqs); 2279 seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx); 2280 seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen); 2281 } else if (r == 1) { 2282 const struct sge_rspq *intrq = &adapter->sge.intrq; 2283 2284 seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue"); 2285 seq_printf(seq, "%-16s %8u\n", "RspQNullInts:", 2286 intrq->unhandled_irqs); 2287 seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx); 2288 seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen); 2289 } 2290 2291 #undef R 2292 #undef T 2293 #undef S 2294 #undef R3 2295 #undef T3 2296 #undef S3 2297 2298 return 0; 2299 } 2300 2301 /* 2302 * Return the number of "entries" in our "file". We group the multi-Queue 2303 * sections with QPL Queue Sets per "entry". The sections of the output are: 2304 * 2305 * Ethernet RX/TX Queue Sets 2306 * Firmware Event Queue 2307 * Forwarded Interrupt Queue (if in MSI mode) 2308 */ 2309 static int sge_qstats_entries(const struct adapter *adapter) 2310 { 2311 return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 + 2312 ((adapter->flags & USING_MSI) != 0); 2313 } 2314 2315 static void *sge_qstats_start(struct seq_file *seq, loff_t *pos) 2316 { 2317 int entries = sge_qstats_entries(seq->private); 2318 2319 return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL; 2320 } 2321 2322 static void sge_qstats_stop(struct seq_file *seq, void *v) 2323 { 2324 } 2325 2326 static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos) 2327 { 2328 int entries = sge_qstats_entries(seq->private); 2329 2330 (*pos)++; 2331 return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL; 2332 } 2333 2334 static const struct seq_operations sge_qstats_seq_ops = { 2335 .start = sge_qstats_start, 2336 .next = sge_qstats_next, 2337 .stop = sge_qstats_stop, 2338 .show = sge_qstats_show 2339 }; 2340 2341 static int sge_qstats_open(struct inode *inode, struct file *file) 2342 { 2343 int res = seq_open(file, &sge_qstats_seq_ops); 2344 2345 if (res == 0) { 2346 struct seq_file *seq = file->private_data; 2347 seq->private = inode->i_private; 2348 } 2349 return res; 2350 } 2351 2352 static const struct file_operations sge_qstats_proc_fops = { 2353 .owner = THIS_MODULE, 2354 .open = sge_qstats_open, 2355 .read = seq_read, 2356 .llseek = seq_lseek, 2357 .release = seq_release, 2358 }; 2359 2360 /* 2361 * Show PCI-E SR-IOV Virtual Function Resource Limits. 2362 */ 2363 static int resources_show(struct seq_file *seq, void *v) 2364 { 2365 struct adapter *adapter = seq->private; 2366 struct vf_resources *vfres = &adapter->params.vfres; 2367 2368 #define S(desc, fmt, var) \ 2369 seq_printf(seq, "%-60s " fmt "\n", \ 2370 desc " (" #var "):", vfres->var) 2371 2372 S("Virtual Interfaces", "%d", nvi); 2373 S("Egress Queues", "%d", neq); 2374 S("Ethernet Control", "%d", nethctrl); 2375 S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint); 2376 S("Ingress Queues", "%d", niq); 2377 S("Traffic Class", "%d", tc); 2378 S("Port Access Rights Mask", "%#x", pmask); 2379 S("MAC Address Filters", "%d", nexactf); 2380 S("Firmware Command Read Capabilities", "%#x", r_caps); 2381 S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps); 2382 2383 #undef S 2384 2385 return 0; 2386 } 2387 DEFINE_SHOW_ATTRIBUTE(resources); 2388 2389 /* 2390 * Show Virtual Interfaces. 2391 */ 2392 static int interfaces_show(struct seq_file *seq, void *v) 2393 { 2394 if (v == SEQ_START_TOKEN) { 2395 seq_puts(seq, "Interface Port VIID\n"); 2396 } else { 2397 struct adapter *adapter = seq->private; 2398 int pidx = (uintptr_t)v - 2; 2399 struct net_device *dev = adapter->port[pidx]; 2400 struct port_info *pi = netdev_priv(dev); 2401 2402 seq_printf(seq, "%9s %4d %#5x\n", 2403 dev->name, pi->port_id, pi->viid); 2404 } 2405 return 0; 2406 } 2407 2408 static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos) 2409 { 2410 return pos <= adapter->params.nports 2411 ? (void *)(uintptr_t)(pos + 1) 2412 : NULL; 2413 } 2414 2415 static void *interfaces_start(struct seq_file *seq, loff_t *pos) 2416 { 2417 return *pos 2418 ? interfaces_get_idx(seq->private, *pos) 2419 : SEQ_START_TOKEN; 2420 } 2421 2422 static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos) 2423 { 2424 (*pos)++; 2425 return interfaces_get_idx(seq->private, *pos); 2426 } 2427 2428 static void interfaces_stop(struct seq_file *seq, void *v) 2429 { 2430 } 2431 2432 static const struct seq_operations interfaces_seq_ops = { 2433 .start = interfaces_start, 2434 .next = interfaces_next, 2435 .stop = interfaces_stop, 2436 .show = interfaces_show 2437 }; 2438 2439 static int interfaces_open(struct inode *inode, struct file *file) 2440 { 2441 int res = seq_open(file, &interfaces_seq_ops); 2442 2443 if (res == 0) { 2444 struct seq_file *seq = file->private_data; 2445 seq->private = inode->i_private; 2446 } 2447 return res; 2448 } 2449 2450 static const struct file_operations interfaces_proc_fops = { 2451 .owner = THIS_MODULE, 2452 .open = interfaces_open, 2453 .read = seq_read, 2454 .llseek = seq_lseek, 2455 .release = seq_release, 2456 }; 2457 2458 /* 2459 * /sys/kernel/debugfs/cxgb4vf/ files list. 2460 */ 2461 struct cxgb4vf_debugfs_entry { 2462 const char *name; /* name of debugfs node */ 2463 umode_t mode; /* file system mode */ 2464 const struct file_operations *fops; 2465 }; 2466 2467 static struct cxgb4vf_debugfs_entry debugfs_files[] = { 2468 { "mboxlog", 0444, &mboxlog_fops }, 2469 { "sge_qinfo", 0444, &sge_qinfo_debugfs_fops }, 2470 { "sge_qstats", 0444, &sge_qstats_proc_fops }, 2471 { "resources", 0444, &resources_fops }, 2472 { "interfaces", 0444, &interfaces_proc_fops }, 2473 }; 2474 2475 /* 2476 * Module and device initialization and cleanup code. 2477 * ================================================== 2478 */ 2479 2480 /* 2481 * Set up out /sys/kernel/debug/cxgb4vf sub-nodes. We assume that the 2482 * directory (debugfs_root) has already been set up. 2483 */ 2484 static int setup_debugfs(struct adapter *adapter) 2485 { 2486 int i; 2487 2488 BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root)); 2489 2490 /* 2491 * Debugfs support is best effort. 2492 */ 2493 for (i = 0; i < ARRAY_SIZE(debugfs_files); i++) 2494 (void)debugfs_create_file(debugfs_files[i].name, 2495 debugfs_files[i].mode, 2496 adapter->debugfs_root, 2497 (void *)adapter, 2498 debugfs_files[i].fops); 2499 2500 return 0; 2501 } 2502 2503 /* 2504 * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above. We leave 2505 * it to our caller to tear down the directory (debugfs_root). 2506 */ 2507 static void cleanup_debugfs(struct adapter *adapter) 2508 { 2509 BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root)); 2510 2511 /* 2512 * Unlike our sister routine cleanup_proc(), we don't need to remove 2513 * individual entries because a call will be made to 2514 * debugfs_remove_recursive(). We just need to clean up any ancillary 2515 * persistent state. 2516 */ 2517 /* nothing to do */ 2518 } 2519 2520 /* Figure out how many Ports and Queue Sets we can support. This depends on 2521 * knowing our Virtual Function Resources and may be called a second time if 2522 * we fall back from MSI-X to MSI Interrupt Mode. 2523 */ 2524 static void size_nports_qsets(struct adapter *adapter) 2525 { 2526 struct vf_resources *vfres = &adapter->params.vfres; 2527 unsigned int ethqsets, pmask_nports; 2528 2529 /* The number of "ports" which we support is equal to the number of 2530 * Virtual Interfaces with which we've been provisioned. 2531 */ 2532 adapter->params.nports = vfres->nvi; 2533 if (adapter->params.nports > MAX_NPORTS) { 2534 dev_warn(adapter->pdev_dev, "only using %d of %d maximum" 2535 " allowed virtual interfaces\n", MAX_NPORTS, 2536 adapter->params.nports); 2537 adapter->params.nports = MAX_NPORTS; 2538 } 2539 2540 /* We may have been provisioned with more VIs than the number of 2541 * ports we're allowed to access (our Port Access Rights Mask). 2542 * This is obviously a configuration conflict but we don't want to 2543 * crash the kernel or anything silly just because of that. 2544 */ 2545 pmask_nports = hweight32(adapter->params.vfres.pmask); 2546 if (pmask_nports < adapter->params.nports) { 2547 dev_warn(adapter->pdev_dev, "only using %d of %d provisioned" 2548 " virtual interfaces; limited by Port Access Rights" 2549 " mask %#x\n", pmask_nports, adapter->params.nports, 2550 adapter->params.vfres.pmask); 2551 adapter->params.nports = pmask_nports; 2552 } 2553 2554 /* We need to reserve an Ingress Queue for the Asynchronous Firmware 2555 * Event Queue. And if we're using MSI Interrupts, we'll also need to 2556 * reserve an Ingress Queue for a Forwarded Interrupts. 2557 * 2558 * The rest of the FL/Intr-capable ingress queues will be matched up 2559 * one-for-one with Ethernet/Control egress queues in order to form 2560 * "Queue Sets" which will be aportioned between the "ports". For 2561 * each Queue Set, we'll need the ability to allocate two Egress 2562 * Contexts -- one for the Ingress Queue Free List and one for the TX 2563 * Ethernet Queue. 2564 * 2565 * Note that even if we're currently configured to use MSI-X 2566 * Interrupts (module variable msi == MSI_MSIX) we may get downgraded 2567 * to MSI Interrupts if we can't get enough MSI-X Interrupts. If that 2568 * happens we'll need to adjust things later. 2569 */ 2570 ethqsets = vfres->niqflint - 1 - (msi == MSI_MSI); 2571 if (vfres->nethctrl != ethqsets) 2572 ethqsets = min(vfres->nethctrl, ethqsets); 2573 if (vfres->neq < ethqsets*2) 2574 ethqsets = vfres->neq/2; 2575 if (ethqsets > MAX_ETH_QSETS) 2576 ethqsets = MAX_ETH_QSETS; 2577 adapter->sge.max_ethqsets = ethqsets; 2578 2579 if (adapter->sge.max_ethqsets < adapter->params.nports) { 2580 dev_warn(adapter->pdev_dev, "only using %d of %d available" 2581 " virtual interfaces (too few Queue Sets)\n", 2582 adapter->sge.max_ethqsets, adapter->params.nports); 2583 adapter->params.nports = adapter->sge.max_ethqsets; 2584 } 2585 } 2586 2587 /* 2588 * Perform early "adapter" initialization. This is where we discover what 2589 * adapter parameters we're going to be using and initialize basic adapter 2590 * hardware support. 2591 */ 2592 static int adap_init0(struct adapter *adapter) 2593 { 2594 struct sge_params *sge_params = &adapter->params.sge; 2595 struct sge *s = &adapter->sge; 2596 int err; 2597 u32 param, val = 0; 2598 2599 /* 2600 * Some environments do not properly handle PCIE FLRs -- e.g. in Linux 2601 * 2.6.31 and later we can't call pci_reset_function() in order to 2602 * issue an FLR because of a self- deadlock on the device semaphore. 2603 * Meanwhile, the OS infrastructure doesn't issue FLRs in all the 2604 * cases where they're needed -- for instance, some versions of KVM 2605 * fail to reset "Assigned Devices" when the VM reboots. Therefore we 2606 * use the firmware based reset in order to reset any per function 2607 * state. 2608 */ 2609 err = t4vf_fw_reset(adapter); 2610 if (err < 0) { 2611 dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err); 2612 return err; 2613 } 2614 2615 /* 2616 * Grab basic operational parameters. These will predominantly have 2617 * been set up by the Physical Function Driver or will be hard coded 2618 * into the adapter. We just have to live with them ... Note that 2619 * we _must_ get our VPD parameters before our SGE parameters because 2620 * we need to know the adapter's core clock from the VPD in order to 2621 * properly decode the SGE Timer Values. 2622 */ 2623 err = t4vf_get_dev_params(adapter); 2624 if (err) { 2625 dev_err(adapter->pdev_dev, "unable to retrieve adapter" 2626 " device parameters: err=%d\n", err); 2627 return err; 2628 } 2629 err = t4vf_get_vpd_params(adapter); 2630 if (err) { 2631 dev_err(adapter->pdev_dev, "unable to retrieve adapter" 2632 " VPD parameters: err=%d\n", err); 2633 return err; 2634 } 2635 err = t4vf_get_sge_params(adapter); 2636 if (err) { 2637 dev_err(adapter->pdev_dev, "unable to retrieve adapter" 2638 " SGE parameters: err=%d\n", err); 2639 return err; 2640 } 2641 err = t4vf_get_rss_glb_config(adapter); 2642 if (err) { 2643 dev_err(adapter->pdev_dev, "unable to retrieve adapter" 2644 " RSS parameters: err=%d\n", err); 2645 return err; 2646 } 2647 if (adapter->params.rss.mode != 2648 FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) { 2649 dev_err(adapter->pdev_dev, "unable to operate with global RSS" 2650 " mode %d\n", adapter->params.rss.mode); 2651 return -EINVAL; 2652 } 2653 err = t4vf_sge_init(adapter); 2654 if (err) { 2655 dev_err(adapter->pdev_dev, "unable to use adapter parameters:" 2656 " err=%d\n", err); 2657 return err; 2658 } 2659 2660 /* If we're running on newer firmware, let it know that we're 2661 * prepared to deal with encapsulated CPL messages. Older 2662 * firmware won't understand this and we'll just get 2663 * unencapsulated messages ... 2664 */ 2665 param = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) | 2666 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP); 2667 val = 1; 2668 (void) t4vf_set_params(adapter, 1, ¶m, &val); 2669 2670 /* 2671 * Retrieve our RX interrupt holdoff timer values and counter 2672 * threshold values from the SGE parameters. 2673 */ 2674 s->timer_val[0] = core_ticks_to_us(adapter, 2675 TIMERVALUE0_G(sge_params->sge_timer_value_0_and_1)); 2676 s->timer_val[1] = core_ticks_to_us(adapter, 2677 TIMERVALUE1_G(sge_params->sge_timer_value_0_and_1)); 2678 s->timer_val[2] = core_ticks_to_us(adapter, 2679 TIMERVALUE0_G(sge_params->sge_timer_value_2_and_3)); 2680 s->timer_val[3] = core_ticks_to_us(adapter, 2681 TIMERVALUE1_G(sge_params->sge_timer_value_2_and_3)); 2682 s->timer_val[4] = core_ticks_to_us(adapter, 2683 TIMERVALUE0_G(sge_params->sge_timer_value_4_and_5)); 2684 s->timer_val[5] = core_ticks_to_us(adapter, 2685 TIMERVALUE1_G(sge_params->sge_timer_value_4_and_5)); 2686 2687 s->counter_val[0] = THRESHOLD_0_G(sge_params->sge_ingress_rx_threshold); 2688 s->counter_val[1] = THRESHOLD_1_G(sge_params->sge_ingress_rx_threshold); 2689 s->counter_val[2] = THRESHOLD_2_G(sge_params->sge_ingress_rx_threshold); 2690 s->counter_val[3] = THRESHOLD_3_G(sge_params->sge_ingress_rx_threshold); 2691 2692 /* 2693 * Grab our Virtual Interface resource allocation, extract the 2694 * features that we're interested in and do a bit of sanity testing on 2695 * what we discover. 2696 */ 2697 err = t4vf_get_vfres(adapter); 2698 if (err) { 2699 dev_err(adapter->pdev_dev, "unable to get virtual interface" 2700 " resources: err=%d\n", err); 2701 return err; 2702 } 2703 2704 /* Check for various parameter sanity issues */ 2705 if (adapter->params.vfres.pmask == 0) { 2706 dev_err(adapter->pdev_dev, "no port access configured\n" 2707 "usable!\n"); 2708 return -EINVAL; 2709 } 2710 if (adapter->params.vfres.nvi == 0) { 2711 dev_err(adapter->pdev_dev, "no virtual interfaces configured/" 2712 "usable!\n"); 2713 return -EINVAL; 2714 } 2715 2716 /* Initialize nports and max_ethqsets now that we have our Virtual 2717 * Function Resources. 2718 */ 2719 size_nports_qsets(adapter); 2720 2721 return 0; 2722 } 2723 2724 static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx, 2725 u8 pkt_cnt_idx, unsigned int size, 2726 unsigned int iqe_size) 2727 { 2728 rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) | 2729 (pkt_cnt_idx < SGE_NCOUNTERS ? 2730 QINTR_CNT_EN_F : 0)); 2731 rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS 2732 ? pkt_cnt_idx 2733 : 0); 2734 rspq->iqe_len = iqe_size; 2735 rspq->size = size; 2736 } 2737 2738 /* 2739 * Perform default configuration of DMA queues depending on the number and 2740 * type of ports we found and the number of available CPUs. Most settings can 2741 * be modified by the admin via ethtool and cxgbtool prior to the adapter 2742 * being brought up for the first time. 2743 */ 2744 static void cfg_queues(struct adapter *adapter) 2745 { 2746 struct sge *s = &adapter->sge; 2747 int q10g, n10g, qidx, pidx, qs; 2748 size_t iqe_size; 2749 2750 /* 2751 * We should not be called till we know how many Queue Sets we can 2752 * support. In particular, this means that we need to know what kind 2753 * of interrupts we'll be using ... 2754 */ 2755 BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0); 2756 2757 /* 2758 * Count the number of 10GbE Virtual Interfaces that we have. 2759 */ 2760 n10g = 0; 2761 for_each_port(adapter, pidx) 2762 n10g += is_x_10g_port(&adap2pinfo(adapter, pidx)->link_cfg); 2763 2764 /* 2765 * We default to 1 queue per non-10G port and up to # of cores queues 2766 * per 10G port. 2767 */ 2768 if (n10g == 0) 2769 q10g = 0; 2770 else { 2771 int n1g = (adapter->params.nports - n10g); 2772 q10g = (adapter->sge.max_ethqsets - n1g) / n10g; 2773 if (q10g > num_online_cpus()) 2774 q10g = num_online_cpus(); 2775 } 2776 2777 /* 2778 * Allocate the "Queue Sets" to the various Virtual Interfaces. 2779 * The layout will be established in setup_sge_queues() when the 2780 * adapter is brough up for the first time. 2781 */ 2782 qidx = 0; 2783 for_each_port(adapter, pidx) { 2784 struct port_info *pi = adap2pinfo(adapter, pidx); 2785 2786 pi->first_qset = qidx; 2787 pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1; 2788 qidx += pi->nqsets; 2789 } 2790 s->ethqsets = qidx; 2791 2792 /* 2793 * The Ingress Queue Entry Size for our various Response Queues needs 2794 * to be big enough to accommodate the largest message we can receive 2795 * from the chip/firmware; which is 64 bytes ... 2796 */ 2797 iqe_size = 64; 2798 2799 /* 2800 * Set up default Queue Set parameters ... Start off with the 2801 * shortest interrupt holdoff timer. 2802 */ 2803 for (qs = 0; qs < s->max_ethqsets; qs++) { 2804 struct sge_eth_rxq *rxq = &s->ethrxq[qs]; 2805 struct sge_eth_txq *txq = &s->ethtxq[qs]; 2806 2807 init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size); 2808 rxq->fl.size = 72; 2809 txq->q.size = 1024; 2810 } 2811 2812 /* 2813 * The firmware event queue is used for link state changes and 2814 * notifications of TX DMA completions. 2815 */ 2816 init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size); 2817 2818 /* 2819 * The forwarded interrupt queue is used when we're in MSI interrupt 2820 * mode. In this mode all interrupts associated with RX queues will 2821 * be forwarded to a single queue which we'll associate with our MSI 2822 * interrupt vector. The messages dropped in the forwarded interrupt 2823 * queue will indicate which ingress queue needs servicing ... This 2824 * queue needs to be large enough to accommodate all of the ingress 2825 * queues which are forwarding their interrupt (+1 to prevent the PIDX 2826 * from equalling the CIDX if every ingress queue has an outstanding 2827 * interrupt). The queue doesn't need to be any larger because no 2828 * ingress queue will ever have more than one outstanding interrupt at 2829 * any time ... 2830 */ 2831 init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1, 2832 iqe_size); 2833 } 2834 2835 /* 2836 * Reduce the number of Ethernet queues across all ports to at most n. 2837 * n provides at least one queue per port. 2838 */ 2839 static void reduce_ethqs(struct adapter *adapter, int n) 2840 { 2841 int i; 2842 struct port_info *pi; 2843 2844 /* 2845 * While we have too many active Ether Queue Sets, interate across the 2846 * "ports" and reduce their individual Queue Set allocations. 2847 */ 2848 BUG_ON(n < adapter->params.nports); 2849 while (n < adapter->sge.ethqsets) 2850 for_each_port(adapter, i) { 2851 pi = adap2pinfo(adapter, i); 2852 if (pi->nqsets > 1) { 2853 pi->nqsets--; 2854 adapter->sge.ethqsets--; 2855 if (adapter->sge.ethqsets <= n) 2856 break; 2857 } 2858 } 2859 2860 /* 2861 * Reassign the starting Queue Sets for each of the "ports" ... 2862 */ 2863 n = 0; 2864 for_each_port(adapter, i) { 2865 pi = adap2pinfo(adapter, i); 2866 pi->first_qset = n; 2867 n += pi->nqsets; 2868 } 2869 } 2870 2871 /* 2872 * We need to grab enough MSI-X vectors to cover our interrupt needs. Ideally 2873 * we get a separate MSI-X vector for every "Queue Set" plus any extras we 2874 * need. Minimally we need one for every Virtual Interface plus those needed 2875 * for our "extras". Note that this process may lower the maximum number of 2876 * allowed Queue Sets ... 2877 */ 2878 static int enable_msix(struct adapter *adapter) 2879 { 2880 int i, want, need, nqsets; 2881 struct msix_entry entries[MSIX_ENTRIES]; 2882 struct sge *s = &adapter->sge; 2883 2884 for (i = 0; i < MSIX_ENTRIES; ++i) 2885 entries[i].entry = i; 2886 2887 /* 2888 * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets" 2889 * plus those needed for our "extras" (for example, the firmware 2890 * message queue). We _need_ at least one "Queue Set" per Virtual 2891 * Interface plus those needed for our "extras". So now we get to see 2892 * if the song is right ... 2893 */ 2894 want = s->max_ethqsets + MSIX_EXTRAS; 2895 need = adapter->params.nports + MSIX_EXTRAS; 2896 2897 want = pci_enable_msix_range(adapter->pdev, entries, need, want); 2898 if (want < 0) 2899 return want; 2900 2901 nqsets = want - MSIX_EXTRAS; 2902 if (nqsets < s->max_ethqsets) { 2903 dev_warn(adapter->pdev_dev, "only enough MSI-X vectors" 2904 " for %d Queue Sets\n", nqsets); 2905 s->max_ethqsets = nqsets; 2906 if (nqsets < s->ethqsets) 2907 reduce_ethqs(adapter, nqsets); 2908 } 2909 for (i = 0; i < want; ++i) 2910 adapter->msix_info[i].vec = entries[i].vector; 2911 2912 return 0; 2913 } 2914 2915 static const struct net_device_ops cxgb4vf_netdev_ops = { 2916 .ndo_open = cxgb4vf_open, 2917 .ndo_stop = cxgb4vf_stop, 2918 .ndo_start_xmit = t4vf_eth_xmit, 2919 .ndo_get_stats = cxgb4vf_get_stats, 2920 .ndo_set_rx_mode = cxgb4vf_set_rxmode, 2921 .ndo_set_mac_address = cxgb4vf_set_mac_addr, 2922 .ndo_validate_addr = eth_validate_addr, 2923 .ndo_do_ioctl = cxgb4vf_do_ioctl, 2924 .ndo_change_mtu = cxgb4vf_change_mtu, 2925 .ndo_fix_features = cxgb4vf_fix_features, 2926 .ndo_set_features = cxgb4vf_set_features, 2927 #ifdef CONFIG_NET_POLL_CONTROLLER 2928 .ndo_poll_controller = cxgb4vf_poll_controller, 2929 #endif 2930 }; 2931 2932 /* 2933 * "Probe" a device: initialize a device and construct all kernel and driver 2934 * state needed to manage the device. This routine is called "init_one" in 2935 * the PF Driver ... 2936 */ 2937 static int cxgb4vf_pci_probe(struct pci_dev *pdev, 2938 const struct pci_device_id *ent) 2939 { 2940 int pci_using_dac; 2941 int err, pidx; 2942 unsigned int pmask; 2943 struct adapter *adapter; 2944 struct port_info *pi; 2945 struct net_device *netdev; 2946 unsigned int pf; 2947 2948 /* 2949 * Print our driver banner the first time we're called to initialize a 2950 * device. 2951 */ 2952 pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION); 2953 2954 /* 2955 * Initialize generic PCI device state. 2956 */ 2957 err = pci_enable_device(pdev); 2958 if (err) { 2959 dev_err(&pdev->dev, "cannot enable PCI device\n"); 2960 return err; 2961 } 2962 2963 /* 2964 * Reserve PCI resources for the device. If we can't get them some 2965 * other driver may have already claimed the device ... 2966 */ 2967 err = pci_request_regions(pdev, KBUILD_MODNAME); 2968 if (err) { 2969 dev_err(&pdev->dev, "cannot obtain PCI resources\n"); 2970 goto err_disable_device; 2971 } 2972 2973 /* 2974 * Set up our DMA mask: try for 64-bit address masking first and 2975 * fall back to 32-bit if we can't get 64 bits ... 2976 */ 2977 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); 2978 if (err == 0) { 2979 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); 2980 if (err) { 2981 dev_err(&pdev->dev, "unable to obtain 64-bit DMA for" 2982 " coherent allocations\n"); 2983 goto err_release_regions; 2984 } 2985 pci_using_dac = 1; 2986 } else { 2987 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); 2988 if (err != 0) { 2989 dev_err(&pdev->dev, "no usable DMA configuration\n"); 2990 goto err_release_regions; 2991 } 2992 pci_using_dac = 0; 2993 } 2994 2995 /* 2996 * Enable bus mastering for the device ... 2997 */ 2998 pci_set_master(pdev); 2999 3000 /* 3001 * Allocate our adapter data structure and attach it to the device. 3002 */ 3003 adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); 3004 if (!adapter) { 3005 err = -ENOMEM; 3006 goto err_release_regions; 3007 } 3008 pci_set_drvdata(pdev, adapter); 3009 adapter->pdev = pdev; 3010 adapter->pdev_dev = &pdev->dev; 3011 3012 adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) + 3013 (sizeof(struct mbox_cmd) * 3014 T4VF_OS_LOG_MBOX_CMDS), 3015 GFP_KERNEL); 3016 if (!adapter->mbox_log) { 3017 err = -ENOMEM; 3018 goto err_free_adapter; 3019 } 3020 adapter->mbox_log->size = T4VF_OS_LOG_MBOX_CMDS; 3021 3022 /* 3023 * Initialize SMP data synchronization resources. 3024 */ 3025 spin_lock_init(&adapter->stats_lock); 3026 spin_lock_init(&adapter->mbox_lock); 3027 INIT_LIST_HEAD(&adapter->mlist.list); 3028 3029 /* 3030 * Map our I/O registers in BAR0. 3031 */ 3032 adapter->regs = pci_ioremap_bar(pdev, 0); 3033 if (!adapter->regs) { 3034 dev_err(&pdev->dev, "cannot map device registers\n"); 3035 err = -ENOMEM; 3036 goto err_free_adapter; 3037 } 3038 3039 /* Wait for the device to become ready before proceeding ... 3040 */ 3041 err = t4vf_prep_adapter(adapter); 3042 if (err) { 3043 dev_err(adapter->pdev_dev, "device didn't become ready:" 3044 " err=%d\n", err); 3045 goto err_unmap_bar0; 3046 } 3047 3048 /* For T5 and later we want to use the new BAR-based User Doorbells, 3049 * so we need to map BAR2 here ... 3050 */ 3051 if (!is_t4(adapter->params.chip)) { 3052 adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2), 3053 pci_resource_len(pdev, 2)); 3054 if (!adapter->bar2) { 3055 dev_err(adapter->pdev_dev, "cannot map BAR2 doorbells\n"); 3056 err = -ENOMEM; 3057 goto err_unmap_bar0; 3058 } 3059 } 3060 /* 3061 * Initialize adapter level features. 3062 */ 3063 adapter->name = pci_name(pdev); 3064 adapter->msg_enable = DFLT_MSG_ENABLE; 3065 3066 /* If possible, we use PCIe Relaxed Ordering Attribute to deliver 3067 * Ingress Packet Data to Free List Buffers in order to allow for 3068 * chipset performance optimizations between the Root Complex and 3069 * Memory Controllers. (Messages to the associated Ingress Queue 3070 * notifying new Packet Placement in the Free Lists Buffers will be 3071 * send without the Relaxed Ordering Attribute thus guaranteeing that 3072 * all preceding PCIe Transaction Layer Packets will be processed 3073 * first.) But some Root Complexes have various issues with Upstream 3074 * Transaction Layer Packets with the Relaxed Ordering Attribute set. 3075 * The PCIe devices which under the Root Complexes will be cleared the 3076 * Relaxed Ordering bit in the configuration space, So we check our 3077 * PCIe configuration space to see if it's flagged with advice against 3078 * using Relaxed Ordering. 3079 */ 3080 if (!pcie_relaxed_ordering_enabled(pdev)) 3081 adapter->flags |= ROOT_NO_RELAXED_ORDERING; 3082 3083 err = adap_init0(adapter); 3084 if (err) 3085 goto err_unmap_bar; 3086 3087 /* Initialize hash mac addr list */ 3088 INIT_LIST_HEAD(&adapter->mac_hlist); 3089 3090 /* 3091 * Allocate our "adapter ports" and stitch everything together. 3092 */ 3093 pmask = adapter->params.vfres.pmask; 3094 pf = t4vf_get_pf_from_vf(adapter); 3095 for_each_port(adapter, pidx) { 3096 int port_id, viid; 3097 u8 mac[ETH_ALEN]; 3098 unsigned int naddr = 1; 3099 3100 /* 3101 * We simplistically allocate our virtual interfaces 3102 * sequentially across the port numbers to which we have 3103 * access rights. This should be configurable in some manner 3104 * ... 3105 */ 3106 if (pmask == 0) 3107 break; 3108 port_id = ffs(pmask) - 1; 3109 pmask &= ~(1 << port_id); 3110 viid = t4vf_alloc_vi(adapter, port_id); 3111 if (viid < 0) { 3112 dev_err(&pdev->dev, "cannot allocate VI for port %d:" 3113 " err=%d\n", port_id, viid); 3114 err = viid; 3115 goto err_free_dev; 3116 } 3117 3118 /* 3119 * Allocate our network device and stitch things together. 3120 */ 3121 netdev = alloc_etherdev_mq(sizeof(struct port_info), 3122 MAX_PORT_QSETS); 3123 if (netdev == NULL) { 3124 t4vf_free_vi(adapter, viid); 3125 err = -ENOMEM; 3126 goto err_free_dev; 3127 } 3128 adapter->port[pidx] = netdev; 3129 SET_NETDEV_DEV(netdev, &pdev->dev); 3130 pi = netdev_priv(netdev); 3131 pi->adapter = adapter; 3132 pi->pidx = pidx; 3133 pi->port_id = port_id; 3134 pi->viid = viid; 3135 3136 /* 3137 * Initialize the starting state of our "port" and register 3138 * it. 3139 */ 3140 pi->xact_addr_filt = -1; 3141 netif_carrier_off(netdev); 3142 netdev->irq = pdev->irq; 3143 3144 netdev->hw_features = NETIF_F_SG | TSO_FLAGS | 3145 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | 3146 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM; 3147 netdev->vlan_features = NETIF_F_SG | TSO_FLAGS | 3148 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | 3149 NETIF_F_HIGHDMA; 3150 netdev->features = netdev->hw_features | 3151 NETIF_F_HW_VLAN_CTAG_TX; 3152 if (pci_using_dac) 3153 netdev->features |= NETIF_F_HIGHDMA; 3154 3155 netdev->priv_flags |= IFF_UNICAST_FLT; 3156 netdev->min_mtu = 81; 3157 netdev->max_mtu = ETH_MAX_MTU; 3158 3159 netdev->netdev_ops = &cxgb4vf_netdev_ops; 3160 netdev->ethtool_ops = &cxgb4vf_ethtool_ops; 3161 netdev->dev_port = pi->port_id; 3162 3163 /* 3164 * Initialize the hardware/software state for the port. 3165 */ 3166 err = t4vf_port_init(adapter, pidx); 3167 if (err) { 3168 dev_err(&pdev->dev, "cannot initialize port %d\n", 3169 pidx); 3170 goto err_free_dev; 3171 } 3172 3173 err = t4vf_get_vf_mac_acl(adapter, pf, &naddr, mac); 3174 if (err) { 3175 dev_err(&pdev->dev, 3176 "unable to determine MAC ACL address, " 3177 "continuing anyway.. (status %d)\n", err); 3178 } else if (naddr && adapter->params.vfres.nvi == 1) { 3179 struct sockaddr addr; 3180 3181 ether_addr_copy(addr.sa_data, mac); 3182 err = cxgb4vf_set_mac_addr(netdev, &addr); 3183 if (err) { 3184 dev_err(&pdev->dev, 3185 "unable to set MAC address %pM\n", 3186 mac); 3187 goto err_free_dev; 3188 } 3189 dev_info(&pdev->dev, 3190 "Using assigned MAC ACL: %pM\n", mac); 3191 } 3192 } 3193 3194 /* See what interrupts we'll be using. If we've been configured to 3195 * use MSI-X interrupts, try to enable them but fall back to using 3196 * MSI interrupts if we can't enable MSI-X interrupts. If we can't 3197 * get MSI interrupts we bail with the error. 3198 */ 3199 if (msi == MSI_MSIX && enable_msix(adapter) == 0) 3200 adapter->flags |= USING_MSIX; 3201 else { 3202 if (msi == MSI_MSIX) { 3203 dev_info(adapter->pdev_dev, 3204 "Unable to use MSI-X Interrupts; falling " 3205 "back to MSI Interrupts\n"); 3206 3207 /* We're going to need a Forwarded Interrupt Queue so 3208 * that may cut into how many Queue Sets we can 3209 * support. 3210 */ 3211 msi = MSI_MSI; 3212 size_nports_qsets(adapter); 3213 } 3214 err = pci_enable_msi(pdev); 3215 if (err) { 3216 dev_err(&pdev->dev, "Unable to allocate MSI Interrupts;" 3217 " err=%d\n", err); 3218 goto err_free_dev; 3219 } 3220 adapter->flags |= USING_MSI; 3221 } 3222 3223 /* Now that we know how many "ports" we have and what interrupt 3224 * mechanism we're going to use, we can configure our queue resources. 3225 */ 3226 cfg_queues(adapter); 3227 3228 /* 3229 * The "card" is now ready to go. If any errors occur during device 3230 * registration we do not fail the whole "card" but rather proceed 3231 * only with the ports we manage to register successfully. However we 3232 * must register at least one net device. 3233 */ 3234 for_each_port(adapter, pidx) { 3235 struct port_info *pi = netdev_priv(adapter->port[pidx]); 3236 netdev = adapter->port[pidx]; 3237 if (netdev == NULL) 3238 continue; 3239 3240 netif_set_real_num_tx_queues(netdev, pi->nqsets); 3241 netif_set_real_num_rx_queues(netdev, pi->nqsets); 3242 3243 err = register_netdev(netdev); 3244 if (err) { 3245 dev_warn(&pdev->dev, "cannot register net device %s," 3246 " skipping\n", netdev->name); 3247 continue; 3248 } 3249 3250 set_bit(pidx, &adapter->registered_device_map); 3251 } 3252 if (adapter->registered_device_map == 0) { 3253 dev_err(&pdev->dev, "could not register any net devices\n"); 3254 goto err_disable_interrupts; 3255 } 3256 3257 /* 3258 * Set up our debugfs entries. 3259 */ 3260 if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) { 3261 adapter->debugfs_root = 3262 debugfs_create_dir(pci_name(pdev), 3263 cxgb4vf_debugfs_root); 3264 if (IS_ERR_OR_NULL(adapter->debugfs_root)) 3265 dev_warn(&pdev->dev, "could not create debugfs" 3266 " directory"); 3267 else 3268 setup_debugfs(adapter); 3269 } 3270 3271 /* 3272 * Print a short notice on the existence and configuration of the new 3273 * VF network device ... 3274 */ 3275 for_each_port(adapter, pidx) { 3276 dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n", 3277 adapter->port[pidx]->name, 3278 (adapter->flags & USING_MSIX) ? "MSI-X" : 3279 (adapter->flags & USING_MSI) ? "MSI" : ""); 3280 } 3281 3282 /* 3283 * Return success! 3284 */ 3285 return 0; 3286 3287 /* 3288 * Error recovery and exit code. Unwind state that's been created 3289 * so far and return the error. 3290 */ 3291 err_disable_interrupts: 3292 if (adapter->flags & USING_MSIX) { 3293 pci_disable_msix(adapter->pdev); 3294 adapter->flags &= ~USING_MSIX; 3295 } else if (adapter->flags & USING_MSI) { 3296 pci_disable_msi(adapter->pdev); 3297 adapter->flags &= ~USING_MSI; 3298 } 3299 3300 err_free_dev: 3301 for_each_port(adapter, pidx) { 3302 netdev = adapter->port[pidx]; 3303 if (netdev == NULL) 3304 continue; 3305 pi = netdev_priv(netdev); 3306 t4vf_free_vi(adapter, pi->viid); 3307 if (test_bit(pidx, &adapter->registered_device_map)) 3308 unregister_netdev(netdev); 3309 free_netdev(netdev); 3310 } 3311 3312 err_unmap_bar: 3313 if (!is_t4(adapter->params.chip)) 3314 iounmap(adapter->bar2); 3315 3316 err_unmap_bar0: 3317 iounmap(adapter->regs); 3318 3319 err_free_adapter: 3320 kfree(adapter->mbox_log); 3321 kfree(adapter); 3322 3323 err_release_regions: 3324 pci_release_regions(pdev); 3325 pci_clear_master(pdev); 3326 3327 err_disable_device: 3328 pci_disable_device(pdev); 3329 3330 return err; 3331 } 3332 3333 /* 3334 * "Remove" a device: tear down all kernel and driver state created in the 3335 * "probe" routine and quiesce the device (disable interrupts, etc.). (Note 3336 * that this is called "remove_one" in the PF Driver.) 3337 */ 3338 static void cxgb4vf_pci_remove(struct pci_dev *pdev) 3339 { 3340 struct adapter *adapter = pci_get_drvdata(pdev); 3341 struct hash_mac_addr *entry, *tmp; 3342 3343 /* 3344 * Tear down driver state associated with device. 3345 */ 3346 if (adapter) { 3347 int pidx; 3348 3349 /* 3350 * Stop all of our activity. Unregister network port, 3351 * disable interrupts, etc. 3352 */ 3353 for_each_port(adapter, pidx) 3354 if (test_bit(pidx, &adapter->registered_device_map)) 3355 unregister_netdev(adapter->port[pidx]); 3356 t4vf_sge_stop(adapter); 3357 if (adapter->flags & USING_MSIX) { 3358 pci_disable_msix(adapter->pdev); 3359 adapter->flags &= ~USING_MSIX; 3360 } else if (adapter->flags & USING_MSI) { 3361 pci_disable_msi(adapter->pdev); 3362 adapter->flags &= ~USING_MSI; 3363 } 3364 3365 /* 3366 * Tear down our debugfs entries. 3367 */ 3368 if (!IS_ERR_OR_NULL(adapter->debugfs_root)) { 3369 cleanup_debugfs(adapter); 3370 debugfs_remove_recursive(adapter->debugfs_root); 3371 } 3372 3373 /* 3374 * Free all of the various resources which we've acquired ... 3375 */ 3376 t4vf_free_sge_resources(adapter); 3377 for_each_port(adapter, pidx) { 3378 struct net_device *netdev = adapter->port[pidx]; 3379 struct port_info *pi; 3380 3381 if (netdev == NULL) 3382 continue; 3383 3384 pi = netdev_priv(netdev); 3385 t4vf_free_vi(adapter, pi->viid); 3386 free_netdev(netdev); 3387 } 3388 iounmap(adapter->regs); 3389 if (!is_t4(adapter->params.chip)) 3390 iounmap(adapter->bar2); 3391 kfree(adapter->mbox_log); 3392 list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist, 3393 list) { 3394 list_del(&entry->list); 3395 kfree(entry); 3396 } 3397 kfree(adapter); 3398 } 3399 3400 /* 3401 * Disable the device and release its PCI resources. 3402 */ 3403 pci_disable_device(pdev); 3404 pci_clear_master(pdev); 3405 pci_release_regions(pdev); 3406 } 3407 3408 /* 3409 * "Shutdown" quiesce the device, stopping Ingress Packet and Interrupt 3410 * delivery. 3411 */ 3412 static void cxgb4vf_pci_shutdown(struct pci_dev *pdev) 3413 { 3414 struct adapter *adapter; 3415 int pidx; 3416 3417 adapter = pci_get_drvdata(pdev); 3418 if (!adapter) 3419 return; 3420 3421 /* Disable all Virtual Interfaces. This will shut down the 3422 * delivery of all ingress packets into the chip for these 3423 * Virtual Interfaces. 3424 */ 3425 for_each_port(adapter, pidx) 3426 if (test_bit(pidx, &adapter->registered_device_map)) 3427 unregister_netdev(adapter->port[pidx]); 3428 3429 /* Free up all Queues which will prevent further DMA and 3430 * Interrupts allowing various internal pathways to drain. 3431 */ 3432 t4vf_sge_stop(adapter); 3433 if (adapter->flags & USING_MSIX) { 3434 pci_disable_msix(adapter->pdev); 3435 adapter->flags &= ~USING_MSIX; 3436 } else if (adapter->flags & USING_MSI) { 3437 pci_disable_msi(adapter->pdev); 3438 adapter->flags &= ~USING_MSI; 3439 } 3440 3441 /* 3442 * Free up all Queues which will prevent further DMA and 3443 * Interrupts allowing various internal pathways to drain. 3444 */ 3445 t4vf_free_sge_resources(adapter); 3446 pci_set_drvdata(pdev, NULL); 3447 } 3448 3449 /* Macros needed to support the PCI Device ID Table ... 3450 */ 3451 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \ 3452 static const struct pci_device_id cxgb4vf_pci_tbl[] = { 3453 #define CH_PCI_DEVICE_ID_FUNCTION 0x8 3454 3455 #define CH_PCI_ID_TABLE_ENTRY(devid) \ 3456 { PCI_VDEVICE(CHELSIO, (devid)), 0 } 3457 3458 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_END { 0, } } 3459 3460 #include "../cxgb4/t4_pci_id_tbl.h" 3461 3462 MODULE_DESCRIPTION(DRV_DESC); 3463 MODULE_AUTHOR("Chelsio Communications"); 3464 MODULE_LICENSE("Dual BSD/GPL"); 3465 MODULE_VERSION(DRV_VERSION); 3466 MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl); 3467 3468 static struct pci_driver cxgb4vf_driver = { 3469 .name = KBUILD_MODNAME, 3470 .id_table = cxgb4vf_pci_tbl, 3471 .probe = cxgb4vf_pci_probe, 3472 .remove = cxgb4vf_pci_remove, 3473 .shutdown = cxgb4vf_pci_shutdown, 3474 }; 3475 3476 /* 3477 * Initialize global driver state. 3478 */ 3479 static int __init cxgb4vf_module_init(void) 3480 { 3481 int ret; 3482 3483 /* 3484 * Vet our module parameters. 3485 */ 3486 if (msi != MSI_MSIX && msi != MSI_MSI) { 3487 pr_warn("bad module parameter msi=%d; must be %d (MSI-X or MSI) or %d (MSI)\n", 3488 msi, MSI_MSIX, MSI_MSI); 3489 return -EINVAL; 3490 } 3491 3492 /* Debugfs support is optional, just warn if this fails */ 3493 cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL); 3494 if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) 3495 pr_warn("could not create debugfs entry, continuing\n"); 3496 3497 ret = pci_register_driver(&cxgb4vf_driver); 3498 if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) 3499 debugfs_remove(cxgb4vf_debugfs_root); 3500 return ret; 3501 } 3502 3503 /* 3504 * Tear down global driver state. 3505 */ 3506 static void __exit cxgb4vf_module_exit(void) 3507 { 3508 pci_unregister_driver(&cxgb4vf_driver); 3509 debugfs_remove(cxgb4vf_debugfs_root); 3510 } 3511 3512 module_init(cxgb4vf_module_init); 3513 module_exit(cxgb4vf_module_exit); 3514