1 /****************************************************************************** 2 3 Copyright (c) 2006-2009, Myricom Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 ***************************************************************************/ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/linker.h> 36 #include <sys/firmware.h> 37 #include <sys/endian.h> 38 #include <sys/sockio.h> 39 #include <sys/mbuf.h> 40 #include <sys/malloc.h> 41 #include <sys/kdb.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/module.h> 45 #include <sys/socket.h> 46 #include <sys/sysctl.h> 47 #include <sys/sx.h> 48 #include <sys/taskqueue.h> 49 50 /* count xmits ourselves, rather than via drbr */ 51 #define NO_SLOW_STATS 52 #include <net/if.h> 53 #include <net/if_arp.h> 54 #include <net/ethernet.h> 55 #include <net/if_dl.h> 56 #include <net/if_media.h> 57 58 #include <net/bpf.h> 59 60 #include <net/if_types.h> 61 #include <net/if_vlan_var.h> 62 #include <net/zlib.h> 63 64 #include <netinet/in_systm.h> 65 #include <netinet/in.h> 66 #include <netinet/ip.h> 67 #include <netinet/tcp.h> 68 69 #include <machine/bus.h> 70 #include <machine/in_cksum.h> 71 #include <machine/resource.h> 72 #include <sys/bus.h> 73 #include <sys/rman.h> 74 #include <sys/smp.h> 75 76 #include <dev/pci/pcireg.h> 77 #include <dev/pci/pcivar.h> 78 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */ 79 80 #include <vm/vm.h> /* for pmap_mapdev() */ 81 #include <vm/pmap.h> 82 83 #if defined(__i386) || defined(__amd64) 84 #include <machine/specialreg.h> 85 #endif 86 87 #include <dev/mxge/mxge_mcp.h> 88 #include <dev/mxge/mcp_gen_header.h> 89 /*#define MXGE_FAKE_IFP*/ 90 #include <dev/mxge/if_mxge_var.h> 91 #ifdef IFNET_BUF_RING 92 #include <sys/buf_ring.h> 93 #endif 94 95 #include "opt_inet.h" 96 97 /* tunable params */ 98 static int mxge_nvidia_ecrc_enable = 1; 99 static int mxge_force_firmware = 0; 100 static int mxge_intr_coal_delay = 30; 101 static int mxge_deassert_wait = 1; 102 static int mxge_flow_control = 1; 103 static int mxge_verbose = 0; 104 static int mxge_lro_cnt = 8; 105 static int mxge_ticks; 106 static int mxge_max_slices = 1; 107 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 108 static int mxge_always_promisc = 0; 109 static int mxge_initial_mtu = ETHERMTU_JUMBO; 110 static int mxge_throttle = 0; 111 static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 112 static char *mxge_fw_aligned = "mxge_eth_z8e"; 113 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 114 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 115 116 static int mxge_probe(device_t dev); 117 static int mxge_attach(device_t dev); 118 static int mxge_detach(device_t dev); 119 static int mxge_shutdown(device_t dev); 120 static void mxge_intr(void *arg); 121 122 static device_method_t mxge_methods[] = 123 { 124 /* Device interface */ 125 DEVMETHOD(device_probe, mxge_probe), 126 DEVMETHOD(device_attach, mxge_attach), 127 DEVMETHOD(device_detach, mxge_detach), 128 DEVMETHOD(device_shutdown, mxge_shutdown), 129 {0, 0} 130 }; 131 132 static driver_t mxge_driver = 133 { 134 "mxge", 135 mxge_methods, 136 sizeof(mxge_softc_t), 137 }; 138 139 static devclass_t mxge_devclass; 140 141 /* Declare ourselves to be a child of the PCI bus.*/ 142 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 143 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 144 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 145 146 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 147 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 148 static int mxge_close(mxge_softc_t *sc, int down); 149 static int mxge_open(mxge_softc_t *sc); 150 static void mxge_tick(void *arg); 151 152 static int 153 mxge_probe(device_t dev) 154 { 155 int rev; 156 157 158 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 159 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 160 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 161 rev = pci_get_revid(dev); 162 switch (rev) { 163 case MXGE_PCI_REV_Z8E: 164 device_set_desc(dev, "Myri10G-PCIE-8A"); 165 break; 166 case MXGE_PCI_REV_Z8ES: 167 device_set_desc(dev, "Myri10G-PCIE-8B"); 168 break; 169 default: 170 device_set_desc(dev, "Myri10G-PCIE-8??"); 171 device_printf(dev, "Unrecognized rev %d NIC\n", 172 rev); 173 break; 174 } 175 return 0; 176 } 177 return ENXIO; 178 } 179 180 static void 181 mxge_enable_wc(mxge_softc_t *sc) 182 { 183 #if defined(__i386) || defined(__amd64) 184 vm_offset_t len; 185 int err; 186 187 sc->wc = 1; 188 len = rman_get_size(sc->mem_res); 189 err = pmap_change_attr((vm_offset_t) sc->sram, 190 len, PAT_WRITE_COMBINING); 191 if (err != 0) { 192 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 193 err); 194 sc->wc = 0; 195 } 196 #endif 197 } 198 199 200 /* callback to get our DMA address */ 201 static void 202 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 203 int error) 204 { 205 if (error == 0) { 206 *(bus_addr_t *) arg = segs->ds_addr; 207 } 208 } 209 210 static int 211 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 212 bus_size_t alignment) 213 { 214 int err; 215 device_t dev = sc->dev; 216 bus_size_t boundary, maxsegsize; 217 218 if (bytes > 4096 && alignment == 4096) { 219 boundary = 0; 220 maxsegsize = bytes; 221 } else { 222 boundary = 4096; 223 maxsegsize = 4096; 224 } 225 226 /* allocate DMAable memory tags */ 227 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 228 alignment, /* alignment */ 229 boundary, /* boundary */ 230 BUS_SPACE_MAXADDR, /* low */ 231 BUS_SPACE_MAXADDR, /* high */ 232 NULL, NULL, /* filter */ 233 bytes, /* maxsize */ 234 1, /* num segs */ 235 maxsegsize, /* maxsegsize */ 236 BUS_DMA_COHERENT, /* flags */ 237 NULL, NULL, /* lock */ 238 &dma->dmat); /* tag */ 239 if (err != 0) { 240 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 241 return err; 242 } 243 244 /* allocate DMAable memory & map */ 245 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 246 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 247 | BUS_DMA_ZERO), &dma->map); 248 if (err != 0) { 249 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 250 goto abort_with_dmat; 251 } 252 253 /* load the memory */ 254 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 255 mxge_dmamap_callback, 256 (void *)&dma->bus_addr, 0); 257 if (err != 0) { 258 device_printf(dev, "couldn't load map (err = %d)\n", err); 259 goto abort_with_mem; 260 } 261 return 0; 262 263 abort_with_mem: 264 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 265 abort_with_dmat: 266 (void)bus_dma_tag_destroy(dma->dmat); 267 return err; 268 } 269 270 271 static void 272 mxge_dma_free(mxge_dma_t *dma) 273 { 274 bus_dmamap_unload(dma->dmat, dma->map); 275 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 276 (void)bus_dma_tag_destroy(dma->dmat); 277 } 278 279 /* 280 * The eeprom strings on the lanaiX have the format 281 * SN=x\0 282 * MAC=x:x:x:x:x:x\0 283 * PC=text\0 284 */ 285 286 static int 287 mxge_parse_strings(mxge_softc_t *sc) 288 { 289 #define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++) 290 291 char *ptr, *limit; 292 int i, found_mac; 293 294 ptr = sc->eeprom_strings; 295 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE; 296 found_mac = 0; 297 while (ptr < limit && *ptr != '\0') { 298 if (memcmp(ptr, "MAC=", 4) == 0) { 299 ptr += 1; 300 sc->mac_addr_string = ptr; 301 for (i = 0; i < 6; i++) { 302 ptr += 3; 303 if ((ptr + 2) > limit) 304 goto abort; 305 sc->mac_addr[i] = strtoul(ptr, NULL, 16); 306 found_mac = 1; 307 } 308 } else if (memcmp(ptr, "PC=", 3) == 0) { 309 ptr += 3; 310 strncpy(sc->product_code_string, ptr, 311 sizeof (sc->product_code_string) - 1); 312 } else if (memcmp(ptr, "SN=", 3) == 0) { 313 ptr += 3; 314 strncpy(sc->serial_number_string, ptr, 315 sizeof (sc->serial_number_string) - 1); 316 } 317 MXGE_NEXT_STRING(ptr); 318 } 319 320 if (found_mac) 321 return 0; 322 323 abort: 324 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 325 326 return ENXIO; 327 } 328 329 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 330 static void 331 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 332 { 333 uint32_t val; 334 unsigned long base, off; 335 char *va, *cfgptr; 336 device_t pdev, mcp55; 337 uint16_t vendor_id, device_id, word; 338 uintptr_t bus, slot, func, ivend, idev; 339 uint32_t *ptr32; 340 341 342 if (!mxge_nvidia_ecrc_enable) 343 return; 344 345 pdev = device_get_parent(device_get_parent(sc->dev)); 346 if (pdev == NULL) { 347 device_printf(sc->dev, "could not find parent?\n"); 348 return; 349 } 350 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 351 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 352 353 if (vendor_id != 0x10de) 354 return; 355 356 base = 0; 357 358 if (device_id == 0x005d) { 359 /* ck804, base address is magic */ 360 base = 0xe0000000UL; 361 } else if (device_id >= 0x0374 && device_id <= 0x378) { 362 /* mcp55, base address stored in chipset */ 363 mcp55 = pci_find_bsf(0, 0, 0); 364 if (mcp55 && 365 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 366 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 367 word = pci_read_config(mcp55, 0x90, 2); 368 base = ((unsigned long)word & 0x7ffeU) << 25; 369 } 370 } 371 if (!base) 372 return; 373 374 /* XXXX 375 Test below is commented because it is believed that doing 376 config read/write beyond 0xff will access the config space 377 for the next larger function. Uncomment this and remove 378 the hacky pmap_mapdev() way of accessing config space when 379 FreeBSD grows support for extended pcie config space access 380 */ 381 #if 0 382 /* See if we can, by some miracle, access the extended 383 config space */ 384 val = pci_read_config(pdev, 0x178, 4); 385 if (val != 0xffffffff) { 386 val |= 0x40; 387 pci_write_config(pdev, 0x178, val, 4); 388 return; 389 } 390 #endif 391 /* Rather than using normal pci config space writes, we must 392 * map the Nvidia config space ourselves. This is because on 393 * opteron/nvidia class machine the 0xe000000 mapping is 394 * handled by the nvidia chipset, that means the internal PCI 395 * device (the on-chip northbridge), or the amd-8131 bridge 396 * and things behind them are not visible by this method. 397 */ 398 399 BUS_READ_IVAR(device_get_parent(pdev), pdev, 400 PCI_IVAR_BUS, &bus); 401 BUS_READ_IVAR(device_get_parent(pdev), pdev, 402 PCI_IVAR_SLOT, &slot); 403 BUS_READ_IVAR(device_get_parent(pdev), pdev, 404 PCI_IVAR_FUNCTION, &func); 405 BUS_READ_IVAR(device_get_parent(pdev), pdev, 406 PCI_IVAR_VENDOR, &ivend); 407 BUS_READ_IVAR(device_get_parent(pdev), pdev, 408 PCI_IVAR_DEVICE, &idev); 409 410 off = base 411 + 0x00100000UL * (unsigned long)bus 412 + 0x00001000UL * (unsigned long)(func 413 + 8 * slot); 414 415 /* map it into the kernel */ 416 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 417 418 419 if (va == NULL) { 420 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 421 return; 422 } 423 /* get a pointer to the config space mapped into the kernel */ 424 cfgptr = va + (off & PAGE_MASK); 425 426 /* make sure that we can really access it */ 427 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 428 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 429 if (! (vendor_id == ivend && device_id == idev)) { 430 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 431 vendor_id, device_id); 432 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 433 return; 434 } 435 436 ptr32 = (uint32_t*)(cfgptr + 0x178); 437 val = *ptr32; 438 439 if (val == 0xffffffff) { 440 device_printf(sc->dev, "extended mapping failed\n"); 441 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 442 return; 443 } 444 *ptr32 = val | 0x40; 445 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 446 if (mxge_verbose) 447 device_printf(sc->dev, 448 "Enabled ECRC on upstream Nvidia bridge " 449 "at %d:%d:%d\n", 450 (int)bus, (int)slot, (int)func); 451 return; 452 } 453 #else 454 static void 455 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 456 { 457 device_printf(sc->dev, 458 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 459 return; 460 } 461 #endif 462 463 464 static int 465 mxge_dma_test(mxge_softc_t *sc, int test_type) 466 { 467 mxge_cmd_t cmd; 468 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 469 int status; 470 uint32_t len; 471 char *test = " "; 472 473 474 /* Run a small DMA test. 475 * The magic multipliers to the length tell the firmware 476 * to do DMA read, write, or read+write tests. The 477 * results are returned in cmd.data0. The upper 16 478 * bits of the return is the number of transfers completed. 479 * The lower 16 bits is the time in 0.5us ticks that the 480 * transfers took to complete. 481 */ 482 483 len = sc->tx_boundary; 484 485 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 486 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 487 cmd.data2 = len * 0x10000; 488 status = mxge_send_cmd(sc, test_type, &cmd); 489 if (status != 0) { 490 test = "read"; 491 goto abort; 492 } 493 sc->read_dma = ((cmd.data0>>16) * len * 2) / 494 (cmd.data0 & 0xffff); 495 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 496 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 497 cmd.data2 = len * 0x1; 498 status = mxge_send_cmd(sc, test_type, &cmd); 499 if (status != 0) { 500 test = "write"; 501 goto abort; 502 } 503 sc->write_dma = ((cmd.data0>>16) * len * 2) / 504 (cmd.data0 & 0xffff); 505 506 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 507 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 508 cmd.data2 = len * 0x10001; 509 status = mxge_send_cmd(sc, test_type, &cmd); 510 if (status != 0) { 511 test = "read/write"; 512 goto abort; 513 } 514 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 515 (cmd.data0 & 0xffff); 516 517 abort: 518 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 519 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 520 test, status); 521 522 return status; 523 } 524 525 /* 526 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 527 * when the PCI-E Completion packets are aligned on an 8-byte 528 * boundary. Some PCI-E chip sets always align Completion packets; on 529 * the ones that do not, the alignment can be enforced by enabling 530 * ECRC generation (if supported). 531 * 532 * When PCI-E Completion packets are not aligned, it is actually more 533 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 534 * 535 * If the driver can neither enable ECRC nor verify that it has 536 * already been enabled, then it must use a firmware image which works 537 * around unaligned completion packets (ethp_z8e.dat), and it should 538 * also ensure that it never gives the device a Read-DMA which is 539 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 540 * enabled, then the driver should use the aligned (eth_z8e.dat) 541 * firmware image, and set tx_boundary to 4KB. 542 */ 543 544 static int 545 mxge_firmware_probe(mxge_softc_t *sc) 546 { 547 device_t dev = sc->dev; 548 int reg, status; 549 uint16_t pectl; 550 551 sc->tx_boundary = 4096; 552 /* 553 * Verify the max read request size was set to 4KB 554 * before trying the test with 4KB. 555 */ 556 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 557 pectl = pci_read_config(dev, reg + 0x8, 2); 558 if ((pectl & (5 << 12)) != (5 << 12)) { 559 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 560 pectl); 561 sc->tx_boundary = 2048; 562 } 563 } 564 565 /* 566 * load the optimized firmware (which assumes aligned PCIe 567 * completions) in order to see if it works on this host. 568 */ 569 sc->fw_name = mxge_fw_aligned; 570 status = mxge_load_firmware(sc, 1); 571 if (status != 0) { 572 return status; 573 } 574 575 /* 576 * Enable ECRC if possible 577 */ 578 mxge_enable_nvidia_ecrc(sc); 579 580 /* 581 * Run a DMA test which watches for unaligned completions and 582 * aborts on the first one seen. 583 */ 584 585 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 586 if (status == 0) 587 return 0; /* keep the aligned firmware */ 588 589 if (status != E2BIG) 590 device_printf(dev, "DMA test failed: %d\n", status); 591 if (status == ENOSYS) 592 device_printf(dev, "Falling back to ethp! " 593 "Please install up to date fw\n"); 594 return status; 595 } 596 597 static int 598 mxge_select_firmware(mxge_softc_t *sc) 599 { 600 int aligned = 0; 601 int force_firmware = mxge_force_firmware; 602 603 if (sc->throttle) 604 force_firmware = sc->throttle; 605 606 if (force_firmware != 0) { 607 if (force_firmware == 1) 608 aligned = 1; 609 else 610 aligned = 0; 611 if (mxge_verbose) 612 device_printf(sc->dev, 613 "Assuming %s completions (forced)\n", 614 aligned ? "aligned" : "unaligned"); 615 goto abort; 616 } 617 618 /* if the PCIe link width is 4 or less, we can use the aligned 619 firmware and skip any checks */ 620 if (sc->link_width != 0 && sc->link_width <= 4) { 621 device_printf(sc->dev, 622 "PCIe x%d Link, expect reduced performance\n", 623 sc->link_width); 624 aligned = 1; 625 goto abort; 626 } 627 628 if (0 == mxge_firmware_probe(sc)) 629 return 0; 630 631 abort: 632 if (aligned) { 633 sc->fw_name = mxge_fw_aligned; 634 sc->tx_boundary = 4096; 635 } else { 636 sc->fw_name = mxge_fw_unaligned; 637 sc->tx_boundary = 2048; 638 } 639 return (mxge_load_firmware(sc, 0)); 640 } 641 642 union qualhack 643 { 644 const char *ro_char; 645 char *rw_char; 646 }; 647 648 static int 649 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 650 { 651 652 653 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 654 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 655 be32toh(hdr->mcp_type)); 656 return EIO; 657 } 658 659 /* save firmware version for sysctl */ 660 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 661 if (mxge_verbose) 662 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 663 664 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 665 &sc->fw_ver_minor, &sc->fw_ver_tiny); 666 667 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 668 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 669 device_printf(sc->dev, "Found firmware version %s\n", 670 sc->fw_version); 671 device_printf(sc->dev, "Driver needs %d.%d\n", 672 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 673 return EINVAL; 674 } 675 return 0; 676 677 } 678 679 static void * 680 z_alloc(void *nil, u_int items, u_int size) 681 { 682 void *ptr; 683 684 ptr = malloc(items * size, M_TEMP, M_NOWAIT); 685 return ptr; 686 } 687 688 static void 689 z_free(void *nil, void *ptr) 690 { 691 free(ptr, M_TEMP); 692 } 693 694 695 static int 696 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 697 { 698 z_stream zs; 699 char *inflate_buffer; 700 const struct firmware *fw; 701 const mcp_gen_header_t *hdr; 702 unsigned hdr_offset; 703 int status; 704 unsigned int i; 705 char dummy; 706 size_t fw_len; 707 708 fw = firmware_get(sc->fw_name); 709 if (fw == NULL) { 710 device_printf(sc->dev, "Could not find firmware image %s\n", 711 sc->fw_name); 712 return ENOENT; 713 } 714 715 716 717 /* setup zlib and decompress f/w */ 718 bzero(&zs, sizeof (zs)); 719 zs.zalloc = z_alloc; 720 zs.zfree = z_free; 721 status = inflateInit(&zs); 722 if (status != Z_OK) { 723 status = EIO; 724 goto abort_with_fw; 725 } 726 727 /* the uncompressed size is stored as the firmware version, 728 which would otherwise go unused */ 729 fw_len = (size_t) fw->version; 730 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 731 if (inflate_buffer == NULL) 732 goto abort_with_zs; 733 zs.avail_in = fw->datasize; 734 zs.next_in = __DECONST(char *, fw->data); 735 zs.avail_out = fw_len; 736 zs.next_out = inflate_buffer; 737 status = inflate(&zs, Z_FINISH); 738 if (status != Z_STREAM_END) { 739 device_printf(sc->dev, "zlib %d\n", status); 740 status = EIO; 741 goto abort_with_buffer; 742 } 743 744 /* check id */ 745 hdr_offset = htobe32(*(const uint32_t *) 746 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 747 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 748 device_printf(sc->dev, "Bad firmware file"); 749 status = EIO; 750 goto abort_with_buffer; 751 } 752 hdr = (const void*)(inflate_buffer + hdr_offset); 753 754 status = mxge_validate_firmware(sc, hdr); 755 if (status != 0) 756 goto abort_with_buffer; 757 758 /* Copy the inflated firmware to NIC SRAM. */ 759 for (i = 0; i < fw_len; i += 256) { 760 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 761 inflate_buffer + i, 762 min(256U, (unsigned)(fw_len - i))); 763 wmb(); 764 dummy = *sc->sram; 765 wmb(); 766 } 767 768 *limit = fw_len; 769 status = 0; 770 abort_with_buffer: 771 free(inflate_buffer, M_TEMP); 772 abort_with_zs: 773 inflateEnd(&zs); 774 abort_with_fw: 775 firmware_put(fw, FIRMWARE_UNLOAD); 776 return status; 777 } 778 779 /* 780 * Enable or disable periodic RDMAs from the host to make certain 781 * chipsets resend dropped PCIe messages 782 */ 783 784 static void 785 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 786 { 787 char buf_bytes[72]; 788 volatile uint32_t *confirm; 789 volatile char *submit; 790 uint32_t *buf, dma_low, dma_high; 791 int i; 792 793 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 794 795 /* clear confirmation addr */ 796 confirm = (volatile uint32_t *)sc->cmd; 797 *confirm = 0; 798 wmb(); 799 800 /* send an rdma command to the PCIe engine, and wait for the 801 response in the confirmation address. The firmware should 802 write a -1 there to indicate it is alive and well 803 */ 804 805 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 806 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 807 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 808 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 809 buf[2] = htobe32(0xffffffff); /* confirm data */ 810 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 811 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 812 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 813 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 814 buf[5] = htobe32(enable); /* enable? */ 815 816 817 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 818 819 mxge_pio_copy(submit, buf, 64); 820 wmb(); 821 DELAY(1000); 822 wmb(); 823 i = 0; 824 while (*confirm != 0xffffffff && i < 20) { 825 DELAY(1000); 826 i++; 827 } 828 if (*confirm != 0xffffffff) { 829 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 830 (enable ? "enable" : "disable"), confirm, 831 *confirm); 832 } 833 return; 834 } 835 836 static int 837 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 838 { 839 mcp_cmd_t *buf; 840 char buf_bytes[sizeof(*buf) + 8]; 841 volatile mcp_cmd_response_t *response = sc->cmd; 842 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 843 uint32_t dma_low, dma_high; 844 int err, sleep_total = 0; 845 846 /* ensure buf is aligned to 8 bytes */ 847 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 848 849 buf->data0 = htobe32(data->data0); 850 buf->data1 = htobe32(data->data1); 851 buf->data2 = htobe32(data->data2); 852 buf->cmd = htobe32(cmd); 853 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 854 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 855 856 buf->response_addr.low = htobe32(dma_low); 857 buf->response_addr.high = htobe32(dma_high); 858 mtx_lock(&sc->cmd_mtx); 859 response->result = 0xffffffff; 860 wmb(); 861 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 862 863 /* wait up to 20ms */ 864 err = EAGAIN; 865 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 866 bus_dmamap_sync(sc->cmd_dma.dmat, 867 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 868 wmb(); 869 switch (be32toh(response->result)) { 870 case 0: 871 data->data0 = be32toh(response->data); 872 err = 0; 873 break; 874 case 0xffffffff: 875 DELAY(1000); 876 break; 877 case MXGEFW_CMD_UNKNOWN: 878 err = ENOSYS; 879 break; 880 case MXGEFW_CMD_ERROR_UNALIGNED: 881 err = E2BIG; 882 break; 883 case MXGEFW_CMD_ERROR_BUSY: 884 err = EBUSY; 885 break; 886 case MXGEFW_CMD_ERROR_I2C_ABSENT: 887 err = ENXIO; 888 break; 889 default: 890 device_printf(sc->dev, 891 "mxge: command %d " 892 "failed, result = %d\n", 893 cmd, be32toh(response->result)); 894 err = ENXIO; 895 break; 896 } 897 if (err != EAGAIN) 898 break; 899 } 900 if (err == EAGAIN) 901 device_printf(sc->dev, "mxge: command %d timed out" 902 "result = %d\n", 903 cmd, be32toh(response->result)); 904 mtx_unlock(&sc->cmd_mtx); 905 return err; 906 } 907 908 static int 909 mxge_adopt_running_firmware(mxge_softc_t *sc) 910 { 911 struct mcp_gen_header *hdr; 912 const size_t bytes = sizeof (struct mcp_gen_header); 913 size_t hdr_offset; 914 int status; 915 916 /* find running firmware header */ 917 hdr_offset = htobe32(*(volatile uint32_t *) 918 (sc->sram + MCP_HEADER_PTR_OFFSET)); 919 920 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 921 device_printf(sc->dev, 922 "Running firmware has bad header offset (%d)\n", 923 (int)hdr_offset); 924 return EIO; 925 } 926 927 /* copy header of running firmware from SRAM to host memory to 928 * validate firmware */ 929 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 930 if (hdr == NULL) { 931 device_printf(sc->dev, "could not malloc firmware hdr\n"); 932 return ENOMEM; 933 } 934 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 935 rman_get_bushandle(sc->mem_res), 936 hdr_offset, (char *)hdr, bytes); 937 status = mxge_validate_firmware(sc, hdr); 938 free(hdr, M_DEVBUF); 939 940 /* 941 * check to see if adopted firmware has bug where adopting 942 * it will cause broadcasts to be filtered unless the NIC 943 * is kept in ALLMULTI mode 944 */ 945 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 946 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 947 sc->adopted_rx_filter_bug = 1; 948 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 949 "working around rx filter bug\n", 950 sc->fw_ver_major, sc->fw_ver_minor, 951 sc->fw_ver_tiny); 952 } 953 954 return status; 955 } 956 957 958 static int 959 mxge_load_firmware(mxge_softc_t *sc, int adopt) 960 { 961 volatile uint32_t *confirm; 962 volatile char *submit; 963 char buf_bytes[72]; 964 uint32_t *buf, size, dma_low, dma_high; 965 int status, i; 966 967 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 968 969 size = sc->sram_size; 970 status = mxge_load_firmware_helper(sc, &size); 971 if (status) { 972 if (!adopt) 973 return status; 974 /* Try to use the currently running firmware, if 975 it is new enough */ 976 status = mxge_adopt_running_firmware(sc); 977 if (status) { 978 device_printf(sc->dev, 979 "failed to adopt running firmware\n"); 980 return status; 981 } 982 device_printf(sc->dev, 983 "Successfully adopted running firmware\n"); 984 if (sc->tx_boundary == 4096) { 985 device_printf(sc->dev, 986 "Using firmware currently running on NIC" 987 ". For optimal\n"); 988 device_printf(sc->dev, 989 "performance consider loading optimized " 990 "firmware\n"); 991 } 992 sc->fw_name = mxge_fw_unaligned; 993 sc->tx_boundary = 2048; 994 return 0; 995 } 996 /* clear confirmation addr */ 997 confirm = (volatile uint32_t *)sc->cmd; 998 *confirm = 0; 999 wmb(); 1000 /* send a reload command to the bootstrap MCP, and wait for the 1001 response in the confirmation address. The firmware should 1002 write a -1 there to indicate it is alive and well 1003 */ 1004 1005 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 1006 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 1007 1008 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 1009 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 1010 buf[2] = htobe32(0xffffffff); /* confirm data */ 1011 1012 /* FIX: All newest firmware should un-protect the bottom of 1013 the sram before handoff. However, the very first interfaces 1014 do not. Therefore the handoff copy must skip the first 8 bytes 1015 */ 1016 /* where the code starts*/ 1017 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 1018 buf[4] = htobe32(size - 8); /* length of code */ 1019 buf[5] = htobe32(8); /* where to copy to */ 1020 buf[6] = htobe32(0); /* where to jump to */ 1021 1022 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 1023 mxge_pio_copy(submit, buf, 64); 1024 wmb(); 1025 DELAY(1000); 1026 wmb(); 1027 i = 0; 1028 while (*confirm != 0xffffffff && i < 20) { 1029 DELAY(1000*10); 1030 i++; 1031 bus_dmamap_sync(sc->cmd_dma.dmat, 1032 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1033 } 1034 if (*confirm != 0xffffffff) { 1035 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1036 confirm, *confirm); 1037 1038 return ENXIO; 1039 } 1040 return 0; 1041 } 1042 1043 static int 1044 mxge_update_mac_address(mxge_softc_t *sc) 1045 { 1046 mxge_cmd_t cmd; 1047 uint8_t *addr = sc->mac_addr; 1048 int status; 1049 1050 1051 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1052 | (addr[2] << 8) | addr[3]); 1053 1054 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1055 1056 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1057 return status; 1058 } 1059 1060 static int 1061 mxge_change_pause(mxge_softc_t *sc, int pause) 1062 { 1063 mxge_cmd_t cmd; 1064 int status; 1065 1066 if (pause) 1067 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1068 &cmd); 1069 else 1070 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1071 &cmd); 1072 1073 if (status) { 1074 device_printf(sc->dev, "Failed to set flow control mode\n"); 1075 return ENXIO; 1076 } 1077 sc->pause = pause; 1078 return 0; 1079 } 1080 1081 static void 1082 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1083 { 1084 mxge_cmd_t cmd; 1085 int status; 1086 1087 if (mxge_always_promisc) 1088 promisc = 1; 1089 1090 if (promisc) 1091 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1092 &cmd); 1093 else 1094 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1095 &cmd); 1096 1097 if (status) { 1098 device_printf(sc->dev, "Failed to set promisc mode\n"); 1099 } 1100 } 1101 1102 static void 1103 mxge_set_multicast_list(mxge_softc_t *sc) 1104 { 1105 mxge_cmd_t cmd; 1106 struct ifmultiaddr *ifma; 1107 struct ifnet *ifp = sc->ifp; 1108 int err; 1109 1110 /* This firmware is known to not support multicast */ 1111 if (!sc->fw_multicast_support) 1112 return; 1113 1114 /* Disable multicast filtering while we play with the lists*/ 1115 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1116 if (err != 0) { 1117 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1118 " error status: %d\n", err); 1119 return; 1120 } 1121 1122 if (sc->adopted_rx_filter_bug) 1123 return; 1124 1125 if (ifp->if_flags & IFF_ALLMULTI) 1126 /* request to disable multicast filtering, so quit here */ 1127 return; 1128 1129 /* Flush all the filters */ 1130 1131 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1132 if (err != 0) { 1133 device_printf(sc->dev, 1134 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1135 ", error status: %d\n", err); 1136 return; 1137 } 1138 1139 /* Walk the multicast list, and add each address */ 1140 1141 if_maddr_rlock(ifp); 1142 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1143 if (ifma->ifma_addr->sa_family != AF_LINK) 1144 continue; 1145 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1146 &cmd.data0, 4); 1147 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1148 &cmd.data1, 2); 1149 cmd.data0 = htonl(cmd.data0); 1150 cmd.data1 = htonl(cmd.data1); 1151 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1152 if (err != 0) { 1153 device_printf(sc->dev, "Failed " 1154 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1155 "%d\t", err); 1156 /* abort, leaving multicast filtering off */ 1157 if_maddr_runlock(ifp); 1158 return; 1159 } 1160 } 1161 if_maddr_runlock(ifp); 1162 /* Enable multicast filtering */ 1163 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1164 if (err != 0) { 1165 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1166 ", error status: %d\n", err); 1167 } 1168 } 1169 1170 static int 1171 mxge_max_mtu(mxge_softc_t *sc) 1172 { 1173 mxge_cmd_t cmd; 1174 int status; 1175 1176 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1177 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1178 1179 /* try to set nbufs to see if it we can 1180 use virtually contiguous jumbos */ 1181 cmd.data0 = 0; 1182 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1183 &cmd); 1184 if (status == 0) 1185 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1186 1187 /* otherwise, we're limited to MJUMPAGESIZE */ 1188 return MJUMPAGESIZE - MXGEFW_PAD; 1189 } 1190 1191 static int 1192 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1193 { 1194 struct mxge_slice_state *ss; 1195 mxge_rx_done_t *rx_done; 1196 volatile uint32_t *irq_claim; 1197 mxge_cmd_t cmd; 1198 int slice, status; 1199 1200 /* try to send a reset command to the card to see if it 1201 is alive */ 1202 memset(&cmd, 0, sizeof (cmd)); 1203 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1204 if (status != 0) { 1205 device_printf(sc->dev, "failed reset\n"); 1206 return ENXIO; 1207 } 1208 1209 mxge_dummy_rdma(sc, 1); 1210 1211 1212 /* set the intrq size */ 1213 cmd.data0 = sc->rx_ring_size; 1214 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1215 1216 /* 1217 * Even though we already know how many slices are supported 1218 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1219 * has magic side effects, and must be called after a reset. 1220 * It must be called prior to calling any RSS related cmds, 1221 * including assigning an interrupt queue for anything but 1222 * slice 0. It must also be called *after* 1223 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1224 * the firmware to compute offsets. 1225 */ 1226 1227 if (sc->num_slices > 1) { 1228 /* ask the maximum number of slices it supports */ 1229 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1230 &cmd); 1231 if (status != 0) { 1232 device_printf(sc->dev, 1233 "failed to get number of slices\n"); 1234 return status; 1235 } 1236 /* 1237 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1238 * to setting up the interrupt queue DMA 1239 */ 1240 cmd.data0 = sc->num_slices; 1241 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1242 #ifdef IFNET_BUF_RING 1243 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1244 #endif 1245 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1246 &cmd); 1247 if (status != 0) { 1248 device_printf(sc->dev, 1249 "failed to set number of slices\n"); 1250 return status; 1251 } 1252 } 1253 1254 1255 if (interrupts_setup) { 1256 /* Now exchange information about interrupts */ 1257 for (slice = 0; slice < sc->num_slices; slice++) { 1258 rx_done = &sc->ss[slice].rx_done; 1259 memset(rx_done->entry, 0, sc->rx_ring_size); 1260 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1261 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1262 cmd.data2 = slice; 1263 status |= mxge_send_cmd(sc, 1264 MXGEFW_CMD_SET_INTRQ_DMA, 1265 &cmd); 1266 } 1267 } 1268 1269 status |= mxge_send_cmd(sc, 1270 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1271 1272 1273 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1274 1275 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1276 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1277 1278 1279 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1280 &cmd); 1281 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1282 if (status != 0) { 1283 device_printf(sc->dev, "failed set interrupt parameters\n"); 1284 return status; 1285 } 1286 1287 1288 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1289 1290 1291 /* run a DMA benchmark */ 1292 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1293 1294 for (slice = 0; slice < sc->num_slices; slice++) { 1295 ss = &sc->ss[slice]; 1296 1297 ss->irq_claim = irq_claim + (2 * slice); 1298 /* reset mcp/driver shared state back to 0 */ 1299 ss->rx_done.idx = 0; 1300 ss->rx_done.cnt = 0; 1301 ss->tx.req = 0; 1302 ss->tx.done = 0; 1303 ss->tx.pkt_done = 0; 1304 ss->tx.queue_active = 0; 1305 ss->tx.activate = 0; 1306 ss->tx.deactivate = 0; 1307 ss->tx.wake = 0; 1308 ss->tx.defrag = 0; 1309 ss->tx.stall = 0; 1310 ss->rx_big.cnt = 0; 1311 ss->rx_small.cnt = 0; 1312 ss->lro_bad_csum = 0; 1313 ss->lro_queued = 0; 1314 ss->lro_flushed = 0; 1315 if (ss->fw_stats != NULL) { 1316 bzero(ss->fw_stats, sizeof *ss->fw_stats); 1317 } 1318 } 1319 sc->rdma_tags_available = 15; 1320 status = mxge_update_mac_address(sc); 1321 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1322 mxge_change_pause(sc, sc->pause); 1323 mxge_set_multicast_list(sc); 1324 if (sc->throttle) { 1325 cmd.data0 = sc->throttle; 1326 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, 1327 &cmd)) { 1328 device_printf(sc->dev, 1329 "can't enable throttle\n"); 1330 } 1331 } 1332 return status; 1333 } 1334 1335 static int 1336 mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1337 { 1338 mxge_cmd_t cmd; 1339 mxge_softc_t *sc; 1340 int err; 1341 unsigned int throttle; 1342 1343 sc = arg1; 1344 throttle = sc->throttle; 1345 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1346 if (err != 0) { 1347 return err; 1348 } 1349 1350 if (throttle == sc->throttle) 1351 return 0; 1352 1353 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1354 return EINVAL; 1355 1356 mtx_lock(&sc->driver_mtx); 1357 cmd.data0 = throttle; 1358 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1359 if (err == 0) 1360 sc->throttle = throttle; 1361 mtx_unlock(&sc->driver_mtx); 1362 return err; 1363 } 1364 1365 static int 1366 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1367 { 1368 mxge_softc_t *sc; 1369 unsigned int intr_coal_delay; 1370 int err; 1371 1372 sc = arg1; 1373 intr_coal_delay = sc->intr_coal_delay; 1374 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1375 if (err != 0) { 1376 return err; 1377 } 1378 if (intr_coal_delay == sc->intr_coal_delay) 1379 return 0; 1380 1381 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1382 return EINVAL; 1383 1384 mtx_lock(&sc->driver_mtx); 1385 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1386 sc->intr_coal_delay = intr_coal_delay; 1387 1388 mtx_unlock(&sc->driver_mtx); 1389 return err; 1390 } 1391 1392 static int 1393 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1394 { 1395 mxge_softc_t *sc; 1396 unsigned int enabled; 1397 int err; 1398 1399 sc = arg1; 1400 enabled = sc->pause; 1401 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1402 if (err != 0) { 1403 return err; 1404 } 1405 if (enabled == sc->pause) 1406 return 0; 1407 1408 mtx_lock(&sc->driver_mtx); 1409 err = mxge_change_pause(sc, enabled); 1410 mtx_unlock(&sc->driver_mtx); 1411 return err; 1412 } 1413 1414 static int 1415 mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt) 1416 { 1417 struct ifnet *ifp; 1418 int err = 0; 1419 1420 ifp = sc->ifp; 1421 if (lro_cnt == 0) 1422 ifp->if_capenable &= ~IFCAP_LRO; 1423 else 1424 ifp->if_capenable |= IFCAP_LRO; 1425 sc->lro_cnt = lro_cnt; 1426 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1427 mxge_close(sc, 0); 1428 err = mxge_open(sc); 1429 } 1430 return err; 1431 } 1432 1433 static int 1434 mxge_change_lro(SYSCTL_HANDLER_ARGS) 1435 { 1436 mxge_softc_t *sc; 1437 unsigned int lro_cnt; 1438 int err; 1439 1440 sc = arg1; 1441 lro_cnt = sc->lro_cnt; 1442 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req); 1443 if (err != 0) 1444 return err; 1445 1446 if (lro_cnt == sc->lro_cnt) 1447 return 0; 1448 1449 if (lro_cnt > 128) 1450 return EINVAL; 1451 1452 mtx_lock(&sc->driver_mtx); 1453 err = mxge_change_lro_locked(sc, lro_cnt); 1454 mtx_unlock(&sc->driver_mtx); 1455 return err; 1456 } 1457 1458 static int 1459 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1460 { 1461 int err; 1462 1463 if (arg1 == NULL) 1464 return EFAULT; 1465 arg2 = be32toh(*(int *)arg1); 1466 arg1 = NULL; 1467 err = sysctl_handle_int(oidp, arg1, arg2, req); 1468 1469 return err; 1470 } 1471 1472 static void 1473 mxge_rem_sysctls(mxge_softc_t *sc) 1474 { 1475 struct mxge_slice_state *ss; 1476 int slice; 1477 1478 if (sc->slice_sysctl_tree == NULL) 1479 return; 1480 1481 for (slice = 0; slice < sc->num_slices; slice++) { 1482 ss = &sc->ss[slice]; 1483 if (ss == NULL || ss->sysctl_tree == NULL) 1484 continue; 1485 sysctl_ctx_free(&ss->sysctl_ctx); 1486 ss->sysctl_tree = NULL; 1487 } 1488 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1489 sc->slice_sysctl_tree = NULL; 1490 } 1491 1492 static void 1493 mxge_add_sysctls(mxge_softc_t *sc) 1494 { 1495 struct sysctl_ctx_list *ctx; 1496 struct sysctl_oid_list *children; 1497 mcp_irq_data_t *fw; 1498 struct mxge_slice_state *ss; 1499 int slice; 1500 char slice_num[8]; 1501 1502 ctx = device_get_sysctl_ctx(sc->dev); 1503 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1504 fw = sc->ss[0].fw_stats; 1505 1506 /* random information */ 1507 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1508 "firmware_version", 1509 CTLFLAG_RD, &sc->fw_version, 1510 0, "firmware version"); 1511 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1512 "serial_number", 1513 CTLFLAG_RD, &sc->serial_number_string, 1514 0, "serial number"); 1515 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1516 "product_code", 1517 CTLFLAG_RD, &sc->product_code_string, 1518 0, "product_code"); 1519 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1520 "pcie_link_width", 1521 CTLFLAG_RD, &sc->link_width, 1522 0, "tx_boundary"); 1523 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1524 "tx_boundary", 1525 CTLFLAG_RD, &sc->tx_boundary, 1526 0, "tx_boundary"); 1527 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1528 "write_combine", 1529 CTLFLAG_RD, &sc->wc, 1530 0, "write combining PIO?"); 1531 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1532 "read_dma_MBs", 1533 CTLFLAG_RD, &sc->read_dma, 1534 0, "DMA Read speed in MB/s"); 1535 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1536 "write_dma_MBs", 1537 CTLFLAG_RD, &sc->write_dma, 1538 0, "DMA Write speed in MB/s"); 1539 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1540 "read_write_dma_MBs", 1541 CTLFLAG_RD, &sc->read_write_dma, 1542 0, "DMA concurrent Read/Write speed in MB/s"); 1543 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1544 "watchdog_resets", 1545 CTLFLAG_RD, &sc->watchdog_resets, 1546 0, "Number of times NIC was reset"); 1547 1548 1549 /* performance related tunables */ 1550 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1551 "intr_coal_delay", 1552 CTLTYPE_INT|CTLFLAG_RW, sc, 1553 0, mxge_change_intr_coal, 1554 "I", "interrupt coalescing delay in usecs"); 1555 1556 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1557 "throttle", 1558 CTLTYPE_INT|CTLFLAG_RW, sc, 1559 0, mxge_change_throttle, 1560 "I", "transmit throttling"); 1561 1562 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1563 "flow_control_enabled", 1564 CTLTYPE_INT|CTLFLAG_RW, sc, 1565 0, mxge_change_flow_control, 1566 "I", "interrupt coalescing delay in usecs"); 1567 1568 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1569 "deassert_wait", 1570 CTLFLAG_RW, &mxge_deassert_wait, 1571 0, "Wait for IRQ line to go low in ihandler"); 1572 1573 /* stats block from firmware is in network byte order. 1574 Need to swap it */ 1575 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1576 "link_up", 1577 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1578 0, mxge_handle_be32, 1579 "I", "link up"); 1580 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1581 "rdma_tags_available", 1582 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1583 0, mxge_handle_be32, 1584 "I", "rdma_tags_available"); 1585 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1586 "dropped_bad_crc32", 1587 CTLTYPE_INT|CTLFLAG_RD, 1588 &fw->dropped_bad_crc32, 1589 0, mxge_handle_be32, 1590 "I", "dropped_bad_crc32"); 1591 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1592 "dropped_bad_phy", 1593 CTLTYPE_INT|CTLFLAG_RD, 1594 &fw->dropped_bad_phy, 1595 0, mxge_handle_be32, 1596 "I", "dropped_bad_phy"); 1597 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1598 "dropped_link_error_or_filtered", 1599 CTLTYPE_INT|CTLFLAG_RD, 1600 &fw->dropped_link_error_or_filtered, 1601 0, mxge_handle_be32, 1602 "I", "dropped_link_error_or_filtered"); 1603 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1604 "dropped_link_overflow", 1605 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1606 0, mxge_handle_be32, 1607 "I", "dropped_link_overflow"); 1608 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1609 "dropped_multicast_filtered", 1610 CTLTYPE_INT|CTLFLAG_RD, 1611 &fw->dropped_multicast_filtered, 1612 0, mxge_handle_be32, 1613 "I", "dropped_multicast_filtered"); 1614 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1615 "dropped_no_big_buffer", 1616 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1617 0, mxge_handle_be32, 1618 "I", "dropped_no_big_buffer"); 1619 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1620 "dropped_no_small_buffer", 1621 CTLTYPE_INT|CTLFLAG_RD, 1622 &fw->dropped_no_small_buffer, 1623 0, mxge_handle_be32, 1624 "I", "dropped_no_small_buffer"); 1625 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1626 "dropped_overrun", 1627 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1628 0, mxge_handle_be32, 1629 "I", "dropped_overrun"); 1630 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1631 "dropped_pause", 1632 CTLTYPE_INT|CTLFLAG_RD, 1633 &fw->dropped_pause, 1634 0, mxge_handle_be32, 1635 "I", "dropped_pause"); 1636 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1637 "dropped_runt", 1638 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1639 0, mxge_handle_be32, 1640 "I", "dropped_runt"); 1641 1642 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1643 "dropped_unicast_filtered", 1644 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1645 0, mxge_handle_be32, 1646 "I", "dropped_unicast_filtered"); 1647 1648 /* verbose printing? */ 1649 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1650 "verbose", 1651 CTLFLAG_RW, &mxge_verbose, 1652 0, "verbose printing"); 1653 1654 /* lro */ 1655 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1656 "lro_cnt", 1657 CTLTYPE_INT|CTLFLAG_RW, sc, 1658 0, mxge_change_lro, 1659 "I", "number of lro merge queues"); 1660 1661 1662 /* add counters exported for debugging from all slices */ 1663 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1664 sc->slice_sysctl_tree = 1665 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1666 "slice", CTLFLAG_RD, 0, ""); 1667 1668 for (slice = 0; slice < sc->num_slices; slice++) { 1669 ss = &sc->ss[slice]; 1670 sysctl_ctx_init(&ss->sysctl_ctx); 1671 ctx = &ss->sysctl_ctx; 1672 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1673 sprintf(slice_num, "%d", slice); 1674 ss->sysctl_tree = 1675 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1676 CTLFLAG_RD, 0, ""); 1677 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1678 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1679 "rx_small_cnt", 1680 CTLFLAG_RD, &ss->rx_small.cnt, 1681 0, "rx_small_cnt"); 1682 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1683 "rx_big_cnt", 1684 CTLFLAG_RD, &ss->rx_big.cnt, 1685 0, "rx_small_cnt"); 1686 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1687 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed, 1688 0, "number of lro merge queues flushed"); 1689 1690 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1691 "lro_queued", CTLFLAG_RD, &ss->lro_queued, 1692 0, "number of frames appended to lro merge" 1693 "queues"); 1694 1695 #ifndef IFNET_BUF_RING 1696 /* only transmit from slice 0 for now */ 1697 if (slice > 0) 1698 continue; 1699 #endif 1700 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1701 "tx_req", 1702 CTLFLAG_RD, &ss->tx.req, 1703 0, "tx_req"); 1704 1705 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1706 "tx_done", 1707 CTLFLAG_RD, &ss->tx.done, 1708 0, "tx_done"); 1709 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1710 "tx_pkt_done", 1711 CTLFLAG_RD, &ss->tx.pkt_done, 1712 0, "tx_done"); 1713 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1714 "tx_stall", 1715 CTLFLAG_RD, &ss->tx.stall, 1716 0, "tx_stall"); 1717 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1718 "tx_wake", 1719 CTLFLAG_RD, &ss->tx.wake, 1720 0, "tx_wake"); 1721 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1722 "tx_defrag", 1723 CTLFLAG_RD, &ss->tx.defrag, 1724 0, "tx_defrag"); 1725 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1726 "tx_queue_active", 1727 CTLFLAG_RD, &ss->tx.queue_active, 1728 0, "tx_queue_active"); 1729 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1730 "tx_activate", 1731 CTLFLAG_RD, &ss->tx.activate, 1732 0, "tx_activate"); 1733 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1734 "tx_deactivate", 1735 CTLFLAG_RD, &ss->tx.deactivate, 1736 0, "tx_deactivate"); 1737 } 1738 } 1739 1740 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1741 backwards one at a time and handle ring wraps */ 1742 1743 static inline void 1744 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1745 mcp_kreq_ether_send_t *src, int cnt) 1746 { 1747 int idx, starting_slot; 1748 starting_slot = tx->req; 1749 while (cnt > 1) { 1750 cnt--; 1751 idx = (starting_slot + cnt) & tx->mask; 1752 mxge_pio_copy(&tx->lanai[idx], 1753 &src[cnt], sizeof(*src)); 1754 wmb(); 1755 } 1756 } 1757 1758 /* 1759 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1760 * at most 32 bytes at a time, so as to avoid involving the software 1761 * pio handler in the nic. We re-write the first segment's flags 1762 * to mark them valid only after writing the entire chain 1763 */ 1764 1765 static inline void 1766 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1767 int cnt) 1768 { 1769 int idx, i; 1770 uint32_t *src_ints; 1771 volatile uint32_t *dst_ints; 1772 mcp_kreq_ether_send_t *srcp; 1773 volatile mcp_kreq_ether_send_t *dstp, *dst; 1774 uint8_t last_flags; 1775 1776 idx = tx->req & tx->mask; 1777 1778 last_flags = src->flags; 1779 src->flags = 0; 1780 wmb(); 1781 dst = dstp = &tx->lanai[idx]; 1782 srcp = src; 1783 1784 if ((idx + cnt) < tx->mask) { 1785 for (i = 0; i < (cnt - 1); i += 2) { 1786 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1787 wmb(); /* force write every 32 bytes */ 1788 srcp += 2; 1789 dstp += 2; 1790 } 1791 } else { 1792 /* submit all but the first request, and ensure 1793 that it is submitted below */ 1794 mxge_submit_req_backwards(tx, src, cnt); 1795 i = 0; 1796 } 1797 if (i < cnt) { 1798 /* submit the first request */ 1799 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1800 wmb(); /* barrier before setting valid flag */ 1801 } 1802 1803 /* re-write the last 32-bits with the valid flags */ 1804 src->flags = last_flags; 1805 src_ints = (uint32_t *)src; 1806 src_ints+=3; 1807 dst_ints = (volatile uint32_t *)dst; 1808 dst_ints+=3; 1809 *dst_ints = *src_ints; 1810 tx->req += cnt; 1811 wmb(); 1812 } 1813 1814 #if IFCAP_TSO4 1815 1816 static void 1817 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1818 int busdma_seg_cnt, int ip_off) 1819 { 1820 mxge_tx_ring_t *tx; 1821 mcp_kreq_ether_send_t *req; 1822 bus_dma_segment_t *seg; 1823 struct ip *ip; 1824 struct tcphdr *tcp; 1825 uint32_t low, high_swapped; 1826 int len, seglen, cum_len, cum_len_next; 1827 int next_is_first, chop, cnt, rdma_count, small; 1828 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1829 uint8_t flags, flags_next; 1830 static int once; 1831 1832 mss = m->m_pkthdr.tso_segsz; 1833 1834 /* negative cum_len signifies to the 1835 * send loop that we are still in the 1836 * header portion of the TSO packet. 1837 */ 1838 1839 /* ensure we have the ethernet, IP and TCP 1840 header together in the first mbuf, copy 1841 it to a scratch buffer if not */ 1842 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1843 m_copydata(m, 0, ip_off + sizeof (*ip), 1844 ss->scratch); 1845 ip = (struct ip *)(ss->scratch + ip_off); 1846 } else { 1847 ip = (struct ip *)(mtod(m, char *) + ip_off); 1848 } 1849 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) 1850 + sizeof (*tcp))) { 1851 m_copydata(m, 0, ip_off + (ip->ip_hl << 2) 1852 + sizeof (*tcp), ss->scratch); 1853 ip = (struct ip *)(mtod(m, char *) + ip_off); 1854 } 1855 1856 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1857 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); 1858 1859 /* TSO implies checksum offload on this hardware */ 1860 cksum_offset = ip_off + (ip->ip_hl << 2); 1861 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1862 1863 1864 /* for TSO, pseudo_hdr_offset holds mss. 1865 * The firmware figures out where to put 1866 * the checksum by parsing the header. */ 1867 pseudo_hdr_offset = htobe16(mss); 1868 1869 tx = &ss->tx; 1870 req = tx->req_list; 1871 seg = tx->seg_list; 1872 cnt = 0; 1873 rdma_count = 0; 1874 /* "rdma_count" is the number of RDMAs belonging to the 1875 * current packet BEFORE the current send request. For 1876 * non-TSO packets, this is equal to "count". 1877 * For TSO packets, rdma_count needs to be reset 1878 * to 0 after a segment cut. 1879 * 1880 * The rdma_count field of the send request is 1881 * the number of RDMAs of the packet starting at 1882 * that request. For TSO send requests with one ore more cuts 1883 * in the middle, this is the number of RDMAs starting 1884 * after the last cut in the request. All previous 1885 * segments before the last cut implicitly have 1 RDMA. 1886 * 1887 * Since the number of RDMAs is not known beforehand, 1888 * it must be filled-in retroactively - after each 1889 * segmentation cut or at the end of the entire packet. 1890 */ 1891 1892 while (busdma_seg_cnt) { 1893 /* Break the busdma segment up into pieces*/ 1894 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1895 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1896 len = seg->ds_len; 1897 1898 while (len) { 1899 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1900 seglen = len; 1901 cum_len_next = cum_len + seglen; 1902 (req-rdma_count)->rdma_count = rdma_count + 1; 1903 if (__predict_true(cum_len >= 0)) { 1904 /* payload */ 1905 chop = (cum_len_next > mss); 1906 cum_len_next = cum_len_next % mss; 1907 next_is_first = (cum_len_next == 0); 1908 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1909 flags_next |= next_is_first * 1910 MXGEFW_FLAGS_FIRST; 1911 rdma_count |= -(chop | next_is_first); 1912 rdma_count += chop & !next_is_first; 1913 } else if (cum_len_next >= 0) { 1914 /* header ends */ 1915 rdma_count = -1; 1916 cum_len_next = 0; 1917 seglen = -cum_len; 1918 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1919 flags_next = MXGEFW_FLAGS_TSO_PLD | 1920 MXGEFW_FLAGS_FIRST | 1921 (small * MXGEFW_FLAGS_SMALL); 1922 } 1923 1924 req->addr_high = high_swapped; 1925 req->addr_low = htobe32(low); 1926 req->pseudo_hdr_offset = pseudo_hdr_offset; 1927 req->pad = 0; 1928 req->rdma_count = 1; 1929 req->length = htobe16(seglen); 1930 req->cksum_offset = cksum_offset; 1931 req->flags = flags | ((cum_len & 1) * 1932 MXGEFW_FLAGS_ALIGN_ODD); 1933 low += seglen; 1934 len -= seglen; 1935 cum_len = cum_len_next; 1936 flags = flags_next; 1937 req++; 1938 cnt++; 1939 rdma_count++; 1940 if (__predict_false(cksum_offset > seglen)) 1941 cksum_offset -= seglen; 1942 else 1943 cksum_offset = 0; 1944 if (__predict_false(cnt > tx->max_desc)) 1945 goto drop; 1946 } 1947 busdma_seg_cnt--; 1948 seg++; 1949 } 1950 (req-rdma_count)->rdma_count = rdma_count; 1951 1952 do { 1953 req--; 1954 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1955 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1956 1957 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1958 mxge_submit_req(tx, tx->req_list, cnt); 1959 #ifdef IFNET_BUF_RING 1960 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 1961 /* tell the NIC to start polling this slice */ 1962 *tx->send_go = 1; 1963 tx->queue_active = 1; 1964 tx->activate++; 1965 wmb(); 1966 } 1967 #endif 1968 return; 1969 1970 drop: 1971 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1972 m_freem(m); 1973 ss->oerrors++; 1974 if (!once) { 1975 printf("tx->max_desc exceeded via TSO!\n"); 1976 printf("mss = %d, %ld, %d!\n", mss, 1977 (long)seg - (long)tx->seg_list, tx->max_desc); 1978 once = 1; 1979 } 1980 return; 1981 1982 } 1983 1984 #endif /* IFCAP_TSO4 */ 1985 1986 #ifdef MXGE_NEW_VLAN_API 1987 /* 1988 * We reproduce the software vlan tag insertion from 1989 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 1990 * vlan tag insertion. We need to advertise this in order to have the 1991 * vlan interface respect our csum offload flags. 1992 */ 1993 static struct mbuf * 1994 mxge_vlan_tag_insert(struct mbuf *m) 1995 { 1996 struct ether_vlan_header *evl; 1997 1998 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); 1999 if (__predict_false(m == NULL)) 2000 return NULL; 2001 if (m->m_len < sizeof(*evl)) { 2002 m = m_pullup(m, sizeof(*evl)); 2003 if (__predict_false(m == NULL)) 2004 return NULL; 2005 } 2006 /* 2007 * Transform the Ethernet header into an Ethernet header 2008 * with 802.1Q encapsulation. 2009 */ 2010 evl = mtod(m, struct ether_vlan_header *); 2011 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2012 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2013 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2014 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2015 m->m_flags &= ~M_VLANTAG; 2016 return m; 2017 } 2018 #endif /* MXGE_NEW_VLAN_API */ 2019 2020 static void 2021 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2022 { 2023 mxge_softc_t *sc; 2024 mcp_kreq_ether_send_t *req; 2025 bus_dma_segment_t *seg; 2026 struct mbuf *m_tmp; 2027 struct ifnet *ifp; 2028 mxge_tx_ring_t *tx; 2029 struct ip *ip; 2030 int cnt, cum_len, err, i, idx, odd_flag, ip_off; 2031 uint16_t pseudo_hdr_offset; 2032 uint8_t flags, cksum_offset; 2033 2034 2035 sc = ss->sc; 2036 ifp = sc->ifp; 2037 tx = &ss->tx; 2038 2039 ip_off = sizeof (struct ether_header); 2040 #ifdef MXGE_NEW_VLAN_API 2041 if (m->m_flags & M_VLANTAG) { 2042 m = mxge_vlan_tag_insert(m); 2043 if (__predict_false(m == NULL)) 2044 goto drop; 2045 ip_off += ETHER_VLAN_ENCAP_LEN; 2046 } 2047 #endif 2048 /* (try to) map the frame for DMA */ 2049 idx = tx->req & tx->mask; 2050 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2051 m, tx->seg_list, &cnt, 2052 BUS_DMA_NOWAIT); 2053 if (__predict_false(err == EFBIG)) { 2054 /* Too many segments in the chain. Try 2055 to defrag */ 2056 m_tmp = m_defrag(m, M_NOWAIT); 2057 if (m_tmp == NULL) { 2058 goto drop; 2059 } 2060 ss->tx.defrag++; 2061 m = m_tmp; 2062 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2063 tx->info[idx].map, 2064 m, tx->seg_list, &cnt, 2065 BUS_DMA_NOWAIT); 2066 } 2067 if (__predict_false(err != 0)) { 2068 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2069 " packet len = %d\n", err, m->m_pkthdr.len); 2070 goto drop; 2071 } 2072 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2073 BUS_DMASYNC_PREWRITE); 2074 tx->info[idx].m = m; 2075 2076 #if IFCAP_TSO4 2077 /* TSO is different enough, we handle it in another routine */ 2078 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2079 mxge_encap_tso(ss, m, cnt, ip_off); 2080 return; 2081 } 2082 #endif 2083 2084 req = tx->req_list; 2085 cksum_offset = 0; 2086 pseudo_hdr_offset = 0; 2087 flags = MXGEFW_FLAGS_NO_TSO; 2088 2089 /* checksum offloading? */ 2090 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 2091 /* ensure ip header is in first mbuf, copy 2092 it to a scratch buffer if not */ 2093 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 2094 m_copydata(m, 0, ip_off + sizeof (*ip), 2095 ss->scratch); 2096 ip = (struct ip *)(ss->scratch + ip_off); 2097 } else { 2098 ip = (struct ip *)(mtod(m, char *) + ip_off); 2099 } 2100 cksum_offset = ip_off + (ip->ip_hl << 2); 2101 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2102 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2103 req->cksum_offset = cksum_offset; 2104 flags |= MXGEFW_FLAGS_CKSUM; 2105 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2106 } else { 2107 odd_flag = 0; 2108 } 2109 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2110 flags |= MXGEFW_FLAGS_SMALL; 2111 2112 /* convert segments into a request list */ 2113 cum_len = 0; 2114 seg = tx->seg_list; 2115 req->flags = MXGEFW_FLAGS_FIRST; 2116 for (i = 0; i < cnt; i++) { 2117 req->addr_low = 2118 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2119 req->addr_high = 2120 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2121 req->length = htobe16(seg->ds_len); 2122 req->cksum_offset = cksum_offset; 2123 if (cksum_offset > seg->ds_len) 2124 cksum_offset -= seg->ds_len; 2125 else 2126 cksum_offset = 0; 2127 req->pseudo_hdr_offset = pseudo_hdr_offset; 2128 req->pad = 0; /* complete solid 16-byte block */ 2129 req->rdma_count = 1; 2130 req->flags |= flags | ((cum_len & 1) * odd_flag); 2131 cum_len += seg->ds_len; 2132 seg++; 2133 req++; 2134 req->flags = 0; 2135 } 2136 req--; 2137 /* pad runts to 60 bytes */ 2138 if (cum_len < 60) { 2139 req++; 2140 req->addr_low = 2141 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2142 req->addr_high = 2143 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2144 req->length = htobe16(60 - cum_len); 2145 req->cksum_offset = 0; 2146 req->pseudo_hdr_offset = pseudo_hdr_offset; 2147 req->pad = 0; /* complete solid 16-byte block */ 2148 req->rdma_count = 1; 2149 req->flags |= flags | ((cum_len & 1) * odd_flag); 2150 cnt++; 2151 } 2152 2153 tx->req_list[0].rdma_count = cnt; 2154 #if 0 2155 /* print what the firmware will see */ 2156 for (i = 0; i < cnt; i++) { 2157 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2158 "cso:%d, flags:0x%x, rdma:%d\n", 2159 i, (int)ntohl(tx->req_list[i].addr_high), 2160 (int)ntohl(tx->req_list[i].addr_low), 2161 (int)ntohs(tx->req_list[i].length), 2162 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2163 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2164 tx->req_list[i].rdma_count); 2165 } 2166 printf("--------------\n"); 2167 #endif 2168 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2169 mxge_submit_req(tx, tx->req_list, cnt); 2170 #ifdef IFNET_BUF_RING 2171 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2172 /* tell the NIC to start polling this slice */ 2173 *tx->send_go = 1; 2174 tx->queue_active = 1; 2175 tx->activate++; 2176 wmb(); 2177 } 2178 #endif 2179 return; 2180 2181 drop: 2182 m_freem(m); 2183 ss->oerrors++; 2184 return; 2185 } 2186 2187 #ifdef IFNET_BUF_RING 2188 static void 2189 mxge_qflush(struct ifnet *ifp) 2190 { 2191 mxge_softc_t *sc = ifp->if_softc; 2192 mxge_tx_ring_t *tx; 2193 struct mbuf *m; 2194 int slice; 2195 2196 for (slice = 0; slice < sc->num_slices; slice++) { 2197 tx = &sc->ss[slice].tx; 2198 mtx_lock(&tx->mtx); 2199 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2200 m_freem(m); 2201 mtx_unlock(&tx->mtx); 2202 } 2203 if_qflush(ifp); 2204 } 2205 2206 static inline void 2207 mxge_start_locked(struct mxge_slice_state *ss) 2208 { 2209 mxge_softc_t *sc; 2210 struct mbuf *m; 2211 struct ifnet *ifp; 2212 mxge_tx_ring_t *tx; 2213 2214 sc = ss->sc; 2215 ifp = sc->ifp; 2216 tx = &ss->tx; 2217 2218 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2219 m = drbr_dequeue(ifp, tx->br); 2220 if (m == NULL) { 2221 return; 2222 } 2223 /* let BPF see it */ 2224 BPF_MTAP(ifp, m); 2225 2226 /* give it to the nic */ 2227 mxge_encap(ss, m); 2228 } 2229 /* ran out of transmit slots */ 2230 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2231 && (!drbr_empty(ifp, tx->br))) { 2232 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2233 tx->stall++; 2234 } 2235 } 2236 2237 static int 2238 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2239 { 2240 mxge_softc_t *sc; 2241 struct ifnet *ifp; 2242 mxge_tx_ring_t *tx; 2243 int err; 2244 2245 sc = ss->sc; 2246 ifp = sc->ifp; 2247 tx = &ss->tx; 2248 2249 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2250 IFF_DRV_RUNNING) { 2251 err = drbr_enqueue(ifp, tx->br, m); 2252 return (err); 2253 } 2254 2255 if (!drbr_needs_enqueue(ifp, tx->br) && 2256 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2257 /* let BPF see it */ 2258 BPF_MTAP(ifp, m); 2259 /* give it to the nic */ 2260 mxge_encap(ss, m); 2261 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2262 return (err); 2263 } 2264 if (!drbr_empty(ifp, tx->br)) 2265 mxge_start_locked(ss); 2266 return (0); 2267 } 2268 2269 static int 2270 mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2271 { 2272 mxge_softc_t *sc = ifp->if_softc; 2273 struct mxge_slice_state *ss; 2274 mxge_tx_ring_t *tx; 2275 int err = 0; 2276 int slice; 2277 2278 slice = m->m_pkthdr.flowid; 2279 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2280 2281 ss = &sc->ss[slice]; 2282 tx = &ss->tx; 2283 2284 if (mtx_trylock(&tx->mtx)) { 2285 err = mxge_transmit_locked(ss, m); 2286 mtx_unlock(&tx->mtx); 2287 } else { 2288 err = drbr_enqueue(ifp, tx->br, m); 2289 } 2290 2291 return (err); 2292 } 2293 2294 #else 2295 2296 static inline void 2297 mxge_start_locked(struct mxge_slice_state *ss) 2298 { 2299 mxge_softc_t *sc; 2300 struct mbuf *m; 2301 struct ifnet *ifp; 2302 mxge_tx_ring_t *tx; 2303 2304 sc = ss->sc; 2305 ifp = sc->ifp; 2306 tx = &ss->tx; 2307 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2308 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2309 if (m == NULL) { 2310 return; 2311 } 2312 /* let BPF see it */ 2313 BPF_MTAP(ifp, m); 2314 2315 /* give it to the nic */ 2316 mxge_encap(ss, m); 2317 } 2318 /* ran out of transmit slots */ 2319 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2320 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2321 tx->stall++; 2322 } 2323 } 2324 #endif 2325 static void 2326 mxge_start(struct ifnet *ifp) 2327 { 2328 mxge_softc_t *sc = ifp->if_softc; 2329 struct mxge_slice_state *ss; 2330 2331 /* only use the first slice for now */ 2332 ss = &sc->ss[0]; 2333 mtx_lock(&ss->tx.mtx); 2334 mxge_start_locked(ss); 2335 mtx_unlock(&ss->tx.mtx); 2336 } 2337 2338 /* 2339 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2340 * at most 32 bytes at a time, so as to avoid involving the software 2341 * pio handler in the nic. We re-write the first segment's low 2342 * DMA address to mark it valid only after we write the entire chunk 2343 * in a burst 2344 */ 2345 static inline void 2346 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2347 mcp_kreq_ether_recv_t *src) 2348 { 2349 uint32_t low; 2350 2351 low = src->addr_low; 2352 src->addr_low = 0xffffffff; 2353 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2354 wmb(); 2355 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2356 wmb(); 2357 src->addr_low = low; 2358 dst->addr_low = low; 2359 wmb(); 2360 } 2361 2362 static int 2363 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2364 { 2365 bus_dma_segment_t seg; 2366 struct mbuf *m; 2367 mxge_rx_ring_t *rx = &ss->rx_small; 2368 int cnt, err; 2369 2370 m = m_gethdr(M_DONTWAIT, MT_DATA); 2371 if (m == NULL) { 2372 rx->alloc_fail++; 2373 err = ENOBUFS; 2374 goto done; 2375 } 2376 m->m_len = MHLEN; 2377 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2378 &seg, &cnt, BUS_DMA_NOWAIT); 2379 if (err != 0) { 2380 m_free(m); 2381 goto done; 2382 } 2383 rx->info[idx].m = m; 2384 rx->shadow[idx].addr_low = 2385 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2386 rx->shadow[idx].addr_high = 2387 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2388 2389 done: 2390 if ((idx & 7) == 7) 2391 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2392 return err; 2393 } 2394 2395 static int 2396 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2397 { 2398 bus_dma_segment_t seg[3]; 2399 struct mbuf *m; 2400 mxge_rx_ring_t *rx = &ss->rx_big; 2401 int cnt, err, i; 2402 2403 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2404 if (m == NULL) { 2405 rx->alloc_fail++; 2406 err = ENOBUFS; 2407 goto done; 2408 } 2409 m->m_len = rx->mlen; 2410 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2411 seg, &cnt, BUS_DMA_NOWAIT); 2412 if (err != 0) { 2413 m_free(m); 2414 goto done; 2415 } 2416 rx->info[idx].m = m; 2417 rx->shadow[idx].addr_low = 2418 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2419 rx->shadow[idx].addr_high = 2420 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2421 2422 #if MXGE_VIRT_JUMBOS 2423 for (i = 1; i < cnt; i++) { 2424 rx->shadow[idx + i].addr_low = 2425 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2426 rx->shadow[idx + i].addr_high = 2427 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2428 } 2429 #endif 2430 2431 done: 2432 for (i = 0; i < rx->nbufs; i++) { 2433 if ((idx & 7) == 7) { 2434 mxge_submit_8rx(&rx->lanai[idx - 7], 2435 &rx->shadow[idx - 7]); 2436 } 2437 idx++; 2438 } 2439 return err; 2440 } 2441 2442 /* 2443 * Myri10GE hardware checksums are not valid if the sender 2444 * padded the frame with non-zero padding. This is because 2445 * the firmware just does a simple 16-bit 1s complement 2446 * checksum across the entire frame, excluding the first 14 2447 * bytes. It is best to simply to check the checksum and 2448 * tell the stack about it only if the checksum is good 2449 */ 2450 2451 static inline uint16_t 2452 mxge_rx_csum(struct mbuf *m, int csum) 2453 { 2454 struct ether_header *eh; 2455 struct ip *ip; 2456 uint16_t c; 2457 2458 eh = mtod(m, struct ether_header *); 2459 2460 /* only deal with IPv4 TCP & UDP for now */ 2461 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2462 return 1; 2463 ip = (struct ip *)(eh + 1); 2464 if (__predict_false(ip->ip_p != IPPROTO_TCP && 2465 ip->ip_p != IPPROTO_UDP)) 2466 return 1; 2467 #ifdef INET 2468 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2469 htonl(ntohs(csum) + ntohs(ip->ip_len) + 2470 - (ip->ip_hl << 2) + ip->ip_p)); 2471 #else 2472 c = 1; 2473 #endif 2474 c ^= 0xffff; 2475 return (c); 2476 } 2477 2478 static void 2479 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2480 { 2481 struct ether_vlan_header *evl; 2482 struct ether_header *eh; 2483 uint32_t partial; 2484 2485 evl = mtod(m, struct ether_vlan_header *); 2486 eh = mtod(m, struct ether_header *); 2487 2488 /* 2489 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2490 * after what the firmware thought was the end of the ethernet 2491 * header. 2492 */ 2493 2494 /* put checksum into host byte order */ 2495 *csum = ntohs(*csum); 2496 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2497 (*csum) += ~partial; 2498 (*csum) += ((*csum) < ~partial); 2499 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2500 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2501 2502 /* restore checksum to network byte order; 2503 later consumers expect this */ 2504 *csum = htons(*csum); 2505 2506 /* save the tag */ 2507 #ifdef MXGE_NEW_VLAN_API 2508 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2509 #else 2510 { 2511 struct m_tag *mtag; 2512 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2513 M_NOWAIT); 2514 if (mtag == NULL) 2515 return; 2516 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2517 m_tag_prepend(m, mtag); 2518 } 2519 2520 #endif 2521 m->m_flags |= M_VLANTAG; 2522 2523 /* 2524 * Remove the 802.1q header by copying the Ethernet 2525 * addresses over it and adjusting the beginning of 2526 * the data in the mbuf. The encapsulated Ethernet 2527 * type field is already in place. 2528 */ 2529 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2530 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2531 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2532 } 2533 2534 2535 static inline void 2536 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2537 { 2538 mxge_softc_t *sc; 2539 struct ifnet *ifp; 2540 struct mbuf *m; 2541 struct ether_header *eh; 2542 mxge_rx_ring_t *rx; 2543 bus_dmamap_t old_map; 2544 int idx; 2545 uint16_t tcpudp_csum; 2546 2547 sc = ss->sc; 2548 ifp = sc->ifp; 2549 rx = &ss->rx_big; 2550 idx = rx->cnt & rx->mask; 2551 rx->cnt += rx->nbufs; 2552 /* save a pointer to the received mbuf */ 2553 m = rx->info[idx].m; 2554 /* try to replace the received mbuf */ 2555 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2556 /* drop the frame -- the old mbuf is re-cycled */ 2557 ifp->if_ierrors++; 2558 return; 2559 } 2560 2561 /* unmap the received buffer */ 2562 old_map = rx->info[idx].map; 2563 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2564 bus_dmamap_unload(rx->dmat, old_map); 2565 2566 /* swap the bus_dmamap_t's */ 2567 rx->info[idx].map = rx->extra_map; 2568 rx->extra_map = old_map; 2569 2570 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2571 * aligned */ 2572 m->m_data += MXGEFW_PAD; 2573 2574 m->m_pkthdr.rcvif = ifp; 2575 m->m_len = m->m_pkthdr.len = len; 2576 ss->ipackets++; 2577 eh = mtod(m, struct ether_header *); 2578 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2579 mxge_vlan_tag_remove(m, &csum); 2580 } 2581 /* if the checksum is valid, mark it in the mbuf header */ 2582 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2583 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2584 return; 2585 /* otherwise, it was a UDP frame, or a TCP frame which 2586 we could not do LRO on. Tell the stack that the 2587 checksum is good */ 2588 m->m_pkthdr.csum_data = 0xffff; 2589 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2590 } 2591 /* flowid only valid if RSS hashing is enabled */ 2592 if (sc->num_slices > 1) { 2593 m->m_pkthdr.flowid = (ss - sc->ss); 2594 m->m_flags |= M_FLOWID; 2595 } 2596 /* pass the frame up the stack */ 2597 (*ifp->if_input)(ifp, m); 2598 } 2599 2600 static inline void 2601 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2602 { 2603 mxge_softc_t *sc; 2604 struct ifnet *ifp; 2605 struct ether_header *eh; 2606 struct mbuf *m; 2607 mxge_rx_ring_t *rx; 2608 bus_dmamap_t old_map; 2609 int idx; 2610 uint16_t tcpudp_csum; 2611 2612 sc = ss->sc; 2613 ifp = sc->ifp; 2614 rx = &ss->rx_small; 2615 idx = rx->cnt & rx->mask; 2616 rx->cnt++; 2617 /* save a pointer to the received mbuf */ 2618 m = rx->info[idx].m; 2619 /* try to replace the received mbuf */ 2620 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2621 /* drop the frame -- the old mbuf is re-cycled */ 2622 ifp->if_ierrors++; 2623 return; 2624 } 2625 2626 /* unmap the received buffer */ 2627 old_map = rx->info[idx].map; 2628 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2629 bus_dmamap_unload(rx->dmat, old_map); 2630 2631 /* swap the bus_dmamap_t's */ 2632 rx->info[idx].map = rx->extra_map; 2633 rx->extra_map = old_map; 2634 2635 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2636 * aligned */ 2637 m->m_data += MXGEFW_PAD; 2638 2639 m->m_pkthdr.rcvif = ifp; 2640 m->m_len = m->m_pkthdr.len = len; 2641 ss->ipackets++; 2642 eh = mtod(m, struct ether_header *); 2643 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2644 mxge_vlan_tag_remove(m, &csum); 2645 } 2646 /* if the checksum is valid, mark it in the mbuf header */ 2647 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2648 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2649 return; 2650 /* otherwise, it was a UDP frame, or a TCP frame which 2651 we could not do LRO on. Tell the stack that the 2652 checksum is good */ 2653 m->m_pkthdr.csum_data = 0xffff; 2654 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2655 } 2656 /* flowid only valid if RSS hashing is enabled */ 2657 if (sc->num_slices > 1) { 2658 m->m_pkthdr.flowid = (ss - sc->ss); 2659 m->m_flags |= M_FLOWID; 2660 } 2661 /* pass the frame up the stack */ 2662 (*ifp->if_input)(ifp, m); 2663 } 2664 2665 static inline void 2666 mxge_clean_rx_done(struct mxge_slice_state *ss) 2667 { 2668 mxge_rx_done_t *rx_done = &ss->rx_done; 2669 int limit = 0; 2670 uint16_t length; 2671 uint16_t checksum; 2672 2673 2674 while (rx_done->entry[rx_done->idx].length != 0) { 2675 length = ntohs(rx_done->entry[rx_done->idx].length); 2676 rx_done->entry[rx_done->idx].length = 0; 2677 checksum = rx_done->entry[rx_done->idx].checksum; 2678 if (length <= (MHLEN - MXGEFW_PAD)) 2679 mxge_rx_done_small(ss, length, checksum); 2680 else 2681 mxge_rx_done_big(ss, length, checksum); 2682 rx_done->cnt++; 2683 rx_done->idx = rx_done->cnt & rx_done->mask; 2684 2685 /* limit potential for livelock */ 2686 if (__predict_false(++limit > rx_done->mask / 2)) 2687 break; 2688 } 2689 #ifdef INET 2690 while (!SLIST_EMPTY(&ss->lro_active)) { 2691 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active); 2692 SLIST_REMOVE_HEAD(&ss->lro_active, next); 2693 mxge_lro_flush(ss, lro); 2694 } 2695 #endif 2696 } 2697 2698 2699 static inline void 2700 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2701 { 2702 struct ifnet *ifp; 2703 mxge_tx_ring_t *tx; 2704 struct mbuf *m; 2705 bus_dmamap_t map; 2706 int idx; 2707 int *flags; 2708 2709 tx = &ss->tx; 2710 ifp = ss->sc->ifp; 2711 while (tx->pkt_done != mcp_idx) { 2712 idx = tx->done & tx->mask; 2713 tx->done++; 2714 m = tx->info[idx].m; 2715 /* mbuf and DMA map only attached to the first 2716 segment per-mbuf */ 2717 if (m != NULL) { 2718 ss->obytes += m->m_pkthdr.len; 2719 if (m->m_flags & M_MCAST) 2720 ss->omcasts++; 2721 ss->opackets++; 2722 tx->info[idx].m = NULL; 2723 map = tx->info[idx].map; 2724 bus_dmamap_unload(tx->dmat, map); 2725 m_freem(m); 2726 } 2727 if (tx->info[idx].flag) { 2728 tx->info[idx].flag = 0; 2729 tx->pkt_done++; 2730 } 2731 } 2732 2733 /* If we have space, clear IFF_OACTIVE to tell the stack that 2734 its OK to send packets */ 2735 #ifdef IFNET_BUF_RING 2736 flags = &ss->if_drv_flags; 2737 #else 2738 flags = &ifp->if_drv_flags; 2739 #endif 2740 mtx_lock(&ss->tx.mtx); 2741 if ((*flags) & IFF_DRV_OACTIVE && 2742 tx->req - tx->done < (tx->mask + 1)/4) { 2743 *(flags) &= ~IFF_DRV_OACTIVE; 2744 ss->tx.wake++; 2745 mxge_start_locked(ss); 2746 } 2747 #ifdef IFNET_BUF_RING 2748 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2749 /* let the NIC stop polling this queue, since there 2750 * are no more transmits pending */ 2751 if (tx->req == tx->done) { 2752 *tx->send_stop = 1; 2753 tx->queue_active = 0; 2754 tx->deactivate++; 2755 wmb(); 2756 } 2757 } 2758 #endif 2759 mtx_unlock(&ss->tx.mtx); 2760 2761 } 2762 2763 static struct mxge_media_type mxge_xfp_media_types[] = 2764 { 2765 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2766 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2767 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2768 {0, (1 << 5), "10GBASE-ER"}, 2769 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2770 {0, (1 << 3), "10GBASE-SW"}, 2771 {0, (1 << 2), "10GBASE-LW"}, 2772 {0, (1 << 1), "10GBASE-EW"}, 2773 {0, (1 << 0), "Reserved"} 2774 }; 2775 static struct mxge_media_type mxge_sfp_media_types[] = 2776 { 2777 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2778 {0, (1 << 7), "Reserved"}, 2779 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2780 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2781 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2782 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2783 }; 2784 2785 static void 2786 mxge_media_set(mxge_softc_t *sc, int media_type) 2787 { 2788 2789 2790 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 2791 0, NULL); 2792 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2793 sc->current_media = media_type; 2794 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2795 } 2796 2797 static void 2798 mxge_media_init(mxge_softc_t *sc) 2799 { 2800 char *ptr; 2801 int i; 2802 2803 ifmedia_removeall(&sc->media); 2804 mxge_media_set(sc, IFM_AUTO); 2805 2806 /* 2807 * parse the product code to deterimine the interface type 2808 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2809 * after the 3rd dash in the driver's cached copy of the 2810 * EEPROM's product code string. 2811 */ 2812 ptr = sc->product_code_string; 2813 if (ptr == NULL) { 2814 device_printf(sc->dev, "Missing product code\n"); 2815 return; 2816 } 2817 2818 for (i = 0; i < 3; i++, ptr++) { 2819 ptr = index(ptr, '-'); 2820 if (ptr == NULL) { 2821 device_printf(sc->dev, 2822 "only %d dashes in PC?!?\n", i); 2823 return; 2824 } 2825 } 2826 if (*ptr == 'C') { 2827 /* -C is CX4 */ 2828 sc->connector = MXGE_CX4; 2829 mxge_media_set(sc, IFM_10G_CX4); 2830 } else if (*ptr == 'Q') { 2831 /* -Q is Quad Ribbon Fiber */ 2832 sc->connector = MXGE_QRF; 2833 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2834 /* FreeBSD has no media type for Quad ribbon fiber */ 2835 } else if (*ptr == 'R') { 2836 /* -R is XFP */ 2837 sc->connector = MXGE_XFP; 2838 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2839 /* -S or -2S is SFP+ */ 2840 sc->connector = MXGE_SFP; 2841 } else { 2842 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2843 } 2844 } 2845 2846 /* 2847 * Determine the media type for a NIC. Some XFPs will identify 2848 * themselves only when their link is up, so this is initiated via a 2849 * link up interrupt. However, this can potentially take up to 2850 * several milliseconds, so it is run via the watchdog routine, rather 2851 * than in the interrupt handler itself. 2852 */ 2853 static void 2854 mxge_media_probe(mxge_softc_t *sc) 2855 { 2856 mxge_cmd_t cmd; 2857 char *cage_type; 2858 2859 struct mxge_media_type *mxge_media_types = NULL; 2860 int i, err, ms, mxge_media_type_entries; 2861 uint32_t byte; 2862 2863 sc->need_media_probe = 0; 2864 2865 if (sc->connector == MXGE_XFP) { 2866 /* -R is XFP */ 2867 mxge_media_types = mxge_xfp_media_types; 2868 mxge_media_type_entries = 2869 sizeof (mxge_xfp_media_types) / 2870 sizeof (mxge_xfp_media_types[0]); 2871 byte = MXGE_XFP_COMPLIANCE_BYTE; 2872 cage_type = "XFP"; 2873 } else if (sc->connector == MXGE_SFP) { 2874 /* -S or -2S is SFP+ */ 2875 mxge_media_types = mxge_sfp_media_types; 2876 mxge_media_type_entries = 2877 sizeof (mxge_sfp_media_types) / 2878 sizeof (mxge_sfp_media_types[0]); 2879 cage_type = "SFP+"; 2880 byte = 3; 2881 } else { 2882 /* nothing to do; media type cannot change */ 2883 return; 2884 } 2885 2886 /* 2887 * At this point we know the NIC has an XFP cage, so now we 2888 * try to determine what is in the cage by using the 2889 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2890 * register. We read just one byte, which may take over 2891 * a millisecond 2892 */ 2893 2894 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2895 cmd.data1 = byte; 2896 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2897 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2898 device_printf(sc->dev, "failed to read XFP\n"); 2899 } 2900 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2901 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2902 } 2903 if (err != MXGEFW_CMD_OK) { 2904 return; 2905 } 2906 2907 /* now we wait for the data to be cached */ 2908 cmd.data0 = byte; 2909 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2910 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2911 DELAY(1000); 2912 cmd.data0 = byte; 2913 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2914 } 2915 if (err != MXGEFW_CMD_OK) { 2916 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2917 cage_type, err, ms); 2918 return; 2919 } 2920 2921 if (cmd.data0 == mxge_media_types[0].bitmask) { 2922 if (mxge_verbose) 2923 device_printf(sc->dev, "%s:%s\n", cage_type, 2924 mxge_media_types[0].name); 2925 if (sc->current_media != mxge_media_types[0].flag) { 2926 mxge_media_init(sc); 2927 mxge_media_set(sc, mxge_media_types[0].flag); 2928 } 2929 return; 2930 } 2931 for (i = 1; i < mxge_media_type_entries; i++) { 2932 if (cmd.data0 & mxge_media_types[i].bitmask) { 2933 if (mxge_verbose) 2934 device_printf(sc->dev, "%s:%s\n", 2935 cage_type, 2936 mxge_media_types[i].name); 2937 2938 if (sc->current_media != mxge_media_types[i].flag) { 2939 mxge_media_init(sc); 2940 mxge_media_set(sc, mxge_media_types[i].flag); 2941 } 2942 return; 2943 } 2944 } 2945 if (mxge_verbose) 2946 device_printf(sc->dev, "%s media 0x%x unknown\n", 2947 cage_type, cmd.data0); 2948 2949 return; 2950 } 2951 2952 static void 2953 mxge_intr(void *arg) 2954 { 2955 struct mxge_slice_state *ss = arg; 2956 mxge_softc_t *sc = ss->sc; 2957 mcp_irq_data_t *stats = ss->fw_stats; 2958 mxge_tx_ring_t *tx = &ss->tx; 2959 mxge_rx_done_t *rx_done = &ss->rx_done; 2960 uint32_t send_done_count; 2961 uint8_t valid; 2962 2963 2964 #ifndef IFNET_BUF_RING 2965 /* an interrupt on a non-zero slice is implicitly valid 2966 since MSI-X irqs are not shared */ 2967 if (ss != sc->ss) { 2968 mxge_clean_rx_done(ss); 2969 *ss->irq_claim = be32toh(3); 2970 return; 2971 } 2972 #endif 2973 2974 /* make sure the DMA has finished */ 2975 if (!stats->valid) { 2976 return; 2977 } 2978 valid = stats->valid; 2979 2980 if (sc->legacy_irq) { 2981 /* lower legacy IRQ */ 2982 *sc->irq_deassert = 0; 2983 if (!mxge_deassert_wait) 2984 /* don't wait for conf. that irq is low */ 2985 stats->valid = 0; 2986 } else { 2987 stats->valid = 0; 2988 } 2989 2990 /* loop while waiting for legacy irq deassertion */ 2991 do { 2992 /* check for transmit completes and receives */ 2993 send_done_count = be32toh(stats->send_done_count); 2994 while ((send_done_count != tx->pkt_done) || 2995 (rx_done->entry[rx_done->idx].length != 0)) { 2996 if (send_done_count != tx->pkt_done) 2997 mxge_tx_done(ss, (int)send_done_count); 2998 mxge_clean_rx_done(ss); 2999 send_done_count = be32toh(stats->send_done_count); 3000 } 3001 if (sc->legacy_irq && mxge_deassert_wait) 3002 wmb(); 3003 } while (*((volatile uint8_t *) &stats->valid)); 3004 3005 /* fw link & error stats meaningful only on the first slice */ 3006 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 3007 if (sc->link_state != stats->link_up) { 3008 sc->link_state = stats->link_up; 3009 if (sc->link_state) { 3010 if_link_state_change(sc->ifp, LINK_STATE_UP); 3011 sc->ifp->if_baudrate = IF_Gbps(10UL); 3012 if (mxge_verbose) 3013 device_printf(sc->dev, "link up\n"); 3014 } else { 3015 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3016 sc->ifp->if_baudrate = 0; 3017 if (mxge_verbose) 3018 device_printf(sc->dev, "link down\n"); 3019 } 3020 sc->need_media_probe = 1; 3021 } 3022 if (sc->rdma_tags_available != 3023 be32toh(stats->rdma_tags_available)) { 3024 sc->rdma_tags_available = 3025 be32toh(stats->rdma_tags_available); 3026 device_printf(sc->dev, "RDMA timed out! %d tags " 3027 "left\n", sc->rdma_tags_available); 3028 } 3029 3030 if (stats->link_down) { 3031 sc->down_cnt += stats->link_down; 3032 sc->link_state = 0; 3033 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3034 } 3035 } 3036 3037 /* check to see if we have rx token to pass back */ 3038 if (valid & 0x1) 3039 *ss->irq_claim = be32toh(3); 3040 *(ss->irq_claim + 1) = be32toh(3); 3041 } 3042 3043 static void 3044 mxge_init(void *arg) 3045 { 3046 } 3047 3048 3049 3050 static void 3051 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3052 { 3053 struct lro_entry *lro_entry; 3054 int i; 3055 3056 while (!SLIST_EMPTY(&ss->lro_free)) { 3057 lro_entry = SLIST_FIRST(&ss->lro_free); 3058 SLIST_REMOVE_HEAD(&ss->lro_free, next); 3059 free(lro_entry, M_DEVBUF); 3060 } 3061 3062 for (i = 0; i <= ss->rx_big.mask; i++) { 3063 if (ss->rx_big.info[i].m == NULL) 3064 continue; 3065 bus_dmamap_unload(ss->rx_big.dmat, 3066 ss->rx_big.info[i].map); 3067 m_freem(ss->rx_big.info[i].m); 3068 ss->rx_big.info[i].m = NULL; 3069 } 3070 3071 for (i = 0; i <= ss->rx_small.mask; i++) { 3072 if (ss->rx_small.info[i].m == NULL) 3073 continue; 3074 bus_dmamap_unload(ss->rx_small.dmat, 3075 ss->rx_small.info[i].map); 3076 m_freem(ss->rx_small.info[i].m); 3077 ss->rx_small.info[i].m = NULL; 3078 } 3079 3080 /* transmit ring used only on the first slice */ 3081 if (ss->tx.info == NULL) 3082 return; 3083 3084 for (i = 0; i <= ss->tx.mask; i++) { 3085 ss->tx.info[i].flag = 0; 3086 if (ss->tx.info[i].m == NULL) 3087 continue; 3088 bus_dmamap_unload(ss->tx.dmat, 3089 ss->tx.info[i].map); 3090 m_freem(ss->tx.info[i].m); 3091 ss->tx.info[i].m = NULL; 3092 } 3093 } 3094 3095 static void 3096 mxge_free_mbufs(mxge_softc_t *sc) 3097 { 3098 int slice; 3099 3100 for (slice = 0; slice < sc->num_slices; slice++) 3101 mxge_free_slice_mbufs(&sc->ss[slice]); 3102 } 3103 3104 static void 3105 mxge_free_slice_rings(struct mxge_slice_state *ss) 3106 { 3107 int i; 3108 3109 3110 if (ss->rx_done.entry != NULL) 3111 mxge_dma_free(&ss->rx_done.dma); 3112 ss->rx_done.entry = NULL; 3113 3114 if (ss->tx.req_bytes != NULL) 3115 free(ss->tx.req_bytes, M_DEVBUF); 3116 ss->tx.req_bytes = NULL; 3117 3118 if (ss->tx.seg_list != NULL) 3119 free(ss->tx.seg_list, M_DEVBUF); 3120 ss->tx.seg_list = NULL; 3121 3122 if (ss->rx_small.shadow != NULL) 3123 free(ss->rx_small.shadow, M_DEVBUF); 3124 ss->rx_small.shadow = NULL; 3125 3126 if (ss->rx_big.shadow != NULL) 3127 free(ss->rx_big.shadow, M_DEVBUF); 3128 ss->rx_big.shadow = NULL; 3129 3130 if (ss->tx.info != NULL) { 3131 if (ss->tx.dmat != NULL) { 3132 for (i = 0; i <= ss->tx.mask; i++) { 3133 bus_dmamap_destroy(ss->tx.dmat, 3134 ss->tx.info[i].map); 3135 } 3136 bus_dma_tag_destroy(ss->tx.dmat); 3137 } 3138 free(ss->tx.info, M_DEVBUF); 3139 } 3140 ss->tx.info = NULL; 3141 3142 if (ss->rx_small.info != NULL) { 3143 if (ss->rx_small.dmat != NULL) { 3144 for (i = 0; i <= ss->rx_small.mask; i++) { 3145 bus_dmamap_destroy(ss->rx_small.dmat, 3146 ss->rx_small.info[i].map); 3147 } 3148 bus_dmamap_destroy(ss->rx_small.dmat, 3149 ss->rx_small.extra_map); 3150 bus_dma_tag_destroy(ss->rx_small.dmat); 3151 } 3152 free(ss->rx_small.info, M_DEVBUF); 3153 } 3154 ss->rx_small.info = NULL; 3155 3156 if (ss->rx_big.info != NULL) { 3157 if (ss->rx_big.dmat != NULL) { 3158 for (i = 0; i <= ss->rx_big.mask; i++) { 3159 bus_dmamap_destroy(ss->rx_big.dmat, 3160 ss->rx_big.info[i].map); 3161 } 3162 bus_dmamap_destroy(ss->rx_big.dmat, 3163 ss->rx_big.extra_map); 3164 bus_dma_tag_destroy(ss->rx_big.dmat); 3165 } 3166 free(ss->rx_big.info, M_DEVBUF); 3167 } 3168 ss->rx_big.info = NULL; 3169 } 3170 3171 static void 3172 mxge_free_rings(mxge_softc_t *sc) 3173 { 3174 int slice; 3175 3176 for (slice = 0; slice < sc->num_slices; slice++) 3177 mxge_free_slice_rings(&sc->ss[slice]); 3178 } 3179 3180 static int 3181 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3182 int tx_ring_entries) 3183 { 3184 mxge_softc_t *sc = ss->sc; 3185 size_t bytes; 3186 int err, i; 3187 3188 err = ENOMEM; 3189 3190 /* allocate per-slice receive resources */ 3191 3192 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3193 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3194 3195 /* allocate the rx shadow rings */ 3196 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3197 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3198 if (ss->rx_small.shadow == NULL) 3199 return err; 3200 3201 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3202 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3203 if (ss->rx_big.shadow == NULL) 3204 return err; 3205 3206 /* allocate the rx host info rings */ 3207 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3208 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3209 if (ss->rx_small.info == NULL) 3210 return err; 3211 3212 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3213 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3214 if (ss->rx_big.info == NULL) 3215 return err; 3216 3217 /* allocate the rx busdma resources */ 3218 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3219 1, /* alignment */ 3220 4096, /* boundary */ 3221 BUS_SPACE_MAXADDR, /* low */ 3222 BUS_SPACE_MAXADDR, /* high */ 3223 NULL, NULL, /* filter */ 3224 MHLEN, /* maxsize */ 3225 1, /* num segs */ 3226 MHLEN, /* maxsegsize */ 3227 BUS_DMA_ALLOCNOW, /* flags */ 3228 NULL, NULL, /* lock */ 3229 &ss->rx_small.dmat); /* tag */ 3230 if (err != 0) { 3231 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3232 err); 3233 return err; 3234 } 3235 3236 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3237 1, /* alignment */ 3238 #if MXGE_VIRT_JUMBOS 3239 4096, /* boundary */ 3240 #else 3241 0, /* boundary */ 3242 #endif 3243 BUS_SPACE_MAXADDR, /* low */ 3244 BUS_SPACE_MAXADDR, /* high */ 3245 NULL, NULL, /* filter */ 3246 3*4096, /* maxsize */ 3247 #if MXGE_VIRT_JUMBOS 3248 3, /* num segs */ 3249 4096, /* maxsegsize*/ 3250 #else 3251 1, /* num segs */ 3252 MJUM9BYTES, /* maxsegsize*/ 3253 #endif 3254 BUS_DMA_ALLOCNOW, /* flags */ 3255 NULL, NULL, /* lock */ 3256 &ss->rx_big.dmat); /* tag */ 3257 if (err != 0) { 3258 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3259 err); 3260 return err; 3261 } 3262 for (i = 0; i <= ss->rx_small.mask; i++) { 3263 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3264 &ss->rx_small.info[i].map); 3265 if (err != 0) { 3266 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3267 err); 3268 return err; 3269 } 3270 } 3271 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3272 &ss->rx_small.extra_map); 3273 if (err != 0) { 3274 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3275 err); 3276 return err; 3277 } 3278 3279 for (i = 0; i <= ss->rx_big.mask; i++) { 3280 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3281 &ss->rx_big.info[i].map); 3282 if (err != 0) { 3283 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3284 err); 3285 return err; 3286 } 3287 } 3288 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3289 &ss->rx_big.extra_map); 3290 if (err != 0) { 3291 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3292 err); 3293 return err; 3294 } 3295 3296 /* now allocate TX resouces */ 3297 3298 #ifndef IFNET_BUF_RING 3299 /* only use a single TX ring for now */ 3300 if (ss != ss->sc->ss) 3301 return 0; 3302 #endif 3303 3304 ss->tx.mask = tx_ring_entries - 1; 3305 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3306 3307 3308 /* allocate the tx request copy block */ 3309 bytes = 8 + 3310 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3311 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3312 if (ss->tx.req_bytes == NULL) 3313 return err; 3314 /* ensure req_list entries are aligned to 8 bytes */ 3315 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3316 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3317 3318 /* allocate the tx busdma segment list */ 3319 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3320 ss->tx.seg_list = (bus_dma_segment_t *) 3321 malloc(bytes, M_DEVBUF, M_WAITOK); 3322 if (ss->tx.seg_list == NULL) 3323 return err; 3324 3325 /* allocate the tx host info ring */ 3326 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3327 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3328 if (ss->tx.info == NULL) 3329 return err; 3330 3331 /* allocate the tx busdma resources */ 3332 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3333 1, /* alignment */ 3334 sc->tx_boundary, /* boundary */ 3335 BUS_SPACE_MAXADDR, /* low */ 3336 BUS_SPACE_MAXADDR, /* high */ 3337 NULL, NULL, /* filter */ 3338 65536 + 256, /* maxsize */ 3339 ss->tx.max_desc - 2, /* num segs */ 3340 sc->tx_boundary, /* maxsegsz */ 3341 BUS_DMA_ALLOCNOW, /* flags */ 3342 NULL, NULL, /* lock */ 3343 &ss->tx.dmat); /* tag */ 3344 3345 if (err != 0) { 3346 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3347 err); 3348 return err; 3349 } 3350 3351 /* now use these tags to setup dmamaps for each slot 3352 in the ring */ 3353 for (i = 0; i <= ss->tx.mask; i++) { 3354 err = bus_dmamap_create(ss->tx.dmat, 0, 3355 &ss->tx.info[i].map); 3356 if (err != 0) { 3357 device_printf(sc->dev, "Err %d tx dmamap\n", 3358 err); 3359 return err; 3360 } 3361 } 3362 return 0; 3363 3364 } 3365 3366 static int 3367 mxge_alloc_rings(mxge_softc_t *sc) 3368 { 3369 mxge_cmd_t cmd; 3370 int tx_ring_size; 3371 int tx_ring_entries, rx_ring_entries; 3372 int err, slice; 3373 3374 /* get ring sizes */ 3375 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3376 tx_ring_size = cmd.data0; 3377 if (err != 0) { 3378 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3379 goto abort; 3380 } 3381 3382 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3383 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3384 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3385 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3386 IFQ_SET_READY(&sc->ifp->if_snd); 3387 3388 for (slice = 0; slice < sc->num_slices; slice++) { 3389 err = mxge_alloc_slice_rings(&sc->ss[slice], 3390 rx_ring_entries, 3391 tx_ring_entries); 3392 if (err != 0) 3393 goto abort; 3394 } 3395 return 0; 3396 3397 abort: 3398 mxge_free_rings(sc); 3399 return err; 3400 3401 } 3402 3403 3404 static void 3405 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3406 { 3407 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3408 3409 if (bufsize < MCLBYTES) { 3410 /* easy, everything fits in a single buffer */ 3411 *big_buf_size = MCLBYTES; 3412 *cl_size = MCLBYTES; 3413 *nbufs = 1; 3414 return; 3415 } 3416 3417 if (bufsize < MJUMPAGESIZE) { 3418 /* still easy, everything still fits in a single buffer */ 3419 *big_buf_size = MJUMPAGESIZE; 3420 *cl_size = MJUMPAGESIZE; 3421 *nbufs = 1; 3422 return; 3423 } 3424 #if MXGE_VIRT_JUMBOS 3425 /* now we need to use virtually contiguous buffers */ 3426 *cl_size = MJUM9BYTES; 3427 *big_buf_size = 4096; 3428 *nbufs = mtu / 4096 + 1; 3429 /* needs to be a power of two, so round up */ 3430 if (*nbufs == 3) 3431 *nbufs = 4; 3432 #else 3433 *cl_size = MJUM9BYTES; 3434 *big_buf_size = MJUM9BYTES; 3435 *nbufs = 1; 3436 #endif 3437 } 3438 3439 static int 3440 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3441 { 3442 mxge_softc_t *sc; 3443 mxge_cmd_t cmd; 3444 bus_dmamap_t map; 3445 struct lro_entry *lro_entry; 3446 int err, i, slice; 3447 3448 3449 sc = ss->sc; 3450 slice = ss - sc->ss; 3451 3452 SLIST_INIT(&ss->lro_free); 3453 SLIST_INIT(&ss->lro_active); 3454 3455 for (i = 0; i < sc->lro_cnt; i++) { 3456 lro_entry = (struct lro_entry *) 3457 malloc(sizeof (*lro_entry), M_DEVBUF, 3458 M_NOWAIT | M_ZERO); 3459 if (lro_entry == NULL) { 3460 sc->lro_cnt = i; 3461 break; 3462 } 3463 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); 3464 } 3465 /* get the lanai pointers to the send and receive rings */ 3466 3467 err = 0; 3468 #ifndef IFNET_BUF_RING 3469 /* We currently only send from the first slice */ 3470 if (slice == 0) { 3471 #endif 3472 cmd.data0 = slice; 3473 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3474 ss->tx.lanai = 3475 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3476 ss->tx.send_go = (volatile uint32_t *) 3477 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3478 ss->tx.send_stop = (volatile uint32_t *) 3479 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3480 #ifndef IFNET_BUF_RING 3481 } 3482 #endif 3483 cmd.data0 = slice; 3484 err |= mxge_send_cmd(sc, 3485 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3486 ss->rx_small.lanai = 3487 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3488 cmd.data0 = slice; 3489 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3490 ss->rx_big.lanai = 3491 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3492 3493 if (err != 0) { 3494 device_printf(sc->dev, 3495 "failed to get ring sizes or locations\n"); 3496 return EIO; 3497 } 3498 3499 /* stock receive rings */ 3500 for (i = 0; i <= ss->rx_small.mask; i++) { 3501 map = ss->rx_small.info[i].map; 3502 err = mxge_get_buf_small(ss, map, i); 3503 if (err) { 3504 device_printf(sc->dev, "alloced %d/%d smalls\n", 3505 i, ss->rx_small.mask + 1); 3506 return ENOMEM; 3507 } 3508 } 3509 for (i = 0; i <= ss->rx_big.mask; i++) { 3510 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3511 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3512 } 3513 ss->rx_big.nbufs = nbufs; 3514 ss->rx_big.cl_size = cl_size; 3515 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3516 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3517 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3518 map = ss->rx_big.info[i].map; 3519 err = mxge_get_buf_big(ss, map, i); 3520 if (err) { 3521 device_printf(sc->dev, "alloced %d/%d bigs\n", 3522 i, ss->rx_big.mask + 1); 3523 return ENOMEM; 3524 } 3525 } 3526 return 0; 3527 } 3528 3529 static int 3530 mxge_open(mxge_softc_t *sc) 3531 { 3532 mxge_cmd_t cmd; 3533 int err, big_bytes, nbufs, slice, cl_size, i; 3534 bus_addr_t bus; 3535 volatile uint8_t *itable; 3536 struct mxge_slice_state *ss; 3537 3538 /* Copy the MAC address in case it was overridden */ 3539 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3540 3541 err = mxge_reset(sc, 1); 3542 if (err != 0) { 3543 device_printf(sc->dev, "failed to reset\n"); 3544 return EIO; 3545 } 3546 3547 if (sc->num_slices > 1) { 3548 /* setup the indirection table */ 3549 cmd.data0 = sc->num_slices; 3550 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3551 &cmd); 3552 3553 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3554 &cmd); 3555 if (err != 0) { 3556 device_printf(sc->dev, 3557 "failed to setup rss tables\n"); 3558 return err; 3559 } 3560 3561 /* just enable an identity mapping */ 3562 itable = sc->sram + cmd.data0; 3563 for (i = 0; i < sc->num_slices; i++) 3564 itable[i] = (uint8_t)i; 3565 3566 cmd.data0 = 1; 3567 cmd.data1 = mxge_rss_hash_type; 3568 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3569 if (err != 0) { 3570 device_printf(sc->dev, "failed to enable slices\n"); 3571 return err; 3572 } 3573 } 3574 3575 3576 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3577 3578 cmd.data0 = nbufs; 3579 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3580 &cmd); 3581 /* error is only meaningful if we're trying to set 3582 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3583 if (err && nbufs > 1) { 3584 device_printf(sc->dev, 3585 "Failed to set alway-use-n to %d\n", 3586 nbufs); 3587 return EIO; 3588 } 3589 /* Give the firmware the mtu and the big and small buffer 3590 sizes. The firmware wants the big buf size to be a power 3591 of two. Luckily, FreeBSD's clusters are powers of two */ 3592 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3593 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3594 cmd.data0 = MHLEN - MXGEFW_PAD; 3595 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3596 &cmd); 3597 cmd.data0 = big_bytes; 3598 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3599 3600 if (err != 0) { 3601 device_printf(sc->dev, "failed to setup params\n"); 3602 goto abort; 3603 } 3604 3605 /* Now give him the pointer to the stats block */ 3606 for (slice = 0; 3607 #ifdef IFNET_BUF_RING 3608 slice < sc->num_slices; 3609 #else 3610 slice < 1; 3611 #endif 3612 slice++) { 3613 ss = &sc->ss[slice]; 3614 cmd.data0 = 3615 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3616 cmd.data1 = 3617 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3618 cmd.data2 = sizeof(struct mcp_irq_data); 3619 cmd.data2 |= (slice << 16); 3620 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3621 } 3622 3623 if (err != 0) { 3624 bus = sc->ss->fw_stats_dma.bus_addr; 3625 bus += offsetof(struct mcp_irq_data, send_done_count); 3626 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3627 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3628 err = mxge_send_cmd(sc, 3629 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3630 &cmd); 3631 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3632 sc->fw_multicast_support = 0; 3633 } else { 3634 sc->fw_multicast_support = 1; 3635 } 3636 3637 if (err != 0) { 3638 device_printf(sc->dev, "failed to setup params\n"); 3639 goto abort; 3640 } 3641 3642 for (slice = 0; slice < sc->num_slices; slice++) { 3643 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3644 if (err != 0) { 3645 device_printf(sc->dev, "couldn't open slice %d\n", 3646 slice); 3647 goto abort; 3648 } 3649 } 3650 3651 /* Finally, start the firmware running */ 3652 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3653 if (err) { 3654 device_printf(sc->dev, "Couldn't bring up link\n"); 3655 goto abort; 3656 } 3657 #ifdef IFNET_BUF_RING 3658 for (slice = 0; slice < sc->num_slices; slice++) { 3659 ss = &sc->ss[slice]; 3660 ss->if_drv_flags |= IFF_DRV_RUNNING; 3661 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3662 } 3663 #endif 3664 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3665 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3666 3667 return 0; 3668 3669 3670 abort: 3671 mxge_free_mbufs(sc); 3672 3673 return err; 3674 } 3675 3676 static int 3677 mxge_close(mxge_softc_t *sc, int down) 3678 { 3679 mxge_cmd_t cmd; 3680 int err, old_down_cnt; 3681 #ifdef IFNET_BUF_RING 3682 struct mxge_slice_state *ss; 3683 int slice; 3684 #endif 3685 3686 #ifdef IFNET_BUF_RING 3687 for (slice = 0; slice < sc->num_slices; slice++) { 3688 ss = &sc->ss[slice]; 3689 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3690 } 3691 #endif 3692 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3693 if (!down) { 3694 old_down_cnt = sc->down_cnt; 3695 wmb(); 3696 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3697 if (err) { 3698 device_printf(sc->dev, 3699 "Couldn't bring down link\n"); 3700 } 3701 if (old_down_cnt == sc->down_cnt) { 3702 /* wait for down irq */ 3703 DELAY(10 * sc->intr_coal_delay); 3704 } 3705 wmb(); 3706 if (old_down_cnt == sc->down_cnt) { 3707 device_printf(sc->dev, "never got down irq\n"); 3708 } 3709 } 3710 mxge_free_mbufs(sc); 3711 3712 return 0; 3713 } 3714 3715 static void 3716 mxge_setup_cfg_space(mxge_softc_t *sc) 3717 { 3718 device_t dev = sc->dev; 3719 int reg; 3720 uint16_t cmd, lnk, pectl; 3721 3722 /* find the PCIe link width and set max read request to 4KB*/ 3723 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3724 lnk = pci_read_config(dev, reg + 0x12, 2); 3725 sc->link_width = (lnk >> 4) & 0x3f; 3726 3727 if (sc->pectl == 0) { 3728 pectl = pci_read_config(dev, reg + 0x8, 2); 3729 pectl = (pectl & ~0x7000) | (5 << 12); 3730 pci_write_config(dev, reg + 0x8, pectl, 2); 3731 sc->pectl = pectl; 3732 } else { 3733 /* restore saved pectl after watchdog reset */ 3734 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3735 } 3736 } 3737 3738 /* Enable DMA and Memory space access */ 3739 pci_enable_busmaster(dev); 3740 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 3741 cmd |= PCIM_CMD_MEMEN; 3742 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 3743 } 3744 3745 static uint32_t 3746 mxge_read_reboot(mxge_softc_t *sc) 3747 { 3748 device_t dev = sc->dev; 3749 uint32_t vs; 3750 3751 /* find the vendor specific offset */ 3752 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 3753 device_printf(sc->dev, 3754 "could not find vendor specific offset\n"); 3755 return (uint32_t)-1; 3756 } 3757 /* enable read32 mode */ 3758 pci_write_config(dev, vs + 0x10, 0x3, 1); 3759 /* tell NIC which register to read */ 3760 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3761 return (pci_read_config(dev, vs + 0x14, 4)); 3762 } 3763 3764 static void 3765 mxge_watchdog_reset(mxge_softc_t *sc) 3766 { 3767 struct pci_devinfo *dinfo; 3768 struct mxge_slice_state *ss; 3769 int err, running, s, num_tx_slices = 1; 3770 uint32_t reboot; 3771 uint16_t cmd; 3772 3773 err = ENXIO; 3774 3775 device_printf(sc->dev, "Watchdog reset!\n"); 3776 3777 /* 3778 * check to see if the NIC rebooted. If it did, then all of 3779 * PCI config space has been reset, and things like the 3780 * busmaster bit will be zero. If this is the case, then we 3781 * must restore PCI config space before the NIC can be used 3782 * again 3783 */ 3784 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3785 if (cmd == 0xffff) { 3786 /* 3787 * maybe the watchdog caught the NIC rebooting; wait 3788 * up to 100ms for it to finish. If it does not come 3789 * back, then give up 3790 */ 3791 DELAY(1000*100); 3792 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3793 if (cmd == 0xffff) { 3794 device_printf(sc->dev, "NIC disappeared!\n"); 3795 } 3796 } 3797 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3798 /* print the reboot status */ 3799 reboot = mxge_read_reboot(sc); 3800 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3801 reboot); 3802 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3803 if (running) { 3804 3805 /* 3806 * quiesce NIC so that TX routines will not try to 3807 * xmit after restoration of BAR 3808 */ 3809 3810 /* Mark the link as down */ 3811 if (sc->link_state) { 3812 sc->link_state = 0; 3813 if_link_state_change(sc->ifp, 3814 LINK_STATE_DOWN); 3815 } 3816 #ifdef IFNET_BUF_RING 3817 num_tx_slices = sc->num_slices; 3818 #endif 3819 /* grab all TX locks to ensure no tx */ 3820 for (s = 0; s < num_tx_slices; s++) { 3821 ss = &sc->ss[s]; 3822 mtx_lock(&ss->tx.mtx); 3823 } 3824 mxge_close(sc, 1); 3825 } 3826 /* restore PCI configuration space */ 3827 dinfo = device_get_ivars(sc->dev); 3828 pci_cfg_restore(sc->dev, dinfo); 3829 3830 /* and redo any changes we made to our config space */ 3831 mxge_setup_cfg_space(sc); 3832 3833 /* reload f/w */ 3834 err = mxge_load_firmware(sc, 0); 3835 if (err) { 3836 device_printf(sc->dev, 3837 "Unable to re-load f/w\n"); 3838 } 3839 if (running) { 3840 if (!err) 3841 err = mxge_open(sc); 3842 /* release all TX locks */ 3843 for (s = 0; s < num_tx_slices; s++) { 3844 ss = &sc->ss[s]; 3845 #ifdef IFNET_BUF_RING 3846 mxge_start_locked(ss); 3847 #endif 3848 mtx_unlock(&ss->tx.mtx); 3849 } 3850 } 3851 sc->watchdog_resets++; 3852 } else { 3853 device_printf(sc->dev, 3854 "NIC did not reboot, not resetting\n"); 3855 err = 0; 3856 } 3857 if (err) { 3858 device_printf(sc->dev, "watchdog reset failed\n"); 3859 } else { 3860 if (sc->dying == 2) 3861 sc->dying = 0; 3862 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3863 } 3864 } 3865 3866 static void 3867 mxge_watchdog_task(void *arg, int pending) 3868 { 3869 mxge_softc_t *sc = arg; 3870 3871 3872 mtx_lock(&sc->driver_mtx); 3873 mxge_watchdog_reset(sc); 3874 mtx_unlock(&sc->driver_mtx); 3875 } 3876 3877 static void 3878 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3879 { 3880 tx = &sc->ss[slice].tx; 3881 device_printf(sc->dev, "slice %d struck? ring state:\n", slice); 3882 device_printf(sc->dev, 3883 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3884 tx->req, tx->done, tx->queue_active); 3885 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3886 tx->activate, tx->deactivate); 3887 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3888 tx->pkt_done, 3889 be32toh(sc->ss->fw_stats->send_done_count)); 3890 } 3891 3892 static int 3893 mxge_watchdog(mxge_softc_t *sc) 3894 { 3895 mxge_tx_ring_t *tx; 3896 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3897 int i, err = 0; 3898 3899 /* see if we have outstanding transmits, which 3900 have been pending for more than mxge_ticks */ 3901 for (i = 0; 3902 #ifdef IFNET_BUF_RING 3903 (i < sc->num_slices) && (err == 0); 3904 #else 3905 (i < 1) && (err == 0); 3906 #endif 3907 i++) { 3908 tx = &sc->ss[i].tx; 3909 if (tx->req != tx->done && 3910 tx->watchdog_req != tx->watchdog_done && 3911 tx->done == tx->watchdog_done) { 3912 /* check for pause blocking before resetting */ 3913 if (tx->watchdog_rx_pause == rx_pause) { 3914 mxge_warn_stuck(sc, tx, i); 3915 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3916 return (ENXIO); 3917 } 3918 else 3919 device_printf(sc->dev, "Flow control blocking " 3920 "xmits, check link partner\n"); 3921 } 3922 3923 tx->watchdog_req = tx->req; 3924 tx->watchdog_done = tx->done; 3925 tx->watchdog_rx_pause = rx_pause; 3926 } 3927 3928 if (sc->need_media_probe) 3929 mxge_media_probe(sc); 3930 return (err); 3931 } 3932 3933 static u_long 3934 mxge_update_stats(mxge_softc_t *sc) 3935 { 3936 struct mxge_slice_state *ss; 3937 u_long pkts = 0; 3938 u_long ipackets = 0; 3939 u_long opackets = 0; 3940 #ifdef IFNET_BUF_RING 3941 u_long obytes = 0; 3942 u_long omcasts = 0; 3943 u_long odrops = 0; 3944 #endif 3945 u_long oerrors = 0; 3946 int slice; 3947 3948 for (slice = 0; slice < sc->num_slices; slice++) { 3949 ss = &sc->ss[slice]; 3950 ipackets += ss->ipackets; 3951 opackets += ss->opackets; 3952 #ifdef IFNET_BUF_RING 3953 obytes += ss->obytes; 3954 omcasts += ss->omcasts; 3955 odrops += ss->tx.br->br_drops; 3956 #endif 3957 oerrors += ss->oerrors; 3958 } 3959 pkts = (ipackets - sc->ifp->if_ipackets); 3960 pkts += (opackets - sc->ifp->if_opackets); 3961 sc->ifp->if_ipackets = ipackets; 3962 sc->ifp->if_opackets = opackets; 3963 #ifdef IFNET_BUF_RING 3964 sc->ifp->if_obytes = obytes; 3965 sc->ifp->if_omcasts = omcasts; 3966 sc->ifp->if_snd.ifq_drops = odrops; 3967 #endif 3968 sc->ifp->if_oerrors = oerrors; 3969 return pkts; 3970 } 3971 3972 static void 3973 mxge_tick(void *arg) 3974 { 3975 mxge_softc_t *sc = arg; 3976 u_long pkts = 0; 3977 int err = 0; 3978 int running, ticks; 3979 uint16_t cmd; 3980 3981 ticks = mxge_ticks; 3982 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3983 if (running) { 3984 /* aggregate stats from different slices */ 3985 pkts = mxge_update_stats(sc); 3986 if (!sc->watchdog_countdown) { 3987 err = mxge_watchdog(sc); 3988 sc->watchdog_countdown = 4; 3989 } 3990 sc->watchdog_countdown--; 3991 } 3992 if (pkts == 0) { 3993 /* ensure NIC did not suffer h/w fault while idle */ 3994 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3995 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3996 sc->dying = 2; 3997 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3998 err = ENXIO; 3999 } 4000 /* look less often if NIC is idle */ 4001 ticks *= 4; 4002 } 4003 4004 if (err == 0) 4005 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 4006 4007 } 4008 4009 static int 4010 mxge_media_change(struct ifnet *ifp) 4011 { 4012 return EINVAL; 4013 } 4014 4015 static int 4016 mxge_change_mtu(mxge_softc_t *sc, int mtu) 4017 { 4018 struct ifnet *ifp = sc->ifp; 4019 int real_mtu, old_mtu; 4020 int err = 0; 4021 4022 4023 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 4024 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 4025 return EINVAL; 4026 mtx_lock(&sc->driver_mtx); 4027 old_mtu = ifp->if_mtu; 4028 ifp->if_mtu = mtu; 4029 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4030 mxge_close(sc, 0); 4031 err = mxge_open(sc); 4032 if (err != 0) { 4033 ifp->if_mtu = old_mtu; 4034 mxge_close(sc, 0); 4035 (void) mxge_open(sc); 4036 } 4037 } 4038 mtx_unlock(&sc->driver_mtx); 4039 return err; 4040 } 4041 4042 static void 4043 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 4044 { 4045 mxge_softc_t *sc = ifp->if_softc; 4046 4047 4048 if (sc == NULL) 4049 return; 4050 ifmr->ifm_status = IFM_AVALID; 4051 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 4052 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 4053 ifmr->ifm_active |= sc->current_media; 4054 } 4055 4056 static int 4057 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 4058 { 4059 mxge_softc_t *sc = ifp->if_softc; 4060 struct ifreq *ifr = (struct ifreq *)data; 4061 int err, mask; 4062 4063 err = 0; 4064 switch (command) { 4065 case SIOCSIFADDR: 4066 case SIOCGIFADDR: 4067 err = ether_ioctl(ifp, command, data); 4068 break; 4069 4070 case SIOCSIFMTU: 4071 err = mxge_change_mtu(sc, ifr->ifr_mtu); 4072 break; 4073 4074 case SIOCSIFFLAGS: 4075 mtx_lock(&sc->driver_mtx); 4076 if (sc->dying) { 4077 mtx_unlock(&sc->driver_mtx); 4078 return EINVAL; 4079 } 4080 if (ifp->if_flags & IFF_UP) { 4081 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 4082 err = mxge_open(sc); 4083 } else { 4084 /* take care of promis can allmulti 4085 flag chages */ 4086 mxge_change_promisc(sc, 4087 ifp->if_flags & IFF_PROMISC); 4088 mxge_set_multicast_list(sc); 4089 } 4090 } else { 4091 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4092 mxge_close(sc, 0); 4093 } 4094 } 4095 mtx_unlock(&sc->driver_mtx); 4096 break; 4097 4098 case SIOCADDMULTI: 4099 case SIOCDELMULTI: 4100 mtx_lock(&sc->driver_mtx); 4101 mxge_set_multicast_list(sc); 4102 mtx_unlock(&sc->driver_mtx); 4103 break; 4104 4105 case SIOCSIFCAP: 4106 mtx_lock(&sc->driver_mtx); 4107 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 4108 if (mask & IFCAP_TXCSUM) { 4109 if (IFCAP_TXCSUM & ifp->if_capenable) { 4110 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 4111 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 4112 | CSUM_TSO); 4113 } else { 4114 ifp->if_capenable |= IFCAP_TXCSUM; 4115 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 4116 } 4117 } else if (mask & IFCAP_RXCSUM) { 4118 if (IFCAP_RXCSUM & ifp->if_capenable) { 4119 ifp->if_capenable &= ~IFCAP_RXCSUM; 4120 sc->csum_flag = 0; 4121 } else { 4122 ifp->if_capenable |= IFCAP_RXCSUM; 4123 sc->csum_flag = 1; 4124 } 4125 } 4126 if (mask & IFCAP_TSO4) { 4127 if (IFCAP_TSO4 & ifp->if_capenable) { 4128 ifp->if_capenable &= ~IFCAP_TSO4; 4129 ifp->if_hwassist &= ~CSUM_TSO; 4130 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 4131 ifp->if_capenable |= IFCAP_TSO4; 4132 ifp->if_hwassist |= CSUM_TSO; 4133 } else { 4134 printf("mxge requires tx checksum offload" 4135 " be enabled to use TSO\n"); 4136 err = EINVAL; 4137 } 4138 } 4139 if (mask & IFCAP_LRO) { 4140 if (IFCAP_LRO & ifp->if_capenable) 4141 err = mxge_change_lro_locked(sc, 0); 4142 else 4143 err = mxge_change_lro_locked(sc, mxge_lro_cnt); 4144 } 4145 if (mask & IFCAP_VLAN_HWTAGGING) 4146 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 4147 if (mask & IFCAP_VLAN_HWTSO) 4148 ifp->if_capenable ^= IFCAP_VLAN_HWTSO; 4149 4150 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) || 4151 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING)) 4152 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; 4153 4154 mtx_unlock(&sc->driver_mtx); 4155 VLAN_CAPABILITIES(ifp); 4156 4157 break; 4158 4159 case SIOCGIFMEDIA: 4160 mtx_lock(&sc->driver_mtx); 4161 mxge_media_probe(sc); 4162 mtx_unlock(&sc->driver_mtx); 4163 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4164 &sc->media, command); 4165 break; 4166 4167 default: 4168 err = ENOTTY; 4169 } 4170 return err; 4171 } 4172 4173 static void 4174 mxge_fetch_tunables(mxge_softc_t *sc) 4175 { 4176 4177 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4178 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4179 &mxge_flow_control); 4180 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4181 &mxge_intr_coal_delay); 4182 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4183 &mxge_nvidia_ecrc_enable); 4184 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4185 &mxge_force_firmware); 4186 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4187 &mxge_deassert_wait); 4188 TUNABLE_INT_FETCH("hw.mxge.verbose", 4189 &mxge_verbose); 4190 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4191 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); 4192 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4193 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4194 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4195 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4196 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); 4197 if (sc->lro_cnt != 0) 4198 mxge_lro_cnt = sc->lro_cnt; 4199 4200 if (bootverbose) 4201 mxge_verbose = 1; 4202 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4203 mxge_intr_coal_delay = 30; 4204 if (mxge_ticks == 0) 4205 mxge_ticks = hz / 2; 4206 sc->pause = mxge_flow_control; 4207 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4208 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4209 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4210 } 4211 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4212 mxge_initial_mtu < ETHER_MIN_LEN) 4213 mxge_initial_mtu = ETHERMTU_JUMBO; 4214 4215 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) 4216 mxge_throttle = MXGE_MAX_THROTTLE; 4217 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) 4218 mxge_throttle = MXGE_MIN_THROTTLE; 4219 sc->throttle = mxge_throttle; 4220 } 4221 4222 4223 static void 4224 mxge_free_slices(mxge_softc_t *sc) 4225 { 4226 struct mxge_slice_state *ss; 4227 int i; 4228 4229 4230 if (sc->ss == NULL) 4231 return; 4232 4233 for (i = 0; i < sc->num_slices; i++) { 4234 ss = &sc->ss[i]; 4235 if (ss->fw_stats != NULL) { 4236 mxge_dma_free(&ss->fw_stats_dma); 4237 ss->fw_stats = NULL; 4238 #ifdef IFNET_BUF_RING 4239 if (ss->tx.br != NULL) { 4240 drbr_free(ss->tx.br, M_DEVBUF); 4241 ss->tx.br = NULL; 4242 } 4243 #endif 4244 mtx_destroy(&ss->tx.mtx); 4245 } 4246 if (ss->rx_done.entry != NULL) { 4247 mxge_dma_free(&ss->rx_done.dma); 4248 ss->rx_done.entry = NULL; 4249 } 4250 } 4251 free(sc->ss, M_DEVBUF); 4252 sc->ss = NULL; 4253 } 4254 4255 static int 4256 mxge_alloc_slices(mxge_softc_t *sc) 4257 { 4258 mxge_cmd_t cmd; 4259 struct mxge_slice_state *ss; 4260 size_t bytes; 4261 int err, i, max_intr_slots; 4262 4263 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4264 if (err != 0) { 4265 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4266 return err; 4267 } 4268 sc->rx_ring_size = cmd.data0; 4269 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4270 4271 bytes = sizeof (*sc->ss) * sc->num_slices; 4272 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4273 if (sc->ss == NULL) 4274 return (ENOMEM); 4275 for (i = 0; i < sc->num_slices; i++) { 4276 ss = &sc->ss[i]; 4277 4278 ss->sc = sc; 4279 4280 /* allocate per-slice rx interrupt queues */ 4281 4282 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 4283 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 4284 if (err != 0) 4285 goto abort; 4286 ss->rx_done.entry = ss->rx_done.dma.addr; 4287 bzero(ss->rx_done.entry, bytes); 4288 4289 /* 4290 * allocate the per-slice firmware stats; stats 4291 * (including tx) are used used only on the first 4292 * slice for now 4293 */ 4294 #ifndef IFNET_BUF_RING 4295 if (i > 0) 4296 continue; 4297 #endif 4298 4299 bytes = sizeof (*ss->fw_stats); 4300 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 4301 sizeof (*ss->fw_stats), 64); 4302 if (err != 0) 4303 goto abort; 4304 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 4305 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), 4306 "%s:tx(%d)", device_get_nameunit(sc->dev), i); 4307 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); 4308 #ifdef IFNET_BUF_RING 4309 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, 4310 &ss->tx.mtx); 4311 #endif 4312 } 4313 4314 return (0); 4315 4316 abort: 4317 mxge_free_slices(sc); 4318 return (ENOMEM); 4319 } 4320 4321 static void 4322 mxge_slice_probe(mxge_softc_t *sc) 4323 { 4324 mxge_cmd_t cmd; 4325 char *old_fw; 4326 int msix_cnt, status, max_intr_slots; 4327 4328 sc->num_slices = 1; 4329 /* 4330 * don't enable multiple slices if they are not enabled, 4331 * or if this is not an SMP system 4332 */ 4333 4334 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) 4335 return; 4336 4337 /* see how many MSI-X interrupts are available */ 4338 msix_cnt = pci_msix_count(sc->dev); 4339 if (msix_cnt < 2) 4340 return; 4341 4342 /* now load the slice aware firmware see what it supports */ 4343 old_fw = sc->fw_name; 4344 if (old_fw == mxge_fw_aligned) 4345 sc->fw_name = mxge_fw_rss_aligned; 4346 else 4347 sc->fw_name = mxge_fw_rss_unaligned; 4348 status = mxge_load_firmware(sc, 0); 4349 if (status != 0) { 4350 device_printf(sc->dev, "Falling back to a single slice\n"); 4351 return; 4352 } 4353 4354 /* try to send a reset command to the card to see if it 4355 is alive */ 4356 memset(&cmd, 0, sizeof (cmd)); 4357 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4358 if (status != 0) { 4359 device_printf(sc->dev, "failed reset\n"); 4360 goto abort_with_fw; 4361 } 4362 4363 /* get rx ring size */ 4364 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4365 if (status != 0) { 4366 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4367 goto abort_with_fw; 4368 } 4369 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 4370 4371 /* tell it the size of the interrupt queues */ 4372 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 4373 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4374 if (status != 0) { 4375 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4376 goto abort_with_fw; 4377 } 4378 4379 /* ask the maximum number of slices it supports */ 4380 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4381 if (status != 0) { 4382 device_printf(sc->dev, 4383 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4384 goto abort_with_fw; 4385 } 4386 sc->num_slices = cmd.data0; 4387 if (sc->num_slices > msix_cnt) 4388 sc->num_slices = msix_cnt; 4389 4390 if (mxge_max_slices == -1) { 4391 /* cap to number of CPUs in system */ 4392 if (sc->num_slices > mp_ncpus) 4393 sc->num_slices = mp_ncpus; 4394 } else { 4395 if (sc->num_slices > mxge_max_slices) 4396 sc->num_slices = mxge_max_slices; 4397 } 4398 /* make sure it is a power of two */ 4399 while (sc->num_slices & (sc->num_slices - 1)) 4400 sc->num_slices--; 4401 4402 if (mxge_verbose) 4403 device_printf(sc->dev, "using %d slices\n", 4404 sc->num_slices); 4405 4406 return; 4407 4408 abort_with_fw: 4409 sc->fw_name = old_fw; 4410 (void) mxge_load_firmware(sc, 0); 4411 } 4412 4413 static int 4414 mxge_add_msix_irqs(mxge_softc_t *sc) 4415 { 4416 size_t bytes; 4417 int count, err, i, rid; 4418 4419 rid = PCIR_BAR(2); 4420 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4421 &rid, RF_ACTIVE); 4422 4423 if (sc->msix_table_res == NULL) { 4424 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 4425 return ENXIO; 4426 } 4427 4428 count = sc->num_slices; 4429 err = pci_alloc_msix(sc->dev, &count); 4430 if (err != 0) { 4431 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 4432 "err = %d \n", sc->num_slices, err); 4433 goto abort_with_msix_table; 4434 } 4435 if (count < sc->num_slices) { 4436 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 4437 count, sc->num_slices); 4438 device_printf(sc->dev, 4439 "Try setting hw.mxge.max_slices to %d\n", 4440 count); 4441 err = ENOSPC; 4442 goto abort_with_msix; 4443 } 4444 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 4445 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4446 if (sc->msix_irq_res == NULL) { 4447 err = ENOMEM; 4448 goto abort_with_msix; 4449 } 4450 4451 for (i = 0; i < sc->num_slices; i++) { 4452 rid = i + 1; 4453 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 4454 SYS_RES_IRQ, 4455 &rid, RF_ACTIVE); 4456 if (sc->msix_irq_res[i] == NULL) { 4457 device_printf(sc->dev, "couldn't allocate IRQ res" 4458 " for message %d\n", i); 4459 err = ENXIO; 4460 goto abort_with_res; 4461 } 4462 } 4463 4464 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 4465 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4466 4467 for (i = 0; i < sc->num_slices; i++) { 4468 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 4469 INTR_TYPE_NET | INTR_MPSAFE, 4470 #if __FreeBSD_version > 700030 4471 NULL, 4472 #endif 4473 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 4474 if (err != 0) { 4475 device_printf(sc->dev, "couldn't setup intr for " 4476 "message %d\n", i); 4477 goto abort_with_intr; 4478 } 4479 bus_describe_intr(sc->dev, sc->msix_irq_res[i], 4480 sc->msix_ih[i], "s%d", i); 4481 } 4482 4483 if (mxge_verbose) { 4484 device_printf(sc->dev, "using %d msix IRQs:", 4485 sc->num_slices); 4486 for (i = 0; i < sc->num_slices; i++) 4487 printf(" %ld", rman_get_start(sc->msix_irq_res[i])); 4488 printf("\n"); 4489 } 4490 return (0); 4491 4492 abort_with_intr: 4493 for (i = 0; i < sc->num_slices; i++) { 4494 if (sc->msix_ih[i] != NULL) { 4495 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4496 sc->msix_ih[i]); 4497 sc->msix_ih[i] = NULL; 4498 } 4499 } 4500 free(sc->msix_ih, M_DEVBUF); 4501 4502 4503 abort_with_res: 4504 for (i = 0; i < sc->num_slices; i++) { 4505 rid = i + 1; 4506 if (sc->msix_irq_res[i] != NULL) 4507 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4508 sc->msix_irq_res[i]); 4509 sc->msix_irq_res[i] = NULL; 4510 } 4511 free(sc->msix_irq_res, M_DEVBUF); 4512 4513 4514 abort_with_msix: 4515 pci_release_msi(sc->dev); 4516 4517 abort_with_msix_table: 4518 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4519 sc->msix_table_res); 4520 4521 return err; 4522 } 4523 4524 static int 4525 mxge_add_single_irq(mxge_softc_t *sc) 4526 { 4527 int count, err, rid; 4528 4529 count = pci_msi_count(sc->dev); 4530 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4531 rid = 1; 4532 } else { 4533 rid = 0; 4534 sc->legacy_irq = 1; 4535 } 4536 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0, 4537 1, RF_SHAREABLE | RF_ACTIVE); 4538 if (sc->irq_res == NULL) { 4539 device_printf(sc->dev, "could not alloc interrupt\n"); 4540 return ENXIO; 4541 } 4542 if (mxge_verbose) 4543 device_printf(sc->dev, "using %s irq %ld\n", 4544 sc->legacy_irq ? "INTx" : "MSI", 4545 rman_get_start(sc->irq_res)); 4546 err = bus_setup_intr(sc->dev, sc->irq_res, 4547 INTR_TYPE_NET | INTR_MPSAFE, 4548 #if __FreeBSD_version > 700030 4549 NULL, 4550 #endif 4551 mxge_intr, &sc->ss[0], &sc->ih); 4552 if (err != 0) { 4553 bus_release_resource(sc->dev, SYS_RES_IRQ, 4554 sc->legacy_irq ? 0 : 1, sc->irq_res); 4555 if (!sc->legacy_irq) 4556 pci_release_msi(sc->dev); 4557 } 4558 return err; 4559 } 4560 4561 static void 4562 mxge_rem_msix_irqs(mxge_softc_t *sc) 4563 { 4564 int i, rid; 4565 4566 for (i = 0; i < sc->num_slices; i++) { 4567 if (sc->msix_ih[i] != NULL) { 4568 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4569 sc->msix_ih[i]); 4570 sc->msix_ih[i] = NULL; 4571 } 4572 } 4573 free(sc->msix_ih, M_DEVBUF); 4574 4575 for (i = 0; i < sc->num_slices; i++) { 4576 rid = i + 1; 4577 if (sc->msix_irq_res[i] != NULL) 4578 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4579 sc->msix_irq_res[i]); 4580 sc->msix_irq_res[i] = NULL; 4581 } 4582 free(sc->msix_irq_res, M_DEVBUF); 4583 4584 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4585 sc->msix_table_res); 4586 4587 pci_release_msi(sc->dev); 4588 return; 4589 } 4590 4591 static void 4592 mxge_rem_single_irq(mxge_softc_t *sc) 4593 { 4594 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4595 bus_release_resource(sc->dev, SYS_RES_IRQ, 4596 sc->legacy_irq ? 0 : 1, sc->irq_res); 4597 if (!sc->legacy_irq) 4598 pci_release_msi(sc->dev); 4599 } 4600 4601 static void 4602 mxge_rem_irq(mxge_softc_t *sc) 4603 { 4604 if (sc->num_slices > 1) 4605 mxge_rem_msix_irqs(sc); 4606 else 4607 mxge_rem_single_irq(sc); 4608 } 4609 4610 static int 4611 mxge_add_irq(mxge_softc_t *sc) 4612 { 4613 int err; 4614 4615 if (sc->num_slices > 1) 4616 err = mxge_add_msix_irqs(sc); 4617 else 4618 err = mxge_add_single_irq(sc); 4619 4620 if (0 && err == 0 && sc->num_slices > 1) { 4621 mxge_rem_msix_irqs(sc); 4622 err = mxge_add_msix_irqs(sc); 4623 } 4624 return err; 4625 } 4626 4627 4628 static int 4629 mxge_attach(device_t dev) 4630 { 4631 mxge_softc_t *sc = device_get_softc(dev); 4632 struct ifnet *ifp; 4633 int err, rid; 4634 4635 sc->dev = dev; 4636 mxge_fetch_tunables(sc); 4637 4638 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc); 4639 sc->tq = taskqueue_create_fast("mxge_taskq", M_WAITOK, 4640 taskqueue_thread_enqueue, 4641 &sc->tq); 4642 if (sc->tq == NULL) { 4643 err = ENOMEM; 4644 goto abort_with_nothing; 4645 } 4646 4647 err = bus_dma_tag_create(NULL, /* parent */ 4648 1, /* alignment */ 4649 0, /* boundary */ 4650 BUS_SPACE_MAXADDR, /* low */ 4651 BUS_SPACE_MAXADDR, /* high */ 4652 NULL, NULL, /* filter */ 4653 65536 + 256, /* maxsize */ 4654 MXGE_MAX_SEND_DESC, /* num segs */ 4655 65536, /* maxsegsize */ 4656 0, /* flags */ 4657 NULL, NULL, /* lock */ 4658 &sc->parent_dmat); /* tag */ 4659 4660 if (err != 0) { 4661 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4662 err); 4663 goto abort_with_tq; 4664 } 4665 4666 ifp = sc->ifp = if_alloc(IFT_ETHER); 4667 if (ifp == NULL) { 4668 device_printf(dev, "can not if_alloc()\n"); 4669 err = ENOSPC; 4670 goto abort_with_parent_dmat; 4671 } 4672 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4673 4674 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4675 device_get_nameunit(dev)); 4676 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4677 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4678 "%s:drv", device_get_nameunit(dev)); 4679 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4680 MTX_NETWORK_LOCK, MTX_DEF); 4681 4682 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4683 4684 mxge_setup_cfg_space(sc); 4685 4686 /* Map the board into the kernel */ 4687 rid = PCIR_BARS; 4688 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 4689 ~0, 1, RF_ACTIVE); 4690 if (sc->mem_res == NULL) { 4691 device_printf(dev, "could not map memory\n"); 4692 err = ENXIO; 4693 goto abort_with_lock; 4694 } 4695 sc->sram = rman_get_virtual(sc->mem_res); 4696 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4697 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4698 device_printf(dev, "impossible memory region size %ld\n", 4699 rman_get_size(sc->mem_res)); 4700 err = ENXIO; 4701 goto abort_with_mem_res; 4702 } 4703 4704 /* make NULL terminated copy of the EEPROM strings section of 4705 lanai SRAM */ 4706 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4707 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4708 rman_get_bushandle(sc->mem_res), 4709 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4710 sc->eeprom_strings, 4711 MXGE_EEPROM_STRINGS_SIZE - 2); 4712 err = mxge_parse_strings(sc); 4713 if (err != 0) 4714 goto abort_with_mem_res; 4715 4716 /* Enable write combining for efficient use of PCIe bus */ 4717 mxge_enable_wc(sc); 4718 4719 /* Allocate the out of band dma memory */ 4720 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4721 sizeof (mxge_cmd_t), 64); 4722 if (err != 0) 4723 goto abort_with_mem_res; 4724 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4725 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4726 if (err != 0) 4727 goto abort_with_cmd_dma; 4728 4729 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4730 if (err != 0) 4731 goto abort_with_zeropad_dma; 4732 4733 /* select & load the firmware */ 4734 err = mxge_select_firmware(sc); 4735 if (err != 0) 4736 goto abort_with_dmabench; 4737 sc->intr_coal_delay = mxge_intr_coal_delay; 4738 4739 mxge_slice_probe(sc); 4740 err = mxge_alloc_slices(sc); 4741 if (err != 0) 4742 goto abort_with_dmabench; 4743 4744 err = mxge_reset(sc, 0); 4745 if (err != 0) 4746 goto abort_with_slices; 4747 4748 err = mxge_alloc_rings(sc); 4749 if (err != 0) { 4750 device_printf(sc->dev, "failed to allocate rings\n"); 4751 goto abort_with_slices; 4752 } 4753 4754 err = mxge_add_irq(sc); 4755 if (err != 0) { 4756 device_printf(sc->dev, "failed to add irq\n"); 4757 goto abort_with_rings; 4758 } 4759 4760 ifp->if_baudrate = IF_Gbps(10UL); 4761 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4762 IFCAP_VLAN_MTU | IFCAP_LINKSTATE; 4763 #ifdef INET 4764 ifp->if_capabilities |= IFCAP_LRO; 4765 #endif 4766 4767 #ifdef MXGE_NEW_VLAN_API 4768 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4769 4770 /* Only FW 1.4.32 and newer can do TSO over vlans */ 4771 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 4772 sc->fw_ver_tiny >= 32) 4773 ifp->if_capabilities |= IFCAP_VLAN_HWTSO; 4774 #endif 4775 4776 sc->max_mtu = mxge_max_mtu(sc); 4777 if (sc->max_mtu >= 9000) 4778 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4779 else 4780 device_printf(dev, "MTU limited to %d. Install " 4781 "latest firmware for 9000 byte jumbo support\n", 4782 sc->max_mtu - ETHER_HDR_LEN); 4783 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4784 ifp->if_capenable = ifp->if_capabilities; 4785 if (sc->lro_cnt == 0) 4786 ifp->if_capenable &= ~IFCAP_LRO; 4787 sc->csum_flag = 1; 4788 ifp->if_init = mxge_init; 4789 ifp->if_softc = sc; 4790 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4791 ifp->if_ioctl = mxge_ioctl; 4792 ifp->if_start = mxge_start; 4793 /* Initialise the ifmedia structure */ 4794 ifmedia_init(&sc->media, 0, mxge_media_change, 4795 mxge_media_status); 4796 mxge_media_init(sc); 4797 mxge_media_probe(sc); 4798 sc->dying = 0; 4799 ether_ifattach(ifp, sc->mac_addr); 4800 /* ether_ifattach sets mtu to ETHERMTU */ 4801 if (mxge_initial_mtu != ETHERMTU) 4802 mxge_change_mtu(sc, mxge_initial_mtu); 4803 4804 mxge_add_sysctls(sc); 4805 #ifdef IFNET_BUF_RING 4806 ifp->if_transmit = mxge_transmit; 4807 ifp->if_qflush = mxge_qflush; 4808 #endif 4809 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", 4810 device_get_nameunit(sc->dev)); 4811 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 4812 return 0; 4813 4814 abort_with_rings: 4815 mxge_free_rings(sc); 4816 abort_with_slices: 4817 mxge_free_slices(sc); 4818 abort_with_dmabench: 4819 mxge_dma_free(&sc->dmabench_dma); 4820 abort_with_zeropad_dma: 4821 mxge_dma_free(&sc->zeropad_dma); 4822 abort_with_cmd_dma: 4823 mxge_dma_free(&sc->cmd_dma); 4824 abort_with_mem_res: 4825 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4826 abort_with_lock: 4827 pci_disable_busmaster(dev); 4828 mtx_destroy(&sc->cmd_mtx); 4829 mtx_destroy(&sc->driver_mtx); 4830 if_free(ifp); 4831 abort_with_parent_dmat: 4832 bus_dma_tag_destroy(sc->parent_dmat); 4833 abort_with_tq: 4834 if (sc->tq != NULL) { 4835 taskqueue_drain(sc->tq, &sc->watchdog_task); 4836 taskqueue_free(sc->tq); 4837 sc->tq = NULL; 4838 } 4839 abort_with_nothing: 4840 return err; 4841 } 4842 4843 static int 4844 mxge_detach(device_t dev) 4845 { 4846 mxge_softc_t *sc = device_get_softc(dev); 4847 4848 if (mxge_vlans_active(sc)) { 4849 device_printf(sc->dev, 4850 "Detach vlans before removing module\n"); 4851 return EBUSY; 4852 } 4853 mtx_lock(&sc->driver_mtx); 4854 sc->dying = 1; 4855 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4856 mxge_close(sc, 0); 4857 mtx_unlock(&sc->driver_mtx); 4858 ether_ifdetach(sc->ifp); 4859 if (sc->tq != NULL) { 4860 taskqueue_drain(sc->tq, &sc->watchdog_task); 4861 taskqueue_free(sc->tq); 4862 sc->tq = NULL; 4863 } 4864 callout_drain(&sc->co_hdl); 4865 ifmedia_removeall(&sc->media); 4866 mxge_dummy_rdma(sc, 0); 4867 mxge_rem_sysctls(sc); 4868 mxge_rem_irq(sc); 4869 mxge_free_rings(sc); 4870 mxge_free_slices(sc); 4871 mxge_dma_free(&sc->dmabench_dma); 4872 mxge_dma_free(&sc->zeropad_dma); 4873 mxge_dma_free(&sc->cmd_dma); 4874 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4875 pci_disable_busmaster(dev); 4876 mtx_destroy(&sc->cmd_mtx); 4877 mtx_destroy(&sc->driver_mtx); 4878 if_free(sc->ifp); 4879 bus_dma_tag_destroy(sc->parent_dmat); 4880 return 0; 4881 } 4882 4883 static int 4884 mxge_shutdown(device_t dev) 4885 { 4886 return 0; 4887 } 4888 4889 /* 4890 This file uses Myri10GE driver indentation. 4891 4892 Local Variables: 4893 c-file-style:"linux" 4894 tab-width:8 4895 End: 4896 */ 4897