/******************************************************************************

Copyright (c) 2006-2009, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/sx.h>

/* count xmits ourselves, rather than via drbr */
#define NO_SLOW_STATS
#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h>		/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif

#include "opt_inet.h"

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
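/*
 * Editor's note: a sketch of how these defaults are expected to be
 * overridden.  In this driver vintage the values above are plain statics;
 * elsewhere in the file (not shown in this section) they are typically
 * refreshed from loader(8) tunables, e.g.:
 *
 *	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", &mxge_intr_coal_delay);
 *	TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
 *
 * The exact tunable names are an assumption based on the variable names;
 * consult the tunable-fetch routine in the full source for the
 * authoritative list.
 */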
static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n",
				      rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}
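/*
 * Editor's note: a minimal usage sketch of the mxge_dma_alloc()/
 * mxge_dma_free() pair, assuming a hypothetical 4KB, 64-byte aligned
 * scratch buffer; the real callers (command block, stats blocks, rings)
 * follow the same pattern:
 *
 *	mxge_dma_t scratch;
 *
 *	if (mxge_dma_alloc(sc, &scratch, 4096, 64) != 0)
 *		return (ENOMEM);
 *	// scratch.addr is the KVA; scratch.bus_addr is the device-visible
 *	// address captured by mxge_dmamap_callback()
 *	...
 *	mxge_dma_free(&scratch);
 */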
static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */

static int
mxge_parse_strings(mxge_softc_t *sc)
{
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}
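/*
 * Editor's note: the pointer arithmetic in the MAC= case above is terse;
 * a worked example with the (hypothetical) string "MAC=00:60:dd:47:ab:9f":
 * ptr initially points at 'M'; "ptr += 1" moves it to 'A', and each
 * "ptr += 3" inside the loop then lands on the first hex digit of the
 * next octet ('0', then the byte after "00:", and so on), so strtoul()
 * parses 0x00, 0x60, 0xdd, 0x47, 0xab, 0x9f in turn.
 */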
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function.  Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off =  base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif
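/*
 * Editor's note: the "off" computation above follows the standard PCIe
 * extended config (ECAM) layout: each bus gets 1MB (0x00100000) and each
 * device/function gets 4KB (0x00001000) within it, with the function
 * index being (slot * 8 + func).  For example (hypothetical values), a
 * bridge at bus 0x80, slot 2, function 0 under a base of 0xe0000000 maps
 * to 0xe0000000 + 0x80 * 0x100000 + (2 * 8 + 0) * 0x1000 = 0xe8010000,
 * and offset 0x178 within that page is the register the code pokes to
 * turn on ECRC generation (bit 0x40).
 */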
static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */

	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */

	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}

static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;

	if (mxge_force_firmware != 0) {
		if (mxge_force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}
union qualhack
{
	const char *ro_char;
	char *rw_char;
};

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{

	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
	       &sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;

}

static void *
z_alloc(void *nil, u_int items, u_int size)
{
	void *ptr;

	ptr = malloc(items * size, M_TEMP, M_NOWAIT);
	return ptr;
}

static void
z_free(void *nil, void *ptr)
{
	free(ptr, M_TEMP);
}

static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	z_stream zs;
	char *inflate_buffer;
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	char dummy;
	size_t fw_len;

	fw = firmware_get(sc->fw_name);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}

	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = z_alloc;
	zs.zfree = z_free;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL)
		goto abort_with_zs;
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}

	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
			     (inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file");
		status = EIO;
		goto abort_with_buffer;
	}
	hdr = (const void*)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      inflate_buffer + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	free(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);	/* dummy addr MSW */
	buf[4] = htobe32(dma_low);	/* dummy addr LSW */
	buf[5] = htobe32(enable);	/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}

static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);
	mtx_lock(&sc->cmd_mtx);
	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out, "
			      "result = %d\n",
			      cmd, be32toh(response->result));
	mtx_unlock(&sc->cmd_mtx);
	return err;
}
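/*
 * Editor's note: mxge_dummy_rdma() above and mxge_load_firmware() below
 * share the same host/NIC handshake: the host clears a DMA-visible
 * confirmation word, PIO-copies a small command block into a well-known
 * SRAM mailbox (MXGEFW_BOOT_DUMMY_RDMA / MXGEFW_BOOT_HANDOFF), then polls
 * the word until the firmware DMAs back 0xffffffff or the retry budget
 * runs out.
 *
 * A minimal caller sketch for mxge_send_cmd() above, modeled on the real
 * callers later in the file (inputs go in data0..data2; any result comes
 * back in data0):
 *
 *	mxge_cmd_t cmd;
 *
 *	cmd.data0 = sc->rx_ring_size;
 *	if (mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd) != 0)
 *		return (ENXIO);
 */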
static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}

static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
	/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8);	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "handoff failed (%p = 0x%x)",
			      confirm, *confirm);

		return ENXIO;
	}
	return 0;
}
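/*
 * Editor's note: mxge_update_mac_address() below packs the 6-byte
 * station address big-endian-style into the command payload; for a
 * hypothetical MAC of 00:60:dd:47:ab:9f, data0 = 0x0060dd47 (first four
 * bytes) and data1 = 0x0000ab9f (last two).  The multicast walk further
 * down packs group addresses the same way.
 */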
static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, error status: "
				      "%d\n", err);
			/* abort, leaving multicast filtering off */
			if_maddr_runlock(ifp);
			return;
		}
	}
	if_maddr_runlock(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}
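/*
 * Editor's note: the update sequence above is deliberately fail-safe.
 * Filtering is first disabled (ALLMULTI), and it is only re-enabled by
 * the final MXGEFW_DISABLE_ALLMULTI after the filter list has been
 * flushed and every group joined; any error along the way returns early
 * and leaves the NIC accepting all multicast rather than silently
 * dropping groups.
 */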
static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lro_bad_csum = 0;
		ss->lro_queued = 0;
		ss->lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			ss->fw_stats->valid = 0;
			ss->fw_stats->send_done_count = 0;
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	return status;
}
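/*
 * Editor's note: the handlers below back per-device sysctl nodes created
 * in mxge_add_sysctls() further down, under the device's own tree, so at
 * runtime they would be reached with something like (hypothetical unit
 * number):
 *
 *	sysctl dev.mxge.0.intr_coal_delay=75
 *	sysctl dev.mxge.0.flow_control_enabled=0
 *
 * Each handler follows the same shape: copy the current value out, let
 * sysctl_handle_int() run the read/write, and only on a changed,
 * validated value take driver_mtx and push the change to the NIC.
 */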
static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
	struct ifnet *ifp;
	int err = 0;

	ifp = sc->ifp;
	if (lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	else
		ifp->if_capenable |= IFCAP_LRO;
	sc->lro_cnt = lro_cnt;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
	}
	return err;
}

static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int lro_cnt;
	int err;

	sc = arg1;
	lro_cnt = sc->lro_cnt;
	err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
	if (err != 0)
		return err;

	if (lro_cnt == sc->lro_cnt)
		return 0;

	if (lro_cnt > 128)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_lro_locked(sc, lro_cnt);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}
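/*
 * Editor's note: mxge_handle_be32() exploits a sysctl_handle_int()
 * convention: when arg1 is NULL the handler reports the value passed in
 * arg2 instead of dereferencing a pointer.  The byte-swapped firmware
 * counter is therefore exported read-only without ever writing back to
 * the stats block the NIC is DMAing into.
 */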
static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, &sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, &sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, &sc->product_code_string,
			  0, "product_code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "enable flow control (pause frames)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* lro */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"lro_cnt",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_lro,
			"I", "number of lro merge queues");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		sprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}
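/*
 * Editor's note: the per-slice counters above hang off a separate sysctl
 * context so mxge_rem_sysctls() can tear them down when the slice count
 * changes; at runtime they would appear as, e.g. (hypothetical unit and
 * slice numbers), dev.mxge.0.slice.1.rx_small_cnt.
 */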
/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints+=3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints+=3;
	*dst_ints =  *src_ints;
	tx->req += cnt;
	wmb();
}
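/*
 * Editor's note: tx->req and tx->done are free-running counters; a slot
 * index is always derived with "& tx->mask", which assumes the ring size
 * is a power of two (mask == size - 1).  That is also why the available
 * space check used by the start routines below is simply
 * (tx->mask - (tx->req - tx->done)) compared against tx->max_desc.
 */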
#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, int ip_off)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	/* ensure we have the ethernet, IP and TCP
	   header together in the first mbuf, copy
	   it to a scratch buffer if not */
	if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
		m_copydata(m, 0, ip_off + sizeof (*ip),
			   ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	} else {
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}
	if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
			    + sizeof (*tcp))) {
		m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
			   + sizeof (*tcp), ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	}

	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
	cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));

	/* TSO implies checksum offload on this hardware */
	cksum_offset = ip_off + (ip->ip_hl << 2);
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */
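	/*
	 * Editor's note: a worked example of the cum_len convention,
	 * with hypothetical values.  For a standard TCP/IP packet with
	 * no options and ip_off = 14, cum_len starts at -(14 + 20 + 20)
	 * = -54: it stays negative while the loop below is still
	 * emitting header bytes, crosses zero exactly where the payload
	 * begins (triggering the "header ends" branch), and thereafter
	 * counts payload bytes so that "cum_len_next > mss" marks each
	 * TSO segment cut.
	 */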
	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		       (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;

}

#endif /* IFCAP_TSO4 */
#ifdef MXGE_NEW_VLAN_API
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion.  We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 */
static struct mbuf *
mxge_vlan_tag_insert(struct mbuf *m)
{
	struct ether_vlan_header *evl;

	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
	if (__predict_false(m == NULL))
		return NULL;
	if (m->m_len < sizeof(*evl)) {
		m = m_pullup(m, sizeof(*evl));
		if (__predict_false(m == NULL))
			return NULL;
	}
	/*
	 * Transform the Ethernet header into an Ethernet header
	 * with 802.1Q encapsulation.
	 */
	evl = mtod(m, struct ether_vlan_header *);
	bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
	      (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
	m->m_flags &= ~M_VLANTAG;
	return m;
}
#endif /* MXGE_NEW_VLAN_API */

static void
mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
{
	mxge_softc_t *sc;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct mbuf *m_tmp;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	struct ip *ip;
	int cnt, cum_len, err, i, idx, odd_flag, ip_off;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;

	ip_off = sizeof (struct ether_header);
#ifdef MXGE_NEW_VLAN_API
	if (m->m_flags & M_VLANTAG) {
		m = mxge_vlan_tag_insert(m);
		if (__predict_false(m == NULL))
			goto drop;
		ip_off += ETHER_VLAN_ENCAP_LEN;
	}
#endif
	/* (try to) map the frame for DMA */
	idx = tx->req & tx->mask;
	err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
				      m, tx->seg_list, &cnt,
				      BUS_DMA_NOWAIT);
	if (__predict_false(err == EFBIG)) {
		/* Too many segments in the chain.  Try
		   to defrag */
		m_tmp = m_defrag(m, M_NOWAIT);
		if (m_tmp == NULL) {
			goto drop;
		}
		ss->tx.defrag++;
		m = m_tmp;
		err = bus_dmamap_load_mbuf_sg(tx->dmat,
					      tx->info[idx].map,
					      m, tx->seg_list, &cnt,
					      BUS_DMA_NOWAIT);
	}
	if (__predict_false(err != 0)) {
		device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
			      " packet len = %d\n", err, m->m_pkthdr.len);
		goto drop;
	}
	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
			BUS_DMASYNC_PREWRITE);
	tx->info[idx].m = m;

#if IFCAP_TSO4
	/* TSO is different enough, we handle it in another routine */
	if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
		mxge_encap_tso(ss, m, cnt, ip_off);
		return;
	}
#endif
*/ 2035 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 2036 /* ensure ip header is in first mbuf, copy 2037 it to a scratch buffer if not */ 2038 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 2039 m_copydata(m, 0, ip_off + sizeof (*ip), 2040 ss->scratch); 2041 ip = (struct ip *)(ss->scratch + ip_off); 2042 } else { 2043 ip = (struct ip *)(mtod(m, char *) + ip_off); 2044 } 2045 cksum_offset = ip_off + (ip->ip_hl << 2); 2046 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2047 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2048 req->cksum_offset = cksum_offset; 2049 flags |= MXGEFW_FLAGS_CKSUM; 2050 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2051 } else { 2052 odd_flag = 0; 2053 } 2054 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2055 flags |= MXGEFW_FLAGS_SMALL; 2056 2057 /* convert segments into a request list */ 2058 cum_len = 0; 2059 seg = tx->seg_list; 2060 req->flags = MXGEFW_FLAGS_FIRST; 2061 for (i = 0; i < cnt; i++) { 2062 req->addr_low = 2063 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2064 req->addr_high = 2065 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2066 req->length = htobe16(seg->ds_len); 2067 req->cksum_offset = cksum_offset; 2068 if (cksum_offset > seg->ds_len) 2069 cksum_offset -= seg->ds_len; 2070 else 2071 cksum_offset = 0; 2072 req->pseudo_hdr_offset = pseudo_hdr_offset; 2073 req->pad = 0; /* complete solid 16-byte block */ 2074 req->rdma_count = 1; 2075 req->flags |= flags | ((cum_len & 1) * odd_flag); 2076 cum_len += seg->ds_len; 2077 seg++; 2078 req++; 2079 req->flags = 0; 2080 } 2081 req--; 2082 /* pad runts to 60 bytes */ 2083 if (cum_len < 60) { 2084 req++; 2085 req->addr_low = 2086 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2087 req->addr_high = 2088 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2089 req->length = htobe16(60 - cum_len); 2090 req->cksum_offset = 0; 2091 req->pseudo_hdr_offset = pseudo_hdr_offset; 2092 req->pad = 0; /* complete solid 16-byte block */ 2093 req->rdma_count = 1; 2094 req->flags |= flags | ((cum_len & 1) * odd_flag); 2095 cnt++; 2096 } 2097 2098 tx->req_list[0].rdma_count = cnt; 2099 #if 0 2100 /* print what the firmware will see */ 2101 for (i = 0; i < cnt; i++) { 2102 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2103 "cso:%d, flags:0x%x, rdma:%d\n", 2104 i, (int)ntohl(tx->req_list[i].addr_high), 2105 (int)ntohl(tx->req_list[i].addr_low), 2106 (int)ntohs(tx->req_list[i].length), 2107 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2108 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2109 tx->req_list[i].rdma_count); 2110 } 2111 printf("--------------\n"); 2112 #endif 2113 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2114 mxge_submit_req(tx, tx->req_list, cnt); 2115 #ifdef IFNET_BUF_RING 2116 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2117 /* tell the NIC to start polling this slice */ 2118 *tx->send_go = 1; 2119 tx->queue_active = 1; 2120 tx->activate++; 2121 wmb(); 2122 } 2123 #endif 2124 return; 2125 2126 drop: 2127 m_freem(m); 2128 ss->oerrors++; 2129 return; 2130 } 2131 2132 #ifdef IFNET_BUF_RING 2133 static void 2134 mxge_qflush(struct ifnet *ifp) 2135 { 2136 mxge_softc_t *sc = ifp->if_softc; 2137 mxge_tx_ring_t *tx; 2138 struct mbuf *m; 2139 int slice; 2140 2141 for (slice = 0; slice < sc->num_slices; slice++) { 2142 tx = &sc->ss[slice].tx; 2143 mtx_lock(&tx->mtx); 2144 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2145 m_freem(m); 2146 mtx_unlock(&tx->mtx); 2147 } 2148 if_qflush(ifp); 2149 } 2150 2151 static inline void 2152 mxge_start_locked(struct 
mxge_slice_state *ss) 2153 { 2154 mxge_softc_t *sc; 2155 struct mbuf *m; 2156 struct ifnet *ifp; 2157 mxge_tx_ring_t *tx; 2158 2159 sc = ss->sc; 2160 ifp = sc->ifp; 2161 tx = &ss->tx; 2162 2163 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2164 m = drbr_dequeue(ifp, tx->br); 2165 if (m == NULL) { 2166 return; 2167 } 2168 /* let BPF see it */ 2169 BPF_MTAP(ifp, m); 2170 2171 /* give it to the nic */ 2172 mxge_encap(ss, m); 2173 } 2174 /* ran out of transmit slots */ 2175 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2176 && (!drbr_empty(ifp, tx->br))) { 2177 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2178 tx->stall++; 2179 } 2180 } 2181 2182 static int 2183 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2184 { 2185 mxge_softc_t *sc; 2186 struct ifnet *ifp; 2187 mxge_tx_ring_t *tx; 2188 int err; 2189 2190 sc = ss->sc; 2191 ifp = sc->ifp; 2192 tx = &ss->tx; 2193 2194 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2195 IFF_DRV_RUNNING) { 2196 err = drbr_enqueue(ifp, tx->br, m); 2197 return (err); 2198 } 2199 2200 if (drbr_empty(ifp, tx->br) && 2201 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2202 /* let BPF see it */ 2203 BPF_MTAP(ifp, m); 2204 /* give it to the nic */ 2205 mxge_encap(ss, m); 2206 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2207 return (err); 2208 } 2209 if (!drbr_empty(ifp, tx->br)) 2210 mxge_start_locked(ss); 2211 return (0); 2212 } 2213 2214 static int 2215 mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2216 { 2217 mxge_softc_t *sc = ifp->if_softc; 2218 struct mxge_slice_state *ss; 2219 mxge_tx_ring_t *tx; 2220 int err = 0; 2221 int slice; 2222 2223 slice = m->m_pkthdr.flowid; 2224 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2225 2226 ss = &sc->ss[slice]; 2227 tx = &ss->tx; 2228 2229 if (mtx_trylock(&tx->mtx)) { 2230 err = mxge_transmit_locked(ss, m); 2231 mtx_unlock(&tx->mtx); 2232 } else { 2233 err = drbr_enqueue(ifp, tx->br, m); 2234 } 2235 2236 return (err); 2237 } 2238 2239 #else 2240 2241 static inline void 2242 mxge_start_locked(struct mxge_slice_state *ss) 2243 { 2244 mxge_softc_t *sc; 2245 struct mbuf *m; 2246 struct ifnet *ifp; 2247 mxge_tx_ring_t *tx; 2248 2249 sc = ss->sc; 2250 ifp = sc->ifp; 2251 tx = &ss->tx; 2252 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2253 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2254 if (m == NULL) { 2255 return; 2256 } 2257 /* let BPF see it */ 2258 BPF_MTAP(ifp, m); 2259 2260 /* give it to the nic */ 2261 mxge_encap(ss, m); 2262 } 2263 /* ran out of transmit slots */ 2264 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2265 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2266 tx->stall++; 2267 } 2268 } 2269 #endif 2270 static void 2271 mxge_start(struct ifnet *ifp) 2272 { 2273 mxge_softc_t *sc = ifp->if_softc; 2274 struct mxge_slice_state *ss; 2275 2276 /* only use the first slice for now */ 2277 ss = &sc->ss[0]; 2278 mtx_lock(&ss->tx.mtx); 2279 mxge_start_locked(ss); 2280 mtx_unlock(&ss->tx.mtx); 2281 } 2282 2283 /* 2284 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2285 * at most 32 bytes at a time, so as to avoid involving the software 2286 * pio handler in the nic. 
We re-write the first segment's low 2287 * DMA address to mark it valid only after we write the entire chunk 2288 * in a burst 2289 */ 2290 static inline void 2291 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2292 mcp_kreq_ether_recv_t *src) 2293 { 2294 uint32_t low; 2295 2296 low = src->addr_low; 2297 src->addr_low = 0xffffffff; 2298 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2299 wmb(); 2300 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2301 wmb(); 2302 src->addr_low = low; 2303 dst->addr_low = low; 2304 wmb(); 2305 } 2306 2307 static int 2308 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2309 { 2310 bus_dma_segment_t seg; 2311 struct mbuf *m; 2312 mxge_rx_ring_t *rx = &ss->rx_small; 2313 int cnt, err; 2314 2315 m = m_gethdr(M_DONTWAIT, MT_DATA); 2316 if (m == NULL) { 2317 rx->alloc_fail++; 2318 err = ENOBUFS; 2319 goto done; 2320 } 2321 m->m_len = MHLEN; 2322 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2323 &seg, &cnt, BUS_DMA_NOWAIT); 2324 if (err != 0) { 2325 m_free(m); 2326 goto done; 2327 } 2328 rx->info[idx].m = m; 2329 rx->shadow[idx].addr_low = 2330 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2331 rx->shadow[idx].addr_high = 2332 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2333 2334 done: 2335 if ((idx & 7) == 7) 2336 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2337 return err; 2338 } 2339 2340 static int 2341 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2342 { 2343 bus_dma_segment_t seg[3]; 2344 struct mbuf *m; 2345 mxge_rx_ring_t *rx = &ss->rx_big; 2346 int cnt, err, i; 2347 2348 if (rx->cl_size == MCLBYTES) 2349 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); 2350 else 2351 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2352 if (m == NULL) { 2353 rx->alloc_fail++; 2354 err = ENOBUFS; 2355 goto done; 2356 } 2357 m->m_len = rx->mlen; 2358 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2359 seg, &cnt, BUS_DMA_NOWAIT); 2360 if (err != 0) { 2361 m_free(m); 2362 goto done; 2363 } 2364 rx->info[idx].m = m; 2365 rx->shadow[idx].addr_low = 2366 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2367 rx->shadow[idx].addr_high = 2368 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2369 2370 #if MXGE_VIRT_JUMBOS 2371 for (i = 1; i < cnt; i++) { 2372 rx->shadow[idx + i].addr_low = 2373 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2374 rx->shadow[idx + i].addr_high = 2375 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2376 } 2377 #endif 2378 2379 done: 2380 for (i = 0; i < rx->nbufs; i++) { 2381 if ((idx & 7) == 7) { 2382 mxge_submit_8rx(&rx->lanai[idx - 7], 2383 &rx->shadow[idx - 7]); 2384 } 2385 idx++; 2386 } 2387 return err; 2388 } 2389 2390 /* 2391 * Myri10GE hardware checksums are not valid if the sender 2392 * padded the frame with non-zero padding. This is because 2393 * the firmware just does a simple 16-bit 1s complement 2394 * checksum across the entire frame, excluding the first 14 2395 * bytes. 
It is best to simply check the checksum and 2396 * tell the stack about it only if the checksum is good 2397 */ 2398 2399 static inline uint16_t 2400 mxge_rx_csum(struct mbuf *m, int csum) 2401 { 2402 struct ether_header *eh; 2403 struct ip *ip; 2404 uint16_t c; 2405 2406 eh = mtod(m, struct ether_header *); 2407 2408 /* only deal with IPv4 TCP & UDP for now */ 2409 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2410 return 1; 2411 ip = (struct ip *)(eh + 1); 2412 if (__predict_false(ip->ip_p != IPPROTO_TCP && 2413 ip->ip_p != IPPROTO_UDP)) 2414 return 1; 2415 #ifdef INET 2416 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2417 htonl(ntohs(csum) + ntohs(ip->ip_len) + 2418 - (ip->ip_hl << 2) + ip->ip_p)); 2419 #else 2420 c = 1; 2421 #endif 2422 c ^= 0xffff; 2423 return (c); 2424 } 2425 2426 static void 2427 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2428 { 2429 struct ether_vlan_header *evl; 2430 struct ether_header *eh; 2431 uint32_t partial; 2432 2433 evl = mtod(m, struct ether_vlan_header *); 2434 eh = mtod(m, struct ether_header *); 2435 2436 /* 2437 * fix the checksum by subtracting the partial checksum of the 2438 * ETHER_VLAN_ENCAP_LEN bytes after what the firmware thought 2439 * was the end of the ethernet header. 2440 */ 2441 2442 /* put checksum into host byte order */ 2443 *csum = ntohs(*csum); 2444 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2445 (*csum) += ~partial; 2446 (*csum) += ((*csum) < ~partial); 2447 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2448 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2449 2450 /* restore checksum to network byte order; 2451 later consumers expect this */ 2452 *csum = htons(*csum); 2453 2454 /* save the tag */ 2455 #ifdef MXGE_NEW_VLAN_API 2456 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2457 #else 2458 { 2459 struct m_tag *mtag; 2460 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2461 M_NOWAIT); 2462 if (mtag == NULL) 2463 return; 2464 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2465 m_tag_prepend(m, mtag); 2466 } 2467 2468 #endif 2469 m->m_flags |= M_VLANTAG; 2470 2471 /* 2472 * Remove the 802.1q header by copying the Ethernet 2473 * addresses over it and adjusting the beginning of 2474 * the data in the mbuf. The encapsulated Ethernet 2475 * type field is already in place.
2476 */ 2477 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2478 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2479 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2480 } 2481 2482 2483 static inline void 2484 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2485 { 2486 mxge_softc_t *sc; 2487 struct ifnet *ifp; 2488 struct mbuf *m; 2489 struct ether_header *eh; 2490 mxge_rx_ring_t *rx; 2491 bus_dmamap_t old_map; 2492 int idx; 2493 uint16_t tcpudp_csum; 2494 2495 sc = ss->sc; 2496 ifp = sc->ifp; 2497 rx = &ss->rx_big; 2498 idx = rx->cnt & rx->mask; 2499 rx->cnt += rx->nbufs; 2500 /* save a pointer to the received mbuf */ 2501 m = rx->info[idx].m; 2502 /* try to replace the received mbuf */ 2503 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2504 /* drop the frame -- the old mbuf is re-cycled */ 2505 ifp->if_ierrors++; 2506 return; 2507 } 2508 2509 /* unmap the received buffer */ 2510 old_map = rx->info[idx].map; 2511 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2512 bus_dmamap_unload(rx->dmat, old_map); 2513 2514 /* swap the bus_dmamap_t's */ 2515 rx->info[idx].map = rx->extra_map; 2516 rx->extra_map = old_map; 2517 2518 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2519 * aligned */ 2520 m->m_data += MXGEFW_PAD; 2521 2522 m->m_pkthdr.rcvif = ifp; 2523 m->m_len = m->m_pkthdr.len = len; 2524 ss->ipackets++; 2525 eh = mtod(m, struct ether_header *); 2526 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2527 mxge_vlan_tag_remove(m, &csum); 2528 } 2529 /* if the checksum is valid, mark it in the mbuf header */ 2530 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2531 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2532 return; 2533 /* otherwise, it was a UDP frame, or a TCP frame which 2534 we could not do LRO on. 
Tell the stack that the 2535 checksum is good */ 2536 m->m_pkthdr.csum_data = 0xffff; 2537 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2538 } 2539 /* flowid only valid if RSS hashing is enabled */ 2540 if (sc->num_slices > 1) { 2541 m->m_pkthdr.flowid = (ss - sc->ss); 2542 m->m_flags |= M_FLOWID; 2543 } 2544 /* pass the frame up the stack */ 2545 (*ifp->if_input)(ifp, m); 2546 } 2547 2548 static inline void 2549 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2550 { 2551 mxge_softc_t *sc; 2552 struct ifnet *ifp; 2553 struct ether_header *eh; 2554 struct mbuf *m; 2555 mxge_rx_ring_t *rx; 2556 bus_dmamap_t old_map; 2557 int idx; 2558 uint16_t tcpudp_csum; 2559 2560 sc = ss->sc; 2561 ifp = sc->ifp; 2562 rx = &ss->rx_small; 2563 idx = rx->cnt & rx->mask; 2564 rx->cnt++; 2565 /* save a pointer to the received mbuf */ 2566 m = rx->info[idx].m; 2567 /* try to replace the received mbuf */ 2568 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2569 /* drop the frame -- the old mbuf is re-cycled */ 2570 ifp->if_ierrors++; 2571 return; 2572 } 2573 2574 /* unmap the received buffer */ 2575 old_map = rx->info[idx].map; 2576 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2577 bus_dmamap_unload(rx->dmat, old_map); 2578 2579 /* swap the bus_dmamap_t's */ 2580 rx->info[idx].map = rx->extra_map; 2581 rx->extra_map = old_map; 2582 2583 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2584 * aligned */ 2585 m->m_data += MXGEFW_PAD; 2586 2587 m->m_pkthdr.rcvif = ifp; 2588 m->m_len = m->m_pkthdr.len = len; 2589 ss->ipackets++; 2590 eh = mtod(m, struct ether_header *); 2591 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2592 mxge_vlan_tag_remove(m, &csum); 2593 } 2594 /* if the checksum is valid, mark it in the mbuf header */ 2595 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2596 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2597 return; 2598 /* otherwise, it was a UDP frame, or a TCP frame which 2599 we could not do LRO on. 
Tell the stack that the 2600 checksum is good */ 2601 m->m_pkthdr.csum_data = 0xffff; 2602 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2603 } 2604 /* flowid only valid if RSS hashing is enabled */ 2605 if (sc->num_slices > 1) { 2606 m->m_pkthdr.flowid = (ss - sc->ss); 2607 m->m_flags |= M_FLOWID; 2608 } 2609 /* pass the frame up the stack */ 2610 (*ifp->if_input)(ifp, m); 2611 } 2612 2613 static inline void 2614 mxge_clean_rx_done(struct mxge_slice_state *ss) 2615 { 2616 mxge_rx_done_t *rx_done = &ss->rx_done; 2617 int limit = 0; 2618 uint16_t length; 2619 uint16_t checksum; 2620 2621 2622 while (rx_done->entry[rx_done->idx].length != 0) { 2623 length = ntohs(rx_done->entry[rx_done->idx].length); 2624 rx_done->entry[rx_done->idx].length = 0; 2625 checksum = rx_done->entry[rx_done->idx].checksum; 2626 if (length <= (MHLEN - MXGEFW_PAD)) 2627 mxge_rx_done_small(ss, length, checksum); 2628 else 2629 mxge_rx_done_big(ss, length, checksum); 2630 rx_done->cnt++; 2631 rx_done->idx = rx_done->cnt & rx_done->mask; 2632 2633 /* limit potential for livelock */ 2634 if (__predict_false(++limit > rx_done->mask / 2)) 2635 break; 2636 } 2637 #ifdef INET 2638 while (!SLIST_EMPTY(&ss->lro_active)) { 2639 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active); 2640 SLIST_REMOVE_HEAD(&ss->lro_active, next); 2641 mxge_lro_flush(ss, lro); 2642 } 2643 #endif 2644 } 2645 2646 2647 static inline void 2648 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2649 { 2650 struct ifnet *ifp; 2651 mxge_tx_ring_t *tx; 2652 struct mbuf *m; 2653 bus_dmamap_t map; 2654 int idx; 2655 int *flags; 2656 2657 tx = &ss->tx; 2658 ifp = ss->sc->ifp; 2659 while (tx->pkt_done != mcp_idx) { 2660 idx = tx->done & tx->mask; 2661 tx->done++; 2662 m = tx->info[idx].m; 2663 /* mbuf and DMA map only attached to the first 2664 segment per-mbuf */ 2665 if (m != NULL) { 2666 ss->obytes += m->m_pkthdr.len; 2667 if (m->m_flags & M_MCAST) 2668 ss->omcasts++; 2669 ss->opackets++; 2670 tx->info[idx].m = NULL; 2671 map = tx->info[idx].map; 2672 bus_dmamap_unload(tx->dmat, map); 2673 m_freem(m); 2674 } 2675 if (tx->info[idx].flag) { 2676 tx->info[idx].flag = 0; 2677 tx->pkt_done++; 2678 } 2679 } 2680 2681 /* If we have space, clear IFF_OACTIVE to tell the stack that 2682 its OK to send packets */ 2683 #ifdef IFNET_BUF_RING 2684 flags = &ss->if_drv_flags; 2685 #else 2686 flags = &ifp->if_drv_flags; 2687 #endif 2688 mtx_lock(&ss->tx.mtx); 2689 if ((*flags) & IFF_DRV_OACTIVE && 2690 tx->req - tx->done < (tx->mask + 1)/4) { 2691 *(flags) &= ~IFF_DRV_OACTIVE; 2692 ss->tx.wake++; 2693 mxge_start_locked(ss); 2694 } 2695 #ifdef IFNET_BUF_RING 2696 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2697 /* let the NIC stop polling this queue, since there 2698 * are no more transmits pending */ 2699 if (tx->req == tx->done) { 2700 *tx->send_stop = 1; 2701 tx->queue_active = 0; 2702 tx->deactivate++; 2703 wmb(); 2704 } 2705 } 2706 #endif 2707 mtx_unlock(&ss->tx.mtx); 2708 2709 } 2710 2711 static struct mxge_media_type mxge_xfp_media_types[] = 2712 { 2713 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2714 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2715 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2716 {0, (1 << 5), "10GBASE-ER"}, 2717 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2718 {0, (1 << 3), "10GBASE-SW"}, 2719 {0, (1 << 2), "10GBASE-LW"}, 2720 {0, (1 << 1), "10GBASE-EW"}, 2721 {0, (1 << 0), "Reserved"} 2722 }; 2723 static struct mxge_media_type mxge_sfp_media_types[] = 2724 { 2725 {0, (1 << 7), "Reserved"}, 2726 {IFM_10G_LRM, (1 << 6), 
"10GBASE-LRM"}, 2727 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2728 {IFM_10G_SR, (1 << 4), "10GBASE-SR"} 2729 }; 2730 2731 static void 2732 mxge_set_media(mxge_softc_t *sc, int type) 2733 { 2734 sc->media_flags |= type; 2735 ifmedia_add(&sc->media, sc->media_flags, 0, NULL); 2736 ifmedia_set(&sc->media, sc->media_flags); 2737 } 2738 2739 2740 /* 2741 * Determine the media type for a NIC. Some XFPs will identify 2742 * themselves only when their link is up, so this is initiated via a 2743 * link up interrupt. However, this can potentially take up to 2744 * several milliseconds, so it is run via the watchdog routine, rather 2745 * than in the interrupt handler itself. This need only be done 2746 * once, not each time the link is up. 2747 */ 2748 static void 2749 mxge_media_probe(mxge_softc_t *sc) 2750 { 2751 mxge_cmd_t cmd; 2752 char *cage_type; 2753 char *ptr; 2754 struct mxge_media_type *mxge_media_types = NULL; 2755 int i, err, ms, mxge_media_type_entries; 2756 uint32_t byte; 2757 2758 sc->need_media_probe = 0; 2759 2760 /* if we've already set a media type, we're done */ 2761 if (sc->media_flags != (IFM_ETHER | IFM_AUTO)) 2762 return; 2763 2764 /* 2765 * parse the product code to deterimine the interface type 2766 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2767 * after the 3rd dash in the driver's cached copy of the 2768 * EEPROM's product code string. 2769 */ 2770 ptr = sc->product_code_string; 2771 if (ptr == NULL) { 2772 device_printf(sc->dev, "Missing product code\n"); 2773 } 2774 2775 for (i = 0; i < 3; i++, ptr++) { 2776 ptr = index(ptr, '-'); 2777 if (ptr == NULL) { 2778 device_printf(sc->dev, 2779 "only %d dashes in PC?!?\n", i); 2780 return; 2781 } 2782 } 2783 if (*ptr == 'C') { 2784 /* -C is CX4 */ 2785 mxge_set_media(sc, IFM_10G_CX4); 2786 return; 2787 } 2788 else if (*ptr == 'Q') { 2789 /* -Q is Quad Ribbon Fiber */ 2790 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2791 /* FreeBSD has no media type for Quad ribbon fiber */ 2792 return; 2793 } 2794 2795 if (*ptr == 'R') { 2796 /* -R is XFP */ 2797 mxge_media_types = mxge_xfp_media_types; 2798 mxge_media_type_entries = 2799 sizeof (mxge_xfp_media_types) / 2800 sizeof (mxge_xfp_media_types[0]); 2801 byte = MXGE_XFP_COMPLIANCE_BYTE; 2802 cage_type = "XFP"; 2803 } 2804 2805 if (*ptr == 'S' || *(ptr +1) == 'S') { 2806 /* -S or -2S is SFP+ */ 2807 mxge_media_types = mxge_sfp_media_types; 2808 mxge_media_type_entries = 2809 sizeof (mxge_sfp_media_types) / 2810 sizeof (mxge_sfp_media_types[0]); 2811 cage_type = "SFP+"; 2812 byte = 3; 2813 } 2814 2815 if (mxge_media_types == NULL) { 2816 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2817 return; 2818 } 2819 2820 /* 2821 * At this point we know the NIC has an XFP cage, so now we 2822 * try to determine what is in the cage by using the 2823 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2824 * register. 
We read just one byte, which may take over 2825 * a millisecond 2826 */ 2827 2828 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2829 cmd.data1 = byte; 2830 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2831 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2832 device_printf(sc->dev, "failed to read XFP\n"); 2833 } 2834 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2835 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2836 } 2837 if (err != MXGEFW_CMD_OK) { 2838 return; 2839 } 2840 2841 /* now we wait for the data to be cached */ 2842 cmd.data0 = byte; 2843 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2844 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2845 DELAY(1000); 2846 cmd.data0 = byte; 2847 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2848 } 2849 if (err != MXGEFW_CMD_OK) { 2850 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2851 cage_type, err, ms); 2852 return; 2853 } 2854 2855 if (cmd.data0 == mxge_media_types[0].bitmask) { 2856 if (mxge_verbose) 2857 device_printf(sc->dev, "%s:%s\n", cage_type, 2858 mxge_media_types[0].name); 2859 mxge_set_media(sc, IFM_10G_CX4); 2860 return; 2861 } 2862 for (i = 1; i < mxge_media_type_entries; i++) { 2863 if (cmd.data0 & mxge_media_types[i].bitmask) { 2864 if (mxge_verbose) 2865 device_printf(sc->dev, "%s:%s\n", 2866 cage_type, 2867 mxge_media_types[i].name); 2868 2869 mxge_set_media(sc, mxge_media_types[i].flag); 2870 return; 2871 } 2872 } 2873 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type, 2874 cmd.data0); 2875 2876 return; 2877 } 2878 2879 static void 2880 mxge_intr(void *arg) 2881 { 2882 struct mxge_slice_state *ss = arg; 2883 mxge_softc_t *sc = ss->sc; 2884 mcp_irq_data_t *stats = ss->fw_stats; 2885 mxge_tx_ring_t *tx = &ss->tx; 2886 mxge_rx_done_t *rx_done = &ss->rx_done; 2887 uint32_t send_done_count; 2888 uint8_t valid; 2889 2890 2891 #ifndef IFNET_BUF_RING 2892 /* an interrupt on a non-zero slice is implicitly valid 2893 since MSI-X irqs are not shared */ 2894 if (ss != sc->ss) { 2895 mxge_clean_rx_done(ss); 2896 *ss->irq_claim = be32toh(3); 2897 return; 2898 } 2899 #endif 2900 2901 /* make sure the DMA has finished */ 2902 if (!stats->valid) { 2903 return; 2904 } 2905 valid = stats->valid; 2906 2907 if (sc->legacy_irq) { 2908 /* lower legacy IRQ */ 2909 *sc->irq_deassert = 0; 2910 if (!mxge_deassert_wait) 2911 /* don't wait for conf. 
that irq is low */ 2912 stats->valid = 0; 2913 } else { 2914 stats->valid = 0; 2915 } 2916 2917 /* loop while waiting for legacy irq deassertion */ 2918 do { 2919 /* check for transmit completes and receives */ 2920 send_done_count = be32toh(stats->send_done_count); 2921 while ((send_done_count != tx->pkt_done) || 2922 (rx_done->entry[rx_done->idx].length != 0)) { 2923 if (send_done_count != tx->pkt_done) 2924 mxge_tx_done(ss, (int)send_done_count); 2925 mxge_clean_rx_done(ss); 2926 send_done_count = be32toh(stats->send_done_count); 2927 } 2928 if (sc->legacy_irq && mxge_deassert_wait) 2929 wmb(); 2930 } while (*((volatile uint8_t *) &stats->valid)); 2931 2932 /* fw link & error stats meaningful only on the first slice */ 2933 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 2934 if (sc->link_state != stats->link_up) { 2935 sc->link_state = stats->link_up; 2936 if (sc->link_state) { 2937 if_link_state_change(sc->ifp, LINK_STATE_UP); 2938 if (mxge_verbose) 2939 device_printf(sc->dev, "link up\n"); 2940 } else { 2941 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2942 if (mxge_verbose) 2943 device_printf(sc->dev, "link down\n"); 2944 } 2945 sc->need_media_probe = 1; 2946 } 2947 if (sc->rdma_tags_available != 2948 be32toh(stats->rdma_tags_available)) { 2949 sc->rdma_tags_available = 2950 be32toh(stats->rdma_tags_available); 2951 device_printf(sc->dev, "RDMA timed out! %d tags " 2952 "left\n", sc->rdma_tags_available); 2953 } 2954 2955 if (stats->link_down) { 2956 sc->down_cnt += stats->link_down; 2957 sc->link_state = 0; 2958 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2959 } 2960 } 2961 2962 /* check to see if we have rx token to pass back */ 2963 if (valid & 0x1) 2964 *ss->irq_claim = be32toh(3); 2965 *(ss->irq_claim + 1) = be32toh(3); 2966 } 2967 2968 static void 2969 mxge_init(void *arg) 2970 { 2971 } 2972 2973 2974 2975 static void 2976 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 2977 { 2978 struct lro_entry *lro_entry; 2979 int i; 2980 2981 while (!SLIST_EMPTY(&ss->lro_free)) { 2982 lro_entry = SLIST_FIRST(&ss->lro_free); 2983 SLIST_REMOVE_HEAD(&ss->lro_free, next); 2984 free(lro_entry, M_DEVBUF); 2985 } 2986 2987 for (i = 0; i <= ss->rx_big.mask; i++) { 2988 if (ss->rx_big.info[i].m == NULL) 2989 continue; 2990 bus_dmamap_unload(ss->rx_big.dmat, 2991 ss->rx_big.info[i].map); 2992 m_freem(ss->rx_big.info[i].m); 2993 ss->rx_big.info[i].m = NULL; 2994 } 2995 2996 for (i = 0; i <= ss->rx_small.mask; i++) { 2997 if (ss->rx_small.info[i].m == NULL) 2998 continue; 2999 bus_dmamap_unload(ss->rx_small.dmat, 3000 ss->rx_small.info[i].map); 3001 m_freem(ss->rx_small.info[i].m); 3002 ss->rx_small.info[i].m = NULL; 3003 } 3004 3005 /* transmit ring used only on the first slice */ 3006 if (ss->tx.info == NULL) 3007 return; 3008 3009 for (i = 0; i <= ss->tx.mask; i++) { 3010 ss->tx.info[i].flag = 0; 3011 if (ss->tx.info[i].m == NULL) 3012 continue; 3013 bus_dmamap_unload(ss->tx.dmat, 3014 ss->tx.info[i].map); 3015 m_freem(ss->tx.info[i].m); 3016 ss->tx.info[i].m = NULL; 3017 } 3018 } 3019 3020 static void 3021 mxge_free_mbufs(mxge_softc_t *sc) 3022 { 3023 int slice; 3024 3025 for (slice = 0; slice < sc->num_slices; slice++) 3026 mxge_free_slice_mbufs(&sc->ss[slice]); 3027 } 3028 3029 static void 3030 mxge_free_slice_rings(struct mxge_slice_state *ss) 3031 { 3032 int i; 3033 3034 3035 if (ss->rx_done.entry != NULL) 3036 mxge_dma_free(&ss->rx_done.dma); 3037 ss->rx_done.entry = NULL; 3038 3039 if (ss->tx.req_bytes != NULL) 3040 free(ss->tx.req_bytes, M_DEVBUF); 3041 
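	/*
	 * Every buffer in this teardown is freed and then set to NULL
	 * so that mxge_free_slice_rings() is safe to call again from
	 * an error path.  Below is a hypothetical helper macro (not
	 * part of this driver) capturing the same check/free/clear
	 * idiom the code spells out by hand:
	 */
#if 0
#define	MXGE_FREE_AND_CLEAR(p, type)		\
	do {					\
		if ((p) != NULL)		\
			free((p), (type));	\
		(p) = NULL;			\
	} while (0)
	/* e.g.: MXGE_FREE_AND_CLEAR(ss->tx.seg_list, M_DEVBUF); */
#endif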
ss->tx.req_bytes = NULL; 3042 3043 if (ss->tx.seg_list != NULL) 3044 free(ss->tx.seg_list, M_DEVBUF); 3045 ss->tx.seg_list = NULL; 3046 3047 if (ss->rx_small.shadow != NULL) 3048 free(ss->rx_small.shadow, M_DEVBUF); 3049 ss->rx_small.shadow = NULL; 3050 3051 if (ss->rx_big.shadow != NULL) 3052 free(ss->rx_big.shadow, M_DEVBUF); 3053 ss->rx_big.shadow = NULL; 3054 3055 if (ss->tx.info != NULL) { 3056 if (ss->tx.dmat != NULL) { 3057 for (i = 0; i <= ss->tx.mask; i++) { 3058 bus_dmamap_destroy(ss->tx.dmat, 3059 ss->tx.info[i].map); 3060 } 3061 bus_dma_tag_destroy(ss->tx.dmat); 3062 } 3063 free(ss->tx.info, M_DEVBUF); 3064 } 3065 ss->tx.info = NULL; 3066 3067 if (ss->rx_small.info != NULL) { 3068 if (ss->rx_small.dmat != NULL) { 3069 for (i = 0; i <= ss->rx_small.mask; i++) { 3070 bus_dmamap_destroy(ss->rx_small.dmat, 3071 ss->rx_small.info[i].map); 3072 } 3073 bus_dmamap_destroy(ss->rx_small.dmat, 3074 ss->rx_small.extra_map); 3075 bus_dma_tag_destroy(ss->rx_small.dmat); 3076 } 3077 free(ss->rx_small.info, M_DEVBUF); 3078 } 3079 ss->rx_small.info = NULL; 3080 3081 if (ss->rx_big.info != NULL) { 3082 if (ss->rx_big.dmat != NULL) { 3083 for (i = 0; i <= ss->rx_big.mask; i++) { 3084 bus_dmamap_destroy(ss->rx_big.dmat, 3085 ss->rx_big.info[i].map); 3086 } 3087 bus_dmamap_destroy(ss->rx_big.dmat, 3088 ss->rx_big.extra_map); 3089 bus_dma_tag_destroy(ss->rx_big.dmat); 3090 } 3091 free(ss->rx_big.info, M_DEVBUF); 3092 } 3093 ss->rx_big.info = NULL; 3094 } 3095 3096 static void 3097 mxge_free_rings(mxge_softc_t *sc) 3098 { 3099 int slice; 3100 3101 for (slice = 0; slice < sc->num_slices; slice++) 3102 mxge_free_slice_rings(&sc->ss[slice]); 3103 } 3104 3105 static int 3106 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3107 int tx_ring_entries) 3108 { 3109 mxge_softc_t *sc = ss->sc; 3110 size_t bytes; 3111 int err, i; 3112 3113 err = ENOMEM; 3114 3115 /* allocate per-slice receive resources */ 3116 3117 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3118 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3119 3120 /* allocate the rx shadow rings */ 3121 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3122 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3123 if (ss->rx_small.shadow == NULL) 3124 return err; 3125 3126 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3127 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3128 if (ss->rx_big.shadow == NULL) 3129 return err; 3130 3131 /* allocate the rx host info rings */ 3132 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3133 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3134 if (ss->rx_small.info == NULL) 3135 return err; 3136 3137 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3138 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3139 if (ss->rx_big.info == NULL) 3140 return err; 3141 3142 /* allocate the rx busdma resources */ 3143 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3144 1, /* alignment */ 3145 4096, /* boundary */ 3146 BUS_SPACE_MAXADDR, /* low */ 3147 BUS_SPACE_MAXADDR, /* high */ 3148 NULL, NULL, /* filter */ 3149 MHLEN, /* maxsize */ 3150 1, /* num segs */ 3151 MHLEN, /* maxsegsize */ 3152 BUS_DMA_ALLOCNOW, /* flags */ 3153 NULL, NULL, /* lock */ 3154 &ss->rx_small.dmat); /* tag */ 3155 if (err != 0) { 3156 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3157 err); 3158 return err; 3159 } 3160 3161 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3162 1, /* alignment */ 3163 #if MXGE_VIRT_JUMBOS 3164 4096, /* boundary */ 3165 #else 3166 0, /* boundary */ 3167 #endif 3168 BUS_SPACE_MAXADDR, /* low */ 3169 BUS_SPACE_MAXADDR, /* high */ 3170 NULL, NULL, /* filter */ 3171 3*4096, /* maxsize */ 3172 #if MXGE_VIRT_JUMBOS 3173 3, /* num segs */ 3174 4096, /* maxsegsize */ 3175 #else 3176 1, /* num segs */ 3177 MJUM9BYTES, /* maxsegsize */ 3178 #endif 3179 BUS_DMA_ALLOCNOW, /* flags */ 3180 NULL, NULL, /* lock */ 3181 &ss->rx_big.dmat); /* tag */ 3182 if (err != 0) { 3183 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3184 err); 3185 return err; 3186 } 3187 for (i = 0; i <= ss->rx_small.mask; i++) { 3188 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3189 &ss->rx_small.info[i].map); 3190 if (err != 0) { 3191 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3192 err); 3193 return err; 3194 } 3195 } 3196 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3197 &ss->rx_small.extra_map); 3198 if (err != 0) { 3199 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3200 err); 3201 return err; 3202 } 3203 3204 for (i = 0; i <= ss->rx_big.mask; i++) { 3205 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3206 &ss->rx_big.info[i].map); 3207 if (err != 0) { 3208 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3209 err); 3210 return err; 3211 } 3212 } 3213 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3214 &ss->rx_big.extra_map); 3215 if (err != 0) { 3216 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3217 err); 3218 return err; 3219 } 3220 3221 /* now allocate TX resources */ 3222 3223 #ifndef IFNET_BUF_RING 3224 /* only use a single TX ring for now */ 3225 if (ss != ss->sc->ss) 3226 return 0; 3227 #endif 3228 3229 ss->tx.mask = tx_ring_entries - 1; 3230 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3231 3232 3233 /* allocate the tx request copy block */ 3234 bytes = 8 + 3235 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3236 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3237 if (ss->tx.req_bytes == NULL) 3238 return err; 3239 /* ensure req_list entries are aligned to 8 bytes */ 3240 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3241 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3242 3243 /* allocate the tx busdma segment list */ 3244 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3245 ss->tx.seg_list = (bus_dma_segment_t *) 3246 malloc(bytes, M_DEVBUF, M_WAITOK); 3247 if (ss->tx.seg_list == NULL) 3248 return err; 3249 3250 /* allocate the tx host info ring */ 3251 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3252 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3253 if (ss->tx.info == NULL) 3254 return err; 3255 3256 /* allocate the tx busdma resources */ 3257 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3258 1, /* alignment */ 3259 sc->tx_boundary, /* boundary */ 3260 BUS_SPACE_MAXADDR, /* low */ 3261 BUS_SPACE_MAXADDR, /* high */ 3262 NULL, NULL, /* filter */ 3263 65536 + 256, /* maxsize */ 3264 ss->tx.max_desc - 2, /* num segs */ 3265 sc->tx_boundary, /* maxsegsz */ 3266 BUS_DMA_ALLOCNOW, /* flags */ 3267 NULL, NULL, /* lock */ 3268 &ss->tx.dmat); /* tag */ 3269 3270 if (err != 0) { 3271 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3272 err); 3273 return err; 3274 } 3275 3276 /* now use these tags to set up dmamaps for each slot 3277 in the ring */ 3278 for (i = 0; i <= ss->tx.mask; i++) { 3279 err = bus_dmamap_create(ss->tx.dmat, 0, 3280 &ss->tx.info[i].map); 3281 if (err != 0) { 3282 device_printf(sc->dev, "Err %d tx dmamap\n", 3283 err); 3284 return err; 3285 } 3286 } 3287 return 0; 3288
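	/*
	 * The tx tag created above is sized with TSO in mind: a maxsize
	 * of 65536 + 256 bytes appears intended to cover a full 64KB
	 * TSO payload plus header slack, and limiting a mapping to
	 * (max_desc - 2) segments presumably leaves descriptor headroom
	 * for the runt-pad and TSO-chop cases in the encap paths.  A
	 * minimal sketch of the load/sync/unload life cycle these maps
	 * go through ("m" and "idx" are illustrative, not locals of
	 * this function):
	 */
#if 0
	struct mbuf *m;		/* hypothetical frame to map */
	int idx = 0, nsegs;

	err = bus_dmamap_load_mbuf_sg(ss->tx.dmat, ss->tx.info[idx].map,
				      m, ss->tx.seg_list, &nsegs,
				      BUS_DMA_NOWAIT);
	if (err == 0) {
		bus_dmamap_sync(ss->tx.dmat, ss->tx.info[idx].map,
				BUS_DMASYNC_PREWRITE);
		/* ... build send requests from ss->tx.seg_list ... */
		bus_dmamap_unload(ss->tx.dmat, ss->tx.info[idx].map);
	}
#endif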
3289 } 3290 3291 static int 3292 mxge_alloc_rings(mxge_softc_t *sc) 3293 { 3294 mxge_cmd_t cmd; 3295 int tx_ring_size; 3296 int tx_ring_entries, rx_ring_entries; 3297 int err, slice; 3298 3299 /* get ring sizes */ 3300 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3301 tx_ring_size = cmd.data0; 3302 if (err != 0) { 3303 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3304 goto abort; 3305 } 3306 3307 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3308 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3309 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3310 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3311 IFQ_SET_READY(&sc->ifp->if_snd); 3312 3313 for (slice = 0; slice < sc->num_slices; slice++) { 3314 err = mxge_alloc_slice_rings(&sc->ss[slice], 3315 rx_ring_entries, 3316 tx_ring_entries); 3317 if (err != 0) 3318 goto abort; 3319 } 3320 return 0; 3321 3322 abort: 3323 mxge_free_rings(sc); 3324 return err; 3325 3326 } 3327 3328 3329 static void 3330 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3331 { 3332 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3333 3334 if (bufsize < MCLBYTES) { 3335 /* easy, everything fits in a single buffer */ 3336 *big_buf_size = MCLBYTES; 3337 *cl_size = MCLBYTES; 3338 *nbufs = 1; 3339 return; 3340 } 3341 3342 if (bufsize < MJUMPAGESIZE) { 3343 /* still easy, everything still fits in a single buffer */ 3344 *big_buf_size = MJUMPAGESIZE; 3345 *cl_size = MJUMPAGESIZE; 3346 *nbufs = 1; 3347 return; 3348 } 3349 #if MXGE_VIRT_JUMBOS 3350 /* now we need to use virtually contiguous buffers */ 3351 *cl_size = MJUM9BYTES; 3352 *big_buf_size = 4096; 3353 *nbufs = mtu / 4096 + 1; 3354 /* needs to be a power of two, so round up */ 3355 if (*nbufs == 3) 3356 *nbufs = 4; 3357 #else 3358 *cl_size = MJUM9BYTES; 3359 *big_buf_size = MJUM9BYTES; 3360 *nbufs = 1; 3361 #endif 3362 } 3363 3364 static int 3365 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3366 { 3367 mxge_softc_t *sc; 3368 mxge_cmd_t cmd; 3369 bus_dmamap_t map; 3370 struct lro_entry *lro_entry; 3371 int err, i, slice; 3372 3373 3374 sc = ss->sc; 3375 slice = ss - sc->ss; 3376 3377 SLIST_INIT(&ss->lro_free); 3378 SLIST_INIT(&ss->lro_active); 3379 3380 for (i = 0; i < sc->lro_cnt; i++) { 3381 lro_entry = (struct lro_entry *) 3382 malloc(sizeof (*lro_entry), M_DEVBUF, 3383 M_NOWAIT | M_ZERO); 3384 if (lro_entry == NULL) { 3385 sc->lro_cnt = i; 3386 break; 3387 } 3388 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); 3389 } 3390 /* get the lanai pointers to the send and receive rings */ 3391 3392 err = 0; 3393 #ifndef IFNET_BUF_RING 3394 /* We currently only send from the first slice */ 3395 if (slice == 0) { 3396 #endif 3397 cmd.data0 = slice; 3398 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3399 ss->tx.lanai = 3400 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3401 ss->tx.send_go = (volatile uint32_t *) 3402 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3403 ss->tx.send_stop = (volatile uint32_t *) 3404 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3405 #ifndef IFNET_BUF_RING 3406 } 3407 #endif 3408 cmd.data0 = slice; 3409 err |= mxge_send_cmd(sc, 3410 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3411 ss->rx_small.lanai = 3412 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3413 cmd.data0 = slice; 3414 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3415 ss->rx_big.lanai = 3416 (volatile mcp_kreq_ether_recv_t 
*)(sc->sram + cmd.data0); 3417 3418 if (err != 0) { 3419 device_printf(sc->dev, 3420 "failed to get ring sizes or locations\n"); 3421 return EIO; 3422 } 3423 3424 /* stock receive rings */ 3425 for (i = 0; i <= ss->rx_small.mask; i++) { 3426 map = ss->rx_small.info[i].map; 3427 err = mxge_get_buf_small(ss, map, i); 3428 if (err) { 3429 device_printf(sc->dev, "alloced %d/%d smalls\n", 3430 i, ss->rx_small.mask + 1); 3431 return ENOMEM; 3432 } 3433 } 3434 for (i = 0; i <= ss->rx_big.mask; i++) { 3435 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3436 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3437 } 3438 ss->rx_big.nbufs = nbufs; 3439 ss->rx_big.cl_size = cl_size; 3440 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3441 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3442 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3443 map = ss->rx_big.info[i].map; 3444 err = mxge_get_buf_big(ss, map, i); 3445 if (err) { 3446 device_printf(sc->dev, "alloced %d/%d bigs\n", 3447 i, ss->rx_big.mask + 1); 3448 return ENOMEM; 3449 } 3450 } 3451 return 0; 3452 } 3453 3454 static int 3455 mxge_open(mxge_softc_t *sc) 3456 { 3457 mxge_cmd_t cmd; 3458 int err, big_bytes, nbufs, slice, cl_size, i; 3459 bus_addr_t bus; 3460 volatile uint8_t *itable; 3461 struct mxge_slice_state *ss; 3462 3463 /* Copy the MAC address in case it was overridden */ 3464 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3465 3466 err = mxge_reset(sc, 1); 3467 if (err != 0) { 3468 device_printf(sc->dev, "failed to reset\n"); 3469 return EIO; 3470 } 3471 3472 if (sc->num_slices > 1) { 3473 /* setup the indirection table */ 3474 cmd.data0 = sc->num_slices; 3475 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3476 &cmd); 3477 3478 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3479 &cmd); 3480 if (err != 0) { 3481 device_printf(sc->dev, 3482 "failed to setup rss tables\n"); 3483 return err; 3484 } 3485 3486 /* just enable an identity mapping */ 3487 itable = sc->sram + cmd.data0; 3488 for (i = 0; i < sc->num_slices; i++) 3489 itable[i] = (uint8_t)i; 3490 3491 cmd.data0 = 1; 3492 cmd.data1 = mxge_rss_hash_type; 3493 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3494 if (err != 0) { 3495 device_printf(sc->dev, "failed to enable slices\n"); 3496 return err; 3497 } 3498 } 3499 3500 3501 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3502 3503 cmd.data0 = nbufs; 3504 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3505 &cmd); 3506 /* error is only meaningful if we're trying to set 3507 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3508 if (err && nbufs > 1) { 3509 device_printf(sc->dev, 3510 "Failed to set alway-use-n to %d\n", 3511 nbufs); 3512 return EIO; 3513 } 3514 /* Give the firmware the mtu and the big and small buffer 3515 sizes. The firmware wants the big buf size to be a power 3516 of two. 
Luckily, FreeBSD's clusters are powers of two */ 3517 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3518 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3519 cmd.data0 = MHLEN - MXGEFW_PAD; 3520 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3521 &cmd); 3522 cmd.data0 = big_bytes; 3523 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3524 3525 if (err != 0) { 3526 device_printf(sc->dev, "failed to setup params\n"); 3527 goto abort; 3528 } 3529 3530 /* Now give him the pointer to the stats block */ 3531 for (slice = 0; 3532 #ifdef IFNET_BUF_RING 3533 slice < sc->num_slices; 3534 #else 3535 slice < 1; 3536 #endif 3537 slice++) { 3538 ss = &sc->ss[slice]; 3539 cmd.data0 = 3540 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3541 cmd.data1 = 3542 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3543 cmd.data2 = sizeof(struct mcp_irq_data); 3544 cmd.data2 |= (slice << 16); 3545 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3546 } 3547 3548 if (err != 0) { 3549 bus = sc->ss->fw_stats_dma.bus_addr; 3550 bus += offsetof(struct mcp_irq_data, send_done_count); 3551 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3552 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3553 err = mxge_send_cmd(sc, 3554 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3555 &cmd); 3556 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3557 sc->fw_multicast_support = 0; 3558 } else { 3559 sc->fw_multicast_support = 1; 3560 } 3561 3562 if (err != 0) { 3563 device_printf(sc->dev, "failed to setup params\n"); 3564 goto abort; 3565 } 3566 3567 for (slice = 0; slice < sc->num_slices; slice++) { 3568 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3569 if (err != 0) { 3570 device_printf(sc->dev, "couldn't open slice %d\n", 3571 slice); 3572 goto abort; 3573 } 3574 } 3575 3576 /* Finally, start the firmware running */ 3577 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3578 if (err) { 3579 device_printf(sc->dev, "Couldn't bring up link\n"); 3580 goto abort; 3581 } 3582 #ifdef IFNET_BUF_RING 3583 for (slice = 0; slice < sc->num_slices; slice++) { 3584 ss = &sc->ss[slice]; 3585 ss->if_drv_flags |= IFF_DRV_RUNNING; 3586 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3587 } 3588 #endif 3589 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3590 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3591 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3592 3593 return 0; 3594 3595 3596 abort: 3597 mxge_free_mbufs(sc); 3598 3599 return err; 3600 } 3601 3602 static int 3603 mxge_close(mxge_softc_t *sc) 3604 { 3605 mxge_cmd_t cmd; 3606 int err, old_down_cnt; 3607 #ifdef IFNET_BUF_RING 3608 struct mxge_slice_state *ss; 3609 int slice; 3610 #endif 3611 3612 callout_stop(&sc->co_hdl); 3613 #ifdef IFNET_BUF_RING 3614 for (slice = 0; slice < sc->num_slices; slice++) { 3615 ss = &sc->ss[slice]; 3616 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3617 } 3618 #endif 3619 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3620 old_down_cnt = sc->down_cnt; 3621 wmb(); 3622 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3623 if (err) { 3624 device_printf(sc->dev, "Couldn't bring down link\n"); 3625 } 3626 if (old_down_cnt == sc->down_cnt) { 3627 /* wait for down irq */ 3628 DELAY(10 * sc->intr_coal_delay); 3629 } 3630 wmb(); 3631 if (old_down_cnt == sc->down_cnt) { 3632 device_printf(sc->dev, "never got down irq\n"); 3633 } 3634 3635 mxge_free_mbufs(sc); 3636 3637 return 0; 3638 } 3639 3640 static void 3641 mxge_setup_cfg_space(mxge_softc_t *sc) 3642 { 3643 device_t dev = sc->dev; 3644 int reg; 3645 uint16_t cmd, lnk, 
pectl; 3646 3647 /* find the PCIe link width and set max read request to 4KB */ 3648 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) { 3649 lnk = pci_read_config(dev, reg + 0x12, 2); 3650 sc->link_width = (lnk >> 4) & 0x3f; 3651 3652 pectl = pci_read_config(dev, reg + 0x8, 2); 3653 pectl = (pectl & ~0x7000) | (5 << 12); 3654 pci_write_config(dev, reg + 0x8, pectl, 2); 3655 } 3656 3657 /* Enable DMA and Memory space access */ 3658 pci_enable_busmaster(dev); 3659 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 3660 cmd |= PCIM_CMD_MEMEN; 3661 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 3662 } 3663 3664 static uint32_t 3665 mxge_read_reboot(mxge_softc_t *sc) 3666 { 3667 device_t dev = sc->dev; 3668 uint32_t vs; 3669 3670 /* find the vendor specific offset */ 3671 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 3672 device_printf(sc->dev, 3673 "could not find vendor specific offset\n"); 3674 return (uint32_t)-1; 3675 } 3676 /* enable read32 mode */ 3677 pci_write_config(dev, vs + 0x10, 0x3, 1); 3678 /* tell NIC which register to read */ 3679 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3680 return (pci_read_config(dev, vs + 0x14, 4)); 3681 } 3682 3683 static int 3684 mxge_watchdog_reset(mxge_softc_t *sc, int slice) 3685 { 3686 struct pci_devinfo *dinfo; 3687 mxge_tx_ring_t *tx; 3688 int err; 3689 uint32_t reboot; 3690 uint16_t cmd; 3691 3692 err = ENXIO; 3693 3694 device_printf(sc->dev, "Watchdog reset!\n"); 3695 3696 /* 3697 * check to see if the NIC rebooted. If it did, then all of 3698 * PCI config space has been reset, and things like the 3699 * busmaster bit will be zero. If this is the case, then we 3700 * must restore PCI config space before the NIC can be used 3701 * again 3702 */ 3703 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3704 if (cmd == 0xffff) { 3705 /* 3706 * maybe the watchdog caught the NIC rebooting; wait 3707 * up to 100ms for it to finish.
If it does not come 3708 * back, then give up 3709 */ 3710 DELAY(1000*100); 3711 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3712 if (cmd == 0xffff) { 3713 device_printf(sc->dev, "NIC disappeared!\n"); 3714 return (err); 3715 } 3716 } 3717 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3718 /* print the reboot status */ 3719 reboot = mxge_read_reboot(sc); 3720 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3721 reboot); 3722 /* restore PCI configuration space */ 3723 dinfo = device_get_ivars(sc->dev); 3724 pci_cfg_restore(sc->dev, dinfo); 3725 3726 /* and redo any changes we made to our config space */ 3727 mxge_setup_cfg_space(sc); 3728 3729 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) { 3730 mxge_close(sc); 3731 err = mxge_open(sc); 3732 } 3733 } else { 3734 tx = &sc->ss[slice].tx; 3735 device_printf(sc->dev, 3736 "NIC did not reboot, slice %d ring state:\n", 3737 slice); 3738 device_printf(sc->dev, 3739 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3740 tx->req, tx->done, tx->queue_active); 3741 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3742 tx->activate, tx->deactivate); 3743 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3744 tx->pkt_done, 3745 be32toh(sc->ss->fw_stats->send_done_count)); 3746 device_printf(sc->dev, "not resetting\n"); 3747 } 3748 return (err); 3749 } 3750 3751 static int 3752 mxge_watchdog(mxge_softc_t *sc) 3753 { 3754 mxge_tx_ring_t *tx; 3755 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3756 int i, err = 0; 3757 3758 /* see if we have outstanding transmits, which 3759 have been pending for more than mxge_ticks */ 3760 for (i = 0; 3761 #ifdef IFNET_BUF_RING 3762 (i < sc->num_slices) && (err == 0); 3763 #else 3764 (i < 1) && (err == 0); 3765 #endif 3766 i++) { 3767 tx = &sc->ss[i].tx; 3768 if (tx->req != tx->done && 3769 tx->watchdog_req != tx->watchdog_done && 3770 tx->done == tx->watchdog_done) { 3771 /* check for pause blocking before resetting */ 3772 if (tx->watchdog_rx_pause == rx_pause) 3773 err = mxge_watchdog_reset(sc, i); 3774 else 3775 device_printf(sc->dev, "Flow control blocking " 3776 "xmits, check link partner\n"); 3777 } 3778 3779 tx->watchdog_req = tx->req; 3780 tx->watchdog_done = tx->done; 3781 tx->watchdog_rx_pause = rx_pause; 3782 } 3783 3784 if (sc->need_media_probe) 3785 mxge_media_probe(sc); 3786 return (err); 3787 } 3788 3789 static void 3790 mxge_update_stats(mxge_softc_t *sc) 3791 { 3792 struct mxge_slice_state *ss; 3793 u_long ipackets = 0; 3794 u_long opackets = 0; 3795 #ifdef IFNET_BUF_RING 3796 u_long obytes = 0; 3797 u_long omcasts = 0; 3798 u_long odrops = 0; 3799 #endif 3800 u_long oerrors = 0; 3801 int slice; 3802 3803 for (slice = 0; slice < sc->num_slices; slice++) { 3804 ss = &sc->ss[slice]; 3805 ipackets += ss->ipackets; 3806 opackets += ss->opackets; 3807 #ifdef IFNET_BUF_RING 3808 obytes += ss->obytes; 3809 omcasts += ss->omcasts; 3810 odrops += ss->tx.br->br_drops; 3811 #endif 3812 oerrors += ss->oerrors; 3813 } 3814 sc->ifp->if_ipackets = ipackets; 3815 sc->ifp->if_opackets = opackets; 3816 #ifdef IFNET_BUF_RING 3817 sc->ifp->if_obytes = obytes; 3818 sc->ifp->if_omcasts = omcasts; 3819 sc->ifp->if_snd.ifq_drops = odrops; 3820 #endif 3821 sc->ifp->if_oerrors = oerrors; 3822 } 3823 3824 static void 3825 mxge_tick(void *arg) 3826 { 3827 mxge_softc_t *sc = arg; 3828 int err = 0; 3829 3830 /* aggregate stats from different slices */ 3831 mxge_update_stats(sc); 3832 if (!sc->watchdog_countdown) { 3833 err = mxge_watchdog(sc); 3834 sc->watchdog_countdown = 4; 3835 } 3836 
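	/*
	 * The countdown above makes the (comparatively expensive)
	 * watchdog scan run only on every 4th tick.  With the default
	 * tunables this works out to roughly once every two seconds;
	 * a sketch of the arithmetic, assuming the defaults set in
	 * mxge_fetch_tunables():
	 */
#if 0
	/* mxge_ticks defaults to hz / 2, i.e. the callout fires every
	 * half second; the watchdog then runs on every 4th firing:
	 * 4 * (hz / 2) ticks == ~2 seconds */
	int watchdog_period_ticks = 4 * (hz / 2);
#endif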
sc->watchdog_countdown--; 3837 if (err == 0) 3838 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3839 3840 } 3841 3842 static int 3843 mxge_media_change(struct ifnet *ifp) 3844 { 3845 return EINVAL; 3846 } 3847 3848 static int 3849 mxge_change_mtu(mxge_softc_t *sc, int mtu) 3850 { 3851 struct ifnet *ifp = sc->ifp; 3852 int real_mtu, old_mtu; 3853 int err = 0; 3854 3855 3856 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3857 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 3858 return EINVAL; 3859 mtx_lock(&sc->driver_mtx); 3860 old_mtu = ifp->if_mtu; 3861 ifp->if_mtu = mtu; 3862 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3863 mxge_close(sc); 3864 err = mxge_open(sc); 3865 if (err != 0) { 3866 ifp->if_mtu = old_mtu; 3867 mxge_close(sc); 3868 (void) mxge_open(sc); 3869 } 3870 } 3871 mtx_unlock(&sc->driver_mtx); 3872 return err; 3873 } 3874 3875 static void 3876 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3877 { 3878 mxge_softc_t *sc = ifp->if_softc; 3879 3880 3881 if (sc == NULL) 3882 return; 3883 ifmr->ifm_status = IFM_AVALID; 3884 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 3885 ifmr->ifm_active = IFM_AUTO | IFM_ETHER; 3886 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0; 3887 } 3888 3889 static int 3890 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 3891 { 3892 mxge_softc_t *sc = ifp->if_softc; 3893 struct ifreq *ifr = (struct ifreq *)data; 3894 int err, mask; 3895 3896 err = 0; 3897 switch (command) { 3898 case SIOCSIFADDR: 3899 case SIOCGIFADDR: 3900 err = ether_ioctl(ifp, command, data); 3901 break; 3902 3903 case SIOCSIFMTU: 3904 err = mxge_change_mtu(sc, ifr->ifr_mtu); 3905 break; 3906 3907 case SIOCSIFFLAGS: 3908 mtx_lock(&sc->driver_mtx); 3909 if (sc->dying) { 3910 mtx_unlock(&sc->driver_mtx); 3911 return EINVAL; 3912 } 3913 if (ifp->if_flags & IFF_UP) { 3914 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 3915 err = mxge_open(sc); 3916 } else { 3917 /* take care of promisc and allmulti 3918 flag changes */ 3919 mxge_change_promisc(sc, 3920 ifp->if_flags & IFF_PROMISC); 3921 mxge_set_multicast_list(sc); 3922 } 3923 } else { 3924 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3925 mxge_close(sc); 3926 } 3927 } 3928 mtx_unlock(&sc->driver_mtx); 3929 break; 3930 3931 case SIOCADDMULTI: 3932 case SIOCDELMULTI: 3933 mtx_lock(&sc->driver_mtx); 3934 mxge_set_multicast_list(sc); 3935 mtx_unlock(&sc->driver_mtx); 3936 break; 3937 3938 case SIOCSIFCAP: 3939 mtx_lock(&sc->driver_mtx); 3940 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 3941 if (mask & IFCAP_TXCSUM) { 3942 if (IFCAP_TXCSUM & ifp->if_capenable) { 3943 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 3944 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 3945 | CSUM_TSO); 3946 } else { 3947 ifp->if_capenable |= IFCAP_TXCSUM; 3948 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 3949 } 3950 } else if (mask & IFCAP_RXCSUM) { 3951 if (IFCAP_RXCSUM & ifp->if_capenable) { 3952 ifp->if_capenable &= ~IFCAP_RXCSUM; 3953 sc->csum_flag = 0; 3954 } else { 3955 ifp->if_capenable |= IFCAP_RXCSUM; 3956 sc->csum_flag = 1; 3957 } 3958 } 3959 if (mask & IFCAP_TSO4) { 3960 if (IFCAP_TSO4 & ifp->if_capenable) { 3961 ifp->if_capenable &= ~IFCAP_TSO4; 3962 ifp->if_hwassist &= ~CSUM_TSO; 3963 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 3964 ifp->if_capenable |= IFCAP_TSO4; 3965 ifp->if_hwassist |= CSUM_TSO; 3966 } else { 3967 printf("mxge requires tx checksum offload" 3968 " be enabled to use TSO\n"); 3969 err = EINVAL; 3970 } 3971 } 3972 if (mask & IFCAP_LRO) { 3973 if (IFCAP_LRO & ifp->if_capenable) 3974 err =
mxge_change_lro_locked(sc, 0); 3975 else 3976 err = mxge_change_lro_locked(sc, mxge_lro_cnt); 3977 } 3978 if (mask & IFCAP_VLAN_HWTAGGING) 3979 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 3980 mtx_unlock(&sc->driver_mtx); 3981 VLAN_CAPABILITIES(ifp); 3982 3983 break; 3984 3985 case SIOCGIFMEDIA: 3986 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 3987 &sc->media, command); 3988 break; 3989 3990 default: 3991 err = ENOTTY; 3992 } 3993 return err; 3994 } 3995 3996 static void 3997 mxge_fetch_tunables(mxge_softc_t *sc) 3998 { 3999 4000 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4001 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4002 &mxge_flow_control); 4003 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4004 &mxge_intr_coal_delay); 4005 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4006 &mxge_nvidia_ecrc_enable); 4007 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4008 &mxge_force_firmware); 4009 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4010 &mxge_deassert_wait); 4011 TUNABLE_INT_FETCH("hw.mxge.verbose", 4012 &mxge_verbose); 4013 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4014 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); 4015 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4016 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4017 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4018 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4019 if (sc->lro_cnt != 0) 4020 mxge_lro_cnt = sc->lro_cnt; 4021 4022 if (bootverbose) 4023 mxge_verbose = 1; 4024 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4025 mxge_intr_coal_delay = 30; 4026 if (mxge_ticks == 0) 4027 mxge_ticks = hz / 2; 4028 sc->pause = mxge_flow_control; 4029 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4030 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4031 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT; 4032 } 4033 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4034 mxge_initial_mtu < ETHER_MIN_LEN) 4035 mxge_initial_mtu = ETHERMTU_JUMBO; 4036 } 4037 4038 4039 static void 4040 mxge_free_slices(mxge_softc_t *sc) 4041 { 4042 struct mxge_slice_state *ss; 4043 int i; 4044 4045 4046 if (sc->ss == NULL) 4047 return; 4048 4049 for (i = 0; i < sc->num_slices; i++) { 4050 ss = &sc->ss[i]; 4051 if (ss->fw_stats != NULL) { 4052 mxge_dma_free(&ss->fw_stats_dma); 4053 ss->fw_stats = NULL; 4054 #ifdef IFNET_BUF_RING 4055 if (ss->tx.br != NULL) { 4056 drbr_free(ss->tx.br, M_DEVBUF); 4057 ss->tx.br = NULL; 4058 } 4059 #endif 4060 mtx_destroy(&ss->tx.mtx); 4061 } 4062 if (ss->rx_done.entry != NULL) { 4063 mxge_dma_free(&ss->rx_done.dma); 4064 ss->rx_done.entry = NULL; 4065 } 4066 } 4067 free(sc->ss, M_DEVBUF); 4068 sc->ss = NULL; 4069 } 4070 4071 static int 4072 mxge_alloc_slices(mxge_softc_t *sc) 4073 { 4074 mxge_cmd_t cmd; 4075 struct mxge_slice_state *ss; 4076 size_t bytes; 4077 int err, i, max_intr_slots; 4078 4079 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4080 if (err != 0) { 4081 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4082 return err; 4083 } 4084 sc->rx_ring_size = cmd.data0; 4085 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4086 4087 bytes = sizeof (*sc->ss) * sc->num_slices; 4088 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4089 if (sc->ss == NULL) 4090 return (ENOMEM); 4091 for (i = 0; i < sc->num_slices; i++) { 4092 ss = &sc->ss[i]; 4093 4094 ss->sc = sc; 4095 4096 /* allocate per-slice rx interrupt queues */ 4097 4098 bytes = max_intr_slots * sizeof 
static void
mxge_slice_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *old_fw;
	int msix_cnt, status, max_intr_slots;

	sc->num_slices = 1;
	/*
	 * use a single slice unless multiple slices were requested
	 * via the hw.mxge.max_slices tunable and this is an SMP system
	 */
	if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
		return;

	/* see how many MSI-X interrupts are available */
	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;

	/* now load the slice aware firmware and see what it supports */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/* get rx ring size */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));

	/* tell it the size of the interrupt queues */
	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/* ask for the maximum number of slices it supports */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
			      "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	sc->num_slices = cmd.data0;
	if (sc->num_slices > msix_cnt)
		sc->num_slices = msix_cnt;

	if (mxge_max_slices == -1) {
		/* cap to number of CPUs in system */
		if (sc->num_slices > mp_ncpus)
			sc->num_slices = mp_ncpus;
	} else {
		if (sc->num_slices > mxge_max_slices)
			sc->num_slices = mxge_max_slices;
	}
	/* make sure it is a power of two */
	while (sc->num_slices & (sc->num_slices - 1))
		sc->num_slices--;

	if (mxge_verbose)
		device_printf(sc->dev, "using %d slices\n",
			      sc->num_slices);

	return;

abort_with_fw:
	sc->fw_name = old_fw;
	(void) mxge_load_firmware(sc, 0);
}
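
/*
 * Allocate one MSI-X vector per slice and hook each vector to
 * mxge_intr() with that slice as its argument.  The MSI-X table
 * lives in BAR(2).  On failure, the abort_with_* labels unwind the
 * partially constructed state in reverse order.
 */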
static int
mxge_add_msix_irqs(mxge_softc_t *sc)
{
	size_t bytes;
	int count, err, i, rid;

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
						    &rid, RF_ACTIVE);

	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
		return ENXIO;
	}

	count = sc->num_slices;
	err = pci_alloc_msix(sc->dev, &count);
	if (err != 0) {
		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
			      "err = %d\n", sc->num_slices, err);
		goto abort_with_msix_table;
	}
	if (count < sc->num_slices) {
		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
			      sc->num_slices, count);
		device_printf(sc->dev,
			      "Try setting hw.mxge.max_slices to %d\n",
			      count);
		err = ENOSPC;
		goto abort_with_msix;
	}
	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
	sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_irq_res == NULL) {
		err = ENOMEM;
		goto abort_with_msix;
	}

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
							     SYS_RES_IRQ,
							     &rid, RF_ACTIVE);
		if (sc->msix_irq_res[i] == NULL) {
			device_printf(sc->dev, "couldn't allocate IRQ res"
				      " for message %d\n", i);
			err = ENXIO;
			goto abort_with_res;
		}
	}

	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
	sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_ih == NULL) {
		err = ENOMEM;
		goto abort_with_res;
	}

	for (i = 0; i < sc->num_slices; i++) {
		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
				     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
				     NULL,
#endif
				     mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
		if (err != 0) {
			device_printf(sc->dev, "couldn't setup intr for "
				      "message %d\n", i);
			goto abort_with_intr;
		}
	}

	if (mxge_verbose) {
		device_printf(sc->dev, "using %d msix IRQs:",
			      sc->num_slices);
		for (i = 0; i < sc->num_slices; i++)
			printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
		printf("\n");
	}
	return (0);

abort_with_intr:
	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

abort_with_res:
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

abort_with_msix:
	pci_release_msi(sc->dev);

abort_with_msix_table:
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	return err;
}

static int
mxge_add_single_irq(mxge_softc_t *sc)
{
	int count, err, rid;

	count = pci_msi_count(sc->dev);
	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
		rid = 1;
	} else {
		rid = 0;
		sc->legacy_irq = 1;
	}
	sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
					 1, RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}
	if (mxge_verbose)
		device_printf(sc->dev, "using %s irq %ld\n",
			      sc->legacy_irq ? "INTx" : "MSI",
			      rman_get_start(sc->irq_res));
	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
			     NULL,
#endif
			     mxge_intr, &sc->ss[0], &sc->ih);
	if (err != 0) {
		bus_release_resource(sc->dev, SYS_RES_IRQ,
				     sc->legacy_irq ? 0 : 1, sc->irq_res);
		if (!sc->legacy_irq)
			pci_release_msi(sc->dev);
	}
	return err;
}
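
/*
 * Interrupt teardown: release handlers, IRQ resources, the MSI-X
 * table and MSI state in the reverse of the order in which
 * mxge_add_msix_irqs() / mxge_add_single_irq() acquired them.
 */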
static void
mxge_rem_msix_irqs(mxge_softc_t *sc)
{
	int i, rid;

	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	pci_release_msi(sc->dev);
	return;
}

static void
mxge_rem_single_irq(mxge_softc_t *sc)
{
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	bus_release_resource(sc->dev, SYS_RES_IRQ,
			     sc->legacy_irq ? 0 : 1, sc->irq_res);
	if (!sc->legacy_irq)
		pci_release_msi(sc->dev);
}

static void
mxge_rem_irq(mxge_softc_t *sc)
{
	if (sc->num_slices > 1)
		mxge_rem_msix_irqs(sc);
	else
		mxge_rem_single_irq(sc);
}

static int
mxge_add_irq(mxge_softc_t *sc)
{
	int err;

	if (sc->num_slices > 1)
		err = mxge_add_msix_irqs(sc);
	else
		err = mxge_add_single_irq(sc);

	/* deliberately disabled ("0 &&"): exercises MSI-X
	   teardown and re-setup */
	if (0 && err == 0 && sc->num_slices > 1) {
		mxge_rem_msix_irqs(sc);
		err = mxge_add_msix_irqs(sc);
	}
	return err;
}

static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp;
	int err, rid;

	sc->dev = dev;
	mxge_fetch_tunables(sc);

	err = bus_dma_tag_create(NULL,			/* parent */
				 1,			/* alignment */
				 0,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC,	/* num segs */
				 65536,			/* maxsegsize */
				 0,			/* flags */
				 NULL, NULL,		/* lock */
				 &sc->parent_dmat);	/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_nothing;
	}

	ifp = sc->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not if_alloc()\n");
		err = ENOSPC;
		goto abort_with_parent_dmat;
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
		 device_get_nameunit(dev));
	mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
	snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
		 "%s:drv", device_get_nameunit(dev));
	mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
		 MTX_NETWORK_LOCK, MTX_DEF);
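
	/* the periodic tick/watchdog callout (mxge_tick) runs with
	   driver_mtx held */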
	callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
					 ~0, 1, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_lock;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
			      rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/* make a NULL terminated copy of the EEPROM strings section
	   of lanai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_zeropad_dma;

	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_dmabench;
	sc->intr_coal_delay = mxge_intr_coal_delay;

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0)
		goto abort_with_dmabench;

	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_slices;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		goto abort_with_slices;
	}

	err = mxge_add_irq(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to add irq\n");
		goto abort_with_rings;
	}

	ifp->if_baudrate = IF_Gbps(10UL);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
		IFCAP_VLAN_MTU;
#ifdef INET
	ifp->if_capabilities |= IFCAP_LRO;
#endif

#ifdef MXGE_NEW_VLAN_API
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
#endif

	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000)
		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	else
		device_printf(dev, "MTU limited to %d.  Install "
			      "latest firmware for 9000 byte jumbo support\n",
			      sc->max_mtu - ETHER_HDR_LEN);
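
	/* default to every supported capability enabled; LRO is backed
	   off below when the hw.mxge.lro_cnt tunable is 0 */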
Install " 4581 "latest firmware for 9000 byte jumbo support\n", 4582 sc->max_mtu - ETHER_HDR_LEN); 4583 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4584 ifp->if_capenable = ifp->if_capabilities; 4585 if (sc->lro_cnt == 0) 4586 ifp->if_capenable &= ~IFCAP_LRO; 4587 sc->csum_flag = 1; 4588 ifp->if_init = mxge_init; 4589 ifp->if_softc = sc; 4590 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4591 ifp->if_ioctl = mxge_ioctl; 4592 ifp->if_start = mxge_start; 4593 /* Initialise the ifmedia structure */ 4594 ifmedia_init(&sc->media, 0, mxge_media_change, 4595 mxge_media_status); 4596 mxge_set_media(sc, IFM_ETHER | IFM_AUTO); 4597 mxge_media_probe(sc); 4598 sc->dying = 0; 4599 ether_ifattach(ifp, sc->mac_addr); 4600 /* ether_ifattach sets mtu to ETHERMTU */ 4601 if (mxge_initial_mtu != ETHERMTU) 4602 mxge_change_mtu(sc, mxge_initial_mtu); 4603 4604 mxge_add_sysctls(sc); 4605 #ifdef IFNET_BUF_RING 4606 ifp->if_transmit = mxge_transmit; 4607 ifp->if_qflush = mxge_qflush; 4608 #endif 4609 return 0; 4610 4611 abort_with_rings: 4612 mxge_free_rings(sc); 4613 abort_with_slices: 4614 mxge_free_slices(sc); 4615 abort_with_dmabench: 4616 mxge_dma_free(&sc->dmabench_dma); 4617 abort_with_zeropad_dma: 4618 mxge_dma_free(&sc->zeropad_dma); 4619 abort_with_cmd_dma: 4620 mxge_dma_free(&sc->cmd_dma); 4621 abort_with_mem_res: 4622 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4623 abort_with_lock: 4624 pci_disable_busmaster(dev); 4625 mtx_destroy(&sc->cmd_mtx); 4626 mtx_destroy(&sc->driver_mtx); 4627 if_free(ifp); 4628 abort_with_parent_dmat: 4629 bus_dma_tag_destroy(sc->parent_dmat); 4630 4631 abort_with_nothing: 4632 return err; 4633 } 4634 4635 static int 4636 mxge_detach(device_t dev) 4637 { 4638 mxge_softc_t *sc = device_get_softc(dev); 4639 4640 if (mxge_vlans_active(sc)) { 4641 device_printf(sc->dev, 4642 "Detach vlans before removing module\n"); 4643 return EBUSY; 4644 } 4645 mtx_lock(&sc->driver_mtx); 4646 sc->dying = 1; 4647 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4648 mxge_close(sc); 4649 mtx_unlock(&sc->driver_mtx); 4650 ether_ifdetach(sc->ifp); 4651 callout_drain(&sc->co_hdl); 4652 ifmedia_removeall(&sc->media); 4653 mxge_dummy_rdma(sc, 0); 4654 mxge_rem_sysctls(sc); 4655 mxge_rem_irq(sc); 4656 mxge_free_rings(sc); 4657 mxge_free_slices(sc); 4658 mxge_dma_free(&sc->dmabench_dma); 4659 mxge_dma_free(&sc->zeropad_dma); 4660 mxge_dma_free(&sc->cmd_dma); 4661 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4662 pci_disable_busmaster(dev); 4663 mtx_destroy(&sc->cmd_mtx); 4664 mtx_destroy(&sc->driver_mtx); 4665 if_free(sc->ifp); 4666 bus_dma_tag_destroy(sc->parent_dmat); 4667 return 0; 4668 } 4669 4670 static int 4671 mxge_shutdown(device_t dev) 4672 { 4673 return 0; 4674 } 4675 4676 /* 4677 This file uses Myri10GE driver indentation. 4678 4679 Local Variables: 4680 c-file-style:"linux" 4681 tab-width:8 4682 End: 4683 */ 4684