/******************************************************************************

Copyright (c) 2006-2009, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#define IFNET_BUF_RING

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/sx.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h>		/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
static int mxge_always_promisc = 0;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n",
				      rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}
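/*
 * Illustrative sketch (not part of the original driver): typical use
 * of mxge_dma_alloc() above and mxge_dma_free() below.  The
 * allocation is the usual three-step bus_dma dance (create tag,
 * allocate and map memory, load the map to learn the bus address),
 * with the abort labels unwinding it in reverse order.  The
 * "example_dma" variable is hypothetical.
 */
#if 0
static int
mxge_dma_alloc_example(mxge_softc_t *sc)
{
	mxge_dma_t example_dma;	/* hypothetical scratch allocation */
	int err;

	/* 4KB, 4KB-aligned: one segment, no 4KB boundary crossings */
	err = mxge_dma_alloc(sc, &example_dma, 4096, 4096);
	if (err != 0)
		return err;

	/* example_dma.addr is the KVA; example_dma.bus_addr is the
	   address the NIC should be told to DMA to/from */
	device_printf(sc->dev, "bus addr: 0x%jx\n",
		      (uintmax_t)example_dma.bus_addr);

	mxge_dma_free(&example_dma);
	return 0;
}
#endif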
static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */

static int
mxge_parse_strings(mxge_softc_t *sc)
{
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			/* "MAC=" is 4 bytes and each "xx:" group is 3,
			   so advance 1 here and let the loop's += 3
			   land on each hex pair */
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}
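/*
 * Illustrative sketch (not part of the original driver): what
 * mxge_parse_strings() sees.  Given an eeprom_strings buffer like the
 * hypothetical one below, the parser leaves sc->mac_addr holding
 * {0x00, 0x60, 0xdd, 0x47, 0x81, 0x2a} and copies the PC= and SN=
 * payloads into their respective strings.  The values shown are made
 * up for illustration.
 */
#if 0
static const char mxge_eeprom_example[] =
	"MAC=00:60:dd:47:81:2a\0"	/* six strtoul(ptr, NULL, 16) reads */
	"PC=EXAMPLE-PC\0"		/* product code (hypothetical) */
	"SN=123456\0";			/* serial number (hypothetical) */
#endif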
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function.  Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access.
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off = base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_mapdev() failed\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif

static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */

	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}
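/*
 * Worked example (not part of the original driver) of the benchmark
 * math above.  One byte per 0.5us tick is 2 MB/s, hence the factor of
 * 2 in the expressions.  The cmd.data0 value below is hypothetical.
 */
#if 0
static int
mxge_dma_mbs_example(void)
{
	uint32_t data0 = (256 << 16) | 1024;	/* 256 transfers, 1024 ticks */
	uint32_t len = 4096;			/* bytes per transfer */

	/* (256 * 4096 * 2) / 1024 = 2048 MB/s */
	return ((data0 >> 16) * len * 2) / (data0 & 0xffff);
}
#endif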
/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */

	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}
" 588 "Please install up to date fw\n"); 589 return status; 590 } 591 592 static int 593 mxge_select_firmware(mxge_softc_t *sc) 594 { 595 int aligned = 0; 596 597 598 if (mxge_force_firmware != 0) { 599 if (mxge_force_firmware == 1) 600 aligned = 1; 601 else 602 aligned = 0; 603 if (mxge_verbose) 604 device_printf(sc->dev, 605 "Assuming %s completions (forced)\n", 606 aligned ? "aligned" : "unaligned"); 607 goto abort; 608 } 609 610 /* if the PCIe link width is 4 or less, we can use the aligned 611 firmware and skip any checks */ 612 if (sc->link_width != 0 && sc->link_width <= 4) { 613 device_printf(sc->dev, 614 "PCIe x%d Link, expect reduced performance\n", 615 sc->link_width); 616 aligned = 1; 617 goto abort; 618 } 619 620 if (0 == mxge_firmware_probe(sc)) 621 return 0; 622 623 abort: 624 if (aligned) { 625 sc->fw_name = mxge_fw_aligned; 626 sc->tx_boundary = 4096; 627 } else { 628 sc->fw_name = mxge_fw_unaligned; 629 sc->tx_boundary = 2048; 630 } 631 return (mxge_load_firmware(sc, 0)); 632 } 633 634 union qualhack 635 { 636 const char *ro_char; 637 char *rw_char; 638 }; 639 640 static int 641 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 642 { 643 644 645 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 646 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 647 be32toh(hdr->mcp_type)); 648 return EIO; 649 } 650 651 /* save firmware version for sysctl */ 652 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 653 if (mxge_verbose) 654 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 655 656 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 657 &sc->fw_ver_minor, &sc->fw_ver_tiny); 658 659 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 660 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 661 device_printf(sc->dev, "Found firmware version %s\n", 662 sc->fw_version); 663 device_printf(sc->dev, "Driver needs %d.%d\n", 664 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 665 return EINVAL; 666 } 667 return 0; 668 669 } 670 671 static void * 672 z_alloc(void *nil, u_int items, u_int size) 673 { 674 void *ptr; 675 676 ptr = malloc(items * size, M_TEMP, M_NOWAIT); 677 return ptr; 678 } 679 680 static void 681 z_free(void *nil, void *ptr) 682 { 683 free(ptr, M_TEMP); 684 } 685 686 687 static int 688 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 689 { 690 z_stream zs; 691 char *inflate_buffer; 692 const struct firmware *fw; 693 const mcp_gen_header_t *hdr; 694 unsigned hdr_offset; 695 int status; 696 unsigned int i; 697 char dummy; 698 size_t fw_len; 699 700 fw = firmware_get(sc->fw_name); 701 if (fw == NULL) { 702 device_printf(sc->dev, "Could not find firmware image %s\n", 703 sc->fw_name); 704 return ENOENT; 705 } 706 707 708 709 /* setup zlib and decompress f/w */ 710 bzero(&zs, sizeof (zs)); 711 zs.zalloc = z_alloc; 712 zs.zfree = z_free; 713 status = inflateInit(&zs); 714 if (status != Z_OK) { 715 status = EIO; 716 goto abort_with_fw; 717 } 718 719 /* the uncompressed size is stored as the firmware version, 720 which would otherwise go unused */ 721 fw_len = (size_t) fw->version; 722 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 723 if (inflate_buffer == NULL) 724 goto abort_with_zs; 725 zs.avail_in = fw->datasize; 726 zs.next_in = __DECONST(char *, fw->data); 727 zs.avail_out = fw_len; 728 zs.next_out = inflate_buffer; 729 status = inflate(&zs, Z_FINISH); 730 if (status != Z_STREAM_END) { 731 device_printf(sc->dev, "zlib %d\n", status); 732 status = EIO; 733 goto abort_with_buffer; 734 } 735 736 /* check id */ 737 hdr_offset = 
/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);	/* dummy addr MSW */
	buf[4] = htobe32(dma_low);	/* dummy addr LSW */
	buf[5] = htobe32(enable);	/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}
"enable" : "disable"), confirm, 823 *confirm); 824 } 825 return; 826 } 827 828 static int 829 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 830 { 831 mcp_cmd_t *buf; 832 char buf_bytes[sizeof(*buf) + 8]; 833 volatile mcp_cmd_response_t *response = sc->cmd; 834 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 835 uint32_t dma_low, dma_high; 836 int err, sleep_total = 0; 837 838 /* ensure buf is aligned to 8 bytes */ 839 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 840 841 buf->data0 = htobe32(data->data0); 842 buf->data1 = htobe32(data->data1); 843 buf->data2 = htobe32(data->data2); 844 buf->cmd = htobe32(cmd); 845 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 846 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 847 848 buf->response_addr.low = htobe32(dma_low); 849 buf->response_addr.high = htobe32(dma_high); 850 mtx_lock(&sc->cmd_mtx); 851 response->result = 0xffffffff; 852 wmb(); 853 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 854 855 /* wait up to 20ms */ 856 err = EAGAIN; 857 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 858 bus_dmamap_sync(sc->cmd_dma.dmat, 859 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 860 wmb(); 861 switch (be32toh(response->result)) { 862 case 0: 863 data->data0 = be32toh(response->data); 864 err = 0; 865 break; 866 case 0xffffffff: 867 DELAY(1000); 868 break; 869 case MXGEFW_CMD_UNKNOWN: 870 err = ENOSYS; 871 break; 872 case MXGEFW_CMD_ERROR_UNALIGNED: 873 err = E2BIG; 874 break; 875 case MXGEFW_CMD_ERROR_BUSY: 876 err = EBUSY; 877 break; 878 default: 879 device_printf(sc->dev, 880 "mxge: command %d " 881 "failed, result = %d\n", 882 cmd, be32toh(response->result)); 883 err = ENXIO; 884 break; 885 } 886 if (err != EAGAIN) 887 break; 888 } 889 if (err == EAGAIN) 890 device_printf(sc->dev, "mxge: command %d timed out" 891 "result = %d\n", 892 cmd, be32toh(response->result)); 893 mtx_unlock(&sc->cmd_mtx); 894 return err; 895 } 896 897 static int 898 mxge_adopt_running_firmware(mxge_softc_t *sc) 899 { 900 struct mcp_gen_header *hdr; 901 const size_t bytes = sizeof (struct mcp_gen_header); 902 size_t hdr_offset; 903 int status; 904 905 /* find running firmware header */ 906 hdr_offset = htobe32(*(volatile uint32_t *) 907 (sc->sram + MCP_HEADER_PTR_OFFSET)); 908 909 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 910 device_printf(sc->dev, 911 "Running firmware has bad header offset (%d)\n", 912 (int)hdr_offset); 913 return EIO; 914 } 915 916 /* copy header of running firmware from SRAM to host memory to 917 * validate firmware */ 918 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 919 if (hdr == NULL) { 920 device_printf(sc->dev, "could not malloc firmware hdr\n"); 921 return ENOMEM; 922 } 923 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 924 rman_get_bushandle(sc->mem_res), 925 hdr_offset, (char *)hdr, bytes); 926 status = mxge_validate_firmware(sc, hdr); 927 free(hdr, M_DEVBUF); 928 929 /* 930 * check to see if adopted firmware has bug where adopting 931 * it will cause broadcasts to be filtered unless the NIC 932 * is kept in ALLMULTI mode 933 */ 934 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 935 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 936 sc->adopted_rx_filter_bug = 1; 937 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 938 "working around rx filter bug\n", 939 sc->fw_ver_major, sc->fw_ver_minor, 940 sc->fw_ver_tiny); 941 } 942 943 return status; 944 } 945 946 947 static int 948 mxge_load_firmware(mxge_softc_t *sc, int adopt) 949 { 950 volatile 
static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8); /* where the code starts */
	buf[4] = htobe32(size - 8); 	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
			      confirm, *confirm);

		return ENXIO;
	}
	return 0;
}

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}
static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists */
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
				      "%d\n", err);
			/* abort, leaving multicast filtering off */
			IF_ADDR_UNLOCK(ifp);
			return;
		}
	}
	IF_ADDR_UNLOCK(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}
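/*
 * Worked example (not part of the original driver) of the address
 * packing above, assuming a little-endian host and the multicast
 * address 01:00:5e:00:00:01: bcopy() places bytes 0-3 in cmd.data0
 * and bytes 4-5 in the low half of cmd.data1.  After htonl(),
 * cmd.data0 holds 0x01005e00 and bytes 4-5 sit in the high half of
 * cmd.data1, so once mxge_send_cmd() applies its own htobe32() the
 * firmware sees the six address bytes in wire order.
 */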
static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lro_bad_csum = 0;
		ss->lro_queued = 0;
		ss->lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			ss->fw_stats->valid = 0;
			ss->fw_stats->send_done_count = 0;
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, 0);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	return status;
}
static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
	struct ifnet *ifp;
	int err = 0;

	ifp = sc->ifp;
	if (lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	else
		ifp->if_capenable |= IFCAP_LRO;
	sc->lro_cnt = lro_cnt;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
	}
	return err;
}

static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int lro_cnt;
	int err;

	sc = arg1;
	lro_cnt = sc->lro_cnt;
	err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
	if (err != 0)
		return err;

	if (lro_cnt == sc->lro_cnt)
		return 0;

	if (lro_cnt > 128)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_lro_locked(sc, lro_cnt);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, &sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, &sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, &sc->product_code_string,
			  0, "product_code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "enable flow control for this interface");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* lro */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"lro_cnt",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_lro,
			"I", "number of lro merge queues");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		sprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}
/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}
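/*
 * Illustrative walk-through (not part of the original driver) of the
 * ordering trick above, for a hypothetical 3-descriptor chain:
 *
 *   1. src[0].flags is saved and cleared, so the NIC cannot treat
 *      the first descriptor as valid while the chain is in flight.
 *   2. The chain is PIO-copied to the ring in 32-byte bursts (or
 *      backwards on a ring wrap, again leaving descriptor 0 last).
 *   3. Only then is the final 32-bit word of descriptor 0, which
 *      contains the saved flags, rewritten in place; that single
 *      store publishes the whole chain to the firmware at once.
 */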
#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, int ip_off)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	/* ensure we have the ethernet, IP and TCP
	   header together in the first mbuf, copy
	   it to a scratch buffer if not */
	if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
		m_copydata(m, 0, ip_off + sizeof (*ip),
			   ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	} else {
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}
	if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
			    + sizeof (*tcp))) {
		m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
			   + sizeof (*tcp), ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	}

	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
	cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));

	/* TSO implies checksum offload on this hardware */
	cksum_offset = ip_off + (ip->ip_hl << 2);
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */
	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces */
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		       (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;

}

#endif /* IFCAP_TSO4 */
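/*
 * Worked example (not part of the original driver) of the cut
 * bookkeeping above, with hypothetical numbers: mss = 1448 and a
 * 54-byte header, so cum_len starts at -54.  The first descriptor
 * takes the "header ends" branch (seglen = 54) and resets rdma_count.
 * In the payload, any descriptor whose cum_len_next crosses mss is
 * flagged MXGEFW_FLAGS_TSO_CHOP, and when cum_len_next lands exactly
 * on a segment boundary (cum_len_next % mss == 0) the following
 * descriptor gets MXGEFW_FLAGS_FIRST, telling the firmware to start a
 * new TCP segment there.  The rdma_count of each run is then patched
 * in retroactively through (req - rdma_count)->rdma_count.
 */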
#ifdef MXGE_NEW_VLAN_API
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion.  We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 */
static struct mbuf *
mxge_vlan_tag_insert(struct mbuf *m)
{
	struct ether_vlan_header *evl;

	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
	if (__predict_false(m == NULL))
		return NULL;
	if (m->m_len < sizeof(*evl)) {
		m = m_pullup(m, sizeof(*evl));
		if (__predict_false(m == NULL))
			return NULL;
	}
	/*
	 * Transform the Ethernet header into an Ethernet header
	 * with 802.1Q encapsulation.
	 */
	evl = mtod(m, struct ether_vlan_header *);
	bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
	      (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
	m->m_flags &= ~M_VLANTAG;
	return m;
}
#endif /* MXGE_NEW_VLAN_API */

static void
mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
{
	mxge_softc_t *sc;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct mbuf *m_tmp;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	struct ip *ip;
	int cnt, cum_len, err, i, idx, odd_flag, ip_off;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;

	ip_off = sizeof (struct ether_header);
#ifdef MXGE_NEW_VLAN_API
	if (m->m_flags & M_VLANTAG) {
		m = mxge_vlan_tag_insert(m);
		if (__predict_false(m == NULL))
			goto drop;
		ip_off += ETHER_VLAN_ENCAP_LEN;
	}
#endif
	/* (try to) map the frame for DMA */
	idx = tx->req & tx->mask;
	err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
				      m, tx->seg_list, &cnt,
				      BUS_DMA_NOWAIT);
	if (__predict_false(err == EFBIG)) {
		/* Too many segments in the chain.  Try
		   to defrag */
		m_tmp = m_defrag(m, M_NOWAIT);
		if (m_tmp == NULL) {
			goto drop;
		}
		ss->tx.defrag++;
		m = m_tmp;
		err = bus_dmamap_load_mbuf_sg(tx->dmat,
					      tx->info[idx].map,
					      m, tx->seg_list, &cnt,
					      BUS_DMA_NOWAIT);
	}
	if (__predict_false(err != 0)) {
		device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
			      " packet len = %d\n", err, m->m_pkthdr.len);
		goto drop;
	}
	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
			BUS_DMASYNC_PREWRITE);
	tx->info[idx].m = m;

#if IFCAP_TSO4
	/* TSO is different enough, we handle it in another routine */
	if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
		mxge_encap_tso(ss, m, cnt, ip_off);
		return;
	}
#endif

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/* checksum offloading? */
*/ 2032 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 2033 /* ensure ip header is in first mbuf, copy 2034 it to a scratch buffer if not */ 2035 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 2036 m_copydata(m, 0, ip_off + sizeof (*ip), 2037 ss->scratch); 2038 ip = (struct ip *)(ss->scratch + ip_off); 2039 } else { 2040 ip = (struct ip *)(mtod(m, char *) + ip_off); 2041 } 2042 cksum_offset = ip_off + (ip->ip_hl << 2); 2043 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2044 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2045 req->cksum_offset = cksum_offset; 2046 flags |= MXGEFW_FLAGS_CKSUM; 2047 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2048 } else { 2049 odd_flag = 0; 2050 } 2051 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2052 flags |= MXGEFW_FLAGS_SMALL; 2053 2054 /* convert segments into a request list */ 2055 cum_len = 0; 2056 seg = tx->seg_list; 2057 req->flags = MXGEFW_FLAGS_FIRST; 2058 for (i = 0; i < cnt; i++) { 2059 req->addr_low = 2060 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2061 req->addr_high = 2062 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2063 req->length = htobe16(seg->ds_len); 2064 req->cksum_offset = cksum_offset; 2065 if (cksum_offset > seg->ds_len) 2066 cksum_offset -= seg->ds_len; 2067 else 2068 cksum_offset = 0; 2069 req->pseudo_hdr_offset = pseudo_hdr_offset; 2070 req->pad = 0; /* complete solid 16-byte block */ 2071 req->rdma_count = 1; 2072 req->flags |= flags | ((cum_len & 1) * odd_flag); 2073 cum_len += seg->ds_len; 2074 seg++; 2075 req++; 2076 req->flags = 0; 2077 } 2078 req--; 2079 /* pad runts to 60 bytes */ 2080 if (cum_len < 60) { 2081 req++; 2082 req->addr_low = 2083 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2084 req->addr_high = 2085 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2086 req->length = htobe16(60 - cum_len); 2087 req->cksum_offset = 0; 2088 req->pseudo_hdr_offset = pseudo_hdr_offset; 2089 req->pad = 0; /* complete solid 16-byte block */ 2090 req->rdma_count = 1; 2091 req->flags |= flags | ((cum_len & 1) * odd_flag); 2092 cnt++; 2093 } 2094 2095 tx->req_list[0].rdma_count = cnt; 2096 #if 0 2097 /* print what the firmware will see */ 2098 for (i = 0; i < cnt; i++) { 2099 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2100 "cso:%d, flags:0x%x, rdma:%d\n", 2101 i, (int)ntohl(tx->req_list[i].addr_high), 2102 (int)ntohl(tx->req_list[i].addr_low), 2103 (int)ntohs(tx->req_list[i].length), 2104 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2105 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2106 tx->req_list[i].rdma_count); 2107 } 2108 printf("--------------\n"); 2109 #endif 2110 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2111 mxge_submit_req(tx, tx->req_list, cnt); 2112 #ifdef IFNET_BUF_RING 2113 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2114 /* tell the NIC to start polling this slice */ 2115 *tx->send_go = 1; 2116 tx->queue_active = 1; 2117 tx->activate++; 2118 wmb(); 2119 } 2120 #endif 2121 return; 2122 2123 drop: 2124 m_freem(m); 2125 ss->oerrors++; 2126 return; 2127 } 2128 2129 #ifdef IFNET_BUF_RING 2130 static void 2131 mxge_qflush(struct ifnet *ifp) 2132 { 2133 mxge_softc_t *sc = ifp->if_softc; 2134 mxge_tx_ring_t *tx; 2135 struct mbuf *m; 2136 int slice; 2137 2138 for (slice = 0; slice < sc->num_slices; slice++) { 2139 tx = &sc->ss[slice].tx; 2140 mtx_lock(&tx->mtx); 2141 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2142 m_freem(m); 2143 mtx_unlock(&tx->mtx); 2144 } 2145 if_qflush(ifp); 2146 } 2147 2148 static inline void 2149 mxge_start_locked(struct 
mxge_slice_state *ss) 2150 { 2151 mxge_softc_t *sc; 2152 struct mbuf *m; 2153 struct ifnet *ifp; 2154 mxge_tx_ring_t *tx; 2155 2156 sc = ss->sc; 2157 ifp = sc->ifp; 2158 tx = &ss->tx; 2159 2160 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2161 m = drbr_dequeue(ifp, tx->br); 2162 if (m == NULL) { 2163 return; 2164 } 2165 /* let BPF see it */ 2166 BPF_MTAP(ifp, m); 2167 2168 /* give it to the nic */ 2169 mxge_encap(ss, m); 2170 } 2171 /* ran out of transmit slots */ 2172 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2173 && (!drbr_empty(ifp, tx->br))) { 2174 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2175 tx->stall++; 2176 } 2177 } 2178 2179 static int 2180 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2181 { 2182 mxge_softc_t *sc; 2183 struct ifnet *ifp; 2184 mxge_tx_ring_t *tx; 2185 int err; 2186 2187 sc = ss->sc; 2188 ifp = sc->ifp; 2189 tx = &ss->tx; 2190 2191 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2192 IFF_DRV_RUNNING) { 2193 err = drbr_enqueue(ifp, tx->br, m); 2194 return (err); 2195 } 2196 2197 if (drbr_empty(ifp, tx->br) && 2198 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2199 /* let BPF see it */ 2200 BPF_MTAP(ifp, m); 2201 /* give it to the nic */ 2202 mxge_encap(ss, m); 2203 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2204 return (err); 2205 } 2206 if (!drbr_empty(ifp, tx->br)) 2207 mxge_start_locked(ss); 2208 return (0); 2209 } 2210 2211 static int 2212 mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2213 { 2214 mxge_softc_t *sc = ifp->if_softc; 2215 struct mxge_slice_state *ss; 2216 mxge_tx_ring_t *tx; 2217 int err = 0; 2218 int slice; 2219 2220 slice = m->m_pkthdr.flowid; 2221 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2222 2223 ss = &sc->ss[slice]; 2224 tx = &ss->tx; 2225 2226 if (mtx_trylock(&tx->mtx)) { 2227 err = mxge_transmit_locked(ss, m); 2228 mtx_unlock(&tx->mtx); 2229 } else { 2230 err = drbr_enqueue(ifp, tx->br, m); 2231 } 2232 2233 return (err); 2234 } 2235 2236 #else 2237 2238 static inline void 2239 mxge_start_locked(struct mxge_slice_state *ss) 2240 { 2241 mxge_softc_t *sc; 2242 struct mbuf *m; 2243 struct ifnet *ifp; 2244 mxge_tx_ring_t *tx; 2245 2246 sc = ss->sc; 2247 ifp = sc->ifp; 2248 tx = &ss->tx; 2249 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2250 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2251 if (m == NULL) { 2252 return; 2253 } 2254 /* let BPF see it */ 2255 BPF_MTAP(ifp, m); 2256 2257 /* give it to the nic */ 2258 mxge_encap(ss, m); 2259 } 2260 /* ran out of transmit slots */ 2261 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2262 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2263 tx->stall++; 2264 } 2265 } 2266 #endif 2267 static void 2268 mxge_start(struct ifnet *ifp) 2269 { 2270 mxge_softc_t *sc = ifp->if_softc; 2271 struct mxge_slice_state *ss; 2272 2273 /* only use the first slice for now */ 2274 ss = &sc->ss[0]; 2275 mtx_lock(&ss->tx.mtx); 2276 mxge_start_locked(ss); 2277 mtx_unlock(&ss->tx.mtx); 2278 } 2279 2280 /* 2281 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2282 * at most 32 bytes at a time, so as to avoid involving the software 2283 * pio handler in the nic. 
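 * Each mcp_kreq_ether_recv_t is 8 bytes, so a chunk of 8 ring
 * entries is pushed as two 32-byte bursts.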
We re-write the first segment's low 2284 * DMA address to mark it valid only after we write the entire chunk 2285 * in a burst 2286 */ 2287 static inline void 2288 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2289 mcp_kreq_ether_recv_t *src) 2290 { 2291 uint32_t low; 2292 2293 low = src->addr_low; 2294 src->addr_low = 0xffffffff; 2295 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2296 wmb(); 2297 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2298 wmb(); 2299 src->addr_low = low; 2300 dst->addr_low = low; 2301 wmb(); 2302 } 2303 2304 static int 2305 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2306 { 2307 bus_dma_segment_t seg; 2308 struct mbuf *m; 2309 mxge_rx_ring_t *rx = &ss->rx_small; 2310 int cnt, err; 2311 2312 m = m_gethdr(M_DONTWAIT, MT_DATA); 2313 if (m == NULL) { 2314 rx->alloc_fail++; 2315 err = ENOBUFS; 2316 goto done; 2317 } 2318 m->m_len = MHLEN; 2319 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2320 &seg, &cnt, BUS_DMA_NOWAIT); 2321 if (err != 0) { 2322 m_free(m); 2323 goto done; 2324 } 2325 rx->info[idx].m = m; 2326 rx->shadow[idx].addr_low = 2327 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2328 rx->shadow[idx].addr_high = 2329 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2330 2331 done: 2332 if ((idx & 7) == 7) 2333 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2334 return err; 2335 } 2336 2337 static int 2338 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2339 { 2340 bus_dma_segment_t seg[3]; 2341 struct mbuf *m; 2342 mxge_rx_ring_t *rx = &ss->rx_big; 2343 int cnt, err, i; 2344 2345 if (rx->cl_size == MCLBYTES) 2346 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); 2347 else 2348 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2349 if (m == NULL) { 2350 rx->alloc_fail++; 2351 err = ENOBUFS; 2352 goto done; 2353 } 2354 m->m_len = rx->cl_size; 2355 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2356 seg, &cnt, BUS_DMA_NOWAIT); 2357 if (err != 0) { 2358 m_free(m); 2359 goto done; 2360 } 2361 rx->info[idx].m = m; 2362 rx->shadow[idx].addr_low = 2363 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2364 rx->shadow[idx].addr_high = 2365 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2366 2367 #if MXGE_VIRT_JUMBOS 2368 for (i = 1; i < cnt; i++) { 2369 rx->shadow[idx + i].addr_low = 2370 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2371 rx->shadow[idx + i].addr_high = 2372 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2373 } 2374 #endif 2375 2376 done: 2377 for (i = 0; i < rx->nbufs; i++) { 2378 if ((idx & 7) == 7) { 2379 mxge_submit_8rx(&rx->lanai[idx - 7], 2380 &rx->shadow[idx - 7]); 2381 } 2382 idx++; 2383 } 2384 return err; 2385 } 2386 2387 /* 2388 * Myri10GE hardware checksums are not valid if the sender 2389 * padded the frame with non-zero padding. This is because 2390 * the firmware just does a simple 16-bit 1s complement 2391 * checksum across the entire frame, excluding the first 14 2392 * bytes. 
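 * A sender that pads a short frame with non-zero bytes therefore
 * perturbs the sum, and the resulting hardware checksum is useless.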
It is best to simply check the checksum and
 * tell the stack about it only if the checksum is good
 */

static inline uint16_t
mxge_rx_csum(struct mbuf *m, int csum)
{
	struct ether_header *eh;
	struct ip *ip;
	uint16_t c;

	eh = mtod(m, struct ether_header *);

	/* only deal with IPv4 TCP & UDP for now */
	if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
		return 1;
	ip = (struct ip *)(eh + 1);
	if (__predict_false(ip->ip_p != IPPROTO_TCP &&
			    ip->ip_p != IPPROTO_UDP))
		return 1;

	c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		      htonl(ntohs(csum) + ntohs(ip->ip_len) -
			    (ip->ip_hl << 2) + ip->ip_p));
	c ^= 0xffff;
	return (c);
}

static void
mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
{
	struct ether_vlan_header *evl;
	struct ether_header *eh;
	uint32_t partial;

	evl = mtod(m, struct ether_vlan_header *);
	eh = mtod(m, struct ether_header *);

	/*
	 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
	 * after what the firmware thought was the end of the ethernet
	 * header.
	 */

	/* put checksum into host byte order */
	*csum = ntohs(*csum);
	partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
	(*csum) += ~partial;
	(*csum) += ((*csum) < ~partial);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);

	/* restore checksum to network byte order;
	   later consumers expect this */
	*csum = htons(*csum);

	/* save the tag */
#ifdef MXGE_NEW_VLAN_API
	m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
#else
	{
		struct m_tag *mtag;
		mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
				   M_NOWAIT);
		if (mtag == NULL)
			return;
		VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
		m_tag_prepend(m, mtag);
	}

#endif
	m->m_flags |= M_VLANTAG;

	/*
	 * Remove the 802.1q header by copying the Ethernet
	 * addresses over it and adjusting the beginning of
	 * the data in the mbuf.  The encapsulated Ethernet
	 * type field is already in place.
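	 * bcopy() slides the addresses up over the tag, and m_adj()
	 * then discards the four stale bytes left at the front of
	 * the mbuf.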
2470 */ 2471 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2472 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2473 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2474 } 2475 2476 2477 static inline void 2478 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2479 { 2480 mxge_softc_t *sc; 2481 struct ifnet *ifp; 2482 struct mbuf *m; 2483 struct ether_header *eh; 2484 mxge_rx_ring_t *rx; 2485 bus_dmamap_t old_map; 2486 int idx; 2487 uint16_t tcpudp_csum; 2488 2489 sc = ss->sc; 2490 ifp = sc->ifp; 2491 rx = &ss->rx_big; 2492 idx = rx->cnt & rx->mask; 2493 rx->cnt += rx->nbufs; 2494 /* save a pointer to the received mbuf */ 2495 m = rx->info[idx].m; 2496 /* try to replace the received mbuf */ 2497 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2498 /* drop the frame -- the old mbuf is re-cycled */ 2499 ifp->if_ierrors++; 2500 return; 2501 } 2502 2503 /* unmap the received buffer */ 2504 old_map = rx->info[idx].map; 2505 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2506 bus_dmamap_unload(rx->dmat, old_map); 2507 2508 /* swap the bus_dmamap_t's */ 2509 rx->info[idx].map = rx->extra_map; 2510 rx->extra_map = old_map; 2511 2512 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2513 * aligned */ 2514 m->m_data += MXGEFW_PAD; 2515 2516 m->m_pkthdr.rcvif = ifp; 2517 m->m_len = m->m_pkthdr.len = len; 2518 ss->ipackets++; 2519 eh = mtod(m, struct ether_header *); 2520 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2521 mxge_vlan_tag_remove(m, &csum); 2522 } 2523 /* if the checksum is valid, mark it in the mbuf header */ 2524 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2525 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2526 return; 2527 /* otherwise, it was a UDP frame, or a TCP frame which 2528 we could not do LRO on. 
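	   Setting csum_data to 0xffff with CSUM_DATA_VALID |
	   CSUM_PSEUDO_HDR tells the stack that the checksum,
	   pseudo-header included, has already been verified, so it
	   skips its own check.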
Tell the stack that the 2529 checksum is good */ 2530 m->m_pkthdr.csum_data = 0xffff; 2531 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2532 } 2533 /* flowid only valid if RSS hashing is enabled */ 2534 if (sc->num_slices > 1) { 2535 m->m_pkthdr.flowid = (ss - sc->ss); 2536 m->m_flags |= M_FLOWID; 2537 } 2538 /* pass the frame up the stack */ 2539 (*ifp->if_input)(ifp, m); 2540 } 2541 2542 static inline void 2543 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2544 { 2545 mxge_softc_t *sc; 2546 struct ifnet *ifp; 2547 struct ether_header *eh; 2548 struct mbuf *m; 2549 mxge_rx_ring_t *rx; 2550 bus_dmamap_t old_map; 2551 int idx; 2552 uint16_t tcpudp_csum; 2553 2554 sc = ss->sc; 2555 ifp = sc->ifp; 2556 rx = &ss->rx_small; 2557 idx = rx->cnt & rx->mask; 2558 rx->cnt++; 2559 /* save a pointer to the received mbuf */ 2560 m = rx->info[idx].m; 2561 /* try to replace the received mbuf */ 2562 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2563 /* drop the frame -- the old mbuf is re-cycled */ 2564 ifp->if_ierrors++; 2565 return; 2566 } 2567 2568 /* unmap the received buffer */ 2569 old_map = rx->info[idx].map; 2570 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2571 bus_dmamap_unload(rx->dmat, old_map); 2572 2573 /* swap the bus_dmamap_t's */ 2574 rx->info[idx].map = rx->extra_map; 2575 rx->extra_map = old_map; 2576 2577 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2578 * aligned */ 2579 m->m_data += MXGEFW_PAD; 2580 2581 m->m_pkthdr.rcvif = ifp; 2582 m->m_len = m->m_pkthdr.len = len; 2583 ss->ipackets++; 2584 eh = mtod(m, struct ether_header *); 2585 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2586 mxge_vlan_tag_remove(m, &csum); 2587 } 2588 /* if the checksum is valid, mark it in the mbuf header */ 2589 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2590 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2591 return; 2592 /* otherwise, it was a UDP frame, or a TCP frame which 2593 we could not do LRO on. 
Tell the stack that the
	   checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
	}
	/* flowid only valid if RSS hashing is enabled */
	if (sc->num_slices > 1) {
		m->m_pkthdr.flowid = (ss - sc->ss);
		m->m_flags |= M_FLOWID;
	}
	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, m);
}

static inline void
mxge_clean_rx_done(struct mxge_slice_state *ss)
{
	mxge_rx_done_t *rx_done = &ss->rx_done;
	struct lro_entry *lro;
	int limit = 0;
	uint16_t length;
	uint16_t checksum;


	while (rx_done->entry[rx_done->idx].length != 0) {
		length = ntohs(rx_done->entry[rx_done->idx].length);
		rx_done->entry[rx_done->idx].length = 0;
		checksum = rx_done->entry[rx_done->idx].checksum;
		if (length <= (MHLEN - MXGEFW_PAD))
			mxge_rx_done_small(ss, length, checksum);
		else
			mxge_rx_done_big(ss, length, checksum);
		rx_done->cnt++;
		rx_done->idx = rx_done->cnt & rx_done->mask;

		/* limit potential for livelock */
		if (__predict_false(++limit > rx_done->mask / 2))
			break;
	}
	while (!SLIST_EMPTY(&ss->lro_active)) {
		lro = SLIST_FIRST(&ss->lro_active);
		SLIST_REMOVE_HEAD(&ss->lro_active, next);
		mxge_lro_flush(ss, lro);
	}
}


static inline void
mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
{
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	struct mbuf *m;
	bus_dmamap_t map;
	int idx;
	int *flags;

	tx = &ss->tx;
	ifp = ss->sc->ifp;
	while (tx->pkt_done != mcp_idx) {
		idx = tx->done & tx->mask;
		tx->done++;
		m = tx->info[idx].m;
		/* mbuf and DMA map only attached to the first
		   segment per-mbuf */
		if (m != NULL) {
#ifdef IFNET_BUF_RING
			ss->obytes += m->m_pkthdr.len;
			if (m->m_flags & M_MCAST)
				ss->omcasts++;
#endif
			ss->opackets++;
			tx->info[idx].m = NULL;
			map = tx->info[idx].map;
			bus_dmamap_unload(tx->dmat, map);
			m_freem(m);
		}
		if (tx->info[idx].flag) {
			tx->info[idx].flag = 0;
			tx->pkt_done++;
		}
	}

	/* If we have space, clear IFF_OACTIVE to tell the stack that
	   it's OK to send packets */
#ifdef IFNET_BUF_RING
	flags = &ss->if_drv_flags;
#else
	flags = &ifp->if_drv_flags;
#endif
	mtx_lock(&ss->tx.mtx);
	if ((*flags) & IFF_DRV_OACTIVE &&
	    tx->req - tx->done < (tx->mask + 1)/4) {
		*(flags) &= ~IFF_DRV_OACTIVE;
		ss->tx.wake++;
		mxge_start_locked(ss);
	}
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
		/* let the NIC stop polling this queue, since there
		 * are no more transmits pending */
		*tx->send_stop = 1;
		tx->queue_active = 0;
		tx->deactivate++;
		wmb();
	}
#endif
	mtx_unlock(&ss->tx.mtx);

}

static struct mxge_media_type mxge_xfp_media_types[] =
{
	{IFM_10G_CX4,	0x7f,		"10GBASE-CX4 (module)"},
	{IFM_10G_SR,	(1 << 7),	"10GBASE-SR"},
	{IFM_10G_LR,	(1 << 6),	"10GBASE-LR"},
	{0,		(1 << 5),	"10GBASE-ER"},
	{IFM_10G_LRM,	(1 << 4),	"10GBASE-LRM"},
	{0,		(1 << 3),	"10GBASE-SW"},
	{0,		(1 << 2),	"10GBASE-LW"},
	{0,		(1 << 1),	"10GBASE-EW"},
	{0,		(1 << 0),	"Reserved"}
};
static struct mxge_media_type mxge_sfp_media_types[] =
{
	{0,		(1 << 7),	"Reserved"},
{IFM_10G_LRM,	(1 << 6),	"10GBASE-LRM"},
	{IFM_10G_LR,	(1 << 5),	"10GBASE-LR"},
	{IFM_10G_SR,	(1 << 4),	"10GBASE-SR"}
};

static void
mxge_set_media(mxge_softc_t *sc, int type)
{
	sc->media_flags |= type;
	ifmedia_add(&sc->media, sc->media_flags, 0, NULL);
	ifmedia_set(&sc->media, sc->media_flags);
}


/*
 * Determine the media type for a NIC.  Some XFPs will identify
 * themselves only when their link is up, so this is initiated via a
 * link up interrupt.  However, this can potentially take up to
 * several milliseconds, so it is run via the watchdog routine, rather
 * than in the interrupt handler itself.  This need only be done
 * once, not each time the link is up.
 */
static void
mxge_media_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *cage_type;
	char *ptr;
	struct mxge_media_type *mxge_media_types = NULL;
	int i, err, ms, mxge_media_type_entries;
	uint32_t byte;

	sc->need_media_probe = 0;

	/* if we've already set a media type, we're done */
	if (sc->media_flags != (IFM_ETHER | IFM_AUTO))
		return;

	/*
	 * parse the product code to determine the interface type
	 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
	 * after the 3rd dash in the driver's cached copy of the
	 * EEPROM's product code string.
	 */
	ptr = sc->product_code_string;
	if (ptr == NULL) {
		device_printf(sc->dev, "Missing product code\n");
		return;
	}

	for (i = 0; i < 3; i++, ptr++) {
		ptr = index(ptr, '-');
		if (ptr == NULL) {
			device_printf(sc->dev,
				      "only %d dashes in PC?!?\n", i);
			return;
		}
	}
	if (*ptr == 'C') {
		/* -C is CX4 */
		mxge_set_media(sc, IFM_10G_CX4);
		return;
	}
	else if (*ptr == 'Q') {
		/* -Q is Quad Ribbon Fiber */
		device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
		/* FreeBSD has no media type for Quad ribbon fiber */
		return;
	}

	if (*ptr == 'R') {
		/* -R is XFP */
		mxge_media_types = mxge_xfp_media_types;
		mxge_media_type_entries =
			sizeof (mxge_xfp_media_types) /
			sizeof (mxge_xfp_media_types[0]);
		byte = MXGE_XFP_COMPLIANCE_BYTE;
		cage_type = "XFP";
	}

	if (*ptr == 'S' || *(ptr + 1) == 'S') {
		/* -S or -2S is SFP+ */
		mxge_media_types = mxge_sfp_media_types;
		mxge_media_type_entries =
			sizeof (mxge_sfp_media_types) /
			sizeof (mxge_sfp_media_types[0]);
		cage_type = "SFP+";
		byte = 3;
	}

	if (mxge_media_types == NULL) {
		device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
		return;
	}

	/*
	 * At this point we know the NIC has an XFP or SFP+ cage, so
	 * now we try to determine what is in the cage by using the
	 * firmware's XFP I2C commands to read the XFP 10GbE compliance
	 * register.
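	 * The byte is matched against the bitmasks in the tables
	 * above to identify the module type.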
We read just one byte, which may take over 2820 * a millisecond 2821 */ 2822 2823 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2824 cmd.data1 = byte; 2825 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2826 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2827 device_printf(sc->dev, "failed to read XFP\n"); 2828 } 2829 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2830 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2831 } 2832 if (err != MXGEFW_CMD_OK) { 2833 return; 2834 } 2835 2836 /* now we wait for the data to be cached */ 2837 cmd.data0 = byte; 2838 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2839 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2840 DELAY(1000); 2841 cmd.data0 = byte; 2842 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2843 } 2844 if (err != MXGEFW_CMD_OK) { 2845 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2846 cage_type, err, ms); 2847 return; 2848 } 2849 2850 if (cmd.data0 == mxge_media_types[0].bitmask) { 2851 if (mxge_verbose) 2852 device_printf(sc->dev, "%s:%s\n", cage_type, 2853 mxge_media_types[0].name); 2854 mxge_set_media(sc, IFM_10G_CX4); 2855 return; 2856 } 2857 for (i = 1; i < mxge_media_type_entries; i++) { 2858 if (cmd.data0 & mxge_media_types[i].bitmask) { 2859 if (mxge_verbose) 2860 device_printf(sc->dev, "%s:%s\n", 2861 cage_type, 2862 mxge_media_types[i].name); 2863 2864 mxge_set_media(sc, mxge_media_types[i].flag); 2865 return; 2866 } 2867 } 2868 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type, 2869 cmd.data0); 2870 2871 return; 2872 } 2873 2874 static void 2875 mxge_intr(void *arg) 2876 { 2877 struct mxge_slice_state *ss = arg; 2878 mxge_softc_t *sc = ss->sc; 2879 mcp_irq_data_t *stats = ss->fw_stats; 2880 mxge_tx_ring_t *tx = &ss->tx; 2881 mxge_rx_done_t *rx_done = &ss->rx_done; 2882 uint32_t send_done_count; 2883 uint8_t valid; 2884 2885 2886 #ifndef IFNET_BUF_RING 2887 /* an interrupt on a non-zero slice is implicitly valid 2888 since MSI-X irqs are not shared */ 2889 if (ss != sc->ss) { 2890 mxge_clean_rx_done(ss); 2891 *ss->irq_claim = be32toh(3); 2892 return; 2893 } 2894 #endif 2895 2896 /* make sure the DMA has finished */ 2897 if (!stats->valid) { 2898 return; 2899 } 2900 valid = stats->valid; 2901 2902 if (sc->legacy_irq) { 2903 /* lower legacy IRQ */ 2904 *sc->irq_deassert = 0; 2905 if (!mxge_deassert_wait) 2906 /* don't wait for conf. 
that irq is low */ 2907 stats->valid = 0; 2908 } else { 2909 stats->valid = 0; 2910 } 2911 2912 /* loop while waiting for legacy irq deassertion */ 2913 do { 2914 /* check for transmit completes and receives */ 2915 send_done_count = be32toh(stats->send_done_count); 2916 while ((send_done_count != tx->pkt_done) || 2917 (rx_done->entry[rx_done->idx].length != 0)) { 2918 if (send_done_count != tx->pkt_done) 2919 mxge_tx_done(ss, (int)send_done_count); 2920 mxge_clean_rx_done(ss); 2921 send_done_count = be32toh(stats->send_done_count); 2922 } 2923 if (sc->legacy_irq && mxge_deassert_wait) 2924 wmb(); 2925 } while (*((volatile uint8_t *) &stats->valid)); 2926 2927 /* fw link & error stats meaningful only on the first slice */ 2928 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 2929 if (sc->link_state != stats->link_up) { 2930 sc->link_state = stats->link_up; 2931 if (sc->link_state) { 2932 if_link_state_change(sc->ifp, LINK_STATE_UP); 2933 if (mxge_verbose) 2934 device_printf(sc->dev, "link up\n"); 2935 } else { 2936 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2937 if (mxge_verbose) 2938 device_printf(sc->dev, "link down\n"); 2939 } 2940 sc->need_media_probe = 1; 2941 } 2942 if (sc->rdma_tags_available != 2943 be32toh(stats->rdma_tags_available)) { 2944 sc->rdma_tags_available = 2945 be32toh(stats->rdma_tags_available); 2946 device_printf(sc->dev, "RDMA timed out! %d tags " 2947 "left\n", sc->rdma_tags_available); 2948 } 2949 2950 if (stats->link_down) { 2951 sc->down_cnt += stats->link_down; 2952 sc->link_state = 0; 2953 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2954 } 2955 } 2956 2957 /* check to see if we have rx token to pass back */ 2958 if (valid & 0x1) 2959 *ss->irq_claim = be32toh(3); 2960 *(ss->irq_claim + 1) = be32toh(3); 2961 } 2962 2963 static void 2964 mxge_init(void *arg) 2965 { 2966 } 2967 2968 2969 2970 static void 2971 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 2972 { 2973 struct lro_entry *lro_entry; 2974 int i; 2975 2976 while (!SLIST_EMPTY(&ss->lro_free)) { 2977 lro_entry = SLIST_FIRST(&ss->lro_free); 2978 SLIST_REMOVE_HEAD(&ss->lro_free, next); 2979 free(lro_entry, M_DEVBUF); 2980 } 2981 2982 for (i = 0; i <= ss->rx_big.mask; i++) { 2983 if (ss->rx_big.info[i].m == NULL) 2984 continue; 2985 bus_dmamap_unload(ss->rx_big.dmat, 2986 ss->rx_big.info[i].map); 2987 m_freem(ss->rx_big.info[i].m); 2988 ss->rx_big.info[i].m = NULL; 2989 } 2990 2991 for (i = 0; i <= ss->rx_small.mask; i++) { 2992 if (ss->rx_small.info[i].m == NULL) 2993 continue; 2994 bus_dmamap_unload(ss->rx_small.dmat, 2995 ss->rx_small.info[i].map); 2996 m_freem(ss->rx_small.info[i].m); 2997 ss->rx_small.info[i].m = NULL; 2998 } 2999 3000 /* transmit ring used only on the first slice */ 3001 if (ss->tx.info == NULL) 3002 return; 3003 3004 for (i = 0; i <= ss->tx.mask; i++) { 3005 ss->tx.info[i].flag = 0; 3006 if (ss->tx.info[i].m == NULL) 3007 continue; 3008 bus_dmamap_unload(ss->tx.dmat, 3009 ss->tx.info[i].map); 3010 m_freem(ss->tx.info[i].m); 3011 ss->tx.info[i].m = NULL; 3012 } 3013 } 3014 3015 static void 3016 mxge_free_mbufs(mxge_softc_t *sc) 3017 { 3018 int slice; 3019 3020 for (slice = 0; slice < sc->num_slices; slice++) 3021 mxge_free_slice_mbufs(&sc->ss[slice]); 3022 } 3023 3024 static void 3025 mxge_free_slice_rings(struct mxge_slice_state *ss) 3026 { 3027 int i; 3028 3029 3030 if (ss->rx_done.entry != NULL) 3031 mxge_dma_free(&ss->rx_done.dma); 3032 ss->rx_done.entry = NULL; 3033 3034 if (ss->tx.req_bytes != NULL) 3035 free(ss->tx.req_bytes, M_DEVBUF); 3036 
ss->tx.req_bytes = NULL;

	if (ss->tx.seg_list != NULL)
		free(ss->tx.seg_list, M_DEVBUF);
	ss->tx.seg_list = NULL;

	if (ss->rx_small.shadow != NULL)
		free(ss->rx_small.shadow, M_DEVBUF);
	ss->rx_small.shadow = NULL;

	if (ss->rx_big.shadow != NULL)
		free(ss->rx_big.shadow, M_DEVBUF);
	ss->rx_big.shadow = NULL;

	if (ss->tx.info != NULL) {
		if (ss->tx.dmat != NULL) {
			for (i = 0; i <= ss->tx.mask; i++) {
				bus_dmamap_destroy(ss->tx.dmat,
						   ss->tx.info[i].map);
			}
			bus_dma_tag_destroy(ss->tx.dmat);
		}
		free(ss->tx.info, M_DEVBUF);
	}
	ss->tx.info = NULL;

	if (ss->rx_small.info != NULL) {
		if (ss->rx_small.dmat != NULL) {
			for (i = 0; i <= ss->rx_small.mask; i++) {
				bus_dmamap_destroy(ss->rx_small.dmat,
						   ss->rx_small.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_small.dmat,
					   ss->rx_small.extra_map);
			bus_dma_tag_destroy(ss->rx_small.dmat);
		}
		free(ss->rx_small.info, M_DEVBUF);
	}
	ss->rx_small.info = NULL;

	if (ss->rx_big.info != NULL) {
		if (ss->rx_big.dmat != NULL) {
			for (i = 0; i <= ss->rx_big.mask; i++) {
				bus_dmamap_destroy(ss->rx_big.dmat,
						   ss->rx_big.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_big.dmat,
					   ss->rx_big.extra_map);
			bus_dma_tag_destroy(ss->rx_big.dmat);
		}
		free(ss->rx_big.info, M_DEVBUF);
	}
	ss->rx_big.info = NULL;
}

static void
mxge_free_rings(mxge_softc_t *sc)
{
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++)
		mxge_free_slice_rings(&sc->ss[slice]);
}

static int
mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
		       int tx_ring_entries)
{
	mxge_softc_t *sc = ss->sc;
	size_t bytes;
	int err, i;

	err = ENOMEM;

	/* allocate per-slice receive resources */

	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
	ss->rx_done.mask = (2 * rx_ring_entries) - 1;

	/* allocate the rx shadow rings */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->rx_small.shadow == NULL)
		return err;

	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->rx_big.shadow == NULL)
		return err;

	/* allocate the rx host info rings */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->rx_small.info == NULL)
		return err;

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->rx_big.info == NULL)
		return err;

	/* allocate the rx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MHLEN,			/* maxsize */
				 1,			/* num segs */
				 MHLEN,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
			      err);
		return err;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
#if MXGE_VIRT_JUMBOS
				 4096,			/* boundary */
#else
				 0,			/* boundary */
#endif
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 3*4096,		/* maxsize */
#if MXGE_VIRT_JUMBOS
				 3,			/* num segs */
				 4096,			/* maxsegsize*/
#else
				 1,			/* num segs */
				 MJUM9BYTES,		/* maxsegsize*/
#endif
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
			      err);
		return err;
	}
	for (i = 0; i <= ss->rx_small.mask; i++) {
		err = bus_dmamap_create(ss->rx_small.dmat, 0,
					&ss->rx_small.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_small dmamap\n",
				      err);
			return err;
		}
	}
	err = bus_dmamap_create(ss->rx_small.dmat, 0,
				&ss->rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
			      err);
		return err;
	}

	for (i = 0; i <= ss->rx_big.mask; i++) {
		err = bus_dmamap_create(ss->rx_big.dmat, 0,
					&ss->rx_big.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_big dmamap\n",
				      err);
			return err;
		}
	}
	err = bus_dmamap_create(ss->rx_big.dmat, 0,
				&ss->rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
			      err);
		return err;
	}

	/* now allocate TX resources */

#ifndef IFNET_BUF_RING
	/* only use a single TX ring for now */
	if (ss != ss->sc->ss)
		return 0;
#endif

	ss->tx.mask = tx_ring_entries - 1;
	ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);


	/* allocate the tx request copy block */
	bytes = 8 +
		sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
	ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
	if (ss->tx.req_bytes == NULL)
		return err;
	/* ensure req_list entries are aligned to 8 bytes */
	ss->tx.req_list = (mcp_kreq_ether_send_t *)
		((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);

	/* allocate the tx busdma segment list */
	bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
	ss->tx.seg_list = (bus_dma_segment_t *)
		malloc(bytes, M_DEVBUF, M_WAITOK);
	if (ss->tx.seg_list == NULL)
		return err;

	/* allocate the tx host info ring */
	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->tx.info == NULL)
		return err;

	/* allocate the tx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 sc->tx_boundary,	/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 ss->tx.max_desc - 2,	/* num segs */
				 sc->tx_boundary,	/* maxsegsz */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->tx.dmat);		/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating tx dmat\n",
			      err);
		return err;
	}

	/* now use these tags to setup dmamaps for each slot
	   in the ring */
	for (i = 0; i <= ss->tx.mask; i++) {
		err = bus_dmamap_create(ss->tx.dmat, 0,
					&ss->tx.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d tx dmamap\n",
				      err);
			return err;
		}
	}
	return 0;
3284 } 3285 3286 static int 3287 mxge_alloc_rings(mxge_softc_t *sc) 3288 { 3289 mxge_cmd_t cmd; 3290 int tx_ring_size; 3291 int tx_ring_entries, rx_ring_entries; 3292 int err, slice; 3293 3294 /* get ring sizes */ 3295 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3296 tx_ring_size = cmd.data0; 3297 if (err != 0) { 3298 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3299 goto abort; 3300 } 3301 3302 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3303 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3304 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3305 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3306 IFQ_SET_READY(&sc->ifp->if_snd); 3307 3308 for (slice = 0; slice < sc->num_slices; slice++) { 3309 err = mxge_alloc_slice_rings(&sc->ss[slice], 3310 rx_ring_entries, 3311 tx_ring_entries); 3312 if (err != 0) 3313 goto abort; 3314 } 3315 return 0; 3316 3317 abort: 3318 mxge_free_rings(sc); 3319 return err; 3320 3321 } 3322 3323 3324 static void 3325 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3326 { 3327 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3328 3329 if (bufsize < MCLBYTES) { 3330 /* easy, everything fits in a single buffer */ 3331 *big_buf_size = MCLBYTES; 3332 *cl_size = MCLBYTES; 3333 *nbufs = 1; 3334 return; 3335 } 3336 3337 if (bufsize < MJUMPAGESIZE) { 3338 /* still easy, everything still fits in a single buffer */ 3339 *big_buf_size = MJUMPAGESIZE; 3340 *cl_size = MJUMPAGESIZE; 3341 *nbufs = 1; 3342 return; 3343 } 3344 #if MXGE_VIRT_JUMBOS 3345 /* now we need to use virtually contiguous buffers */ 3346 *cl_size = MJUM9BYTES; 3347 *big_buf_size = 4096; 3348 *nbufs = mtu / 4096 + 1; 3349 /* needs to be a power of two, so round up */ 3350 if (*nbufs == 3) 3351 *nbufs = 4; 3352 #else 3353 *cl_size = MJUM9BYTES; 3354 *big_buf_size = MJUM9BYTES; 3355 *nbufs = 1; 3356 #endif 3357 } 3358 3359 static int 3360 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3361 { 3362 mxge_softc_t *sc; 3363 mxge_cmd_t cmd; 3364 bus_dmamap_t map; 3365 struct lro_entry *lro_entry; 3366 int err, i, slice; 3367 3368 3369 sc = ss->sc; 3370 slice = ss - sc->ss; 3371 3372 SLIST_INIT(&ss->lro_free); 3373 SLIST_INIT(&ss->lro_active); 3374 3375 for (i = 0; i < sc->lro_cnt; i++) { 3376 lro_entry = (struct lro_entry *) 3377 malloc(sizeof (*lro_entry), M_DEVBUF, 3378 M_NOWAIT | M_ZERO); 3379 if (lro_entry == NULL) { 3380 sc->lro_cnt = i; 3381 break; 3382 } 3383 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); 3384 } 3385 /* get the lanai pointers to the send and receive rings */ 3386 3387 err = 0; 3388 #ifndef IFNET_BUF_RING 3389 /* We currently only send from the first slice */ 3390 if (slice == 0) { 3391 #endif 3392 cmd.data0 = slice; 3393 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3394 ss->tx.lanai = 3395 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3396 ss->tx.send_go = (volatile uint32_t *) 3397 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3398 ss->tx.send_stop = (volatile uint32_t *) 3399 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3400 #ifndef IFNET_BUF_RING 3401 } 3402 #endif 3403 cmd.data0 = slice; 3404 err |= mxge_send_cmd(sc, 3405 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3406 ss->rx_small.lanai = 3407 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3408 cmd.data0 = slice; 3409 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3410 ss->rx_big.lanai = 3411 (volatile mcp_kreq_ether_recv_t 
*)(sc->sram + cmd.data0);

	if (err != 0) {
		device_printf(sc->dev,
			      "failed to get ring sizes or locations\n");
		return EIO;
	}

	/* stock receive rings */
	for (i = 0; i <= ss->rx_small.mask; i++) {
		map = ss->rx_small.info[i].map;
		err = mxge_get_buf_small(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d smalls\n",
				      i, ss->rx_small.mask + 1);
			return ENOMEM;
		}
	}
	for (i = 0; i <= ss->rx_big.mask; i++) {
		ss->rx_big.shadow[i].addr_low = 0xffffffff;
		ss->rx_big.shadow[i].addr_high = 0xffffffff;
	}
	ss->rx_big.nbufs = nbufs;
	ss->rx_big.cl_size = cl_size;
	for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
		map = ss->rx_big.info[i].map;
		err = mxge_get_buf_big(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d bigs\n",
				      i, ss->rx_big.mask + 1);
			return ENOMEM;
		}
	}
	return 0;
}

static int
mxge_open(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int err, big_bytes, nbufs, slice, cl_size, i;
	bus_addr_t bus;
	volatile uint8_t *itable;
	struct mxge_slice_state *ss;

	/* Copy the MAC address in case it was overridden */
	bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);

	err = mxge_reset(sc, 1);
	if (err != 0) {
		device_printf(sc->dev, "failed to reset\n");
		return EIO;
	}

	if (sc->num_slices > 1) {
		/* setup the indirection table */
		cmd.data0 = sc->num_slices;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
				    &cmd);

		err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
				     &cmd);
		if (err != 0) {
			device_printf(sc->dev,
				      "failed to setup rss tables\n");
			return err;
		}

		/* just enable an identity mapping */
		itable = sc->sram + cmd.data0;
		for (i = 0; i < sc->num_slices; i++)
			itable[i] = (uint8_t)i;

		cmd.data0 = 1;
		cmd.data1 = mxge_rss_hash_type;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "failed to enable slices\n");
			return err;
		}
	}


	mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);

	cmd.data0 = nbufs;
	err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			    &cmd);
	/* error is only meaningful if we're trying to set
	   MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
	if (err && nbufs > 1) {
		device_printf(sc->dev,
			      "Failed to set always-use-n to %d\n",
			      nbufs);
		return EIO;
	}
	/* Give the firmware the mtu and the big and small buffer
	   sizes.  The firmware wants the big buf size to be a power
	   of two.
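	   For example, a 1500-byte MTU fits in 2KB (MCLBYTES)
	   clusters, while a 9000-byte MTU needs 9KB (MJUM9BYTES)
	   clusters (or several 4KB chunks with MXGE_VIRT_JUMBOS);
	   see mxge_choose_params() above.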
Luckily, FreeBSD's clusters are powers of two */ 3510 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3511 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3512 cmd.data0 = MHLEN - MXGEFW_PAD; 3513 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3514 &cmd); 3515 cmd.data0 = big_bytes; 3516 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3517 3518 if (err != 0) { 3519 device_printf(sc->dev, "failed to setup params\n"); 3520 goto abort; 3521 } 3522 3523 /* Now give him the pointer to the stats block */ 3524 for (slice = 0; 3525 #ifdef IFNET_BUF_RING 3526 slice < sc->num_slices; 3527 #else 3528 slice < 1; 3529 #endif 3530 slice++) { 3531 ss = &sc->ss[slice]; 3532 cmd.data0 = 3533 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3534 cmd.data1 = 3535 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3536 cmd.data2 = sizeof(struct mcp_irq_data); 3537 cmd.data2 |= (slice << 16); 3538 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3539 } 3540 3541 if (err != 0) { 3542 bus = sc->ss->fw_stats_dma.bus_addr; 3543 bus += offsetof(struct mcp_irq_data, send_done_count); 3544 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3545 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3546 err = mxge_send_cmd(sc, 3547 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3548 &cmd); 3549 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3550 sc->fw_multicast_support = 0; 3551 } else { 3552 sc->fw_multicast_support = 1; 3553 } 3554 3555 if (err != 0) { 3556 device_printf(sc->dev, "failed to setup params\n"); 3557 goto abort; 3558 } 3559 3560 for (slice = 0; slice < sc->num_slices; slice++) { 3561 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3562 if (err != 0) { 3563 device_printf(sc->dev, "couldn't open slice %d\n", 3564 slice); 3565 goto abort; 3566 } 3567 } 3568 3569 /* Finally, start the firmware running */ 3570 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3571 if (err) { 3572 device_printf(sc->dev, "Couldn't bring up link\n"); 3573 goto abort; 3574 } 3575 #ifdef IFNET_BUF_RING 3576 for (slice = 0; slice < sc->num_slices; slice++) { 3577 ss = &sc->ss[slice]; 3578 ss->if_drv_flags |= IFF_DRV_RUNNING; 3579 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3580 } 3581 #endif 3582 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3583 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3584 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3585 3586 return 0; 3587 3588 3589 abort: 3590 mxge_free_mbufs(sc); 3591 3592 return err; 3593 } 3594 3595 static int 3596 mxge_close(mxge_softc_t *sc) 3597 { 3598 mxge_cmd_t cmd; 3599 int err, old_down_cnt; 3600 #ifdef IFNET_BUF_RING 3601 struct mxge_slice_state *ss; 3602 int slice; 3603 #endif 3604 3605 callout_stop(&sc->co_hdl); 3606 #ifdef IFNET_BUF_RING 3607 for (slice = 0; slice < sc->num_slices; slice++) { 3608 ss = &sc->ss[slice]; 3609 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3610 } 3611 #endif 3612 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3613 old_down_cnt = sc->down_cnt; 3614 wmb(); 3615 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3616 if (err) { 3617 device_printf(sc->dev, "Couldn't bring down link\n"); 3618 } 3619 if (old_down_cnt == sc->down_cnt) { 3620 /* wait for down irq */ 3621 DELAY(10 * sc->intr_coal_delay); 3622 } 3623 wmb(); 3624 if (old_down_cnt == sc->down_cnt) { 3625 device_printf(sc->dev, "never got down irq\n"); 3626 } 3627 3628 mxge_free_mbufs(sc); 3629 3630 return 0; 3631 } 3632 3633 static void 3634 mxge_setup_cfg_space(mxge_softc_t *sc) 3635 { 3636 device_t dev = sc->dev; 3637 int reg; 3638 uint16_t cmd, lnk, 
pectl; 3639 3640 /* find the PCIe link width and set max read request to 4KB*/ 3641 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3642 lnk = pci_read_config(dev, reg + 0x12, 2); 3643 sc->link_width = (lnk >> 4) & 0x3f; 3644 3645 pectl = pci_read_config(dev, reg + 0x8, 2); 3646 pectl = (pectl & ~0x7000) | (5 << 12); 3647 pci_write_config(dev, reg + 0x8, pectl, 2); 3648 } 3649 3650 /* Enable DMA and Memory space access */ 3651 pci_enable_busmaster(dev); 3652 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 3653 cmd |= PCIM_CMD_MEMEN; 3654 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 3655 } 3656 3657 static uint32_t 3658 mxge_read_reboot(mxge_softc_t *sc) 3659 { 3660 device_t dev = sc->dev; 3661 uint32_t vs; 3662 3663 /* find the vendor specific offset */ 3664 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 3665 device_printf(sc->dev, 3666 "could not find vendor specific offset\n"); 3667 return (uint32_t)-1; 3668 } 3669 /* enable read32 mode */ 3670 pci_write_config(dev, vs + 0x10, 0x3, 1); 3671 /* tell NIC which register to read */ 3672 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3673 return (pci_read_config(dev, vs + 0x14, 4)); 3674 } 3675 3676 static int 3677 mxge_watchdog_reset(mxge_softc_t *sc, int slice) 3678 { 3679 struct pci_devinfo *dinfo; 3680 mxge_tx_ring_t *tx; 3681 int err; 3682 uint32_t reboot; 3683 uint16_t cmd; 3684 3685 err = ENXIO; 3686 3687 device_printf(sc->dev, "Watchdog reset!\n"); 3688 3689 /* 3690 * check to see if the NIC rebooted. If it did, then all of 3691 * PCI config space has been reset, and things like the 3692 * busmaster bit will be zero. If this is the case, then we 3693 * must restore PCI config space before the NIC can be used 3694 * again 3695 */ 3696 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3697 if (cmd == 0xffff) { 3698 /* 3699 * maybe the watchdog caught the NIC rebooting; wait 3700 * up to 100ms for it to finish. 
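		 * (config reads of a device that has fallen off the
		 * bus are master-aborted and return all-ones, hence
		 * the 0xffff checks here)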
If it does not come 3701 * back, then give up 3702 */ 3703 DELAY(1000*100); 3704 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3705 if (cmd == 0xffff) { 3706 device_printf(sc->dev, "NIC disappeared!\n"); 3707 return (err); 3708 } 3709 } 3710 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3711 /* print the reboot status */ 3712 reboot = mxge_read_reboot(sc); 3713 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3714 reboot); 3715 /* restore PCI configuration space */ 3716 dinfo = device_get_ivars(sc->dev); 3717 pci_cfg_restore(sc->dev, dinfo); 3718 3719 /* and redo any changes we made to our config space */ 3720 mxge_setup_cfg_space(sc); 3721 3722 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) { 3723 mxge_close(sc); 3724 err = mxge_open(sc); 3725 } 3726 } else { 3727 tx = &sc->ss[slice].tx; 3728 device_printf(sc->dev, 3729 "NIC did not reboot, slice %d ring state:\n", 3730 slice); 3731 device_printf(sc->dev, 3732 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3733 tx->req, tx->done, tx->queue_active); 3734 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3735 tx->activate, tx->deactivate); 3736 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3737 tx->pkt_done, 3738 be32toh(sc->ss->fw_stats->send_done_count)); 3739 device_printf(sc->dev, "not resetting\n"); 3740 } 3741 return (err); 3742 } 3743 3744 static int 3745 mxge_watchdog(mxge_softc_t *sc) 3746 { 3747 mxge_tx_ring_t *tx; 3748 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3749 int i, err = 0; 3750 3751 /* see if we have outstanding transmits, which 3752 have been pending for more than mxge_ticks */ 3753 for (i = 0; 3754 #ifdef IFNET_BUF_RING 3755 (i < sc->num_slices) && (err == 0); 3756 #else 3757 (i < 1) && (err == 0); 3758 #endif 3759 i++) { 3760 tx = &sc->ss[i].tx; 3761 if (tx->req != tx->done && 3762 tx->watchdog_req != tx->watchdog_done && 3763 tx->done == tx->watchdog_done) { 3764 /* check for pause blocking before resetting */ 3765 if (tx->watchdog_rx_pause == rx_pause) 3766 err = mxge_watchdog_reset(sc, i); 3767 else 3768 device_printf(sc->dev, "Flow control blocking " 3769 "xmits, check link partner\n"); 3770 } 3771 3772 tx->watchdog_req = tx->req; 3773 tx->watchdog_done = tx->done; 3774 tx->watchdog_rx_pause = rx_pause; 3775 } 3776 3777 if (sc->need_media_probe) 3778 mxge_media_probe(sc); 3779 return (err); 3780 } 3781 3782 static void 3783 mxge_update_stats(mxge_softc_t *sc) 3784 { 3785 struct mxge_slice_state *ss; 3786 u_long ipackets = 0; 3787 u_long opackets = 0; 3788 #ifdef IFNET_BUF_RING 3789 u_long obytes = 0; 3790 u_long omcasts = 0; 3791 u_long odrops = 0; 3792 #endif 3793 u_long oerrors = 0; 3794 int slice; 3795 3796 for (slice = 0; slice < sc->num_slices; slice++) { 3797 ss = &sc->ss[slice]; 3798 ipackets += ss->ipackets; 3799 opackets += ss->opackets; 3800 #ifdef IFNET_BUF_RING 3801 obytes += ss->obytes; 3802 omcasts += ss->omcasts; 3803 odrops += ss->tx.br->br_drops; 3804 #endif 3805 oerrors += ss->oerrors; 3806 } 3807 sc->ifp->if_ipackets = ipackets; 3808 sc->ifp->if_opackets = opackets; 3809 #ifdef IFNET_BUF_RING 3810 sc->ifp->if_obytes = obytes; 3811 sc->ifp->if_omcasts = omcasts; 3812 sc->ifp->if_snd.ifq_drops = odrops; 3813 #endif 3814 sc->ifp->if_oerrors = oerrors; 3815 } 3816 3817 static void 3818 mxge_tick(void *arg) 3819 { 3820 mxge_softc_t *sc = arg; 3821 int err = 0; 3822 3823 /* aggregate stats from different slices */ 3824 mxge_update_stats(sc); 3825 if (!sc->watchdog_countdown) { 3826 err = mxge_watchdog(sc); 3827 sc->watchdog_countdown = 4; 3828 } 3829 
sc->watchdog_countdown--;
	if (err == 0)
		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);

}

static int
mxge_media_change(struct ifnet *ifp)
{
	return EINVAL;
}

static int
mxge_change_mtu(mxge_softc_t *sc, int mtu)
{
	struct ifnet *ifp = sc->ifp;
	int real_mtu, old_mtu;
	int err = 0;


	real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	if ((real_mtu > sc->max_mtu) || real_mtu < 60)
		return EINVAL;
	mtx_lock(&sc->driver_mtx);
	old_mtu = ifp->if_mtu;
	ifp->if_mtu = mtu;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
		if (err != 0) {
			ifp->if_mtu = old_mtu;
			mxge_close(sc);
			(void) mxge_open(sc);
		}
	}
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static void
mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	mxge_softc_t *sc = ifp->if_softc;


	if (sc == NULL)
		return;
	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
	ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
	ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0;
}

static int
mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int err, mask;

	err = 0;
	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		err = ether_ioctl(ifp, command, data);
		break;

	case SIOCSIFMTU:
		err = mxge_change_mtu(sc, ifr->ifr_mtu);
		break;

	case SIOCSIFFLAGS:
		mtx_lock(&sc->driver_mtx);
		if (ifp->if_flags & IFF_UP) {
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				err = mxge_open(sc);
			} else {
				/* take care of promisc and allmulti
				   flag changes */
				mxge_change_promisc(sc,
						    ifp->if_flags & IFF_PROMISC);
				mxge_set_multicast_list(sc);
			}
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				mxge_close(sc);
			}
		}
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mtx_lock(&sc->driver_mtx);
		mxge_set_multicast_list(sc);
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCSIFCAP:
		mtx_lock(&sc->driver_mtx);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
						      | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		} else if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
				sc->csum_flag = 0;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
				sc->csum_flag = 1;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
		if (mask & IFCAP_LRO) {
			if (IFCAP_LRO & ifp->if_capenable)
				err = mxge_change_lro_locked(sc, 0);
			else
				err = mxge_change_lro_locked(sc,
mxge_lro_cnt);
		}
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
		mtx_unlock(&sc->driver_mtx);
		VLAN_CAPABILITIES(ifp);

		break;

	case SIOCGIFMEDIA:
		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
				    &sc->media, command);
		break;

	default:
		err = ENOTTY;
	}
	return err;
}

static void
mxge_fetch_tunables(mxge_softc_t *sc)
{

	TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
			  &mxge_flow_control);
	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
			  &mxge_intr_coal_delay);
	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
			  &mxge_nvidia_ecrc_enable);
	TUNABLE_INT_FETCH("hw.mxge.force_firmware",
			  &mxge_force_firmware);
	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
			  &mxge_deassert_wait);
	TUNABLE_INT_FETCH("hw.mxge.verbose",
			  &mxge_verbose);
	TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
	TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
	TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
	TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
	if (sc->lro_cnt != 0)
		mxge_lro_cnt = sc->lro_cnt;

	if (bootverbose)
		mxge_verbose = 1;
	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
		mxge_intr_coal_delay = 30;
	if (mxge_ticks == 0)
		mxge_ticks = hz / 2;
	sc->pause = mxge_flow_control;
	if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
	    || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_SRC_PORT) {
		mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
	}
}


static void
mxge_free_slices(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int i;


	if (sc->ss == NULL)
		return;

	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];
		if (ss->fw_stats != NULL) {
			mxge_dma_free(&ss->fw_stats_dma);
			ss->fw_stats = NULL;
#ifdef IFNET_BUF_RING
			if (ss->tx.br != NULL) {
				drbr_free(ss->tx.br, M_DEVBUF);
				ss->tx.br = NULL;
			}
#endif
			mtx_destroy(&ss->tx.mtx);
		}
		if (ss->rx_done.entry != NULL) {
			mxge_dma_free(&ss->rx_done.dma);
			ss->rx_done.entry = NULL;
		}
	}
	free(sc->ss, M_DEVBUF);
	sc->ss = NULL;
}

static int
mxge_alloc_slices(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct mxge_slice_state *ss;
	size_t bytes;
	int err, i, max_intr_slots;

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		return err;
	}
	sc->rx_ring_size = cmd.data0;
	max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));

	bytes = sizeof (*sc->ss) * sc->num_slices;
	sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->ss == NULL)
		return (ENOMEM);
	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];

		ss->sc = sc;

		/* allocate per-slice rx interrupt queues */

		bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
		err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
		if (err != 0)
			goto abort;
		ss->rx_done.entry = ss->rx_done.dma.addr;
		bzero(ss->rx_done.entry, bytes);

		/*
		 * allocate the per-slice firmware stats; stats
		 * (including tx) are used only on the first
		 * slice for
static void
mxge_free_slices(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int i;

	if (sc->ss == NULL)
		return;

	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];
		if (ss->fw_stats != NULL) {
			mxge_dma_free(&ss->fw_stats_dma);
			ss->fw_stats = NULL;
#ifdef IFNET_BUF_RING
			if (ss->tx.br != NULL) {
				drbr_free(ss->tx.br, M_DEVBUF);
				ss->tx.br = NULL;
			}
#endif
			mtx_destroy(&ss->tx.mtx);
		}
		if (ss->rx_done.entry != NULL) {
			mxge_dma_free(&ss->rx_done.dma);
			ss->rx_done.entry = NULL;
		}
	}
	free(sc->ss, M_DEVBUF);
	sc->ss = NULL;
}

static int
mxge_alloc_slices(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct mxge_slice_state *ss;
	size_t bytes;
	int err, i, max_intr_slots;

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		return err;
	}
	sc->rx_ring_size = cmd.data0;
	max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));

	bytes = sizeof (*sc->ss) * sc->num_slices;
	sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->ss == NULL)
		return (ENOMEM);
	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];

		ss->sc = sc;

		/* allocate per-slice rx interrupt queues */

		bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
		err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
		if (err != 0)
			goto abort;
		ss->rx_done.entry = ss->rx_done.dma.addr;
		bzero(ss->rx_done.entry, bytes);

		/*
		 * allocate the per-slice firmware stats; stats
		 * (including tx) are used only on the first
		 * slice for now
		 */
#ifndef IFNET_BUF_RING
		if (i > 0)
			continue;
#endif

		bytes = sizeof (*ss->fw_stats);
		err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
				     sizeof (*ss->fw_stats), 64);
		if (err != 0)
			goto abort;
		ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
		snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
			 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
		mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
#ifdef IFNET_BUF_RING
		ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
					   &ss->tx.mtx);
#endif
	}

	return (0);

abort:
	mxge_free_slices(sc);
	return (ENOMEM);
}

static void
mxge_slice_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *old_fw;
	int msix_cnt, status, max_intr_slots;

	sc->num_slices = 1;
	/*
	 * don't enable multiple slices if they were disabled by the
	 * tunable, or if this is not an SMP system
	 */
	if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
		return;

	/* see how many MSI-X interrupts are available */
	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;

	/* now load the slice aware firmware and see what it supports */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/* get rx ring size */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));

	/* tell it the size of the interrupt queues */
	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/* ask for the maximum number of slices it supports */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
			      "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	sc->num_slices = cmd.data0;
	if (sc->num_slices > msix_cnt)
		sc->num_slices = msix_cnt;

	if (mxge_max_slices == -1) {
		/* cap to number of CPUs in system */
		if (sc->num_slices > mp_ncpus)
			sc->num_slices = mp_ncpus;
	} else {
		if (sc->num_slices > mxge_max_slices)
			sc->num_slices = mxge_max_slices;
	}
	/* make sure it is a power of two */
	while (sc->num_slices & (sc->num_slices - 1))
		sc->num_slices--;

	if (mxge_verbose)
		device_printf(sc->dev, "using %d slices\n",
			      sc->num_slices);

	return;

abort_with_fw:
	sc->fw_name = old_fw;
	(void) mxge_load_firmware(sc, 0);
}
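/*
 * Worked example (editorial, hypothetical numbers) of the slice
 * count selection in mxge_slice_probe() above: suppose the firmware
 * reports 8 RSS queues, pci_msix_count() returns 6, mp_ncpus is 6,
 * and hw.mxge.max_slices is -1.  num_slices starts at 8, is clamped
 * to the 6 available MSI-X vectors, then to the 6 CPUs, and the
 * power-of-two loop walks it down (6 & 5 != 0 -> 5, 5 & 4 != 0 -> 4,
 * 4 & 3 == 0), so the driver ends up using 4 slices.
 */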
static int
mxge_add_msix_irqs(mxge_softc_t *sc)
{
	size_t bytes;
	int count, err, i, rid;

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
						    &rid, RF_ACTIVE);

	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
		return ENXIO;
	}

	count = sc->num_slices;
	err = pci_alloc_msix(sc->dev, &count);
	if (err != 0) {
		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d,"
			      " err = %d\n", sc->num_slices, err);
		goto abort_with_msix_table;
	}
	if (count < sc->num_slices) {
		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
			      sc->num_slices, count);
		device_printf(sc->dev,
			      "Try setting hw.mxge.max_slices to %d\n",
			      count);
		err = ENOSPC;
		goto abort_with_msix;
	}
	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
	sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_irq_res == NULL) {
		err = ENOMEM;
		goto abort_with_msix;
	}

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
							     SYS_RES_IRQ,
							     &rid, RF_ACTIVE);
		if (sc->msix_irq_res[i] == NULL) {
			device_printf(sc->dev, "couldn't allocate IRQ res"
				      " for message %d\n", i);
			err = ENXIO;
			goto abort_with_res;
		}
	}

	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
	sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_ih == NULL) {
		err = ENOMEM;
		goto abort_with_res;
	}

	for (i = 0; i < sc->num_slices; i++) {
		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
				     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
				     NULL,
#endif
				     mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
		if (err != 0) {
			device_printf(sc->dev, "couldn't setup intr for "
				      "message %d\n", i);
			goto abort_with_intr;
		}
	}

	if (mxge_verbose) {
		device_printf(sc->dev, "using %d msix IRQs:",
			      sc->num_slices);
		for (i = 0; i < sc->num_slices; i++)
			printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
		printf("\n");
	}
	return (0);

abort_with_intr:
	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

abort_with_res:
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

abort_with_msix:
	pci_release_msi(sc->dev);

abort_with_msix_table:
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	return err;
}
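/*
 * Layout note (editorial, illustrative): after mxge_add_msix_irqs()
 * succeeds, the MSI-X table resource is the BAR 2 mapping
 * (PCIR_BAR(2)), and SYS_RES_IRQ rids 1..num_slices carry one MSI-X
 * message per slice, so for example a 4-slice configuration binds
 * message i (rid i + 1) to mxge_intr() with &sc->ss[i] as its
 * argument.  The rid numbering starting at 1 is how FreeBSD exposes
 * MSI/MSI-X vectors; only the slice binding is mxge-specific.
 */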
"INTx" : "MSI", 4343 rman_get_start(sc->irq_res)); 4344 err = bus_setup_intr(sc->dev, sc->irq_res, 4345 INTR_TYPE_NET | INTR_MPSAFE, 4346 #if __FreeBSD_version > 700030 4347 NULL, 4348 #endif 4349 mxge_intr, &sc->ss[0], &sc->ih); 4350 if (err != 0) { 4351 bus_release_resource(sc->dev, SYS_RES_IRQ, 4352 sc->legacy_irq ? 0 : 1, sc->irq_res); 4353 if (!sc->legacy_irq) 4354 pci_release_msi(sc->dev); 4355 } 4356 return err; 4357 } 4358 4359 static void 4360 mxge_rem_msix_irqs(mxge_softc_t *sc) 4361 { 4362 int i, rid; 4363 4364 for (i = 0; i < sc->num_slices; i++) { 4365 if (sc->msix_ih[i] != NULL) { 4366 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4367 sc->msix_ih[i]); 4368 sc->msix_ih[i] = NULL; 4369 } 4370 } 4371 free(sc->msix_ih, M_DEVBUF); 4372 4373 for (i = 0; i < sc->num_slices; i++) { 4374 rid = i + 1; 4375 if (sc->msix_irq_res[i] != NULL) 4376 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4377 sc->msix_irq_res[i]); 4378 sc->msix_irq_res[i] = NULL; 4379 } 4380 free(sc->msix_irq_res, M_DEVBUF); 4381 4382 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4383 sc->msix_table_res); 4384 4385 pci_release_msi(sc->dev); 4386 return; 4387 } 4388 4389 static void 4390 mxge_rem_single_irq(mxge_softc_t *sc) 4391 { 4392 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4393 bus_release_resource(sc->dev, SYS_RES_IRQ, 4394 sc->legacy_irq ? 0 : 1, sc->irq_res); 4395 if (!sc->legacy_irq) 4396 pci_release_msi(sc->dev); 4397 } 4398 4399 static void 4400 mxge_rem_irq(mxge_softc_t *sc) 4401 { 4402 if (sc->num_slices > 1) 4403 mxge_rem_msix_irqs(sc); 4404 else 4405 mxge_rem_single_irq(sc); 4406 } 4407 4408 static int 4409 mxge_add_irq(mxge_softc_t *sc) 4410 { 4411 int err; 4412 4413 if (sc->num_slices > 1) 4414 err = mxge_add_msix_irqs(sc); 4415 else 4416 err = mxge_add_single_irq(sc); 4417 4418 if (0 && err == 0 && sc->num_slices > 1) { 4419 mxge_rem_msix_irqs(sc); 4420 err = mxge_add_msix_irqs(sc); 4421 } 4422 return err; 4423 } 4424 4425 4426 static int 4427 mxge_attach(device_t dev) 4428 { 4429 mxge_softc_t *sc = device_get_softc(dev); 4430 struct ifnet *ifp; 4431 int err, rid; 4432 4433 sc->dev = dev; 4434 mxge_fetch_tunables(sc); 4435 4436 err = bus_dma_tag_create(NULL, /* parent */ 4437 1, /* alignment */ 4438 0, /* boundary */ 4439 BUS_SPACE_MAXADDR, /* low */ 4440 BUS_SPACE_MAXADDR, /* high */ 4441 NULL, NULL, /* filter */ 4442 65536 + 256, /* maxsize */ 4443 MXGE_MAX_SEND_DESC, /* num segs */ 4444 65536, /* maxsegsize */ 4445 0, /* flags */ 4446 NULL, NULL, /* lock */ 4447 &sc->parent_dmat); /* tag */ 4448 4449 if (err != 0) { 4450 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4451 err); 4452 goto abort_with_nothing; 4453 } 4454 4455 ifp = sc->ifp = if_alloc(IFT_ETHER); 4456 if (ifp == NULL) { 4457 device_printf(dev, "can not if_alloc()\n"); 4458 err = ENOSPC; 4459 goto abort_with_parent_dmat; 4460 } 4461 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4462 4463 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4464 device_get_nameunit(dev)); 4465 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4466 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4467 "%s:drv", device_get_nameunit(dev)); 4468 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4469 MTX_NETWORK_LOCK, MTX_DEF); 4470 4471 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4472 4473 mxge_setup_cfg_space(sc); 4474 4475 /* Map the board into the kernel */ 4476 rid = PCIR_BARS; 4477 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 4478 ~0, 1, RF_ACTIVE); 
static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp;
	int err, rid;

	sc->dev = dev;
	mxge_fetch_tunables(sc);

	err = bus_dma_tag_create(NULL,			/* parent */
				 1,			/* alignment */
				 0,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC,	/* num segs */
				 65536,			/* maxsegsize */
				 0,			/* flags */
				 NULL, NULL,		/* lock */
				 &sc->parent_dmat);	/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_nothing;
	}

	ifp = sc->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not if_alloc()\n");
		err = ENOSPC;
		goto abort_with_parent_dmat;
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
		 device_get_nameunit(dev));
	mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
	snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
		 "%s:drv", device_get_nameunit(dev));
	mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
		 MTX_NETWORK_LOCK, MTX_DEF);

	callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
					 ~0, 1, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_lock;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
			      rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/* make NUL terminated copy of the EEPROM strings section of
	   lanai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_zeropad_dma;

	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_dmabench;
	sc->intr_coal_delay = mxge_intr_coal_delay;

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0)
		goto abort_with_dmabench;

	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_slices;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		goto abort_with_slices;
	}

	err = mxge_add_irq(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to add irq\n");
		goto abort_with_rings;
	}

	ifp->if_baudrate = IF_Gbps(10UL);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
		IFCAP_VLAN_MTU | IFCAP_LRO;

#ifdef MXGE_NEW_VLAN_API
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
#endif

	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000)
		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	else
		device_printf(dev, "MTU limited to %d.  Install "
			      "latest firmware for 9000 byte jumbo support\n",
			      sc->max_mtu - ETHER_HDR_LEN);
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
	ifp->if_capenable = ifp->if_capabilities;
	if (sc->lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	sc->csum_flag = 1;
	ifp->if_init = mxge_init;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
	/* Initialise the ifmedia structure */
	ifmedia_init(&sc->media, 0, mxge_media_change,
		     mxge_media_status);
	mxge_set_media(sc, IFM_ETHER | IFM_AUTO);
	mxge_media_probe(sc);
	ether_ifattach(ifp, sc->mac_addr);
	/* ether_ifattach sets mtu to 1500 */
	if (ifp->if_capabilities & IFCAP_JUMBO_MTU)
		ifp->if_mtu = 9000;

	mxge_add_sysctls(sc);
#ifdef IFNET_BUF_RING
	ifp->if_transmit = mxge_transmit;
	ifp->if_qflush = mxge_qflush;
#endif
	return 0;

abort_with_rings:
	mxge_free_rings(sc);
abort_with_slices:
	mxge_free_slices(sc);
abort_with_dmabench:
	mxge_dma_free(&sc->dmabench_dma);
abort_with_zeropad_dma:
	mxge_dma_free(&sc->zeropad_dma);
abort_with_cmd_dma:
	mxge_dma_free(&sc->cmd_dma);
abort_with_mem_res:
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
abort_with_lock:
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(ifp);
abort_with_parent_dmat:
	bus_dma_tag_destroy(sc->parent_dmat);

abort_with_nothing:
	return err;
}
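/*
 * Editorial summary (not from the original source): mxge_attach()
 * acquires resources in this order, and the abort_with_* labels
 * above (and mxge_detach() below) release them in exactly the
 * reverse order:
 *
 *	parent DMA tag -> ifnet -> mutexes -> BAR mapping ->
 *	cmd/zeropad/dmabench DMA -> firmware -> slices -> rings -> IRQ
 *
 * Keeping any new allocation inside this cascade is what makes the
 * shared unwind path safe for every partial-failure case.
 */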
Install " 4562 "latest firmware for 9000 byte jumbo support\n", 4563 sc->max_mtu - ETHER_HDR_LEN); 4564 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4565 ifp->if_capenable = ifp->if_capabilities; 4566 if (sc->lro_cnt == 0) 4567 ifp->if_capenable &= ~IFCAP_LRO; 4568 sc->csum_flag = 1; 4569 ifp->if_init = mxge_init; 4570 ifp->if_softc = sc; 4571 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4572 ifp->if_ioctl = mxge_ioctl; 4573 ifp->if_start = mxge_start; 4574 /* Initialise the ifmedia structure */ 4575 ifmedia_init(&sc->media, 0, mxge_media_change, 4576 mxge_media_status); 4577 mxge_set_media(sc, IFM_ETHER | IFM_AUTO); 4578 mxge_media_probe(sc); 4579 ether_ifattach(ifp, sc->mac_addr); 4580 /* ether_ifattach sets mtu to 1500 */ 4581 if (ifp->if_capabilities & IFCAP_JUMBO_MTU) 4582 ifp->if_mtu = 9000; 4583 4584 mxge_add_sysctls(sc); 4585 #ifdef IFNET_BUF_RING 4586 ifp->if_transmit = mxge_transmit; 4587 ifp->if_qflush = mxge_qflush; 4588 #endif 4589 return 0; 4590 4591 abort_with_rings: 4592 mxge_free_rings(sc); 4593 abort_with_slices: 4594 mxge_free_slices(sc); 4595 abort_with_dmabench: 4596 mxge_dma_free(&sc->dmabench_dma); 4597 abort_with_zeropad_dma: 4598 mxge_dma_free(&sc->zeropad_dma); 4599 abort_with_cmd_dma: 4600 mxge_dma_free(&sc->cmd_dma); 4601 abort_with_mem_res: 4602 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4603 abort_with_lock: 4604 pci_disable_busmaster(dev); 4605 mtx_destroy(&sc->cmd_mtx); 4606 mtx_destroy(&sc->driver_mtx); 4607 if_free(ifp); 4608 abort_with_parent_dmat: 4609 bus_dma_tag_destroy(sc->parent_dmat); 4610 4611 abort_with_nothing: 4612 return err; 4613 } 4614 4615 static int 4616 mxge_detach(device_t dev) 4617 { 4618 mxge_softc_t *sc = device_get_softc(dev); 4619 4620 if (mxge_vlans_active(sc)) { 4621 device_printf(sc->dev, 4622 "Detach vlans before removing module\n"); 4623 return EBUSY; 4624 } 4625 mtx_lock(&sc->driver_mtx); 4626 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4627 mxge_close(sc); 4628 mtx_unlock(&sc->driver_mtx); 4629 ether_ifdetach(sc->ifp); 4630 callout_drain(&sc->co_hdl); 4631 ifmedia_removeall(&sc->media); 4632 mxge_dummy_rdma(sc, 0); 4633 mxge_rem_sysctls(sc); 4634 mxge_rem_irq(sc); 4635 mxge_free_rings(sc); 4636 mxge_free_slices(sc); 4637 mxge_dma_free(&sc->dmabench_dma); 4638 mxge_dma_free(&sc->zeropad_dma); 4639 mxge_dma_free(&sc->cmd_dma); 4640 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4641 pci_disable_busmaster(dev); 4642 mtx_destroy(&sc->cmd_mtx); 4643 mtx_destroy(&sc->driver_mtx); 4644 if_free(sc->ifp); 4645 bus_dma_tag_destroy(sc->parent_dmat); 4646 return 0; 4647 } 4648 4649 static int 4650 mxge_shutdown(device_t dev) 4651 { 4652 return 0; 4653 } 4654 4655 /* 4656 This file uses Myri10GE driver indentation. 4657 4658 Local Variables: 4659 c-file-style:"linux" 4660 tab-width:8 4661 End: 4662 */ 4663