1 /****************************************************************************** 2 3 Copyright (c) 2006-2008, Myricom Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 
***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/sx.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <vm/vm.h>		/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;	/* try to enable ECRC on nVidia bridges */
static int mxge_force_firmware = 0;	/* 0 = probe, 1 = force aligned fw, 2 = force unaligned fw */
static int mxge_intr_coal_delay = 30;	/* interrupt coalescing delay (usecs) */
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;		/* extra boot-time diagnostics */
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
static int mxge_always_promisc = 0;
/* firmware image names; which one is used depends on PCIe completion alignment */
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

/*
 * Probe: match the Myricom vendor ID and either of the two Z8E
 * device IDs; anything else is not ours.
 */
static int
mxge_probe(device_t dev)
{
	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		device_set_desc(dev, "Myri10G-PCIE-8A");
		return 0;
	}
	return ENXIO;
}

/*
 * Enable write-combining on the NIC SRAM mapping (x86/amd64 only).
 * On failure sc->wc is cleared so the rest of the driver falls back
 * to uncached PIO.
 */
static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
}


/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
			 int error)
{
	/* single-segment load: stash the bus address for the caller */
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

/*
 * Allocate, map and load a DMA-able region of "bytes" bytes with the
 * requested alignment.  On success dma->addr/dma->bus_addr are valid;
 * on failure all intermediate resources are released (goto cleanup).
 */
static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	/* page-aligned multi-page buffers may not cross a boundary;
	   everything else is constrained to a 4KB boundary/segment */
	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO),  &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}


/* Release a region allocated by mxge_dma_alloc() (unload, free, destroy). */
static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */

static int
mxge_parse_strings(mxge_softc_t *sc)
{
	/* NOTE: the macro ignores its argument and always advances "ptr" */
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			/* skip the 'M'; the first "ptr += 3" below then
			   skips "AC=" so each iteration parses one octet.
			   NOTE(review): mac_addr_string therefore points at
			   "AC=x:..." rather than the first hex digit —
			   looks intentional-but-odd; confirm consumers. */
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}

#if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__
/*
 * Enable Ended-to-end CRC generation on an upstream nVidia (ck804 or
 * mcp55) bridge by poking its extended config space directly through
 * a pmap_mapdev() mapping, since this kernel cannot issue config
 * reads/writes beyond offset 0xff.
 */
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;


	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function.  Uncomment this and remove 
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0	
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	/* extended config space address of this device's function */
	off =  base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);


	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	/* bit 0x40 enables ECRC generation */
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose) 
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif


static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";


	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */

	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	/* MB/s = (transfers * len * 2 directions) / (ticks * 0.5us) */
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	/* the unaligned test is expected to fail on some hosts;
	   stay quiet about it */
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/* 
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/* 
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/* 
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */

	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}

/*
 * Decide which firmware image / tx boundary to use: honor the
 * mxge_force_firmware tunable, special-case narrow (<= x4) links,
 * otherwise probe with the aligned firmware and fall back if it
 * reports unaligned completions.
 */
static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;


	if (mxge_force_firmware != 0) {
		if (mxge_force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}

/* union used to strip const from a pointer without a cast warning */
union qualhack
{
	const char *ro_char;
	char *rw_char;
};

/*
 * Sanity-check a firmware image header: correct MCP type and a
 * major.minor version matching what this driver was built against.
 * Also records the version string for sysctl.
 */
static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{


	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n", 
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
	       &sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;

}
649 static void * 650 z_alloc(void *nil, u_int items, u_int size) 651 { 652 void *ptr; 653 654 ptr = malloc(items * size, M_TEMP, M_NOWAIT); 655 return ptr; 656 } 657 658 static void 659 z_free(void *nil, void *ptr) 660 { 661 free(ptr, M_TEMP); 662 } 663 664 665 static int 666 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 667 { 668 z_stream zs; 669 char *inflate_buffer; 670 const struct firmware *fw; 671 const mcp_gen_header_t *hdr; 672 unsigned hdr_offset; 673 int status; 674 unsigned int i; 675 char dummy; 676 size_t fw_len; 677 678 fw = firmware_get(sc->fw_name); 679 if (fw == NULL) { 680 device_printf(sc->dev, "Could not find firmware image %s\n", 681 sc->fw_name); 682 return ENOENT; 683 } 684 685 686 687 /* setup zlib and decompress f/w */ 688 bzero(&zs, sizeof (zs)); 689 zs.zalloc = z_alloc; 690 zs.zfree = z_free; 691 status = inflateInit(&zs); 692 if (status != Z_OK) { 693 status = EIO; 694 goto abort_with_fw; 695 } 696 697 /* the uncompressed size is stored as the firmware version, 698 which would otherwise go unused */ 699 fw_len = (size_t) fw->version; 700 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 701 if (inflate_buffer == NULL) 702 goto abort_with_zs; 703 zs.avail_in = fw->datasize; 704 zs.next_in = __DECONST(char *, fw->data); 705 zs.avail_out = fw_len; 706 zs.next_out = inflate_buffer; 707 status = inflate(&zs, Z_FINISH); 708 if (status != Z_STREAM_END) { 709 device_printf(sc->dev, "zlib %d\n", status); 710 status = EIO; 711 goto abort_with_buffer; 712 } 713 714 /* check id */ 715 hdr_offset = htobe32(*(const uint32_t *) 716 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 717 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 718 device_printf(sc->dev, "Bad firmware file"); 719 status = EIO; 720 goto abort_with_buffer; 721 } 722 hdr = (const void*)(inflate_buffer + hdr_offset); 723 724 status = mxge_validate_firmware(sc, hdr); 725 if (status != 0) 726 goto abort_with_buffer; 727 728 /* Copy the inflated firmware to NIC 
SRAM. */ 729 for (i = 0; i < fw_len; i += 256) { 730 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 731 inflate_buffer + i, 732 min(256U, (unsigned)(fw_len - i))); 733 mb(); 734 dummy = *sc->sram; 735 mb(); 736 } 737 738 *limit = fw_len; 739 status = 0; 740 abort_with_buffer: 741 free(inflate_buffer, M_TEMP); 742 abort_with_zs: 743 inflateEnd(&zs); 744 abort_with_fw: 745 firmware_put(fw, FIRMWARE_UNLOAD); 746 return status; 747 } 748 749 /* 750 * Enable or disable periodic RDMAs from the host to make certain 751 * chipsets resend dropped PCIe messages 752 */ 753 754 static void 755 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 756 { 757 char buf_bytes[72]; 758 volatile uint32_t *confirm; 759 volatile char *submit; 760 uint32_t *buf, dma_low, dma_high; 761 int i; 762 763 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 764 765 /* clear confirmation addr */ 766 confirm = (volatile uint32_t *)sc->cmd; 767 *confirm = 0; 768 mb(); 769 770 /* send an rdma command to the PCIe engine, and wait for the 771 response in the confirmation address. The firmware should 772 write a -1 there to indicate it is alive and well 773 */ 774 775 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 776 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 777 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 778 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 779 buf[2] = htobe32(0xffffffff); /* confirm data */ 780 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 781 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 782 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 783 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 784 buf[5] = htobe32(enable); /* enable? 
*/ 785 786 787 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 788 789 mxge_pio_copy(submit, buf, 64); 790 mb(); 791 DELAY(1000); 792 mb(); 793 i = 0; 794 while (*confirm != 0xffffffff && i < 20) { 795 DELAY(1000); 796 i++; 797 } 798 if (*confirm != 0xffffffff) { 799 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 800 (enable ? "enable" : "disable"), confirm, 801 *confirm); 802 } 803 return; 804 } 805 806 static int 807 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 808 { 809 mcp_cmd_t *buf; 810 char buf_bytes[sizeof(*buf) + 8]; 811 volatile mcp_cmd_response_t *response = sc->cmd; 812 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 813 uint32_t dma_low, dma_high; 814 int err, sleep_total = 0; 815 816 /* ensure buf is aligned to 8 bytes */ 817 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 818 819 buf->data0 = htobe32(data->data0); 820 buf->data1 = htobe32(data->data1); 821 buf->data2 = htobe32(data->data2); 822 buf->cmd = htobe32(cmd); 823 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 824 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 825 826 buf->response_addr.low = htobe32(dma_low); 827 buf->response_addr.high = htobe32(dma_high); 828 mtx_lock(&sc->cmd_mtx); 829 response->result = 0xffffffff; 830 mb(); 831 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 832 833 /* wait up to 20ms */ 834 err = EAGAIN; 835 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 836 bus_dmamap_sync(sc->cmd_dma.dmat, 837 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 838 mb(); 839 switch (be32toh(response->result)) { 840 case 0: 841 data->data0 = be32toh(response->data); 842 err = 0; 843 break; 844 case 0xffffffff: 845 DELAY(1000); 846 break; 847 case MXGEFW_CMD_UNKNOWN: 848 err = ENOSYS; 849 break; 850 case MXGEFW_CMD_ERROR_UNALIGNED: 851 err = E2BIG; 852 break; 853 case MXGEFW_CMD_ERROR_BUSY: 854 err = EBUSY; 855 break; 856 default: 857 device_printf(sc->dev, 858 "mxge: command %d " 859 "failed, 
result = %d\n", 860 cmd, be32toh(response->result)); 861 err = ENXIO; 862 break; 863 } 864 if (err != EAGAIN) 865 break; 866 } 867 if (err == EAGAIN) 868 device_printf(sc->dev, "mxge: command %d timed out" 869 "result = %d\n", 870 cmd, be32toh(response->result)); 871 mtx_unlock(&sc->cmd_mtx); 872 return err; 873 } 874 875 static int 876 mxge_adopt_running_firmware(mxge_softc_t *sc) 877 { 878 struct mcp_gen_header *hdr; 879 const size_t bytes = sizeof (struct mcp_gen_header); 880 size_t hdr_offset; 881 int status; 882 883 /* find running firmware header */ 884 hdr_offset = htobe32(*(volatile uint32_t *) 885 (sc->sram + MCP_HEADER_PTR_OFFSET)); 886 887 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 888 device_printf(sc->dev, 889 "Running firmware has bad header offset (%d)\n", 890 (int)hdr_offset); 891 return EIO; 892 } 893 894 /* copy header of running firmware from SRAM to host memory to 895 * validate firmware */ 896 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 897 if (hdr == NULL) { 898 device_printf(sc->dev, "could not malloc firmware hdr\n"); 899 return ENOMEM; 900 } 901 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 902 rman_get_bushandle(sc->mem_res), 903 hdr_offset, (char *)hdr, bytes); 904 status = mxge_validate_firmware(sc, hdr); 905 free(hdr, M_DEVBUF); 906 907 /* 908 * check to see if adopted firmware has bug where adopting 909 * it will cause broadcasts to be filtered unless the NIC 910 * is kept in ALLMULTI mode 911 */ 912 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 913 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 914 sc->adopted_rx_filter_bug = 1; 915 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 916 "working around rx filter bug\n", 917 sc->fw_ver_major, sc->fw_ver_minor, 918 sc->fw_ver_tiny); 919 } 920 921 return status; 922 } 923 924 925 static int 926 mxge_load_firmware(mxge_softc_t *sc, int adopt) 927 { 928 volatile uint32_t *confirm; 929 volatile char *submit; 930 char buf_bytes[72]; 931 uint32_t *buf, 
size, dma_low, dma_high; 932 int status, i; 933 934 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 935 936 size = sc->sram_size; 937 status = mxge_load_firmware_helper(sc, &size); 938 if (status) { 939 if (!adopt) 940 return status; 941 /* Try to use the currently running firmware, if 942 it is new enough */ 943 status = mxge_adopt_running_firmware(sc); 944 if (status) { 945 device_printf(sc->dev, 946 "failed to adopt running firmware\n"); 947 return status; 948 } 949 device_printf(sc->dev, 950 "Successfully adopted running firmware\n"); 951 if (sc->tx_boundary == 4096) { 952 device_printf(sc->dev, 953 "Using firmware currently running on NIC" 954 ". For optimal\n"); 955 device_printf(sc->dev, 956 "performance consider loading optimized " 957 "firmware\n"); 958 } 959 sc->fw_name = mxge_fw_unaligned; 960 sc->tx_boundary = 2048; 961 return 0; 962 } 963 /* clear confirmation addr */ 964 confirm = (volatile uint32_t *)sc->cmd; 965 *confirm = 0; 966 mb(); 967 /* send a reload command to the bootstrap MCP, and wait for the 968 response in the confirmation address. The firmware should 969 write a -1 there to indicate it is alive and well 970 */ 971 972 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 973 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 974 975 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 976 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 977 buf[2] = htobe32(0xffffffff); /* confirm data */ 978 979 /* FIX: All newest firmware should un-protect the bottom of 980 the sram before handoff. However, the very first interfaces 981 do not. 
Therefore the handoff copy must skip the first 8 bytes 982 */ 983 /* where the code starts*/ 984 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 985 buf[4] = htobe32(size - 8); /* length of code */ 986 buf[5] = htobe32(8); /* where to copy to */ 987 buf[6] = htobe32(0); /* where to jump to */ 988 989 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 990 mxge_pio_copy(submit, buf, 64); 991 mb(); 992 DELAY(1000); 993 mb(); 994 i = 0; 995 while (*confirm != 0xffffffff && i < 20) { 996 DELAY(1000*10); 997 i++; 998 bus_dmamap_sync(sc->cmd_dma.dmat, 999 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1000 } 1001 if (*confirm != 0xffffffff) { 1002 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1003 confirm, *confirm); 1004 1005 return ENXIO; 1006 } 1007 return 0; 1008 } 1009 1010 static int 1011 mxge_update_mac_address(mxge_softc_t *sc) 1012 { 1013 mxge_cmd_t cmd; 1014 uint8_t *addr = sc->mac_addr; 1015 int status; 1016 1017 1018 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1019 | (addr[2] << 8) | addr[3]); 1020 1021 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1022 1023 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1024 return status; 1025 } 1026 1027 static int 1028 mxge_change_pause(mxge_softc_t *sc, int pause) 1029 { 1030 mxge_cmd_t cmd; 1031 int status; 1032 1033 if (pause) 1034 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1035 &cmd); 1036 else 1037 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1038 &cmd); 1039 1040 if (status) { 1041 device_printf(sc->dev, "Failed to set flow control mode\n"); 1042 return ENXIO; 1043 } 1044 sc->pause = pause; 1045 return 0; 1046 } 1047 1048 static void 1049 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1050 { 1051 mxge_cmd_t cmd; 1052 int status; 1053 1054 if (mxge_always_promisc) 1055 promisc = 1; 1056 1057 if (promisc) 1058 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1059 &cmd); 1060 else 1061 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1062 &cmd); 1063 1064 if (status) { 1065 
device_printf(sc->dev, "Failed to set promisc mode\n"); 1066 } 1067 } 1068 1069 static void 1070 mxge_set_multicast_list(mxge_softc_t *sc) 1071 { 1072 mxge_cmd_t cmd; 1073 struct ifmultiaddr *ifma; 1074 struct ifnet *ifp = sc->ifp; 1075 int err; 1076 1077 /* This firmware is known to not support multicast */ 1078 if (!sc->fw_multicast_support) 1079 return; 1080 1081 /* Disable multicast filtering while we play with the lists*/ 1082 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1083 if (err != 0) { 1084 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1085 " error status: %d\n", err); 1086 return; 1087 } 1088 1089 if (sc->adopted_rx_filter_bug) 1090 return; 1091 1092 if (ifp->if_flags & IFF_ALLMULTI) 1093 /* request to disable multicast filtering, so quit here */ 1094 return; 1095 1096 /* Flush all the filters */ 1097 1098 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1099 if (err != 0) { 1100 device_printf(sc->dev, 1101 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1102 ", error status: %d\n", err); 1103 return; 1104 } 1105 1106 /* Walk the multicast list, and add each address */ 1107 1108 IF_ADDR_LOCK(ifp); 1109 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1110 if (ifma->ifma_addr->sa_family != AF_LINK) 1111 continue; 1112 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1113 &cmd.data0, 4); 1114 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1115 &cmd.data1, 2); 1116 cmd.data0 = htonl(cmd.data0); 1117 cmd.data1 = htonl(cmd.data1); 1118 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1119 if (err != 0) { 1120 device_printf(sc->dev, "Failed " 1121 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1122 "%d\t", err); 1123 /* abort, leaving multicast filtering off */ 1124 IF_ADDR_UNLOCK(ifp); 1125 return; 1126 } 1127 } 1128 IF_ADDR_UNLOCK(ifp); 1129 /* Enable multicast filtering */ 1130 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1131 if (err != 0) { 1132 device_printf(sc->dev, "Failed 
MXGEFW_DISABLE_ALLMULTI" 1133 ", error status: %d\n", err); 1134 } 1135 } 1136 1137 static int 1138 mxge_max_mtu(mxge_softc_t *sc) 1139 { 1140 mxge_cmd_t cmd; 1141 int status; 1142 1143 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1144 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1145 1146 /* try to set nbufs to see if it we can 1147 use virtually contiguous jumbos */ 1148 cmd.data0 = 0; 1149 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1150 &cmd); 1151 if (status == 0) 1152 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1153 1154 /* otherwise, we're limited to MJUMPAGESIZE */ 1155 return MJUMPAGESIZE - MXGEFW_PAD; 1156 } 1157 1158 static int 1159 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1160 { 1161 struct mxge_slice_state *ss; 1162 mxge_rx_done_t *rx_done; 1163 volatile uint32_t *irq_claim; 1164 mxge_cmd_t cmd; 1165 int slice, status; 1166 1167 /* try to send a reset command to the card to see if it 1168 is alive */ 1169 memset(&cmd, 0, sizeof (cmd)); 1170 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1171 if (status != 0) { 1172 device_printf(sc->dev, "failed reset\n"); 1173 return ENXIO; 1174 } 1175 1176 mxge_dummy_rdma(sc, 1); 1177 1178 1179 /* set the intrq size */ 1180 cmd.data0 = sc->rx_ring_size; 1181 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1182 1183 /* 1184 * Even though we already know how many slices are supported 1185 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1186 * has magic side effects, and must be called after a reset. 1187 * It must be called prior to calling any RSS related cmds, 1188 * including assigning an interrupt queue for anything but 1189 * slice 0. It must also be called *after* 1190 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1191 * the firmware to compute offsets. 
1192 */ 1193 1194 if (sc->num_slices > 1) { 1195 /* ask the maximum number of slices it supports */ 1196 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1197 &cmd); 1198 if (status != 0) { 1199 device_printf(sc->dev, 1200 "failed to get number of slices\n"); 1201 return status; 1202 } 1203 /* 1204 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1205 * to setting up the interrupt queue DMA 1206 */ 1207 cmd.data0 = sc->num_slices; 1208 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1209 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1210 &cmd); 1211 if (status != 0) { 1212 device_printf(sc->dev, 1213 "failed to set number of slices\n"); 1214 return status; 1215 } 1216 } 1217 1218 1219 if (interrupts_setup) { 1220 /* Now exchange information about interrupts */ 1221 for (slice = 0; slice < sc->num_slices; slice++) { 1222 rx_done = &sc->ss[slice].rx_done; 1223 memset(rx_done->entry, 0, sc->rx_ring_size); 1224 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1225 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1226 cmd.data2 = slice; 1227 status |= mxge_send_cmd(sc, 1228 MXGEFW_CMD_SET_INTRQ_DMA, 1229 &cmd); 1230 } 1231 } 1232 1233 status |= mxge_send_cmd(sc, 1234 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1235 1236 1237 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1238 1239 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1240 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1241 1242 1243 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1244 &cmd); 1245 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1246 if (status != 0) { 1247 device_printf(sc->dev, "failed set interrupt parameters\n"); 1248 return status; 1249 } 1250 1251 1252 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1253 1254 1255 /* run a DMA benchmark */ 1256 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1257 1258 for (slice = 0; slice < sc->num_slices; slice++) { 1259 ss = 
&sc->ss[slice]; 1260 1261 ss->irq_claim = irq_claim + (2 * slice); 1262 /* reset mcp/driver shared state back to 0 */ 1263 ss->rx_done.idx = 0; 1264 ss->rx_done.cnt = 0; 1265 ss->tx.req = 0; 1266 ss->tx.done = 0; 1267 ss->tx.pkt_done = 0; 1268 ss->tx.wake = 0; 1269 ss->tx.defrag = 0; 1270 ss->tx.stall = 0; 1271 ss->rx_big.cnt = 0; 1272 ss->rx_small.cnt = 0; 1273 ss->lro_bad_csum = 0; 1274 ss->lro_queued = 0; 1275 ss->lro_flushed = 0; 1276 if (ss->fw_stats != NULL) { 1277 ss->fw_stats->valid = 0; 1278 ss->fw_stats->send_done_count = 0; 1279 } 1280 } 1281 sc->rdma_tags_available = 15; 1282 status = mxge_update_mac_address(sc); 1283 mxge_change_promisc(sc, 0); 1284 mxge_change_pause(sc, sc->pause); 1285 mxge_set_multicast_list(sc); 1286 return status; 1287 } 1288 1289 static int 1290 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1291 { 1292 mxge_softc_t *sc; 1293 unsigned int intr_coal_delay; 1294 int err; 1295 1296 sc = arg1; 1297 intr_coal_delay = sc->intr_coal_delay; 1298 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1299 if (err != 0) { 1300 return err; 1301 } 1302 if (intr_coal_delay == sc->intr_coal_delay) 1303 return 0; 1304 1305 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1306 return EINVAL; 1307 1308 mtx_lock(&sc->driver_mtx); 1309 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1310 sc->intr_coal_delay = intr_coal_delay; 1311 1312 mtx_unlock(&sc->driver_mtx); 1313 return err; 1314 } 1315 1316 static int 1317 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1318 { 1319 mxge_softc_t *sc; 1320 unsigned int enabled; 1321 int err; 1322 1323 sc = arg1; 1324 enabled = sc->pause; 1325 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1326 if (err != 0) { 1327 return err; 1328 } 1329 if (enabled == sc->pause) 1330 return 0; 1331 1332 mtx_lock(&sc->driver_mtx); 1333 err = mxge_change_pause(sc, enabled); 1334 mtx_unlock(&sc->driver_mtx); 1335 return err; 1336 } 1337 1338 static int 1339 mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt) 
1340 { 1341 struct ifnet *ifp; 1342 int err = 0; 1343 1344 ifp = sc->ifp; 1345 if (lro_cnt == 0) 1346 ifp->if_capenable &= ~IFCAP_LRO; 1347 else 1348 ifp->if_capenable |= IFCAP_LRO; 1349 sc->lro_cnt = lro_cnt; 1350 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1351 callout_stop(&sc->co_hdl); 1352 mxge_close(sc); 1353 err = mxge_open(sc); 1354 if (err == 0) 1355 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 1356 } 1357 return err; 1358 } 1359 1360 static int 1361 mxge_change_lro(SYSCTL_HANDLER_ARGS) 1362 { 1363 mxge_softc_t *sc; 1364 unsigned int lro_cnt; 1365 int err; 1366 1367 sc = arg1; 1368 lro_cnt = sc->lro_cnt; 1369 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req); 1370 if (err != 0) 1371 return err; 1372 1373 if (lro_cnt == sc->lro_cnt) 1374 return 0; 1375 1376 if (lro_cnt > 128) 1377 return EINVAL; 1378 1379 mtx_lock(&sc->driver_mtx); 1380 err = mxge_change_lro_locked(sc, lro_cnt); 1381 mtx_unlock(&sc->driver_mtx); 1382 return err; 1383 } 1384 1385 static int 1386 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1387 { 1388 int err; 1389 1390 if (arg1 == NULL) 1391 return EFAULT; 1392 arg2 = be32toh(*(int *)arg1); 1393 arg1 = NULL; 1394 err = sysctl_handle_int(oidp, arg1, arg2, req); 1395 1396 return err; 1397 } 1398 1399 static void 1400 mxge_rem_sysctls(mxge_softc_t *sc) 1401 { 1402 struct mxge_slice_state *ss; 1403 int slice; 1404 1405 if (sc->slice_sysctl_tree == NULL) 1406 return; 1407 1408 for (slice = 0; slice < sc->num_slices; slice++) { 1409 ss = &sc->ss[slice]; 1410 if (ss == NULL || ss->sysctl_tree == NULL) 1411 continue; 1412 sysctl_ctx_free(&ss->sysctl_ctx); 1413 ss->sysctl_tree = NULL; 1414 } 1415 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1416 sc->slice_sysctl_tree = NULL; 1417 } 1418 1419 static void 1420 mxge_add_sysctls(mxge_softc_t *sc) 1421 { 1422 struct sysctl_ctx_list *ctx; 1423 struct sysctl_oid_list *children; 1424 mcp_irq_data_t *fw; 1425 struct mxge_slice_state *ss; 1426 int slice; 1427 char slice_num[8]; 1428 1429 ctx = 
device_get_sysctl_ctx(sc->dev); 1430 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1431 fw = sc->ss[0].fw_stats; 1432 1433 /* random information */ 1434 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1435 "firmware_version", 1436 CTLFLAG_RD, &sc->fw_version, 1437 0, "firmware version"); 1438 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1439 "serial_number", 1440 CTLFLAG_RD, &sc->serial_number_string, 1441 0, "serial number"); 1442 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1443 "product_code", 1444 CTLFLAG_RD, &sc->product_code_string, 1445 0, "product_code"); 1446 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1447 "pcie_link_width", 1448 CTLFLAG_RD, &sc->link_width, 1449 0, "tx_boundary"); 1450 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1451 "tx_boundary", 1452 CTLFLAG_RD, &sc->tx_boundary, 1453 0, "tx_boundary"); 1454 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1455 "write_combine", 1456 CTLFLAG_RD, &sc->wc, 1457 0, "write combining PIO?"); 1458 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1459 "read_dma_MBs", 1460 CTLFLAG_RD, &sc->read_dma, 1461 0, "DMA Read speed in MB/s"); 1462 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1463 "write_dma_MBs", 1464 CTLFLAG_RD, &sc->write_dma, 1465 0, "DMA Write speed in MB/s"); 1466 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1467 "read_write_dma_MBs", 1468 CTLFLAG_RD, &sc->read_write_dma, 1469 0, "DMA concurrent Read/Write speed in MB/s"); 1470 1471 1472 /* performance related tunables */ 1473 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1474 "intr_coal_delay", 1475 CTLTYPE_INT|CTLFLAG_RW, sc, 1476 0, mxge_change_intr_coal, 1477 "I", "interrupt coalescing delay in usecs"); 1478 1479 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1480 "flow_control_enabled", 1481 CTLTYPE_INT|CTLFLAG_RW, sc, 1482 0, mxge_change_flow_control, 1483 "I", "interrupt coalescing delay in usecs"); 1484 1485 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1486 "deassert_wait", 1487 CTLFLAG_RW, &mxge_deassert_wait, 1488 0, "Wait for IRQ line to go low in ihandler"); 1489 1490 /* stats 
block from firmware is in network byte order. 1491 Need to swap it */ 1492 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1493 "link_up", 1494 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1495 0, mxge_handle_be32, 1496 "I", "link up"); 1497 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1498 "rdma_tags_available", 1499 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1500 0, mxge_handle_be32, 1501 "I", "rdma_tags_available"); 1502 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1503 "dropped_bad_crc32", 1504 CTLTYPE_INT|CTLFLAG_RD, 1505 &fw->dropped_bad_crc32, 1506 0, mxge_handle_be32, 1507 "I", "dropped_bad_crc32"); 1508 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1509 "dropped_bad_phy", 1510 CTLTYPE_INT|CTLFLAG_RD, 1511 &fw->dropped_bad_phy, 1512 0, mxge_handle_be32, 1513 "I", "dropped_bad_phy"); 1514 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1515 "dropped_link_error_or_filtered", 1516 CTLTYPE_INT|CTLFLAG_RD, 1517 &fw->dropped_link_error_or_filtered, 1518 0, mxge_handle_be32, 1519 "I", "dropped_link_error_or_filtered"); 1520 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1521 "dropped_link_overflow", 1522 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1523 0, mxge_handle_be32, 1524 "I", "dropped_link_overflow"); 1525 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1526 "dropped_multicast_filtered", 1527 CTLTYPE_INT|CTLFLAG_RD, 1528 &fw->dropped_multicast_filtered, 1529 0, mxge_handle_be32, 1530 "I", "dropped_multicast_filtered"); 1531 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1532 "dropped_no_big_buffer", 1533 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1534 0, mxge_handle_be32, 1535 "I", "dropped_no_big_buffer"); 1536 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1537 "dropped_no_small_buffer", 1538 CTLTYPE_INT|CTLFLAG_RD, 1539 &fw->dropped_no_small_buffer, 1540 0, mxge_handle_be32, 1541 "I", "dropped_no_small_buffer"); 1542 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1543 "dropped_overrun", 1544 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1545 0, mxge_handle_be32, 1546 "I", "dropped_overrun"); 
1547 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1548 "dropped_pause", 1549 CTLTYPE_INT|CTLFLAG_RD, 1550 &fw->dropped_pause, 1551 0, mxge_handle_be32, 1552 "I", "dropped_pause"); 1553 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1554 "dropped_runt", 1555 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1556 0, mxge_handle_be32, 1557 "I", "dropped_runt"); 1558 1559 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1560 "dropped_unicast_filtered", 1561 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1562 0, mxge_handle_be32, 1563 "I", "dropped_unicast_filtered"); 1564 1565 /* verbose printing? */ 1566 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1567 "verbose", 1568 CTLFLAG_RW, &mxge_verbose, 1569 0, "verbose printing"); 1570 1571 /* lro */ 1572 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1573 "lro_cnt", 1574 CTLTYPE_INT|CTLFLAG_RW, sc, 1575 0, mxge_change_lro, 1576 "I", "number of lro merge queues"); 1577 1578 1579 /* add counters exported for debugging from all slices */ 1580 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1581 sc->slice_sysctl_tree = 1582 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1583 "slice", CTLFLAG_RD, 0, ""); 1584 1585 for (slice = 0; slice < sc->num_slices; slice++) { 1586 ss = &sc->ss[slice]; 1587 sysctl_ctx_init(&ss->sysctl_ctx); 1588 ctx = &ss->sysctl_ctx; 1589 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1590 sprintf(slice_num, "%d", slice); 1591 ss->sysctl_tree = 1592 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1593 CTLFLAG_RD, 0, ""); 1594 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1595 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1596 "rx_small_cnt", 1597 CTLFLAG_RD, &ss->rx_small.cnt, 1598 0, "rx_small_cnt"); 1599 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1600 "rx_big_cnt", 1601 CTLFLAG_RD, &ss->rx_big.cnt, 1602 0, "rx_small_cnt"); 1603 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1604 "tx_req", 1605 CTLFLAG_RD, &ss->tx.req, 1606 0, "tx_req"); 1607 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1608 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed, 1609 0, 
"number of lro merge queues flushed"); 1610 1611 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1612 "lro_queued", CTLFLAG_RD, &ss->lro_queued, 1613 0, "number of frames appended to lro merge" 1614 "queues"); 1615 1616 /* only transmit from slice 0 for now */ 1617 if (slice > 0) 1618 continue; 1619 1620 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1621 "tx_done", 1622 CTLFLAG_RD, &ss->tx.done, 1623 0, "tx_done"); 1624 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1625 "tx_pkt_done", 1626 CTLFLAG_RD, &ss->tx.pkt_done, 1627 0, "tx_done"); 1628 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1629 "tx_stall", 1630 CTLFLAG_RD, &ss->tx.stall, 1631 0, "tx_stall"); 1632 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1633 "tx_wake", 1634 CTLFLAG_RD, &ss->tx.wake, 1635 0, "tx_wake"); 1636 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1637 "tx_defrag", 1638 CTLFLAG_RD, &ss->tx.defrag, 1639 0, "tx_defrag"); 1640 } 1641 } 1642 1643 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1644 backwards one at a time and handle ring wraps */ 1645 1646 static inline void 1647 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1648 mcp_kreq_ether_send_t *src, int cnt) 1649 { 1650 int idx, starting_slot; 1651 starting_slot = tx->req; 1652 while (cnt > 1) { 1653 cnt--; 1654 idx = (starting_slot + cnt) & tx->mask; 1655 mxge_pio_copy(&tx->lanai[idx], 1656 &src[cnt], sizeof(*src)); 1657 mb(); 1658 } 1659 } 1660 1661 /* 1662 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1663 * at most 32 bytes at a time, so as to avoid involving the software 1664 * pio handler in the nic. 
We re-write the first segment's flags 1665 * to mark them valid only after writing the entire chain 1666 */ 1667 1668 static inline void 1669 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1670 int cnt) 1671 { 1672 int idx, i; 1673 uint32_t *src_ints; 1674 volatile uint32_t *dst_ints; 1675 mcp_kreq_ether_send_t *srcp; 1676 volatile mcp_kreq_ether_send_t *dstp, *dst; 1677 uint8_t last_flags; 1678 1679 idx = tx->req & tx->mask; 1680 1681 last_flags = src->flags; 1682 src->flags = 0; 1683 mb(); 1684 dst = dstp = &tx->lanai[idx]; 1685 srcp = src; 1686 1687 if ((idx + cnt) < tx->mask) { 1688 for (i = 0; i < (cnt - 1); i += 2) { 1689 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1690 mb(); /* force write every 32 bytes */ 1691 srcp += 2; 1692 dstp += 2; 1693 } 1694 } else { 1695 /* submit all but the first request, and ensure 1696 that it is submitted below */ 1697 mxge_submit_req_backwards(tx, src, cnt); 1698 i = 0; 1699 } 1700 if (i < cnt) { 1701 /* submit the first request */ 1702 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1703 mb(); /* barrier before setting valid flag */ 1704 } 1705 1706 /* re-write the last 32-bits with the valid flags */ 1707 src->flags = last_flags; 1708 src_ints = (uint32_t *)src; 1709 src_ints+=3; 1710 dst_ints = (volatile uint32_t *)dst; 1711 dst_ints+=3; 1712 *dst_ints = *src_ints; 1713 tx->req += cnt; 1714 mb(); 1715 } 1716 1717 #if IFCAP_TSO4 1718 1719 static void 1720 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1721 int busdma_seg_cnt, int ip_off) 1722 { 1723 mxge_tx_ring_t *tx; 1724 mcp_kreq_ether_send_t *req; 1725 bus_dma_segment_t *seg; 1726 struct ip *ip; 1727 struct tcphdr *tcp; 1728 uint32_t low, high_swapped; 1729 int len, seglen, cum_len, cum_len_next; 1730 int next_is_first, chop, cnt, rdma_count, small; 1731 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1732 uint8_t flags, flags_next; 1733 static int once; 1734 1735 mss = m->m_pkthdr.tso_segsz; 1736 1737 /* negative cum_len signifies to the 1738 * 
send loop that we are still in the 1739 * header portion of the TSO packet. 1740 */ 1741 1742 /* ensure we have the ethernet, IP and TCP 1743 header together in the first mbuf, copy 1744 it to a scratch buffer if not */ 1745 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1746 m_copydata(m, 0, ip_off + sizeof (*ip), 1747 ss->scratch); 1748 ip = (struct ip *)(ss->scratch + ip_off); 1749 } else { 1750 ip = (struct ip *)(mtod(m, char *) + ip_off); 1751 } 1752 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) 1753 + sizeof (*tcp))) { 1754 m_copydata(m, 0, ip_off + (ip->ip_hl << 2) 1755 + sizeof (*tcp), ss->scratch); 1756 ip = (struct ip *)(mtod(m, char *) + ip_off); 1757 } 1758 1759 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1760 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); 1761 1762 /* TSO implies checksum offload on this hardware */ 1763 cksum_offset = ip_off + (ip->ip_hl << 2); 1764 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1765 1766 1767 /* for TSO, pseudo_hdr_offset holds mss. 1768 * The firmware figures out where to put 1769 * the checksum by parsing the header. */ 1770 pseudo_hdr_offset = htobe16(mss); 1771 1772 tx = &ss->tx; 1773 req = tx->req_list; 1774 seg = tx->seg_list; 1775 cnt = 0; 1776 rdma_count = 0; 1777 /* "rdma_count" is the number of RDMAs belonging to the 1778 * current packet BEFORE the current send request. For 1779 * non-TSO packets, this is equal to "count". 1780 * For TSO packets, rdma_count needs to be reset 1781 * to 0 after a segment cut. 1782 * 1783 * The rdma_count field of the send request is 1784 * the number of RDMAs of the packet starting at 1785 * that request. For TSO send requests with one ore more cuts 1786 * in the middle, this is the number of RDMAs starting 1787 * after the last cut in the request. All previous 1788 * segments before the last cut implicitly have 1 RDMA. 
1789 * 1790 * Since the number of RDMAs is not known beforehand, 1791 * it must be filled-in retroactively - after each 1792 * segmentation cut or at the end of the entire packet. 1793 */ 1794 1795 while (busdma_seg_cnt) { 1796 /* Break the busdma segment up into pieces*/ 1797 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1798 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1799 len = seg->ds_len; 1800 1801 while (len) { 1802 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1803 seglen = len; 1804 cum_len_next = cum_len + seglen; 1805 (req-rdma_count)->rdma_count = rdma_count + 1; 1806 if (__predict_true(cum_len >= 0)) { 1807 /* payload */ 1808 chop = (cum_len_next > mss); 1809 cum_len_next = cum_len_next % mss; 1810 next_is_first = (cum_len_next == 0); 1811 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1812 flags_next |= next_is_first * 1813 MXGEFW_FLAGS_FIRST; 1814 rdma_count |= -(chop | next_is_first); 1815 rdma_count += chop & !next_is_first; 1816 } else if (cum_len_next >= 0) { 1817 /* header ends */ 1818 rdma_count = -1; 1819 cum_len_next = 0; 1820 seglen = -cum_len; 1821 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1822 flags_next = MXGEFW_FLAGS_TSO_PLD | 1823 MXGEFW_FLAGS_FIRST | 1824 (small * MXGEFW_FLAGS_SMALL); 1825 } 1826 1827 req->addr_high = high_swapped; 1828 req->addr_low = htobe32(low); 1829 req->pseudo_hdr_offset = pseudo_hdr_offset; 1830 req->pad = 0; 1831 req->rdma_count = 1; 1832 req->length = htobe16(seglen); 1833 req->cksum_offset = cksum_offset; 1834 req->flags = flags | ((cum_len & 1) * 1835 MXGEFW_FLAGS_ALIGN_ODD); 1836 low += seglen; 1837 len -= seglen; 1838 cum_len = cum_len_next; 1839 flags = flags_next; 1840 req++; 1841 cnt++; 1842 rdma_count++; 1843 if (__predict_false(cksum_offset > seglen)) 1844 cksum_offset -= seglen; 1845 else 1846 cksum_offset = 0; 1847 if (__predict_false(cnt > tx->max_desc)) 1848 goto drop; 1849 } 1850 busdma_seg_cnt--; 1851 seg++; 1852 } 1853 (req-rdma_count)->rdma_count = rdma_count; 1854 1855 do { 1856 req--; 1857 
req->flags |= MXGEFW_FLAGS_TSO_LAST; 1858 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1859 1860 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1861 mxge_submit_req(tx, tx->req_list, cnt); 1862 return; 1863 1864 drop: 1865 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1866 m_freem(m); 1867 ss->sc->ifp->if_oerrors++; 1868 if (!once) { 1869 printf("tx->max_desc exceeded via TSO!\n"); 1870 printf("mss = %d, %ld, %d!\n", mss, 1871 (long)seg - (long)tx->seg_list, tx->max_desc); 1872 once = 1; 1873 } 1874 return; 1875 1876 } 1877 1878 #endif /* IFCAP_TSO4 */ 1879 1880 #ifdef MXGE_NEW_VLAN_API 1881 /* 1882 * We reproduce the software vlan tag insertion from 1883 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 1884 * vlan tag insertion. We need to advertise this in order to have the 1885 * vlan interface respect our csum offload flags. 1886 */ 1887 static struct mbuf * 1888 mxge_vlan_tag_insert(struct mbuf *m) 1889 { 1890 struct ether_vlan_header *evl; 1891 1892 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); 1893 if (__predict_false(m == NULL)) 1894 return NULL; 1895 if (m->m_len < sizeof(*evl)) { 1896 m = m_pullup(m, sizeof(*evl)); 1897 if (__predict_false(m == NULL)) 1898 return NULL; 1899 } 1900 /* 1901 * Transform the Ethernet header into an Ethernet header 1902 * with 802.1Q encapsulation. 
	 */
	evl = mtod(m, struct ether_vlan_header *);
	/* slide the MAC addresses up over the prepended 4 bytes and
	   fill in the 802.1Q type/tag */
	bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
	      (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
	m->m_flags &= ~M_VLANTAG;
	return m;
}
#endif /* MXGE_NEW_VLAN_API */

/*
 * Map a single outbound mbuf chain for DMA and build its send-request
 * list: inserts a vlan header if needed, defrags on EFBIG, hands TSO
 * packets to mxge_encap_tso(), sets up checksum offload fields, and
 * pads runt frames to 60 bytes before submitting to the NIC.
 * Consumes (frees) the mbuf on failure.
 */
static void
mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
{
	mxge_softc_t *sc;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct mbuf *m_tmp;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	struct ip *ip;
	int cnt, cum_len, err, i, idx, odd_flag, ip_off;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;

	ip_off = sizeof (struct ether_header);
#ifdef MXGE_NEW_VLAN_API
	if (m->m_flags & M_VLANTAG) {
		m = mxge_vlan_tag_insert(m);
		if (__predict_false(m == NULL))
			goto drop;
		ip_off += ETHER_VLAN_ENCAP_LEN;
	}
#endif
	/* (try to) map the frame for DMA */
	idx = tx->req & tx->mask;
	err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
				      m, tx->seg_list, &cnt,
				      BUS_DMA_NOWAIT);
	if (__predict_false(err == EFBIG)) {
		/* Too many segments in the chain.  Try
		   to defrag */
		m_tmp = m_defrag(m, M_NOWAIT);
		if (m_tmp == NULL) {
			goto drop;
		}
		ss->tx.defrag++;
		m = m_tmp;
		err = bus_dmamap_load_mbuf_sg(tx->dmat,
					      tx->info[idx].map,
					      m, tx->seg_list, &cnt,
					      BUS_DMA_NOWAIT);
	}
	if (__predict_false(err != 0)) {
		device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
			      " packet len = %d\n", err, m->m_pkthdr.len);
		goto drop;
	}
	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
			BUS_DMASYNC_PREWRITE);
	tx->info[idx].m = m;

#if IFCAP_TSO4
	/* TSO is different enough, we handle it in another routine */
	if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
		mxge_encap_tso(ss, m, cnt, ip_off);
		return;
	}
#endif

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/* checksum offloading? */
	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
		/* ensure ip header is in first mbuf, copy
		   it to a scratch buffer if not */
		if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
			m_copydata(m, 0, ip_off + sizeof (*ip),
				   ss->scratch);
			ip = (struct ip *)(ss->scratch + ip_off);
		} else {
			ip = (struct ip *)(mtod(m, char *) + ip_off);
		}
		cksum_offset = ip_off + (ip->ip_hl << 2);
		pseudo_hdr_offset = cksum_offset +  m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
	} else {
		odd_flag = 0;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/* convert segments into a request list */
	cum_len = 0;
	seg = tx->seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		/* checksum start may lie in a later segment */
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cum_len += seg->ds_len;
		seg++;
		req++;
		req->flags = 0;
	}
	req--;
	/* pad runts to 60 bytes */
	if (cum_len < 60) {
		req++;
		/* point the extra descriptor at the shared zero pad buffer */
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cnt++;
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		    "cso:%d, flags:0x%x, rdma:%d\n",
		    i, (int)ntohl(tx->req_list[i].addr_high),
		    (int)ntohl(tx->req_list[i].addr_low),
		    (int)ntohs(tx->req_list[i].length),
		    (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		    tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		    tx->req_list[i].rdma_count);
	}
	printf("--------------\n");
#endif
	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
	return;

drop:
	m_freem(m);
	ifp->if_oerrors++;
	return;
}

/*
 * Drain the interface send queue into the NIC while transmit
 * descriptors remain; sets IFF_DRV_OACTIVE when the ring fills.
 * Caller must hold the slice tx mutex.
 */
static inline void
mxge_start_locked(struct mxge_slice_state *ss)
{
	mxge_softc_t *sc;
	struct mbuf *m;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;
	while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL) {
			return;
		}
		/* let BPF see it */
		BPF_MTAP(ifp, m);

		/* give it to the nic */
		mxge_encap(ss, m);
	}
	/* ran out of transmit slots */
	if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
		sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
		tx->stall++;
	}
}

/* if_start entry point: transmit on slice 0 under its tx lock. */
static void
mxge_start(struct ifnet *ifp)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct mxge_slice_state *ss;

	/* only use the first slice for now */
	ss = &sc->ss[0];
	mtx_lock(&ss->tx.mtx);
	mxge_start_locked(ss);
	mtx_unlock(&ss->tx.mtx);
}

/*
 * copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's low
 * DMA address to mark it valid only after we write the entire chunk
 * in a burst
 */
static inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
		mcp_kreq_ether_recv_t *src)
{
	uint32_t low;

	/* hide the first descriptor's address until all 8 are written */
	low = src->addr_low;
	src->addr_low = 0xffffffff;
	mxge_pio_copy(dst, src, 4 * sizeof (*src));
	mb();
	mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	mb();
	src->addr_low = low;
	dst->addr_low = low;
	mb();
}

/*
 * Allocate a small (MHLEN) receive mbuf for ring slot idx, DMA-map it
 * and record its address in the shadow ring.  Every 8th slot flushes
 * a batch of 8 descriptors to the NIC (even on allocation failure, so
 * the NIC re-uses the old buffers).  Returns 0 or an errno.
 */
static int
mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	mxge_rx_ring_t *rx = &ss->rx_small;
	int cnt, err;

	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = MHLEN;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      &seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

done:
	if ((idx & 7) == 7)
		mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
	return err;
}

/*
 * Allocate a big (cluster/jumbo) receive mbuf for ring slot idx and
 * map it; with MXGE_VIRT_JUMBOS a jumbo may span several descriptors
 * (rx->nbufs), hence the multi-slot shadow fill and submit loop.
 * Returns 0 or an errno.
 */
static int
mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg[3];
	struct mbuf *m;
	mxge_rx_ring_t *rx = &ss->rx_big;
	int cnt, err, i;

	if (rx->cl_size == MCLBYTES)
		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	else
		m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = rx->cl_size;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));

#if MXGE_VIRT_JUMBOS
	for (i = 1; i < cnt; i++) {
		rx->shadow[idx + i].addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
		rx->shadow[idx + i].addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
       }
#endif

done:
	for (i = 0; i < rx->nbufs; i++) {
		if ((idx & 7) == 7) {
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		}
		idx++;
	}
	return err;
}

/*
 * Myri10GE hardware checksums are not valid if the sender
 * padded the frame with non-zero padding.  This is because
 * the firmware just does a simple 16-bit 1s complement
 * checksum across the entire frame, excluding the first 14
 * bytes.
It is best to simply to check the checksum and
 * tell the stack about it only if the checksum is good
 */

static inline uint16_t
mxge_rx_csum(struct mbuf *m, int csum)
{
	struct ether_header *eh;
	struct ip *ip;
	uint16_t c;

	eh = mtod(m, struct ether_header *);

	/* only deal with IPv4 TCP & UDP for now */
	if (__predict_false(eh->ether_type !=  htons(ETHERTYPE_IP)))
		return 1;
	ip = (struct ip *)(eh + 1);
	if (__predict_false(ip->ip_p != IPPROTO_TCP &&
			    ip->ip_p != IPPROTO_UDP))
		return 1;

	/* fold the pseudo-header into the firmware's whole-frame
	   checksum; a good checksum yields 0xffff, so c == 0 */
	c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		      htonl(ntohs(csum) + ntohs(ip->ip_len) +
			    - (ip->ip_hl << 2) + ip->ip_p));
	c ^= 0xffff;
	return (c);
}

/*
 * Strip the 802.1Q header from a received frame, adjust the partial
 * checksum to account for the removed 4 bytes, and record the vlan
 * tag in the mbuf (packet header field or m_tag, depending on API).
 */
static void
mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
{
	struct ether_vlan_header *evl;
	struct ether_header *eh;
	uint32_t partial;

	evl = mtod(m, struct ether_vlan_header *);
	eh = mtod(m, struct ether_header *);

	/*
	 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
	 * after what the firmware thought was the end of the ethernet
	 * header.
	 */

	/* put checksum into host byte order */
	*csum = ntohs(*csum);
	partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
	/* one's-complement subtraction of the vlan bytes, then fold
	   carries back into 16 bits */
	(*csum) += ~partial;
	(*csum) +=  ((*csum) < ~partial);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);

	/* restore checksum to network byte order;
	   later consumers expect this */
	*csum = htons(*csum);

	/* save the tag */
#ifdef MXGE_NEW_VLAN_API
	m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
#else
	{
		struct m_tag *mtag;
		mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
				   M_NOWAIT);
		if (mtag == NULL)
			return;
		VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
		m_tag_prepend(m, mtag);
	}

#endif
	m->m_flags |= M_VLANTAG;

	/*
	 * Remove the 802.1q header by copying the Ethernet
	 * addresses over it and adjusting the beginning of
	 * the data in the mbuf.  The encapsulated Ethernet
	 * type field is already in place.
	 */
	bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
	      ETHER_HDR_LEN - ETHER_TYPE_LEN);
	m_adj(m, ETHER_VLAN_ENCAP_LEN);
}

/*
 * Handle one completed receive on the big-buffer ring: replace the
 * buffer (recycling it on allocation failure), unmap it, strip any
 * vlan header, validate the checksum (feeding good TCP frames to
 * LRO) and pass the packet up the stack.
 */
static inline void
mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
{
	mxge_softc_t *sc;
	struct ifnet *ifp;
	struct mbuf *m;
	struct ether_header *eh;
	mxge_rx_ring_t *rx;
	bus_dmamap_t old_map;
	int idx;
	uint16_t tcpudp_csum;

	sc = ss->sc;
	ifp = sc->ifp;
	rx = &ss->rx_big;
	idx = rx->cnt & rx->mask;
	rx->cnt += rx->nbufs;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ss->ipackets++;
	eh = mtod(m, struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		mxge_vlan_tag_remove(m, &csum);
	}
	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
		if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
			return;
		/* otherwise, it was a UDP frame, or a TCP frame which
		   we could not do LRO on.  Tell the stack that the
		   checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
	}
	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, m);
}

/*
 * Handle one completed receive on the small-buffer ring; same flow
 * as mxge_rx_done_big() but for single-descriptor MHLEN buffers.
 */
static inline void
mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
{
	mxge_softc_t *sc;
	struct ifnet *ifp;
	struct ether_header *eh;
	struct mbuf *m;
	mxge_rx_ring_t *rx;
	bus_dmamap_t old_map;
	int idx;
	uint16_t tcpudp_csum;

	sc = ss->sc;
	ifp = sc->ifp;
	rx = &ss->rx_small;
	idx = rx->cnt & rx->mask;
	rx->cnt++;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ss->ipackets++;
	eh = mtod(m, struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		mxge_vlan_tag_remove(m, &csum);
	}
	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
		if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
			return;
		/* otherwise, it was a UDP frame, or a TCP frame which
		   we could not do LRO on.
Tell the stack that the 2426 checksum is good */ 2427 m->m_pkthdr.csum_data = 0xffff; 2428 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2429 } 2430 /* pass the frame up the stack */ 2431 (*ifp->if_input)(ifp, m); 2432 } 2433 2434 static inline void 2435 mxge_clean_rx_done(struct mxge_slice_state *ss) 2436 { 2437 mxge_rx_done_t *rx_done = &ss->rx_done; 2438 struct lro_entry *lro; 2439 int limit = 0; 2440 uint16_t length; 2441 uint16_t checksum; 2442 2443 2444 while (rx_done->entry[rx_done->idx].length != 0) { 2445 length = ntohs(rx_done->entry[rx_done->idx].length); 2446 rx_done->entry[rx_done->idx].length = 0; 2447 checksum = rx_done->entry[rx_done->idx].checksum; 2448 if (length <= (MHLEN - MXGEFW_PAD)) 2449 mxge_rx_done_small(ss, length, checksum); 2450 else 2451 mxge_rx_done_big(ss, length, checksum); 2452 rx_done->cnt++; 2453 rx_done->idx = rx_done->cnt & rx_done->mask; 2454 2455 /* limit potential for livelock */ 2456 if (__predict_false(++limit > rx_done->mask / 2)) 2457 break; 2458 } 2459 while (!SLIST_EMPTY(&ss->lro_active)) { 2460 lro = SLIST_FIRST(&ss->lro_active); 2461 SLIST_REMOVE_HEAD(&ss->lro_active, next); 2462 mxge_lro_flush(ss, lro); 2463 } 2464 } 2465 2466 2467 static inline void 2468 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2469 { 2470 struct ifnet *ifp; 2471 mxge_tx_ring_t *tx; 2472 struct mbuf *m; 2473 bus_dmamap_t map; 2474 int idx; 2475 2476 tx = &ss->tx; 2477 ifp = ss->sc->ifp; 2478 while (tx->pkt_done != mcp_idx) { 2479 idx = tx->done & tx->mask; 2480 tx->done++; 2481 m = tx->info[idx].m; 2482 /* mbuf and DMA map only attached to the first 2483 segment per-mbuf */ 2484 if (m != NULL) { 2485 ifp->if_opackets++; 2486 tx->info[idx].m = NULL; 2487 map = tx->info[idx].map; 2488 bus_dmamap_unload(tx->dmat, map); 2489 m_freem(m); 2490 } 2491 if (tx->info[idx].flag) { 2492 tx->info[idx].flag = 0; 2493 tx->pkt_done++; 2494 } 2495 } 2496 2497 /* If we have space, clear IFF_OACTIVE to tell the stack that 2498 its OK 
to send packets */ 2499 2500 if (ifp->if_drv_flags & IFF_DRV_OACTIVE && 2501 tx->req - tx->done < (tx->mask + 1)/4) { 2502 mtx_lock(&ss->tx.mtx); 2503 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 2504 ss->tx.wake++; 2505 mxge_start_locked(ss); 2506 mtx_unlock(&ss->tx.mtx); 2507 } 2508 } 2509 2510 static struct mxge_media_type mxge_media_types[] = 2511 { 2512 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2513 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2514 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2515 {0, (1 << 5), "10GBASE-ER"}, 2516 {0, (1 << 4), "10GBASE-LRM"}, 2517 {0, (1 << 3), "10GBASE-SW"}, 2518 {0, (1 << 2), "10GBASE-LW"}, 2519 {0, (1 << 1), "10GBASE-EW"}, 2520 {0, (1 << 0), "Reserved"} 2521 }; 2522 2523 static void 2524 mxge_set_media(mxge_softc_t *sc, int type) 2525 { 2526 sc->media_flags |= type; 2527 ifmedia_add(&sc->media, sc->media_flags, 0, NULL); 2528 ifmedia_set(&sc->media, sc->media_flags); 2529 } 2530 2531 2532 /* 2533 * Determine the media type for a NIC. Some XFPs will identify 2534 * themselves only when their link is up, so this is initiated via a 2535 * link up interrupt. However, this can potentially take up to 2536 * several milliseconds, so it is run via the watchdog routine, rather 2537 * than in the interrupt handler itself. This need only be done 2538 * once, not each time the link is up. 2539 */ 2540 static void 2541 mxge_media_probe(mxge_softc_t *sc) 2542 { 2543 mxge_cmd_t cmd; 2544 char *ptr; 2545 int i, err, ms; 2546 2547 sc->need_media_probe = 0; 2548 2549 /* if we've already set a media type, we're done */ 2550 if (sc->media_flags != (IFM_ETHER | IFM_AUTO)) 2551 return; 2552 2553 /* 2554 * parse the product code to deterimine the interface type 2555 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2556 * after the 3rd dash in the driver's cached copy of the 2557 * EEPROM's product code string. 
2558 */ 2559 ptr = sc->product_code_string; 2560 if (ptr == NULL) { 2561 device_printf(sc->dev, "Missing product code\n"); 2562 } 2563 2564 for (i = 0; i < 3; i++, ptr++) { 2565 ptr = index(ptr, '-'); 2566 if (ptr == NULL) { 2567 device_printf(sc->dev, 2568 "only %d dashes in PC?!?\n", i); 2569 return; 2570 } 2571 } 2572 if (*ptr == 'C') { 2573 mxge_set_media(sc, IFM_10G_CX4); 2574 return; 2575 } 2576 else if (*ptr == 'Q') { 2577 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2578 /* FreeBSD has no media type for Quad ribbon fiber */ 2579 return; 2580 } 2581 2582 if (*ptr != 'R') { 2583 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2584 return; 2585 } 2586 2587 /* 2588 * At this point we know the NIC has an XFP cage, so now we 2589 * try to determine what is in the cage by using the 2590 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2591 * register. We read just one byte, which may take over 2592 * a millisecond 2593 */ 2594 2595 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2596 cmd.data1 = MXGE_XFP_COMPLIANCE_BYTE; /* the byte we want */ 2597 err = mxge_send_cmd(sc, MXGEFW_CMD_XFP_I2C_READ, &cmd); 2598 if (err == MXGEFW_CMD_ERROR_XFP_FAILURE) { 2599 device_printf(sc->dev, "failed to read XFP\n"); 2600 } 2601 if (err == MXGEFW_CMD_ERROR_XFP_ABSENT) { 2602 device_printf(sc->dev, "Type R with no XFP!?!?\n"); 2603 } 2604 if (err != MXGEFW_CMD_OK) { 2605 return; 2606 } 2607 2608 /* now we wait for the data to be cached */ 2609 cmd.data0 = MXGE_XFP_COMPLIANCE_BYTE; 2610 err = mxge_send_cmd(sc, MXGEFW_CMD_XFP_BYTE, &cmd); 2611 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2612 DELAY(1000); 2613 cmd.data0 = MXGE_XFP_COMPLIANCE_BYTE; 2614 err = mxge_send_cmd(sc, MXGEFW_CMD_XFP_BYTE, &cmd); 2615 } 2616 if (err != MXGEFW_CMD_OK) { 2617 device_printf(sc->dev, "failed to read XFP (%d, %dms)\n", 2618 err, ms); 2619 return; 2620 } 2621 2622 if (cmd.data0 == mxge_media_types[0].bitmask) { 2623 if (mxge_verbose) 2624 
device_printf(sc->dev, "XFP:%s\n", 2625 mxge_media_types[0].name); 2626 mxge_set_media(sc, IFM_10G_CX4); 2627 return; 2628 } 2629 for (i = 1; 2630 i < sizeof (mxge_media_types) / sizeof (mxge_media_types[0]); 2631 i++) { 2632 if (cmd.data0 & mxge_media_types[i].bitmask) { 2633 if (mxge_verbose) 2634 device_printf(sc->dev, "XFP:%s\n", 2635 mxge_media_types[i].name); 2636 2637 mxge_set_media(sc, mxge_media_types[i].flag); 2638 return; 2639 } 2640 } 2641 device_printf(sc->dev, "XFP media 0x%x unknown\n", cmd.data0); 2642 2643 return; 2644 } 2645 2646 static void 2647 mxge_intr(void *arg) 2648 { 2649 struct mxge_slice_state *ss = arg; 2650 mxge_softc_t *sc = ss->sc; 2651 mcp_irq_data_t *stats = ss->fw_stats; 2652 mxge_tx_ring_t *tx = &ss->tx; 2653 mxge_rx_done_t *rx_done = &ss->rx_done; 2654 uint32_t send_done_count; 2655 uint8_t valid; 2656 2657 2658 /* an interrupt on a non-zero slice is implicitly valid 2659 since MSI-X irqs are not shared */ 2660 if (ss != sc->ss) { 2661 mxge_clean_rx_done(ss); 2662 *ss->irq_claim = be32toh(3); 2663 return; 2664 } 2665 2666 /* make sure the DMA has finished */ 2667 if (!stats->valid) { 2668 return; 2669 } 2670 valid = stats->valid; 2671 2672 if (sc->legacy_irq) { 2673 /* lower legacy IRQ */ 2674 *sc->irq_deassert = 0; 2675 if (!mxge_deassert_wait) 2676 /* don't wait for conf. 
			   that irq is low */
			stats->valid = 0;
	} else {
		stats->valid = 0;
	}

	/* loop while waiting for legacy irq deassertion */
	do {
		/* check for transmit completes and receives */
		send_done_count = be32toh(stats->send_done_count);
		while ((send_done_count != tx->pkt_done) ||
		       (rx_done->entry[rx_done->idx].length != 0)) {
			mxge_tx_done(ss, (int)send_done_count);
			mxge_clean_rx_done(ss);
			send_done_count = be32toh(stats->send_done_count);
		}
		if (sc->legacy_irq && mxge_deassert_wait)
			mb();
	} while (*((volatile uint8_t *) &stats->valid));

	/* the firmware DMAs a fresh stats block when something changed */
	if (__predict_false(stats->stats_updated)) {
		if (sc->link_state != stats->link_up) {
			sc->link_state = stats->link_up;
			if (sc->link_state) {
				if_link_state_change(sc->ifp, LINK_STATE_UP);
				if (mxge_verbose)
					device_printf(sc->dev, "link up\n");
			} else {
				if_link_state_change(sc->ifp, LINK_STATE_DOWN);
				if (mxge_verbose)
					device_printf(sc->dev, "link down\n");
			}
			/* re-probe media from the watchdog on link change */
			sc->need_media_probe = 1;
		}
		if (sc->rdma_tags_available !=
		    be32toh(stats->rdma_tags_available)) {
			sc->rdma_tags_available =
				be32toh(stats->rdma_tags_available);
			device_printf(sc->dev, "RDMA timed out! %d tags "
				      "left\n", sc->rdma_tags_available);
		}

		if (stats->link_down) {
			sc->down_cnt += stats->link_down;
			sc->link_state = 0;
			if_link_state_change(sc->ifp, LINK_STATE_DOWN);
		}
	}

	/* check to see if we have rx token to pass back */
	if (valid & 0x1)
		*ss->irq_claim = be32toh(3);
	*(ss->irq_claim + 1) = be32toh(3);
}

/*
 * if_init handler.  Intentionally a no-op: real initialization is
 * driven through mxge_open() from the ioctl path.
 */
static void
mxge_init(void *arg)
{
}



/*
 * Free this slice's LRO entries and unload/free every mbuf still
 * attached to the receive and (slice 0 only) transmit rings.
 */
static void
mxge_free_slice_mbufs(struct mxge_slice_state *ss)
{
	struct lro_entry *lro_entry;
	int i;

	while (!SLIST_EMPTY(&ss->lro_free)) {
		lro_entry = SLIST_FIRST(&ss->lro_free);
		SLIST_REMOVE_HEAD(&ss->lro_free, next);
		free(lro_entry, M_DEVBUF);
	}

	for (i = 0; i <= ss->rx_big.mask; i++) {
		if (ss->rx_big.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->rx_big.dmat,
				  ss->rx_big.info[i].map);
		m_freem(ss->rx_big.info[i].m);
		ss->rx_big.info[i].m = NULL;
	}

	for (i = 0; i <= ss->rx_small.mask; i++) {
		if (ss->rx_small.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->rx_small.dmat,
				  ss->rx_small.info[i].map);
		m_freem(ss->rx_small.info[i].m);
		ss->rx_small.info[i].m = NULL;
	}

	/* transmit ring used only on the first slice */
	if (ss->tx.info == NULL)
		return;

	for (i = 0; i <= ss->tx.mask; i++) {
		ss->tx.info[i].flag = 0;
		if (ss->tx.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->tx.dmat,
				  ss->tx.info[i].map);
		m_freem(ss->tx.info[i].m);
		ss->tx.info[i].m = NULL;
	}
}

/* Free the ring mbufs of every slice. */
static void
mxge_free_mbufs(mxge_softc_t *sc)
{
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++)
		mxge_free_slice_mbufs(&sc->ss[slice]);
}

/*
 * Tear down a slice's ring bookkeeping: DMA'd rx_done block, host
 * shadow/info arrays, and busdma maps/tags.  Maps are destroyed
 * before their parent tag, and each pointer is NULLed so a repeat
 * call is harmless.  Assumes mxge_free_slice_mbufs() already ran
 * (maps must be unloaded before they are destroyed).
 */
static void
mxge_free_slice_rings(struct mxge_slice_state *ss)
{
	int i;


	if (ss->rx_done.entry != NULL)
		mxge_dma_free(&ss->rx_done.dma);
	ss->rx_done.entry = NULL;

	if (ss->tx.req_bytes != NULL)
		free(ss->tx.req_bytes, M_DEVBUF);
	ss->tx.req_bytes = NULL;

	if (ss->tx.seg_list != NULL)
		free(ss->tx.seg_list, M_DEVBUF);
	ss->tx.seg_list = NULL;

	if (ss->rx_small.shadow != NULL)
		free(ss->rx_small.shadow, M_DEVBUF);
	ss->rx_small.shadow = NULL;

	if (ss->rx_big.shadow != NULL)
		free(ss->rx_big.shadow, M_DEVBUF);
	ss->rx_big.shadow = NULL;

	if (ss->tx.info != NULL) {
		if (ss->tx.dmat != NULL) {
			for (i = 0; i <= ss->tx.mask; i++) {
				bus_dmamap_destroy(ss->tx.dmat,
						   ss->tx.info[i].map);
			}
			bus_dma_tag_destroy(ss->tx.dmat);
		}
		free(ss->tx.info, M_DEVBUF);
	}
	ss->tx.info = NULL;

	if (ss->rx_small.info != NULL) {
		if (ss->rx_small.dmat != NULL) {
			for (i = 0; i <= ss->rx_small.mask; i++) {
				bus_dmamap_destroy(ss->rx_small.dmat,
						   ss->rx_small.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_small.dmat,
					   ss->rx_small.extra_map);
			bus_dma_tag_destroy(ss->rx_small.dmat);
		}
		free(ss->rx_small.info, M_DEVBUF);
	}
	ss->rx_small.info = NULL;

	if (ss->rx_big.info != NULL) {
		if (ss->rx_big.dmat != NULL) {
			for (i = 0; i <= ss->rx_big.mask; i++) {
				bus_dmamap_destroy(ss->rx_big.dmat,
						   ss->rx_big.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_big.dmat,
					   ss->rx_big.extra_map);
			bus_dma_tag_destroy(ss->rx_big.dmat);
		}
		free(ss->rx_big.info, M_DEVBUF);
	}
	ss->rx_big.info = NULL;
}

/* Tear down the rings of every slice. */
static void
mxge_free_rings(mxge_softc_t *sc)
{
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++)
		mxge_free_slice_rings(&sc->ss[slice]);
}

/*
 * Allocate a slice's host-side ring state: shadow rings, info arrays,
 * busdma tags and per-entry maps, plus (slice 0 only) the transmit
 * ring resources.  Ring entry counts must be powers of two (masks).
 */
static int
mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
		       int tx_ring_entries)
{
	mxge_softc_t *sc = ss->sc;
	size_t bytes;
int err, i; 2875 2876 err = ENOMEM; 2877 2878 /* allocate per-slice receive resources */ 2879 2880 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 2881 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 2882 2883 /* allocate the rx shadow rings */ 2884 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 2885 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2886 if (ss->rx_small.shadow == NULL) 2887 return err;; 2888 2889 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 2890 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2891 if (ss->rx_big.shadow == NULL) 2892 return err;; 2893 2894 /* allocate the rx host info rings */ 2895 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 2896 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2897 if (ss->rx_small.info == NULL) 2898 return err;; 2899 2900 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 2901 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2902 if (ss->rx_big.info == NULL) 2903 return err;; 2904 2905 /* allocate the rx busdma resources */ 2906 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2907 1, /* alignment */ 2908 4096, /* boundary */ 2909 BUS_SPACE_MAXADDR, /* low */ 2910 BUS_SPACE_MAXADDR, /* high */ 2911 NULL, NULL, /* filter */ 2912 MHLEN, /* maxsize */ 2913 1, /* num segs */ 2914 MHLEN, /* maxsegsize */ 2915 BUS_DMA_ALLOCNOW, /* flags */ 2916 NULL, NULL, /* lock */ 2917 &ss->rx_small.dmat); /* tag */ 2918 if (err != 0) { 2919 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 2920 err); 2921 return err;; 2922 } 2923 2924 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2925 1, /* alignment */ 2926 #if MXGE_VIRT_JUMBOS 2927 4096, /* boundary */ 2928 #else 2929 0, /* boundary */ 2930 #endif 2931 BUS_SPACE_MAXADDR, /* low */ 2932 BUS_SPACE_MAXADDR, /* high */ 2933 NULL, NULL, /* filter */ 2934 3*4096, /* maxsize */ 2935 #if MXGE_VIRT_JUMBOS 2936 3, /* num segs */ 2937 4096, /* maxsegsize*/ 2938 #else 2939 
1, /* num segs */ 2940 MJUM9BYTES, /* maxsegsize*/ 2941 #endif 2942 BUS_DMA_ALLOCNOW, /* flags */ 2943 NULL, NULL, /* lock */ 2944 &ss->rx_big.dmat); /* tag */ 2945 if (err != 0) { 2946 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 2947 err); 2948 return err;; 2949 } 2950 for (i = 0; i <= ss->rx_small.mask; i++) { 2951 err = bus_dmamap_create(ss->rx_small.dmat, 0, 2952 &ss->rx_small.info[i].map); 2953 if (err != 0) { 2954 device_printf(sc->dev, "Err %d rx_small dmamap\n", 2955 err); 2956 return err;; 2957 } 2958 } 2959 err = bus_dmamap_create(ss->rx_small.dmat, 0, 2960 &ss->rx_small.extra_map); 2961 if (err != 0) { 2962 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 2963 err); 2964 return err;; 2965 } 2966 2967 for (i = 0; i <= ss->rx_big.mask; i++) { 2968 err = bus_dmamap_create(ss->rx_big.dmat, 0, 2969 &ss->rx_big.info[i].map); 2970 if (err != 0) { 2971 device_printf(sc->dev, "Err %d rx_big dmamap\n", 2972 err); 2973 return err;; 2974 } 2975 } 2976 err = bus_dmamap_create(ss->rx_big.dmat, 0, 2977 &ss->rx_big.extra_map); 2978 if (err != 0) { 2979 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 2980 err); 2981 return err;; 2982 } 2983 2984 /* now allocate TX resouces */ 2985 2986 /* only use a single TX ring for now */ 2987 if (ss != ss->sc->ss) 2988 return 0; 2989 2990 ss->tx.mask = tx_ring_entries - 1; 2991 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 2992 2993 2994 /* allocate the tx request copy block */ 2995 bytes = 8 + 2996 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 2997 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 2998 if (ss->tx.req_bytes == NULL) 2999 return err;; 3000 /* ensure req_list entries are aligned to 8 bytes */ 3001 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3002 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3003 3004 /* allocate the tx busdma segment list */ 3005 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3006 ss->tx.seg_list = (bus_dma_segment_t *) 3007 malloc(bytes, 
M_DEVBUF, M_WAITOK); 3008 if (ss->tx.seg_list == NULL) 3009 return err;; 3010 3011 /* allocate the tx host info ring */ 3012 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3013 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3014 if (ss->tx.info == NULL) 3015 return err;; 3016 3017 /* allocate the tx busdma resources */ 3018 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3019 1, /* alignment */ 3020 sc->tx_boundary, /* boundary */ 3021 BUS_SPACE_MAXADDR, /* low */ 3022 BUS_SPACE_MAXADDR, /* high */ 3023 NULL, NULL, /* filter */ 3024 65536 + 256, /* maxsize */ 3025 ss->tx.max_desc - 2, /* num segs */ 3026 sc->tx_boundary, /* maxsegsz */ 3027 BUS_DMA_ALLOCNOW, /* flags */ 3028 NULL, NULL, /* lock */ 3029 &ss->tx.dmat); /* tag */ 3030 3031 if (err != 0) { 3032 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3033 err); 3034 return err;; 3035 } 3036 3037 /* now use these tags to setup dmamaps for each slot 3038 in the ring */ 3039 for (i = 0; i <= ss->tx.mask; i++) { 3040 err = bus_dmamap_create(ss->tx.dmat, 0, 3041 &ss->tx.info[i].map); 3042 if (err != 0) { 3043 device_printf(sc->dev, "Err %d tx dmamap\n", 3044 err); 3045 return err;; 3046 } 3047 } 3048 return 0; 3049 3050 } 3051 3052 static int 3053 mxge_alloc_rings(mxge_softc_t *sc) 3054 { 3055 mxge_cmd_t cmd; 3056 int tx_ring_size; 3057 int tx_ring_entries, rx_ring_entries; 3058 int err, slice; 3059 3060 /* get ring sizes */ 3061 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3062 tx_ring_size = cmd.data0; 3063 if (err != 0) { 3064 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3065 goto abort; 3066 } 3067 3068 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3069 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3070 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3071 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3072 IFQ_SET_READY(&sc->ifp->if_snd); 3073 3074 for (slice = 0; slice < sc->num_slices; slice++) { 
		err = mxge_alloc_slice_rings(&sc->ss[slice],
					     rx_ring_entries,
					     tx_ring_entries);
		if (err != 0)
			goto abort;
	}
	return 0;

abort:
	mxge_free_rings(sc);
	return err;

}


/*
 * Pick the big-buffer geometry for a given MTU: the cluster size to
 * allocate, the buffer size advertised to the firmware, and how many
 * firmware buffers make up one receive frame (nbufs > 1 only in the
 * MXGE_VIRT_JUMBOS configuration).
 */
static void
mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
{
	/* worst-case wire frame plus the firmware's 2-byte pad */
	int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;

	if (bufsize < MCLBYTES) {
		/* easy, everything fits in a single buffer */
		*big_buf_size = MCLBYTES;
		*cl_size = MCLBYTES;
		*nbufs = 1;
		return;
	}

	if (bufsize < MJUMPAGESIZE) {
		/* still easy, everything still fits in a single buffer */
		*big_buf_size = MJUMPAGESIZE;
		*cl_size = MJUMPAGESIZE;
		*nbufs = 1;
		return;
	}
#if MXGE_VIRT_JUMBOS
	/* now we need to use virtually contiguous buffers */
	*cl_size = MJUM9BYTES;
	*big_buf_size = 4096;
	*nbufs = mtu / 4096 + 1;
	/* needs to be a power of two, so round up */
	if (*nbufs == 3)
		*nbufs = 4;
#else
	*cl_size = MJUM9BYTES;
	*big_buf_size = MJUM9BYTES;
	*nbufs = 1;
#endif
}

/*
 * Bring one slice to a runnable state: pre-allocate its LRO entries,
 * fetch the firmware (lanai) ring pointers, and stock both receive
 * rings with mbufs.  Returns 0 on success or EIO/ENOMEM.
 */
static int
mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
{
	mxge_softc_t *sc;
	mxge_cmd_t cmd;
	bus_dmamap_t map;
	struct lro_entry *lro_entry;
	int err, i, slice;


	sc = ss->sc;
	slice = ss - sc->ss;

	SLIST_INIT(&ss->lro_free);
	SLIST_INIT(&ss->lro_active);

	for (i = 0; i < sc->lro_cnt; i++) {
		lro_entry = (struct lro_entry *)
			malloc(sizeof (*lro_entry), M_DEVBUF,
			       M_NOWAIT | M_ZERO);
		if (lro_entry == NULL) {
			/* best-effort: scale LRO down to what we got */
			sc->lro_cnt = i;
			break;
		}
		SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
	}
	/* get the lanai pointers to the send and receive rings */

	err = 0;
	/* We currently only send from the first slice */
	if (slice == 0) {
		cmd.data0 = slice;
		err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
		ss->tx.lanai =
			(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
	}
	cmd.data0 = slice;
	err |= mxge_send_cmd(sc,
			     MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	ss->rx_small.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
	cmd.data0 = slice;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	ss->rx_big.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);

	if (err != 0) {
		device_printf(sc->dev,
			      "failed to get ring sizes or locations\n");
		return EIO;
	}

	/* stock receive rings */
	for (i = 0; i <= ss->rx_small.mask; i++) {
		map = ss->rx_small.info[i].map;
		err = mxge_get_buf_small(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d smalls\n",
				      i, ss->rx_small.mask + 1);
			return ENOMEM;
		}
	}
	/* poison the shadow addresses so un-stocked slots are obvious */
	for (i = 0; i <= ss->rx_big.mask; i++) {
		ss->rx_big.shadow[i].addr_low = 0xffffffff;
		ss->rx_big.shadow[i].addr_high = 0xffffffff;
	}
	ss->rx_big.nbufs = nbufs;
	ss->rx_big.cl_size = cl_size;
	/* big frames occupy nbufs consecutive slots, so stride by nbufs */
	for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
		map = ss->rx_big.info[i].map;
		err = mxge_get_buf_big(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d bigs\n",
				      i, ss->rx_big.mask + 1);
			return ENOMEM;
		}
	}
	return 0;
}

/*
 * Bring the interface up: reset the NIC, program RSS (when multiple
 * slices are enabled), tell the firmware the buffer geometry and the
 * stats-block DMA address, open every slice and start the firmware.
 * On failure all ring mbufs are released.
 */
static int
mxge_open(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int err, big_bytes, nbufs, slice, cl_size, i;
	bus_addr_t bus;
	volatile uint8_t *itable;

	/* Copy the MAC address in case it was overridden */
	bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);

	err = mxge_reset(sc, 1);
	if (err != 0) {
		device_printf(sc->dev, "failed to reset\n");
		return EIO;
	}

	if (sc->num_slices > 1) {
		/* setup the indirection table */
		cmd.data0 = sc->num_slices;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
				    &cmd);

		err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
				     &cmd);
		if (err != 0) {
			device_printf(sc->dev,
				      "failed to setup rss tables\n");
			return err;
		}

		/* just enable an identity mapping */
		itable = sc->sram + cmd.data0;
		for (i = 0; i < sc->num_slices; i++)
			itable[i] = (uint8_t)i;

		cmd.data0 = 1;
		cmd.data1 = mxge_rss_hash_type;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "failed to enable slices\n");
			return err;
		}
	}


	mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);

	cmd.data0 = nbufs;
	err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			    &cmd);
	/* error is only meaningful if we're trying to set
	   MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
	if (err && nbufs > 1) {
		device_printf(sc->dev,
			      "Failed to set alway-use-n to %d\n",
			      nbufs);
		return EIO;
	}
	/* Give the firmware the mtu and the big and small buffer
	   sizes.  The firmware wants the big buf size to be a power
	   of two.  Luckily, FreeBSD's clusters are powers of two */
	cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
	cmd.data0 = MHLEN - MXGEFW_PAD;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
			     &cmd);
	cmd.data0 = big_bytes;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* Now give him the pointer to the stats block */
	cmd.data0 = MXGE_LOWPART_TO_U32(sc->ss->fw_stats_dma.bus_addr);
	cmd.data1 = MXGE_HIGHPART_TO_U32(sc->ss->fw_stats_dma.bus_addr);
	cmd.data2 = sizeof(struct mcp_irq_data);
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);

	if (err != 0) {
		/* fall back to the obsolete single-counter stats DMA */
		bus = sc->ss->fw_stats_dma.bus_addr;
		bus += offsetof(struct mcp_irq_data, send_done_count);
		cmd.data0 = MXGE_LOWPART_TO_U32(bus);
		cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
		err = mxge_send_cmd(sc,
				    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
				    &cmd);
		/* Firmware cannot support multicast without STATS_DMA_V2 */
		sc->fw_multicast_support = 0;
	} else {
		sc->fw_multicast_support = 1;
	}

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	for (slice = 0; slice < sc->num_slices; slice++) {
		err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
		if (err != 0) {
			device_printf(sc->dev, "couldn't open slice %d\n",
				      slice);
			goto abort;
		}
	}

	/* Finally, start the firmware running */
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring up link\n");
		goto abort;
	}
	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
	sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	return 0;


abort:
	mxge_free_mbufs(sc);

	return err;
}

/*
 * Bring the interface down: tell the firmware to stop, wait for the
 * resulting "down" interrupt (tracked via sc->down_cnt), then free
 * every ring mbuf.  Always returns 0.
 */
static int
mxge_close(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int err, old_down_cnt;

	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	old_down_cnt = sc->down_cnt;
	mb();
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring down link\n");
	}
	if (old_down_cnt == sc->down_cnt) {
		/* wait for down irq */
		DELAY(10 * sc->intr_coal_delay);
	}
	mb();
	if (old_down_cnt == sc->down_cnt) {
		device_printf(sc->dev, "never got down irq\n");
	}

	mxge_free_mbufs(sc);

	return 0;
}

/*
 * Program the device's PCI config space: record the PCIe link width,
 * raise the max read request size to 4KB, and enable busmastering
 * and memory-space access.
 */
static void
mxge_setup_cfg_space(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg;
	uint16_t cmd, lnk, pectl;

	/* find the PCIe link width and set max read request to 4KB*/
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		lnk = pci_read_config(dev, reg + 0x12, 2);
		sc->link_width = (lnk >> 4) & 0x3f;

		pectl = pci_read_config(dev, reg + 0x8, 2);
		/* max read request: field [14:12] set to 5 (4KB) */
		pectl = (pectl & ~0x7000) | (5 << 12);
		pci_write_config(dev, reg + 0x8, pectl, 2);
	}

	/* Enable DMA and Memory space access */
	pci_enable_busmaster(dev);
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	cmd |= PCIM_CMD_MEMEN;
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
}

/*
 * Read the NIC's reboot status register through the vendor-specific
 * PCI capability window.  Returns (uint32_t)-1 when the capability
 * cannot be found.
 */
static uint32_t
mxge_read_reboot(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	uint32_t vs;

	/* find the vendor specific offset */
	if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
		device_printf(sc->dev,
			      "could not find vendor specific offset\n");
		return (uint32_t)-1;
	}
	/* enable read32 mode */
	pci_write_config(dev, vs + 0x10, 0x3, 1);
	/* tell NIC which register to read */
	pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
	return (pci_read_config(dev, vs + 0x14, 4));
}

static void
mxge_watchdog_reset(mxge_softc_t *sc) 3404 { 3405 int err; 3406 uint32_t reboot; 3407 uint16_t cmd; 3408 3409 err = ENXIO; 3410 3411 device_printf(sc->dev, "Watchdog reset!\n"); 3412 3413 /* 3414 * check to see if the NIC rebooted. If it did, then all of 3415 * PCI config space has been reset, and things like the 3416 * busmaster bit will be zero. If this is the case, then we 3417 * must restore PCI config space before the NIC can be used 3418 * again 3419 */ 3420 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3421 if (cmd == 0xffff) { 3422 /* 3423 * maybe the watchdog caught the NIC rebooting; wait 3424 * up to 100ms for it to finish. If it does not come 3425 * back, then give up 3426 */ 3427 DELAY(1000*100); 3428 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3429 if (cmd == 0xffff) { 3430 device_printf(sc->dev, "NIC disappeared!\n"); 3431 goto abort; 3432 } 3433 } 3434 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3435 /* print the reboot status */ 3436 reboot = mxge_read_reboot(sc); 3437 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3438 reboot); 3439 /* restore PCI configuration space */ 3440 3441 /* XXXX waiting for pci_cfg_restore() to be exported */ 3442 goto abort; /* just abort for now */ 3443 3444 /* and redo any changes we made to our config space */ 3445 mxge_setup_cfg_space(sc); 3446 3447 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) { 3448 mxge_close(sc); 3449 err = mxge_open(sc); 3450 } 3451 } else { 3452 device_printf(sc->dev, "NIC did not reboot, ring state:\n"); 3453 device_printf(sc->dev, "tx.req=%d tx.done=%d\n", 3454 sc->ss->tx.req, sc->ss->tx.done); 3455 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3456 sc->ss->tx.pkt_done, 3457 be32toh(sc->ss->fw_stats->send_done_count)); 3458 device_printf(sc->dev, "not resetting\n"); 3459 } 3460 3461 abort: 3462 /* 3463 * stop the watchdog if the nic is dead, to avoid spamming the 3464 * console 3465 */ 3466 if (err != 0) { 3467 callout_stop(&sc->co_hdl); 3468 } 3469 } 3470 3471 static void 3472 
mxge_watchdog(mxge_softc_t *sc)
{
	mxge_tx_ring_t *tx = &sc->ss->tx;
	uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);

	/* see if we have outstanding transmits, which
	   have been pending for more than mxge_ticks */
	if (tx->req != tx->done &&
	    tx->watchdog_req != tx->watchdog_done &&
	    tx->done == tx->watchdog_done) {
		/* check for pause blocking before resetting */
		if (tx->watchdog_rx_pause == rx_pause)
			mxge_watchdog_reset(sc);
		else
			device_printf(sc->dev, "Flow control blocking "
				      "xmits, check link partner\n");
	}

	/* snapshot ring and pause counters for the next tick's
	   stuck-transmit comparison */
	tx->watchdog_req = tx->req;
	tx->watchdog_done = tx->done;
	tx->watchdog_rx_pause = rx_pause;

	if (sc->need_media_probe)
		mxge_media_probe(sc);
}

/* Aggregate the per-slice input packet counts into the ifnet stats. */
static void
mxge_update_stats(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	u_long ipackets = 0;
	int slice;

	for(slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		ipackets += ss->ipackets;
	}
	sc->ifp->if_ipackets = ipackets;

}

/* Periodic housekeeping callout: stats aggregation plus a watchdog
   check every 4th invocation. */
static void
mxge_tick(void *arg)
{
	mxge_softc_t *sc = arg;


	/* Synchronize with possible callout reset/stop.
	 */
	/*
	 * NOTE(review): this handler runs via a callout initialized
	 * with callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0), so
	 * driver_mtx is held on entry; the explicit mtx_unlock on the
	 * early-return path below should be confirmed against
	 * callout(9) lock-handoff semantics for this FreeBSD version.
	 */
	if (callout_pending(&sc->co_hdl) ||
	    !callout_active(&sc->co_hdl)) {
		mtx_unlock(&sc->driver_mtx);
		return;
	}

	/* aggregate stats from different slices */
	mxge_update_stats(sc);

	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	/* run the watchdog only every 4th tick */
	if (!sc->watchdog_countdown) {
		mxge_watchdog(sc);
		sc->watchdog_countdown = 4;
	}
	sc->watchdog_countdown--;
}

/* ifmedia change hook: media is fixed on this hardware, so reject. */
static int
mxge_media_change(struct ifnet *ifp)
{
	return EINVAL;
}

/*
 * Change the interface MTU.  Rejects frames larger than the firmware
 * maximum (or smaller than the 60-byte minimum).  If the interface is
 * running, it is closed and reopened so rings are resized; on reopen
 * failure the old MTU is restored.
 */
static int
mxge_change_mtu(mxge_softc_t *sc, int mtu)
{
	struct ifnet *ifp = sc->ifp;
	int real_mtu, old_mtu;
	int err = 0;


	/* on-wire size = MTU + ethernet header + VLAN tag */
	real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	if ((real_mtu > sc->max_mtu) || real_mtu < 60)
		return EINVAL;
	mtx_lock(&sc->driver_mtx);
	old_mtu = ifp->if_mtu;
	ifp->if_mtu = mtu;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		callout_stop(&sc->co_hdl);
		mxge_close(sc);
		err = mxge_open(sc);
		if (err != 0) {
			/* reopen failed: fall back to the old MTU,
			   best-effort */
			ifp->if_mtu = old_mtu;
			mxge_close(sc);
			(void) mxge_open(sc);
		}
		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	}
	mtx_unlock(&sc->driver_mtx);
	return err;
}

/* ifmedia status hook: report link and duplex from cached link state. */
static void
mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	mxge_softc_t *sc = ifp->if_softc;


	if (sc == NULL)
		return;
	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
	ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
	ifmr->ifm_active |= sc->link_state ?
IFM_FDX : 0; 3583 } 3584 3585 static int 3586 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 3587 { 3588 mxge_softc_t *sc = ifp->if_softc; 3589 struct ifreq *ifr = (struct ifreq *)data; 3590 int err, mask; 3591 3592 err = 0; 3593 switch (command) { 3594 case SIOCSIFADDR: 3595 case SIOCGIFADDR: 3596 err = ether_ioctl(ifp, command, data); 3597 break; 3598 3599 case SIOCSIFMTU: 3600 err = mxge_change_mtu(sc, ifr->ifr_mtu); 3601 break; 3602 3603 case SIOCSIFFLAGS: 3604 mtx_lock(&sc->driver_mtx); 3605 if (ifp->if_flags & IFF_UP) { 3606 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 3607 err = mxge_open(sc); 3608 callout_reset(&sc->co_hdl, mxge_ticks, 3609 mxge_tick, sc); 3610 } else { 3611 /* take care of promis can allmulti 3612 flag chages */ 3613 mxge_change_promisc(sc, 3614 ifp->if_flags & IFF_PROMISC); 3615 mxge_set_multicast_list(sc); 3616 } 3617 } else { 3618 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3619 callout_stop(&sc->co_hdl); 3620 mxge_close(sc); 3621 } 3622 } 3623 mtx_unlock(&sc->driver_mtx); 3624 break; 3625 3626 case SIOCADDMULTI: 3627 case SIOCDELMULTI: 3628 mtx_lock(&sc->driver_mtx); 3629 mxge_set_multicast_list(sc); 3630 mtx_unlock(&sc->driver_mtx); 3631 break; 3632 3633 case SIOCSIFCAP: 3634 mtx_lock(&sc->driver_mtx); 3635 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 3636 if (mask & IFCAP_TXCSUM) { 3637 if (IFCAP_TXCSUM & ifp->if_capenable) { 3638 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 3639 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 3640 | CSUM_TSO); 3641 } else { 3642 ifp->if_capenable |= IFCAP_TXCSUM; 3643 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 3644 } 3645 } else if (mask & IFCAP_RXCSUM) { 3646 if (IFCAP_RXCSUM & ifp->if_capenable) { 3647 ifp->if_capenable &= ~IFCAP_RXCSUM; 3648 sc->csum_flag = 0; 3649 } else { 3650 ifp->if_capenable |= IFCAP_RXCSUM; 3651 sc->csum_flag = 1; 3652 } 3653 } 3654 if (mask & IFCAP_TSO4) { 3655 if (IFCAP_TSO4 & ifp->if_capenable) { 3656 ifp->if_capenable &= ~IFCAP_TSO4; 3657 ifp->if_hwassist &= 
				    ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				/* TSO requires tx checksum offload */
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
		if (mask & IFCAP_LRO) {
			if (IFCAP_LRO & ifp->if_capenable)
				err = mxge_change_lro_locked(sc, 0);
			else
				err = mxge_change_lro_locked(sc, mxge_lro_cnt);
		}
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
		mtx_unlock(&sc->driver_mtx);
		VLAN_CAPABILITIES(ifp);

		break;

	case SIOCGIFMEDIA:
		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
				    &sc->media, command);
		break;

	default:
		err = ENOTTY;
	}
	return err;
}

/*
 * Pull the hw.mxge.* loader tunables into the driver's globals and
 * sanity-check/clamp the resulting values.
 */
static void
mxge_fetch_tunables(mxge_softc_t *sc)
{

	TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
			  &mxge_flow_control);
	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
			  &mxge_intr_coal_delay);
	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
			  &mxge_nvidia_ecrc_enable);
	TUNABLE_INT_FETCH("hw.mxge.force_firmware",
			  &mxge_force_firmware);
	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
			  &mxge_deassert_wait);
	TUNABLE_INT_FETCH("hw.mxge.verbose",
			  &mxge_verbose);
	TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
	TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
	TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
	TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
	/* a non-zero per-device lro_cnt overrides the global default */
	if (sc->lro_cnt != 0)
		mxge_lro_cnt = sc->lro_cnt;

	if (bootverbose)
		mxge_verbose = 1;
	/* clamp interrupt coalescing delay to [0, 10ms] */
	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
		mxge_intr_coal_delay = 30;
	/* default tick period: twice a second */
	if (mxge_ticks == 0)
		mxge_ticks = hz / 2;
	sc->pause = mxge_flow_control;
	if (mxge_rss_hash_type <
	    MXGEFW_RSS_HASH_TYPE_IPV4
	    || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_SRC_PORT) {
		/* out-of-range hash type: fall back to the default */
		mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
	}
}


/*
 * Free the per-slice state: firmware stats DMA, tx mutex, and rx
 * completion ring DMA for each slice, then the slice array itself.
 * Safe to call when slices were never allocated.
 */
static void
mxge_free_slices(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int i;


	if (sc->ss == NULL)
		return;

	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];
		if (ss->fw_stats != NULL) {
			mxge_dma_free(&ss->fw_stats_dma);
			ss->fw_stats = NULL;
			mtx_destroy(&ss->tx.mtx);
		}
		if (ss->rx_done.entry != NULL) {
			mxge_dma_free(&ss->rx_done.dma);
			ss->rx_done.entry = NULL;
		}
	}
	free(sc->ss, M_DEVBUF);
	sc->ss = NULL;
}

/*
 * Allocate the per-slice state array and the DMA memory each slice
 * needs (rx completion queue; firmware stats on slice 0 only).
 * Returns 0 on success or an errno; on failure all partially
 * allocated slice state is freed.
 */
static int
mxge_alloc_slices(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct mxge_slice_state *ss;
	size_t bytes;
	int err, i, max_intr_slots;

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		return err;
	}
	sc->rx_ring_size = cmd.data0;
	/* two completion slots per rx descriptor (big + small rings) */
	max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));

	bytes = sizeof (*sc->ss) * sc->num_slices;
	sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->ss == NULL)
		return (ENOMEM);
	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];

		ss->sc = sc;

		/* allocate per-slice rx interrupt queues */

		bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
		err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
		if (err != 0)
			goto abort;
		ss->rx_done.entry = ss->rx_done.dma.addr;
		bzero(ss->rx_done.entry, bytes);

		/*
		 * allocate the per-slice firmware stats; stats
		 * (including tx) are used only on the first
		 * slice for now
		 */
		if (i > 0)
			continue;

		bytes = sizeof (*ss->fw_stats);
		err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
				     sizeof (*ss->fw_stats), 64);
		if (err != 0)
			goto abort;
		ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
		snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
			 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
		mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
	}

	return (0);

abort:
	mxge_free_slices(sc);
	return (ENOMEM);
}

/*
 * Decide how many slices (rx queues) to use.  Defaults to 1; tries
 * multiple slices only when enabled by the tunable, on an SMP box,
 * with at least 2 MSI-X vectors, and when the RSS firmware loads and
 * responds.  The result is capped by MSI-X vectors, CPUs (or the
 * tunable), and rounded down to a power of two.
 */
static void
mxge_slice_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *old_fw;
	int msix_cnt, status, max_intr_slots;

	sc->num_slices = 1;
	/*
	 * don't enable multiple slices if they are not enabled,
	 * or if this is not an SMP system
	 */

	if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
		return;

	/* see how many MSI-X interrupts are available */
	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;

	/* now load the slice aware firmware see what it supports */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		/*
		 * NOTE(review): fw_name is left pointing at the RSS
		 * image here, unlike the abort_with_fw path below
		 * which restores old_fw -- confirm this is intended.
		 */
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/* get rx ring size */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));

	/* tell it the size of the interrupt queues */
	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
	status =
mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 3868 if (status != 0) { 3869 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 3870 goto abort_with_fw; 3871 } 3872 3873 /* ask the maximum number of slices it supports */ 3874 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 3875 if (status != 0) { 3876 device_printf(sc->dev, 3877 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 3878 goto abort_with_fw; 3879 } 3880 sc->num_slices = cmd.data0; 3881 if (sc->num_slices > msix_cnt) 3882 sc->num_slices = msix_cnt; 3883 3884 if (mxge_max_slices == -1) { 3885 /* cap to number of CPUs in system */ 3886 if (sc->num_slices > mp_ncpus) 3887 sc->num_slices = mp_ncpus; 3888 } else { 3889 if (sc->num_slices > mxge_max_slices) 3890 sc->num_slices = mxge_max_slices; 3891 } 3892 /* make sure it is a power of two */ 3893 while (sc->num_slices & (sc->num_slices - 1)) 3894 sc->num_slices--; 3895 3896 if (mxge_verbose) 3897 device_printf(sc->dev, "using %d slices\n", 3898 sc->num_slices); 3899 3900 return; 3901 3902 abort_with_fw: 3903 sc->fw_name = old_fw; 3904 (void) mxge_load_firmware(sc, 0); 3905 } 3906 3907 static int 3908 mxge_add_msix_irqs(mxge_softc_t *sc) 3909 { 3910 size_t bytes; 3911 int count, err, i, rid; 3912 3913 rid = PCIR_BAR(2); 3914 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 3915 &rid, RF_ACTIVE); 3916 3917 if (sc->msix_table_res == NULL) { 3918 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 3919 return ENXIO; 3920 } 3921 3922 count = sc->num_slices; 3923 err = pci_alloc_msix(sc->dev, &count); 3924 if (err != 0) { 3925 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 3926 "err = %d \n", sc->num_slices, err); 3927 goto abort_with_msix_table; 3928 } 3929 if (count < sc->num_slices) { 3930 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 3931 count, sc->num_slices); 3932 device_printf(sc->dev, 3933 "Try setting hw.mxge.max_slices to %d\n", 3934 count); 3935 err = ENOSPC; 3936 goto 
abort_with_msix; 3937 } 3938 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 3939 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 3940 if (sc->msix_irq_res == NULL) { 3941 err = ENOMEM; 3942 goto abort_with_msix; 3943 } 3944 3945 for (i = 0; i < sc->num_slices; i++) { 3946 rid = i + 1; 3947 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 3948 SYS_RES_IRQ, 3949 &rid, RF_ACTIVE); 3950 if (sc->msix_irq_res[i] == NULL) { 3951 device_printf(sc->dev, "couldn't allocate IRQ res" 3952 " for message %d\n", i); 3953 err = ENXIO; 3954 goto abort_with_res; 3955 } 3956 } 3957 3958 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 3959 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 3960 3961 for (i = 0; i < sc->num_slices; i++) { 3962 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 3963 INTR_TYPE_NET | INTR_MPSAFE, 3964 #if __FreeBSD_version > 700030 3965 NULL, 3966 #endif 3967 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 3968 if (err != 0) { 3969 device_printf(sc->dev, "couldn't setup intr for " 3970 "message %d\n", i); 3971 goto abort_with_intr; 3972 } 3973 } 3974 3975 if (mxge_verbose) { 3976 device_printf(sc->dev, "using %d msix IRQs:", 3977 sc->num_slices); 3978 for (i = 0; i < sc->num_slices; i++) 3979 printf(" %ld", rman_get_start(sc->msix_irq_res[i])); 3980 printf("\n"); 3981 } 3982 return (0); 3983 3984 abort_with_intr: 3985 for (i = 0; i < sc->num_slices; i++) { 3986 if (sc->msix_ih[i] != NULL) { 3987 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 3988 sc->msix_ih[i]); 3989 sc->msix_ih[i] = NULL; 3990 } 3991 } 3992 free(sc->msix_ih, M_DEVBUF); 3993 3994 3995 abort_with_res: 3996 for (i = 0; i < sc->num_slices; i++) { 3997 rid = i + 1; 3998 if (sc->msix_irq_res[i] != NULL) 3999 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4000 sc->msix_irq_res[i]); 4001 sc->msix_irq_res[i] = NULL; 4002 } 4003 free(sc->msix_irq_res, M_DEVBUF); 4004 4005 4006 abort_with_msix: 4007 pci_release_msi(sc->dev); 4008 4009 abort_with_msix_table: 4010 
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	return err;
}

/*
 * Set up a single interrupt for the non-multi-slice case: prefer one
 * MSI vector (rid 1), falling back to the legacy INTx line (rid 0).
 * The handler services slice 0 only.
 */
static int
mxge_add_single_irq(mxge_softc_t *sc)
{
	int count, err, rid;

	count = pci_msi_count(sc->dev);
	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
		rid = 1;	/* MSI */
	} else {
		rid = 0;	/* legacy INTx */
		sc->legacy_irq = 1;
	}
	sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
					 1, RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}
	if (mxge_verbose)
		device_printf(sc->dev, "using %s irq %ld\n",
			      sc->legacy_irq ? "INTx" : "MSI",
			      rman_get_start(sc->irq_res));
	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
			     NULL,
#endif
			     mxge_intr, &sc->ss[0], &sc->ih);
	if (err != 0) {
		/* release with the same rid we allocated with */
		bus_release_resource(sc->dev, SYS_RES_IRQ,
				     sc->legacy_irq ?
				     0 : 1, sc->irq_res);
		if (!sc->legacy_irq)
			pci_release_msi(sc->dev);
	}
	return err;
}

/*
 * Tear down all MSI-X interrupt state: handlers, IRQ resources, the
 * MSI-X table BAR mapping, and the message allocation itself.
 * Mirrors the setup order in mxge_add_msix_irqs().
 */
static void
mxge_rem_msix_irqs(mxge_softc_t *sc)
{
	int i, rid;

	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;	/* MSI-X rids start at 1 */
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	pci_release_msi(sc->dev);
	return;
}

/* Tear down the single (MSI or INTx) interrupt set up by
   mxge_add_single_irq(). */
static void
mxge_rem_single_irq(mxge_softc_t *sc)
{
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	bus_release_resource(sc->dev, SYS_RES_IRQ,
			     sc->legacy_irq ?
			     0 : 1, sc->irq_res);
	if (!sc->legacy_irq)
		pci_release_msi(sc->dev);
}

/* Release interrupts, dispatching on single vs. multi-slice mode. */
static void
mxge_rem_irq(mxge_softc_t *sc)
{
	if (sc->num_slices > 1)
		mxge_rem_msix_irqs(sc);
	else
		mxge_rem_single_irq(sc);
}

/* Allocate interrupts, dispatching on single vs. multi-slice mode. */
static int
mxge_add_irq(mxge_softc_t *sc)
{
	int err;

	if (sc->num_slices > 1)
		err = mxge_add_msix_irqs(sc);
	else
		err = mxge_add_single_irq(sc);

	/* NOTE: deliberately disabled ("if (0 && ...)") remove/re-add
	   exercise of the MSI-X path, presumably left for debugging */
	if (0 && err == 0 && sc->num_slices > 1) {
		mxge_rem_msix_irqs(sc);
		err = mxge_add_msix_irqs(sc);
	}
	return err;
}


/*
 * Device attach: create the parent DMA tag, allocate the ifnet and
 * driver mutexes, map the board, load firmware, allocate slices and
 * rings, set up interrupts, and publish the interface.  Unwinds via
 * chained abort labels on failure.
 */
static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp;
	int err, rid;

	sc->dev = dev;
	mxge_fetch_tunables(sc);

	err = bus_dma_tag_create(NULL,			/* parent */
				 1,			/* alignment */
				 0,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC,	/* num segs */
				 65536,			/* maxsegsize */
				 0,			/* flags */
				 NULL, NULL,		/* lock */
				 &sc->parent_dmat);	/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_nothing;
	}

	ifp = sc->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not if_alloc()\n");
		err = ENOSPC;
		goto abort_with_parent_dmat;
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
		 device_get_nameunit(dev));
	mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
	snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
		 "%s:drv", device_get_nameunit(dev));
	mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
		 MTX_NETWORK_LOCK, MTX_DEF);

	/* tick callout runs with driver_mtx held */
	callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
					 ~0, 1, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_lock;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	/* 2MB BAR minus firmware/scratch regions minus a guard;
	   assumes the Lanai SRAM layout -- see if_mxge_var.h */
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
			      rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/* make NULL terminated copy of the EEPROM strings section of
	   lanai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_zeropad_dma;

	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_dmabench;
	sc->intr_coal_delay = mxge_intr_coal_delay;

	/* pick slice count (may reload firmware), then allocate slices */
	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0)
		goto abort_with_dmabench;

	err = mxge_reset(sc, 0);
	if (err != 0)
		goto
abort_with_slices; 4230 4231 err = mxge_alloc_rings(sc); 4232 if (err != 0) { 4233 device_printf(sc->dev, "failed to allocate rings\n"); 4234 goto abort_with_dmabench; 4235 } 4236 4237 err = mxge_add_irq(sc); 4238 if (err != 0) { 4239 device_printf(sc->dev, "failed to add irq\n"); 4240 goto abort_with_rings; 4241 } 4242 4243 ifp->if_baudrate = IF_Gbps(10UL); 4244 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4245 IFCAP_VLAN_MTU | IFCAP_LRO; 4246 4247 #ifdef MXGE_NEW_VLAN_API 4248 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4249 #endif 4250 4251 sc->max_mtu = mxge_max_mtu(sc); 4252 if (sc->max_mtu >= 9000) 4253 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4254 else 4255 device_printf(dev, "MTU limited to %d. Install " 4256 "latest firmware for 9000 byte jumbo support\n", 4257 sc->max_mtu - ETHER_HDR_LEN); 4258 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4259 ifp->if_capenable = ifp->if_capabilities; 4260 if (sc->lro_cnt == 0) 4261 ifp->if_capenable &= ~IFCAP_LRO; 4262 sc->csum_flag = 1; 4263 ifp->if_init = mxge_init; 4264 ifp->if_softc = sc; 4265 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4266 ifp->if_ioctl = mxge_ioctl; 4267 ifp->if_start = mxge_start; 4268 /* Initialise the ifmedia structure */ 4269 ifmedia_init(&sc->media, 0, mxge_media_change, 4270 mxge_media_status); 4271 mxge_set_media(sc, IFM_ETHER | IFM_AUTO); 4272 mxge_media_probe(sc); 4273 ether_ifattach(ifp, sc->mac_addr); 4274 /* ether_ifattach sets mtu to 1500 */ 4275 if (ifp->if_capabilities & IFCAP_JUMBO_MTU) 4276 ifp->if_mtu = 9000; 4277 4278 mxge_add_sysctls(sc); 4279 return 0; 4280 4281 abort_with_rings: 4282 mxge_free_rings(sc); 4283 abort_with_slices: 4284 mxge_free_slices(sc); 4285 abort_with_dmabench: 4286 mxge_dma_free(&sc->dmabench_dma); 4287 abort_with_zeropad_dma: 4288 mxge_dma_free(&sc->zeropad_dma); 4289 abort_with_cmd_dma: 4290 mxge_dma_free(&sc->cmd_dma); 4291 abort_with_mem_res: 4292 bus_release_resource(dev, 
			     SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
abort_with_lock:
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(ifp);
abort_with_parent_dmat:
	bus_dma_tag_destroy(sc->parent_dmat);

abort_with_nothing:
	return err;
}

/*
 * Device detach: refuse while VLANs are configured, otherwise stop
 * the interface and release everything mxge_attach() acquired, in
 * reverse order.  Returns EBUSY or 0.
 */
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (mxge_vlans_active(sc)) {
		device_printf(sc->dev,
			      "Detach vlans before removing module\n");
		return EBUSY;
	}
	mtx_lock(&sc->driver_mtx);
	callout_stop(&sc->co_hdl);
	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
		mxge_close(sc);
	mtx_unlock(&sc->driver_mtx);
	ether_ifdetach(sc->ifp);
	ifmedia_removeall(&sc->media);
	mxge_dummy_rdma(sc, 0);
	mxge_rem_sysctls(sc);
	mxge_rem_irq(sc);
	mxge_free_rings(sc);
	mxge_free_slices(sc);
	mxge_dma_free(&sc->dmabench_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}

/* Shutdown method: nothing to do for this device. */
static int
mxge_shutdown(device_t dev)
{
	return 0;
}

/*
  This file uses Myri10GE driver indentation.

  Local Variables:
  c-file-style:"linux"
  tab-width:8
  End:
*/