1 /****************************************************************************** 2 3 Copyright (c) 2006-2008, Myricom Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 ***************************************************************************/ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/linker.h> 36 #include <sys/firmware.h> 37 #include <sys/endian.h> 38 #include <sys/sockio.h> 39 #include <sys/mbuf.h> 40 #include <sys/malloc.h> 41 #include <sys/kdb.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/module.h> 45 #include <sys/memrange.h> 46 #include <sys/socket.h> 47 #include <sys/sysctl.h> 48 #include <sys/sx.h> 49 50 #include <net/if.h> 51 #include <net/if_arp.h> 52 #include <net/ethernet.h> 53 #include <net/if_dl.h> 54 #include <net/if_media.h> 55 56 #include <net/bpf.h> 57 58 #include <net/if_types.h> 59 #include <net/if_vlan_var.h> 60 #include <net/zlib.h> 61 62 #include <netinet/in_systm.h> 63 #include <netinet/in.h> 64 #include <netinet/ip.h> 65 #include <netinet/tcp.h> 66 67 #include <machine/bus.h> 68 #include <machine/in_cksum.h> 69 #include <machine/resource.h> 70 #include <sys/bus.h> 71 #include <sys/rman.h> 72 #include <sys/smp.h> 73 74 #include <dev/pci/pcireg.h> 75 #include <dev/pci/pcivar.h> 76 77 #include <vm/vm.h> /* for pmap_mapdev() */ 78 #include <vm/pmap.h> 79 80 #if defined(__i386) || defined(__amd64) 81 #include <machine/specialreg.h> 82 #endif 83 84 #include <dev/mxge/mxge_mcp.h> 85 #include <dev/mxge/mcp_gen_header.h> 86 /*#define MXGE_FAKE_IFP*/ 87 #include <dev/mxge/if_mxge_var.h> 88 89 /* tunable params */ 90 static int mxge_nvidia_ecrc_enable = 1; 91 static int mxge_force_firmware = 0; 92 static int mxge_intr_coal_delay = 30; 93 static int mxge_deassert_wait = 1; 94 static int mxge_flow_control = 1; 95 static int mxge_verbose = 0; 96 static int mxge_lro_cnt = 8; 97 static int mxge_ticks; 98 static int mxge_max_slices = 1; 99 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT; 100 static int mxge_always_promisc = 0; 101 static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 102 static char *mxge_fw_aligned = "mxge_eth_z8e"; 103 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 104 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 105 106 static int mxge_probe(device_t dev); 107 static int mxge_attach(device_t dev); 108 static int mxge_detach(device_t dev); 109 static int mxge_shutdown(device_t dev); 110 static void mxge_intr(void *arg); 111 112 static device_method_t mxge_methods[] = 113 { 114 /* Device interface */ 115 DEVMETHOD(device_probe, mxge_probe), 116 DEVMETHOD(device_attach, mxge_attach), 117 DEVMETHOD(device_detach, mxge_detach), 118 DEVMETHOD(device_shutdown, mxge_shutdown), 119 {0, 0} 120 }; 121 122 static driver_t mxge_driver = 123 { 124 "mxge", 125 mxge_methods, 126 sizeof(mxge_softc_t), 127 }; 128 129 static devclass_t mxge_devclass; 130 131 /* Declare ourselves to be a child of the PCI bus.*/ 132 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 133 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 134 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 135 136 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 137 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 138 static int mxge_close(mxge_softc_t *sc); 139 static int mxge_open(mxge_softc_t *sc); 140 static void mxge_tick(void *arg); 141 142 static int 143 mxge_probe(device_t dev) 144 { 145 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 146 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 147 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 148 device_set_desc(dev, "Myri10G-PCIE-8A"); 149 return 0; 150 } 151 return ENXIO; 152 } 153 154 static void 155 mxge_enable_wc(mxge_softc_t *sc) 156 { 157 #if defined(__i386) || defined(__amd64) 158 struct mem_range_desc mrdesc; 159 vm_paddr_t pa; 160 vm_offset_t len; 161 int err, action; 162 163 sc->wc = 1; 164 len = rman_get_size(sc->mem_res); 165 err = pmap_change_attr((vm_offset_t) sc->sram, 166 len, PAT_WRITE_COMBINING); 167 if (err == 0) 168 return; 169 else 170 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 171 err); 172 pa = rman_get_start(sc->mem_res); 173 mrdesc.mr_base = pa; 174 mrdesc.mr_len = len; 175 mrdesc.mr_flags = MDF_WRITECOMBINE; 176 action = MEMRANGE_SET_UPDATE; 177 strcpy((char *)&mrdesc.mr_owner, "mxge"); 178 err = mem_range_attr_set(&mrdesc, &action); 179 if (err != 0) { 180 sc->wc = 0; 181 device_printf(sc->dev, 182 "w/c failed for pa 0x%lx, len 0x%lx, err = %d\n", 183 (unsigned long)pa, (unsigned long)len, err); 184 } 185 #endif 186 } 187 188 189 /* callback to get our DMA address */ 190 static void 191 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 192 int error) 193 { 194 if (error == 0) { 195 *(bus_addr_t *) arg = segs->ds_addr; 196 } 197 } 198 199 static int 200 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 201 bus_size_t alignment) 202 { 203 int err; 204 device_t dev = sc->dev; 205 bus_size_t boundary, maxsegsize; 206 207 if (bytes > 4096 && alignment == 4096) { 208 boundary = 0; 209 maxsegsize = bytes; 210 } else { 211 boundary = 4096; 212 maxsegsize = 4096; 213 } 214 215 /* allocate DMAable memory tags */ 216 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 217 alignment, /* alignment */ 218 boundary, /* boundary */ 219 BUS_SPACE_MAXADDR, /* low */ 220 BUS_SPACE_MAXADDR, /* high */ 221 NULL, NULL, /* filter */ 222 bytes, /* maxsize */ 223 1, /* num segs */ 224 maxsegsize, /* maxsegsize */ 225 BUS_DMA_COHERENT, /* flags */ 226 NULL, NULL, /* lock */ 227 &dma->dmat); /* tag */ 228 if (err != 0) { 229 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 230 return err; 231 } 232 233 /* allocate DMAable memory & map */ 234 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 235 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 236 | BUS_DMA_ZERO), &dma->map); 237 if (err != 0) { 238 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 239 goto abort_with_dmat; 240 } 241 242 /* load the memory */ 243 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 244 mxge_dmamap_callback, 245 (void *)&dma->bus_addr, 0); 246 if (err != 0) { 247 device_printf(dev, "couldn't load map (err = %d)\n", err); 248 goto abort_with_mem; 249 } 250 return 0; 251 252 abort_with_mem: 253 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 254 abort_with_dmat: 255 (void)bus_dma_tag_destroy(dma->dmat); 256 return err; 257 } 258 259 260 static void 261 mxge_dma_free(mxge_dma_t *dma) 262 { 263 bus_dmamap_unload(dma->dmat, dma->map); 264 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 265 (void)bus_dma_tag_destroy(dma->dmat); 266 } 267 268 /* 269 * The eeprom strings on the lanaiX have the format 270 * SN=x\0 271 * MAC=x:x:x:x:x:x\0 272 * PC=text\0 273 */ 274 275 static int 276 mxge_parse_strings(mxge_softc_t *sc) 277 { 278 #define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++) 279 280 char *ptr, *limit; 281 int i, found_mac; 282 283 ptr = sc->eeprom_strings; 284 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE; 285 found_mac = 0; 286 while (ptr < limit && *ptr != '\0') { 287 if (memcmp(ptr, "MAC=", 4) == 0) { 288 ptr += 1; 289 sc->mac_addr_string = ptr; 290 for (i = 0; i < 6; i++) { 291 ptr += 3; 292 if ((ptr + 2) > limit) 293 goto abort; 294 sc->mac_addr[i] = strtoul(ptr, NULL, 16); 295 found_mac = 1; 296 } 297 } else if (memcmp(ptr, "PC=", 3) == 0) { 298 ptr += 3; 299 strncpy(sc->product_code_string, ptr, 300 sizeof (sc->product_code_string) - 1); 301 } else if (memcmp(ptr, "SN=", 3) == 0) { 302 ptr += 3; 303 strncpy(sc->serial_number_string, ptr, 304 sizeof (sc->serial_number_string) - 1); 305 } 306 MXGE_NEXT_STRING(ptr); 307 } 308 309 if (found_mac) 310 return 0; 311 312 abort: 313 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 314 315 return ENXIO; 316 } 317 318 #if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__ 319 static void 320 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 321 { 322 uint32_t val; 323 unsigned long base, off; 324 char *va, *cfgptr; 325 device_t pdev, mcp55; 326 uint16_t vendor_id, device_id, word; 327 uintptr_t bus, slot, func, ivend, idev; 328 uint32_t *ptr32; 329 330 331 if (!mxge_nvidia_ecrc_enable) 332 return; 333 334 pdev = device_get_parent(device_get_parent(sc->dev)); 335 if (pdev == NULL) { 336 device_printf(sc->dev, "could not find parent?\n"); 337 return; 338 } 339 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 340 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 341 342 if (vendor_id != 0x10de) 343 return; 344 345 base = 0; 346 347 if (device_id == 0x005d) { 348 /* ck804, base address is magic */ 349 base = 0xe0000000UL; 350 } else if (device_id >= 0x0374 && device_id <= 0x378) { 351 /* mcp55, base address stored in chipset */ 352 mcp55 = pci_find_bsf(0, 0, 0); 353 if (mcp55 && 354 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 355 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 356 word = pci_read_config(mcp55, 0x90, 2); 357 base = ((unsigned long)word & 0x7ffeU) << 25; 358 } 359 } 360 if (!base) 361 return; 362 363 /* XXXX 364 Test below is commented because it is believed that doing 365 config read/write beyond 0xff will access the config space 366 for the next larger function. Uncomment this and remove 367 the hacky pmap_mapdev() way of accessing config space when 368 FreeBSD grows support for extended pcie config space access 369 */ 370 #if 0 371 /* See if we can, by some miracle, access the extended 372 config space */ 373 val = pci_read_config(pdev, 0x178, 4); 374 if (val != 0xffffffff) { 375 val |= 0x40; 376 pci_write_config(pdev, 0x178, val, 4); 377 return; 378 } 379 #endif 380 /* Rather than using normal pci config space writes, we must 381 * map the Nvidia config space ourselves. This is because on 382 * opteron/nvidia class machine the 0xe000000 mapping is 383 * handled by the nvidia chipset, that means the internal PCI 384 * device (the on-chip northbridge), or the amd-8131 bridge 385 * and things behind them are not visible by this method. 386 */ 387 388 BUS_READ_IVAR(device_get_parent(pdev), pdev, 389 PCI_IVAR_BUS, &bus); 390 BUS_READ_IVAR(device_get_parent(pdev), pdev, 391 PCI_IVAR_SLOT, &slot); 392 BUS_READ_IVAR(device_get_parent(pdev), pdev, 393 PCI_IVAR_FUNCTION, &func); 394 BUS_READ_IVAR(device_get_parent(pdev), pdev, 395 PCI_IVAR_VENDOR, &ivend); 396 BUS_READ_IVAR(device_get_parent(pdev), pdev, 397 PCI_IVAR_DEVICE, &idev); 398 399 off = base 400 + 0x00100000UL * (unsigned long)bus 401 + 0x00001000UL * (unsigned long)(func 402 + 8 * slot); 403 404 /* map it into the kernel */ 405 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 406 407 408 if (va == NULL) { 409 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 410 return; 411 } 412 /* get a pointer to the config space mapped into the kernel */ 413 cfgptr = va + (off & PAGE_MASK); 414 415 /* make sure that we can really access it */ 416 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 417 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 418 if (! (vendor_id == ivend && device_id == idev)) { 419 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 420 vendor_id, device_id); 421 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 422 return; 423 } 424 425 ptr32 = (uint32_t*)(cfgptr + 0x178); 426 val = *ptr32; 427 428 if (val == 0xffffffff) { 429 device_printf(sc->dev, "extended mapping failed\n"); 430 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 431 return; 432 } 433 *ptr32 = val | 0x40; 434 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 435 if (mxge_verbose) 436 device_printf(sc->dev, 437 "Enabled ECRC on upstream Nvidia bridge " 438 "at %d:%d:%d\n", 439 (int)bus, (int)slot, (int)func); 440 return; 441 } 442 #else 443 static void 444 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 445 { 446 device_printf(sc->dev, 447 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 448 return; 449 } 450 #endif 451 452 453 static int 454 mxge_dma_test(mxge_softc_t *sc, int test_type) 455 { 456 mxge_cmd_t cmd; 457 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 458 int status; 459 uint32_t len; 460 char *test = " "; 461 462 463 /* Run a small DMA test. 464 * The magic multipliers to the length tell the firmware 465 * to do DMA read, write, or read+write tests. The 466 * results are returned in cmd.data0. The upper 16 467 * bits of the return is the number of transfers completed. 468 * The lower 16 bits is the time in 0.5us ticks that the 469 * transfers took to complete. 470 */ 471 472 len = sc->tx_boundary; 473 474 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 475 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 476 cmd.data2 = len * 0x10000; 477 status = mxge_send_cmd(sc, test_type, &cmd); 478 if (status != 0) { 479 test = "read"; 480 goto abort; 481 } 482 sc->read_dma = ((cmd.data0>>16) * len * 2) / 483 (cmd.data0 & 0xffff); 484 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 485 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 486 cmd.data2 = len * 0x1; 487 status = mxge_send_cmd(sc, test_type, &cmd); 488 if (status != 0) { 489 test = "write"; 490 goto abort; 491 } 492 sc->write_dma = ((cmd.data0>>16) * len * 2) / 493 (cmd.data0 & 0xffff); 494 495 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 496 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 497 cmd.data2 = len * 0x10001; 498 status = mxge_send_cmd(sc, test_type, &cmd); 499 if (status != 0) { 500 test = "read/write"; 501 goto abort; 502 } 503 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 504 (cmd.data0 & 0xffff); 505 506 abort: 507 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 508 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 509 test, status); 510 511 return status; 512 } 513 514 /* 515 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 516 * when the PCI-E Completion packets are aligned on an 8-byte 517 * boundary. Some PCI-E chip sets always align Completion packets; on 518 * the ones that do not, the alignment can be enforced by enabling 519 * ECRC generation (if supported). 520 * 521 * When PCI-E Completion packets are not aligned, it is actually more 522 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 523 * 524 * If the driver can neither enable ECRC nor verify that it has 525 * already been enabled, then it must use a firmware image which works 526 * around unaligned completion packets (ethp_z8e.dat), and it should 527 * also ensure that it never gives the device a Read-DMA which is 528 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 529 * enabled, then the driver should use the aligned (eth_z8e.dat) 530 * firmware image, and set tx_boundary to 4KB. 531 */ 532 533 static int 534 mxge_firmware_probe(mxge_softc_t *sc) 535 { 536 device_t dev = sc->dev; 537 int reg, status; 538 uint16_t pectl; 539 540 sc->tx_boundary = 4096; 541 /* 542 * Verify the max read request size was set to 4KB 543 * before trying the test with 4KB. 544 */ 545 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 546 pectl = pci_read_config(dev, reg + 0x8, 2); 547 if ((pectl & (5 << 12)) != (5 << 12)) { 548 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 549 pectl); 550 sc->tx_boundary = 2048; 551 } 552 } 553 554 /* 555 * load the optimized firmware (which assumes aligned PCIe 556 * completions) in order to see if it works on this host. 557 */ 558 sc->fw_name = mxge_fw_aligned; 559 status = mxge_load_firmware(sc, 1); 560 if (status != 0) { 561 return status; 562 } 563 564 /* 565 * Enable ECRC if possible 566 */ 567 mxge_enable_nvidia_ecrc(sc); 568 569 /* 570 * Run a DMA test which watches for unaligned completions and 571 * aborts on the first one seen. 572 */ 573 574 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 575 if (status == 0) 576 return 0; /* keep the aligned firmware */ 577 578 if (status != E2BIG) 579 device_printf(dev, "DMA test failed: %d\n", status); 580 if (status == ENOSYS) 581 device_printf(dev, "Falling back to ethp! " 582 "Please install up to date fw\n"); 583 return status; 584 } 585 586 static int 587 mxge_select_firmware(mxge_softc_t *sc) 588 { 589 int aligned = 0; 590 591 592 if (mxge_force_firmware != 0) { 593 if (mxge_force_firmware == 1) 594 aligned = 1; 595 else 596 aligned = 0; 597 if (mxge_verbose) 598 device_printf(sc->dev, 599 "Assuming %s completions (forced)\n", 600 aligned ? "aligned" : "unaligned"); 601 goto abort; 602 } 603 604 /* if the PCIe link width is 4 or less, we can use the aligned 605 firmware and skip any checks */ 606 if (sc->link_width != 0 && sc->link_width <= 4) { 607 device_printf(sc->dev, 608 "PCIe x%d Link, expect reduced performance\n", 609 sc->link_width); 610 aligned = 1; 611 goto abort; 612 } 613 614 if (0 == mxge_firmware_probe(sc)) 615 return 0; 616 617 abort: 618 if (aligned) { 619 sc->fw_name = mxge_fw_aligned; 620 sc->tx_boundary = 4096; 621 } else { 622 sc->fw_name = mxge_fw_unaligned; 623 sc->tx_boundary = 2048; 624 } 625 return (mxge_load_firmware(sc, 0)); 626 } 627 628 union qualhack 629 { 630 const char *ro_char; 631 char *rw_char; 632 }; 633 634 static int 635 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 636 { 637 638 639 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 640 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 641 be32toh(hdr->mcp_type)); 642 return EIO; 643 } 644 645 /* save firmware version for sysctl */ 646 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 647 if (mxge_verbose) 648 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 649 650 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 651 &sc->fw_ver_minor, &sc->fw_ver_tiny); 652 653 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 654 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 655 device_printf(sc->dev, "Found firmware version %s\n", 656 sc->fw_version); 657 device_printf(sc->dev, "Driver needs %d.%d\n", 658 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 659 return EINVAL; 660 } 661 return 0; 662 663 } 664 665 static void * 666 z_alloc(void *nil, u_int items, u_int size) 667 { 668 void *ptr; 669 670 ptr = malloc(items * size, M_TEMP, M_NOWAIT); 671 return ptr; 672 } 673 674 static void 675 z_free(void *nil, void *ptr) 676 { 677 free(ptr, M_TEMP); 678 } 679 680 681 static int 682 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 683 { 684 z_stream zs; 685 char *inflate_buffer; 686 const struct firmware *fw; 687 const mcp_gen_header_t *hdr; 688 unsigned hdr_offset; 689 int status; 690 unsigned int i; 691 char dummy; 692 size_t fw_len; 693 694 fw = firmware_get(sc->fw_name); 695 if (fw == NULL) { 696 device_printf(sc->dev, "Could not find firmware image %s\n", 697 sc->fw_name); 698 return ENOENT; 699 } 700 701 702 703 /* setup zlib and decompress f/w */ 704 bzero(&zs, sizeof (zs)); 705 zs.zalloc = z_alloc; 706 zs.zfree = z_free; 707 status = inflateInit(&zs); 708 if (status != Z_OK) { 709 status = EIO; 710 goto abort_with_fw; 711 } 712 713 /* the uncompressed size is stored as the firmware version, 714 which would otherwise go unused */ 715 fw_len = (size_t) fw->version; 716 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 717 if (inflate_buffer == NULL) 718 goto abort_with_zs; 719 zs.avail_in = fw->datasize; 720 zs.next_in = __DECONST(char *, fw->data); 721 zs.avail_out = fw_len; 722 zs.next_out = inflate_buffer; 723 status = inflate(&zs, Z_FINISH); 724 if (status != Z_STREAM_END) { 725 device_printf(sc->dev, "zlib %d\n", status); 726 status = EIO; 727 goto abort_with_buffer; 728 } 729 730 /* check id */ 731 hdr_offset = htobe32(*(const uint32_t *) 732 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 733 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 734 device_printf(sc->dev, "Bad firmware file"); 735 status = EIO; 736 goto abort_with_buffer; 737 } 738 hdr = (const void*)(inflate_buffer + hdr_offset); 739 740 status = mxge_validate_firmware(sc, hdr); 741 if (status != 0) 742 goto abort_with_buffer; 743 744 /* Copy the inflated firmware to NIC SRAM. */ 745 for (i = 0; i < fw_len; i += 256) { 746 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 747 inflate_buffer + i, 748 min(256U, (unsigned)(fw_len - i))); 749 mb(); 750 dummy = *sc->sram; 751 mb(); 752 } 753 754 *limit = fw_len; 755 status = 0; 756 abort_with_buffer: 757 free(inflate_buffer, M_TEMP); 758 abort_with_zs: 759 inflateEnd(&zs); 760 abort_with_fw: 761 firmware_put(fw, FIRMWARE_UNLOAD); 762 return status; 763 } 764 765 /* 766 * Enable or disable periodic RDMAs from the host to make certain 767 * chipsets resend dropped PCIe messages 768 */ 769 770 static void 771 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 772 { 773 char buf_bytes[72]; 774 volatile uint32_t *confirm; 775 volatile char *submit; 776 uint32_t *buf, dma_low, dma_high; 777 int i; 778 779 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 780 781 /* clear confirmation addr */ 782 confirm = (volatile uint32_t *)sc->cmd; 783 *confirm = 0; 784 mb(); 785 786 /* send an rdma command to the PCIe engine, and wait for the 787 response in the confirmation address. The firmware should 788 write a -1 there to indicate it is alive and well 789 */ 790 791 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 792 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 793 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 794 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 795 buf[2] = htobe32(0xffffffff); /* confirm data */ 796 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 797 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 798 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 799 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 800 buf[5] = htobe32(enable); /* enable? */ 801 802 803 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 804 805 mxge_pio_copy(submit, buf, 64); 806 mb(); 807 DELAY(1000); 808 mb(); 809 i = 0; 810 while (*confirm != 0xffffffff && i < 20) { 811 DELAY(1000); 812 i++; 813 } 814 if (*confirm != 0xffffffff) { 815 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 816 (enable ? "enable" : "disable"), confirm, 817 *confirm); 818 } 819 return; 820 } 821 822 static int 823 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 824 { 825 mcp_cmd_t *buf; 826 char buf_bytes[sizeof(*buf) + 8]; 827 volatile mcp_cmd_response_t *response = sc->cmd; 828 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 829 uint32_t dma_low, dma_high; 830 int err, sleep_total = 0; 831 832 /* ensure buf is aligned to 8 bytes */ 833 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 834 835 buf->data0 = htobe32(data->data0); 836 buf->data1 = htobe32(data->data1); 837 buf->data2 = htobe32(data->data2); 838 buf->cmd = htobe32(cmd); 839 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 840 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 841 842 buf->response_addr.low = htobe32(dma_low); 843 buf->response_addr.high = htobe32(dma_high); 844 mtx_lock(&sc->cmd_mtx); 845 response->result = 0xffffffff; 846 mb(); 847 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 848 849 /* wait up to 20ms */ 850 err = EAGAIN; 851 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 852 bus_dmamap_sync(sc->cmd_dma.dmat, 853 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 854 mb(); 855 switch (be32toh(response->result)) { 856 case 0: 857 data->data0 = be32toh(response->data); 858 err = 0; 859 break; 860 case 0xffffffff: 861 DELAY(1000); 862 break; 863 case MXGEFW_CMD_UNKNOWN: 864 err = ENOSYS; 865 break; 866 case MXGEFW_CMD_ERROR_UNALIGNED: 867 err = E2BIG; 868 break; 869 case MXGEFW_CMD_ERROR_BUSY: 870 err = EBUSY; 871 break; 872 default: 873 device_printf(sc->dev, 874 "mxge: command %d " 875 "failed, result = %d\n", 876 cmd, be32toh(response->result)); 877 err = ENXIO; 878 break; 879 } 880 if (err != EAGAIN) 881 break; 882 } 883 if (err == EAGAIN) 884 device_printf(sc->dev, "mxge: command %d timed out" 885 "result = %d\n", 886 cmd, be32toh(response->result)); 887 mtx_unlock(&sc->cmd_mtx); 888 return err; 889 } 890 891 static int 892 mxge_adopt_running_firmware(mxge_softc_t *sc) 893 { 894 struct mcp_gen_header *hdr; 895 const size_t bytes = sizeof (struct mcp_gen_header); 896 size_t hdr_offset; 897 int status; 898 899 /* find running firmware header */ 900 hdr_offset = htobe32(*(volatile uint32_t *) 901 (sc->sram + MCP_HEADER_PTR_OFFSET)); 902 903 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 904 device_printf(sc->dev, 905 "Running firmware has bad header offset (%d)\n", 906 (int)hdr_offset); 907 return EIO; 908 } 909 910 /* copy header of running firmware from SRAM to host memory to 911 * validate firmware */ 912 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 913 if (hdr == NULL) { 914 device_printf(sc->dev, "could not malloc firmware hdr\n"); 915 return ENOMEM; 916 } 917 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 918 rman_get_bushandle(sc->mem_res), 919 hdr_offset, (char *)hdr, bytes); 920 status = mxge_validate_firmware(sc, hdr); 921 free(hdr, M_DEVBUF); 922 923 /* 924 * check to see if adopted firmware has bug where adopting 925 * it will cause broadcasts to be filtered unless the NIC 926 * is kept in ALLMULTI mode 927 */ 928 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 929 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 930 sc->adopted_rx_filter_bug = 1; 931 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 932 "working around rx filter bug\n", 933 sc->fw_ver_major, sc->fw_ver_minor, 934 sc->fw_ver_tiny); 935 } 936 937 return status; 938 } 939 940 941 static int 942 mxge_load_firmware(mxge_softc_t *sc, int adopt) 943 { 944 volatile uint32_t *confirm; 945 volatile char *submit; 946 char buf_bytes[72]; 947 uint32_t *buf, size, dma_low, dma_high; 948 int status, i; 949 950 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 951 952 size = sc->sram_size; 953 status = mxge_load_firmware_helper(sc, &size); 954 if (status) { 955 if (!adopt) 956 return status; 957 /* Try to use the currently running firmware, if 958 it is new enough */ 959 status = mxge_adopt_running_firmware(sc); 960 if (status) { 961 device_printf(sc->dev, 962 "failed to adopt running firmware\n"); 963 return status; 964 } 965 device_printf(sc->dev, 966 "Successfully adopted running firmware\n"); 967 if (sc->tx_boundary == 4096) { 968 device_printf(sc->dev, 969 "Using firmware currently running on NIC" 970 ". For optimal\n"); 971 device_printf(sc->dev, 972 "performance consider loading optimized " 973 "firmware\n"); 974 } 975 sc->fw_name = mxge_fw_unaligned; 976 sc->tx_boundary = 2048; 977 return 0; 978 } 979 /* clear confirmation addr */ 980 confirm = (volatile uint32_t *)sc->cmd; 981 *confirm = 0; 982 mb(); 983 /* send a reload command to the bootstrap MCP, and wait for the 984 response in the confirmation address. The firmware should 985 write a -1 there to indicate it is alive and well 986 */ 987 988 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 989 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 990 991 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 992 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 993 buf[2] = htobe32(0xffffffff); /* confirm data */ 994 995 /* FIX: All newest firmware should un-protect the bottom of 996 the sram before handoff. However, the very first interfaces 997 do not. Therefore the handoff copy must skip the first 8 bytes 998 */ 999 /* where the code starts*/ 1000 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 1001 buf[4] = htobe32(size - 8); /* length of code */ 1002 buf[5] = htobe32(8); /* where to copy to */ 1003 buf[6] = htobe32(0); /* where to jump to */ 1004 1005 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 1006 mxge_pio_copy(submit, buf, 64); 1007 mb(); 1008 DELAY(1000); 1009 mb(); 1010 i = 0; 1011 while (*confirm != 0xffffffff && i < 20) { 1012 DELAY(1000*10); 1013 i++; 1014 bus_dmamap_sync(sc->cmd_dma.dmat, 1015 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1016 } 1017 if (*confirm != 0xffffffff) { 1018 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1019 confirm, *confirm); 1020 1021 return ENXIO; 1022 } 1023 return 0; 1024 } 1025 1026 static int 1027 mxge_update_mac_address(mxge_softc_t *sc) 1028 { 1029 mxge_cmd_t cmd; 1030 uint8_t *addr = sc->mac_addr; 1031 int status; 1032 1033 1034 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1035 | (addr[2] << 8) | addr[3]); 1036 1037 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1038 1039 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1040 return status; 1041 } 1042 1043 static int 1044 mxge_change_pause(mxge_softc_t *sc, int pause) 1045 { 1046 mxge_cmd_t cmd; 1047 int status; 1048 1049 if (pause) 1050 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1051 &cmd); 1052 else 1053 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1054 &cmd); 1055 1056 if (status) { 1057 device_printf(sc->dev, "Failed to set flow control mode\n"); 1058 return ENXIO; 1059 } 1060 sc->pause = pause; 1061 return 0; 1062 } 1063 1064 static void 1065 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1066 { 1067 mxge_cmd_t cmd; 1068 int status; 1069 1070 if (mxge_always_promisc) 1071 promisc = 1; 1072 1073 if (promisc) 1074 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1075 &cmd); 1076 else 1077 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1078 &cmd); 1079 1080 if (status) { 1081 device_printf(sc->dev, "Failed to set promisc mode\n"); 1082 } 1083 } 1084 1085 static void 1086 mxge_set_multicast_list(mxge_softc_t *sc) 1087 { 1088 mxge_cmd_t cmd; 1089 struct ifmultiaddr *ifma; 1090 struct ifnet *ifp = sc->ifp; 1091 int err; 1092 1093 /* This firmware is known to not support multicast */ 1094 if (!sc->fw_multicast_support) 1095 return; 1096 1097 /* Disable multicast filtering while we play with the lists*/ 1098 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1099 if (err != 0) { 1100 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1101 " error status: %d\n", err); 1102 return; 1103 } 1104 1105 if (sc->adopted_rx_filter_bug) 1106 return; 1107 1108 if (ifp->if_flags & IFF_ALLMULTI) 1109 /* request to disable multicast filtering, so quit here */ 1110 return; 1111 1112 /* Flush all the filters */ 1113 1114 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1115 if (err != 0) { 1116 device_printf(sc->dev, 1117 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1118 ", error status: %d\n", err); 1119 return; 1120 } 1121 1122 /* Walk the multicast list, and add each address */ 1123 1124 IF_ADDR_LOCK(ifp); 1125 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1126 if (ifma->ifma_addr->sa_family != AF_LINK) 1127 continue; 1128 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1129 &cmd.data0, 4); 1130 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1131 &cmd.data1, 2); 1132 cmd.data0 = htonl(cmd.data0); 1133 cmd.data1 = htonl(cmd.data1); 1134 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1135 if (err != 0) { 1136 device_printf(sc->dev, "Failed " 1137 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1138 "%d\t", err); 1139 /* abort, leaving multicast filtering off */ 1140 IF_ADDR_UNLOCK(ifp); 1141 return; 1142 } 1143 } 1144 IF_ADDR_UNLOCK(ifp); 1145 /* Enable multicast filtering */ 1146 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1147 if (err != 0) { 1148 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1149 ", error status: %d\n", err); 1150 } 1151 } 1152 1153 static int 1154 mxge_max_mtu(mxge_softc_t *sc) 1155 { 1156 mxge_cmd_t cmd; 1157 int status; 1158 1159 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1160 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1161 1162 /* try to set nbufs to see if it we can 1163 use virtually contiguous jumbos */ 1164 cmd.data0 = 0; 1165 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1166 &cmd); 1167 if (status == 0) 1168 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1169 1170 /* otherwise, we're limited to MJUMPAGESIZE */ 1171 return MJUMPAGESIZE - MXGEFW_PAD; 1172 } 1173 1174 static int 1175 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1176 { 1177 struct mxge_slice_state *ss; 1178 mxge_rx_done_t *rx_done; 1179 volatile uint32_t *irq_claim; 1180 mxge_cmd_t cmd; 1181 int slice, status; 1182 1183 /* try to send a reset command to the card to see if it 1184 is alive */ 1185 memset(&cmd, 0, sizeof (cmd)); 1186 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1187 if (status != 0) { 1188 device_printf(sc->dev, "failed reset\n"); 1189 return ENXIO; 1190 } 1191 1192 mxge_dummy_rdma(sc, 1); 1193 1194 1195 /* set the intrq size */ 1196 cmd.data0 = sc->rx_ring_size; 1197 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1198 1199 /* 1200 * Even though we already know how many slices are supported 1201 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1202 * has magic side effects, and must be called after a reset. 1203 * It must be called prior to calling any RSS related cmds, 1204 * including assigning an interrupt queue for anything but 1205 * slice 0. It must also be called *after* 1206 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1207 * the firmware to compute offsets. 1208 */ 1209 1210 if (sc->num_slices > 1) { 1211 /* ask the maximum number of slices it supports */ 1212 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1213 &cmd); 1214 if (status != 0) { 1215 device_printf(sc->dev, 1216 "failed to get number of slices\n"); 1217 return status; 1218 } 1219 /* 1220 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1221 * to setting up the interrupt queue DMA 1222 */ 1223 cmd.data0 = sc->num_slices; 1224 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1225 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1226 &cmd); 1227 if (status != 0) { 1228 device_printf(sc->dev, 1229 "failed to set number of slices\n"); 1230 return status; 1231 } 1232 } 1233 1234 1235 if (interrupts_setup) { 1236 /* Now exchange information about interrupts */ 1237 for (slice = 0; slice < sc->num_slices; slice++) { 1238 rx_done = &sc->ss[slice].rx_done; 1239 memset(rx_done->entry, 0, sc->rx_ring_size); 1240 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1241 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1242 cmd.data2 = slice; 1243 status |= mxge_send_cmd(sc, 1244 MXGEFW_CMD_SET_INTRQ_DMA, 1245 &cmd); 1246 } 1247 } 1248 1249 status |= mxge_send_cmd(sc, 1250 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1251 1252 1253 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1254 1255 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1256 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1257 1258 1259 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1260 &cmd); 1261 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1262 if (status != 0) { 1263 device_printf(sc->dev, "failed set interrupt parameters\n"); 1264 return status; 1265 } 1266 1267 1268 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1269 1270 1271 /* run a DMA benchmark */ 1272 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1273 1274 for (slice = 0; slice < sc->num_slices; slice++) { 1275 ss = &sc->ss[slice]; 1276 1277 ss->irq_claim = irq_claim + (2 * slice); 1278 /* reset mcp/driver shared state back to 0 */ 1279 ss->rx_done.idx = 0; 1280 ss->rx_done.cnt = 0; 1281 ss->tx.req = 0; 1282 ss->tx.done = 0; 1283 ss->tx.pkt_done = 0; 1284 ss->tx.wake = 0; 1285 ss->tx.defrag = 0; 1286 ss->tx.stall = 0; 1287 ss->rx_big.cnt = 0; 1288 ss->rx_small.cnt = 0; 1289 ss->lro_bad_csum = 0; 1290 ss->lro_queued = 0; 1291 ss->lro_flushed = 0; 1292 if (ss->fw_stats != NULL) { 1293 ss->fw_stats->valid = 0; 1294 ss->fw_stats->send_done_count = 0; 1295 } 1296 } 1297 sc->rdma_tags_available = 15; 1298 status = mxge_update_mac_address(sc); 1299 mxge_change_promisc(sc, 0); 1300 mxge_change_pause(sc, sc->pause); 1301 mxge_set_multicast_list(sc); 1302 return status; 1303 } 1304 1305 static int 1306 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1307 { 1308 mxge_softc_t *sc; 1309 unsigned int intr_coal_delay; 1310 int err; 1311 1312 sc = arg1; 1313 intr_coal_delay = sc->intr_coal_delay; 1314 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1315 if (err != 0) { 1316 return err; 1317 } 1318 if (intr_coal_delay == sc->intr_coal_delay) 1319 return 0; 1320 1321 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1322 return EINVAL; 1323 1324 mtx_lock(&sc->driver_mtx); 1325 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1326 sc->intr_coal_delay = intr_coal_delay; 1327 1328 mtx_unlock(&sc->driver_mtx); 1329 return err; 1330 } 1331 1332 static int 1333 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1334 { 1335 mxge_softc_t *sc; 1336 unsigned int enabled; 1337 int err; 1338 1339 sc = arg1; 1340 enabled = sc->pause; 1341 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1342 if (err != 0) { 1343 return err; 1344 } 1345 if (enabled == sc->pause) 1346 return 0; 1347 1348 mtx_lock(&sc->driver_mtx); 1349 err = mxge_change_pause(sc, enabled); 1350 mtx_unlock(&sc->driver_mtx); 1351 return err; 1352 } 1353 1354 static int 1355 mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt) 1356 { 1357 struct ifnet *ifp; 1358 int err = 0; 1359 1360 ifp = sc->ifp; 1361 if (lro_cnt == 0) 1362 ifp->if_capenable &= ~IFCAP_LRO; 1363 else 1364 ifp->if_capenable |= IFCAP_LRO; 1365 sc->lro_cnt = lro_cnt; 1366 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1367 callout_stop(&sc->co_hdl); 1368 mxge_close(sc); 1369 err = mxge_open(sc); 1370 if (err == 0) 1371 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 1372 } 1373 return err; 1374 } 1375 1376 static int 1377 mxge_change_lro(SYSCTL_HANDLER_ARGS) 1378 { 1379 mxge_softc_t *sc; 1380 unsigned int lro_cnt; 1381 int err; 1382 1383 sc = arg1; 1384 lro_cnt = sc->lro_cnt; 1385 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req); 1386 if (err != 0) 1387 return err; 1388 1389 if (lro_cnt == sc->lro_cnt) 1390 return 0; 1391 1392 if (lro_cnt > 128) 1393 return EINVAL; 1394 1395 mtx_lock(&sc->driver_mtx); 1396 err = mxge_change_lro_locked(sc, lro_cnt); 1397 mtx_unlock(&sc->driver_mtx); 1398 return err; 1399 } 1400 1401 static int 1402 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1403 { 1404 int err; 1405 1406 if (arg1 == NULL) 1407 return EFAULT; 1408 arg2 = be32toh(*(int *)arg1); 1409 arg1 = NULL; 1410 err = sysctl_handle_int(oidp, arg1, arg2, req); 1411 1412 return err; 1413 } 1414 1415 static void 1416 mxge_rem_sysctls(mxge_softc_t *sc) 1417 { 1418 struct mxge_slice_state *ss; 1419 int slice; 1420 1421 if (sc->slice_sysctl_tree == NULL) 1422 return; 1423 1424 for (slice = 0; slice < sc->num_slices; slice++) { 1425 ss = &sc->ss[slice]; 1426 if (ss == NULL || ss->sysctl_tree == NULL) 1427 continue; 1428 sysctl_ctx_free(&ss->sysctl_ctx); 1429 ss->sysctl_tree = NULL; 1430 } 1431 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1432 sc->slice_sysctl_tree = NULL; 1433 } 1434 1435 static void 1436 mxge_add_sysctls(mxge_softc_t *sc) 1437 { 1438 struct sysctl_ctx_list *ctx; 1439 struct sysctl_oid_list *children; 1440 mcp_irq_data_t *fw; 1441 struct mxge_slice_state *ss; 1442 int slice; 1443 char slice_num[8]; 1444 1445 ctx = device_get_sysctl_ctx(sc->dev); 1446 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1447 fw = sc->ss[0].fw_stats; 1448 1449 /* random information */ 1450 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1451 "firmware_version", 1452 CTLFLAG_RD, &sc->fw_version, 1453 0, "firmware version"); 1454 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1455 "serial_number", 1456 CTLFLAG_RD, &sc->serial_number_string, 1457 0, "serial number"); 1458 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1459 "product_code", 1460 CTLFLAG_RD, &sc->product_code_string, 1461 0, "product_code"); 1462 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1463 "pcie_link_width", 1464 CTLFLAG_RD, &sc->link_width, 1465 0, "tx_boundary"); 1466 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1467 "tx_boundary", 1468 CTLFLAG_RD, &sc->tx_boundary, 1469 0, "tx_boundary"); 1470 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1471 "write_combine", 1472 CTLFLAG_RD, &sc->wc, 1473 0, "write combining PIO?"); 1474 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1475 "read_dma_MBs", 1476 CTLFLAG_RD, &sc->read_dma, 1477 0, "DMA Read speed in MB/s"); 1478 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1479 "write_dma_MBs", 1480 CTLFLAG_RD, &sc->write_dma, 1481 0, "DMA Write speed in MB/s"); 1482 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1483 "read_write_dma_MBs", 1484 CTLFLAG_RD, &sc->read_write_dma, 1485 0, "DMA concurrent Read/Write speed in MB/s"); 1486 1487 1488 /* performance related tunables */ 1489 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1490 "intr_coal_delay", 1491 CTLTYPE_INT|CTLFLAG_RW, sc, 1492 0, mxge_change_intr_coal, 1493 "I", "interrupt coalescing delay in usecs"); 1494 1495 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1496 "flow_control_enabled", 1497 CTLTYPE_INT|CTLFLAG_RW, sc, 1498 0, mxge_change_flow_control, 1499 "I", "interrupt coalescing delay in usecs"); 1500 1501 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1502 "deassert_wait", 1503 CTLFLAG_RW, &mxge_deassert_wait, 1504 0, "Wait for IRQ line to go low in ihandler"); 1505 1506 /* stats block from firmware is in network byte order. 1507 Need to swap it */ 1508 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1509 "link_up", 1510 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1511 0, mxge_handle_be32, 1512 "I", "link up"); 1513 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1514 "rdma_tags_available", 1515 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1516 0, mxge_handle_be32, 1517 "I", "rdma_tags_available"); 1518 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1519 "dropped_bad_crc32", 1520 CTLTYPE_INT|CTLFLAG_RD, 1521 &fw->dropped_bad_crc32, 1522 0, mxge_handle_be32, 1523 "I", "dropped_bad_crc32"); 1524 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1525 "dropped_bad_phy", 1526 CTLTYPE_INT|CTLFLAG_RD, 1527 &fw->dropped_bad_phy, 1528 0, mxge_handle_be32, 1529 "I", "dropped_bad_phy"); 1530 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1531 "dropped_link_error_or_filtered", 1532 CTLTYPE_INT|CTLFLAG_RD, 1533 &fw->dropped_link_error_or_filtered, 1534 0, mxge_handle_be32, 1535 "I", "dropped_link_error_or_filtered"); 1536 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1537 "dropped_link_overflow", 1538 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1539 0, mxge_handle_be32, 1540 "I", "dropped_link_overflow"); 1541 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1542 "dropped_multicast_filtered", 1543 CTLTYPE_INT|CTLFLAG_RD, 1544 &fw->dropped_multicast_filtered, 1545 0, mxge_handle_be32, 1546 "I", "dropped_multicast_filtered"); 1547 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1548 "dropped_no_big_buffer", 1549 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1550 0, mxge_handle_be32, 1551 "I", "dropped_no_big_buffer"); 1552 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1553 "dropped_no_small_buffer", 1554 CTLTYPE_INT|CTLFLAG_RD, 1555 &fw->dropped_no_small_buffer, 1556 0, mxge_handle_be32, 1557 "I", "dropped_no_small_buffer"); 1558 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1559 "dropped_overrun", 1560 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1561 0, mxge_handle_be32, 1562 "I", "dropped_overrun"); 1563 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1564 "dropped_pause", 1565 CTLTYPE_INT|CTLFLAG_RD, 1566 &fw->dropped_pause, 1567 0, mxge_handle_be32, 1568 "I", "dropped_pause"); 1569 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1570 "dropped_runt", 1571 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1572 0, mxge_handle_be32, 1573 "I", "dropped_runt"); 1574 1575 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1576 "dropped_unicast_filtered", 1577 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1578 0, mxge_handle_be32, 1579 "I", "dropped_unicast_filtered"); 1580 1581 /* verbose printing? */ 1582 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1583 "verbose", 1584 CTLFLAG_RW, &mxge_verbose, 1585 0, "verbose printing"); 1586 1587 /* lro */ 1588 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1589 "lro_cnt", 1590 CTLTYPE_INT|CTLFLAG_RW, sc, 1591 0, mxge_change_lro, 1592 "I", "number of lro merge queues"); 1593 1594 1595 /* add counters exported for debugging from all slices */ 1596 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1597 sc->slice_sysctl_tree = 1598 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1599 "slice", CTLFLAG_RD, 0, ""); 1600 1601 for (slice = 0; slice < sc->num_slices; slice++) { 1602 ss = &sc->ss[slice]; 1603 sysctl_ctx_init(&ss->sysctl_ctx); 1604 ctx = &ss->sysctl_ctx; 1605 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1606 sprintf(slice_num, "%d", slice); 1607 ss->sysctl_tree = 1608 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1609 CTLFLAG_RD, 0, ""); 1610 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1611 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1612 "rx_small_cnt", 1613 CTLFLAG_RD, &ss->rx_small.cnt, 1614 0, "rx_small_cnt"); 1615 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1616 "rx_big_cnt", 1617 CTLFLAG_RD, &ss->rx_big.cnt, 1618 0, "rx_small_cnt"); 1619 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1620 "tx_req", 1621 CTLFLAG_RD, &ss->tx.req, 1622 0, "tx_req"); 1623 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1624 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed, 1625 0, "number of lro merge queues flushed"); 1626 1627 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1628 "lro_queued", CTLFLAG_RD, &ss->lro_queued, 1629 0, "number of frames appended to lro merge" 1630 "queues"); 1631 1632 /* only transmit from slice 0 for now */ 1633 if (slice > 0) 1634 continue; 1635 1636 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1637 "tx_done", 1638 CTLFLAG_RD, &ss->tx.done, 1639 0, "tx_done"); 1640 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1641 "tx_pkt_done", 1642 CTLFLAG_RD, &ss->tx.pkt_done, 1643 0, "tx_done"); 1644 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1645 "tx_stall", 1646 CTLFLAG_RD, &ss->tx.stall, 1647 0, "tx_stall"); 1648 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1649 "tx_wake", 1650 CTLFLAG_RD, &ss->tx.wake, 1651 0, "tx_wake"); 1652 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1653 "tx_defrag", 1654 CTLFLAG_RD, &ss->tx.defrag, 1655 0, "tx_defrag"); 1656 } 1657 } 1658 1659 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1660 backwards one at a time and handle ring wraps */ 1661 1662 static inline void 1663 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1664 mcp_kreq_ether_send_t *src, int cnt) 1665 { 1666 int idx, starting_slot; 1667 starting_slot = tx->req; 1668 while (cnt > 1) { 1669 cnt--; 1670 idx = (starting_slot + cnt) & tx->mask; 1671 mxge_pio_copy(&tx->lanai[idx], 1672 &src[cnt], sizeof(*src)); 1673 mb(); 1674 } 1675 } 1676 1677 /* 1678 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1679 * at most 32 bytes at a time, so as to avoid involving the software 1680 * pio handler in the nic. We re-write the first segment's flags 1681 * to mark them valid only after writing the entire chain 1682 */ 1683 1684 static inline void 1685 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1686 int cnt) 1687 { 1688 int idx, i; 1689 uint32_t *src_ints; 1690 volatile uint32_t *dst_ints; 1691 mcp_kreq_ether_send_t *srcp; 1692 volatile mcp_kreq_ether_send_t *dstp, *dst; 1693 uint8_t last_flags; 1694 1695 idx = tx->req & tx->mask; 1696 1697 last_flags = src->flags; 1698 src->flags = 0; 1699 mb(); 1700 dst = dstp = &tx->lanai[idx]; 1701 srcp = src; 1702 1703 if ((idx + cnt) < tx->mask) { 1704 for (i = 0; i < (cnt - 1); i += 2) { 1705 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1706 mb(); /* force write every 32 bytes */ 1707 srcp += 2; 1708 dstp += 2; 1709 } 1710 } else { 1711 /* submit all but the first request, and ensure 1712 that it is submitted below */ 1713 mxge_submit_req_backwards(tx, src, cnt); 1714 i = 0; 1715 } 1716 if (i < cnt) { 1717 /* submit the first request */ 1718 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1719 mb(); /* barrier before setting valid flag */ 1720 } 1721 1722 /* re-write the last 32-bits with the valid flags */ 1723 src->flags = last_flags; 1724 src_ints = (uint32_t *)src; 1725 src_ints+=3; 1726 dst_ints = (volatile uint32_t *)dst; 1727 dst_ints+=3; 1728 *dst_ints = *src_ints; 1729 tx->req += cnt; 1730 mb(); 1731 } 1732 1733 #if IFCAP_TSO4 1734 1735 static void 1736 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1737 int busdma_seg_cnt, int ip_off) 1738 { 1739 mxge_tx_ring_t *tx; 1740 mcp_kreq_ether_send_t *req; 1741 bus_dma_segment_t *seg; 1742 struct ip *ip; 1743 struct tcphdr *tcp; 1744 uint32_t low, high_swapped; 1745 int len, seglen, cum_len, cum_len_next; 1746 int next_is_first, chop, cnt, rdma_count, small; 1747 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1748 uint8_t flags, flags_next; 1749 static int once; 1750 1751 mss = m->m_pkthdr.tso_segsz; 1752 1753 /* negative cum_len signifies to the 1754 * send loop that we are still in the 1755 * header portion of the TSO packet. 1756 */ 1757 1758 /* ensure we have the ethernet, IP and TCP 1759 header together in the first mbuf, copy 1760 it to a scratch buffer if not */ 1761 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1762 m_copydata(m, 0, ip_off + sizeof (*ip), 1763 ss->scratch); 1764 ip = (struct ip *)(ss->scratch + ip_off); 1765 } else { 1766 ip = (struct ip *)(mtod(m, char *) + ip_off); 1767 } 1768 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) 1769 + sizeof (*tcp))) { 1770 m_copydata(m, 0, ip_off + (ip->ip_hl << 2) 1771 + sizeof (*tcp), ss->scratch); 1772 ip = (struct ip *)(mtod(m, char *) + ip_off); 1773 } 1774 1775 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1776 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); 1777 1778 /* TSO implies checksum offload on this hardware */ 1779 cksum_offset = ip_off + (ip->ip_hl << 2); 1780 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1781 1782 1783 /* for TSO, pseudo_hdr_offset holds mss. 1784 * The firmware figures out where to put 1785 * the checksum by parsing the header. */ 1786 pseudo_hdr_offset = htobe16(mss); 1787 1788 tx = &ss->tx; 1789 req = tx->req_list; 1790 seg = tx->seg_list; 1791 cnt = 0; 1792 rdma_count = 0; 1793 /* "rdma_count" is the number of RDMAs belonging to the 1794 * current packet BEFORE the current send request. For 1795 * non-TSO packets, this is equal to "count". 1796 * For TSO packets, rdma_count needs to be reset 1797 * to 0 after a segment cut. 1798 * 1799 * The rdma_count field of the send request is 1800 * the number of RDMAs of the packet starting at 1801 * that request. For TSO send requests with one ore more cuts 1802 * in the middle, this is the number of RDMAs starting 1803 * after the last cut in the request. All previous 1804 * segments before the last cut implicitly have 1 RDMA. 1805 * 1806 * Since the number of RDMAs is not known beforehand, 1807 * it must be filled-in retroactively - after each 1808 * segmentation cut or at the end of the entire packet. 1809 */ 1810 1811 while (busdma_seg_cnt) { 1812 /* Break the busdma segment up into pieces*/ 1813 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1814 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1815 len = seg->ds_len; 1816 1817 while (len) { 1818 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1819 seglen = len; 1820 cum_len_next = cum_len + seglen; 1821 (req-rdma_count)->rdma_count = rdma_count + 1; 1822 if (__predict_true(cum_len >= 0)) { 1823 /* payload */ 1824 chop = (cum_len_next > mss); 1825 cum_len_next = cum_len_next % mss; 1826 next_is_first = (cum_len_next == 0); 1827 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1828 flags_next |= next_is_first * 1829 MXGEFW_FLAGS_FIRST; 1830 rdma_count |= -(chop | next_is_first); 1831 rdma_count += chop & !next_is_first; 1832 } else if (cum_len_next >= 0) { 1833 /* header ends */ 1834 rdma_count = -1; 1835 cum_len_next = 0; 1836 seglen = -cum_len; 1837 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1838 flags_next = MXGEFW_FLAGS_TSO_PLD | 1839 MXGEFW_FLAGS_FIRST | 1840 (small * MXGEFW_FLAGS_SMALL); 1841 } 1842 1843 req->addr_high = high_swapped; 1844 req->addr_low = htobe32(low); 1845 req->pseudo_hdr_offset = pseudo_hdr_offset; 1846 req->pad = 0; 1847 req->rdma_count = 1; 1848 req->length = htobe16(seglen); 1849 req->cksum_offset = cksum_offset; 1850 req->flags = flags | ((cum_len & 1) * 1851 MXGEFW_FLAGS_ALIGN_ODD); 1852 low += seglen; 1853 len -= seglen; 1854 cum_len = cum_len_next; 1855 flags = flags_next; 1856 req++; 1857 cnt++; 1858 rdma_count++; 1859 if (__predict_false(cksum_offset > seglen)) 1860 cksum_offset -= seglen; 1861 else 1862 cksum_offset = 0; 1863 if (__predict_false(cnt > tx->max_desc)) 1864 goto drop; 1865 } 1866 busdma_seg_cnt--; 1867 seg++; 1868 } 1869 (req-rdma_count)->rdma_count = rdma_count; 1870 1871 do { 1872 req--; 1873 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1874 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1875 1876 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1877 mxge_submit_req(tx, tx->req_list, cnt); 1878 return; 1879 1880 drop: 1881 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1882 m_freem(m); 1883 ss->sc->ifp->if_oerrors++; 1884 if (!once) { 1885 printf("tx->max_desc exceeded via TSO!\n"); 1886 printf("mss = %d, %ld, %d!\n", mss, 1887 (long)seg - (long)tx->seg_list, tx->max_desc); 1888 once = 1; 1889 } 1890 return; 1891 1892 } 1893 1894 #endif /* IFCAP_TSO4 */ 1895 1896 #ifdef MXGE_NEW_VLAN_API 1897 /* 1898 * We reproduce the software vlan tag insertion from 1899 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 1900 * vlan tag insertion. We need to advertise this in order to have the 1901 * vlan interface respect our csum offload flags. 1902 */ 1903 static struct mbuf * 1904 mxge_vlan_tag_insert(struct mbuf *m) 1905 { 1906 struct ether_vlan_header *evl; 1907 1908 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); 1909 if (__predict_false(m == NULL)) 1910 return NULL; 1911 if (m->m_len < sizeof(*evl)) { 1912 m = m_pullup(m, sizeof(*evl)); 1913 if (__predict_false(m == NULL)) 1914 return NULL; 1915 } 1916 /* 1917 * Transform the Ethernet header into an Ethernet header 1918 * with 802.1Q encapsulation. 1919 */ 1920 evl = mtod(m, struct ether_vlan_header *); 1921 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 1922 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 1923 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 1924 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 1925 m->m_flags &= ~M_VLANTAG; 1926 return m; 1927 } 1928 #endif /* MXGE_NEW_VLAN_API */ 1929 1930 static void 1931 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 1932 { 1933 mxge_softc_t *sc; 1934 mcp_kreq_ether_send_t *req; 1935 bus_dma_segment_t *seg; 1936 struct mbuf *m_tmp; 1937 struct ifnet *ifp; 1938 mxge_tx_ring_t *tx; 1939 struct ip *ip; 1940 int cnt, cum_len, err, i, idx, odd_flag, ip_off; 1941 uint16_t pseudo_hdr_offset; 1942 uint8_t flags, cksum_offset; 1943 1944 1945 sc = ss->sc; 1946 ifp = sc->ifp; 1947 tx = &ss->tx; 1948 1949 ip_off = sizeof (struct ether_header); 1950 #ifdef MXGE_NEW_VLAN_API 1951 if (m->m_flags & M_VLANTAG) { 1952 m = mxge_vlan_tag_insert(m); 1953 if (__predict_false(m == NULL)) 1954 goto drop; 1955 ip_off += ETHER_VLAN_ENCAP_LEN; 1956 } 1957 #endif 1958 /* (try to) map the frame for DMA */ 1959 idx = tx->req & tx->mask; 1960 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 1961 m, tx->seg_list, &cnt, 1962 BUS_DMA_NOWAIT); 1963 if (__predict_false(err == EFBIG)) { 1964 /* Too many segments in the chain. Try 1965 to defrag */ 1966 m_tmp = m_defrag(m, M_NOWAIT); 1967 if (m_tmp == NULL) { 1968 goto drop; 1969 } 1970 ss->tx.defrag++; 1971 m = m_tmp; 1972 err = bus_dmamap_load_mbuf_sg(tx->dmat, 1973 tx->info[idx].map, 1974 m, tx->seg_list, &cnt, 1975 BUS_DMA_NOWAIT); 1976 } 1977 if (__predict_false(err != 0)) { 1978 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 1979 " packet len = %d\n", err, m->m_pkthdr.len); 1980 goto drop; 1981 } 1982 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 1983 BUS_DMASYNC_PREWRITE); 1984 tx->info[idx].m = m; 1985 1986 #if IFCAP_TSO4 1987 /* TSO is different enough, we handle it in another routine */ 1988 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 1989 mxge_encap_tso(ss, m, cnt, ip_off); 1990 return; 1991 } 1992 #endif 1993 1994 req = tx->req_list; 1995 cksum_offset = 0; 1996 pseudo_hdr_offset = 0; 1997 flags = MXGEFW_FLAGS_NO_TSO; 1998 1999 /* checksum offloading? */ 2000 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 2001 /* ensure ip header is in first mbuf, copy 2002 it to a scratch buffer if not */ 2003 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 2004 m_copydata(m, 0, ip_off + sizeof (*ip), 2005 ss->scratch); 2006 ip = (struct ip *)(ss->scratch + ip_off); 2007 } else { 2008 ip = (struct ip *)(mtod(m, char *) + ip_off); 2009 } 2010 cksum_offset = ip_off + (ip->ip_hl << 2); 2011 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2012 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2013 req->cksum_offset = cksum_offset; 2014 flags |= MXGEFW_FLAGS_CKSUM; 2015 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2016 } else { 2017 odd_flag = 0; 2018 } 2019 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2020 flags |= MXGEFW_FLAGS_SMALL; 2021 2022 /* convert segments into a request list */ 2023 cum_len = 0; 2024 seg = tx->seg_list; 2025 req->flags = MXGEFW_FLAGS_FIRST; 2026 for (i = 0; i < cnt; i++) { 2027 req->addr_low = 2028 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2029 req->addr_high = 2030 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2031 req->length = htobe16(seg->ds_len); 2032 req->cksum_offset = cksum_offset; 2033 if (cksum_offset > seg->ds_len) 2034 cksum_offset -= seg->ds_len; 2035 else 2036 cksum_offset = 0; 2037 req->pseudo_hdr_offset = pseudo_hdr_offset; 2038 req->pad = 0; /* complete solid 16-byte block */ 2039 req->rdma_count = 1; 2040 req->flags |= flags | ((cum_len & 1) * odd_flag); 2041 cum_len += seg->ds_len; 2042 seg++; 2043 req++; 2044 req->flags = 0; 2045 } 2046 req--; 2047 /* pad runts to 60 bytes */ 2048 if (cum_len < 60) { 2049 req++; 2050 req->addr_low = 2051 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2052 req->addr_high = 2053 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2054 req->length = htobe16(60 - cum_len); 2055 req->cksum_offset = 0; 2056 req->pseudo_hdr_offset = pseudo_hdr_offset; 2057 req->pad = 0; /* complete solid 16-byte block */ 2058 req->rdma_count = 1; 2059 req->flags |= flags | ((cum_len & 1) * odd_flag); 2060 cnt++; 2061 } 2062 2063 tx->req_list[0].rdma_count = cnt; 2064 #if 0 2065 /* print what the firmware will see */ 2066 for (i = 0; i < cnt; i++) { 2067 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2068 "cso:%d, flags:0x%x, rdma:%d\n", 2069 i, (int)ntohl(tx->req_list[i].addr_high), 2070 (int)ntohl(tx->req_list[i].addr_low), 2071 (int)ntohs(tx->req_list[i].length), 2072 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2073 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2074 tx->req_list[i].rdma_count); 2075 } 2076 printf("--------------\n"); 2077 #endif 2078 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2079 mxge_submit_req(tx, tx->req_list, cnt); 2080 return; 2081 2082 drop: 2083 m_freem(m); 2084 ifp->if_oerrors++; 2085 return; 2086 } 2087 2088 2089 2090 2091 static inline void 2092 mxge_start_locked(struct mxge_slice_state *ss) 2093 { 2094 mxge_softc_t *sc; 2095 struct mbuf *m; 2096 struct ifnet *ifp; 2097 mxge_tx_ring_t *tx; 2098 2099 sc = ss->sc; 2100 ifp = sc->ifp; 2101 tx = &ss->tx; 2102 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2103 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2104 if (m == NULL) { 2105 return; 2106 } 2107 /* let BPF see it */ 2108 BPF_MTAP(ifp, m); 2109 2110 /* give it to the nic */ 2111 mxge_encap(ss, m); 2112 } 2113 /* ran out of transmit slots */ 2114 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2115 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2116 tx->stall++; 2117 } 2118 } 2119 2120 static void 2121 mxge_start(struct ifnet *ifp) 2122 { 2123 mxge_softc_t *sc = ifp->if_softc; 2124 struct mxge_slice_state *ss; 2125 2126 /* only use the first slice for now */ 2127 ss = &sc->ss[0]; 2128 mtx_lock(&ss->tx.mtx); 2129 mxge_start_locked(ss); 2130 mtx_unlock(&ss->tx.mtx); 2131 } 2132 2133 /* 2134 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2135 * at most 32 bytes at a time, so as to avoid involving the software 2136 * pio handler in the nic. We re-write the first segment's low 2137 * DMA address to mark it valid only after we write the entire chunk 2138 * in a burst 2139 */ 2140 static inline void 2141 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2142 mcp_kreq_ether_recv_t *src) 2143 { 2144 uint32_t low; 2145 2146 low = src->addr_low; 2147 src->addr_low = 0xffffffff; 2148 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2149 mb(); 2150 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2151 mb(); 2152 src->addr_low = low; 2153 dst->addr_low = low; 2154 mb(); 2155 } 2156 2157 static int 2158 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2159 { 2160 bus_dma_segment_t seg; 2161 struct mbuf *m; 2162 mxge_rx_ring_t *rx = &ss->rx_small; 2163 int cnt, err; 2164 2165 m = m_gethdr(M_DONTWAIT, MT_DATA); 2166 if (m == NULL) { 2167 rx->alloc_fail++; 2168 err = ENOBUFS; 2169 goto done; 2170 } 2171 m->m_len = MHLEN; 2172 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2173 &seg, &cnt, BUS_DMA_NOWAIT); 2174 if (err != 0) { 2175 m_free(m); 2176 goto done; 2177 } 2178 rx->info[idx].m = m; 2179 rx->shadow[idx].addr_low = 2180 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2181 rx->shadow[idx].addr_high = 2182 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2183 2184 done: 2185 if ((idx & 7) == 7) 2186 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2187 return err; 2188 } 2189 2190 static int 2191 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2192 { 2193 bus_dma_segment_t seg[3]; 2194 struct mbuf *m; 2195 mxge_rx_ring_t *rx = &ss->rx_big; 2196 int cnt, err, i; 2197 2198 if (rx->cl_size == MCLBYTES) 2199 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); 2200 else 2201 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2202 if (m == NULL) { 2203 rx->alloc_fail++; 2204 err = ENOBUFS; 2205 goto done; 2206 } 2207 m->m_len = rx->cl_size; 2208 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2209 seg, &cnt, BUS_DMA_NOWAIT); 2210 if (err != 0) { 2211 m_free(m); 2212 goto done; 2213 } 2214 rx->info[idx].m = m; 2215 rx->shadow[idx].addr_low = 2216 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2217 rx->shadow[idx].addr_high = 2218 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2219 2220 #if MXGE_VIRT_JUMBOS 2221 for (i = 1; i < cnt; i++) { 2222 rx->shadow[idx + i].addr_low = 2223 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2224 rx->shadow[idx + i].addr_high = 2225 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2226 } 2227 #endif 2228 2229 done: 2230 for (i = 0; i < rx->nbufs; i++) { 2231 if ((idx & 7) == 7) { 2232 mxge_submit_8rx(&rx->lanai[idx - 7], 2233 &rx->shadow[idx - 7]); 2234 } 2235 idx++; 2236 } 2237 return err; 2238 } 2239 2240 /* 2241 * Myri10GE hardware checksums are not valid if the sender 2242 * padded the frame with non-zero padding. This is because 2243 * the firmware just does a simple 16-bit 1s complement 2244 * checksum across the entire frame, excluding the first 14 2245 * bytes. It is best to simply to check the checksum and 2246 * tell the stack about it only if the checksum is good 2247 */ 2248 2249 static inline uint16_t 2250 mxge_rx_csum(struct mbuf *m, int csum) 2251 { 2252 struct ether_header *eh; 2253 struct ip *ip; 2254 uint16_t c; 2255 2256 eh = mtod(m, struct ether_header *); 2257 2258 /* only deal with IPv4 TCP & UDP for now */ 2259 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2260 return 1; 2261 ip = (struct ip *)(eh + 1); 2262 if (__predict_false(ip->ip_p != IPPROTO_TCP && 2263 ip->ip_p != IPPROTO_UDP)) 2264 return 1; 2265 2266 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2267 htonl(ntohs(csum) + ntohs(ip->ip_len) + 2268 - (ip->ip_hl << 2) + ip->ip_p)); 2269 c ^= 0xffff; 2270 return (c); 2271 } 2272 2273 static void 2274 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2275 { 2276 struct ether_vlan_header *evl; 2277 struct ether_header *eh; 2278 uint32_t partial; 2279 2280 evl = mtod(m, struct ether_vlan_header *); 2281 eh = mtod(m, struct ether_header *); 2282 2283 /* 2284 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2285 * after what the firmware thought was the end of the ethernet 2286 * header. 2287 */ 2288 2289 /* put checksum into host byte order */ 2290 *csum = ntohs(*csum); 2291 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2292 (*csum) += ~partial; 2293 (*csum) += ((*csum) < ~partial); 2294 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2295 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2296 2297 /* restore checksum to network byte order; 2298 later consumers expect this */ 2299 *csum = htons(*csum); 2300 2301 /* save the tag */ 2302 #ifdef MXGE_NEW_VLAN_API 2303 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2304 #else 2305 { 2306 struct m_tag *mtag; 2307 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2308 M_NOWAIT); 2309 if (mtag == NULL) 2310 return; 2311 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2312 m_tag_prepend(m, mtag); 2313 } 2314 2315 #endif 2316 m->m_flags |= M_VLANTAG; 2317 2318 /* 2319 * Remove the 802.1q header by copying the Ethernet 2320 * addresses over it and adjusting the beginning of 2321 * the data in the mbuf. The encapsulated Ethernet 2322 * type field is already in place. 2323 */ 2324 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2325 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2326 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2327 } 2328 2329 2330 static inline void 2331 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2332 { 2333 mxge_softc_t *sc; 2334 struct ifnet *ifp; 2335 struct mbuf *m; 2336 struct ether_header *eh; 2337 mxge_rx_ring_t *rx; 2338 bus_dmamap_t old_map; 2339 int idx; 2340 uint16_t tcpudp_csum; 2341 2342 sc = ss->sc; 2343 ifp = sc->ifp; 2344 rx = &ss->rx_big; 2345 idx = rx->cnt & rx->mask; 2346 rx->cnt += rx->nbufs; 2347 /* save a pointer to the received mbuf */ 2348 m = rx->info[idx].m; 2349 /* try to replace the received mbuf */ 2350 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2351 /* drop the frame -- the old mbuf is re-cycled */ 2352 ifp->if_ierrors++; 2353 return; 2354 } 2355 2356 /* unmap the received buffer */ 2357 old_map = rx->info[idx].map; 2358 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2359 bus_dmamap_unload(rx->dmat, old_map); 2360 2361 /* swap the bus_dmamap_t's */ 2362 rx->info[idx].map = rx->extra_map; 2363 rx->extra_map = old_map; 2364 2365 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2366 * aligned */ 2367 m->m_data += MXGEFW_PAD; 2368 2369 m->m_pkthdr.rcvif = ifp; 2370 m->m_len = m->m_pkthdr.len = len; 2371 ss->ipackets++; 2372 eh = mtod(m, struct ether_header *); 2373 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2374 mxge_vlan_tag_remove(m, &csum); 2375 } 2376 /* if the checksum is valid, mark it in the mbuf header */ 2377 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2378 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2379 return; 2380 /* otherwise, it was a UDP frame, or a TCP frame which 2381 we could not do LRO on. Tell the stack that the 2382 checksum is good */ 2383 m->m_pkthdr.csum_data = 0xffff; 2384 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2385 } 2386 /* pass the frame up the stack */ 2387 (*ifp->if_input)(ifp, m); 2388 } 2389 2390 static inline void 2391 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2392 { 2393 mxge_softc_t *sc; 2394 struct ifnet *ifp; 2395 struct ether_header *eh; 2396 struct mbuf *m; 2397 mxge_rx_ring_t *rx; 2398 bus_dmamap_t old_map; 2399 int idx; 2400 uint16_t tcpudp_csum; 2401 2402 sc = ss->sc; 2403 ifp = sc->ifp; 2404 rx = &ss->rx_small; 2405 idx = rx->cnt & rx->mask; 2406 rx->cnt++; 2407 /* save a pointer to the received mbuf */ 2408 m = rx->info[idx].m; 2409 /* try to replace the received mbuf */ 2410 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2411 /* drop the frame -- the old mbuf is re-cycled */ 2412 ifp->if_ierrors++; 2413 return; 2414 } 2415 2416 /* unmap the received buffer */ 2417 old_map = rx->info[idx].map; 2418 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2419 bus_dmamap_unload(rx->dmat, old_map); 2420 2421 /* swap the bus_dmamap_t's */ 2422 rx->info[idx].map = rx->extra_map; 2423 rx->extra_map = old_map; 2424 2425 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2426 * aligned */ 2427 m->m_data += MXGEFW_PAD; 2428 2429 m->m_pkthdr.rcvif = ifp; 2430 m->m_len = m->m_pkthdr.len = len; 2431 ss->ipackets++; 2432 eh = mtod(m, struct ether_header *); 2433 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2434 mxge_vlan_tag_remove(m, &csum); 2435 } 2436 /* if the checksum is valid, mark it in the mbuf header */ 2437 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2438 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2439 return; 2440 /* otherwise, it was a UDP frame, or a TCP frame which 2441 we could not do LRO on. Tell the stack that the 2442 checksum is good */ 2443 m->m_pkthdr.csum_data = 0xffff; 2444 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2445 } 2446 /* pass the frame up the stack */ 2447 (*ifp->if_input)(ifp, m); 2448 } 2449 2450 static inline void 2451 mxge_clean_rx_done(struct mxge_slice_state *ss) 2452 { 2453 mxge_rx_done_t *rx_done = &ss->rx_done; 2454 struct lro_entry *lro; 2455 int limit = 0; 2456 uint16_t length; 2457 uint16_t checksum; 2458 2459 2460 while (rx_done->entry[rx_done->idx].length != 0) { 2461 length = ntohs(rx_done->entry[rx_done->idx].length); 2462 rx_done->entry[rx_done->idx].length = 0; 2463 checksum = rx_done->entry[rx_done->idx].checksum; 2464 if (length <= (MHLEN - MXGEFW_PAD)) 2465 mxge_rx_done_small(ss, length, checksum); 2466 else 2467 mxge_rx_done_big(ss, length, checksum); 2468 rx_done->cnt++; 2469 rx_done->idx = rx_done->cnt & rx_done->mask; 2470 2471 /* limit potential for livelock */ 2472 if (__predict_false(++limit > rx_done->mask / 2)) 2473 break; 2474 } 2475 while (!SLIST_EMPTY(&ss->lro_active)) { 2476 lro = SLIST_FIRST(&ss->lro_active); 2477 SLIST_REMOVE_HEAD(&ss->lro_active, next); 2478 mxge_lro_flush(ss, lro); 2479 } 2480 } 2481 2482 2483 static inline void 2484 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2485 { 2486 struct ifnet *ifp; 2487 mxge_tx_ring_t *tx; 2488 struct mbuf *m; 2489 bus_dmamap_t map; 2490 int idx; 2491 2492 tx = &ss->tx; 2493 ifp = ss->sc->ifp; 2494 while (tx->pkt_done != mcp_idx) { 2495 idx = tx->done & tx->mask; 2496 tx->done++; 2497 m = tx->info[idx].m; 2498 /* mbuf and DMA map only attached to the first 2499 segment per-mbuf */ 2500 if (m != NULL) { 2501 ifp->if_opackets++; 2502 tx->info[idx].m = NULL; 2503 map = tx->info[idx].map; 2504 bus_dmamap_unload(tx->dmat, map); 2505 m_freem(m); 2506 } 2507 if (tx->info[idx].flag) { 2508 tx->info[idx].flag = 0; 2509 tx->pkt_done++; 2510 } 2511 } 2512 2513 /* If we have space, clear IFF_OACTIVE to tell the stack that 2514 its OK to send packets */ 2515 2516 if (ifp->if_drv_flags & IFF_DRV_OACTIVE && 2517 tx->req - tx->done < (tx->mask + 1)/4) { 2518 mtx_lock(&ss->tx.mtx); 2519 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 2520 ss->tx.wake++; 2521 mxge_start_locked(ss); 2522 mtx_unlock(&ss->tx.mtx); 2523 } 2524 } 2525 2526 static struct mxge_media_type mxge_media_types[] = 2527 { 2528 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2529 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2530 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2531 {0, (1 << 5), "10GBASE-ER"}, 2532 {0, (1 << 4), "10GBASE-LRM"}, 2533 {0, (1 << 3), "10GBASE-SW"}, 2534 {0, (1 << 2), "10GBASE-LW"}, 2535 {0, (1 << 1), "10GBASE-EW"}, 2536 {0, (1 << 0), "Reserved"} 2537 }; 2538 2539 static void 2540 mxge_set_media(mxge_softc_t *sc, int type) 2541 { 2542 sc->media_flags |= type; 2543 ifmedia_add(&sc->media, sc->media_flags, 0, NULL); 2544 ifmedia_set(&sc->media, sc->media_flags); 2545 } 2546 2547 2548 /* 2549 * Determine the media type for a NIC. Some XFPs will identify 2550 * themselves only when their link is up, so this is initiated via a 2551 * link up interrupt. However, this can potentially take up to 2552 * several milliseconds, so it is run via the watchdog routine, rather 2553 * than in the interrupt handler itself. This need only be done 2554 * once, not each time the link is up. 2555 */ 2556 static void 2557 mxge_media_probe(mxge_softc_t *sc) 2558 { 2559 mxge_cmd_t cmd; 2560 char *ptr; 2561 int i, err, ms; 2562 2563 sc->need_media_probe = 0; 2564 2565 /* if we've already set a media type, we're done */ 2566 if (sc->media_flags != (IFM_ETHER | IFM_AUTO)) 2567 return; 2568 2569 /* 2570 * parse the product code to deterimine the interface type 2571 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2572 * after the 3rd dash in the driver's cached copy of the 2573 * EEPROM's product code string. 2574 */ 2575 ptr = sc->product_code_string; 2576 if (ptr == NULL) { 2577 device_printf(sc->dev, "Missing product code\n"); 2578 } 2579 2580 for (i = 0; i < 3; i++, ptr++) { 2581 ptr = index(ptr, '-'); 2582 if (ptr == NULL) { 2583 device_printf(sc->dev, 2584 "only %d dashes in PC?!?\n", i); 2585 return; 2586 } 2587 } 2588 if (*ptr == 'C') { 2589 mxge_set_media(sc, IFM_10G_CX4); 2590 return; 2591 } 2592 else if (*ptr == 'Q') { 2593 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2594 /* FreeBSD has no media type for Quad ribbon fiber */ 2595 return; 2596 } 2597 2598 if (*ptr != 'R') { 2599 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2600 return; 2601 } 2602 2603 /* 2604 * At this point we know the NIC has an XFP cage, so now we 2605 * try to determine what is in the cage by using the 2606 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2607 * register. We read just one byte, which may take over 2608 * a millisecond 2609 */ 2610 2611 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2612 cmd.data1 = MXGE_XFP_COMPLIANCE_BYTE; /* the byte we want */ 2613 err = mxge_send_cmd(sc, MXGEFW_CMD_XFP_I2C_READ, &cmd); 2614 if (err == MXGEFW_CMD_ERROR_XFP_FAILURE) { 2615 device_printf(sc->dev, "failed to read XFP\n"); 2616 } 2617 if (err == MXGEFW_CMD_ERROR_XFP_ABSENT) { 2618 device_printf(sc->dev, "Type R with no XFP!?!?\n"); 2619 } 2620 if (err != MXGEFW_CMD_OK) { 2621 return; 2622 } 2623 2624 /* now we wait for the data to be cached */ 2625 cmd.data0 = MXGE_XFP_COMPLIANCE_BYTE; 2626 err = mxge_send_cmd(sc, MXGEFW_CMD_XFP_BYTE, &cmd); 2627 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2628 DELAY(1000); 2629 cmd.data0 = MXGE_XFP_COMPLIANCE_BYTE; 2630 err = mxge_send_cmd(sc, MXGEFW_CMD_XFP_BYTE, &cmd); 2631 } 2632 if (err != MXGEFW_CMD_OK) { 2633 device_printf(sc->dev, "failed to read XFP (%d, %dms)\n", 2634 err, ms); 2635 return; 2636 } 2637 2638 if (cmd.data0 == mxge_media_types[0].bitmask) { 2639 if (mxge_verbose) 2640 device_printf(sc->dev, "XFP:%s\n", 2641 mxge_media_types[0].name); 2642 mxge_set_media(sc, IFM_10G_CX4); 2643 return; 2644 } 2645 for (i = 1; 2646 i < sizeof (mxge_media_types) / sizeof (mxge_media_types[0]); 2647 i++) { 2648 if (cmd.data0 & mxge_media_types[i].bitmask) { 2649 if (mxge_verbose) 2650 device_printf(sc->dev, "XFP:%s\n", 2651 mxge_media_types[i].name); 2652 2653 mxge_set_media(sc, mxge_media_types[i].flag); 2654 return; 2655 } 2656 } 2657 device_printf(sc->dev, "XFP media 0x%x unknown\n", cmd.data0); 2658 2659 return; 2660 } 2661 2662 static void 2663 mxge_intr(void *arg) 2664 { 2665 struct mxge_slice_state *ss = arg; 2666 mxge_softc_t *sc = ss->sc; 2667 mcp_irq_data_t *stats = ss->fw_stats; 2668 mxge_tx_ring_t *tx = &ss->tx; 2669 mxge_rx_done_t *rx_done = &ss->rx_done; 2670 uint32_t send_done_count; 2671 uint8_t valid; 2672 2673 2674 /* an interrupt on a non-zero slice is implicitly valid 2675 since MSI-X irqs are not shared */ 2676 if (ss != sc->ss) { 2677 mxge_clean_rx_done(ss); 2678 *ss->irq_claim = be32toh(3); 2679 return; 2680 } 2681 2682 /* make sure the DMA has finished */ 2683 if (!stats->valid) { 2684 return; 2685 } 2686 valid = stats->valid; 2687 2688 if (sc->legacy_irq) { 2689 /* lower legacy IRQ */ 2690 *sc->irq_deassert = 0; 2691 if (!mxge_deassert_wait) 2692 /* don't wait for conf. that irq is low */ 2693 stats->valid = 0; 2694 } else { 2695 stats->valid = 0; 2696 } 2697 2698 /* loop while waiting for legacy irq deassertion */ 2699 do { 2700 /* check for transmit completes and receives */ 2701 send_done_count = be32toh(stats->send_done_count); 2702 while ((send_done_count != tx->pkt_done) || 2703 (rx_done->entry[rx_done->idx].length != 0)) { 2704 mxge_tx_done(ss, (int)send_done_count); 2705 mxge_clean_rx_done(ss); 2706 send_done_count = be32toh(stats->send_done_count); 2707 } 2708 if (sc->legacy_irq && mxge_deassert_wait) 2709 mb(); 2710 } while (*((volatile uint8_t *) &stats->valid)); 2711 2712 if (__predict_false(stats->stats_updated)) { 2713 if (sc->link_state != stats->link_up) { 2714 sc->link_state = stats->link_up; 2715 if (sc->link_state) { 2716 if_link_state_change(sc->ifp, LINK_STATE_UP); 2717 if (mxge_verbose) 2718 device_printf(sc->dev, "link up\n"); 2719 } else { 2720 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2721 if (mxge_verbose) 2722 device_printf(sc->dev, "link down\n"); 2723 } 2724 sc->need_media_probe = 1; 2725 } 2726 if (sc->rdma_tags_available != 2727 be32toh(stats->rdma_tags_available)) { 2728 sc->rdma_tags_available = 2729 be32toh(stats->rdma_tags_available); 2730 device_printf(sc->dev, "RDMA timed out! %d tags " 2731 "left\n", sc->rdma_tags_available); 2732 } 2733 2734 if (stats->link_down) { 2735 sc->down_cnt += stats->link_down; 2736 sc->link_state = 0; 2737 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2738 } 2739 } 2740 2741 /* check to see if we have rx token to pass back */ 2742 if (valid & 0x1) 2743 *ss->irq_claim = be32toh(3); 2744 *(ss->irq_claim + 1) = be32toh(3); 2745 } 2746 2747 static void 2748 mxge_init(void *arg) 2749 { 2750 } 2751 2752 2753 2754 static void 2755 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 2756 { 2757 struct lro_entry *lro_entry; 2758 int i; 2759 2760 while (!SLIST_EMPTY(&ss->lro_free)) { 2761 lro_entry = SLIST_FIRST(&ss->lro_free); 2762 SLIST_REMOVE_HEAD(&ss->lro_free, next); 2763 free(lro_entry, M_DEVBUF); 2764 } 2765 2766 for (i = 0; i <= ss->rx_big.mask; i++) { 2767 if (ss->rx_big.info[i].m == NULL) 2768 continue; 2769 bus_dmamap_unload(ss->rx_big.dmat, 2770 ss->rx_big.info[i].map); 2771 m_freem(ss->rx_big.info[i].m); 2772 ss->rx_big.info[i].m = NULL; 2773 } 2774 2775 for (i = 0; i <= ss->rx_small.mask; i++) { 2776 if (ss->rx_small.info[i].m == NULL) 2777 continue; 2778 bus_dmamap_unload(ss->rx_small.dmat, 2779 ss->rx_small.info[i].map); 2780 m_freem(ss->rx_small.info[i].m); 2781 ss->rx_small.info[i].m = NULL; 2782 } 2783 2784 /* transmit ring used only on the first slice */ 2785 if (ss->tx.info == NULL) 2786 return; 2787 2788 for (i = 0; i <= ss->tx.mask; i++) { 2789 ss->tx.info[i].flag = 0; 2790 if (ss->tx.info[i].m == NULL) 2791 continue; 2792 bus_dmamap_unload(ss->tx.dmat, 2793 ss->tx.info[i].map); 2794 m_freem(ss->tx.info[i].m); 2795 ss->tx.info[i].m = NULL; 2796 } 2797 } 2798 2799 static void 2800 mxge_free_mbufs(mxge_softc_t *sc) 2801 { 2802 int slice; 2803 2804 for (slice = 0; slice < sc->num_slices; slice++) 2805 mxge_free_slice_mbufs(&sc->ss[slice]); 2806 } 2807 2808 static void 2809 mxge_free_slice_rings(struct mxge_slice_state *ss) 2810 { 2811 int i; 2812 2813 2814 if (ss->rx_done.entry != NULL) 2815 mxge_dma_free(&ss->rx_done.dma); 2816 ss->rx_done.entry = NULL; 2817 2818 if (ss->tx.req_bytes != NULL) 2819 free(ss->tx.req_bytes, M_DEVBUF); 2820 ss->tx.req_bytes = NULL; 2821 2822 if (ss->tx.seg_list != NULL) 2823 free(ss->tx.seg_list, M_DEVBUF); 2824 ss->tx.seg_list = NULL; 2825 2826 if (ss->rx_small.shadow != NULL) 2827 free(ss->rx_small.shadow, M_DEVBUF); 2828 ss->rx_small.shadow = NULL; 2829 2830 if (ss->rx_big.shadow != NULL) 2831 free(ss->rx_big.shadow, M_DEVBUF); 2832 ss->rx_big.shadow = NULL; 2833 2834 if (ss->tx.info != NULL) { 2835 if (ss->tx.dmat != NULL) { 2836 for (i = 0; i <= ss->tx.mask; i++) { 2837 bus_dmamap_destroy(ss->tx.dmat, 2838 ss->tx.info[i].map); 2839 } 2840 bus_dma_tag_destroy(ss->tx.dmat); 2841 } 2842 free(ss->tx.info, M_DEVBUF); 2843 } 2844 ss->tx.info = NULL; 2845 2846 if (ss->rx_small.info != NULL) { 2847 if (ss->rx_small.dmat != NULL) { 2848 for (i = 0; i <= ss->rx_small.mask; i++) { 2849 bus_dmamap_destroy(ss->rx_small.dmat, 2850 ss->rx_small.info[i].map); 2851 } 2852 bus_dmamap_destroy(ss->rx_small.dmat, 2853 ss->rx_small.extra_map); 2854 bus_dma_tag_destroy(ss->rx_small.dmat); 2855 } 2856 free(ss->rx_small.info, M_DEVBUF); 2857 } 2858 ss->rx_small.info = NULL; 2859 2860 if (ss->rx_big.info != NULL) { 2861 if (ss->rx_big.dmat != NULL) { 2862 for (i = 0; i <= ss->rx_big.mask; i++) { 2863 bus_dmamap_destroy(ss->rx_big.dmat, 2864 ss->rx_big.info[i].map); 2865 } 2866 bus_dmamap_destroy(ss->rx_big.dmat, 2867 ss->rx_big.extra_map); 2868 bus_dma_tag_destroy(ss->rx_big.dmat); 2869 } 2870 free(ss->rx_big.info, M_DEVBUF); 2871 } 2872 ss->rx_big.info = NULL; 2873 } 2874 2875 static void 2876 mxge_free_rings(mxge_softc_t *sc) 2877 { 2878 int slice; 2879 2880 for (slice = 0; slice < sc->num_slices; slice++) 2881 mxge_free_slice_rings(&sc->ss[slice]); 2882 } 2883 2884 static int 2885 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 2886 int tx_ring_entries) 2887 { 2888 mxge_softc_t *sc = ss->sc; 2889 size_t bytes; 2890 int err, i; 2891 2892 err = ENOMEM; 2893 2894 /* allocate per-slice receive resources */ 2895 2896 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 2897 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 2898 2899 /* allocate the rx shadow rings */ 2900 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 2901 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2902 if (ss->rx_small.shadow == NULL) 2903 return err;; 2904 2905 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 2906 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2907 if (ss->rx_big.shadow == NULL) 2908 return err;; 2909 2910 /* allocate the rx host info rings */ 2911 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 2912 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2913 if (ss->rx_small.info == NULL) 2914 return err;; 2915 2916 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 2917 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2918 if (ss->rx_big.info == NULL) 2919 return err;; 2920 2921 /* allocate the rx busdma resources */ 2922 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2923 1, /* alignment */ 2924 4096, /* boundary */ 2925 BUS_SPACE_MAXADDR, /* low */ 2926 BUS_SPACE_MAXADDR, /* high */ 2927 NULL, NULL, /* filter */ 2928 MHLEN, /* maxsize */ 2929 1, /* num segs */ 2930 MHLEN, /* maxsegsize */ 2931 BUS_DMA_ALLOCNOW, /* flags */ 2932 NULL, NULL, /* lock */ 2933 &ss->rx_small.dmat); /* tag */ 2934 if (err != 0) { 2935 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 2936 err); 2937 return err;; 2938 } 2939 2940 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2941 1, /* alignment */ 2942 #if MXGE_VIRT_JUMBOS 2943 4096, /* boundary */ 2944 #else 2945 0, /* boundary */ 2946 #endif 2947 BUS_SPACE_MAXADDR, /* low */ 2948 BUS_SPACE_MAXADDR, /* high */ 2949 NULL, NULL, /* filter */ 2950 3*4096, /* maxsize */ 2951 #if MXGE_VIRT_JUMBOS 2952 3, /* num segs */ 2953 4096, /* maxsegsize*/ 2954 #else 2955 1, /* num segs */ 2956 MJUM9BYTES, /* maxsegsize*/ 2957 #endif 2958 BUS_DMA_ALLOCNOW, /* flags */ 2959 NULL, NULL, /* lock */ 2960 &ss->rx_big.dmat); /* tag */ 2961 if (err != 0) { 2962 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 2963 err); 2964 return err;; 2965 } 2966 for (i = 0; i <= ss->rx_small.mask; i++) { 2967 err = bus_dmamap_create(ss->rx_small.dmat, 0, 2968 &ss->rx_small.info[i].map); 2969 if (err != 0) { 2970 device_printf(sc->dev, "Err %d rx_small dmamap\n", 2971 err); 2972 return err;; 2973 } 2974 } 2975 err = bus_dmamap_create(ss->rx_small.dmat, 0, 2976 &ss->rx_small.extra_map); 2977 if (err != 0) { 2978 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 2979 err); 2980 return err;; 2981 } 2982 2983 for (i = 0; i <= ss->rx_big.mask; i++) { 2984 err = bus_dmamap_create(ss->rx_big.dmat, 0, 2985 &ss->rx_big.info[i].map); 2986 if (err != 0) { 2987 device_printf(sc->dev, "Err %d rx_big dmamap\n", 2988 err); 2989 return err;; 2990 } 2991 } 2992 err = bus_dmamap_create(ss->rx_big.dmat, 0, 2993 &ss->rx_big.extra_map); 2994 if (err != 0) { 2995 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 2996 err); 2997 return err;; 2998 } 2999 3000 /* now allocate TX resouces */ 3001 3002 /* only use a single TX ring for now */ 3003 if (ss != ss->sc->ss) 3004 return 0; 3005 3006 ss->tx.mask = tx_ring_entries - 1; 3007 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3008 3009 3010 /* allocate the tx request copy block */ 3011 bytes = 8 + 3012 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3013 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3014 if (ss->tx.req_bytes == NULL) 3015 return err;; 3016 /* ensure req_list entries are aligned to 8 bytes */ 3017 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3018 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3019 3020 /* allocate the tx busdma segment list */ 3021 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3022 ss->tx.seg_list = (bus_dma_segment_t *) 3023 malloc(bytes, M_DEVBUF, M_WAITOK); 3024 if (ss->tx.seg_list == NULL) 3025 return err;; 3026 3027 /* allocate the tx host info ring */ 3028 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3029 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3030 if (ss->tx.info == NULL) 3031 return err;; 3032 3033 /* allocate the tx busdma resources */ 3034 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3035 1, /* alignment */ 3036 sc->tx_boundary, /* boundary */ 3037 BUS_SPACE_MAXADDR, /* low */ 3038 BUS_SPACE_MAXADDR, /* high */ 3039 NULL, NULL, /* filter */ 3040 65536 + 256, /* maxsize */ 3041 ss->tx.max_desc - 2, /* num segs */ 3042 sc->tx_boundary, /* maxsegsz */ 3043 BUS_DMA_ALLOCNOW, /* flags */ 3044 NULL, NULL, /* lock */ 3045 &ss->tx.dmat); /* tag */ 3046 3047 if (err != 0) { 3048 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3049 err); 3050 return err;; 3051 } 3052 3053 /* now use these tags to setup dmamaps for each slot 3054 in the ring */ 3055 for (i = 0; i <= ss->tx.mask; i++) { 3056 err = bus_dmamap_create(ss->tx.dmat, 0, 3057 &ss->tx.info[i].map); 3058 if (err != 0) { 3059 device_printf(sc->dev, "Err %d tx dmamap\n", 3060 err); 3061 return err;; 3062 } 3063 } 3064 return 0; 3065 3066 } 3067 3068 static int 3069 mxge_alloc_rings(mxge_softc_t *sc) 3070 { 3071 mxge_cmd_t cmd; 3072 int tx_ring_size; 3073 int tx_ring_entries, rx_ring_entries; 3074 int err, slice; 3075 3076 /* get ring sizes */ 3077 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3078 tx_ring_size = cmd.data0; 3079 if (err != 0) { 3080 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3081 goto abort; 3082 } 3083 3084 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3085 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3086 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3087 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3088 IFQ_SET_READY(&sc->ifp->if_snd); 3089 3090 for (slice = 0; slice < sc->num_slices; slice++) { 3091 err = mxge_alloc_slice_rings(&sc->ss[slice], 3092 rx_ring_entries, 3093 tx_ring_entries); 3094 if (err != 0) 3095 goto abort; 3096 } 3097 return 0; 3098 3099 abort: 3100 mxge_free_rings(sc); 3101 return err; 3102 3103 } 3104 3105 3106 static void 3107 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3108 { 3109 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3110 3111 if (bufsize < MCLBYTES) { 3112 /* easy, everything fits in a single buffer */ 3113 *big_buf_size = MCLBYTES; 3114 *cl_size = MCLBYTES; 3115 *nbufs = 1; 3116 return; 3117 } 3118 3119 if (bufsize < MJUMPAGESIZE) { 3120 /* still easy, everything still fits in a single buffer */ 3121 *big_buf_size = MJUMPAGESIZE; 3122 *cl_size = MJUMPAGESIZE; 3123 *nbufs = 1; 3124 return; 3125 } 3126 #if MXGE_VIRT_JUMBOS 3127 /* now we need to use virtually contiguous buffers */ 3128 *cl_size = MJUM9BYTES; 3129 *big_buf_size = 4096; 3130 *nbufs = mtu / 4096 + 1; 3131 /* needs to be a power of two, so round up */ 3132 if (*nbufs == 3) 3133 *nbufs = 4; 3134 #else 3135 *cl_size = MJUM9BYTES; 3136 *big_buf_size = MJUM9BYTES; 3137 *nbufs = 1; 3138 #endif 3139 } 3140 3141 static int 3142 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3143 { 3144 mxge_softc_t *sc; 3145 mxge_cmd_t cmd; 3146 bus_dmamap_t map; 3147 struct lro_entry *lro_entry; 3148 int err, i, slice; 3149 3150 3151 sc = ss->sc; 3152 slice = ss - sc->ss; 3153 3154 SLIST_INIT(&ss->lro_free); 3155 SLIST_INIT(&ss->lro_active); 3156 3157 for (i = 0; i < sc->lro_cnt; i++) { 3158 lro_entry = (struct lro_entry *) 3159 malloc(sizeof (*lro_entry), M_DEVBUF, 3160 M_NOWAIT | M_ZERO); 3161 if (lro_entry == NULL) { 3162 sc->lro_cnt = i; 3163 break; 3164 } 3165 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); 3166 } 3167 /* get the lanai pointers to the send and receive rings */ 3168 3169 err = 0; 3170 /* We currently only send from the first slice */ 3171 if (slice == 0) { 3172 cmd.data0 = slice; 3173 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3174 ss->tx.lanai = 3175 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3176 } 3177 cmd.data0 = slice; 3178 err |= mxge_send_cmd(sc, 3179 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3180 ss->rx_small.lanai = 3181 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3182 cmd.data0 = slice; 3183 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3184 ss->rx_big.lanai = 3185 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3186 3187 if (err != 0) { 3188 device_printf(sc->dev, 3189 "failed to get ring sizes or locations\n"); 3190 return EIO; 3191 } 3192 3193 /* stock receive rings */ 3194 for (i = 0; i <= ss->rx_small.mask; i++) { 3195 map = ss->rx_small.info[i].map; 3196 err = mxge_get_buf_small(ss, map, i); 3197 if (err) { 3198 device_printf(sc->dev, "alloced %d/%d smalls\n", 3199 i, ss->rx_small.mask + 1); 3200 return ENOMEM; 3201 } 3202 } 3203 for (i = 0; i <= ss->rx_big.mask; i++) { 3204 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3205 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3206 } 3207 ss->rx_big.nbufs = nbufs; 3208 ss->rx_big.cl_size = cl_size; 3209 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3210 map = ss->rx_big.info[i].map; 3211 err = mxge_get_buf_big(ss, map, i); 3212 if (err) { 3213 device_printf(sc->dev, "alloced %d/%d bigs\n", 3214 i, ss->rx_big.mask + 1); 3215 return ENOMEM; 3216 } 3217 } 3218 return 0; 3219 } 3220 3221 static int 3222 mxge_open(mxge_softc_t *sc) 3223 { 3224 mxge_cmd_t cmd; 3225 int err, big_bytes, nbufs, slice, cl_size, i; 3226 bus_addr_t bus; 3227 volatile uint8_t *itable; 3228 3229 /* Copy the MAC address in case it was overridden */ 3230 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3231 3232 err = mxge_reset(sc, 1); 3233 if (err != 0) { 3234 device_printf(sc->dev, "failed to reset\n"); 3235 return EIO; 3236 } 3237 3238 if (sc->num_slices > 1) { 3239 /* setup the indirection table */ 3240 cmd.data0 = sc->num_slices; 3241 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3242 &cmd); 3243 3244 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3245 &cmd); 3246 if (err != 0) { 3247 device_printf(sc->dev, 3248 "failed to setup rss tables\n"); 3249 return err; 3250 } 3251 3252 /* just enable an identity mapping */ 3253 itable = sc->sram + cmd.data0; 3254 for (i = 0; i < sc->num_slices; i++) 3255 itable[i] = (uint8_t)i; 3256 3257 cmd.data0 = 1; 3258 cmd.data1 = mxge_rss_hash_type; 3259 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3260 if (err != 0) { 3261 device_printf(sc->dev, "failed to enable slices\n"); 3262 return err; 3263 } 3264 } 3265 3266 3267 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3268 3269 cmd.data0 = nbufs; 3270 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3271 &cmd); 3272 /* error is only meaningful if we're trying to set 3273 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3274 if (err && nbufs > 1) { 3275 device_printf(sc->dev, 3276 "Failed to set alway-use-n to %d\n", 3277 nbufs); 3278 return EIO; 3279 } 3280 /* Give the firmware the mtu and the big and small buffer 3281 sizes. The firmware wants the big buf size to be a power 3282 of two. Luckily, FreeBSD's clusters are powers of two */ 3283 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3284 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3285 cmd.data0 = MHLEN - MXGEFW_PAD; 3286 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3287 &cmd); 3288 cmd.data0 = big_bytes; 3289 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3290 3291 if (err != 0) { 3292 device_printf(sc->dev, "failed to setup params\n"); 3293 goto abort; 3294 } 3295 3296 /* Now give him the pointer to the stats block */ 3297 cmd.data0 = MXGE_LOWPART_TO_U32(sc->ss->fw_stats_dma.bus_addr); 3298 cmd.data1 = MXGE_HIGHPART_TO_U32(sc->ss->fw_stats_dma.bus_addr); 3299 cmd.data2 = sizeof(struct mcp_irq_data); 3300 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3301 3302 if (err != 0) { 3303 bus = sc->ss->fw_stats_dma.bus_addr; 3304 bus += offsetof(struct mcp_irq_data, send_done_count); 3305 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3306 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3307 err = mxge_send_cmd(sc, 3308 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3309 &cmd); 3310 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3311 sc->fw_multicast_support = 0; 3312 } else { 3313 sc->fw_multicast_support = 1; 3314 } 3315 3316 if (err != 0) { 3317 device_printf(sc->dev, "failed to setup params\n"); 3318 goto abort; 3319 } 3320 3321 for (slice = 0; slice < sc->num_slices; slice++) { 3322 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3323 if (err != 0) { 3324 device_printf(sc->dev, "couldn't open slice %d\n", 3325 slice); 3326 goto abort; 3327 } 3328 } 3329 3330 /* Finally, start the firmware running */ 3331 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3332 if (err) { 3333 device_printf(sc->dev, "Couldn't bring up link\n"); 3334 goto abort; 3335 } 3336 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3337 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3338 3339 return 0; 3340 3341 3342 abort: 3343 mxge_free_mbufs(sc); 3344 3345 return err; 3346 } 3347 3348 static int 3349 mxge_close(mxge_softc_t *sc) 3350 { 3351 mxge_cmd_t cmd; 3352 int err, old_down_cnt; 3353 3354 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3355 old_down_cnt = sc->down_cnt; 3356 mb(); 3357 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3358 if (err) { 3359 device_printf(sc->dev, "Couldn't bring down link\n"); 3360 } 3361 if (old_down_cnt == sc->down_cnt) { 3362 /* wait for down irq */ 3363 DELAY(10 * sc->intr_coal_delay); 3364 } 3365 mb(); 3366 if (old_down_cnt == sc->down_cnt) { 3367 device_printf(sc->dev, "never got down irq\n"); 3368 } 3369 3370 mxge_free_mbufs(sc); 3371 3372 return 0; 3373 } 3374 3375 static void 3376 mxge_setup_cfg_space(mxge_softc_t *sc) 3377 { 3378 device_t dev = sc->dev; 3379 int reg; 3380 uint16_t cmd, lnk, pectl; 3381 3382 /* find the PCIe link width and set max read request to 4KB*/ 3383 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3384 lnk = pci_read_config(dev, reg + 0x12, 2); 3385 sc->link_width = (lnk >> 4) & 0x3f; 3386 3387 pectl = pci_read_config(dev, reg + 0x8, 2); 3388 pectl = (pectl & ~0x7000) | (5 << 12); 3389 pci_write_config(dev, reg + 0x8, pectl, 2); 3390 } 3391 3392 /* Enable DMA and Memory space access */ 3393 pci_enable_busmaster(dev); 3394 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 3395 cmd |= PCIM_CMD_MEMEN; 3396 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 3397 } 3398 3399 static uint32_t 3400 mxge_read_reboot(mxge_softc_t *sc) 3401 { 3402 device_t dev = sc->dev; 3403 uint32_t vs; 3404 3405 /* find the vendor specific offset */ 3406 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 3407 device_printf(sc->dev, 3408 "could not find vendor specific offset\n"); 3409 return (uint32_t)-1; 3410 } 3411 /* enable read32 mode */ 3412 pci_write_config(dev, vs + 0x10, 0x3, 1); 3413 /* tell NIC which register to read */ 3414 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3415 return (pci_read_config(dev, vs + 0x14, 4)); 3416 } 3417 3418 static void 3419 mxge_watchdog_reset(mxge_softc_t *sc) 3420 { 3421 int err; 3422 uint32_t reboot; 3423 uint16_t cmd; 3424 3425 err = ENXIO; 3426 3427 device_printf(sc->dev, "Watchdog reset!\n"); 3428 3429 /* 3430 * check to see if the NIC rebooted. If it did, then all of 3431 * PCI config space has been reset, and things like the 3432 * busmaster bit will be zero. If this is the case, then we 3433 * must restore PCI config space before the NIC can be used 3434 * again 3435 */ 3436 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3437 if (cmd == 0xffff) { 3438 /* 3439 * maybe the watchdog caught the NIC rebooting; wait 3440 * up to 100ms for it to finish. If it does not come 3441 * back, then give up 3442 */ 3443 DELAY(1000*100); 3444 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3445 if (cmd == 0xffff) { 3446 device_printf(sc->dev, "NIC disappeared!\n"); 3447 goto abort; 3448 } 3449 } 3450 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3451 /* print the reboot status */ 3452 reboot = mxge_read_reboot(sc); 3453 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3454 reboot); 3455 /* restore PCI configuration space */ 3456 3457 /* XXXX waiting for pci_cfg_restore() to be exported */ 3458 goto abort; /* just abort for now */ 3459 3460 /* and redo any changes we made to our config space */ 3461 mxge_setup_cfg_space(sc); 3462 3463 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) { 3464 mxge_close(sc); 3465 err = mxge_open(sc); 3466 } 3467 } else { 3468 device_printf(sc->dev, "NIC did not reboot, ring state:\n"); 3469 device_printf(sc->dev, "tx.req=%d tx.done=%d\n", 3470 sc->ss->tx.req, sc->ss->tx.done); 3471 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3472 sc->ss->tx.pkt_done, 3473 be32toh(sc->ss->fw_stats->send_done_count)); 3474 device_printf(sc->dev, "not resetting\n"); 3475 } 3476 3477 abort: 3478 /* 3479 * stop the watchdog if the nic is dead, to avoid spamming the 3480 * console 3481 */ 3482 if (err != 0) { 3483 callout_stop(&sc->co_hdl); 3484 } 3485 } 3486 3487 static void 3488 mxge_watchdog(mxge_softc_t *sc) 3489 { 3490 mxge_tx_ring_t *tx = &sc->ss->tx; 3491 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3492 3493 /* see if we have outstanding transmits, which 3494 have been pending for more than mxge_ticks */ 3495 if (tx->req != tx->done && 3496 tx->watchdog_req != tx->watchdog_done && 3497 tx->done == tx->watchdog_done) { 3498 /* check for pause blocking before resetting */ 3499 if (tx->watchdog_rx_pause == rx_pause) 3500 mxge_watchdog_reset(sc); 3501 else 3502 device_printf(sc->dev, "Flow control blocking " 3503 "xmits, check link partner\n"); 3504 } 3505 3506 tx->watchdog_req = tx->req; 3507 tx->watchdog_done = tx->done; 3508 tx->watchdog_rx_pause = rx_pause; 3509 3510 if (sc->need_media_probe) 3511 mxge_media_probe(sc); 3512 } 3513 3514 static void 3515 mxge_update_stats(mxge_softc_t *sc) 3516 { 3517 struct mxge_slice_state *ss; 3518 u_long ipackets = 0; 3519 int slice; 3520 3521 for(slice = 0; slice < sc->num_slices; slice++) { 3522 ss = &sc->ss[slice]; 3523 ipackets += ss->ipackets; 3524 } 3525 sc->ifp->if_ipackets = ipackets; 3526 3527 } 3528 static void 3529 mxge_tick(void *arg) 3530 { 3531 mxge_softc_t *sc = arg; 3532 3533 3534 /* Synchronize with possible callout reset/stop. */ 3535 if (callout_pending(&sc->co_hdl) || 3536 !callout_active(&sc->co_hdl)) { 3537 mtx_unlock(&sc->driver_mtx); 3538 return; 3539 } 3540 3541 /* aggregate stats from different slices */ 3542 mxge_update_stats(sc); 3543 3544 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3545 if (!sc->watchdog_countdown) { 3546 mxge_watchdog(sc); 3547 sc->watchdog_countdown = 4; 3548 } 3549 sc->watchdog_countdown--; 3550 } 3551 3552 static int 3553 mxge_media_change(struct ifnet *ifp) 3554 { 3555 return EINVAL; 3556 } 3557 3558 static int 3559 mxge_change_mtu(mxge_softc_t *sc, int mtu) 3560 { 3561 struct ifnet *ifp = sc->ifp; 3562 int real_mtu, old_mtu; 3563 int err = 0; 3564 3565 3566 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3567 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 3568 return EINVAL; 3569 mtx_lock(&sc->driver_mtx); 3570 old_mtu = ifp->if_mtu; 3571 ifp->if_mtu = mtu; 3572 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3573 callout_stop(&sc->co_hdl); 3574 mxge_close(sc); 3575 err = mxge_open(sc); 3576 if (err != 0) { 3577 ifp->if_mtu = old_mtu; 3578 mxge_close(sc); 3579 (void) mxge_open(sc); 3580 } 3581 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3582 } 3583 mtx_unlock(&sc->driver_mtx); 3584 return err; 3585 } 3586 3587 static void 3588 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3589 { 3590 mxge_softc_t *sc = ifp->if_softc; 3591 3592 3593 if (sc == NULL) 3594 return; 3595 ifmr->ifm_status = IFM_AVALID; 3596 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 3597 ifmr->ifm_active = IFM_AUTO | IFM_ETHER; 3598 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0; 3599 } 3600 3601 static int 3602 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 3603 { 3604 mxge_softc_t *sc = ifp->if_softc; 3605 struct ifreq *ifr = (struct ifreq *)data; 3606 int err, mask; 3607 3608 err = 0; 3609 switch (command) { 3610 case SIOCSIFADDR: 3611 case SIOCGIFADDR: 3612 err = ether_ioctl(ifp, command, data); 3613 break; 3614 3615 case SIOCSIFMTU: 3616 err = mxge_change_mtu(sc, ifr->ifr_mtu); 3617 break; 3618 3619 case SIOCSIFFLAGS: 3620 mtx_lock(&sc->driver_mtx); 3621 if (ifp->if_flags & IFF_UP) { 3622 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 3623 err = mxge_open(sc); 3624 callout_reset(&sc->co_hdl, mxge_ticks, 3625 mxge_tick, sc); 3626 } else { 3627 /* take care of promis can allmulti 3628 flag chages */ 3629 mxge_change_promisc(sc, 3630 ifp->if_flags & IFF_PROMISC); 3631 mxge_set_multicast_list(sc); 3632 } 3633 } else { 3634 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3635 callout_stop(&sc->co_hdl); 3636 mxge_close(sc); 3637 } 3638 } 3639 mtx_unlock(&sc->driver_mtx); 3640 break; 3641 3642 case SIOCADDMULTI: 3643 case SIOCDELMULTI: 3644 mtx_lock(&sc->driver_mtx); 3645 mxge_set_multicast_list(sc); 3646 mtx_unlock(&sc->driver_mtx); 3647 break; 3648 3649 case SIOCSIFCAP: 3650 mtx_lock(&sc->driver_mtx); 3651 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 3652 if (mask & IFCAP_TXCSUM) { 3653 if (IFCAP_TXCSUM & ifp->if_capenable) { 3654 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 3655 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 3656 | CSUM_TSO); 3657 } else { 3658 ifp->if_capenable |= IFCAP_TXCSUM; 3659 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 3660 } 3661 } else if (mask & IFCAP_RXCSUM) { 3662 if (IFCAP_RXCSUM & ifp->if_capenable) { 3663 ifp->if_capenable &= ~IFCAP_RXCSUM; 3664 sc->csum_flag = 0; 3665 } else { 3666 ifp->if_capenable |= IFCAP_RXCSUM; 3667 sc->csum_flag = 1; 3668 } 3669 } 3670 if (mask & IFCAP_TSO4) { 3671 if (IFCAP_TSO4 & ifp->if_capenable) { 3672 ifp->if_capenable &= ~IFCAP_TSO4; 3673 ifp->if_hwassist &= ~CSUM_TSO; 3674 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 3675 ifp->if_capenable |= IFCAP_TSO4; 3676 ifp->if_hwassist |= CSUM_TSO; 3677 } else { 3678 printf("mxge requires tx checksum offload" 3679 " be enabled to use TSO\n"); 3680 err = EINVAL; 3681 } 3682 } 3683 if (mask & IFCAP_LRO) { 3684 if (IFCAP_LRO & ifp->if_capenable) 3685 err = mxge_change_lro_locked(sc, 0); 3686 else 3687 err = mxge_change_lro_locked(sc, mxge_lro_cnt); 3688 } 3689 if (mask & IFCAP_VLAN_HWTAGGING) 3690 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 3691 mtx_unlock(&sc->driver_mtx); 3692 VLAN_CAPABILITIES(ifp); 3693 3694 break; 3695 3696 case SIOCGIFMEDIA: 3697 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 3698 &sc->media, command); 3699 break; 3700 3701 default: 3702 err = ENOTTY; 3703 } 3704 return err; 3705 } 3706 3707 static void 3708 mxge_fetch_tunables(mxge_softc_t *sc) 3709 { 3710 3711 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 3712 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 3713 &mxge_flow_control); 3714 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 3715 &mxge_intr_coal_delay); 3716 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 3717 &mxge_nvidia_ecrc_enable); 3718 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 3719 &mxge_force_firmware); 3720 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 3721 &mxge_deassert_wait); 3722 TUNABLE_INT_FETCH("hw.mxge.verbose", 3723 &mxge_verbose); 3724 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 3725 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); 3726 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 3727 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 3728 if (sc->lro_cnt != 0) 3729 mxge_lro_cnt = sc->lro_cnt; 3730 3731 if (bootverbose) 3732 mxge_verbose = 1; 3733 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 3734 mxge_intr_coal_delay = 30; 3735 if (mxge_ticks == 0) 3736 mxge_ticks = hz / 2; 3737 sc->pause = mxge_flow_control; 3738 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 3739 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_SRC_PORT) { 3740 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT; 3741 } 3742 } 3743 3744 3745 static void 3746 mxge_free_slices(mxge_softc_t *sc) 3747 { 3748 struct mxge_slice_state *ss; 3749 int i; 3750 3751 3752 if (sc->ss == NULL) 3753 return; 3754 3755 for (i = 0; i < sc->num_slices; i++) { 3756 ss = &sc->ss[i]; 3757 if (ss->fw_stats != NULL) { 3758 mxge_dma_free(&ss->fw_stats_dma); 3759 ss->fw_stats = NULL; 3760 mtx_destroy(&ss->tx.mtx); 3761 } 3762 if (ss->rx_done.entry != NULL) { 3763 mxge_dma_free(&ss->rx_done.dma); 3764 ss->rx_done.entry = NULL; 3765 } 3766 } 3767 free(sc->ss, M_DEVBUF); 3768 sc->ss = NULL; 3769 } 3770 3771 static int 3772 mxge_alloc_slices(mxge_softc_t *sc) 3773 { 3774 mxge_cmd_t cmd; 3775 struct mxge_slice_state *ss; 3776 size_t bytes; 3777 int err, i, max_intr_slots; 3778 3779 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 3780 if (err != 0) { 3781 device_printf(sc->dev, "Cannot determine rx ring size\n"); 3782 return err; 3783 } 3784 sc->rx_ring_size = cmd.data0; 3785 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 3786 3787 bytes = sizeof (*sc->ss) * sc->num_slices; 3788 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 3789 if (sc->ss == NULL) 3790 return (ENOMEM); 3791 for (i = 0; i < sc->num_slices; i++) { 3792 ss = &sc->ss[i]; 3793 3794 ss->sc = sc; 3795 3796 /* allocate per-slice rx interrupt queues */ 3797 3798 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 3799 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 3800 if (err != 0) 3801 goto abort; 3802 ss->rx_done.entry = ss->rx_done.dma.addr; 3803 bzero(ss->rx_done.entry, bytes); 3804 3805 /* 3806 * allocate the per-slice firmware stats; stats 3807 * (including tx) are used used only on the first 3808 * slice for now 3809 */ 3810 if (i > 0) 3811 continue; 3812 3813 bytes = sizeof (*ss->fw_stats); 3814 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 3815 sizeof (*ss->fw_stats), 64); 3816 if (err != 0) 3817 goto abort; 3818 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 3819 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), 3820 "%s:tx(%d)", device_get_nameunit(sc->dev), i); 3821 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); 3822 } 3823 3824 return (0); 3825 3826 abort: 3827 mxge_free_slices(sc); 3828 return (ENOMEM); 3829 } 3830 3831 static void 3832 mxge_slice_probe(mxge_softc_t *sc) 3833 { 3834 mxge_cmd_t cmd; 3835 char *old_fw; 3836 int msix_cnt, status, max_intr_slots; 3837 3838 sc->num_slices = 1; 3839 /* 3840 * don't enable multiple slices if they are not enabled, 3841 * or if this is not an SMP system 3842 */ 3843 3844 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) 3845 return; 3846 3847 /* see how many MSI-X interrupts are available */ 3848 msix_cnt = pci_msix_count(sc->dev); 3849 if (msix_cnt < 2) 3850 return; 3851 3852 /* now load the slice aware firmware see what it supports */ 3853 old_fw = sc->fw_name; 3854 if (old_fw == mxge_fw_aligned) 3855 sc->fw_name = mxge_fw_rss_aligned; 3856 else 3857 sc->fw_name = mxge_fw_rss_unaligned; 3858 status = mxge_load_firmware(sc, 0); 3859 if (status != 0) { 3860 device_printf(sc->dev, "Falling back to a single slice\n"); 3861 return; 3862 } 3863 3864 /* try to send a reset command to the card to see if it 3865 is alive */ 3866 memset(&cmd, 0, sizeof (cmd)); 3867 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 3868 if (status != 0) { 3869 device_printf(sc->dev, "failed reset\n"); 3870 goto abort_with_fw; 3871 } 3872 3873 /* get rx ring size */ 3874 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 3875 if (status != 0) { 3876 device_printf(sc->dev, "Cannot determine rx ring size\n"); 3877 goto abort_with_fw; 3878 } 3879 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 3880 3881 /* tell it the size of the interrupt queues */ 3882 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 3883 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 3884 if (status != 0) { 3885 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 3886 goto abort_with_fw; 3887 } 3888 3889 /* ask the maximum number of slices it supports */ 3890 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 3891 if (status != 0) { 3892 device_printf(sc->dev, 3893 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 3894 goto abort_with_fw; 3895 } 3896 sc->num_slices = cmd.data0; 3897 if (sc->num_slices > msix_cnt) 3898 sc->num_slices = msix_cnt; 3899 3900 if (mxge_max_slices == -1) { 3901 /* cap to number of CPUs in system */ 3902 if (sc->num_slices > mp_ncpus) 3903 sc->num_slices = mp_ncpus; 3904 } else { 3905 if (sc->num_slices > mxge_max_slices) 3906 sc->num_slices = mxge_max_slices; 3907 } 3908 /* make sure it is a power of two */ 3909 while (sc->num_slices & (sc->num_slices - 1)) 3910 sc->num_slices--; 3911 3912 if (mxge_verbose) 3913 device_printf(sc->dev, "using %d slices\n", 3914 sc->num_slices); 3915 3916 return; 3917 3918 abort_with_fw: 3919 sc->fw_name = old_fw; 3920 (void) mxge_load_firmware(sc, 0); 3921 } 3922 3923 static int 3924 mxge_add_msix_irqs(mxge_softc_t *sc) 3925 { 3926 size_t bytes; 3927 int count, err, i, rid; 3928 3929 rid = PCIR_BAR(2); 3930 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 3931 &rid, RF_ACTIVE); 3932 3933 if (sc->msix_table_res == NULL) { 3934 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 3935 return ENXIO; 3936 } 3937 3938 count = sc->num_slices; 3939 err = pci_alloc_msix(sc->dev, &count); 3940 if (err != 0) { 3941 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 3942 "err = %d \n", sc->num_slices, err); 3943 goto abort_with_msix_table; 3944 } 3945 if (count < sc->num_slices) { 3946 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 3947 count, sc->num_slices); 3948 device_printf(sc->dev, 3949 "Try setting hw.mxge.max_slices to %d\n", 3950 count); 3951 err = ENOSPC; 3952 goto abort_with_msix; 3953 } 3954 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 3955 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 3956 if (sc->msix_irq_res == NULL) { 3957 err = ENOMEM; 3958 goto abort_with_msix; 3959 } 3960 3961 for (i = 0; i < sc->num_slices; i++) { 3962 rid = i + 1; 3963 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 3964 SYS_RES_IRQ, 3965 &rid, RF_ACTIVE); 3966 if (sc->msix_irq_res[i] == NULL) { 3967 device_printf(sc->dev, "couldn't allocate IRQ res" 3968 " for message %d\n", i); 3969 err = ENXIO; 3970 goto abort_with_res; 3971 } 3972 } 3973 3974 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 3975 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 3976 3977 for (i = 0; i < sc->num_slices; i++) { 3978 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 3979 INTR_TYPE_NET | INTR_MPSAFE, 3980 #if __FreeBSD_version > 700030 3981 NULL, 3982 #endif 3983 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 3984 if (err != 0) { 3985 device_printf(sc->dev, "couldn't setup intr for " 3986 "message %d\n", i); 3987 goto abort_with_intr; 3988 } 3989 } 3990 3991 if (mxge_verbose) { 3992 device_printf(sc->dev, "using %d msix IRQs:", 3993 sc->num_slices); 3994 for (i = 0; i < sc->num_slices; i++) 3995 printf(" %ld", rman_get_start(sc->msix_irq_res[i])); 3996 printf("\n"); 3997 } 3998 return (0); 3999 4000 abort_with_intr: 4001 for (i = 0; i < sc->num_slices; i++) { 4002 if (sc->msix_ih[i] != NULL) { 4003 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4004 sc->msix_ih[i]); 4005 sc->msix_ih[i] = NULL; 4006 } 4007 } 4008 free(sc->msix_ih, M_DEVBUF); 4009 4010 4011 abort_with_res: 4012 for (i = 0; i < sc->num_slices; i++) { 4013 rid = i + 1; 4014 if (sc->msix_irq_res[i] != NULL) 4015 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4016 sc->msix_irq_res[i]); 4017 sc->msix_irq_res[i] = NULL; 4018 } 4019 free(sc->msix_irq_res, M_DEVBUF); 4020 4021 4022 abort_with_msix: 4023 pci_release_msi(sc->dev); 4024 4025 abort_with_msix_table: 4026 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4027 sc->msix_table_res); 4028 4029 return err; 4030 } 4031 4032 static int 4033 mxge_add_single_irq(mxge_softc_t *sc) 4034 { 4035 int count, err, rid; 4036 4037 count = pci_msi_count(sc->dev); 4038 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4039 rid = 1; 4040 } else { 4041 rid = 0; 4042 sc->legacy_irq = 1; 4043 } 4044 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0, 4045 1, RF_SHAREABLE | RF_ACTIVE); 4046 if (sc->irq_res == NULL) { 4047 device_printf(sc->dev, "could not alloc interrupt\n"); 4048 return ENXIO; 4049 } 4050 if (mxge_verbose) 4051 device_printf(sc->dev, "using %s irq %ld\n", 4052 sc->legacy_irq ? "INTx" : "MSI", 4053 rman_get_start(sc->irq_res)); 4054 err = bus_setup_intr(sc->dev, sc->irq_res, 4055 INTR_TYPE_NET | INTR_MPSAFE, 4056 #if __FreeBSD_version > 700030 4057 NULL, 4058 #endif 4059 mxge_intr, &sc->ss[0], &sc->ih); 4060 if (err != 0) { 4061 bus_release_resource(sc->dev, SYS_RES_IRQ, 4062 sc->legacy_irq ? 0 : 1, sc->irq_res); 4063 if (!sc->legacy_irq) 4064 pci_release_msi(sc->dev); 4065 } 4066 return err; 4067 } 4068 4069 static void 4070 mxge_rem_msix_irqs(mxge_softc_t *sc) 4071 { 4072 int i, rid; 4073 4074 for (i = 0; i < sc->num_slices; i++) { 4075 if (sc->msix_ih[i] != NULL) { 4076 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4077 sc->msix_ih[i]); 4078 sc->msix_ih[i] = NULL; 4079 } 4080 } 4081 free(sc->msix_ih, M_DEVBUF); 4082 4083 for (i = 0; i < sc->num_slices; i++) { 4084 rid = i + 1; 4085 if (sc->msix_irq_res[i] != NULL) 4086 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4087 sc->msix_irq_res[i]); 4088 sc->msix_irq_res[i] = NULL; 4089 } 4090 free(sc->msix_irq_res, M_DEVBUF); 4091 4092 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4093 sc->msix_table_res); 4094 4095 pci_release_msi(sc->dev); 4096 return; 4097 } 4098 4099 static void 4100 mxge_rem_single_irq(mxge_softc_t *sc) 4101 { 4102 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4103 bus_release_resource(sc->dev, SYS_RES_IRQ, 4104 sc->legacy_irq ? 0 : 1, sc->irq_res); 4105 if (!sc->legacy_irq) 4106 pci_release_msi(sc->dev); 4107 } 4108 4109 static void 4110 mxge_rem_irq(mxge_softc_t *sc) 4111 { 4112 if (sc->num_slices > 1) 4113 mxge_rem_msix_irqs(sc); 4114 else 4115 mxge_rem_single_irq(sc); 4116 } 4117 4118 static int 4119 mxge_add_irq(mxge_softc_t *sc) 4120 { 4121 int err; 4122 4123 if (sc->num_slices > 1) 4124 err = mxge_add_msix_irqs(sc); 4125 else 4126 err = mxge_add_single_irq(sc); 4127 4128 if (0 && err == 0 && sc->num_slices > 1) { 4129 mxge_rem_msix_irqs(sc); 4130 err = mxge_add_msix_irqs(sc); 4131 } 4132 return err; 4133 } 4134 4135 4136 static int 4137 mxge_attach(device_t dev) 4138 { 4139 mxge_softc_t *sc = device_get_softc(dev); 4140 struct ifnet *ifp; 4141 int err, rid; 4142 4143 sc->dev = dev; 4144 mxge_fetch_tunables(sc); 4145 4146 err = bus_dma_tag_create(NULL, /* parent */ 4147 1, /* alignment */ 4148 0, /* boundary */ 4149 BUS_SPACE_MAXADDR, /* low */ 4150 BUS_SPACE_MAXADDR, /* high */ 4151 NULL, NULL, /* filter */ 4152 65536 + 256, /* maxsize */ 4153 MXGE_MAX_SEND_DESC, /* num segs */ 4154 65536, /* maxsegsize */ 4155 0, /* flags */ 4156 NULL, NULL, /* lock */ 4157 &sc->parent_dmat); /* tag */ 4158 4159 if (err != 0) { 4160 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4161 err); 4162 goto abort_with_nothing; 4163 } 4164 4165 ifp = sc->ifp = if_alloc(IFT_ETHER); 4166 if (ifp == NULL) { 4167 device_printf(dev, "can not if_alloc()\n"); 4168 err = ENOSPC; 4169 goto abort_with_parent_dmat; 4170 } 4171 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4172 4173 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4174 device_get_nameunit(dev)); 4175 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4176 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4177 "%s:drv", device_get_nameunit(dev)); 4178 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4179 MTX_NETWORK_LOCK, MTX_DEF); 4180 4181 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4182 4183 mxge_setup_cfg_space(sc); 4184 4185 /* Map the board into the kernel */ 4186 rid = PCIR_BARS; 4187 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 4188 ~0, 1, RF_ACTIVE); 4189 if (sc->mem_res == NULL) { 4190 device_printf(dev, "could not map memory\n"); 4191 err = ENXIO; 4192 goto abort_with_lock; 4193 } 4194 sc->sram = rman_get_virtual(sc->mem_res); 4195 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4196 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4197 device_printf(dev, "impossible memory region size %ld\n", 4198 rman_get_size(sc->mem_res)); 4199 err = ENXIO; 4200 goto abort_with_mem_res; 4201 } 4202 4203 /* make NULL terminated copy of the EEPROM strings section of 4204 lanai SRAM */ 4205 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4206 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4207 rman_get_bushandle(sc->mem_res), 4208 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4209 sc->eeprom_strings, 4210 MXGE_EEPROM_STRINGS_SIZE - 2); 4211 err = mxge_parse_strings(sc); 4212 if (err != 0) 4213 goto abort_with_mem_res; 4214 4215 /* Enable write combining for efficient use of PCIe bus */ 4216 mxge_enable_wc(sc); 4217 4218 /* Allocate the out of band dma memory */ 4219 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4220 sizeof (mxge_cmd_t), 64); 4221 if (err != 0) 4222 goto abort_with_mem_res; 4223 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4224 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4225 if (err != 0) 4226 goto abort_with_cmd_dma; 4227 4228 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4229 if (err != 0) 4230 goto abort_with_zeropad_dma; 4231 4232 /* select & load the firmware */ 4233 err = mxge_select_firmware(sc); 4234 if (err != 0) 4235 goto abort_with_dmabench; 4236 sc->intr_coal_delay = mxge_intr_coal_delay; 4237 4238 mxge_slice_probe(sc); 4239 err = mxge_alloc_slices(sc); 4240 if (err != 0) 4241 goto abort_with_dmabench; 4242 4243 err = mxge_reset(sc, 0); 4244 if (err != 0) 4245 goto abort_with_slices; 4246 4247 err = mxge_alloc_rings(sc); 4248 if (err != 0) { 4249 device_printf(sc->dev, "failed to allocate rings\n"); 4250 goto abort_with_dmabench; 4251 } 4252 4253 err = mxge_add_irq(sc); 4254 if (err != 0) { 4255 device_printf(sc->dev, "failed to add irq\n"); 4256 goto abort_with_rings; 4257 } 4258 4259 ifp->if_baudrate = 100000000; 4260 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4261 IFCAP_VLAN_MTU | IFCAP_LRO; 4262 4263 #ifdef MXGE_NEW_VLAN_API 4264 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4265 #endif 4266 4267 sc->max_mtu = mxge_max_mtu(sc); 4268 if (sc->max_mtu >= 9000) 4269 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4270 else 4271 device_printf(dev, "MTU limited to %d. Install " 4272 "latest firmware for 9000 byte jumbo support\n", 4273 sc->max_mtu - ETHER_HDR_LEN); 4274 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4275 ifp->if_capenable = ifp->if_capabilities; 4276 if (sc->lro_cnt == 0) 4277 ifp->if_capenable &= ~IFCAP_LRO; 4278 sc->csum_flag = 1; 4279 ifp->if_init = mxge_init; 4280 ifp->if_softc = sc; 4281 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4282 ifp->if_ioctl = mxge_ioctl; 4283 ifp->if_start = mxge_start; 4284 /* Initialise the ifmedia structure */ 4285 ifmedia_init(&sc->media, 0, mxge_media_change, 4286 mxge_media_status); 4287 mxge_set_media(sc, IFM_ETHER | IFM_AUTO); 4288 mxge_media_probe(sc); 4289 ether_ifattach(ifp, sc->mac_addr); 4290 /* ether_ifattach sets mtu to 1500 */ 4291 if (ifp->if_capabilities & IFCAP_JUMBO_MTU) 4292 ifp->if_mtu = 9000; 4293 4294 mxge_add_sysctls(sc); 4295 return 0; 4296 4297 abort_with_rings: 4298 mxge_free_rings(sc); 4299 abort_with_slices: 4300 mxge_free_slices(sc); 4301 abort_with_dmabench: 4302 mxge_dma_free(&sc->dmabench_dma); 4303 abort_with_zeropad_dma: 4304 mxge_dma_free(&sc->zeropad_dma); 4305 abort_with_cmd_dma: 4306 mxge_dma_free(&sc->cmd_dma); 4307 abort_with_mem_res: 4308 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4309 abort_with_lock: 4310 pci_disable_busmaster(dev); 4311 mtx_destroy(&sc->cmd_mtx); 4312 mtx_destroy(&sc->driver_mtx); 4313 if_free(ifp); 4314 abort_with_parent_dmat: 4315 bus_dma_tag_destroy(sc->parent_dmat); 4316 4317 abort_with_nothing: 4318 return err; 4319 } 4320 4321 static int 4322 mxge_detach(device_t dev) 4323 { 4324 mxge_softc_t *sc = device_get_softc(dev); 4325 4326 if (mxge_vlans_active(sc)) { 4327 device_printf(sc->dev, 4328 "Detach vlans before removing module\n"); 4329 return EBUSY; 4330 } 4331 mtx_lock(&sc->driver_mtx); 4332 callout_stop(&sc->co_hdl); 4333 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4334 mxge_close(sc); 4335 mtx_unlock(&sc->driver_mtx); 4336 ether_ifdetach(sc->ifp); 4337 ifmedia_removeall(&sc->media); 4338 mxge_dummy_rdma(sc, 0); 4339 mxge_rem_sysctls(sc); 4340 mxge_rem_irq(sc); 4341 mxge_free_rings(sc); 4342 mxge_free_slices(sc); 4343 mxge_dma_free(&sc->dmabench_dma); 4344 mxge_dma_free(&sc->zeropad_dma); 4345 mxge_dma_free(&sc->cmd_dma); 4346 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4347 pci_disable_busmaster(dev); 4348 mtx_destroy(&sc->cmd_mtx); 4349 mtx_destroy(&sc->driver_mtx); 4350 if_free(sc->ifp); 4351 bus_dma_tag_destroy(sc->parent_dmat); 4352 return 0; 4353 } 4354 4355 static int 4356 mxge_shutdown(device_t dev) 4357 { 4358 return 0; 4359 } 4360 4361 /* 4362 This file uses Myri10GE driver indentation. 4363 4364 Local Variables: 4365 c-file-style:"linux" 4366 tab-width:8 4367 End: 4368 */ 4369