/******************************************************************************
SPDX-License-Identifier: BSD-2-Clause

Copyright (c) 2006-2013, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>
#include <contrib/zlib/zlib.h>
#include <dev/zlib/zcalloc.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet6/ip6_var.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h>		/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>
#include <sys/buf_ring.h>

#include "opt_inet.h"
#include "opt_inet6.h"

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_ticks;
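/*
 * The four firmware images below differ along two axes: "eth" vs.
 * "ethp" selects whether the NIC may assume 8-byte-aligned PCIe
 * completions (see the discussion above mxge_firmware_probe()), and
 * the "rss" variants add receive-side scaling / multi-slice support.
 */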
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static int mxge_throttle = 0;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),

	DEVMETHOD_END
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n", rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}
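
/*
 * Small allocations are kept within a single 4KB boundary so a
 * segment never straddles a page; larger, page-aligned allocations
 * get one unconstrained, physically contiguous segment.
 */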
static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */
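/*
 * For example (values illustrative only), a raw string block might be
 *   "MAC=00:60:dd:47:ab:cd\0SN=123456\0PC=sample-product-code\0\0";
 * parsing stops at the terminating empty string, and SN2=, when
 * present, takes precedence over SN=.
 */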

static int
mxge_parse_strings(mxge_softc_t *sc)
{
	char *ptr;
	int i, found_mac, found_sn2;
	char *endptr;

	ptr = sc->eeprom_strings;
	found_mac = 0;
	found_sn2 = 0;
	while (*ptr != '\0') {
		if (strncmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			for (i = 0;;) {
				sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
				if (endptr - ptr != 2)
					goto abort;
				ptr = endptr;
				if (++i == 6)
					break;
				if (*ptr++ != ':')
					goto abort;
			}
			found_mac = 1;
		} else if (strncmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strlcpy(sc->product_code_string, ptr,
				sizeof(sc->product_code_string));
		} else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
			ptr += 3;
			strlcpy(sc->serial_number_string, ptr,
				sizeof(sc->serial_number_string));
		} else if (strncmp(ptr, "SN2=", 4) == 0) {
			/* SN2 takes precedence over SN */
			ptr += 4;
			found_sn2 = 1;
			strlcpy(sc->serial_number_string, ptr,
				sizeof(sc->serial_number_string));
		}
		while (*ptr++ != '\0') {}
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}

#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/*
	 * XXXX
	 * Test below is commented because it is believed that doing
	 * config read/write beyond 0xff will access the config space
	 * for the next larger function.  Uncomment this and remove
	 * the hacky pmap_mapdev() way of accessing config space when
	 * FreeBSD grows support for extended pcie config space access.
	 */
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);
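
	/*
	 * The offset below follows the standard PCIe ECAM layout
	 * within the chipset's extended config window: 1MB per bus
	 * and 4KB per function, with eight functions per slot.
	 */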
	off = base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev(va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev(va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev(va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif

static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/*
	 * Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.  Dividing the bytes moved by
	 * the tick count therefore yields MB/s directly; the extra
	 * factor of 2 below converts 0.5us ticks to microseconds.
	 */

	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen; the firmware reports such an
	 * abort as MXGEFW_CMD_ERROR_UNALIGNED, which mxge_send_cmd()
	 * turns into E2BIG.  Not required on Z8ES or newer.
	 */
	if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
		return 0;
	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}

static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;
	int force_firmware = mxge_force_firmware;

	if (sc->throttle)
		force_firmware = sc->throttle;

	if (force_firmware != 0) {
		if (force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{

	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
	       &sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;

}

static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	z_stream zs;
	char *inflate_buffer;
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	size_t fw_len;

	fw = firmware_get(sc->fw_name);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}

	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = zcalloc_nowait;
	zs.zfree = zcfree;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL) {
		status = ENOMEM;
		goto abort_with_zs;
	}
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}
	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
			     (inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file\n");
		status = EIO;
		goto abort_with_buffer;
	}
	hdr = (const void*)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM in 256-byte PIO
	   bursts; the readback between bursts forces the posted
	   writes to complete before the next burst begins. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      inflate_buffer + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		(void)*sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	free(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);		/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);		/* dummy addr MSW */
	buf[4] = htobe32(dma_low);		/* dummy addr LSW */
	buf[5] = htobe32(enable);		/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)\n",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}

static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);
	mtx_lock(&sc->cmd_mtx);
	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		case MXGEFW_CMD_ERROR_I2C_ABSENT:
			err = ENXIO;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out, "
			      "result = %d\n",
			      cmd, be32toh(response->result));
	mtx_unlock(&sc->cmd_mtx);
	return err;
}

static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}
static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff.  However, the very first interfaces
	   do not.  Therefore the handoff copy must skip the first 8 bytes
	*/
	/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8);	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "handoff failed (%p = 0x%x)\n",
			      confirm, *confirm);

		return ENXIO;
	}
	return 0;
}
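
/* The firmware takes the MAC packed into two command words: bytes 0-3
   in data0 and bytes 4-5 in the low half of data1 (byte swapping is
   handled by mxge_send_cmd()). */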
static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

struct mxge_add_maddr_ctx {
	mxge_softc_t *sc;
	int error;
};

static u_int
mxge_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
{
	struct mxge_add_maddr_ctx *ctx = arg;
	mxge_cmd_t cmd;

	if (ctx->error != 0)
		return (0);
	bcopy(LLADDR(sdl), &cmd.data0, 4);
	bcopy(LLADDR(sdl) + 4, &cmd.data1, 2);
	cmd.data0 = htonl(cmd.data0);
	cmd.data1 = htonl(cmd.data1);

	ctx->error = mxge_send_cmd(ctx->sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);

	return (1);
}
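
/*
 * Refresh the firmware multicast filter in four steps: enter ALLMULTI,
 * flush the old filter, join each address on the interface's list, and
 * finally leave ALLMULTI.  Any failure leaves the NIC in ALLMULTI so
 * that no multicast traffic is silently dropped.
 */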
static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	struct mxge_add_maddr_ctx ctx;
	if_t ifp = sc->ifp;
	mxge_cmd_t cmd;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (if_getflags(ifp) & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */
	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */
	ctx.sc = sc;
	ctx.error = 0;
	if_foreach_llmaddr(ifp, mxge_add_maddr, &ctx);
	if (ctx.error != 0) {
		device_printf(sc->dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, "
			      "error status: %d\n", ctx.error);
		/* abort, leaving multicast filtering off */
		return;
	}

	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}

static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if it we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lc.lro_bad_csum = 0;
		ss->lc.lro_queued = 0;
		ss->lc.lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			bzero(ss->fw_stats, sizeof *ss->fw_stats);
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, if_getflags(sc->ifp) & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
				  &cmd)) {
			device_printf(sc->dev,
				      "can't enable throttle\n");
		}
	}
	return status;
}
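
/*
 * Sysctl handler: adjust the transmit throttle factor at run time.
 * The new value is offered to the firmware first and only cached in
 * the softc if the firmware accepts it.
 */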
static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc;
	int err;
	unsigned int throttle;

	sc = arg1;
	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);
	if (err != 0) {
		return err;
	}

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
	mtx_unlock(&sc->driver_mtx);
	return err;
}
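
/*
 * Sysctl glue for read-only counters the firmware keeps in network
 * byte order: swap the value, then report it through arg2 with arg1
 * cleared, so sysctl_handle_int() treats it as a plain value rather
 * than a pointer.
 */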
static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, sc->product_code_string,
			  0, "product_code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "watchdog_resets",
		       CTLFLAG_RD, &sc->watchdog_resets,
		       0, "Number of times NIC was reset");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "intr_coal_delay", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
	    sc, 0, mxge_change_intr_coal, "I",
	    "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    mxge_change_throttle, "I", "transmit throttling");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "flow_control_enabled",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    mxge_change_flow_control, "I",
	    "enable flow control");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "link_up", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->link_up, 0, mxge_handle_be32, "I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "rdma_tags_available", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->rdma_tags_available, 0, mxge_handle_be32, "I",
	    "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_bad_crc32", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_bad_crc32, 0, mxge_handle_be32, "I",
	    "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_bad_phy", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_bad_phy, 0, mxge_handle_be32, "I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_link_error_or_filtered",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_link_error_or_filtered, 0, mxge_handle_be32, "I",
	    "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_link_overflow",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_link_overflow, 0, mxge_handle_be32, "I",
	    "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_multicast_filtered",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_multicast_filtered, 0, mxge_handle_be32, "I",
	    "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_no_big_buffer",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_no_big_buffer, 0, mxge_handle_be32, "I",
	    "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_no_small_buffer",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_no_small_buffer, 0, mxge_handle_be32, "I",
	    "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_overrun",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_overrun, 0, mxge_handle_be32, "I",
	    "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_pause", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_pause, 0, mxge_handle_be32, "I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_runt", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_runt, 0, mxge_handle_be32, "I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_unicast_filtered",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_unicast_filtered, 0, mxge_handle_be32, "I",
	    "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
		    "slice", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		sprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
			    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
			       0, "number of bad csums preventing LRO");

		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}

/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */
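/*
 * Writing the later slots before the earlier ones ensures the NIC
 * cannot act on the first slot (whose valid flags are set last, by
 * mxge_submit_req() below) until the rest of the chain is in place.
 */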
static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}
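
/*
 * Parse the Ethernet and IP/IPv6 headers far enough to fill in the
 * mxge_pkt_info used by checksum offload and TSO: the IP header
 * offset and length, plus pointers to the IP and TCP headers (copied
 * into a scratch buffer when they are not contiguous in the first
 * mbuf).
 */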
static int
mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
	      struct mxge_pkt_info *pi)
{
	struct ether_vlan_header *eh;
	uint16_t etype;
	int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
#if IFCAP_TSO6 && defined(INET6)
	int nxt;
#endif

	eh = mtod(m, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		pi->ip_off = ETHER_HDR_LEN;
	}

	switch (etype) {
	case ETHERTYPE_IP:
		/*
		 * ensure ip header is in first mbuf, copy it to a
		 * scratch buffer if not
		 */
		pi->ip = (struct ip *)(m->m_data + pi->ip_off);
		pi->ip6 = NULL;
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
				   ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->ip_hlen = pi->ip->ip_hl << 2;
		if (!tso)
			return 0;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
				    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
				   sizeof(struct tcphdr), ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
		break;
#if IFCAP_TSO6 && defined(INET6)
	case ETHERTYPE_IPV6:
		pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
				   ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		nxt = 0;
		pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
		pi->ip_hlen -= pi->ip_off;
		if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
			return EINVAL;

		if (!tso)
			return 0;

		if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
			return EINVAL;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
				    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
				   sizeof(struct tcphdr), ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
		break;
#endif
	default:
		return EINVAL;
	}
	return 0;
}

#if IFCAP_TSO4
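
/*
 * TSO transmit path: walk the busdma segment list, chopping it into
 * MSS-sized sends and filling in each request's rdma_count
 * retroactively, as described in the long comment inside.
 */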
static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, struct mxge_pkt_info *pi)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	cksum_offset = pi->ip_off + pi->ip_hlen;
	cum_len = -(cksum_offset + (pi->tcp->th_off << 2));

	/* TSO implies checksum offload on this hardware */
	if (__predict_false((m->m_pkthdr.csum_flags &
			     (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
		/*
		 * If packet has full TCP csum, replace it with pseudo hdr
		 * sum that the NIC expects, otherwise the NIC will emit
		 * packets with bad TCP checksums.
		 */
		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
		if (pi->ip6) {
#if (CSUM_TCP_IPV6 != 0) && defined(INET6)
			m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
			sum = in6_cksum_pseudo(pi->ip6,
					       m->m_pkthdr.len - cksum_offset,
					       IPPROTO_TCP, 0);
#endif
		} else {
#ifdef INET
			m->m_pkthdr.csum_flags |= CSUM_TCP;
			sum = in_pseudo(pi->ip->ip_src.s_addr,
					pi->ip->ip_dst.s_addr,
					htons(IPPROTO_TCP + (m->m_pkthdr.len -
							     cksum_offset)));
#endif
		}
		m_copyback(m, offsetof(struct tcphdr, th_sum) +
			   cksum_offset, sizeof(sum), (caddr_t)&sum);
	}
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	if (pi->ip6) {
		/*
		 * for IPv6 TSO, the "checksum offset" is re-purposed
		 * to store the TCP header len
		 */
		cksum_offset = (pi->tcp->th_off << 2);
	}

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request.  For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request.  For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request.  All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (cksum_offset != 0 && !pi->ip6) {
				if (__predict_false(cksum_offset > seglen))
					cksum_offset -= seglen;
				else
					cksum_offset = 0;
			}
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);

	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}

	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		       (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;

}

#endif /* IFCAP_TSO4 */

#ifdef MXGE_NEW_VLAN_API
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion.  We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 */
static struct mbuf *
mxge_vlan_tag_insert(struct mbuf *m)
{
	struct ether_vlan_header *evl;

	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
	if (__predict_false(m == NULL))
		return NULL;
	if (m->m_len < sizeof(*evl)) {
		m = m_pullup(m, sizeof(*evl));
		if (__predict_false(m == NULL))
			return NULL;
	}
	/*
	 * Transform the Ethernet header into an Ethernet header
	 * with 802.1Q encapsulation.
	 */
	evl = mtod(m, struct ether_vlan_header *);
	bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
	      (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
	m->m_flags &= ~M_VLANTAG;
	return m;
}
#endif /* MXGE_NEW_VLAN_API */
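
/*
 * Transmit one mbuf chain on the given slice: insert the vlan tag in
 * software if needed, DMA-map the chain (defragmenting once on
 * EFBIG), hand TSO frames to mxge_encap_tso(), and turn everything
 * else into a plain send request list below.
 */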
2008 */ 2009 evl = mtod(m, struct ether_vlan_header *); 2010 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2011 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2012 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2013 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2014 m->m_flags &= ~M_VLANTAG; 2015 return m; 2016 } 2017 #endif /* MXGE_NEW_VLAN_API */ 2018 2019 static void 2020 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2021 { 2022 struct mxge_pkt_info pi = {0,0,0,0}; 2023 mxge_softc_t *sc; 2024 mcp_kreq_ether_send_t *req; 2025 bus_dma_segment_t *seg; 2026 struct mbuf *m_tmp; 2027 mxge_tx_ring_t *tx; 2028 int cnt, cum_len, err, i, idx, odd_flag; 2029 uint16_t pseudo_hdr_offset; 2030 uint8_t flags, cksum_offset; 2031 2032 sc = ss->sc; 2033 tx = &ss->tx; 2034 2035 #ifdef MXGE_NEW_VLAN_API 2036 if (m->m_flags & M_VLANTAG) { 2037 m = mxge_vlan_tag_insert(m); 2038 if (__predict_false(m == NULL)) 2039 goto drop_without_m; 2040 } 2041 #endif 2042 if (m->m_pkthdr.csum_flags & 2043 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2044 if (mxge_parse_tx(ss, m, &pi)) 2045 goto drop; 2046 } 2047 2048 /* (try to) map the frame for DMA */ 2049 idx = tx->req & tx->mask; 2050 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2051 m, tx->seg_list, &cnt, 2052 BUS_DMA_NOWAIT); 2053 if (__predict_false(err == EFBIG)) { 2054 /* Too many segments in the chain. Try 2055 to defrag */ 2056 m_tmp = m_defrag(m, M_NOWAIT); 2057 if (m_tmp == NULL) { 2058 goto drop; 2059 } 2060 ss->tx.defrag++; 2061 m = m_tmp; 2062 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2063 tx->info[idx].map, 2064 m, tx->seg_list, &cnt, 2065 BUS_DMA_NOWAIT); 2066 } 2067 if (__predict_false(err != 0)) { 2068 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2069 " packet len = %d\n", err, m->m_pkthdr.len); 2070 goto drop; 2071 } 2072 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2073 BUS_DMASYNC_PREWRITE); 2074 tx->info[idx].m = m; 2075 2076 #if IFCAP_TSO4 2077 /* TSO is different enough, we handle it in another routine */ 2078 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2079 mxge_encap_tso(ss, m, cnt, &pi); 2080 return; 2081 } 2082 #endif 2083 2084 req = tx->req_list; 2085 cksum_offset = 0; 2086 pseudo_hdr_offset = 0; 2087 flags = MXGEFW_FLAGS_NO_TSO; 2088 2089 /* checksum offloading? 
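 * For a non-TSO send, cksum_offset tells the firmware where the
 * region to be summed starts (the transport header), and
 * pseudo_hdr_offset (cksum_offset plus the stack's csum_data,
 * i.e. the offset of th_sum/uh_sum) is where the result is folded
 * into the stack-provided pseudo-header sum.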
*/ 2090 if (m->m_pkthdr.csum_flags & 2091 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2092 /* ensure ip header is in first mbuf, copy 2093 it to a scratch buffer if not */ 2094 cksum_offset = pi.ip_off + pi.ip_hlen; 2095 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2096 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2097 req->cksum_offset = cksum_offset; 2098 flags |= MXGEFW_FLAGS_CKSUM; 2099 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2100 } else { 2101 odd_flag = 0; 2102 } 2103 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2104 flags |= MXGEFW_FLAGS_SMALL; 2105 2106 /* convert segments into a request list */ 2107 cum_len = 0; 2108 seg = tx->seg_list; 2109 req->flags = MXGEFW_FLAGS_FIRST; 2110 for (i = 0; i < cnt; i++) { 2111 req->addr_low = 2112 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2113 req->addr_high = 2114 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2115 req->length = htobe16(seg->ds_len); 2116 req->cksum_offset = cksum_offset; 2117 if (cksum_offset > seg->ds_len) 2118 cksum_offset -= seg->ds_len; 2119 else 2120 cksum_offset = 0; 2121 req->pseudo_hdr_offset = pseudo_hdr_offset; 2122 req->pad = 0; /* complete solid 16-byte block */ 2123 req->rdma_count = 1; 2124 req->flags |= flags | ((cum_len & 1) * odd_flag); 2125 cum_len += seg->ds_len; 2126 seg++; 2127 req++; 2128 req->flags = 0; 2129 } 2130 req--; 2131 /* pad runts to 60 bytes */ 2132 if (cum_len < 60) { 2133 req++; 2134 req->addr_low = 2135 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2136 req->addr_high = 2137 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2138 req->length = htobe16(60 - cum_len); 2139 req->cksum_offset = 0; 2140 req->pseudo_hdr_offset = pseudo_hdr_offset; 2141 req->pad = 0; /* complete solid 16-byte block */ 2142 req->rdma_count = 1; 2143 req->flags |= flags | ((cum_len & 1) * odd_flag); 2144 cnt++; 2145 } 2146 2147 tx->req_list[0].rdma_count = cnt; 2148 #if 0 2149 /* print what the firmware will see */ 2150 for (i = 0; i < cnt; i++) { 2151 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2152 "cso:%d, flags:0x%x, rdma:%d\n", 2153 i, (int)ntohl(tx->req_list[i].addr_high), 2154 (int)ntohl(tx->req_list[i].addr_low), 2155 (int)ntohs(tx->req_list[i].length), 2156 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2157 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2158 tx->req_list[i].rdma_count); 2159 } 2160 printf("--------------\n"); 2161 #endif 2162 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2163 mxge_submit_req(tx, tx->req_list, cnt); 2164 2165 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2166 /* tell the NIC to start polling this slice */ 2167 *tx->send_go = 1; 2168 tx->queue_active = 1; 2169 tx->activate++; 2170 wmb(); 2171 } 2172 2173 return; 2174 2175 drop: 2176 m_freem(m); 2177 drop_without_m: 2178 ss->oerrors++; 2179 return; 2180 } 2181 2182 static void 2183 mxge_qflush(if_t ifp) 2184 { 2185 mxge_softc_t *sc = if_getsoftc(ifp); 2186 mxge_tx_ring_t *tx; 2187 struct mbuf *m; 2188 int slice; 2189 2190 for (slice = 0; slice < sc->num_slices; slice++) { 2191 tx = &sc->ss[slice].tx; 2192 mtx_lock(&tx->mtx); 2193 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2194 m_freem(m); 2195 mtx_unlock(&tx->mtx); 2196 } 2197 if_qflush(ifp); 2198 } 2199 2200 static inline void 2201 mxge_start_locked(struct mxge_slice_state *ss) 2202 { 2203 mxge_softc_t *sc; 2204 struct mbuf *m; 2205 if_t ifp; 2206 mxge_tx_ring_t *tx; 2207 2208 sc = ss->sc; 2209 ifp = sc->ifp; 2210 tx = &ss->tx; 2211 2212 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2213 m = 
drbr_dequeue(ifp, tx->br); 2214 if (m == NULL) { 2215 return; 2216 } 2217 /* let BPF see it */ 2218 BPF_MTAP(ifp, m); 2219 2220 /* give it to the nic */ 2221 mxge_encap(ss, m); 2222 } 2223 /* ran out of transmit slots */ 2224 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2225 && (!drbr_empty(ifp, tx->br))) { 2226 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2227 tx->stall++; 2228 } 2229 } 2230 2231 static int 2232 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2233 { 2234 mxge_softc_t *sc; 2235 if_t ifp; 2236 mxge_tx_ring_t *tx; 2237 int err; 2238 2239 sc = ss->sc; 2240 ifp = sc->ifp; 2241 tx = &ss->tx; 2242 2243 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2244 IFF_DRV_RUNNING) { 2245 err = drbr_enqueue(ifp, tx->br, m); 2246 return (err); 2247 } 2248 2249 if (!drbr_needs_enqueue(ifp, tx->br) && 2250 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2251 /* let BPF see it */ 2252 BPF_MTAP(ifp, m); 2253 /* give it to the nic */ 2254 mxge_encap(ss, m); 2255 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2256 return (err); 2257 } 2258 if (!drbr_empty(ifp, tx->br)) 2259 mxge_start_locked(ss); 2260 return (0); 2261 } 2262 2263 static int 2264 mxge_transmit(if_t ifp, struct mbuf *m) 2265 { 2266 mxge_softc_t *sc = if_getsoftc(ifp); 2267 struct mxge_slice_state *ss; 2268 mxge_tx_ring_t *tx; 2269 int err = 0; 2270 int slice; 2271 2272 slice = m->m_pkthdr.flowid; 2273 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2274 2275 ss = &sc->ss[slice]; 2276 tx = &ss->tx; 2277 2278 if (mtx_trylock(&tx->mtx)) { 2279 err = mxge_transmit_locked(ss, m); 2280 mtx_unlock(&tx->mtx); 2281 } else { 2282 err = drbr_enqueue(ifp, tx->br, m); 2283 } 2284 2285 return (err); 2286 } 2287 2288 static void 2289 mxge_start(if_t ifp) 2290 { 2291 mxge_softc_t *sc = if_getsoftc(ifp); 2292 struct mxge_slice_state *ss; 2293 2294 /* only use the first slice for now */ 2295 ss = &sc->ss[0]; 2296 mtx_lock(&ss->tx.mtx); 2297 mxge_start_locked(ss); 2298 mtx_unlock(&ss->tx.mtx); 2299 } 2300 2301 /* 2302 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2303 * at most 32 bytes at a time, so as to avoid involving the software 2304 * pio handler in the nic. 
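 * Each burst covers eight receive descriptors: two 32-byte PIO
 * copies of four 8-byte mcp_kreq_ether_recv_t entries apiece.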
We re-write the first segment's low 2305 * DMA address to mark it valid only after we write the entire chunk 2306 * in a burst 2307 */ 2308 static inline void 2309 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2310 mcp_kreq_ether_recv_t *src) 2311 { 2312 uint32_t low; 2313 2314 low = src->addr_low; 2315 src->addr_low = 0xffffffff; 2316 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2317 wmb(); 2318 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2319 wmb(); 2320 src->addr_low = low; 2321 dst->addr_low = low; 2322 wmb(); 2323 } 2324 2325 static int 2326 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2327 { 2328 bus_dma_segment_t seg; 2329 struct mbuf *m; 2330 mxge_rx_ring_t *rx = &ss->rx_small; 2331 int cnt, err; 2332 2333 m = m_gethdr(M_NOWAIT, MT_DATA); 2334 if (m == NULL) { 2335 rx->alloc_fail++; 2336 err = ENOBUFS; 2337 goto done; 2338 } 2339 m->m_len = MHLEN; 2340 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2341 &seg, &cnt, BUS_DMA_NOWAIT); 2342 if (err != 0) { 2343 m_free(m); 2344 goto done; 2345 } 2346 rx->info[idx].m = m; 2347 rx->shadow[idx].addr_low = 2348 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2349 rx->shadow[idx].addr_high = 2350 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2351 2352 done: 2353 if ((idx & 7) == 7) 2354 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2355 return err; 2356 } 2357 2358 static int 2359 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2360 { 2361 bus_dma_segment_t seg[3]; 2362 struct mbuf *m; 2363 mxge_rx_ring_t *rx = &ss->rx_big; 2364 int cnt, err, i; 2365 2366 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2367 if (m == NULL) { 2368 rx->alloc_fail++; 2369 err = ENOBUFS; 2370 goto done; 2371 } 2372 m->m_len = rx->mlen; 2373 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2374 seg, &cnt, BUS_DMA_NOWAIT); 2375 if (err != 0) { 2376 m_free(m); 2377 goto done; 2378 } 2379 rx->info[idx].m = m; 2380 rx->shadow[idx].addr_low = 2381 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2382 rx->shadow[idx].addr_high = 2383 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2384 2385 done: 2386 for (i = 0; i < rx->nbufs; i++) { 2387 if ((idx & 7) == 7) { 2388 mxge_submit_8rx(&rx->lanai[idx - 7], 2389 &rx->shadow[idx - 7]); 2390 } 2391 idx++; 2392 } 2393 return err; 2394 } 2395 2396 #ifdef INET6 2397 2398 static uint16_t 2399 mxge_csum_generic(uint16_t *raw, int len) 2400 { 2401 uint32_t csum; 2402 2403 csum = 0; 2404 while (len > 0) { 2405 csum += *raw; 2406 raw++; 2407 len -= 2; 2408 } 2409 csum = (csum >> 16) + (csum & 0xffff); 2410 csum = (csum >> 16) + (csum & 0xffff); 2411 return (uint16_t)csum; 2412 } 2413 2414 static inline uint16_t 2415 mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum) 2416 { 2417 uint32_t partial; 2418 int nxt, cksum_offset; 2419 struct ip6_hdr *ip6 = p; 2420 uint16_t c; 2421 2422 nxt = ip6->ip6_nxt; 2423 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN; 2424 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) { 2425 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN, 2426 IPPROTO_IPV6, &nxt); 2427 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 2428 return (1); 2429 } 2430 2431 /* 2432 * IPv6 headers do not contain a checksum, and hence 2433 * do not checksum to zero, so they don't "fall out" 2434 * of the partial checksum calculation like IPv4 2435 * headers do. We need to fix the partial checksum by 2436 * subtracting the checksum of the IPv6 header. 
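 *
 * One's complement subtraction is addition of the bitwise
 * complement with an end-around carry, which is exactly what the
 * code below does:
 *	csum += ~partial;
 *	csum += (csum < ~partial);	(end-around carry)
 * before folding the 32-bit accumulator back down to 16 bits.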
2437 	 */
2438 
2439 	partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
2440 	    ETHER_HDR_LEN);
2441 	csum += ~partial;
2442 	csum += (csum < ~partial);
2443 	csum = (csum >> 16) + (csum & 0xFFFF);
2444 	csum = (csum >> 16) + (csum & 0xFFFF);
2445 	c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
2446 	    csum);
2447 	c ^= 0xffff;
2448 	return (c);
2449 }
2450 #endif /* INET6 */
2451 /*
2452  * Myri10GE hardware checksums are not valid if the sender
2453  * padded the frame with non-zero padding. This is because
2454  * the firmware just does a simple 16-bit 1s complement
2455  * checksum across the entire frame, excluding the first 14
2456  * bytes. It is best to simply check the checksum and
2457  * tell the stack about it only if the checksum is good.
2458  */
2459 
2460 static inline uint16_t
2461 mxge_rx_csum(struct mbuf *m, int csum)
2462 {
2463 	struct ether_header *eh;
2464 #ifdef INET
2465 	struct ip *ip;
2466 #endif
2467 #if defined(INET) || defined(INET6)
2468 	int cap = if_getcapenable(m->m_pkthdr.rcvif);
2469 #endif
2470 	uint16_t c, etype;
2471 
2472 	eh = mtod(m, struct ether_header *);
2473 	etype = ntohs(eh->ether_type);
2474 	switch (etype) {
2475 #ifdef INET
2476 	case ETHERTYPE_IP:
2477 		if ((cap & IFCAP_RXCSUM) == 0)
2478 			return (1);
2479 		ip = (struct ip *)(eh + 1);
2480 		if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
2481 			return (1);
2482 		c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2483 		    htonl(ntohs(csum) + ntohs(ip->ip_len) -
2484 			    (ip->ip_hl << 2) + ip->ip_p));
2485 		c ^= 0xffff;
2486 		break;
2487 #endif
2488 #ifdef INET6
2489 	case ETHERTYPE_IPV6:
2490 		if ((cap & IFCAP_RXCSUM_IPV6) == 0)
2491 			return (1);
2492 		c = mxge_rx_csum6((eh + 1), m, csum);
2493 		break;
2494 #endif
2495 	default:
2496 		c = 1;
2497 	}
2498 	return (c);
2499 }
2500 
2501 static void
2502 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2503 {
2504 	struct ether_vlan_header *evl;
2505 	uint32_t partial;
2506 
2507 	evl = mtod(m, struct ether_vlan_header *);
2508 
2509 	/*
2510 	 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2511 	 * after what the firmware thought was the end of the ethernet
2512 	 * header.
2513 	 */
2514 
2515 	/* put checksum into host byte order */
2516 	*csum = ntohs(*csum);
2517 	partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2518 	(*csum) += ~partial;
2519 	(*csum) += ((*csum) < ~partial);
2520 	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2521 	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2522 
2523 	/* restore checksum to network byte order;
2524 	   later consumers expect this */
2525 	*csum = htons(*csum);
2526 
2527 	/* save the tag */
2528 #ifdef MXGE_NEW_VLAN_API
2529 	m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2530 #else
2531 	{
2532 		struct m_tag *mtag;
2533 		mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2534 		    M_NOWAIT);
2535 		if (mtag == NULL)
2536 			return;
2537 		VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2538 		m_tag_prepend(m, mtag);
2539 	}
2540 
2541 #endif
2542 	m->m_flags |= M_VLANTAG;
2543 
2544 	/*
2545 	 * Remove the 802.1q header by copying the Ethernet
2546 	 * addresses over it and adjusting the beginning of
2547 	 * the data in the mbuf. The encapsulated Ethernet
2548 	 * type field is already in place.
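 *
 * Before: [ dst(6) | src(6) | 0x8100 | tag | type | payload ]
 * After:  [ dst(6) | src(6) | type | payload ]
 * (the bcopy slides dst/src forward by ETHER_VLAN_ENCAP_LEN and
 * m_adj() then trims those 4 bytes from the front of the mbuf)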
2549 */ 2550 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2551 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2552 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2553 } 2554 2555 static inline void 2556 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, 2557 uint32_t csum, int lro) 2558 { 2559 mxge_softc_t *sc; 2560 if_t ifp; 2561 struct mbuf *m; 2562 struct ether_header *eh; 2563 mxge_rx_ring_t *rx; 2564 bus_dmamap_t old_map; 2565 int idx; 2566 2567 sc = ss->sc; 2568 ifp = sc->ifp; 2569 rx = &ss->rx_big; 2570 idx = rx->cnt & rx->mask; 2571 rx->cnt += rx->nbufs; 2572 /* save a pointer to the received mbuf */ 2573 m = rx->info[idx].m; 2574 /* try to replace the received mbuf */ 2575 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2576 /* drop the frame -- the old mbuf is re-cycled */ 2577 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2578 return; 2579 } 2580 2581 /* unmap the received buffer */ 2582 old_map = rx->info[idx].map; 2583 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2584 bus_dmamap_unload(rx->dmat, old_map); 2585 2586 /* swap the bus_dmamap_t's */ 2587 rx->info[idx].map = rx->extra_map; 2588 rx->extra_map = old_map; 2589 2590 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2591 * aligned */ 2592 m->m_data += MXGEFW_PAD; 2593 2594 m->m_pkthdr.rcvif = ifp; 2595 m->m_len = m->m_pkthdr.len = len; 2596 ss->ipackets++; 2597 eh = mtod(m, struct ether_header *); 2598 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2599 mxge_vlan_tag_remove(m, &csum); 2600 } 2601 /* flowid only valid if RSS hashing is enabled */ 2602 if (sc->num_slices > 1) { 2603 m->m_pkthdr.flowid = (ss - sc->ss); 2604 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2605 } 2606 /* if the checksum is valid, mark it in the mbuf header */ 2607 if ((if_getcapenable(ifp) & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2608 (0 == mxge_rx_csum(m, csum))) { 2609 /* Tell the stack that the checksum is good */ 2610 m->m_pkthdr.csum_data = 0xffff; 2611 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2612 CSUM_DATA_VALID; 2613 2614 #if defined(INET) || defined (INET6) 2615 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0))) 2616 return; 2617 #endif 2618 } 2619 /* pass the frame up the stack */ 2620 if_input(ifp, m); 2621 } 2622 2623 static inline void 2624 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, 2625 uint32_t csum, int lro) 2626 { 2627 mxge_softc_t *sc; 2628 if_t ifp; 2629 struct ether_header *eh; 2630 struct mbuf *m; 2631 mxge_rx_ring_t *rx; 2632 bus_dmamap_t old_map; 2633 int idx; 2634 2635 sc = ss->sc; 2636 ifp = sc->ifp; 2637 rx = &ss->rx_small; 2638 idx = rx->cnt & rx->mask; 2639 rx->cnt++; 2640 /* save a pointer to the received mbuf */ 2641 m = rx->info[idx].m; 2642 /* try to replace the received mbuf */ 2643 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2644 /* drop the frame -- the old mbuf is re-cycled */ 2645 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2646 return; 2647 } 2648 2649 /* unmap the received buffer */ 2650 old_map = rx->info[idx].map; 2651 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2652 bus_dmamap_unload(rx->dmat, old_map); 2653 2654 /* swap the bus_dmamap_t's */ 2655 rx->info[idx].map = rx->extra_map; 2656 rx->extra_map = old_map; 2657 2658 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2659 * aligned */ 2660 m->m_data += MXGEFW_PAD; 2661 2662 m->m_pkthdr.rcvif = ifp; 2663 m->m_len = m->m_pkthdr.len = len; 2664 ss->ipackets++; 2665 eh = mtod(m, struct ether_header *); 2666 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2667 mxge_vlan_tag_remove(m, &csum); 2668 } 2669 
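	/* note: mxge_vlan_tag_remove() also fixed up the hardware
	 * csum to match the frame with the encapsulation removed */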
/* flowid only valid if RSS hashing is enabled */
2670 	if (sc->num_slices > 1) {
2671 		m->m_pkthdr.flowid = (ss - sc->ss);
2672 		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2673 	}
2674 	/* if the checksum is valid, mark it in the mbuf header */
2675 	if ((if_getcapenable(ifp) & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2676 	    (0 == mxge_rx_csum(m, csum))) {
2677 		/* Tell the stack that the checksum is good */
2678 		m->m_pkthdr.csum_data = 0xffff;
2679 		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2680 		    CSUM_DATA_VALID;
2681 
2682 #if defined(INET) || defined (INET6)
2683 		if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
2684 			return;
2685 #endif
2686 	}
2687 	/* pass the frame up the stack */
2688 	if_input(ifp, m);
2689 }
2690 
2691 static inline void
2692 mxge_clean_rx_done(struct mxge_slice_state *ss)
2693 {
2694 	mxge_rx_done_t *rx_done = &ss->rx_done;
2695 	int limit = 0;
2696 	uint16_t length;
2697 	uint16_t checksum;
2698 	int lro;
2699 
2700 	lro = if_getcapenable(ss->sc->ifp) & IFCAP_LRO;
2701 	while (rx_done->entry[rx_done->idx].length != 0) {
2702 		length = ntohs(rx_done->entry[rx_done->idx].length);
2703 		rx_done->entry[rx_done->idx].length = 0;
2704 		checksum = rx_done->entry[rx_done->idx].checksum;
2705 		if (length <= (MHLEN - MXGEFW_PAD))
2706 			mxge_rx_done_small(ss, length, checksum, lro);
2707 		else
2708 			mxge_rx_done_big(ss, length, checksum, lro);
2709 		rx_done->cnt++;
2710 		rx_done->idx = rx_done->cnt & rx_done->mask;
2711 
2712 		/* limit potential for livelock */
2713 		if (__predict_false(++limit > rx_done->mask / 2))
2714 			break;
2715 	}
2716 #if defined(INET) || defined (INET6)
2717 	tcp_lro_flush_all(&ss->lc);
2718 #endif
2719 }
2720 
2721 static inline void
2722 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2723 {
2724 	if_t ifp __unused;
2725 	mxge_tx_ring_t *tx;
2726 	struct mbuf *m;
2727 	bus_dmamap_t map;
2728 	int idx;
2729 	int *flags;
2730 
2731 	tx = &ss->tx;
2732 	ifp = ss->sc->ifp;
2733 	while (tx->pkt_done != mcp_idx) {
2734 		idx = tx->done & tx->mask;
2735 		tx->done++;
2736 		m = tx->info[idx].m;
2737 		/* mbuf and DMA map only attached to the first
2738 		   segment per-mbuf */
2739 		if (m != NULL) {
2740 			ss->obytes += m->m_pkthdr.len;
2741 			if (m->m_flags & M_MCAST)
2742 				ss->omcasts++;
2743 			ss->opackets++;
2744 			tx->info[idx].m = NULL;
2745 			map = tx->info[idx].map;
2746 			bus_dmamap_unload(tx->dmat, map);
2747 			m_freem(m);
2748 		}
2749 		if (tx->info[idx].flag) {
2750 			tx->info[idx].flag = 0;
2751 			tx->pkt_done++;
2752 		}
2753 	}
2754 
2755 	/* If we have space, clear IFF_OACTIVE to tell the stack that
2756 	   it's OK to send packets */
2757 	flags = &ss->if_drv_flags;
2758 	mtx_lock(&ss->tx.mtx);
2759 	if ((*flags) & IFF_DRV_OACTIVE &&
2760 	    tx->req - tx->done < (tx->mask + 1)/4) {
2761 		*(flags) &= ~IFF_DRV_OACTIVE;
2762 		ss->tx.wake++;
2763 		mxge_start_locked(ss);
2764 	}
2765 	if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2766 		/* let the NIC stop polling this queue, since there
2767 		 * are no more transmits pending */
2768 		if (tx->req == tx->done) {
2769 			*tx->send_stop = 1;
2770 			tx->queue_active = 0;
2771 			tx->deactivate++;
2772 			wmb();
2773 		}
2774 	}
2775 	mtx_unlock(&ss->tx.mtx);
2776 }
2777 
2778 static struct mxge_media_type mxge_xfp_media_types[] =
2779 {
2780 	{IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2781 	{IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2782 	{IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2783 	{0, (1 << 5), "10GBASE-ER"},
2784 	{IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2785 	{0, (1 << 3), "10GBASE-SW"},
2786 	{0, (1 << 2), "10GBASE-LW"},
2787 	{0, (1 << 1), "10GBASE-EW"},
2788 	{0, (1 << 0), "Reserved"}
2789 };
2790 static struct 
mxge_media_type mxge_sfp_media_types[] =
2791 {
2792 	{IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2793 	{0, (1 << 7), "Reserved"},
2794 	{IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2795 	{IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2796 	{IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2797 	{IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
2798 };
2799 
2800 static void
2801 mxge_media_set(mxge_softc_t *sc, int media_type)
2802 {
2803 
2804 	ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
2805 	    0, NULL);
2806 	ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2807 	sc->current_media = media_type;
2808 	sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
2809 }
2810 
2811 static void
2812 mxge_media_init(mxge_softc_t *sc)
2813 {
2814 	char *ptr;
2815 	int i;
2816 
2817 	ifmedia_removeall(&sc->media);
2818 	mxge_media_set(sc, IFM_AUTO);
2819 
2820 	/*
2821 	 * parse the product code to determine the interface type
2822 	 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2823 	 * after the 3rd dash in the driver's cached copy of the
2824 	 * EEPROM's product code string.
2825 	 */
2826 	ptr = sc->product_code_string;
2827 	if (ptr == NULL) {
2828 		device_printf(sc->dev, "Missing product code\n");
2829 		return;
2830 	}
2831 
2832 	for (i = 0; i < 3; i++, ptr++) {
2833 		ptr = strchr(ptr, '-');
2834 		if (ptr == NULL) {
2835 			device_printf(sc->dev,
2836 			    "only %d dashes in PC?!?\n", i);
2837 			return;
2838 		}
2839 	}
2840 	if (*ptr == 'C' || *(ptr +1) == 'C') {
2841 		/* -C is CX4 */
2842 		sc->connector = MXGE_CX4;
2843 		mxge_media_set(sc, IFM_10G_CX4);
2844 	} else if (*ptr == 'Q') {
2845 		/* -Q is Quad Ribbon Fiber */
2846 		sc->connector = MXGE_QRF;
2847 		device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2848 		/* FreeBSD has no media type for Quad ribbon fiber */
2849 	} else if (*ptr == 'R') {
2850 		/* -R is XFP */
2851 		sc->connector = MXGE_XFP;
2852 	} else if (*ptr == 'S' || *(ptr +1) == 'S') {
2853 		/* -S or -2S is SFP+ */
2854 		sc->connector = MXGE_SFP;
2855 	} else {
2856 		device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2857 	}
2858 }
2859 
2860 /*
2861  * Determine the media type for a NIC. Some XFPs will identify
2862  * themselves only when their link is up, so this is initiated via a
2863  * link up interrupt. However, this can potentially take up to
2864  * several milliseconds, so it is run via the watchdog routine, rather
2865  * than in the interrupt handler itself.
2866  */
2867 static void
2868 mxge_media_probe(mxge_softc_t *sc)
2869 {
2870 	mxge_cmd_t cmd;
2871 	char *cage_type;
2872 
2873 	struct mxge_media_type *mxge_media_types = NULL;
2874 	int i, err, ms, mxge_media_type_entries;
2875 	uint32_t byte;
2876 
2877 	sc->need_media_probe = 0;
2878 
2879 	if (sc->connector == MXGE_XFP) {
2880 		/* -R is XFP */
2881 		mxge_media_types = mxge_xfp_media_types;
2882 		mxge_media_type_entries =
2883 		    nitems(mxge_xfp_media_types);
2884 		byte = MXGE_XFP_COMPLIANCE_BYTE;
2885 		cage_type = "XFP";
2886 	} else if (sc->connector == MXGE_SFP) {
2887 		/* -S or -2S is SFP+ */
2888 		mxge_media_types = mxge_sfp_media_types;
2889 		mxge_media_type_entries =
2890 		    nitems(mxge_sfp_media_types);
2891 		cage_type = "SFP+";
2892 		byte = 3;
2893 	} else {
2894 		/* nothing to do; media type cannot change */
2895 		return;
2896 	}
2897 
2898 	/*
2899 	 * At this point we know the NIC has an XFP cage, so now we
2900 	 * try to determine what is in the cage by using the
2901 	 * firmware's XFP I2C commands to read the XFP 10GbE compliance
2902 	 * register.
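 * The tables above map individual bits of that byte onto ifmedia
 * types; entry 0 of each table is matched against the whole byte.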
We read just one byte, which may take over 2903 * a millisecond 2904 */ 2905 2906 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2907 cmd.data1 = byte; 2908 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2909 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2910 device_printf(sc->dev, "failed to read XFP\n"); 2911 } 2912 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2913 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2914 } 2915 if (err != MXGEFW_CMD_OK) { 2916 return; 2917 } 2918 2919 /* now we wait for the data to be cached */ 2920 cmd.data0 = byte; 2921 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2922 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2923 DELAY(1000); 2924 cmd.data0 = byte; 2925 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2926 } 2927 if (err != MXGEFW_CMD_OK) { 2928 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2929 cage_type, err, ms); 2930 return; 2931 } 2932 2933 if (cmd.data0 == mxge_media_types[0].bitmask) { 2934 if (mxge_verbose) 2935 device_printf(sc->dev, "%s:%s\n", cage_type, 2936 mxge_media_types[0].name); 2937 if (sc->current_media != mxge_media_types[0].flag) { 2938 mxge_media_init(sc); 2939 mxge_media_set(sc, mxge_media_types[0].flag); 2940 } 2941 return; 2942 } 2943 for (i = 1; i < mxge_media_type_entries; i++) { 2944 if (cmd.data0 & mxge_media_types[i].bitmask) { 2945 if (mxge_verbose) 2946 device_printf(sc->dev, "%s:%s\n", 2947 cage_type, 2948 mxge_media_types[i].name); 2949 2950 if (sc->current_media != mxge_media_types[i].flag) { 2951 mxge_media_init(sc); 2952 mxge_media_set(sc, mxge_media_types[i].flag); 2953 } 2954 return; 2955 } 2956 } 2957 if (mxge_verbose) 2958 device_printf(sc->dev, "%s media 0x%x unknown\n", 2959 cage_type, cmd.data0); 2960 2961 return; 2962 } 2963 2964 static void 2965 mxge_intr(void *arg) 2966 { 2967 struct mxge_slice_state *ss = arg; 2968 mxge_softc_t *sc = ss->sc; 2969 mcp_irq_data_t *stats = ss->fw_stats; 2970 mxge_tx_ring_t *tx = &ss->tx; 2971 mxge_rx_done_t *rx_done = &ss->rx_done; 2972 uint32_t send_done_count; 2973 uint8_t valid; 2974 2975 /* make sure the DMA has finished */ 2976 if (!stats->valid) { 2977 return; 2978 } 2979 valid = stats->valid; 2980 2981 if (sc->legacy_irq) { 2982 /* lower legacy IRQ */ 2983 *sc->irq_deassert = 0; 2984 if (!mxge_deassert_wait) 2985 /* don't wait for conf. 
that irq is low */ 2986 stats->valid = 0; 2987 } else { 2988 stats->valid = 0; 2989 } 2990 2991 /* loop while waiting for legacy irq deassertion */ 2992 do { 2993 /* check for transmit completes and receives */ 2994 send_done_count = be32toh(stats->send_done_count); 2995 while ((send_done_count != tx->pkt_done) || 2996 (rx_done->entry[rx_done->idx].length != 0)) { 2997 if (send_done_count != tx->pkt_done) 2998 mxge_tx_done(ss, (int)send_done_count); 2999 mxge_clean_rx_done(ss); 3000 send_done_count = be32toh(stats->send_done_count); 3001 } 3002 if (sc->legacy_irq && mxge_deassert_wait) 3003 wmb(); 3004 } while (*((volatile uint8_t *) &stats->valid)); 3005 3006 /* fw link & error stats meaningful only on the first slice */ 3007 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 3008 if (sc->link_state != stats->link_up) { 3009 sc->link_state = stats->link_up; 3010 if (sc->link_state) { 3011 if_link_state_change(sc->ifp, LINK_STATE_UP); 3012 if (mxge_verbose) 3013 device_printf(sc->dev, "link up\n"); 3014 } else { 3015 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3016 if (mxge_verbose) 3017 device_printf(sc->dev, "link down\n"); 3018 } 3019 sc->need_media_probe = 1; 3020 } 3021 if (sc->rdma_tags_available != 3022 be32toh(stats->rdma_tags_available)) { 3023 sc->rdma_tags_available = 3024 be32toh(stats->rdma_tags_available); 3025 device_printf(sc->dev, "RDMA timed out! %d tags " 3026 "left\n", sc->rdma_tags_available); 3027 } 3028 3029 if (stats->link_down) { 3030 sc->down_cnt += stats->link_down; 3031 sc->link_state = 0; 3032 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3033 } 3034 } 3035 3036 /* check to see if we have rx token to pass back */ 3037 if (valid & 0x1) 3038 *ss->irq_claim = be32toh(3); 3039 *(ss->irq_claim + 1) = be32toh(3); 3040 } 3041 3042 static void 3043 mxge_init(void *arg) 3044 { 3045 mxge_softc_t *sc = arg; 3046 if_t ifp = sc->ifp; 3047 3048 mtx_lock(&sc->driver_mtx); 3049 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) 3050 (void) mxge_open(sc); 3051 mtx_unlock(&sc->driver_mtx); 3052 } 3053 3054 static void 3055 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3056 { 3057 int i; 3058 3059 #if defined(INET) || defined(INET6) 3060 tcp_lro_free(&ss->lc); 3061 #endif 3062 for (i = 0; i <= ss->rx_big.mask; i++) { 3063 if (ss->rx_big.info[i].m == NULL) 3064 continue; 3065 bus_dmamap_unload(ss->rx_big.dmat, 3066 ss->rx_big.info[i].map); 3067 m_freem(ss->rx_big.info[i].m); 3068 ss->rx_big.info[i].m = NULL; 3069 } 3070 3071 for (i = 0; i <= ss->rx_small.mask; i++) { 3072 if (ss->rx_small.info[i].m == NULL) 3073 continue; 3074 bus_dmamap_unload(ss->rx_small.dmat, 3075 ss->rx_small.info[i].map); 3076 m_freem(ss->rx_small.info[i].m); 3077 ss->rx_small.info[i].m = NULL; 3078 } 3079 3080 /* transmit ring used only on the first slice */ 3081 if (ss->tx.info == NULL) 3082 return; 3083 3084 for (i = 0; i <= ss->tx.mask; i++) { 3085 ss->tx.info[i].flag = 0; 3086 if (ss->tx.info[i].m == NULL) 3087 continue; 3088 bus_dmamap_unload(ss->tx.dmat, 3089 ss->tx.info[i].map); 3090 m_freem(ss->tx.info[i].m); 3091 ss->tx.info[i].m = NULL; 3092 } 3093 } 3094 3095 static void 3096 mxge_free_mbufs(mxge_softc_t *sc) 3097 { 3098 int slice; 3099 3100 for (slice = 0; slice < sc->num_slices; slice++) 3101 mxge_free_slice_mbufs(&sc->ss[slice]); 3102 } 3103 3104 static void 3105 mxge_free_slice_rings(struct mxge_slice_state *ss) 3106 { 3107 int i; 3108 3109 if (ss->rx_done.entry != NULL) 3110 mxge_dma_free(&ss->rx_done.dma); 3111 ss->rx_done.entry = NULL; 3112 3113 if (ss->tx.req_bytes 
!= NULL) 3114 free(ss->tx.req_bytes, M_DEVBUF); 3115 ss->tx.req_bytes = NULL; 3116 3117 if (ss->tx.seg_list != NULL) 3118 free(ss->tx.seg_list, M_DEVBUF); 3119 ss->tx.seg_list = NULL; 3120 3121 if (ss->rx_small.shadow != NULL) 3122 free(ss->rx_small.shadow, M_DEVBUF); 3123 ss->rx_small.shadow = NULL; 3124 3125 if (ss->rx_big.shadow != NULL) 3126 free(ss->rx_big.shadow, M_DEVBUF); 3127 ss->rx_big.shadow = NULL; 3128 3129 if (ss->tx.info != NULL) { 3130 if (ss->tx.dmat != NULL) { 3131 for (i = 0; i <= ss->tx.mask; i++) { 3132 bus_dmamap_destroy(ss->tx.dmat, 3133 ss->tx.info[i].map); 3134 } 3135 bus_dma_tag_destroy(ss->tx.dmat); 3136 } 3137 free(ss->tx.info, M_DEVBUF); 3138 } 3139 ss->tx.info = NULL; 3140 3141 if (ss->rx_small.info != NULL) { 3142 if (ss->rx_small.dmat != NULL) { 3143 for (i = 0; i <= ss->rx_small.mask; i++) { 3144 bus_dmamap_destroy(ss->rx_small.dmat, 3145 ss->rx_small.info[i].map); 3146 } 3147 bus_dmamap_destroy(ss->rx_small.dmat, 3148 ss->rx_small.extra_map); 3149 bus_dma_tag_destroy(ss->rx_small.dmat); 3150 } 3151 free(ss->rx_small.info, M_DEVBUF); 3152 } 3153 ss->rx_small.info = NULL; 3154 3155 if (ss->rx_big.info != NULL) { 3156 if (ss->rx_big.dmat != NULL) { 3157 for (i = 0; i <= ss->rx_big.mask; i++) { 3158 bus_dmamap_destroy(ss->rx_big.dmat, 3159 ss->rx_big.info[i].map); 3160 } 3161 bus_dmamap_destroy(ss->rx_big.dmat, 3162 ss->rx_big.extra_map); 3163 bus_dma_tag_destroy(ss->rx_big.dmat); 3164 } 3165 free(ss->rx_big.info, M_DEVBUF); 3166 } 3167 ss->rx_big.info = NULL; 3168 } 3169 3170 static void 3171 mxge_free_rings(mxge_softc_t *sc) 3172 { 3173 int slice; 3174 3175 for (slice = 0; slice < sc->num_slices; slice++) 3176 mxge_free_slice_rings(&sc->ss[slice]); 3177 } 3178 3179 static int 3180 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3181 int tx_ring_entries) 3182 { 3183 mxge_softc_t *sc = ss->sc; 3184 size_t bytes; 3185 int err, i; 3186 3187 /* allocate per-slice receive resources */ 3188 3189 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3190 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3191 3192 /* allocate the rx shadow rings */ 3193 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3194 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3195 3196 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3197 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3198 3199 /* allocate the rx host info rings */ 3200 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3201 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3202 3203 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3204 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3205 3206 /* allocate the rx busdma resources */ 3207 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3208 1, /* alignment */ 3209 4096, /* boundary */ 3210 BUS_SPACE_MAXADDR, /* low */ 3211 BUS_SPACE_MAXADDR, /* high */ 3212 NULL, NULL, /* filter */ 3213 MHLEN, /* maxsize */ 3214 1, /* num segs */ 3215 MHLEN, /* maxsegsize */ 3216 BUS_DMA_ALLOCNOW, /* flags */ 3217 NULL, NULL, /* lock */ 3218 &ss->rx_small.dmat); /* tag */ 3219 if (err != 0) { 3220 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3221 err); 3222 return err; 3223 } 3224 3225 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3226 1, /* alignment */ 3227 0, /* boundary */ 3228 BUS_SPACE_MAXADDR, /* low */ 3229 BUS_SPACE_MAXADDR, /* high */ 3230 NULL, NULL, /* filter */ 3231 3*4096, /* maxsize */ 3232 1, /* num segs */ 3233 MJUM9BYTES, /* maxsegsize*/ 3234 
BUS_DMA_ALLOCNOW, /* flags */ 3235 NULL, NULL, /* lock */ 3236 &ss->rx_big.dmat); /* tag */ 3237 if (err != 0) { 3238 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3239 err); 3240 return err; 3241 } 3242 for (i = 0; i <= ss->rx_small.mask; i++) { 3243 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3244 &ss->rx_small.info[i].map); 3245 if (err != 0) { 3246 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3247 err); 3248 return err; 3249 } 3250 } 3251 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3252 &ss->rx_small.extra_map); 3253 if (err != 0) { 3254 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3255 err); 3256 return err; 3257 } 3258 3259 for (i = 0; i <= ss->rx_big.mask; i++) { 3260 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3261 &ss->rx_big.info[i].map); 3262 if (err != 0) { 3263 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3264 err); 3265 return err; 3266 } 3267 } 3268 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3269 &ss->rx_big.extra_map); 3270 if (err != 0) { 3271 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3272 err); 3273 return err; 3274 } 3275 3276 /* now allocate TX resources */ 3277 3278 ss->tx.mask = tx_ring_entries - 1; 3279 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3280 3281 /* allocate the tx request copy block */ 3282 bytes = 8 + 3283 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3284 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3285 /* ensure req_list entries are aligned to 8 bytes */ 3286 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3287 ((uintptr_t)(ss->tx.req_bytes + 7) & ~7UL); 3288 3289 /* allocate the tx busdma segment list */ 3290 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3291 ss->tx.seg_list = (bus_dma_segment_t *) 3292 malloc(bytes, M_DEVBUF, M_WAITOK); 3293 3294 /* allocate the tx host info ring */ 3295 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3296 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3297 3298 /* allocate the tx busdma resources */ 3299 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3300 1, /* alignment */ 3301 sc->tx_boundary, /* boundary */ 3302 BUS_SPACE_MAXADDR, /* low */ 3303 BUS_SPACE_MAXADDR, /* high */ 3304 NULL, NULL, /* filter */ 3305 65536 + 256, /* maxsize */ 3306 ss->tx.max_desc - 2, /* num segs */ 3307 sc->tx_boundary, /* maxsegsz */ 3308 BUS_DMA_ALLOCNOW, /* flags */ 3309 NULL, NULL, /* lock */ 3310 &ss->tx.dmat); /* tag */ 3311 3312 if (err != 0) { 3313 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3314 err); 3315 return err; 3316 } 3317 3318 /* now use these tags to setup dmamaps for each slot 3319 in the ring */ 3320 for (i = 0; i <= ss->tx.mask; i++) { 3321 err = bus_dmamap_create(ss->tx.dmat, 0, 3322 &ss->tx.info[i].map); 3323 if (err != 0) { 3324 device_printf(sc->dev, "Err %d tx dmamap\n", 3325 err); 3326 return err; 3327 } 3328 } 3329 return 0; 3330 3331 } 3332 3333 static int 3334 mxge_alloc_rings(mxge_softc_t *sc) 3335 { 3336 mxge_cmd_t cmd; 3337 int tx_ring_size; 3338 int tx_ring_entries, rx_ring_entries; 3339 int err, slice; 3340 3341 /* get ring sizes */ 3342 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3343 tx_ring_size = cmd.data0; 3344 if (err != 0) { 3345 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3346 goto abort; 3347 } 3348 3349 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3350 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3351 if_setsendqlen(sc->ifp, tx_ring_entries - 1); 3352 if_setsendqready(sc->ifp); 3353 3354 for (slice = 0; 
slice < sc->num_slices; slice++) { 3355 err = mxge_alloc_slice_rings(&sc->ss[slice], 3356 rx_ring_entries, 3357 tx_ring_entries); 3358 if (err != 0) 3359 goto abort; 3360 } 3361 return 0; 3362 3363 abort: 3364 mxge_free_rings(sc); 3365 return err; 3366 3367 } 3368 3369 static void 3370 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3371 { 3372 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3373 3374 if (bufsize < MCLBYTES) { 3375 /* easy, everything fits in a single buffer */ 3376 *big_buf_size = MCLBYTES; 3377 *cl_size = MCLBYTES; 3378 *nbufs = 1; 3379 return; 3380 } 3381 3382 if (bufsize < MJUMPAGESIZE) { 3383 /* still easy, everything still fits in a single buffer */ 3384 *big_buf_size = MJUMPAGESIZE; 3385 *cl_size = MJUMPAGESIZE; 3386 *nbufs = 1; 3387 return; 3388 } 3389 *cl_size = MJUM9BYTES; 3390 *big_buf_size = MJUM9BYTES; 3391 *nbufs = 1; 3392 } 3393 3394 static int 3395 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3396 { 3397 mxge_softc_t *sc; 3398 mxge_cmd_t cmd; 3399 bus_dmamap_t map; 3400 int err, i, slice; 3401 3402 sc = ss->sc; 3403 slice = ss - sc->ss; 3404 3405 #if defined(INET) || defined(INET6) 3406 (void)tcp_lro_init(&ss->lc); 3407 #endif 3408 ss->lc.ifp = sc->ifp; 3409 3410 /* get the lanai pointers to the send and receive rings */ 3411 3412 err = 0; 3413 3414 cmd.data0 = slice; 3415 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3416 ss->tx.lanai = 3417 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3418 ss->tx.send_go = (volatile uint32_t *) 3419 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3420 ss->tx.send_stop = (volatile uint32_t *) 3421 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3422 3423 cmd.data0 = slice; 3424 err |= mxge_send_cmd(sc, 3425 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3426 ss->rx_small.lanai = 3427 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3428 cmd.data0 = slice; 3429 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3430 ss->rx_big.lanai = 3431 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3432 3433 if (err != 0) { 3434 device_printf(sc->dev, 3435 "failed to get ring sizes or locations\n"); 3436 return EIO; 3437 } 3438 3439 /* stock receive rings */ 3440 for (i = 0; i <= ss->rx_small.mask; i++) { 3441 map = ss->rx_small.info[i].map; 3442 err = mxge_get_buf_small(ss, map, i); 3443 if (err) { 3444 device_printf(sc->dev, "alloced %d/%d smalls\n", 3445 i, ss->rx_small.mask + 1); 3446 return ENOMEM; 3447 } 3448 } 3449 for (i = 0; i <= ss->rx_big.mask; i++) { 3450 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3451 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3452 } 3453 ss->rx_big.nbufs = nbufs; 3454 ss->rx_big.cl_size = cl_size; 3455 ss->rx_big.mlen = if_getmtu(ss->sc->ifp) + ETHER_HDR_LEN + 3456 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3457 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3458 map = ss->rx_big.info[i].map; 3459 err = mxge_get_buf_big(ss, map, i); 3460 if (err) { 3461 device_printf(sc->dev, "alloced %d/%d bigs\n", 3462 i, ss->rx_big.mask + 1); 3463 return ENOMEM; 3464 } 3465 } 3466 return 0; 3467 } 3468 3469 static int 3470 mxge_open(mxge_softc_t *sc) 3471 { 3472 mxge_cmd_t cmd; 3473 int err, big_bytes, nbufs, slice, cl_size, i; 3474 bus_addr_t bus; 3475 volatile uint8_t *itable; 3476 struct mxge_slice_state *ss; 3477 3478 /* Copy the MAC address in case it was overridden */ 3479 bcopy(if_getlladdr(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3480 3481 err = mxge_reset(sc, 1); 3482 if (err != 0) { 
3483 		device_printf(sc->dev, "failed to reset\n");
3484 		return EIO;
3485 	}
3486 
3487 	if (sc->num_slices > 1) {
3488 		/* setup the indirection table */
3489 		cmd.data0 = sc->num_slices;
3490 		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3491 		    &cmd);
3492 
3493 		err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3494 		    &cmd);
3495 		if (err != 0) {
3496 			device_printf(sc->dev,
3497 			    "failed to setup rss tables\n");
3498 			return err;
3499 		}
3500 
3501 		/* just enable an identity mapping */
3502 		itable = sc->sram + cmd.data0;
3503 		for (i = 0; i < sc->num_slices; i++)
3504 			itable[i] = (uint8_t)i;
3505 
3506 		cmd.data0 = 1;
3507 		cmd.data1 = mxge_rss_hash_type;
3508 		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3509 		if (err != 0) {
3510 			device_printf(sc->dev, "failed to enable slices\n");
3511 			return err;
3512 		}
3513 	}
3514 
3515 	mxge_choose_params(if_getmtu(sc->ifp), &big_bytes, &cl_size, &nbufs);
3516 
3517 	cmd.data0 = nbufs;
3518 	err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3519 	    &cmd);
3520 	/* error is only meaningful if we're trying to set
3521 	   MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3522 	if (err && nbufs > 1) {
3523 		device_printf(sc->dev,
3524 		    "Failed to set always-use-n to %d\n",
3525 		    nbufs);
3526 		return EIO;
3527 	}
3528 	/* Give the firmware the mtu and the big and small buffer
3529 	   sizes. The firmware wants the big buf size to be a power
3530 	   of two. Luckily, FreeBSD's clusters are powers of two */
3531 	cmd.data0 = if_getmtu(sc->ifp) + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3532 	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3533 	cmd.data0 = MHLEN - MXGEFW_PAD;
3534 	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3535 	    &cmd);
3536 	cmd.data0 = big_bytes;
3537 	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3538 
3539 	if (err != 0) {
3540 		device_printf(sc->dev, "failed to setup params\n");
3541 		goto abort;
3542 	}
3543 
3544 	/* Now give the firmware the pointer to the stats block */
3545 	for (slice = 0; slice < sc->num_slices; slice++) {
3546 		ss = &sc->ss[slice];
3547 		cmd.data0 =
3548 		    MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3549 		cmd.data1 =
3550 		    MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3551 		cmd.data2 = sizeof(struct mcp_irq_data);
3552 		cmd.data2 |= (slice << 16);
3553 		err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3554 	}
3555 
3556 	if (err != 0) {
3557 		bus = sc->ss->fw_stats_dma.bus_addr;
3558 		bus += offsetof(struct mcp_irq_data, send_done_count);
3559 		cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3560 		cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3561 		err = mxge_send_cmd(sc,
3562 		    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3563 		    &cmd);
3564 		/* Firmware cannot support multicast without STATS_DMA_V2 */
3565 		sc->fw_multicast_support = 0;
3566 	} else {
3567 		sc->fw_multicast_support = 1;
3568 	}
3569 
3570 	if (err != 0) {
3571 		device_printf(sc->dev, "failed to setup params\n");
3572 		goto abort;
3573 	}
3574 
3575 	for (slice = 0; slice < sc->num_slices; slice++) {
3576 		err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3577 		if (err != 0) {
3578 			device_printf(sc->dev, "couldn't open slice %d\n",
3579 			    slice);
3580 			goto abort;
3581 		}
3582 	}
3583 
3584 	/* Finally, start the firmware running */
3585 	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3586 	if (err) {
3587 		device_printf(sc->dev, "Couldn't bring up link\n");
3588 		goto abort;
3589 	}
3590 	for (slice = 0; slice < sc->num_slices; slice++) {
3591 		ss = &sc->ss[slice];
3592 		ss->if_drv_flags |= IFF_DRV_RUNNING;
3593 		ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3594 	}
3595 	if_setdrvflagbits(sc->ifp, 
IFF_DRV_RUNNING, 0); 3596 if_setdrvflagbits(sc->ifp, 0, IFF_DRV_OACTIVE); 3597 3598 return 0; 3599 3600 abort: 3601 mxge_free_mbufs(sc); 3602 3603 return err; 3604 } 3605 3606 static int 3607 mxge_close(mxge_softc_t *sc, int down) 3608 { 3609 mxge_cmd_t cmd; 3610 int err, old_down_cnt; 3611 struct mxge_slice_state *ss; 3612 int slice; 3613 3614 for (slice = 0; slice < sc->num_slices; slice++) { 3615 ss = &sc->ss[slice]; 3616 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3617 } 3618 if_setdrvflagbits(sc->ifp, 0, IFF_DRV_RUNNING); 3619 if (!down) { 3620 old_down_cnt = sc->down_cnt; 3621 wmb(); 3622 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3623 if (err) { 3624 device_printf(sc->dev, 3625 "Couldn't bring down link\n"); 3626 } 3627 if (old_down_cnt == sc->down_cnt) { 3628 /* wait for down irq */ 3629 DELAY(10 * sc->intr_coal_delay); 3630 } 3631 wmb(); 3632 if (old_down_cnt == sc->down_cnt) { 3633 device_printf(sc->dev, "never got down irq\n"); 3634 } 3635 } 3636 mxge_free_mbufs(sc); 3637 3638 return 0; 3639 } 3640 3641 static void 3642 mxge_setup_cfg_space(mxge_softc_t *sc) 3643 { 3644 device_t dev = sc->dev; 3645 int reg; 3646 uint16_t lnk, pectl; 3647 3648 /* find the PCIe link width and set max read request to 4KB*/ 3649 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 3650 lnk = pci_read_config(dev, reg + 0x12, 2); 3651 sc->link_width = (lnk >> 4) & 0x3f; 3652 3653 if (sc->pectl == 0) { 3654 pectl = pci_read_config(dev, reg + 0x8, 2); 3655 pectl = (pectl & ~0x7000) | (5 << 12); 3656 pci_write_config(dev, reg + 0x8, pectl, 2); 3657 sc->pectl = pectl; 3658 } else { 3659 /* restore saved pectl after watchdog reset */ 3660 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3661 } 3662 } 3663 3664 /* Enable DMA and Memory space access */ 3665 pci_enable_busmaster(dev); 3666 } 3667 3668 static uint32_t 3669 mxge_read_reboot(mxge_softc_t *sc) 3670 { 3671 device_t dev = sc->dev; 3672 uint32_t vs; 3673 3674 /* find the vendor specific offset */ 3675 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) { 3676 device_printf(sc->dev, 3677 "could not find vendor specific offset\n"); 3678 return (uint32_t)-1; 3679 } 3680 /* enable read32 mode */ 3681 pci_write_config(dev, vs + 0x10, 0x3, 1); 3682 /* tell NIC which register to read */ 3683 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3684 return (pci_read_config(dev, vs + 0x14, 4)); 3685 } 3686 3687 static void 3688 mxge_watchdog_reset(mxge_softc_t *sc) 3689 { 3690 struct pci_devinfo *dinfo; 3691 struct mxge_slice_state *ss; 3692 int err, running, s, num_tx_slices = 1; 3693 uint32_t reboot; 3694 uint16_t cmd; 3695 3696 err = ENXIO; 3697 3698 device_printf(sc->dev, "Watchdog reset!\n"); 3699 3700 /* 3701 * check to see if the NIC rebooted. If it did, then all of 3702 * PCI config space has been reset, and things like the 3703 * busmaster bit will be zero. If this is the case, then we 3704 * must restore PCI config space before the NIC can be used 3705 * again 3706 */ 3707 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3708 if (cmd == 0xffff) { 3709 /* 3710 * maybe the watchdog caught the NIC rebooting; wait 3711 * up to 100ms for it to finish. 
If it does not come
3712 	 * back, then give up
3713 	 */
3714 		DELAY(1000*100);
3715 		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3716 		if (cmd == 0xffff) {
3717 			device_printf(sc->dev, "NIC disappeared!\n");
3718 		}
3719 	}
3720 	if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3721 		/* print the reboot status */
3722 		reboot = mxge_read_reboot(sc);
3723 		device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3724 		    reboot);
3725 		running = if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING;
3726 		if (running) {
3727 			/*
3728 			 * quiesce NIC so that TX routines will not try to
3729 			 * xmit after restoration of BAR
3730 			 */
3731 
3732 			/* Mark the link as down */
3733 			if (sc->link_state) {
3734 				sc->link_state = 0;
3735 				if_link_state_change(sc->ifp,
3736 				    LINK_STATE_DOWN);
3737 			}
3738 
3739 			num_tx_slices = sc->num_slices;
3740 
3741 			/* grab all TX locks to ensure no tx */
3742 			for (s = 0; s < num_tx_slices; s++) {
3743 				ss = &sc->ss[s];
3744 				mtx_lock(&ss->tx.mtx);
3745 			}
3746 			mxge_close(sc, 1);
3747 		}
3748 		/* restore PCI configuration space */
3749 		dinfo = device_get_ivars(sc->dev);
3750 		pci_cfg_restore(sc->dev, dinfo);
3751 
3752 		/* and redo any changes we made to our config space */
3753 		mxge_setup_cfg_space(sc);
3754 
3755 		/* reload f/w */
3756 		err = mxge_load_firmware(sc, 0);
3757 		if (err) {
3758 			device_printf(sc->dev,
3759 			    "Unable to re-load f/w\n");
3760 		}
3761 		if (running) {
3762 			if (!err)
3763 				err = mxge_open(sc);
3764 			/* release all TX locks */
3765 			for (s = 0; s < num_tx_slices; s++) {
3766 				ss = &sc->ss[s];
3767 				mxge_start_locked(ss);
3768 				mtx_unlock(&ss->tx.mtx);
3769 			}
3770 		}
3771 		sc->watchdog_resets++;
3772 	} else {
3773 		device_printf(sc->dev,
3774 		    "NIC did not reboot, not resetting\n");
3775 		err = 0;
3776 	}
3777 	if (err) {
3778 		device_printf(sc->dev, "watchdog reset failed\n");
3779 	} else {
3780 		if (sc->dying == 2)
3781 			sc->dying = 0;
3782 		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3783 	}
3784 }
3785 
3786 static void
3787 mxge_watchdog_task(void *arg, int pending)
3788 {
3789 	mxge_softc_t *sc = arg;
3790 
3791 	mtx_lock(&sc->driver_mtx);
3792 	mxge_watchdog_reset(sc);
3793 	mtx_unlock(&sc->driver_mtx);
3794 }
3795 
3796 static void
3797 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3798 {
3799 	tx = &sc->ss[slice].tx;
3800 	device_printf(sc->dev, "slice %d stuck? 
ring state:\n", slice); 3801 device_printf(sc->dev, 3802 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3803 tx->req, tx->done, tx->queue_active); 3804 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3805 tx->activate, tx->deactivate); 3806 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3807 tx->pkt_done, 3808 be32toh(sc->ss->fw_stats->send_done_count)); 3809 } 3810 3811 static int 3812 mxge_watchdog(mxge_softc_t *sc) 3813 { 3814 mxge_tx_ring_t *tx; 3815 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3816 int i, err = 0; 3817 3818 /* see if we have outstanding transmits, which 3819 have been pending for more than mxge_ticks */ 3820 for (i = 0; (i < sc->num_slices) && (err == 0); i++) { 3821 tx = &sc->ss[i].tx; 3822 if (tx->req != tx->done && 3823 tx->watchdog_req != tx->watchdog_done && 3824 tx->done == tx->watchdog_done) { 3825 /* check for pause blocking before resetting */ 3826 if (tx->watchdog_rx_pause == rx_pause) { 3827 mxge_warn_stuck(sc, tx, i); 3828 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3829 return (ENXIO); 3830 } 3831 else 3832 device_printf(sc->dev, "Flow control blocking " 3833 "xmits, check link partner\n"); 3834 } 3835 3836 tx->watchdog_req = tx->req; 3837 tx->watchdog_done = tx->done; 3838 tx->watchdog_rx_pause = rx_pause; 3839 } 3840 3841 if (sc->need_media_probe) 3842 mxge_media_probe(sc); 3843 return (err); 3844 } 3845 3846 static uint64_t 3847 mxge_get_counter(if_t ifp, ift_counter cnt) 3848 { 3849 struct mxge_softc *sc; 3850 uint64_t rv; 3851 3852 sc = if_getsoftc(ifp); 3853 rv = 0; 3854 3855 switch (cnt) { 3856 case IFCOUNTER_IPACKETS: 3857 for (int s = 0; s < sc->num_slices; s++) 3858 rv += sc->ss[s].ipackets; 3859 return (rv); 3860 case IFCOUNTER_OPACKETS: 3861 for (int s = 0; s < sc->num_slices; s++) 3862 rv += sc->ss[s].opackets; 3863 return (rv); 3864 case IFCOUNTER_OERRORS: 3865 for (int s = 0; s < sc->num_slices; s++) 3866 rv += sc->ss[s].oerrors; 3867 return (rv); 3868 case IFCOUNTER_OBYTES: 3869 for (int s = 0; s < sc->num_slices; s++) 3870 rv += sc->ss[s].obytes; 3871 return (rv); 3872 case IFCOUNTER_OMCASTS: 3873 for (int s = 0; s < sc->num_slices; s++) 3874 rv += sc->ss[s].omcasts; 3875 return (rv); 3876 case IFCOUNTER_OQDROPS: 3877 for (int s = 0; s < sc->num_slices; s++) 3878 rv += sc->ss[s].tx.br->br_drops; 3879 return (rv); 3880 default: 3881 return (if_get_counter_default(ifp, cnt)); 3882 } 3883 } 3884 3885 static void 3886 mxge_tick(void *arg) 3887 { 3888 mxge_softc_t *sc = arg; 3889 u_long pkts = 0; 3890 int err = 0; 3891 int running, ticks; 3892 uint16_t cmd; 3893 3894 ticks = mxge_ticks; 3895 running = if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING; 3896 if (running) { 3897 if (!sc->watchdog_countdown) { 3898 err = mxge_watchdog(sc); 3899 sc->watchdog_countdown = 4; 3900 } 3901 sc->watchdog_countdown--; 3902 } 3903 if (pkts == 0) { 3904 /* ensure NIC did not suffer h/w fault while idle */ 3905 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3906 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3907 sc->dying = 2; 3908 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3909 err = ENXIO; 3910 } 3911 /* look less often if NIC is idle */ 3912 ticks *= 4; 3913 } 3914 3915 if (err == 0) 3916 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 3917 3918 } 3919 3920 static int 3921 mxge_media_change(if_t ifp) 3922 { 3923 return EINVAL; 3924 } 3925 3926 static int 3927 mxge_change_mtu(mxge_softc_t *sc, int mtu) 3928 { 3929 if_t ifp = sc->ifp; 3930 int real_mtu, old_mtu; 3931 int err = 0; 3932 3933 real_mtu = mtu + ETHER_HDR_LEN + 
ETHER_VLAN_ENCAP_LEN;
3934 	if ((real_mtu > sc->max_mtu) || real_mtu < 60)
3935 		return EINVAL;
3936 	mtx_lock(&sc->driver_mtx);
3937 	old_mtu = if_getmtu(ifp);
3938 	if_setmtu(ifp, mtu);
3939 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
3940 		mxge_close(sc, 0);
3941 		err = mxge_open(sc);
3942 		if (err != 0) {
3943 			if_setmtu(ifp, old_mtu);
3944 			mxge_close(sc, 0);
3945 			(void) mxge_open(sc);
3946 		}
3947 	}
3948 	mtx_unlock(&sc->driver_mtx);
3949 	return err;
3950 }
3951 
3952 static void
3953 mxge_media_status(if_t ifp, struct ifmediareq *ifmr)
3954 {
3955 	mxge_softc_t *sc = if_getsoftc(ifp);
3956 
3957 	if (sc == NULL)
3958 		return;
3959 	ifmr->ifm_status = IFM_AVALID;
3960 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
3961 	ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
3962 	ifmr->ifm_active |= sc->current_media;
3963 }
3964 
3965 static int
3966 mxge_fetch_i2c(mxge_softc_t *sc, struct ifi2creq *i2c)
3967 {
3968 	mxge_cmd_t cmd;
3969 	uint32_t i2c_args;
3970 	int i, ms, err;
3971 
3972 	if (i2c->dev_addr != 0xA0 &&
3973 	    i2c->dev_addr != 0xA2)
3974 		return (EINVAL);
3975 	if (i2c->len > sizeof(i2c->data))
3976 		return (EINVAL);
3977 
3978 	for (i = 0; i < i2c->len; i++) {
3979 		i2c_args = i2c->dev_addr << 0x8;
3980 		i2c_args |= i2c->offset + i;
3981 		cmd.data0 = 0;	 /* just fetch 1 byte, not all 256 */
3982 		cmd.data1 = i2c_args;
3983 		err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
3984 
3985 		if (err != MXGEFW_CMD_OK)
3986 			return (EIO);
3987 		/* now we wait for the data to be cached */
3988 		cmd.data0 = i2c_args & 0xff;
3989 		err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3990 		for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
3991 			cmd.data0 = i2c_args & 0xff;
3992 			err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3993 			if (err == EBUSY)
3994 				DELAY(1000);
3995 		}
3996 		if (err != MXGEFW_CMD_OK)
3997 			return (EIO);
3998 		i2c->data[i] = cmd.data0;
3999 	}
4000 	return (0);
4001 }
4002 
4003 static int
4004 mxge_ioctl(if_t ifp, u_long command, caddr_t data)
4005 {
4006 	mxge_softc_t *sc = if_getsoftc(ifp);
4007 	struct ifreq *ifr = (struct ifreq *)data;
4008 	struct ifi2creq i2c;
4009 	int err, mask;
4010 
4011 	err = 0;
4012 	switch (command) {
4013 	case SIOCSIFMTU:
4014 		err = mxge_change_mtu(sc, ifr->ifr_mtu);
4015 		break;
4016 
4017 	case SIOCSIFFLAGS:
4018 		mtx_lock(&sc->driver_mtx);
4019 		if (sc->dying) {
4020 			mtx_unlock(&sc->driver_mtx);
4021 			return EINVAL;
4022 		}
4023 		if (if_getflags(ifp) & IFF_UP) {
4024 			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) {
4025 				err = mxge_open(sc);
4026 			} else {
4027 				/* take care of promisc and allmulti
4028 				   flag changes */
4029 				mxge_change_promisc(sc,
4030 				    if_getflags(ifp) & IFF_PROMISC);
4031 				mxge_set_multicast_list(sc);
4032 			}
4033 		} else {
4034 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
4035 				mxge_close(sc, 0);
4036 			}
4037 		}
4038 		mtx_unlock(&sc->driver_mtx);
4039 		break;
4040 
4041 	case SIOCADDMULTI:
4042 	case SIOCDELMULTI:
4043 		mtx_lock(&sc->driver_mtx);
4044 		if (sc->dying) {
4045 			mtx_unlock(&sc->driver_mtx);
4046 			return (EINVAL);
4047 		}
4048 		mxge_set_multicast_list(sc);
4049 		mtx_unlock(&sc->driver_mtx);
4050 		break;
4051 
4052 	case SIOCSIFCAP:
4053 		mtx_lock(&sc->driver_mtx);
4054 		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
4055 		if (mask & IFCAP_TXCSUM) {
4056 			if (IFCAP_TXCSUM & if_getcapenable(ifp)) {
4057 				mask &= ~IFCAP_TSO4;
4058 				if_setcapenablebit(ifp, 0, (IFCAP_TXCSUM|IFCAP_TSO4));
4059 				if_sethwassistbits(ifp, 0, (CSUM_TCP | CSUM_UDP));
4060 			} else {
4061 				if_setcapenablebit(ifp, IFCAP_TXCSUM, 0);
4062 				if_sethwassistbits(ifp, (CSUM_TCP | CSUM_UDP), 0);
4063 			}
4064 		}
4065 		if (mask & IFCAP_RXCSUM) 
		if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & if_getcapenable(ifp)) {
				if_setcapenablebit(ifp, 0, IFCAP_RXCSUM);
			} else {
				if_setcapenablebit(ifp, IFCAP_RXCSUM, 0);
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & if_getcapenable(ifp)) {
				if_setcapenablebit(ifp, 0, IFCAP_TSO4);
			} else if (IFCAP_TXCSUM & if_getcapenable(ifp)) {
				if_setcapenablebit(ifp, IFCAP_TSO4, 0);
				if_sethwassistbits(ifp, CSUM_TSO, 0);
			} else {
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
#if IFCAP_TSO6
		if (mask & IFCAP_TXCSUM_IPV6) {
			if (IFCAP_TXCSUM_IPV6 & if_getcapenable(ifp)) {
				mask &= ~IFCAP_TSO6;
				if_setcapenablebit(ifp, 0,
				    IFCAP_TXCSUM_IPV6 | IFCAP_TSO6);
				if_sethwassistbits(ifp, 0,
				    CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
			} else {
				if_setcapenablebit(ifp, IFCAP_TXCSUM_IPV6, 0);
				if_sethwassistbits(ifp,
				    CSUM_TCP_IPV6 | CSUM_UDP_IPV6, 0);
			}
		}
		if (mask & IFCAP_RXCSUM_IPV6) {
			if (IFCAP_RXCSUM_IPV6 & if_getcapenable(ifp)) {
				if_setcapenablebit(ifp, 0, IFCAP_RXCSUM_IPV6);
			} else {
				if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0);
			}
		}
		if (mask & IFCAP_TSO6) {
			if (IFCAP_TSO6 & if_getcapenable(ifp)) {
				if_setcapenablebit(ifp, 0, IFCAP_TSO6);
			} else if (IFCAP_TXCSUM_IPV6 & if_getcapenable(ifp)) {
				if_setcapenablebit(ifp, IFCAP_TSO6, 0);
				if_sethwassistbits(ifp, CSUM_TSO, 0);
			} else {
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
#endif /* IFCAP_TSO6 */

		if (mask & IFCAP_LRO)
			if_togglecapenable(ifp, IFCAP_LRO);
		if (mask & IFCAP_VLAN_HWTAGGING)
			if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
		if (mask & IFCAP_VLAN_HWTSO)
			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);

		if (!(if_getcapabilities(ifp) & IFCAP_VLAN_HWTSO) ||
		    !(if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING))
			if_setcapenablebit(ifp, 0, IFCAP_VLAN_HWTSO);

		mtx_unlock(&sc->driver_mtx);
		VLAN_CAPABILITIES(ifp);

		break;

	case SIOCGIFMEDIA:
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return (EINVAL);
		}
		mxge_media_probe(sc);
		mtx_unlock(&sc->driver_mtx);
		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
		    &sc->media, command);
		break;

	case SIOCGI2C:
		if (sc->connector != MXGE_XFP &&
		    sc->connector != MXGE_SFP) {
			err = ENXIO;
			break;
		}
		err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
		if (err != 0)
			break;
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return (EINVAL);
		}
		err = mxge_fetch_i2c(sc, &i2c);
		mtx_unlock(&sc->driver_mtx);
		if (err == 0)
			err = copyout(&i2c, ifr_data_get_ptr(ifr),
			    sizeof(i2c));
		break;
	default:
		err = ether_ioctl(ifp, command, data);
		break;
	}
	return err;
}
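
/*
 * Fetch the hw.mxge.* loader tunables and clamp them to sane values
 * before attach uses them.  Note that both the rss_hash_type and
 * rss_hashtype spellings below are accepted for the same tunable.
 */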
static void
mxge_fetch_tunables(mxge_softc_t *sc)
{

	TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
	    &mxge_flow_control);
	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
	    &mxge_intr_coal_delay);
	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
	    &mxge_nvidia_ecrc_enable);
	TUNABLE_INT_FETCH("hw.mxge.force_firmware",
	    &mxge_force_firmware);
	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
	    &mxge_deassert_wait);
	TUNABLE_INT_FETCH("hw.mxge.verbose",
	    &mxge_verbose);
	TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
	TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
	TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
	TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
	TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
	TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);

	if (bootverbose)
		mxge_verbose = 1;
	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
		mxge_intr_coal_delay = 30;
	if (mxge_ticks == 0)
		mxge_ticks = hz / 2;
	sc->pause = mxge_flow_control;
	if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 ||
	    mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
		mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
	}
	if (mxge_initial_mtu > ETHERMTU_JUMBO ||
	    mxge_initial_mtu < ETHER_MIN_LEN)
		mxge_initial_mtu = ETHERMTU_JUMBO;

	if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
		mxge_throttle = MXGE_MAX_THROTTLE;
	if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
		mxge_throttle = MXGE_MIN_THROTTLE;
	sc->throttle = mxge_throttle;
}

static void
mxge_free_slices(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int i;

	if (sc->ss == NULL)
		return;

	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];
		if (ss->fw_stats != NULL) {
			mxge_dma_free(&ss->fw_stats_dma);
			ss->fw_stats = NULL;
			if (ss->tx.br != NULL) {
				drbr_free(ss->tx.br, M_DEVBUF);
				ss->tx.br = NULL;
			}
			mtx_destroy(&ss->tx.mtx);
		}
		if (ss->rx_done.entry != NULL) {
			mxge_dma_free(&ss->rx_done.dma);
			ss->rx_done.entry = NULL;
		}
	}
	free(sc->ss, M_DEVBUF);
	sc->ss = NULL;
}
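
/*
 * Allocate the per-slice state.  Each slice gets its own receive
 * completion queue; max_intr_slots is twice the number of descriptors
 * a receive ring holds, which leaves room for a completion from both
 * the small and big rx rings for every posted buffer.
 */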
static int
mxge_alloc_slices(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct mxge_slice_state *ss;
	size_t bytes;
	int err, i, max_intr_slots;

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		return err;
	}
	sc->rx_ring_size = cmd.data0;
	max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));

	bytes = sizeof (*sc->ss) * sc->num_slices;
	sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->ss == NULL)
		return (ENOMEM);
	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];

		ss->sc = sc;

		/* allocate per-slice rx interrupt queues */

		bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
		err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
		if (err != 0)
			goto abort;
		ss->rx_done.entry = ss->rx_done.dma.addr;
		bzero(ss->rx_done.entry, bytes);

		/*
		 * allocate the per-slice firmware stats; stats
		 * (including tx) are used only on the first
		 * slice for now
		 */

		bytes = sizeof (*ss->fw_stats);
		err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
		    sizeof (*ss->fw_stats), 64);
		if (err != 0)
			goto abort;
		ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
		snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
		    "%s:tx(%d)", device_get_nameunit(sc->dev), i);
		mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
		ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
		    &ss->tx.mtx);
	}

	return (0);

abort:
	mxge_free_slices(sc);
	return (ENOMEM);
}

static void
mxge_slice_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *old_fw;
	int msix_cnt, status, max_intr_slots;

	sc->num_slices = 1;
	/*
	 * don't use multiple slices when they are disabled by the
	 * tunable, or when this is not an SMP system
	 */
	if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
		return;

	/* see how many MSI-X interrupts are available */
	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;

	/* now load the slice-aware firmware and see what it supports */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/* get rx ring size */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));

	/* tell it the size of the interrupt queues */
	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/* ask for the maximum number of slices it supports */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
		    "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	sc->num_slices = cmd.data0;
	if (sc->num_slices > msix_cnt)
		sc->num_slices = msix_cnt;

	if (mxge_max_slices == -1) {
		/* cap to number of CPUs in system */
		if (sc->num_slices > mp_ncpus)
			sc->num_slices = mp_ncpus;
	} else {
		if (sc->num_slices > mxge_max_slices)
			sc->num_slices = mxge_max_slices;
	}
	/* make sure it is a power of two */
	while (sc->num_slices & (sc->num_slices - 1))
		sc->num_slices--;

	if (mxge_verbose)
		device_printf(sc->dev, "using %d slices\n",
		    sc->num_slices);

	return;

abort_with_fw:
	sc->fw_name = old_fw;
	(void) mxge_load_firmware(sc, 0);
}
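
/*
 * MSI-X setup.  The MSI-X table lives behind PCIR_BAR(2) on these
 * NICs; we bind one vector per slice, with message i routed to the
 * interrupt handler for slice i.
 */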
static int
mxge_add_msix_irqs(mxge_softc_t *sc)
{
	size_t bytes;
	int count, err, i, rid;

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);

	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
		return ENXIO;
	}

	count = sc->num_slices;
	err = pci_alloc_msix(sc->dev, &count);
	if (err != 0) {
		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
		    "err = %d\n", sc->num_slices, err);
		goto abort_with_msix_table;
	}
	if (count < sc->num_slices) {
		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
		    sc->num_slices, count);
		device_printf(sc->dev,
		    "Try setting hw.mxge.max_slices to %d\n",
		    count);
		err = ENOSPC;
		goto abort_with_msix;
	}
	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
	sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_irq_res == NULL) {
		err = ENOMEM;
		goto abort_with_msix;
	}

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
		    SYS_RES_IRQ, &rid, RF_ACTIVE);
		if (sc->msix_irq_res[i] == NULL) {
			device_printf(sc->dev, "couldn't allocate IRQ res"
			    " for message %d\n", i);
			err = ENXIO;
			goto abort_with_res;
		}
	}

	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
	sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_ih == NULL) {
		err = ENOMEM;
		goto abort_with_res;
	}

	for (i = 0; i < sc->num_slices; i++) {
		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
		    INTR_TYPE_NET | INTR_MPSAFE, NULL,
		    mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
		if (err != 0) {
			device_printf(sc->dev, "couldn't setup intr for "
			    "message %d\n", i);
			goto abort_with_intr;
		}
		bus_describe_intr(sc->dev, sc->msix_irq_res[i],
		    sc->msix_ih[i], "s%d", i);
	}

	if (mxge_verbose) {
		device_printf(sc->dev, "using %d msix IRQs:",
		    sc->num_slices);
		for (i = 0; i < sc->num_slices; i++)
			printf(" %jd", rman_get_start(sc->msix_irq_res[i]));
		printf("\n");
	}
	return (0);

abort_with_intr:
	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
			    sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

abort_with_res:
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
			    sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

abort_with_msix:
	pci_release_msi(sc->dev);

abort_with_msix_table:
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
	    sc->msix_table_res);

	return err;
}
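
/*
 * Single-interrupt setup: prefer MSI (rid 1) when the device exposes
 * exactly one message, otherwise fall back to a shared INTx line
 * (rid 0).
 */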
static int
mxge_add_single_irq(mxge_softc_t *sc)
{
	int count, err, rid;

	count = pci_msi_count(sc->dev);
	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
		rid = 1;
	} else {
		rid = 0;
		sc->legacy_irq = 1;
	}
	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
	    RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}
	if (mxge_verbose)
		device_printf(sc->dev, "using %s irq %jd\n",
		    sc->legacy_irq ? "INTx" : "MSI",
		    rman_get_start(sc->irq_res));
	err = bus_setup_intr(sc->dev, sc->irq_res,
	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
	    mxge_intr, &sc->ss[0], &sc->ih);
	if (err != 0) {
		bus_release_resource(sc->dev, SYS_RES_IRQ,
		    sc->legacy_irq ? 0 : 1, sc->irq_res);
		if (!sc->legacy_irq)
			pci_release_msi(sc->dev);
	}
	return err;
}

static void
mxge_rem_msix_irqs(mxge_softc_t *sc)
{
	int i, rid;

	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
			    sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
			    sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
	    sc->msix_table_res);

	pci_release_msi(sc->dev);
}

static void
mxge_rem_single_irq(mxge_softc_t *sc)
{
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	bus_release_resource(sc->dev, SYS_RES_IRQ,
	    sc->legacy_irq ? 0 : 1, sc->irq_res);
	if (!sc->legacy_irq)
		pci_release_msi(sc->dev);
}

static void
mxge_rem_irq(mxge_softc_t *sc)
{
	if (sc->num_slices > 1)
		mxge_rem_msix_irqs(sc);
	else
		mxge_rem_single_irq(sc);
}

static int
mxge_add_irq(mxge_softc_t *sc)
{
	int err;

	if (sc->num_slices > 1)
		err = mxge_add_msix_irqs(sc);
	else
		err = mxge_add_single_irq(sc);

	if (0 && err == 0 && sc->num_slices > 1) {
		mxge_rem_msix_irqs(sc);
		err = mxge_add_msix_irqs(sc);
	}
	return err;
}
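
/*
 * Attach maps the NIC, loads and resets the firmware, sizes the
 * slices, and allocates rings and interrupts before publishing the
 * interface with ether_ifattach(); the abort_with_* labels below
 * unwind those steps in reverse order on failure.
 */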
static int
mxge_attach(device_t dev)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc = device_get_softc(dev);
	if_t ifp;
	int err, rid;

	sc->dev = dev;
	mxge_fetch_tunables(sc);

	TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
	sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
	    taskqueue_thread_enqueue, &sc->tq);
	if (sc->tq == NULL) {
		err = ENOMEM;
		goto abort_with_nothing;
	}

	err = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
	    1,				/* alignment */
	    0,				/* boundary */
	    BUS_SPACE_MAXADDR,		/* low */
	    BUS_SPACE_MAXADDR,		/* high */
	    NULL, NULL,			/* filter */
	    65536 + 256,		/* maxsize */
	    MXGE_MAX_SEND_DESC,		/* num segs */
	    65536,			/* maxsegsize */
	    0,				/* flags */
	    NULL, NULL,			/* lock */
	    &sc->parent_dmat);		/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
		    err);
		goto abort_with_tq;
	}

	ifp = sc->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "cannot if_alloc()\n");
		err = ENOSPC;
		goto abort_with_parent_dmat;
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
	    device_get_nameunit(dev));
	mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
	snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
	    "%s:drv", device_get_nameunit(dev));
	mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
	    MTX_NETWORK_LOCK, MTX_DEF);

	callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
	    RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_lock;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %jd\n",
		    rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/* make a NUL-terminated copy of the EEPROM strings section of
	   lanai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
	    rman_get_bushandle(sc->mem_res),
	    sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
	    sc->eeprom_strings,
	    MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out-of-band DMA memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
	    sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_zeropad_dma;

	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_dmabench;
	sc->intr_coal_delay = mxge_intr_coal_delay;

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0)
		goto abort_with_dmabench;

	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_slices;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		goto abort_with_slices;
	}

	err = mxge_add_irq(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to add irq\n");
		goto abort_with_rings;
	}

	if_setbaudrate(ifp, IF_Gbps(10));
	if_setcapabilities(ifp, IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
	    IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
	    IFCAP_RXCSUM_IPV6);
#if defined(INET) || defined(INET6)
	if_setcapabilitiesbit(ifp, IFCAP_LRO, 0);
#endif

#ifdef MXGE_NEW_VLAN_API
	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM, 0);

	/* Only FW 1.4.32 and newer can do TSO over vlans */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 32)
		if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTSO, 0);
#endif
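
	/*
	 * Advertise jumbo frame support only when the firmware can
	 * handle a 9000-byte MTU; otherwise point the user at newer
	 * firmware that would lift the limit.
	 */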
	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000)
		if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0);
	else
		device_printf(dev, "MTU limited to %d.  Install "
		    "latest firmware for 9000 byte jumbo support\n",
		    sc->max_mtu - ETHER_HDR_LEN);
	if_sethwassist(ifp, CSUM_TCP | CSUM_UDP | CSUM_TSO);
	if_sethwassistbits(ifp, CSUM_TCP_IPV6 | CSUM_UDP_IPV6, 0);
	/* check to see if f/w supports TSO for IPv6 */
	if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
		if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0);
		sc->max_tso6_hlen = min(cmd.data0,
		    sizeof (sc->ss[0].scratch));
	}
	if_setcapenable(ifp, if_getcapabilities(ifp));
	if (sc->lro_cnt == 0)
		if_setcapenablebit(ifp, 0, IFCAP_LRO);
	if_setinitfn(ifp, mxge_init);
	if_setsoftc(ifp, sc);
	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
	if_setioctlfn(ifp, mxge_ioctl);
	if_setstartfn(ifp, mxge_start);
	if_setgetcounterfn(ifp, mxge_get_counter);
	if_sethwtsomax(ifp, IP_MAXPACKET - (ETHER_HDR_LEN +
	    ETHER_VLAN_ENCAP_LEN));
	if_sethwtsomaxsegcount(ifp, sc->ss[0].tx.max_desc);
	if_sethwtsomaxsegsize(ifp, IP_MAXPACKET);
	/* Initialize the ifmedia structure */
	ifmedia_init(&sc->media, 0, mxge_media_change,
	    mxge_media_status);
	mxge_media_init(sc);
	mxge_media_probe(sc);
	sc->dying = 0;
	ether_ifattach(ifp, sc->mac_addr);
	/* ether_ifattach sets mtu to ETHERMTU */
	if (mxge_initial_mtu != ETHERMTU)
		mxge_change_mtu(sc, mxge_initial_mtu);

	mxge_add_sysctls(sc);
	if_settransmitfn(ifp, mxge_transmit);
	if_setqflushfn(ifp, mxge_qflush);
	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(sc->dev));
	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	return 0;

abort_with_rings:
	mxge_free_rings(sc);
abort_with_slices:
	mxge_free_slices(sc);
abort_with_dmabench:
	mxge_dma_free(&sc->dmabench_dma);
abort_with_zeropad_dma:
	mxge_dma_free(&sc->zeropad_dma);
abort_with_cmd_dma:
	mxge_dma_free(&sc->cmd_dma);
abort_with_mem_res:
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
abort_with_lock:
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(ifp);
abort_with_parent_dmat:
	bus_dma_tag_destroy(sc->parent_dmat);
abort_with_tq:
	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->watchdog_task);
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}
abort_with_nothing:
	return err;
}
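
/*
 * Detach tears the device down in roughly the reverse order of
 * attach, and refuses to run while vlan interfaces are still
 * attached to us.
 */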
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (mxge_vlans_active(sc)) {
		device_printf(sc->dev,
		    "Detach vlans before removing module\n");
		return EBUSY;
	}
	mtx_lock(&sc->driver_mtx);
	sc->dying = 1;
	if (if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING)
		mxge_close(sc, 0);
	mtx_unlock(&sc->driver_mtx);
	ether_ifdetach(sc->ifp);
	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->watchdog_task);
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}
	callout_drain(&sc->co_hdl);
	ifmedia_removeall(&sc->media);
	mxge_dummy_rdma(sc, 0);
	mxge_rem_sysctls(sc);
	mxge_rem_irq(sc);
	mxge_free_rings(sc);
	mxge_free_slices(sc);
	mxge_dma_free(&sc->dmabench_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}

static int
mxge_shutdown(device_t dev)
{
	return 0;
}

/*
  This file uses Myri10GE driver indentation.

  Local Variables:
  c-file-style:"linux"
  tab-width:8
  End:
*/