/******************************************************************************
SPDX-License-Identifier: BSD-2-Clause

Copyright (c) 2006-2013, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimer.

2. Neither the name of the Myricom Inc, nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>
#include <contrib/zlib/zlib.h>
#include <dev/zlib/zcalloc.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet6/ip6_var.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h>		/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>
#include <sys/buf_ring.h>

#include "opt_inet.h"
#include "opt_inet6.h"

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static int mxge_throttle = 0;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),

	DEVMETHOD_END
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n", rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}
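/*
 * Illustrative sketch (not part of the original driver): how the
 * mxge_dma_alloc()/mxge_dma_free() pair above is typically used.  The
 * "example_dma" name and the 4096-byte, 4096-aligned request are
 * hypothetical; real callers elsewhere in the driver allocate the
 * command block, the zero pad and the interrupt queues this way and
 * then hand dma.bus_addr to the firmware.
 *
 *	mxge_dma_t example_dma;
 *	int err;
 *
 *	err = mxge_dma_alloc(sc, &example_dma, 4096, 4096);
 *	if (err != 0)
 *		return err;	// tag, memory and map already cleaned up
 *	// example_dma.addr is the kernel virtual address of the region;
 *	// example_dma.bus_addr is the device-visible bus address
 *	...
 *	mxge_dma_free(&example_dma);	// unload map, free mem, destroy tag
 */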
/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */

static int
mxge_parse_strings(mxge_softc_t *sc)
{
	char *ptr;
	int i, found_mac, found_sn2;
	char *endptr;

	ptr = sc->eeprom_strings;
	found_mac = 0;
	found_sn2 = 0;
	while (*ptr != '\0') {
		if (strncmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			for (i = 0;;) {
				sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
				if (endptr - ptr != 2)
					goto abort;
				ptr = endptr;
				if (++i == 6)
					break;
				if (*ptr++ != ':')
					goto abort;
			}
			found_mac = 1;
		} else if (strncmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strlcpy(sc->product_code_string, ptr,
				sizeof(sc->product_code_string));
		} else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
			ptr += 3;
			strlcpy(sc->serial_number_string, ptr,
				sizeof(sc->serial_number_string));
		} else if (strncmp(ptr, "SN2=", 4) == 0) {
			/* SN2 takes precedence over SN */
			ptr += 4;
			found_sn2 = 1;
			strlcpy(sc->serial_number_string, ptr,
				sizeof(sc->serial_number_string));
		}
		while (*ptr++ != '\0') {}
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}
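/*
 * Hypothetical example of the blob parsed above (values invented for
 * illustration, not taken from real hardware):
 *
 *	"MAC=00:60:dd:43:a0:12\0SN=123456\0SN2=654321\0PC=10G-PCIE-8A\0\0"
 *
 * would yield mac_addr = 00:60:dd:43:a0:12, product_code_string =
 * "10G-PCIE-8A" and, because SN2 takes precedence over SN,
 * serial_number_string = "654321".
 */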
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function. Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access.
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machines the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off =  base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_mapdev() failed\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev(va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev(va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev(va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif

static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */

	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}
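/*
 * Worked example of the bandwidth math above, with invented numbers:
 * if len (tx_boundary) is 4096 and the firmware returns
 * cmd.data0 = 0x27108000, the upper 16 bits give 0x2710 = 10000
 * completed transfers and the lower 16 bits give 0x8000 = 32768
 * half-microsecond ticks.  The test moved 10000 * 4096 bytes in
 * 32768 * 0.5us, so the rate is (10000 * 4096 * 2) / 32768 = 2500
 * bytes/us, i.e. roughly 2500 MB/s; the "* 2" converts the 0.5us
 * ticks to microseconds.
 */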
/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.  Not required on Z8ES or newer.
	 */
	if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
		return 0;
	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}

static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;
	int force_firmware = mxge_force_firmware;

	if (sc->throttle)
		force_firmware = sc->throttle;

	if (force_firmware != 0) {
		if (force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{

	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
	       &sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;

}

static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	z_stream zs;
	char *inflate_buffer;
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	size_t fw_len;

	fw = firmware_get(sc->fw_name);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}

	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = zcalloc_nowait;
	zs.zfree = zcfree;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL) {
		status = ENOMEM;
		goto abort_with_zs;
	}
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}

	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
			     (inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file\n");
		status = EIO;
		goto abort_with_buffer;
	}
	hdr = (const void*)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      inflate_buffer + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		(void)*sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	free(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);	/* dummy addr MSW */
	buf[4] = htobe32(dma_low);	/* dummy addr LSW */
	buf[5] = htobe32(enable);	/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)\n",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}

static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);
	mtx_lock(&sc->cmd_mtx);
	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		case MXGEFW_CMD_ERROR_I2C_ABSENT:
			err = ENXIO;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out, "
			      "result = %d\n",
			      cmd, be32toh(response->result));
	mtx_unlock(&sc->cmd_mtx);
	return err;
}

static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}

static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
	/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8);	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "handoff failed (%p = 0x%x)\n",
			      confirm, *confirm);

		return ENXIO;
	}
	return 0;
}
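/*
 * A note on the "(uintptr_t)(buf_bytes + 7) & ~7UL" idiom used by
 * mxge_dummy_rdma(), mxge_send_cmd() and mxge_load_firmware() above:
 * the on-stack buffer carries no alignment guarantee, so the pointer
 * is rounded up to the next 8-byte boundary before the PIO copy.
 * Worked example with an invented address: if buf_bytes starts at
 * 0x1003, then 0x1003 + 7 = 0x100a and 0x100a & ~7 = 0x1008, the
 * first 8-byte aligned address at or after 0x1003; an already aligned
 * 0x1008 maps to itself.  The arrays are oversized by 8 bytes
 * (e.g. 72 bytes for a 64-byte copy) so that enough usable space
 * remains after rounding.
 */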
static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

struct mxge_add_maddr_ctx {
	mxge_softc_t *sc;
	int error;
};

static u_int
mxge_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
{
	struct mxge_add_maddr_ctx *ctx = arg;
	mxge_cmd_t cmd;

	if (ctx->error != 0)
		return (0);
	bcopy(LLADDR(sdl), &cmd.data0, 4);
	bcopy(LLADDR(sdl) + 4, &cmd.data1, 2);
	cmd.data0 = htonl(cmd.data0);
	cmd.data1 = htonl(cmd.data1);

	ctx->error = mxge_send_cmd(ctx->sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);

	return (1);
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	struct mxge_add_maddr_ctx ctx;
	if_t ifp = sc->ifp;
	mxge_cmd_t cmd;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (if_getflags(ifp) & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */
	ctx.sc = sc;
	ctx.error = 0;
	if_foreach_llmaddr(ifp, mxge_add_maddr, &ctx);
	if (ctx.error != 0) {
		device_printf(sc->dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, "
			      "error status: %d\n", ctx.error);
		/* abort, leaving multicast filtering off */
		return;
	}

	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}

static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lc.lro_bad_csum = 0;
		ss->lc.lro_queued = 0;
		ss->lc.lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			bzero(ss->fw_stats, sizeof *ss->fw_stats);
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, if_getflags(sc->ifp) & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
				  &cmd)) {
			device_printf(sc->dev,
				      "can't enable throttle\n");
		}
	}
	return status;
}

static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc;
	int err;
	unsigned int throttle;

	sc = arg1;
	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);
	if (err != 0) {
		return err;
	}

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, sc->product_code_string,
			  0, "product_code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "watchdog_resets",
		       CTLFLAG_RD, &sc->watchdog_resets,
		       0, "Number of times NIC was reset");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "intr_coal_delay", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
	    sc, 0, mxge_change_intr_coal, "I",
	    "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    mxge_change_throttle, "I", "transmit throttling");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "flow_control_enabled",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    mxge_change_flow_control, "I",
	    "enable flow control");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "link_up", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->link_up, 0, mxge_handle_be32, "I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "rdma_tags_available", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->rdma_tags_available, 0, mxge_handle_be32, "I",
	    "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_bad_crc32", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_bad_crc32, 0, mxge_handle_be32, "I",
	    "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_bad_phy", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_bad_phy, 0, mxge_handle_be32, "I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_link_error_or_filtered",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_link_error_or_filtered, 0, mxge_handle_be32, "I",
	    "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_link_overflow",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_link_overflow, 0, mxge_handle_be32, "I",
	    "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_multicast_filtered",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_multicast_filtered, 0, mxge_handle_be32, "I",
	    "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_no_big_buffer",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_no_big_buffer, 0, mxge_handle_be32, "I",
	    "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_no_small_buffer",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_no_small_buffer, 0, mxge_handle_be32, "I",
	    "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_overrun",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_overrun, 0, mxge_handle_be32, "I",
	    "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_pause", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_pause, 0, mxge_handle_be32, "I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_runt", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_runt, 0, mxge_handle_be32, "I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_unicast_filtered",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_unicast_filtered, 0, mxge_handle_be32, "I",
	    "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
		    "slice", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		sprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
			    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
			       0, "number of bad csums preventing LRO");

		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}

/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */
static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}
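/*
 * Worked example of the submit path above, with invented ring state:
 * assume mask = 7 (an 8-slot ring), tx->req = 6 and cnt = 4.  Then
 * idx = 6 and idx + cnt = 10 > mask, so the wrap path is taken:
 * mxge_submit_req_backwards() writes src[3] to slot 1, src[2] to
 * slot 0 and src[1] to slot 7, and mxge_submit_req() then writes
 * src[0] (with its flags cleared) to slot 6.  Only after all four
 * descriptors are in SRAM is the final 32-bit word of slot 6
 * rewritten with the saved valid flags, so the firmware can never
 * observe a partially written chain as valid.
 */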
static int
mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
	      struct mxge_pkt_info *pi)
{
	struct ether_vlan_header *eh;
	uint16_t etype;
	int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
#if IFCAP_TSO6 && defined(INET6)
	int nxt;
#endif

	eh = mtod(m, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		pi->ip_off = ETHER_HDR_LEN;
	}

	switch (etype) {
	case ETHERTYPE_IP:
		/*
		 * ensure ip header is in first mbuf, copy it to a
		 * scratch buffer if not
		 */
		pi->ip = (struct ip *)(m->m_data + pi->ip_off);
		pi->ip6 = NULL;
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
				   ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->ip_hlen = pi->ip->ip_hl << 2;
		if (!tso)
			return 0;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
				    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
				   sizeof(struct tcphdr), ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
		break;
#if IFCAP_TSO6 && defined(INET6)
	case ETHERTYPE_IPV6:
		pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
				   ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		nxt = 0;
		pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
		pi->ip_hlen -= pi->ip_off;
		if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
			return EINVAL;

		if (!tso)
			return 0;

		if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
			return EINVAL;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
				    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
				   sizeof(struct tcphdr), ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
		break;
#endif
	default:
		return EINVAL;
	}
	return 0;
}

#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, struct mxge_pkt_info *pi)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	cksum_offset = pi->ip_off + pi->ip_hlen;
	cum_len = -(cksum_offset + (pi->tcp->th_off << 2));

	/* TSO implies checksum offload on this hardware */
	if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
		/*
		 * If packet has full TCP csum, replace it with pseudo hdr
		 * sum that the NIC expects, otherwise the NIC will emit
		 * packets with bad TCP checksums.
		 */
		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
		if (pi->ip6) {
#if (CSUM_TCP_IPV6 != 0) && defined(INET6)
			m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
			sum = in6_cksum_pseudo(pi->ip6,
					       m->m_pkthdr.len - cksum_offset,
					       IPPROTO_TCP, 0);
#endif
		} else {
#ifdef INET
			m->m_pkthdr.csum_flags |= CSUM_TCP;
			sum = in_pseudo(pi->ip->ip_src.s_addr,
					pi->ip->ip_dst.s_addr,
					htons(IPPROTO_TCP + (m->m_pkthdr.len -
							     cksum_offset)));
#endif
		}
		m_copyback(m, offsetof(struct tcphdr, th_sum) +
			   cksum_offset, sizeof(sum), (caddr_t)&sum);
	}
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	if (pi->ip6) {
		/*
		 * for IPv6 TSO, the "checksum offset" is re-purposed
		 * to store the TCP header len
		 */
		cksum_offset = (pi->tcp->th_off << 2);
	}

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (cksum_offset != 0 && !pi->ip6) {
				if (__predict_false(cksum_offset > seglen))
					cksum_offset -= seglen;
				else
					cksum_offset = 0;
			}
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);

	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}

	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		       (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;

}

#endif /* IFCAP_TSO4 */
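/*
 * Worked example of the TSO descriptor walk above, with invented
 * numbers: assume 14-byte Ethernet + 20-byte IP + 20-byte TCP headers
 * (cum_len starts at -54), mss = 1448, and a single 4096-byte busdma
 * segment.  The first pass through the inner loop takes the "header
 * ends" branch (cum_len < 0, cum_len_next >= 0), so seglen is clamped
 * to 54 and a header descriptor is emitted.  The second pass emits one
 * 4042-byte payload descriptor; since 4042 > mss it carries
 * MXGEFW_FLAGS_TSO_CHOP and the firmware cuts it into mss-sized
 * frames, with cum_len_next = 4042 % 1448 = 1146 carried into the
 * next segment.  rdma_count is backfilled into the descriptor that
 * started the current run each time a cut, or the end of the packet,
 * is reached.
 */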
2005 */ 2006 evl = mtod(m, struct ether_vlan_header *); 2007 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2008 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2009 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2010 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2011 m->m_flags &= ~M_VLANTAG; 2012 return m; 2013 } 2014 #endif /* MXGE_NEW_VLAN_API */ 2015 2016 static void 2017 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2018 { 2019 struct mxge_pkt_info pi = {0,0,0,0}; 2020 mxge_softc_t *sc; 2021 mcp_kreq_ether_send_t *req; 2022 bus_dma_segment_t *seg; 2023 struct mbuf *m_tmp; 2024 mxge_tx_ring_t *tx; 2025 int cnt, cum_len, err, i, idx, odd_flag; 2026 uint16_t pseudo_hdr_offset; 2027 uint8_t flags, cksum_offset; 2028 2029 sc = ss->sc; 2030 tx = &ss->tx; 2031 2032 #ifdef MXGE_NEW_VLAN_API 2033 if (m->m_flags & M_VLANTAG) { 2034 m = mxge_vlan_tag_insert(m); 2035 if (__predict_false(m == NULL)) 2036 goto drop_without_m; 2037 } 2038 #endif 2039 if (m->m_pkthdr.csum_flags & 2040 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2041 if (mxge_parse_tx(ss, m, &pi)) 2042 goto drop; 2043 } 2044 2045 /* (try to) map the frame for DMA */ 2046 idx = tx->req & tx->mask; 2047 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2048 m, tx->seg_list, &cnt, 2049 BUS_DMA_NOWAIT); 2050 if (__predict_false(err == EFBIG)) { 2051 /* Too many segments in the chain. Try 2052 to defrag */ 2053 m_tmp = m_defrag(m, M_NOWAIT); 2054 if (m_tmp == NULL) { 2055 goto drop; 2056 } 2057 ss->tx.defrag++; 2058 m = m_tmp; 2059 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2060 tx->info[idx].map, 2061 m, tx->seg_list, &cnt, 2062 BUS_DMA_NOWAIT); 2063 } 2064 if (__predict_false(err != 0)) { 2065 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2066 " packet len = %d\n", err, m->m_pkthdr.len); 2067 goto drop; 2068 } 2069 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2070 BUS_DMASYNC_PREWRITE); 2071 tx->info[idx].m = m; 2072 2073 #if IFCAP_TSO4 2074 /* TSO is different enough, we handle it in another routine */ 2075 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2076 mxge_encap_tso(ss, m, cnt, &pi); 2077 return; 2078 } 2079 #endif 2080 2081 req = tx->req_list; 2082 cksum_offset = 0; 2083 pseudo_hdr_offset = 0; 2084 flags = MXGEFW_FLAGS_NO_TSO; 2085 2086 /* checksum offloading? 
*/ 2087 if (m->m_pkthdr.csum_flags & 2088 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2089 /* ensure ip header is in first mbuf, copy 2090 it to a scratch buffer if not */ 2091 cksum_offset = pi.ip_off + pi.ip_hlen; 2092 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2093 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2094 req->cksum_offset = cksum_offset; 2095 flags |= MXGEFW_FLAGS_CKSUM; 2096 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2097 } else { 2098 odd_flag = 0; 2099 } 2100 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2101 flags |= MXGEFW_FLAGS_SMALL; 2102 2103 /* convert segments into a request list */ 2104 cum_len = 0; 2105 seg = tx->seg_list; 2106 req->flags = MXGEFW_FLAGS_FIRST; 2107 for (i = 0; i < cnt; i++) { 2108 req->addr_low = 2109 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2110 req->addr_high = 2111 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2112 req->length = htobe16(seg->ds_len); 2113 req->cksum_offset = cksum_offset; 2114 if (cksum_offset > seg->ds_len) 2115 cksum_offset -= seg->ds_len; 2116 else 2117 cksum_offset = 0; 2118 req->pseudo_hdr_offset = pseudo_hdr_offset; 2119 req->pad = 0; /* complete solid 16-byte block */ 2120 req->rdma_count = 1; 2121 req->flags |= flags | ((cum_len & 1) * odd_flag); 2122 cum_len += seg->ds_len; 2123 seg++; 2124 req++; 2125 req->flags = 0; 2126 } 2127 req--; 2128 /* pad runts to 60 bytes */ 2129 if (cum_len < 60) { 2130 req++; 2131 req->addr_low = 2132 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2133 req->addr_high = 2134 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2135 req->length = htobe16(60 - cum_len); 2136 req->cksum_offset = 0; 2137 req->pseudo_hdr_offset = pseudo_hdr_offset; 2138 req->pad = 0; /* complete solid 16-byte block */ 2139 req->rdma_count = 1; 2140 req->flags |= flags | ((cum_len & 1) * odd_flag); 2141 cnt++; 2142 } 2143 2144 tx->req_list[0].rdma_count = cnt; 2145 #if 0 2146 /* print what the firmware will see */ 2147 for (i = 0; i < cnt; i++) { 2148 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2149 "cso:%d, flags:0x%x, rdma:%d\n", 2150 i, (int)ntohl(tx->req_list[i].addr_high), 2151 (int)ntohl(tx->req_list[i].addr_low), 2152 (int)ntohs(tx->req_list[i].length), 2153 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2154 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2155 tx->req_list[i].rdma_count); 2156 } 2157 printf("--------------\n"); 2158 #endif 2159 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2160 mxge_submit_req(tx, tx->req_list, cnt); 2161 2162 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2163 /* tell the NIC to start polling this slice */ 2164 *tx->send_go = 1; 2165 tx->queue_active = 1; 2166 tx->activate++; 2167 wmb(); 2168 } 2169 2170 return; 2171 2172 drop: 2173 m_freem(m); 2174 drop_without_m: 2175 ss->oerrors++; 2176 return; 2177 } 2178 2179 static void 2180 mxge_qflush(if_t ifp) 2181 { 2182 mxge_softc_t *sc = if_getsoftc(ifp); 2183 mxge_tx_ring_t *tx; 2184 struct mbuf *m; 2185 int slice; 2186 2187 for (slice = 0; slice < sc->num_slices; slice++) { 2188 tx = &sc->ss[slice].tx; 2189 mtx_lock(&tx->mtx); 2190 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2191 m_freem(m); 2192 mtx_unlock(&tx->mtx); 2193 } 2194 if_qflush(ifp); 2195 } 2196 2197 static inline void 2198 mxge_start_locked(struct mxge_slice_state *ss) 2199 { 2200 mxge_softc_t *sc; 2201 struct mbuf *m; 2202 if_t ifp; 2203 mxge_tx_ring_t *tx; 2204 2205 sc = ss->sc; 2206 ifp = sc->ifp; 2207 tx = &ss->tx; 2208 2209 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2210 m = 
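
/*
 * Illustrative example of the offsets computed in mxge_encap() above
 * (added commentary): for an untagged TCP/IPv4 frame with a 20-byte IP
 * header, pi.ip_off is 14 and pi.ip_hlen is 20, so cksum_offset is 34,
 * the start of the TCP header.  The stack sets csum_data to the offset
 * of th_sum within the TCP header (16), so pseudo_hdr_offset becomes
 * htobe16(50), which presumably tells the firmware where to store the
 * checksum it computes from cksum_offset onward.
 */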

static void
mxge_qflush(if_t ifp)
{
	mxge_softc_t *sc = if_getsoftc(ifp);
	mxge_tx_ring_t *tx;
	struct mbuf *m;
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++) {
		tx = &sc->ss[slice].tx;
		mtx_lock(&tx->mtx);
		while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
			m_freem(m);
		mtx_unlock(&tx->mtx);
	}
	if_qflush(ifp);
}

static inline void
mxge_start_locked(struct mxge_slice_state *ss)
{
	mxge_softc_t *sc;
	struct mbuf *m;
	if_t ifp;
	mxge_tx_ring_t *tx;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;

	while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
		m = drbr_dequeue(ifp, tx->br);
		if (m == NULL) {
			return;
		}
		/* let BPF see it */
		BPF_MTAP(ifp, m);

		/* give it to the nic */
		mxge_encap(ss, m);
	}
	/* ran out of transmit slots */
	if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
	    && (!drbr_empty(ifp, tx->br))) {
		ss->if_drv_flags |= IFF_DRV_OACTIVE;
		tx->stall++;
	}
}

static int
mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
{
	mxge_softc_t *sc;
	if_t ifp;
	mxge_tx_ring_t *tx;
	int err;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;

	if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING) {
		err = drbr_enqueue(ifp, tx->br, m);
		return (err);
	}

	if (!drbr_needs_enqueue(ifp, tx->br) &&
	    ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
		/* let BPF see it */
		BPF_MTAP(ifp, m);
		/* give it to the nic */
		mxge_encap(ss, m);
	} else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
		return (err);
	}
	if (!drbr_empty(ifp, tx->br))
		mxge_start_locked(ss);
	return (0);
}

static int
mxge_transmit(if_t ifp, struct mbuf *m)
{
	mxge_softc_t *sc = if_getsoftc(ifp);
	struct mxge_slice_state *ss;
	mxge_tx_ring_t *tx;
	int err = 0;
	int slice;

	slice = m->m_pkthdr.flowid;
	slice &= (sc->num_slices - 1);	/* num_slices always power of 2 */

	ss = &sc->ss[slice];
	tx = &ss->tx;

	if (mtx_trylock(&tx->mtx)) {
		err = mxge_transmit_locked(ss, m);
		mtx_unlock(&tx->mtx);
	} else {
		err = drbr_enqueue(ifp, tx->br, m);
	}

	return (err);
}

static void
mxge_start(if_t ifp)
{
	mxge_softc_t *sc = if_getsoftc(ifp);
	struct mxge_slice_state *ss;

	/* only use the first slice for now */
	ss = &sc->ss[0];
	mtx_lock(&ss->tx.mtx);
	mxge_start_locked(ss);
	mtx_unlock(&ss->tx.mtx);
}
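
/*
 * A note on the transmit dispatch above (added commentary): since
 * num_slices is always a power of two, masking the mbuf's flowid with
 * (num_slices - 1) in mxge_transmit() is equivalent to flowid %
 * num_slices; e.g. flowid 29 with 4 slices selects ring 29 & 3 = 1.
 * The mtx_trylock() keeps the fast path from blocking: if another
 * thread owns the ring lock, the frame is simply left on the buf_ring
 * for the lock owner to drain.
 */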

/*
 * copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's low
 * DMA address to mark it valid only after we write the entire chunk
 * in a burst
 */
static inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
		mcp_kreq_ether_recv_t *src)
{
	uint32_t low;

	low = src->addr_low;
	src->addr_low = 0xffffffff;
	mxge_pio_copy(dst, src, 4 * sizeof (*src));
	wmb();
	mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	wmb();
	src->addr_low = low;
	dst->addr_low = low;
	wmb();
}

static int
mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	mxge_rx_ring_t *rx = &ss->rx_small;
	int cnt, err;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = MHLEN;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      &seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

done:
	if ((idx & 7) == 7)
		mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
	return err;
}

static int
mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg[3];
	struct mbuf *m;
	mxge_rx_ring_t *rx = &ss->rx_big;
	int cnt, err, i;

	m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = rx->mlen;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));

done:
	for (i = 0; i < rx->nbufs; i++) {
		if ((idx & 7) == 7) {
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		}
		idx++;
	}
	return err;
}

#ifdef INET6

static uint16_t
mxge_csum_generic(uint16_t *raw, int len)
{
	uint32_t csum;

	csum = 0;
	while (len > 0) {
		csum += *raw;
		raw++;
		len -= 2;
	}
	csum = (csum >> 16) + (csum & 0xffff);
	csum = (csum >> 16) + (csum & 0xffff);
	return (uint16_t)csum;
}

static inline uint16_t
mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
{
	uint32_t partial;
	int nxt, cksum_offset;
	struct ip6_hdr *ip6 = p;
	uint16_t c;

	nxt = ip6->ip6_nxt;
	cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
	if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
		cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
					   IPPROTO_IPV6, &nxt);
		if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
			return (1);
	}

	/*
	 * IPv6 headers do not contain a checksum, and hence
	 * do not checksum to zero, so they don't "fall out"
	 * of the partial checksum calculation like IPv4
	 * headers do.  We need to fix the partial checksum by
	 * subtracting the checksum of the IPv6 header.
	 */

	partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
				    ETHER_HDR_LEN);
	csum += ~partial;
	csum += (csum < ~partial);
	csum = (csum >> 16) + (csum & 0xFFFF);
	csum = (csum >> 16) + (csum & 0xFFFF);
	c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
			     csum);
	c ^= 0xffff;
	return (c);
}
#endif /* INET6 */

/*
 * Myri10GE hardware checksums are not valid if the sender
 * padded the frame with non-zero padding.  This is because
 * the firmware just does a simple 16-bit 1s complement
 * checksum across the entire frame, excluding the first 14
 * bytes.  It is best to simply check the checksum and
 * tell the stack about it only if the checksum is good
 */
static inline uint16_t
mxge_rx_csum(struct mbuf *m, int csum)
{
	struct ether_header *eh;
#ifdef INET
	struct ip *ip;
#endif
#if defined(INET) || defined(INET6)
	int cap = if_getcapenable(m->m_pkthdr.rcvif);
#endif
	uint16_t c, etype;

	eh = mtod(m, struct ether_header *);
	etype = ntohs(eh->ether_type);
	switch (etype) {
#ifdef INET
	case ETHERTYPE_IP:
		if ((cap & IFCAP_RXCSUM) == 0)
			return (1);
		ip = (struct ip *)(eh + 1);
		if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
			return (1);
		c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
			      htonl(ntohs(csum) + ntohs(ip->ip_len) -
				    (ip->ip_hl << 2) + ip->ip_p));
		c ^= 0xffff;
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		if ((cap & IFCAP_RXCSUM_IPV6) == 0)
			return (1);
		c = mxge_rx_csum6((eh + 1), m, csum);
		break;
#endif
	default:
		c = 1;
	}
	return (c);
}

static void
mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
{
	struct ether_vlan_header *evl;
	uint32_t partial;

	evl = mtod(m, struct ether_vlan_header *);

	/*
	 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
	 * after what the firmware thought was the end of the ethernet
	 * header.
	 */

	/* put checksum into host byte order */
	*csum = ntohs(*csum);
	partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
	(*csum) += ~partial;
	(*csum) += ((*csum) < ~partial);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);

	/* restore checksum to network byte order;
	   later consumers expect this */
	*csum = htons(*csum);

	/* save the tag */
#ifdef MXGE_NEW_VLAN_API
	m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
#else
	{
		struct m_tag *mtag;
		mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
				   M_NOWAIT);
		if (mtag == NULL)
			return;
		VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
		m_tag_prepend(m, mtag);
	}

#endif
	m->m_flags |= M_VLANTAG;

	/*
	 * Remove the 802.1q header by copying the Ethernet
	 * addresses over it and adjusting the beginning of
	 * the data in the mbuf.  The encapsulated Ethernet
	 * type field is already in place.
	 */
	bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
	      ETHER_HDR_LEN - ETHER_TYPE_LEN);
	m_adj(m, ETHER_VLAN_ENCAP_LEN);
}

static inline void
mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
		 uint32_t csum, int lro)
{
	mxge_softc_t *sc;
	if_t ifp;
	struct mbuf *m;
	struct ether_header *eh;
	mxge_rx_ring_t *rx;
	bus_dmamap_t old_map;
	int idx;

	sc = ss->sc;
	ifp = sc->ifp;
	rx = &ss->rx_big;
	idx = rx->cnt & rx->mask;
	rx->cnt += rx->nbufs;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ss->ipackets++;
	eh = mtod(m, struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		mxge_vlan_tag_remove(m, &csum);
	}
	/* flowid only valid if RSS hashing is enabled */
	if (sc->num_slices > 1) {
		m->m_pkthdr.flowid = (ss - sc->ss);
		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
	}
	/* if the checksum is valid, mark it in the mbuf header */
	if ((if_getcapenable(ifp) & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
	    (0 == mxge_rx_csum(m, csum))) {
		/* Tell the stack that the checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
			CSUM_DATA_VALID;

#if defined(INET) || defined (INET6)
		if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
			return;
#endif
	}
	/* pass the frame up the stack */
	if_input(ifp, m);
}

static inline void
mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
		   uint32_t csum, int lro)
{
	mxge_softc_t *sc;
	if_t ifp;
	struct ether_header *eh;
	struct mbuf *m;
	mxge_rx_ring_t *rx;
	bus_dmamap_t old_map;
	int idx;

	sc = ss->sc;
	ifp = sc->ifp;
	rx = &ss->rx_small;
	idx = rx->cnt & rx->mask;
	rx->cnt++;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ss->ipackets++;
	eh = mtod(m, struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		mxge_vlan_tag_remove(m, &csum);
	}
	/* flowid only valid if RSS hashing is enabled */
	if (sc->num_slices > 1) {
		m->m_pkthdr.flowid = (ss - sc->ss);
		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
	}
	/* if the checksum is valid, mark it in the mbuf header */
	if ((if_getcapenable(ifp) & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
	    (0 == mxge_rx_csum(m, csum))) {
		/* Tell the stack that the checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
			CSUM_DATA_VALID;

#if defined(INET) || defined (INET6)
		if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
			return;
#endif
	}
	/* pass the frame up the stack */
	if_input(ifp, m);
}

static inline void
mxge_clean_rx_done(struct mxge_slice_state *ss)
{
	mxge_rx_done_t *rx_done = &ss->rx_done;
	int limit = 0;
	uint16_t length;
	uint16_t checksum;
	int lro;

	lro = if_getcapenable(ss->sc->ifp) & IFCAP_LRO;
	while (rx_done->entry[rx_done->idx].length != 0) {
		length = ntohs(rx_done->entry[rx_done->idx].length);
		rx_done->entry[rx_done->idx].length = 0;
		checksum = rx_done->entry[rx_done->idx].checksum;
		if (length <= (MHLEN - MXGEFW_PAD))
			mxge_rx_done_small(ss, length, checksum, lro);
		else
			mxge_rx_done_big(ss, length, checksum, lro);
		rx_done->cnt++;
		rx_done->idx = rx_done->cnt & rx_done->mask;

		/* limit potential for livelock */
		if (__predict_false(++limit > rx_done->mask / 2))
			break;
	}
#if defined(INET) || defined (INET6)
	tcp_lro_flush_all(&ss->lc);
#endif
}

static inline void
mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
{
	if_t ifp __unused;
	mxge_tx_ring_t *tx;
	struct mbuf *m;
	bus_dmamap_t map;
	int idx;
	int *flags;

	tx = &ss->tx;
	ifp = ss->sc->ifp;
	while (tx->pkt_done != mcp_idx) {
		idx = tx->done & tx->mask;
		tx->done++;
		m = tx->info[idx].m;
		/* mbuf and DMA map only attached to the first
		   segment per-mbuf */
		if (m != NULL) {
			ss->obytes += m->m_pkthdr.len;
			if (m->m_flags & M_MCAST)
				ss->omcasts++;
			ss->opackets++;
			tx->info[idx].m = NULL;
			map = tx->info[idx].map;
			bus_dmamap_unload(tx->dmat, map);
			m_freem(m);
		}
		if (tx->info[idx].flag) {
			tx->info[idx].flag = 0;
			tx->pkt_done++;
		}
	}

	/* If we have space, clear IFF_OACTIVE to tell the stack that
	   it's OK to send packets */
	flags = &ss->if_drv_flags;
	mtx_lock(&ss->tx.mtx);
	if ((*flags) & IFF_DRV_OACTIVE &&
	    tx->req - tx->done < (tx->mask + 1)/4) {
		*(flags) &= ~IFF_DRV_OACTIVE;
		ss->tx.wake++;
		mxge_start_locked(ss);
	}
	if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
		/* let the NIC stop polling this queue, since there
		 * are no more transmits pending */
		if (tx->req == tx->done) {
			*tx->send_stop = 1;
			tx->queue_active = 0;
			tx->deactivate++;
			wmb();
		}
	}
	mtx_unlock(&ss->tx.mtx);
}
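
/*
 * Added commentary on the ring accounting used above: tx->req and
 * tx->done are free-running counters, so (tx->req - tx->done) is the
 * number of in-flight descriptors even across wraparound, and
 * (tx->mask - (tx->req - tx->done)) in mxge_start_locked() is the
 * remaining space; e.g. a 1024-entry ring (mask 0x3ff) with req = 1030
 * and done = 1020 has 1023 - 10 = 1013 free slots.
 */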

static struct mxge_media_type mxge_xfp_media_types[] =
{
	{IFM_10G_CX4,	0x7f,		"10GBASE-CX4 (module)"},
	{IFM_10G_SR,	(1 << 7),	"10GBASE-SR"},
	{IFM_10G_LR,	(1 << 6),	"10GBASE-LR"},
	{0,		(1 << 5),	"10GBASE-ER"},
	{IFM_10G_LRM,	(1 << 4),	"10GBASE-LRM"},
	{0,		(1 << 3),	"10GBASE-SW"},
	{0,		(1 << 2),	"10GBASE-LW"},
	{0,		(1 << 1),	"10GBASE-EW"},
	{0,		(1 << 0),	"Reserved"}
};
static struct mxge_media_type mxge_sfp_media_types[] =
{
	{IFM_10G_TWINAX,      0,	"10GBASE-Twinax"},
	{0,		(1 << 7),	"Reserved"},
	{IFM_10G_LRM,	(1 << 6),	"10GBASE-LRM"},
	{IFM_10G_LR,	(1 << 5),	"10GBASE-LR"},
	{IFM_10G_SR,	(1 << 4),	"10GBASE-SR"},
	{IFM_10G_TWINAX,(1 << 0),	"10GBASE-Twinax"}
};

static void
mxge_media_set(mxge_softc_t *sc, int media_type)
{

	ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
		    0, NULL);
	ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
	sc->current_media = media_type;
	sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
}

static void
mxge_media_init(mxge_softc_t *sc)
{
	char *ptr;
	int i;

	ifmedia_removeall(&sc->media);
	mxge_media_set(sc, IFM_AUTO);

	/*
	 * parse the product code to determine the interface type
	 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
	 * after the 3rd dash in the driver's cached copy of the
	 * EEPROM's product code string.
	 */
	ptr = sc->product_code_string;
	if (ptr == NULL) {
		device_printf(sc->dev, "Missing product code\n");
		return;
	}

	for (i = 0; i < 3; i++, ptr++) {
		ptr = strchr(ptr, '-');
		if (ptr == NULL) {
			device_printf(sc->dev,
				      "only %d dashes in PC?!?\n", i);
			return;
		}
	}
	if (*ptr == 'C' || *(ptr + 1) == 'C') {
		/* -C is CX4 */
		sc->connector = MXGE_CX4;
		mxge_media_set(sc, IFM_10G_CX4);
	} else if (*ptr == 'Q') {
		/* -Q is Quad Ribbon Fiber */
		sc->connector = MXGE_QRF;
		device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
		/* FreeBSD has no media type for Quad ribbon fiber */
	} else if (*ptr == 'R') {
		/* -R is XFP */
		sc->connector = MXGE_XFP;
	} else if (*ptr == 'S' || *(ptr + 1) == 'S') {
		/* -S or -2S is SFP+ */
		sc->connector = MXGE_SFP;
	} else {
		device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
	}
}
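
/*
 * For example (hypothetical product code, added for illustration):
 * given "10G-PCIE-8B-S", the loop in mxge_media_init() above advances
 * past three dashes and leaves ptr at 'S', so the connector is
 * classified as SFP+.
 */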

/*
 * Determine the media type for a NIC.  Some XFPs will identify
 * themselves only when their link is up, so this is initiated via a
 * link up interrupt.  However, this can potentially take up to
 * several milliseconds, so it is run via the watchdog routine, rather
 * than in the interrupt handler itself.
 */
static void
mxge_media_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *cage_type;

	struct mxge_media_type *mxge_media_types = NULL;
	int i, err, ms, mxge_media_type_entries;
	uint32_t byte;

	sc->need_media_probe = 0;

	if (sc->connector == MXGE_XFP) {
		/* -R is XFP */
		mxge_media_types = mxge_xfp_media_types;
		mxge_media_type_entries =
			nitems(mxge_xfp_media_types);
		byte = MXGE_XFP_COMPLIANCE_BYTE;
		cage_type = "XFP";
	} else if (sc->connector == MXGE_SFP) {
		/* -S or -2S is SFP+ */
		mxge_media_types = mxge_sfp_media_types;
		mxge_media_type_entries =
			nitems(mxge_sfp_media_types);
		cage_type = "SFP+";
		byte = 3;
	} else {
		/* nothing to do; media type cannot change */
		return;
	}

	/*
	 * At this point we know the NIC has an XFP cage, so now we
	 * try to determine what is in the cage by using the
	 * firmware's XFP I2C commands to read the XFP 10GbE compliance
	 * register.  We read just one byte, which may take over
	 * a millisecond
	 */

	cmd.data0 = 0;	 /* just fetch 1 byte, not all 256 */
	cmd.data1 = byte;
	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
	if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
		device_printf(sc->dev, "failed to read XFP\n");
	}
	if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
		device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
	}
	if (err != MXGEFW_CMD_OK) {
		return;
	}

	/* now we wait for the data to be cached */
	cmd.data0 = byte;
	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
	for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
		DELAY(1000);
		cmd.data0 = byte;
		err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
	}
	if (err != MXGEFW_CMD_OK) {
		device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
			      cage_type, err, ms);
		return;
	}

	if (cmd.data0 == mxge_media_types[0].bitmask) {
		if (mxge_verbose)
			device_printf(sc->dev, "%s:%s\n", cage_type,
				      mxge_media_types[0].name);
		if (sc->current_media != mxge_media_types[0].flag) {
			mxge_media_init(sc);
			mxge_media_set(sc, mxge_media_types[0].flag);
		}
		return;
	}
	for (i = 1; i < mxge_media_type_entries; i++) {
		if (cmd.data0 & mxge_media_types[i].bitmask) {
			if (mxge_verbose)
				device_printf(sc->dev, "%s:%s\n",
					      cage_type,
					      mxge_media_types[i].name);

			if (sc->current_media != mxge_media_types[i].flag) {
				mxge_media_init(sc);
				mxge_media_set(sc, mxge_media_types[i].flag);
			}
			return;
		}
	}
	if (mxge_verbose)
		device_printf(sc->dev, "%s media 0x%x unknown\n",
			      cage_type, cmd.data0);

	return;
}
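
/*
 * Added note on the two-step I2C access used above: MXGEFW_CMD_I2C_READ
 * asks the firmware to fetch a byte from the transceiver, and
 * MXGEFW_CMD_I2C_BYTE is then polled (it returns EBUSY for up to ~50ms
 * here) until the byte has been cached and can be returned in
 * cmd.data0.
 */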

static void
mxge_intr(void *arg)
{
	struct mxge_slice_state *ss = arg;
	mxge_softc_t *sc = ss->sc;
	mcp_irq_data_t *stats = ss->fw_stats;
	mxge_tx_ring_t *tx = &ss->tx;
	mxge_rx_done_t *rx_done = &ss->rx_done;
	uint32_t send_done_count;
	uint8_t valid;

	/* make sure the DMA has finished */
	if (!stats->valid) {
		return;
	}
	valid = stats->valid;

	if (sc->legacy_irq) {
		/* lower legacy IRQ */
		*sc->irq_deassert = 0;
		if (!mxge_deassert_wait)
			/* don't wait for confirmation that irq is low */
			stats->valid = 0;
	} else {
		stats->valid = 0;
	}

	/* loop while waiting for legacy irq deassertion */
	do {
		/* check for transmit completes and receives */
		send_done_count = be32toh(stats->send_done_count);
		while ((send_done_count != tx->pkt_done) ||
		       (rx_done->entry[rx_done->idx].length != 0)) {
			if (send_done_count != tx->pkt_done)
				mxge_tx_done(ss, (int)send_done_count);
			mxge_clean_rx_done(ss);
			send_done_count = be32toh(stats->send_done_count);
		}
		if (sc->legacy_irq && mxge_deassert_wait)
			wmb();
	} while (*((volatile uint8_t *) &stats->valid));

	/* fw link & error stats meaningful only on the first slice */
	if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
		if (sc->link_state != stats->link_up) {
			sc->link_state = stats->link_up;
			if (sc->link_state) {
				if_link_state_change(sc->ifp, LINK_STATE_UP);
				if (mxge_verbose)
					device_printf(sc->dev, "link up\n");
			} else {
				if_link_state_change(sc->ifp, LINK_STATE_DOWN);
				if (mxge_verbose)
					device_printf(sc->dev, "link down\n");
			}
			sc->need_media_probe = 1;
		}
		if (sc->rdma_tags_available !=
		    be32toh(stats->rdma_tags_available)) {
			sc->rdma_tags_available =
				be32toh(stats->rdma_tags_available);
			device_printf(sc->dev, "RDMA timed out! %d tags "
				      "left\n", sc->rdma_tags_available);
		}

		if (stats->link_down) {
			sc->down_cnt += stats->link_down;
			sc->link_state = 0;
			if_link_state_change(sc->ifp, LINK_STATE_DOWN);
		}
	}

	/* check to see if we have rx token to pass back */
	if (valid & 0x1)
		*ss->irq_claim = be32toh(3);
	*(ss->irq_claim + 1) = be32toh(3);
}
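
/*
 * Added note on the interrupt handshake above: the writes through
 * ss->irq_claim appear to hand the receive and transmit interrupt
 * tokens back to the firmware once the completion queues are drained;
 * with a legacy (INTx) interrupt and mxge_deassert_wait set, the
 * handler also spins until stats->valid clears so it does not return
 * while the line may still be asserted.
 */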

static void
mxge_init(void *arg)
{
	mxge_softc_t *sc = arg;
	if_t ifp = sc->ifp;

	mtx_lock(&sc->driver_mtx);
	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
		(void) mxge_open(sc);
	mtx_unlock(&sc->driver_mtx);
}

static void
mxge_free_slice_mbufs(struct mxge_slice_state *ss)
{
	int i;

#if defined(INET) || defined(INET6)
	tcp_lro_free(&ss->lc);
#endif
	for (i = 0; i <= ss->rx_big.mask; i++) {
		if (ss->rx_big.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->rx_big.dmat,
				  ss->rx_big.info[i].map);
		m_freem(ss->rx_big.info[i].m);
		ss->rx_big.info[i].m = NULL;
	}

	for (i = 0; i <= ss->rx_small.mask; i++) {
		if (ss->rx_small.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->rx_small.dmat,
				  ss->rx_small.info[i].map);
		m_freem(ss->rx_small.info[i].m);
		ss->rx_small.info[i].m = NULL;
	}

	/* transmit ring used only on the first slice */
	if (ss->tx.info == NULL)
		return;

	for (i = 0; i <= ss->tx.mask; i++) {
		ss->tx.info[i].flag = 0;
		if (ss->tx.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->tx.dmat,
				  ss->tx.info[i].map);
		m_freem(ss->tx.info[i].m);
		ss->tx.info[i].m = NULL;
	}
}

static void
mxge_free_mbufs(mxge_softc_t *sc)
{
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++)
		mxge_free_slice_mbufs(&sc->ss[slice]);
}

static void
mxge_free_slice_rings(struct mxge_slice_state *ss)
{
	int i;

	if (ss->rx_done.entry != NULL)
		mxge_dma_free(&ss->rx_done.dma);
	ss->rx_done.entry = NULL;

	if (ss->tx.req_bytes != NULL)
		free(ss->tx.req_bytes, M_DEVBUF);
	ss->tx.req_bytes = NULL;

	if (ss->tx.seg_list != NULL)
		free(ss->tx.seg_list, M_DEVBUF);
	ss->tx.seg_list = NULL;

	if (ss->rx_small.shadow != NULL)
		free(ss->rx_small.shadow, M_DEVBUF);
	ss->rx_small.shadow = NULL;

	if (ss->rx_big.shadow != NULL)
		free(ss->rx_big.shadow, M_DEVBUF);
	ss->rx_big.shadow = NULL;

	if (ss->tx.info != NULL) {
		if (ss->tx.dmat != NULL) {
			for (i = 0; i <= ss->tx.mask; i++) {
				bus_dmamap_destroy(ss->tx.dmat,
						   ss->tx.info[i].map);
			}
			bus_dma_tag_destroy(ss->tx.dmat);
		}
		free(ss->tx.info, M_DEVBUF);
	}
	ss->tx.info = NULL;

	if (ss->rx_small.info != NULL) {
		if (ss->rx_small.dmat != NULL) {
			for (i = 0; i <= ss->rx_small.mask; i++) {
				bus_dmamap_destroy(ss->rx_small.dmat,
						   ss->rx_small.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_small.dmat,
					   ss->rx_small.extra_map);
			bus_dma_tag_destroy(ss->rx_small.dmat);
		}
		free(ss->rx_small.info, M_DEVBUF);
	}
	ss->rx_small.info = NULL;

	if (ss->rx_big.info != NULL) {
		if (ss->rx_big.dmat != NULL) {
			for (i = 0; i <= ss->rx_big.mask; i++) {
				bus_dmamap_destroy(ss->rx_big.dmat,
						   ss->rx_big.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_big.dmat,
					   ss->rx_big.extra_map);
			bus_dma_tag_destroy(ss->rx_big.dmat);
		}
		free(ss->rx_big.info, M_DEVBUF);
	}
	ss->rx_big.info = NULL;
}

static void
mxge_free_rings(mxge_softc_t *sc)
{
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++)
		mxge_free_slice_rings(&sc->ss[slice]);
}
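
/*
 * Added note ahead of mxge_alloc_slice_rings() below: the completion
 * ring mask is set to (2 * rx_ring_entries) - 1, presumably because
 * both the small and big receive rings complete into the same rx_done
 * queue, so it must be able to hold an entry for every buffer posted
 * to either ring.
 */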

static int
mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
		       int tx_ring_entries)
{
	mxge_softc_t *sc = ss->sc;
	size_t bytes;
	int err, i;

	/* allocate per-slice receive resources */

	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
	ss->rx_done.mask = (2 * rx_ring_entries) - 1;

	/* allocate the rx shadow rings */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* allocate the rx host info rings */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* allocate the rx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MHLEN,			/* maxsize */
				 1,			/* num segs */
				 MHLEN,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
			      err);
		return err;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 0,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 3*4096,		/* maxsize */
				 1,			/* num segs */
				 MJUM9BYTES,		/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
			      err);
		return err;
	}
	for (i = 0; i <= ss->rx_small.mask; i++) {
		err = bus_dmamap_create(ss->rx_small.dmat, 0,
					&ss->rx_small.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_small dmamap\n",
				      err);
			return err;
		}
	}
	err = bus_dmamap_create(ss->rx_small.dmat, 0,
				&ss->rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
			      err);
		return err;
	}

	for (i = 0; i <= ss->rx_big.mask; i++) {
		err = bus_dmamap_create(ss->rx_big.dmat, 0,
					&ss->rx_big.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_big dmamap\n",
				      err);
			return err;
		}
	}
	err = bus_dmamap_create(ss->rx_big.dmat, 0,
				&ss->rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
			      err);
		return err;
	}

	/* now allocate TX resources */

	ss->tx.mask = tx_ring_entries - 1;
	ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);

	/* allocate the tx request copy block */
	bytes = 8 +
		sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
	ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
	/* ensure req_list entries are aligned to 8 bytes */
	ss->tx.req_list = (mcp_kreq_ether_send_t *)
		((uintptr_t)(ss->tx.req_bytes + 7) & ~7UL);

	/* allocate the tx busdma segment list */
	bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
	ss->tx.seg_list = (bus_dma_segment_t *)
		malloc(bytes, M_DEVBUF, M_WAITOK);

	/* allocate the tx host info ring */
	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* allocate the tx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 sc->tx_boundary,	/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 ss->tx.max_desc - 2,	/* num segs */
				 sc->tx_boundary,	/* maxsegsz */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->tx.dmat);		/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating tx dmat\n",
			      err);
		return err;
	}

	/* now use these tags to setup dmamaps for each slot
	   in the ring */
	for (i = 0; i <= ss->tx.mask; i++) {
		err = bus_dmamap_create(ss->tx.dmat, 0,
					&ss->tx.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d tx dmamap\n",
				      err);
			return err;
		}
	}
	return 0;

}
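
/*
 * Worked example of the req_list alignment fix-up above (added
 * commentary): the copy block is over-allocated by 8 bytes and the
 * pointer rounded up, so if malloc() returned 0x...f003,
 * ((uintptr_t)(req_bytes + 7) & ~7UL) yields 0x...f008, the first
 * 8-byte-aligned address inside the allocation.
 */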

static int
mxge_alloc_rings(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int tx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int err, slice;

	/* get ring sizes */
	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine tx ring sizes\n");
		goto abort;
	}

	tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
	rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
	if_setsendqlen(sc->ifp, tx_ring_entries - 1);
	if_setsendqready(sc->ifp);

	for (slice = 0; slice < sc->num_slices; slice++) {
		err = mxge_alloc_slice_rings(&sc->ss[slice],
					     rx_ring_entries,
					     tx_ring_entries);
		if (err != 0)
			goto abort;
	}
	return 0;

abort:
	mxge_free_rings(sc);
	return err;

}

static void
mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
{
	int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;

	if (bufsize < MCLBYTES) {
		/* easy, everything fits in a single buffer */
		*big_buf_size = MCLBYTES;
		*cl_size = MCLBYTES;
		*nbufs = 1;
		return;
	}

	if (bufsize < MJUMPAGESIZE) {
		/* still easy, everything still fits in a single buffer */
		*big_buf_size = MJUMPAGESIZE;
		*cl_size = MJUMPAGESIZE;
		*nbufs = 1;
		return;
	}
	*cl_size = MJUM9BYTES;
	*big_buf_size = MJUM9BYTES;
	*nbufs = 1;
}

static int
mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
{
	mxge_softc_t *sc;
	mxge_cmd_t cmd;
	bus_dmamap_t map;
	int err, i, slice;

	sc = ss->sc;
	slice = ss - sc->ss;

#if defined(INET) || defined(INET6)
	(void)tcp_lro_init(&ss->lc);
#endif
	ss->lc.ifp = sc->ifp;

	/* get the lanai pointers to the send and receive rings */

	err = 0;

	cmd.data0 = slice;
	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
	ss->tx.lanai =
		(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
	ss->tx.send_go = (volatile uint32_t *)
		(sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
	ss->tx.send_stop = (volatile uint32_t *)
		(sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);

	cmd.data0 = slice;
	err |= mxge_send_cmd(sc,
			     MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	ss->rx_small.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
	cmd.data0 = slice;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	ss->rx_big.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);

	if (err != 0) {
		device_printf(sc->dev,
			      "failed to get ring sizes or locations\n");
		return EIO;
	}

	/* stock receive rings */
	for (i = 0; i <= ss->rx_small.mask; i++) {
		map = ss->rx_small.info[i].map;
		err = mxge_get_buf_small(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d smalls\n",
				      i, ss->rx_small.mask + 1);
			return ENOMEM;
		}
	}
	for (i = 0; i <= ss->rx_big.mask; i++) {
		ss->rx_big.shadow[i].addr_low = 0xffffffff;
		ss->rx_big.shadow[i].addr_high = 0xffffffff;
	}
	ss->rx_big.nbufs = nbufs;
	ss->rx_big.cl_size = cl_size;
	ss->rx_big.mlen = if_getmtu(ss->sc->ifp) + ETHER_HDR_LEN +
		ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
	for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
		map = ss->rx_big.info[i].map;
		err = mxge_get_buf_big(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d bigs\n",
				      i, ss->rx_big.mask + 1);
			return ENOMEM;
		}
	}
	return 0;
}
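
/*
 * Illustrative mxge_choose_params() results (added commentary,
 * assuming MCLBYTES = 2048 and MJUMPAGESIZE = PAGE_SIZE = 4096): a
 * 1500-byte MTU needs 1500 + 14 + 4 + 2 = 1520 bytes and fits in a
 * 2KB cluster; a 4000-byte MTU spills into page-sized clusters; a
 * 9000-byte MTU needs 9020 bytes and forces MJUM9BYTES clusters.  In
 * every case this version of the driver uses one buffer per frame
 * (nbufs = 1).
 */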

static int
mxge_open(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int err, big_bytes, nbufs, slice, cl_size, i;
	bus_addr_t bus;
	volatile uint8_t *itable;
	struct mxge_slice_state *ss;

	/* Copy the MAC address in case it was overridden */
	bcopy(if_getlladdr(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);

	err = mxge_reset(sc, 1);
	if (err != 0) {
		device_printf(sc->dev, "failed to reset\n");
		return EIO;
	}

	if (sc->num_slices > 1) {
		/* setup the indirection table */
		cmd.data0 = sc->num_slices;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
				    &cmd);

		err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
				     &cmd);
		if (err != 0) {
			device_printf(sc->dev,
				      "failed to setup rss tables\n");
			return err;
		}

		/* just enable an identity mapping */
		itable = sc->sram + cmd.data0;
		for (i = 0; i < sc->num_slices; i++)
			itable[i] = (uint8_t)i;

		cmd.data0 = 1;
		cmd.data1 = mxge_rss_hash_type;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "failed to enable slices\n");
			return err;
		}
	}

	mxge_choose_params(if_getmtu(sc->ifp), &big_bytes, &cl_size, &nbufs);

	cmd.data0 = nbufs;
	err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			    &cmd);
	/* error is only meaningful if we're trying to set
	   MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
	if (err && nbufs > 1) {
		device_printf(sc->dev,
			      "Failed to set always-use-n to %d\n",
			      nbufs);
		return EIO;
	}
	/* Give the firmware the mtu and the big and small buffer
	   sizes.  The firmware wants the big buf size to be a power
	   of two.  Luckily, FreeBSD's clusters are powers of two */
	cmd.data0 = if_getmtu(sc->ifp) + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
	cmd.data0 = MHLEN - MXGEFW_PAD;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
			     &cmd);
	cmd.data0 = big_bytes;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* Now give the firmware the pointer to the stats block */
	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		cmd.data0 =
			MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
		cmd.data1 =
			MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
		cmd.data2 = sizeof(struct mcp_irq_data);
		cmd.data2 |= (slice << 16);
		err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
	}

	if (err != 0) {
		bus = sc->ss->fw_stats_dma.bus_addr;
		bus += offsetof(struct mcp_irq_data, send_done_count);
		cmd.data0 = MXGE_LOWPART_TO_U32(bus);
		cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
		err = mxge_send_cmd(sc,
				    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
				    &cmd);
		/* Firmware cannot support multicast without STATS_DMA_V2 */
		sc->fw_multicast_support = 0;
	} else {
		sc->fw_multicast_support = 1;
	}

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	for (slice = 0; slice < sc->num_slices; slice++) {
		err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
		if (err != 0) {
			device_printf(sc->dev, "couldn't open slice %d\n",
				      slice);
			goto abort;
		}
	}

	/* Finally, start the firmware running */
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring up link\n");
		goto abort;
	}
	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		ss->if_drv_flags |= IFF_DRV_RUNNING;
		ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
	}
	if_setdrvflagbits(sc->ifp, IFF_DRV_RUNNING, 0);
	if_setdrvflagbits(sc->ifp, 0, IFF_DRV_OACTIVE);

	return 0;

abort:
	mxge_free_mbufs(sc);

	return err;
}
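
/*
 * Added note on the RSS setup in mxge_open() above: the indirection
 * table is programmed as an identity map (slot i holds slice i), so
 * the slice chosen by the firmware is presumably just its RSS hash
 * reduced modulo the table size; e.g. with 4 slices a hash of 10
 * selects itable[10 % 4] = slice 2.
 */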

static int
mxge_close(mxge_softc_t *sc, int down)
{
	mxge_cmd_t cmd;
	int err, old_down_cnt;
	struct mxge_slice_state *ss;
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		ss->if_drv_flags &= ~IFF_DRV_RUNNING;
	}
	if_setdrvflagbits(sc->ifp, 0, IFF_DRV_RUNNING);
	if (!down) {
		old_down_cnt = sc->down_cnt;
		wmb();
		err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
		if (err) {
			device_printf(sc->dev,
				      "Couldn't bring down link\n");
		}
		if (old_down_cnt == sc->down_cnt) {
			/* wait for down irq */
			DELAY(10 * sc->intr_coal_delay);
		}
		wmb();
		if (old_down_cnt == sc->down_cnt) {
			device_printf(sc->dev, "never got down irq\n");
		}
	}
	mxge_free_mbufs(sc);

	return 0;
}

static void
mxge_setup_cfg_space(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg;
	uint16_t lnk, pectl;

	/* find the PCIe link width and set max read request to 4KB */
	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
		lnk = pci_read_config(dev, reg + 0x12, 2);
		sc->link_width = (lnk >> 4) & 0x3f;

		if (sc->pectl == 0) {
			pectl = pci_read_config(dev, reg + 0x8, 2);
			pectl = (pectl & ~0x7000) | (5 << 12);
			pci_write_config(dev, reg + 0x8, pectl, 2);
			sc->pectl = pectl;
		} else {
			/* restore saved pectl after watchdog reset */
			pci_write_config(dev, reg + 0x8, sc->pectl, 2);
		}
	}

	/* Enable DMA and Memory space access */
	pci_enable_busmaster(dev);
}

static uint32_t
mxge_read_reboot(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	uint32_t vs;

	/* find the vendor specific offset */
	if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
		device_printf(sc->dev,
			      "could not find vendor specific offset\n");
		return (uint32_t)-1;
	}
	/* enable read32 mode */
	pci_write_config(dev, vs + 0x10, 0x3, 1);
	/* tell NIC which register to read */
	pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
	return (pci_read_config(dev, vs + 0x14, 4));
}
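
/*
 * Added note on mxge_setup_cfg_space() above: bits 14:12 of the PCIe
 * device control register (mask 0x7000) encode the maximum read
 * request size as 128 << n, so writing 5 requests 128 << 5 = 4096
 * bytes, matching the "4KB" comment; the saved value is replayed after
 * a watchdog reset so the setting survives a NIC reboot.
 */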

static void
mxge_watchdog_reset(mxge_softc_t *sc)
{
	struct pci_devinfo *dinfo;
	struct mxge_slice_state *ss;
	int err, running, s, num_tx_slices = 1;
	uint32_t reboot;
	uint16_t cmd;

	err = ENXIO;

	device_printf(sc->dev, "Watchdog reset!\n");

	/*
	 * check to see if the NIC rebooted.  If it did, then all of
	 * PCI config space has been reset, and things like the
	 * busmaster bit will be zero.  If this is the case, then we
	 * must restore PCI config space before the NIC can be used
	 * again
	 */
	cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
	if (cmd == 0xffff) {
		/*
		 * maybe the watchdog caught the NIC rebooting; wait
		 * up to 100ms for it to finish.  If it does not come
		 * back, then give up
		 */
		DELAY(1000*100);
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if (cmd == 0xffff) {
			device_printf(sc->dev, "NIC disappeared!\n");
		}
	}
	if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
		/* print the reboot status */
		reboot = mxge_read_reboot(sc);
		device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
			      reboot);
		running = if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING;
		if (running) {
			/*
			 * quiesce NIC so that TX routines will not try to
			 * xmit after restoration of BAR
			 */

			/* Mark the link as down */
			if (sc->link_state) {
				sc->link_state = 0;
				if_link_state_change(sc->ifp,
						     LINK_STATE_DOWN);
			}

			num_tx_slices = sc->num_slices;

			/* grab all TX locks to ensure no tx  */
			for (s = 0; s < num_tx_slices; s++) {
				ss = &sc->ss[s];
				mtx_lock(&ss->tx.mtx);
			}
			mxge_close(sc, 1);
		}
		/* restore PCI configuration space */
		dinfo = device_get_ivars(sc->dev);
		pci_cfg_restore(sc->dev, dinfo);

		/* and redo any changes we made to our config space */
		mxge_setup_cfg_space(sc);

		/* reload f/w */
		err = mxge_load_firmware(sc, 0);
		if (err) {
			device_printf(sc->dev,
				      "Unable to re-load f/w\n");
		}
		if (running) {
			if (!err)
				err = mxge_open(sc);
			/* release all TX locks */
			for (s = 0; s < num_tx_slices; s++) {
				ss = &sc->ss[s];
				mxge_start_locked(ss);
				mtx_unlock(&ss->tx.mtx);
			}
		}
		sc->watchdog_resets++;
	} else {
		device_printf(sc->dev,
			      "NIC did not reboot, not resetting\n");
		err = 0;
	}
	if (err) {
		device_printf(sc->dev, "watchdog reset failed\n");
	} else {
		if (sc->dying == 2)
			sc->dying = 0;
		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	}
}

static void
mxge_watchdog_task(void *arg, int pending)
{
	mxge_softc_t *sc = arg;

	mtx_lock(&sc->driver_mtx);
	mxge_watchdog_reset(sc);
	mtx_unlock(&sc->driver_mtx);
}

static void
mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
{
	tx = &sc->ss[slice].tx;
	device_printf(sc->dev, "slice %d stuck? ring state:\n", slice);
	device_printf(sc->dev,
		      "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
		      tx->req, tx->done, tx->queue_active);
	device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
		      tx->activate, tx->deactivate);
	device_printf(sc->dev, "pkt_done=%d fw=%d\n",
		      tx->pkt_done,
		      be32toh(sc->ss->fw_stats->send_done_count));
}

static int
mxge_watchdog(mxge_softc_t *sc)
{
	mxge_tx_ring_t *tx;
	uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
	int i, err = 0;

	/* see if we have outstanding transmits, which
	   have been pending for more than mxge_ticks */
	for (i = 0; (i < sc->num_slices) && (err == 0); i++) {
		tx = &sc->ss[i].tx;
		if (tx->req != tx->done &&
		    tx->watchdog_req != tx->watchdog_done &&
		    tx->done == tx->watchdog_done) {
			/* check for pause blocking before resetting */
			if (tx->watchdog_rx_pause == rx_pause) {
				mxge_warn_stuck(sc, tx, i);
				taskqueue_enqueue(sc->tq, &sc->watchdog_task);
				return (ENXIO);
			}
			else
				device_printf(sc->dev, "Flow control blocking "
					      "xmits, check link partner\n");
		}

		tx->watchdog_req = tx->req;
		tx->watchdog_done = tx->done;
		tx->watchdog_rx_pause = rx_pause;
	}

	if (sc->need_media_probe)
		mxge_media_probe(sc);
	return (err);
}

static uint64_t
mxge_get_counter(if_t ifp, ift_counter cnt)
{
	struct mxge_softc *sc;
	uint64_t rv;

	sc = if_getsoftc(ifp);
	rv = 0;

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		for (int s = 0; s < sc->num_slices; s++)
			rv += sc->ss[s].ipackets;
		return (rv);
	case IFCOUNTER_OPACKETS:
		for (int s = 0; s < sc->num_slices; s++)
			rv += sc->ss[s].opackets;
		return (rv);
	case IFCOUNTER_OERRORS:
		for (int s = 0; s < sc->num_slices; s++)
			rv += sc->ss[s].oerrors;
		return (rv);
	case IFCOUNTER_OBYTES:
		for (int s = 0; s < sc->num_slices; s++)
			rv += sc->ss[s].obytes;
		return (rv);
	case IFCOUNTER_OMCASTS:
		for (int s = 0; s < sc->num_slices; s++)
			rv += sc->ss[s].omcasts;
		return (rv);
	case IFCOUNTER_OQDROPS:
		for (int s = 0; s < sc->num_slices; s++)
			rv += sc->ss[s].tx.br->br_drops;
		return (rv);
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}

static void
mxge_tick(void *arg)
{
	mxge_softc_t *sc = arg;
	u_long pkts = 0;
	int err = 0;
	int running, ticks;
	uint16_t cmd;

	ticks = mxge_ticks;
	running = if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING;
	if (running) {
		if (!sc->watchdog_countdown) {
			err = mxge_watchdog(sc);
			sc->watchdog_countdown = 4;
		}
		sc->watchdog_countdown--;
	}
	if (pkts == 0) {
		/* ensure NIC did not suffer h/w fault while idle */
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
			sc->dying = 2;
			taskqueue_enqueue(sc->tq, &sc->watchdog_task);
			err = ENXIO;
		}
		/* look less often if NIC is idle */
		ticks *= 4;
	}

	if (err == 0)
		callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);

}

static int
mxge_media_change(if_t ifp)
{
	return EINVAL;
}
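
/*
 * Illustration for mxge_change_mtu() below (added commentary):
 * real_mtu is the wire-level frame size, so a requested MTU of 9000 is
 * validated as 9000 + ETHER_HDR_LEN (14) + ETHER_VLAN_ENCAP_LEN (4) =
 * 9018 bytes against sc->max_mtu.
 */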

static int
mxge_change_mtu(mxge_softc_t *sc, int mtu)
{
	if_t ifp = sc->ifp;
	int real_mtu, old_mtu;
	int err = 0;

	real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	if ((real_mtu > sc->max_mtu) || real_mtu < 60)
		return EINVAL;
	mtx_lock(&sc->driver_mtx);
	old_mtu = if_getmtu(ifp);
	if_setmtu(ifp, mtu);
	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		mxge_close(sc, 0);
		err = mxge_open(sc);
		if (err != 0) {
			if_setmtu(ifp, old_mtu);
			mxge_close(sc, 0);
			(void) mxge_open(sc);
		}
	}
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static void
mxge_media_status(if_t ifp, struct ifmediareq *ifmr)
{
	mxge_softc_t *sc = if_getsoftc(ifp);

	if (sc == NULL)
		return;
	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
	ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
	ifmr->ifm_active |= sc->current_media;
}

static int
mxge_fetch_i2c(mxge_softc_t *sc, struct ifi2creq *i2c)
{
	mxge_cmd_t cmd;
	uint32_t i2c_args;
	int i, ms, err;

	if (i2c->dev_addr != 0xA0 &&
	    i2c->dev_addr != 0xA2)
		return (EINVAL);
	if (i2c->len > sizeof(i2c->data))
		return (EINVAL);

	for (i = 0; i < i2c->len; i++) {
		i2c_args = i2c->dev_addr << 0x8;
		i2c_args |= i2c->offset + i;
		cmd.data0 = 0;	 /* just fetch 1 byte, not all 256 */
		cmd.data1 = i2c_args;
		err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);

		if (err != MXGEFW_CMD_OK)
			return (EIO);
		/* now we wait for the data to be cached */
		cmd.data0 = i2c_args & 0xff;
		err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
		for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
			cmd.data0 = i2c_args & 0xff;
			err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
			if (err == EBUSY)
				DELAY(1000);
		}
		if (err != MXGEFW_CMD_OK)
			return (EIO);
		i2c->data[i] = cmd.data0;
	}
	return (0);
}
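
/*
 * Usage note (added): mxge_fetch_i2c() above backs the SIOCGI2C ioctl
 * handled below, which is what, for instance, ifconfig(8)'s "sfp"
 * option uses to dump the transceiver's 0xA0/0xA2 EEPROM pages.
 */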
static int
mxge_ioctl(if_t ifp, u_long command, caddr_t data)
{
	mxge_softc_t *sc = if_getsoftc(ifp);
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifi2creq i2c;
	int err, mask;

	err = 0;
	switch (command) {
	case SIOCSIFMTU:
		err = mxge_change_mtu(sc, ifr->ifr_mtu);
		break;

	case SIOCSIFFLAGS:
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return EINVAL;
		}
		if (if_getflags(ifp) & IFF_UP) {
			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) {
				err = mxge_open(sc);
			} else {
				/* take care of promisc and allmulti
				   flag changes */
				mxge_change_promisc(sc,
				    if_getflags(ifp) & IFF_PROMISC);
				mxge_set_multicast_list(sc);
			}
		} else {
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
				mxge_close(sc, 0);
			}
		}
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return (EINVAL);
		}
		mxge_set_multicast_list(sc);
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCSIFCAP:
		mtx_lock(&sc->driver_mtx);
		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & if_getcapenable(ifp)) {
				mask &= ~IFCAP_TSO4;
				if_setcapenablebit(ifp, 0,
				    (IFCAP_TXCSUM | IFCAP_TSO4));
				if_sethwassistbits(ifp, 0,
				    (CSUM_TCP | CSUM_UDP));
			} else {
				if_setcapenablebit(ifp, IFCAP_TXCSUM, 0);
				if_sethwassistbits(ifp,
				    (CSUM_TCP | CSUM_UDP), 0);
			}
		}
		if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & if_getcapenable(ifp)) {
				if_setcapenablebit(ifp, 0, IFCAP_RXCSUM);
			} else {
				if_setcapenablebit(ifp, IFCAP_RXCSUM, 0);
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & if_getcapenable(ifp)) {
				if_setcapenablebit(ifp, 0, IFCAP_TSO4);
			} else if (IFCAP_TXCSUM & if_getcapenable(ifp)) {
				if_setcapenablebit(ifp, IFCAP_TSO4, 0);
				if_sethwassistbits(ifp, CSUM_TSO, 0);
			} else {
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
#if IFCAP_TSO6
		if (mask & IFCAP_TXCSUM_IPV6) {
			if (IFCAP_TXCSUM_IPV6 & if_getcapenable(ifp)) {
				mask &= ~IFCAP_TSO6;
				if_setcapenablebit(ifp, 0,
				    IFCAP_TXCSUM_IPV6 | IFCAP_TSO6);
				if_sethwassistbits(ifp, 0,
				    CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
			} else {
				if_setcapenablebit(ifp, IFCAP_TXCSUM_IPV6, 0);
				if_sethwassistbits(ifp,
				    CSUM_TCP_IPV6 | CSUM_UDP_IPV6, 0);
			}
		}
		if (mask & IFCAP_RXCSUM_IPV6) {
			if (IFCAP_RXCSUM_IPV6 & if_getcapenable(ifp)) {
				if_setcapenablebit(ifp, 0, IFCAP_RXCSUM_IPV6);
			} else {
				if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0);
			}
		}
		if (mask & IFCAP_TSO6) {
			if (IFCAP_TSO6 & if_getcapenable(ifp)) {
				if_setcapenablebit(ifp, 0, IFCAP_TSO6);
			} else if (IFCAP_TXCSUM_IPV6 & if_getcapenable(ifp)) {
				if_setcapenablebit(ifp, IFCAP_TSO6, 0);
				if_sethwassistbits(ifp, CSUM_TSO, 0);
			} else {
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
#endif /* IFCAP_TSO6 */

		if (mask & IFCAP_LRO)
			if_togglecapenable(ifp, IFCAP_LRO);
		if (mask & IFCAP_VLAN_HWTAGGING)
			if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
		if (mask & IFCAP_VLAN_HWTSO)
			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);

		if (!(if_getcapabilities(ifp) & IFCAP_VLAN_HWTSO) ||
		    !(if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING))
			if_setcapenablebit(ifp, 0, IFCAP_VLAN_HWTSO);

		mtx_unlock(&sc->driver_mtx);
		VLAN_CAPABILITIES(ifp);

		break;

	case SIOCGIFMEDIA:
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return (EINVAL);
		}
		mxge_media_probe(sc);
		mtx_unlock(&sc->driver_mtx);
		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
				    &sc->media, command);
		break;

	case SIOCGI2C:
		if (sc->connector != MXGE_XFP &&
		    sc->connector != MXGE_SFP) {
			err = ENXIO;
			break;
		}
		err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
		if (err != 0)
			break;
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return (EINVAL);
		}
		err = mxge_fetch_i2c(sc, &i2c);
		mtx_unlock(&sc->driver_mtx);
		if (err == 0)
			err = copyout(&i2c, ifr_data_get_ptr(ifr),
				      sizeof(i2c));
		break;
	default:
		err = ether_ioctl(ifp, command, data);
		break;
	}
	return err;
}
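/*
 * The SIOCSIFCAP handling above enforces two dependencies: TSO can
 * only be enabled while the matching transmit checksum offload is on,
 * and VLAN_HWTSO is cleared whenever VLAN_HWTAGGING is disabled.  An
 * illustrative (not normative) shell session, assuming a unit named
 * mxge0:
 *
 *	ifconfig mxge0 -txcsum		# implicitly drops tso4 as well
 *	ifconfig mxge0 tso4		# rejected with EINVAL
 *	ifconfig mxge0 txcsum tso4	# accepted
 */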
static void
mxge_fetch_tunables(mxge_softc_t *sc)
{

	TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
			  &mxge_flow_control);
	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
			  &mxge_intr_coal_delay);
	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
			  &mxge_nvidia_ecrc_enable);
	TUNABLE_INT_FETCH("hw.mxge.force_firmware",
			  &mxge_force_firmware);
	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
			  &mxge_deassert_wait);
	TUNABLE_INT_FETCH("hw.mxge.verbose",
			  &mxge_verbose);
	TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
	TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
	/* accept both spellings of the RSS hash type tunable */
	TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
	TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
	TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
	TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);

	if (bootverbose)
		mxge_verbose = 1;
	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
		mxge_intr_coal_delay = 30;
	if (mxge_ticks == 0)
		mxge_ticks = hz / 2;
	sc->pause = mxge_flow_control;
	if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 ||
	    mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
		mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
	}
	if (mxge_initial_mtu > ETHERMTU_JUMBO ||
	    mxge_initial_mtu < ETHER_MIN_LEN)
		mxge_initial_mtu = ETHERMTU_JUMBO;

	if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
		mxge_throttle = MXGE_MAX_THROTTLE;
	if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
		mxge_throttle = MXGE_MIN_THROTTLE;
	sc->throttle = mxge_throttle;
}
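/*
 * The tunables above are read once, at attach time, so they are set
 * from loader.conf rather than at runtime.  An illustrative example
 * (the values are assumptions, not recommendations):
 *
 *	# /boot/loader.conf
 *	hw.mxge.max_slices=-1		# one slice per CPU, capped by MSI-X
 *	hw.mxge.intr_coal_delay=30	# usec, clamped to [0, 10000]
 *	hw.mxge.initial_mtu=9000	# clamped to [ETHER_MIN_LEN, ETHERMTU_JUMBO]
 */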
static void
mxge_free_slices(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int i;

	if (sc->ss == NULL)
		return;

	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];
		if (ss->fw_stats != NULL) {
			mxge_dma_free(&ss->fw_stats_dma);
			ss->fw_stats = NULL;
			if (ss->tx.br != NULL) {
				drbr_free(ss->tx.br, M_DEVBUF);
				ss->tx.br = NULL;
			}
			mtx_destroy(&ss->tx.mtx);
		}
		if (ss->rx_done.entry != NULL) {
			mxge_dma_free(&ss->rx_done.dma);
			ss->rx_done.entry = NULL;
		}
	}
	free(sc->ss, M_DEVBUF);
	sc->ss = NULL;
}

static int
mxge_alloc_slices(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct mxge_slice_state *ss;
	size_t bytes;
	int err, i, max_intr_slots;

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		return err;
	}
	sc->rx_ring_size = cmd.data0;
	max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));

	bytes = sizeof (*sc->ss) * sc->num_slices;
	sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->ss == NULL)
		return (ENOMEM);
	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];

		ss->sc = sc;

		/* allocate per-slice rx interrupt queues */

		bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
		err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
		if (err != 0)
			goto abort;
		ss->rx_done.entry = ss->rx_done.dma.addr;
		bzero(ss->rx_done.entry, bytes);

		/*
		 * allocate the per-slice firmware stats; stats
		 * (including tx) are used only on the first
		 * slice for now
		 */

		bytes = sizeof (*ss->fw_stats);
		err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
				     sizeof (*ss->fw_stats), 64);
		if (err != 0)
			goto abort;
		ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
		snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
			 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
		mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
		ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
					   &ss->tx.mtx);
	}

	return (0);

abort:
	mxge_free_slices(sc);
	return (ENOMEM);
}

static void
mxge_slice_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *old_fw;
	int msix_cnt, status, max_intr_slots;

	sc->num_slices = 1;
	/*
	 * don't enable multiple slices if they have been disabled
	 * by the tunable, or if this is not an SMP system
	 */

	if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
		return;

	/* see how many MSI-X interrupts are available */
	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;

	/* now load the slice-aware firmware and see what it supports */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/* get rx ring size */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));

	/* tell it the size of the interrupt queues */
	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/* ask for the maximum number of slices it supports */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
			      "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	sc->num_slices = cmd.data0;
	if (sc->num_slices > msix_cnt)
		sc->num_slices = msix_cnt;

	if (mxge_max_slices == -1) {
		/* cap to number of CPUs in system */
		if (sc->num_slices > mp_ncpus)
			sc->num_slices = mp_ncpus;
	} else {
		if (sc->num_slices > mxge_max_slices)
			sc->num_slices = mxge_max_slices;
	}
	/* make sure it is a power of two */
	while (sc->num_slices & (sc->num_slices - 1))
		sc->num_slices--;

	if (mxge_verbose)
		device_printf(sc->dev, "using %d slices\n",
			      sc->num_slices);

	return;

abort_with_fw:
	sc->fw_name = old_fw;
	(void) mxge_load_firmware(sc, 0);
}
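/*
 * Worked example of the clamping above (numbers are illustrative):
 * suppose the firmware reports 8 RSS queues, pci_msix_count() returns
 * 5, and mp_ncpus is 6 with hw.mxge.max_slices=-1.  num_slices starts
 * at 8, is clamped to 5 by the MSI-X count, stays at 5 under the CPU
 * cap, and the power-of-two loop then reduces it to 4.
 */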
static int
mxge_add_msix_irqs(mxge_softc_t *sc)
{
	size_t bytes;
	int count, err, i, rid;

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
						    &rid, RF_ACTIVE);

	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
		return ENXIO;
	}

	count = sc->num_slices;
	err = pci_alloc_msix(sc->dev, &count);
	if (err != 0) {
		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
			      "err = %d\n", sc->num_slices, err);
		goto abort_with_msix_table;
	}
	if (count < sc->num_slices) {
		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
			      sc->num_slices, count);
		device_printf(sc->dev,
			      "Try setting hw.mxge.max_slices to %d\n",
			      count);
		err = ENOSPC;
		goto abort_with_msix;
	}
	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
	sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_irq_res == NULL) {
		err = ENOMEM;
		goto abort_with_msix;
	}

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
							     SYS_RES_IRQ,
							     &rid, RF_ACTIVE);
		if (sc->msix_irq_res[i] == NULL) {
			device_printf(sc->dev, "couldn't allocate IRQ res"
				      " for message %d\n", i);
			err = ENXIO;
			goto abort_with_res;
		}
	}

	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
	sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);

	for (i = 0; i < sc->num_slices; i++) {
		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
				     INTR_TYPE_NET | INTR_MPSAFE, NULL,
				     mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
		if (err != 0) {
			device_printf(sc->dev, "couldn't setup intr for "
				      "message %d\n", i);
			goto abort_with_intr;
		}
		bus_describe_intr(sc->dev, sc->msix_irq_res[i],
				  sc->msix_ih[i], "s%d", i);
	}

	if (mxge_verbose) {
		device_printf(sc->dev, "using %d msix IRQs:",
			      sc->num_slices);
		for (i = 0; i < sc->num_slices; i++)
			printf(" %jd", rman_get_start(sc->msix_irq_res[i]));
		printf("\n");
	}
	return (0);

abort_with_intr:
	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

abort_with_res:
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

abort_with_msix:
	pci_release_msi(sc->dev);

abort_with_msix_table:
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	return err;
}

static int
mxge_add_single_irq(mxge_softc_t *sc)
{
	int count, err, rid;

	count = pci_msi_count(sc->dev);
	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
		rid = 1;
	} else {
		rid = 0;
		sc->legacy_irq = 1;
	}
	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
					     RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}
	if (mxge_verbose)
		device_printf(sc->dev, "using %s irq %jd\n",
			      sc->legacy_irq ? "INTx" : "MSI",
			      rman_get_start(sc->irq_res));
	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_TYPE_NET | INTR_MPSAFE, NULL,
			     mxge_intr, &sc->ss[0], &sc->ih);
	if (err != 0) {
		bus_release_resource(sc->dev, SYS_RES_IRQ,
				     sc->legacy_irq ? 0 : 1, sc->irq_res);
		if (!sc->legacy_irq)
			pci_release_msi(sc->dev);
	}
	return err;
}
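/*
 * Note on the single-interrupt path above: resource id 0 selects the
 * legacy INTx line, while MSI vectors are numbered from rid 1.  That
 * is why sc->legacy_irq drives the rid choice here and in the release
 * paths below.
 */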
static void
mxge_rem_msix_irqs(mxge_softc_t *sc)
{
	int i, rid;

	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	pci_release_msi(sc->dev);
	return;
}

static void
mxge_rem_single_irq(mxge_softc_t *sc)
{
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	bus_release_resource(sc->dev, SYS_RES_IRQ,
			     sc->legacy_irq ? 0 : 1, sc->irq_res);
	if (!sc->legacy_irq)
		pci_release_msi(sc->dev);
}

static void
mxge_rem_irq(mxge_softc_t *sc)
{
	if (sc->num_slices > 1)
		mxge_rem_msix_irqs(sc);
	else
		mxge_rem_single_irq(sc);
}

static int
mxge_add_irq(mxge_softc_t *sc)
{
	int err;

	if (sc->num_slices > 1)
		err = mxge_add_msix_irqs(sc);
	else
		err = mxge_add_single_irq(sc);

	/* disabled via the "0 &&": would tear down and re-add the
	   MSI-X irqs to exercise the teardown path */
	if (0 && err == 0 && sc->num_slices > 1) {
		mxge_rem_msix_irqs(sc);
		err = mxge_add_msix_irqs(sc);
	}
	return err;
}
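/*
 * mxge_attach() below uses the usual goto-ladder unwind idiom: each
 * acquisition that succeeds gains a matching abort_with_* label at
 * the bottom of the function, so a failure at any step releases
 * exactly the resources acquired so far, in reverse order.
 */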
static int
mxge_attach(device_t dev)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc = device_get_softc(dev);
	if_t ifp;
	int err, rid;

	sc->dev = dev;
	mxge_fetch_tunables(sc);

	TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
	sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
				  taskqueue_thread_enqueue, &sc->tq);

	err = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
				 1,			/* alignment */
				 0,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC,	/* num segs */
				 65536,			/* maxsegsize */
				 0,			/* flags */
				 NULL, NULL,		/* lock */
				 &sc->parent_dmat);	/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_tq;
	}

	ifp = sc->ifp = if_alloc(IFT_ETHER);
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
		 device_get_nameunit(dev));
	mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
	snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
		 "%s:drv", device_get_nameunit(dev));
	mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
		 MTX_NETWORK_LOCK, MTX_DEF);

	callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
					     RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_lock;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %jd\n",
			      rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/* make NULL terminated copy of the EEPROM strings section of
	   lanai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_zeropad_dma;

	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_dmabench;
	sc->intr_coal_delay = mxge_intr_coal_delay;

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0)
		goto abort_with_dmabench;

	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_slices;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		goto abort_with_slices;
	}

	err = mxge_add_irq(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to add irq\n");
		goto abort_with_rings;
	}

	if_setbaudrate(ifp, IF_Gbps(10));
	if_setcapabilities(ifp, IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
	    IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
	    IFCAP_RXCSUM_IPV6);
#if defined(INET) || defined(INET6)
	if_setcapabilitiesbit(ifp, IFCAP_LRO, 0);
#endif

#ifdef MXGE_NEW_VLAN_API
	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM, 0);

	/* Only FW 1.4.32 and newer can do TSO over vlans */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 32)
		if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTSO, 0);
#endif
	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000)
		if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0);
	else
		device_printf(dev, "MTU limited to %d.  Install "
			      "latest firmware for 9000 byte jumbo support\n",
			      sc->max_mtu - ETHER_HDR_LEN);
	if_sethwassist(ifp, CSUM_TCP | CSUM_UDP | CSUM_TSO);
	if_sethwassistbits(ifp, CSUM_TCP_IPV6 | CSUM_UDP_IPV6, 0);
	/* check to see if f/w supports TSO for IPv6 */
	if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
		if (CSUM_TCP_IPV6)
			if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0);
		sc->max_tso6_hlen = min(cmd.data0,
					sizeof (sc->ss[0].scratch));
	}
	if_setcapenable(ifp, if_getcapabilities(ifp));
	if (sc->lro_cnt == 0)
		if_setcapenablebit(ifp, 0, IFCAP_LRO);
	if_setinitfn(ifp, mxge_init);
	if_setsoftc(ifp, sc);
	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
	if_setioctlfn(ifp, mxge_ioctl);
	if_setstartfn(ifp, mxge_start);
	if_setgetcounterfn(ifp, mxge_get_counter);
	if_sethwtsomax(ifp, IP_MAXPACKET - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
	if_sethwtsomaxsegcount(ifp, sc->ss[0].tx.max_desc);
	if_sethwtsomaxsegsize(ifp, IP_MAXPACKET);
	/* Initialise the ifmedia structure */
	ifmedia_init(&sc->media, 0, mxge_media_change,
		     mxge_media_status);
	mxge_media_init(sc);
	mxge_media_probe(sc);
	sc->dying = 0;
	ether_ifattach(ifp, sc->mac_addr);
	/* ether_ifattach sets mtu to ETHERMTU */
	if (mxge_initial_mtu != ETHERMTU)
		mxge_change_mtu(sc, mxge_initial_mtu);

	mxge_add_sysctls(sc);
	if_settransmitfn(ifp, mxge_transmit);
	if_setqflushfn(ifp, mxge_qflush);
	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
				device_get_nameunit(sc->dev));
	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	return 0;

abort_with_rings:
	mxge_free_rings(sc);
abort_with_slices:
	mxge_free_slices(sc);
abort_with_dmabench:
	mxge_dma_free(&sc->dmabench_dma);
abort_with_zeropad_dma:
	mxge_dma_free(&sc->zeropad_dma);
abort_with_cmd_dma:
	mxge_dma_free(&sc->cmd_dma);
abort_with_mem_res:
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
abort_with_lock:
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
abort_with_tq:
	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->watchdog_task);
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}
	return err;
}

static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (mxge_vlans_active(sc)) {
		device_printf(sc->dev,
			      "Detach vlans before removing module\n");
		return EBUSY;
	}
	mtx_lock(&sc->driver_mtx);
	sc->dying = 1;
	if (if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING)
		mxge_close(sc, 0);
	mtx_unlock(&sc->driver_mtx);
	ether_ifdetach(sc->ifp);
	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->watchdog_task);
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}
	callout_drain(&sc->co_hdl);
	ifmedia_removeall(&sc->media);
	mxge_dummy_rdma(sc, 0);
	mxge_rem_sysctls(sc);
	mxge_rem_irq(sc);
	mxge_free_rings(sc);
	mxge_free_slices(sc);
	mxge_dma_free(&sc->dmabench_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}
static int
mxge_shutdown(device_t dev)
{
	return 0;
}

/*
  This file uses Myri10GE driver indentation.

  Local Variables:
  c-file-style:"linux"
  tab-width:8
  End:
*/